/*
 *  linux/fs/fcntl.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/syscalls.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/fdtable.h>
#include <linux/capability.h>
#include <linux/dnotify.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/pipe_fs_i.h>
#include <linux/security.h>
#include <linux/ptrace.h>
#include <linux/signal.h>
#include <linux/rcupdate.h>
#include <linux/pid_namespace.h>
#include <linux/user_namespace.h>

#include <asm/poll.h>
#include <asm/siginfo.h>
#include <asm/uaccess.h>

void set_close_on_exec(unsigned int fd, int flag)
{
        struct files_struct *files = current->files;
        struct fdtable *fdt;
        spin_lock(&files->file_lock);
        fdt = files_fdtable(files);
        if (flag)
                __set_close_on_exec(fd, fdt);
        else
                __clear_close_on_exec(fd, fdt);
        spin_unlock(&files->file_lock);
}

static bool get_close_on_exec(unsigned int fd)
{
        struct files_struct *files = current->files;
        struct fdtable *fdt;
        bool res;
        rcu_read_lock();
        fdt = files_fdtable(files);
        res = close_on_exec(fd, fdt);
        rcu_read_unlock();
        return res;
}
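
/*
 * Illustrative userspace counterpart (a sketch, not part of this file):
 * the two helpers above back the F_GETFD/F_SETFD commands dispatched in
 * do_fcntl() below, e.g. the usual read-modify-write of FD_CLOEXEC:
 *
 *        int flags = fcntl(fd, F_GETFD);
 *        if (flags >= 0)
 *                fcntl(fd, F_SETFD, flags | FD_CLOEXEC);
 */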

SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags)
{
        int err = -EBADF;
        struct file * file, *tofree;
        struct files_struct * files = current->files;
        struct fdtable *fdt;

        if ((flags & ~O_CLOEXEC) != 0)
                return -EINVAL;

        if (unlikely(oldfd == newfd))
                return -EINVAL;

        spin_lock(&files->file_lock);
        err = expand_files(files, newfd);
        file = fcheck(oldfd);
        if (unlikely(!file))
                goto Ebadf;
        if (unlikely(err < 0)) {
                if (err == -EMFILE)
                        goto Ebadf;
                goto out_unlock;
        }
        /*
         * We need to detect attempts to do dup2() over allocated but still
         * not finished descriptor.  NB: OpenBSD avoids that at the price of
         * extra work in their equivalent of fget() - they insert struct
         * file immediately after grabbing descriptor, mark it larval if
         * more work (e.g. actual opening) is needed and make sure that
         * fget() treats larval files as absent.  Potentially interesting,
         * but while extra work in fget() is trivial, locking implications
         * and amount of surgery on open()-related paths in VFS are not.
         * FreeBSD fails with -EBADF in the same situation, NetBSD "solution"
         * deadlocks in rather amusing ways, AFAICS.  All of that is out of
         * scope of POSIX or SUS, since neither considers shared descriptor
         * tables and this condition does not arise without those.
         */
        err = -EBUSY;
        fdt = files_fdtable(files);
        tofree = fdt->fd[newfd];
        if (!tofree && fd_is_open(newfd, fdt))
                goto out_unlock;
        get_file(file);
        rcu_assign_pointer(fdt->fd[newfd], file);
        __set_open_fd(newfd, fdt);
        if (flags & O_CLOEXEC)
                __set_close_on_exec(newfd, fdt);
        else
                __clear_close_on_exec(newfd, fdt);
        spin_unlock(&files->file_lock);

        if (tofree)
                filp_close(tofree, files);

        return newfd;

Ebadf:
        err = -EBADF;
out_unlock:
        spin_unlock(&files->file_lock);
        return err;
}
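
/*
 * Illustrative userspace usage (a sketch): dup3() sets close-on-exec
 * atomically with the duplication, closing the race window left by
 * dup2() followed by fcntl(F_SETFD) in multithreaded programs:
 *
 *        #define _GNU_SOURCE
 *        #include <fcntl.h>
 *        #include <unistd.h>
 *
 *        int newfd = dup3(oldfd, 10, O_CLOEXEC);
 */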

SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
{
        if (unlikely(newfd == oldfd)) { /* corner case */
                struct files_struct *files = current->files;
                int retval = oldfd;

                rcu_read_lock();
                if (!fcheck_files(files, oldfd))
                        retval = -EBADF;
                rcu_read_unlock();
                return retval;
        }
        return sys_dup3(oldfd, newfd, 0);
}
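
/*
 * The corner case above is observable from userspace (an illustrative
 * sketch, not part of this file): unlike dup3(), dup2(fd, fd) is legal
 * and simply reports whether fd is currently open.
 *
 *        if (dup2(fd, fd) == fd)
 *                printf("fd %d is open\n", fd);
 *        else if (errno == EBADF)
 *                printf("fd %d is not open\n", fd);
 */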

SYSCALL_DEFINE1(dup, unsigned int, fildes)
{
        int ret = -EBADF;
        struct file *file = fget_raw(fildes);

        if (file) {
                ret = get_unused_fd();
                if (ret >= 0)
                        fd_install(ret, file);
                else
                        fput(file);
        }
        return ret;
}

#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)

static int setfl(int fd, struct file * filp, unsigned long arg)
{
        struct inode * inode = filp->f_path.dentry->d_inode;
        int error = 0;

        /*
         * O_APPEND cannot be cleared if the file is marked as append-only
         * and the file is open for write.
         */
        if (((arg ^ filp->f_flags) & O_APPEND) && IS_APPEND(inode))
                return -EPERM;

        /* O_NOATIME can only be set by the owner or superuser */
        if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME))
                if (!inode_owner_or_capable(inode))
                        return -EPERM;

        /* required for strict SunOS emulation */
        if (O_NONBLOCK != O_NDELAY)
                if (arg & O_NDELAY)
                        arg |= O_NONBLOCK;

        if (arg & O_DIRECT) {
                if (!filp->f_mapping || !filp->f_mapping->a_ops ||
                        !filp->f_mapping->a_ops->direct_IO)
                                return -EINVAL;
        }

        if (filp->f_op && filp->f_op->check_flags)
                error = filp->f_op->check_flags(arg);
        if (error)
                return error;

        /*
         * ->fasync() is responsible for setting the FASYNC bit.
         */
        if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op &&
                        filp->f_op->fasync) {
                error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
                if (error < 0)
                        goto out;
                if (error > 0)
                        error = 0;
        }
        spin_lock(&filp->f_lock);
        filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
        spin_unlock(&filp->f_lock);

 out:
        return error;
}
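
/*
 * Illustrative userspace usage (a sketch): the usual way to reach setfl()
 * is a read-modify-write of the status flags, e.g. to turn on O_NONBLOCK.
 * Only the bits in SETFL_MASK can actually be changed.
 *
 *        int flags = fcntl(fd, F_GETFL);
 *        if (flags >= 0)
 *                fcntl(fd, F_SETFL, flags | O_NONBLOCK);
 */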

static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
                     int force)
{
        write_lock_irq(&filp->f_owner.lock);
        if (force || !filp->f_owner.pid) {
                put_pid(filp->f_owner.pid);
                filp->f_owner.pid = get_pid(pid);
                filp->f_owner.pid_type = type;

                if (pid) {
                        const struct cred *cred = current_cred();
                        filp->f_owner.uid = cred->uid;
                        filp->f_owner.euid = cred->euid;
                }
        }
        write_unlock_irq(&filp->f_owner.lock);
}

int __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
                int force)
{
        int err;

        err = security_file_set_fowner(filp);
        if (err)
                return err;

        f_modown(filp, pid, type, force);
        return 0;
}
EXPORT_SYMBOL(__f_setown);

int f_setown(struct file *filp, unsigned long arg, int force)
{
        enum pid_type type;
        struct pid *pid;
        int who = arg;
        int result;
        type = PIDTYPE_PID;
        if (who < 0) {
                type = PIDTYPE_PGID;
                who = -who;
        }
        rcu_read_lock();
        pid = find_vpid(who);
        result = __f_setown(filp, pid, type, force);
        rcu_read_unlock();
        return result;
}
EXPORT_SYMBOL(f_setown);

void f_delown(struct file *filp)
{
        f_modown(filp, NULL, PIDTYPE_PID, 1);
}

pid_t f_getown(struct file *filp)
{
        pid_t pid;
        read_lock(&filp->f_owner.lock);
        pid = pid_vnr(filp->f_owner.pid);
        if (filp->f_owner.pid_type == PIDTYPE_PGID)
                pid = -pid;
        read_unlock(&filp->f_owner.lock);
        return pid;
}
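
/*
 * Illustrative userspace usage (a sketch): with the classic F_SETOWN
 * interface a positive argument names a process and a negative one a
 * process group, which is why f_getown() negates PGID owners above.
 *
 *        fcntl(fd, F_SETOWN, getpid());      direct SIGIO to this process
 *        fcntl(fd, F_SETOWN, -getpgrp());    ...or to the whole process group
 *        pid_t owner = fcntl(fd, F_GETOWN);  negative => process group
 */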

static int f_setown_ex(struct file *filp, unsigned long arg)
{
        struct f_owner_ex __user *owner_p = (void __user *)arg;
        struct f_owner_ex owner;
        struct pid *pid;
        int type;
        int ret;

        ret = copy_from_user(&owner, owner_p, sizeof(owner));
        if (ret)
                return -EFAULT;

        switch (owner.type) {
        case F_OWNER_TID:
                type = PIDTYPE_MAX;
                break;

        case F_OWNER_PID:
                type = PIDTYPE_PID;
                break;

        case F_OWNER_PGRP:
                type = PIDTYPE_PGID;
                break;

        default:
                return -EINVAL;
        }

        rcu_read_lock();
        pid = find_vpid(owner.pid);
        if (owner.pid && !pid)
                ret = -ESRCH;
        else
                ret = __f_setown(filp, pid, type, 1);
        rcu_read_unlock();

        return ret;
}

static int f_getown_ex(struct file *filp, unsigned long arg)
{
        struct f_owner_ex __user *owner_p = (void __user *)arg;
        struct f_owner_ex owner;
        int ret = 0;

        read_lock(&filp->f_owner.lock);
        owner.pid = pid_vnr(filp->f_owner.pid);
        switch (filp->f_owner.pid_type) {
        case PIDTYPE_MAX:
                owner.type = F_OWNER_TID;
                break;

        case PIDTYPE_PID:
                owner.type = F_OWNER_PID;
                break;

        case PIDTYPE_PGID:
                owner.type = F_OWNER_PGRP;
                break;

        default:
                WARN_ON(1);
                ret = -EINVAL;
                break;
        }
        read_unlock(&filp->f_owner.lock);

        if (!ret) {
                ret = copy_to_user(owner_p, &owner, sizeof(owner));
                if (ret)
                        ret = -EFAULT;
        }
        return ret;
}
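
/*
 * Illustrative userspace usage (a sketch): F_SETOWN_EX removes the sign
 * ambiguity of F_SETOWN and can also target a single thread
 * (F_OWNER_TID, mapped to PIDTYPE_MAX above).
 *
 *        struct f_owner_ex owner = {
 *                .type = F_OWNER_TID,
 *                .pid  = gettid(),
 *        };
 *        fcntl(fd, F_SETOWN_EX, &owner);
 */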

#ifdef CONFIG_CHECKPOINT_RESTORE
static int f_getowner_uids(struct file *filp, unsigned long arg)
{
        struct user_namespace *user_ns = current_user_ns();
        uid_t __user *dst = (void __user *)arg;
        uid_t src[2];
        int err;

        read_lock(&filp->f_owner.lock);
        src[0] = from_kuid(user_ns, filp->f_owner.uid);
        src[1] = from_kuid(user_ns, filp->f_owner.euid);
        read_unlock(&filp->f_owner.lock);

        err  = put_user(src[0], &dst[0]);
        err |= put_user(src[1], &dst[1]);

        return err;
}
#else
static int f_getowner_uids(struct file *filp, unsigned long arg)
{
        return -EINVAL;
}
#endif

static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
                struct file *filp)
{
        long err = -EINVAL;

        switch (cmd) {
        case F_DUPFD:
        case F_DUPFD_CLOEXEC:
                if (arg >= rlimit(RLIMIT_NOFILE))
                        break;
                err = alloc_fd(arg, cmd == F_DUPFD_CLOEXEC ? O_CLOEXEC : 0);
                if (err >= 0) {
                        get_file(filp);
                        fd_install(err, filp);
                }
                break;
        case F_GETFD:
                err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
                break;
        case F_SETFD:
                err = 0;
                set_close_on_exec(fd, arg & FD_CLOEXEC);
                break;
        case F_GETFL:
                err = filp->f_flags;
                break;
        case F_SETFL:
                err = setfl(fd, filp, arg);
                break;
        case F_GETLK:
                err = fcntl_getlk(filp, (struct flock __user *) arg);
                break;
        case F_SETLK:
        case F_SETLKW:
                err = fcntl_setlk(fd, filp, cmd, (struct flock __user *) arg);
                break;
        case F_GETOWN:
                /*
                 * XXX If f_owner is a process group, the
                 * negative return value will get converted
                 * into an error.  Oops.  If we keep the
                 * current syscall conventions, the only way
                 * to fix this will be in libc.
                 */
                err = f_getown(filp);
                force_successful_syscall_return();
                break;
        case F_SETOWN:
                err = f_setown(filp, arg, 1);
                break;
        case F_GETOWN_EX:
                err = f_getown_ex(filp, arg);
                break;
        case F_SETOWN_EX:
                err = f_setown_ex(filp, arg);
                break;
        case F_GETOWNER_UIDS:
                err = f_getowner_uids(filp, arg);
                break;
        case F_GETSIG:
                err = filp->f_owner.signum;
                break;
        case F_SETSIG:
                /* arg == 0 restores default behaviour. */
                if (!valid_signal(arg)) {
                        break;
                }
                err = 0;
                filp->f_owner.signum = arg;
                break;
        case F_GETLEASE:
                err = fcntl_getlease(filp);
                break;
        case F_SETLEASE:
                err = fcntl_setlease(fd, filp, arg);
                break;
        case F_NOTIFY:
                err = fcntl_dirnotify(fd, filp, arg);
                break;
        case F_SETPIPE_SZ:
        case F_GETPIPE_SZ:
                err = pipe_fcntl(filp, cmd, arg);
                break;
        default:
                break;
        }
        return err;
}
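
/*
 * Illustrative userspace usage (a sketch): a typical O_ASYNC setup ties
 * several of the commands above together; F_SETSIG selects a realtime
 * signal so that the queued siginfo carries the fd and poll band.
 *
 *        fcntl(fd, F_SETOWN, getpid());
 *        fcntl(fd, F_SETSIG, SIGRTMIN + 1);
 *        fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_ASYNC);
 */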

static int check_fcntl_cmd(unsigned cmd)
{
        switch (cmd) {
        case F_DUPFD:
        case F_DUPFD_CLOEXEC:
        case F_GETFD:
        case F_SETFD:
        case F_GETFL:
                return 1;
        }
        return 0;
}
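
/*
 * Observable effect (a sketch): on an O_PATH descriptor (FMODE_PATH)
 * only the commands whitelisted above are allowed; every other fcntl
 * command fails with EBADF.
 *
 *        int fd = open("/etc/passwd", O_PATH);
 *        fcntl(fd, F_GETFL);             succeeds
 *        fcntl(fd, F_GETOWN);            fails with EBADF
 */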

SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
{
        struct file *filp;
        int fput_needed;
        long err = -EBADF;

        filp = fget_raw_light(fd, &fput_needed);
        if (!filp)
                goto out;

        if (unlikely(filp->f_mode & FMODE_PATH)) {
                if (!check_fcntl_cmd(cmd))
                        goto out1;
        }

        err = security_file_fcntl(filp, cmd, arg);
        if (!err)
                err = do_fcntl(fd, cmd, arg, filp);

out1:
        fput_light(filp, fput_needed);
out:
        return err;
}

#if BITS_PER_LONG == 32
SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
                unsigned long, arg)
{
        struct file * filp;
        long err = -EBADF;
        int fput_needed;

        filp = fget_raw_light(fd, &fput_needed);
        if (!filp)
                goto out;

        if (unlikely(filp->f_mode & FMODE_PATH)) {
                if (!check_fcntl_cmd(cmd))
                        goto out1;
        }

        err = security_file_fcntl(filp, cmd, arg);
        if (err)
                goto out1;

        switch (cmd) {
                case F_GETLK64:
                        err = fcntl_getlk64(filp, (struct flock64 __user *) arg);
                        break;
                case F_SETLK64:
                case F_SETLKW64:
                        err = fcntl_setlk64(fd, filp, cmd,
                                        (struct flock64 __user *) arg);
                        break;
                default:
                        err = do_fcntl(fd, cmd, arg, filp);
                        break;
        }
out1:
        fput_light(filp, fput_needed);
out:
        return err;
}
#endif

/* Table to convert sigio signal codes into poll band bitmaps */

static const long band_table[NSIGPOLL] = {
        POLLIN | POLLRDNORM,                    /* POLL_IN */
        POLLOUT | POLLWRNORM | POLLWRBAND,      /* POLL_OUT */
        POLLIN | POLLRDNORM | POLLMSG,          /* POLL_MSG */
        POLLERR,                                /* POLL_ERR */
        POLLPRI | POLLRDBAND,                   /* POLL_PRI */
        POLLHUP | POLLERR                       /* POLL_HUP */
};
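
/*
 * Illustrative userspace consumer (a sketch): with F_SETSIG set to a
 * realtime signal, a SA_SIGINFO handler sees the band bits above in
 * si_band and the descriptor in si_fd.
 *
 *        static void on_io(int sig, siginfo_t *si, void *ctx)
 *        {
 *                if (si->si_code == POLL_IN && (si->si_band & POLLIN))
 *                        handle_readable(si->si_fd);  (hypothetical helper)
 *        }
 */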

static inline int sigio_perm(struct task_struct *p,
                             struct fown_struct *fown, int sig)
{
        const struct cred *cred;
        int ret;

        rcu_read_lock();
        cred = __task_cred(p);
        ret = ((uid_eq(fown->euid, GLOBAL_ROOT_UID) ||
                uid_eq(fown->euid, cred->suid) || uid_eq(fown->euid, cred->uid) ||
                uid_eq(fown->uid,  cred->suid) || uid_eq(fown->uid,  cred->uid)) &&
               !security_file_send_sigiotask(p, fown, sig));
        rcu_read_unlock();
        return ret;
}

static void send_sigio_to_task(struct task_struct *p,
                               struct fown_struct *fown,
                               int fd, int reason, int group)
{
        /*
         * F_SETSIG can change ->signum lockless in parallel, make
         * sure we read it once and use the same value throughout.
         */
        int signum = ACCESS_ONCE(fown->signum);

        if (!sigio_perm(p, fown, signum))
                return;

        switch (signum) {
                siginfo_t si;
                default:
                        /* Queue a rt signal with the appropriate fd as its
                           value.  We use SI_SIGIO as the source, not
                           SI_KERNEL, since kernel signals always get
                           delivered even if we can't queue.  Failure to
                           queue in this case _should_ be reported; we fall
                           back to SIGIO in that case. --sct */
                        si.si_signo = signum;
                        si.si_errno = 0;
                        si.si_code  = reason;
                        /* Make sure we are called with one of the POLL_*
                           reasons, otherwise we could leak kernel stack into
                           userspace.  */
                        BUG_ON((reason & __SI_MASK) != __SI_POLL);
                        if (reason - POLL_IN >= NSIGPOLL)
                                si.si_band  = ~0L;
                        else
                                si.si_band = band_table[reason - POLL_IN];
                        si.si_fd    = fd;
                        if (!do_send_sig_info(signum, &si, p, group))
                                break;
                /* fall-through: fall back on the old plain SIGIO signal */
                case 0:
                        do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, group);
        }
}

void send_sigio(struct fown_struct *fown, int fd, int band)
{
        struct task_struct *p;
        enum pid_type type;
        struct pid *pid;
        int group = 1;

        read_lock(&fown->lock);

        type = fown->pid_type;
        if (type == PIDTYPE_MAX) {
                group = 0;
                type = PIDTYPE_PID;
        }

        pid = fown->pid;
        if (!pid)
                goto out_unlock_fown;

        read_lock(&tasklist_lock);
        do_each_pid_task(pid, type, p) {
                send_sigio_to_task(p, fown, fd, band, group);
        } while_each_pid_task(pid, type, p);
        read_unlock(&tasklist_lock);
 out_unlock_fown:
        read_unlock(&fown->lock);
}

static void send_sigurg_to_task(struct task_struct *p,
                                struct fown_struct *fown, int group)
{
        if (sigio_perm(p, fown, SIGURG))
                do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, group);
}

int send_sigurg(struct fown_struct *fown)
{
        struct task_struct *p;
        enum pid_type type;
        struct pid *pid;
        int group = 1;
        int ret = 0;

        read_lock(&fown->lock);

        type = fown->pid_type;
        if (type == PIDTYPE_MAX) {
                group = 0;
                type = PIDTYPE_PID;
        }

        pid = fown->pid;
        if (!pid)
                goto out_unlock_fown;

        ret = 1;

        read_lock(&tasklist_lock);
        do_each_pid_task(pid, type, p) {
                send_sigurg_to_task(p, fown, group);
        } while_each_pid_task(pid, type, p);
        read_unlock(&tasklist_lock);
 out_unlock_fown:
        read_unlock(&fown->lock);
        return ret;
}

static DEFINE_SPINLOCK(fasync_lock);
static struct kmem_cache *fasync_cache __read_mostly;

static void fasync_free_rcu(struct rcu_head *head)
{
        kmem_cache_free(fasync_cache,
                        container_of(head, struct fasync_struct, fa_rcu));
}

/*
 * Remove a fasync entry. If successfully removed, return
 * positive and clear the FASYNC flag. If no entry exists,
 * do nothing and return 0.
 *
 * NOTE! It is very important that the FASYNC flag always
 * match the state "is the filp on a fasync list".
 */
int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
{
        struct fasync_struct *fa, **fp;
        int result = 0;

        spin_lock(&filp->f_lock);
        spin_lock(&fasync_lock);
        for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
                if (fa->fa_file != filp)
                        continue;

                spin_lock_irq(&fa->fa_lock);
                fa->fa_file = NULL;
                spin_unlock_irq(&fa->fa_lock);

                *fp = fa->fa_next;
                call_rcu(&fa->fa_rcu, fasync_free_rcu);
                filp->f_flags &= ~FASYNC;
                result = 1;
                break;
        }
        spin_unlock(&fasync_lock);
        spin_unlock(&filp->f_lock);
        return result;
}

struct fasync_struct *fasync_alloc(void)
{
        return kmem_cache_alloc(fasync_cache, GFP_KERNEL);
}

/*
 * NOTE! This can be used only for unused fasync entries:
 * entries that actually got inserted on the fasync list
 * need to be released by rcu - see fasync_remove_entry.
 */
void fasync_free(struct fasync_struct *new)
{
        kmem_cache_free(fasync_cache, new);
}

/*
 * Insert a new entry into the fasync list.  Return the pointer to the
 * old one if we didn't use the new one.
 *
 * NOTE! It is very important that the FASYNC flag always
 * match the state "is the filp on a fasync list".
 */
struct fasync_struct *fasync_insert_entry(int fd, struct file *filp, struct fasync_struct **fapp, struct fasync_struct *new)
{
        struct fasync_struct *fa, **fp;

        spin_lock(&filp->f_lock);
        spin_lock(&fasync_lock);
        for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
                if (fa->fa_file != filp)
                        continue;

                spin_lock_irq(&fa->fa_lock);
                fa->fa_fd = fd;
                spin_unlock_irq(&fa->fa_lock);
                goto out;
        }

        spin_lock_init(&new->fa_lock);
        new->magic = FASYNC_MAGIC;
        new->fa_file = filp;
        new->fa_fd = fd;
        new->fa_next = *fapp;
        rcu_assign_pointer(*fapp, new);
        filp->f_flags |= FASYNC;

out:
        spin_unlock(&fasync_lock);
        spin_unlock(&filp->f_lock);
        return fa;
}

/*
 * Add a fasync entry. Return negative on error, positive if
 * added, and zero if did nothing but change an existing one.
 */
static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp)
{
        struct fasync_struct *new;

        new = fasync_alloc();
        if (!new)
                return -ENOMEM;

        /*
         * fasync_insert_entry() returns the old (update) entry if
         * it existed.
         *
         * So free the (unused) new entry and return 0 to let the
         * caller know that we didn't add any new fasync entries.
         */
        if (fasync_insert_entry(fd, filp, fapp, new)) {
                fasync_free(new);
                return 0;
        }

        return 1;
}

/*
 * fasync_helper() is used by almost all character device drivers
 * to set up the fasync queue, and for regular files by the file
 * lease code. It returns negative on error, 0 if it did no changes
 * and positive if it added/deleted the entry.
 */
int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp)
{
        if (!on)
                return fasync_remove_entry(filp, fapp);
        return fasync_add_entry(fd, filp, fapp);
}

EXPORT_SYMBOL(fasync_helper);
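
/*
 * Typical driver-side usage (a sketch, assuming a hypothetical device
 * structure "struct mydev" that keeps its own fasync list head):
 *
 *        static int mydev_fasync(int fd, struct file *filp, int on)
 *        {
 *                struct mydev *dev = filp->private_data;
 *
 *                return fasync_helper(fd, filp, on, &dev->async_queue);
 *        }
 *
 * wired up as the ->fasync method in the driver's file_operations.
 */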

/*
 * rcu_read_lock() is held
 */
static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
{
        while (fa) {
                struct fown_struct *fown;
                unsigned long flags;

                if (fa->magic != FASYNC_MAGIC) {
                        printk(KERN_ERR "kill_fasync: bad magic number in "
                               "fasync_struct!\n");
                        return;
                }
                spin_lock_irqsave(&fa->fa_lock, flags);
                if (fa->fa_file) {
                        fown = &fa->fa_file->f_owner;
                        /* Don't send SIGURG to processes which have not set a
                           queued signum: SIGURG has its own default signalling
                           mechanism. */
                        if (!(sig == SIGURG && fown->signum == 0))
                                send_sigio(fown, fa->fa_fd, band);
                }
                spin_unlock_irqrestore(&fa->fa_lock, flags);
                fa = rcu_dereference(fa->fa_next);
        }
}

void kill_fasync(struct fasync_struct **fp, int sig, int band)
{
        /* First a quick test without locking: usually
         * the list is empty.
         */
        if (*fp) {
                rcu_read_lock();
                kill_fasync_rcu(rcu_dereference(*fp), sig, band);
                rcu_read_unlock();
        }
}
EXPORT_SYMBOL(kill_fasync);
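
/*
 * The matching notification side (a sketch, same hypothetical driver as
 * above): when new data arrives, signal every registered listener.
 *
 *        kill_fasync(&dev->async_queue, SIGIO, POLL_IN);
 *
 * The POLL_IN reason is translated to a band of POLLIN | POLLRDNORM via
 * band_table[] above when a realtime signal is queued.
 */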

static int __init fcntl_init(void)
{
        /*
         * Please add new bits here to ensure allocation uniqueness.
         * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
         * is defined as O_NONBLOCK on some platforms and not on others.
         */
        BUILD_BUG_ON(19 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
                O_RDONLY        | O_WRONLY      | O_RDWR        |
                O_CREAT         | O_EXCL        | O_NOCTTY      |
                O_TRUNC         | O_APPEND      | /* O_NONBLOCK | */
                __O_SYNC        | O_DSYNC       | FASYNC        |
                O_DIRECT        | O_LARGEFILE   | O_DIRECTORY   |
                O_NOFOLLOW      | O_NOATIME     | O_CLOEXEC     |
                __FMODE_EXEC    | O_PATH
                ));

        fasync_cache = kmem_cache_create("fasync_cache",
                sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL);
        return 0;
}

module_init(fcntl_init)