linux/fs/fcntl.c
<<
>>
Prefs
   1/*
   2 *  linux/fs/fcntl.c
   3 *
   4 *  Copyright (C) 1991, 1992  Linus Torvalds
   5 */
   6
   7#include <linux/syscalls.h>
   8#include <linux/init.h>
   9#include <linux/mm.h>
  10#include <linux/fs.h>
  11#include <linux/file.h>
  12#include <linux/capability.h>
  13#include <linux/dnotify.h>
  14#include <linux/smp_lock.h>
  15#include <linux/slab.h>
  16#include <linux/module.h>
  17#include <linux/security.h>
  18#include <linux/ptrace.h>
  19#include <linux/signal.h>
  20#include <linux/rcupdate.h>
  21
  22#include <asm/poll.h>
  23#include <asm/siginfo.h>
  24#include <asm/uaccess.h>
  25
  26void fastcall set_close_on_exec(unsigned int fd, int flag)
  27{
  28        struct files_struct *files = current->files;
  29        struct fdtable *fdt;
  30        spin_lock(&files->file_lock);
  31        fdt = files_fdtable(files);
  32        if (flag)
  33                FD_SET(fd, fdt->close_on_exec);
  34        else
  35                FD_CLR(fd, fdt->close_on_exec);
  36        spin_unlock(&files->file_lock);
  37}
  38
  39static int get_close_on_exec(unsigned int fd)
  40{
  41        struct files_struct *files = current->files;
  42        struct fdtable *fdt;
  43        int res;
  44        rcu_read_lock();
  45        fdt = files_fdtable(files);
  46        res = FD_ISSET(fd, fdt->close_on_exec);
  47        rcu_read_unlock();
  48        return res;
  49}
  50
  51/*
  52 * locate_fd finds a free file descriptor in the open_fds fdset,
  53 * expanding the fd arrays if necessary.  Must be called with the
  54 * file_lock held for write.
  55 */
  56
  57static int locate_fd(struct files_struct *files, 
  58                            struct file *file, unsigned int orig_start)
  59{
  60        unsigned int newfd;
  61        unsigned int start;
  62        int error;
  63        struct fdtable *fdt;
  64
  65        error = -EINVAL;
  66        if (orig_start >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
  67                goto out;
  68
  69repeat:
  70        fdt = files_fdtable(files);
  71        /*
  72         * Someone might have closed fd's in the range
  73         * orig_start..fdt->next_fd
  74         */
  75        start = orig_start;
  76        if (start < files->next_fd)
  77                start = files->next_fd;
  78
  79        newfd = start;
  80        if (start < fdt->max_fds)
  81                newfd = find_next_zero_bit(fdt->open_fds->fds_bits,
  82                                           fdt->max_fds, start);
  83        
  84        error = -EMFILE;
  85        if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
  86                goto out;
  87
  88        error = expand_files(files, newfd);
  89        if (error < 0)
  90                goto out;
  91
  92        /*
  93         * If we needed to expand the fs array we
  94         * might have blocked - try again.
  95         */
  96        if (error)
  97                goto repeat;
  98
  99        /*
 100         * We reacquired files_lock, so we are safe as long as
 101         * we reacquire the fdtable pointer and use it while holding
 102         * the lock, no one can free it during that time.
 103         */
 104        if (start <= files->next_fd)
 105                files->next_fd = newfd + 1;
 106
 107        error = newfd;
 108        
 109out:
 110        return error;
 111}
 112
 113static int dupfd(struct file *file, unsigned int start)
 114{
 115        struct files_struct * files = current->files;
 116        struct fdtable *fdt;
 117        int fd;
 118
 119        spin_lock(&files->file_lock);
 120        fd = locate_fd(files, file, start);
 121        if (fd >= 0) {
 122                /* locate_fd() may have expanded fdtable, load the ptr */
 123                fdt = files_fdtable(files);
 124                FD_SET(fd, fdt->open_fds);
 125                FD_CLR(fd, fdt->close_on_exec);
 126                spin_unlock(&files->file_lock);
 127                fd_install(fd, file);
 128        } else {
 129                spin_unlock(&files->file_lock);
 130                fput(file);
 131        }
 132
 133        return fd;
 134}
 135
 136asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
 137{
 138        int err = -EBADF;
 139        struct file * file, *tofree;
 140        struct files_struct * files = current->files;
 141        struct fdtable *fdt;
 142
 143        spin_lock(&files->file_lock);
 144        if (!(file = fcheck(oldfd)))
 145                goto out_unlock;
 146        err = newfd;
 147        if (newfd == oldfd)
 148                goto out_unlock;
 149        err = -EBADF;
 150        if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
 151                goto out_unlock;
 152        get_file(file);                 /* We are now finished with oldfd */
 153
 154        err = expand_files(files, newfd);
 155        if (err < 0)
 156                goto out_fput;
 157
 158        /* To avoid races with open() and dup(), we will mark the fd as
 159         * in-use in the open-file bitmap throughout the entire dup2()
 160         * process.  This is quite safe: do_close() uses the fd array
 161         * entry, not the bitmap, to decide what work needs to be
 162         * done.  --sct */
 163        /* Doesn't work. open() might be there first. --AV */
 164
 165        /* Yes. It's a race. In user space. Nothing sane to do */
 166        err = -EBUSY;
 167        fdt = files_fdtable(files);
 168        tofree = fdt->fd[newfd];
 169        if (!tofree && FD_ISSET(newfd, fdt->open_fds))
 170                goto out_fput;
 171
 172        rcu_assign_pointer(fdt->fd[newfd], file);
 173        FD_SET(newfd, fdt->open_fds);
 174        FD_CLR(newfd, fdt->close_on_exec);
 175        spin_unlock(&files->file_lock);
 176
 177        if (tofree)
 178                filp_close(tofree, files);
 179        err = newfd;
 180out:
 181        return err;
 182out_unlock:
 183        spin_unlock(&files->file_lock);
 184        goto out;
 185
 186out_fput:
 187        spin_unlock(&files->file_lock);
 188        fput(file);
 189        goto out;
 190}
 191
 192asmlinkage long sys_dup(unsigned int fildes)
 193{
 194        int ret = -EBADF;
 195        struct file * file = fget(fildes);
 196
 197        if (file)
 198                ret = dupfd(file, 0);
 199        return ret;
 200}
 201
 202#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | FASYNC | O_DIRECT | O_NOATIME)
 203
 204static int setfl(int fd, struct file * filp, unsigned long arg)
 205{
 206        struct inode * inode = filp->f_path.dentry->d_inode;
 207        int error = 0;
 208
 209        /*
 210         * O_APPEND cannot be cleared if the file is marked as append-only
 211         * and the file is open for write.
 212         */
 213        if (((arg ^ filp->f_flags) & O_APPEND) && IS_APPEND(inode))
 214                return -EPERM;
 215
 216        /* O_NOATIME can only be set by the owner or superuser */
 217        if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME))
 218                if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER))
 219                        return -EPERM;
 220
 221        /* required for strict SunOS emulation */
 222        if (O_NONBLOCK != O_NDELAY)
 223               if (arg & O_NDELAY)
 224                   arg |= O_NONBLOCK;
 225
 226        if (arg & O_DIRECT) {
 227                if (!filp->f_mapping || !filp->f_mapping->a_ops ||
 228                        !filp->f_mapping->a_ops->direct_IO)
 229                                return -EINVAL;
 230        }
 231
 232        if (filp->f_op && filp->f_op->check_flags)
 233                error = filp->f_op->check_flags(arg);
 234        if (error)
 235                return error;
 236
 237        lock_kernel();
 238        if ((arg ^ filp->f_flags) & FASYNC) {
 239                if (filp->f_op && filp->f_op->fasync) {
 240                        error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
 241                        if (error < 0)
 242                                goto out;
 243                }
 244        }
 245
 246        filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
 247 out:
 248        unlock_kernel();
 249        return error;
 250}
 251
 252static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
 253                     uid_t uid, uid_t euid, int force)
 254{
 255        write_lock_irq(&filp->f_owner.lock);
 256        if (force || !filp->f_owner.pid) {
 257                put_pid(filp->f_owner.pid);
 258                filp->f_owner.pid = get_pid(pid);
 259                filp->f_owner.pid_type = type;
 260                filp->f_owner.uid = uid;
 261                filp->f_owner.euid = euid;
 262        }
 263        write_unlock_irq(&filp->f_owner.lock);
 264}
 265
 266int __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
 267                int force)
 268{
 269        int err;
 270        
 271        err = security_file_set_fowner(filp);
 272        if (err)
 273                return err;
 274
 275        f_modown(filp, pid, type, current->uid, current->euid, force);
 276        return 0;
 277}
 278EXPORT_SYMBOL(__f_setown);
 279
 280int f_setown(struct file *filp, unsigned long arg, int force)
 281{
 282        enum pid_type type;
 283        struct pid *pid;
 284        int who = arg;
 285        int result;
 286        type = PIDTYPE_PID;
 287        if (who < 0) {
 288                type = PIDTYPE_PGID;
 289                who = -who;
 290        }
 291        rcu_read_lock();
 292        pid = find_pid(who);
 293        result = __f_setown(filp, pid, type, force);
 294        rcu_read_unlock();
 295        return result;
 296}
 297EXPORT_SYMBOL(f_setown);
 298
 299void f_delown(struct file *filp)
 300{
 301        f_modown(filp, NULL, PIDTYPE_PID, 0, 0, 1);
 302}
 303
 304pid_t f_getown(struct file *filp)
 305{
 306        pid_t pid;
 307        read_lock(&filp->f_owner.lock);
 308        pid = pid_nr(filp->f_owner.pid);
 309        if (filp->f_owner.pid_type == PIDTYPE_PGID)
 310                pid = -pid;
 311        read_unlock(&filp->f_owner.lock);
 312        return pid;
 313}
 314
 315static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 316                struct file *filp)
 317{
 318        long err = -EINVAL;
 319
 320        switch (cmd) {
 321        case F_DUPFD:
 322                get_file(filp);
 323                err = dupfd(filp, arg);
 324                break;
 325        case F_GETFD:
 326                err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
 327                break;
 328        case F_SETFD:
 329                err = 0;
 330                set_close_on_exec(fd, arg & FD_CLOEXEC);
 331                break;
 332        case F_GETFL:
 333                err = filp->f_flags;
 334                break;
 335        case F_SETFL:
 336                err = setfl(fd, filp, arg);
 337                break;
 338        case F_GETLK:
 339                err = fcntl_getlk(filp, (struct flock __user *) arg);
 340                break;
 341        case F_SETLK:
 342        case F_SETLKW:
 343                err = fcntl_setlk(fd, filp, cmd, (struct flock __user *) arg);
 344                break;
 345        case F_GETOWN:
 346                /*
 347                 * XXX If f_owner is a process group, the
 348                 * negative return value will get converted
 349                 * into an error.  Oops.  If we keep the
 350                 * current syscall conventions, the only way
 351                 * to fix this will be in libc.
 352                 */
 353                err = f_getown(filp);
 354                force_successful_syscall_return();
 355                break;
 356        case F_SETOWN:
 357                err = f_setown(filp, arg, 1);
 358                break;
 359        case F_GETSIG:
 360                err = filp->f_owner.signum;
 361                break;
 362        case F_SETSIG:
 363                /* arg == 0 restores default behaviour. */
 364                if (!valid_signal(arg)) {
 365                        break;
 366                }
 367                err = 0;
 368                filp->f_owner.signum = arg;
 369                break;
 370        case F_GETLEASE:
 371                err = fcntl_getlease(filp);
 372                break;
 373        case F_SETLEASE:
 374                err = fcntl_setlease(fd, filp, arg);
 375                break;
 376        case F_NOTIFY:
 377                err = fcntl_dirnotify(fd, filp, arg);
 378                break;
 379        default:
 380                break;
 381        }
 382        return err;
 383}
 384
 385asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg)
 386{       
 387        struct file *filp;
 388        long err = -EBADF;
 389
 390        filp = fget(fd);
 391        if (!filp)
 392                goto out;
 393
 394        err = security_file_fcntl(filp, cmd, arg);
 395        if (err) {
 396                fput(filp);
 397                return err;
 398        }
 399
 400        err = do_fcntl(fd, cmd, arg, filp);
 401
 402        fput(filp);
 403out:
 404        return err;
 405}
 406
 407#if BITS_PER_LONG == 32
 408asmlinkage long sys_fcntl64(unsigned int fd, unsigned int cmd, unsigned long arg)
 409{       
 410        struct file * filp;
 411        long err;
 412
 413        err = -EBADF;
 414        filp = fget(fd);
 415        if (!filp)
 416                goto out;
 417
 418        err = security_file_fcntl(filp, cmd, arg);
 419        if (err) {
 420                fput(filp);
 421                return err;
 422        }
 423        err = -EBADF;
 424        
 425        switch (cmd) {
 426                case F_GETLK64:
 427                        err = fcntl_getlk64(filp, (struct flock64 __user *) arg);
 428                        break;
 429                case F_SETLK64:
 430                case F_SETLKW64:
 431                        err = fcntl_setlk64(fd, filp, cmd,
 432                                        (struct flock64 __user *) arg);
 433                        break;
 434                default:
 435                        err = do_fcntl(fd, cmd, arg, filp);
 436                        break;
 437        }
 438        fput(filp);
 439out:
 440        return err;
 441}
 442#endif
 443
 444/* Table to convert sigio signal codes into poll band bitmaps */
 445
 446static const long band_table[NSIGPOLL] = {
 447        POLLIN | POLLRDNORM,                    /* POLL_IN */
 448        POLLOUT | POLLWRNORM | POLLWRBAND,      /* POLL_OUT */
 449        POLLIN | POLLRDNORM | POLLMSG,          /* POLL_MSG */
 450        POLLERR,                                /* POLL_ERR */
 451        POLLPRI | POLLRDBAND,                   /* POLL_PRI */
 452        POLLHUP | POLLERR                       /* POLL_HUP */
 453};
 454
 455static inline int sigio_perm(struct task_struct *p,
 456                             struct fown_struct *fown, int sig)
 457{
 458        return (((fown->euid == 0) ||
 459                 (fown->euid == p->suid) || (fown->euid == p->uid) ||
 460                 (fown->uid == p->suid) || (fown->uid == p->uid)) &&
 461                !security_file_send_sigiotask(p, fown, sig));
 462}
 463
 464static void send_sigio_to_task(struct task_struct *p,
 465                               struct fown_struct *fown, 
 466                               int fd,
 467                               int reason)
 468{
 469        if (!sigio_perm(p, fown, fown->signum))
 470                return;
 471
 472        switch (fown->signum) {
 473                siginfo_t si;
 474                default:
 475                        /* Queue a rt signal with the appropriate fd as its
 476                           value.  We use SI_SIGIO as the source, not 
 477                           SI_KERNEL, since kernel signals always get 
 478                           delivered even if we can't queue.  Failure to
 479                           queue in this case _should_ be reported; we fall
 480                           back to SIGIO in that case. --sct */
 481                        si.si_signo = fown->signum;
 482                        si.si_errno = 0;
 483                        si.si_code  = reason;
 484                        /* Make sure we are called with one of the POLL_*
 485                           reasons, otherwise we could leak kernel stack into
 486                           userspace.  */
 487                        BUG_ON((reason & __SI_MASK) != __SI_POLL);
 488                        if (reason - POLL_IN >= NSIGPOLL)
 489                                si.si_band  = ~0L;
 490                        else
 491                                si.si_band = band_table[reason - POLL_IN];
 492                        si.si_fd    = fd;
 493                        if (!group_send_sig_info(fown->signum, &si, p))
 494                                break;
 495                /* fall-through: fall back on the old plain SIGIO signal */
 496                case 0:
 497                        group_send_sig_info(SIGIO, SEND_SIG_PRIV, p);
 498        }
 499}
 500
 501void send_sigio(struct fown_struct *fown, int fd, int band)
 502{
 503        struct task_struct *p;
 504        enum pid_type type;
 505        struct pid *pid;
 506        
 507        read_lock(&fown->lock);
 508        type = fown->pid_type;
 509        pid = fown->pid;
 510        if (!pid)
 511                goto out_unlock_fown;
 512        
 513        read_lock(&tasklist_lock);
 514        do_each_pid_task(pid, type, p) {
 515                send_sigio_to_task(p, fown, fd, band);
 516        } while_each_pid_task(pid, type, p);
 517        read_unlock(&tasklist_lock);
 518 out_unlock_fown:
 519        read_unlock(&fown->lock);
 520}
 521
 522static void send_sigurg_to_task(struct task_struct *p,
 523                                struct fown_struct *fown)
 524{
 525        if (sigio_perm(p, fown, SIGURG))
 526                group_send_sig_info(SIGURG, SEND_SIG_PRIV, p);
 527}
 528
 529int send_sigurg(struct fown_struct *fown)
 530{
 531        struct task_struct *p;
 532        enum pid_type type;
 533        struct pid *pid;
 534        int ret = 0;
 535        
 536        read_lock(&fown->lock);
 537        type = fown->pid_type;
 538        pid = fown->pid;
 539        if (!pid)
 540                goto out_unlock_fown;
 541
 542        ret = 1;
 543        
 544        read_lock(&tasklist_lock);
 545        do_each_pid_task(pid, type, p) {
 546                send_sigurg_to_task(p, fown);
 547        } while_each_pid_task(pid, type, p);
 548        read_unlock(&tasklist_lock);
 549 out_unlock_fown:
 550        read_unlock(&fown->lock);
 551        return ret;
 552}
 553
 554static DEFINE_RWLOCK(fasync_lock);
 555static struct kmem_cache *fasync_cache __read_mostly;
 556
 557/*
 558 * fasync_helper() is used by some character device drivers (mainly mice)
 559 * to set up the fasync queue. It returns negative on error, 0 if it did
 560 * no changes and positive if it added/deleted the entry.
 561 */
 562int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp)
 563{
 564        struct fasync_struct *fa, **fp;
 565        struct fasync_struct *new = NULL;
 566        int result = 0;
 567
 568        if (on) {
 569                new = kmem_cache_alloc(fasync_cache, GFP_KERNEL);
 570                if (!new)
 571                        return -ENOMEM;
 572        }
 573        write_lock_irq(&fasync_lock);
 574        for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
 575                if (fa->fa_file == filp) {
 576                        if(on) {
 577                                fa->fa_fd = fd;
 578                                kmem_cache_free(fasync_cache, new);
 579                        } else {
 580                                *fp = fa->fa_next;
 581                                kmem_cache_free(fasync_cache, fa);
 582                                result = 1;
 583                        }
 584                        goto out;
 585                }
 586        }
 587
 588        if (on) {
 589                new->magic = FASYNC_MAGIC;
 590                new->fa_file = filp;
 591                new->fa_fd = fd;
 592                new->fa_next = *fapp;
 593                *fapp = new;
 594                result = 1;
 595        }
 596out:
 597        write_unlock_irq(&fasync_lock);
 598        return result;
 599}
 600
 601EXPORT_SYMBOL(fasync_helper);
 602
 603void __kill_fasync(struct fasync_struct *fa, int sig, int band)
 604{
 605        while (fa) {
 606                struct fown_struct * fown;
 607                if (fa->magic != FASYNC_MAGIC) {
 608                        printk(KERN_ERR "kill_fasync: bad magic number in "
 609                               "fasync_struct!\n");
 610                        return;
 611                }
 612                fown = &fa->fa_file->f_owner;
 613                /* Don't send SIGURG to processes which have not set a
 614                   queued signum: SIGURG has its own default signalling
 615                   mechanism. */
 616                if (!(sig == SIGURG && fown->signum == 0))
 617                        send_sigio(fown, fa->fa_fd, band);
 618                fa = fa->fa_next;
 619        }
 620}
 621
 622EXPORT_SYMBOL(__kill_fasync);
 623
 624void kill_fasync(struct fasync_struct **fp, int sig, int band)
 625{
 626        /* First a quick test without locking: usually
 627         * the list is empty.
 628         */
 629        if (*fp) {
 630                read_lock(&fasync_lock);
 631                /* reread *fp after obtaining the lock */
 632                __kill_fasync(*fp, sig, band);
 633                read_unlock(&fasync_lock);
 634        }
 635}
 636EXPORT_SYMBOL(kill_fasync);
 637
 638static int __init fasync_init(void)
 639{
 640        fasync_cache = kmem_cache_create("fasync_cache",
 641                sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL, NULL);
 642        return 0;
 643}
 644
 645module_init(fasync_init)
 646
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.