linux-old/fs/fcntl.c
<<
>>
Prefs
   1/*
   2 *  linux/fs/fcntl.c
   3 *
   4 *  Copyright (C) 1991, 1992  Linus Torvalds
   5 */
   6
   7#include <linux/init.h>
   8#include <linux/mm.h>
   9#include <linux/file.h>
  10#include <linux/dnotify.h>
  11#include <linux/smp_lock.h>
  12#include <linux/slab.h>
  13#include <linux/iobuf.h>
  14#include <linux/ptrace.h>
  15
  16#include <asm/poll.h>
  17#include <asm/siginfo.h>
  18#include <asm/uaccess.h>
  19
  20extern int sock_fcntl (struct file *, unsigned int cmd, unsigned long arg);
  21extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg);
  22extern int fcntl_getlease(struct file *filp);
  23
  24/* Expand files.  Return <0 on error; 0 nothing done; 1 files expanded,
  25 * we may have blocked. 
  26 *
  27 * Should be called with the files->file_lock spinlock held for write.
  28 */
  29static int expand_files(struct files_struct *files, int nr)
  30{
  31        int err, expand = 0;
  32#ifdef FDSET_DEBUG      
  33        printk (KERN_ERR __FUNCTION__ " %d: nr = %d\n", current->pid, nr);
  34#endif
  35        
  36        if (nr >= files->max_fdset) {
  37                expand = 1;
  38                if ((err = expand_fdset(files, nr)))
  39                        goto out;
  40        }
  41        if (nr >= files->max_fds) {
  42                expand = 1;
  43                if ((err = expand_fd_array(files, nr)))
  44                        goto out;
  45        }
  46        err = expand;
  47 out:
  48#ifdef FDSET_DEBUG      
  49        if (err)
  50                printk (KERN_ERR __FUNCTION__ " %d: return %d\n", current->pid, err);
  51#endif
  52        return err;
  53}
  54
  55/*
  56 * locate_fd finds a free file descriptor in the open_fds fdset,
  57 * expanding the fd arrays if necessary.  The files write lock will be
  58 * held on exit to ensure that the fd can be entered atomically.
  59 */
  60
  61static int locate_fd(struct files_struct *files, 
  62                            struct file *file, int orig_start)
  63{
  64        unsigned int newfd;
  65        int error;
  66        int start;
  67
  68        write_lock(&files->file_lock);
  69        
  70        error = -EINVAL;
  71        if (orig_start >= current->rlim[RLIMIT_NOFILE].rlim_cur)
  72                goto out;
  73
  74repeat:
  75        /*
  76         * Someone might have closed fd's in the range
  77         * orig_start..files->next_fd
  78         */
  79        start = orig_start;
  80        if (start < files->next_fd)
  81                start = files->next_fd;
  82
  83        newfd = start;
  84        if (start < files->max_fdset) {
  85                newfd = find_next_zero_bit(files->open_fds->fds_bits,
  86                        files->max_fdset, start);
  87        }
  88        
  89        error = -EMFILE;
  90        if (newfd >= current->rlim[RLIMIT_NOFILE].rlim_cur)
  91                goto out;
  92
  93        error = expand_files(files, newfd);
  94        if (error < 0)
  95                goto out;
  96
  97        /*
  98         * If we needed to expand the fs array we
  99         * might have blocked - try again.
 100         */
 101        if (error)
 102                goto repeat;
 103
 104        if (start <= files->next_fd)
 105                files->next_fd = newfd + 1;
 106        
 107        error = newfd;
 108        
 109out:
 110        return error;
 111}
 112
 113static inline void allocate_fd(struct files_struct *files, 
 114                                        struct file *file, int fd)
 115{
 116        FD_SET(fd, files->open_fds);
 117        FD_CLR(fd, files->close_on_exec);
 118        write_unlock(&files->file_lock);
 119        fd_install(fd, file);
 120}
 121
 122static int dupfd(struct file *file, int start)
 123{
 124        struct files_struct * files = current->files;
 125        int ret;
 126
 127        ret = locate_fd(files, file, start);
 128        if (ret < 0) 
 129                goto out_putf;
 130        allocate_fd(files, file, ret);
 131        return ret;
 132
 133out_putf:
 134        write_unlock(&files->file_lock);
 135        fput(file);
 136        return ret;
 137}
 138
 139asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
 140{
 141        int err = -EBADF;
 142        struct file * file, *tofree;
 143        struct files_struct * files = current->files;
 144
 145        write_lock(&files->file_lock);
 146        if (!(file = fcheck(oldfd)))
 147                goto out_unlock;
 148        err = newfd;
 149        if (newfd == oldfd)
 150                goto out_unlock;
 151        err = -EBADF;
 152        if (newfd >= current->rlim[RLIMIT_NOFILE].rlim_cur)
 153                goto out_unlock;
 154        get_file(file);                 /* We are now finished with oldfd */
 155
 156        err = expand_files(files, newfd);
 157        if (err < 0)
 158                goto out_fput;
 159
 160        /* To avoid races with open() and dup(), we will mark the fd as
 161         * in-use in the open-file bitmap throughout the entire dup2()
 162         * process.  This is quite safe: do_close() uses the fd array
 163         * entry, not the bitmap, to decide what work needs to be
 164         * done.  --sct */
 165        /* Doesn't work. open() might be there first. --AV */
 166
 167        /* Yes. It's a race. In user space. Nothing sane to do */
 168        err = -EBUSY;
 169        tofree = files->fd[newfd];
 170        if (!tofree && FD_ISSET(newfd, files->open_fds))
 171                goto out_fput;
 172
 173        files->fd[newfd] = file;
 174        FD_SET(newfd, files->open_fds);
 175        FD_CLR(newfd, files->close_on_exec);
 176        write_unlock(&files->file_lock);
 177
 178        if (tofree)
 179                filp_close(tofree, files);
 180        err = newfd;
 181out:
 182        return err;
 183out_unlock:
 184        write_unlock(&files->file_lock);
 185        goto out;
 186
 187out_fput:
 188        write_unlock(&files->file_lock);
 189        fput(file);
 190        goto out;
 191}
 192
 193asmlinkage long sys_dup(unsigned int fildes)
 194{
 195        int ret = -EBADF;
 196        struct file * file = fget(fildes);
 197
 198        if (file)
 199                ret = dupfd(file, 0);
 200        return ret;
 201}
 202
 203#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | FASYNC | O_DIRECT)
 204
 205static int setfl(int fd, struct file * filp, unsigned long arg)
 206{
 207        struct inode * inode = filp->f_dentry->d_inode;
 208        int error;
 209
 210        /*
 211         * In the case of an append-only file, O_APPEND
 212         * cannot be cleared
 213         */
 214        if (!(arg & O_APPEND) && IS_APPEND(inode))
 215                return -EPERM;
 216
 217        /* Did FASYNC state change? */
 218        if ((arg ^ filp->f_flags) & FASYNC) {
 219                if (filp->f_op && filp->f_op->fasync) {
 220                        error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
 221                        if (error < 0)
 222                                return error;
 223                }
 224        }
 225
 226        if (arg & O_DIRECT) {
 227                /*
 228                 * alloc_kiovec() can sleep and we are only serialized by
 229                 * the big kernel lock here, so abuse the i_sem to serialize
 230                 * this case too. We of course wouldn't need to go deep down
 231                 * to the inode layer, we could stay at the file layer, but
 232                 * we don't want to pay for the memory of a semaphore in each
 233                 * file structure too and we use the inode semaphore that we just
 234                 * pay for anyways.
 235                 */
 236                error = 0;
 237                down(&inode->i_sem);
 238                if (!filp->f_iobuf)
 239                        error = alloc_kiovec(1, &filp->f_iobuf);
 240                up(&inode->i_sem);
 241                if (error < 0)
 242                        return error;
 243        }
 244
 245        /* required for strict SunOS emulation */
 246        if (O_NONBLOCK != O_NDELAY)
 247               if (arg & O_NDELAY)
 248                   arg |= O_NONBLOCK;
 249
 250        filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
 251        return 0;
 252}
 253
 254static long do_fcntl(unsigned int fd, unsigned int cmd,
 255                     unsigned long arg, struct file * filp)
 256{
 257        long err = -EINVAL;
 258
 259        switch (cmd) {
 260                case F_DUPFD:
 261                        if (arg < NR_OPEN) {
 262                                get_file(filp);
 263                                err = dupfd(filp, arg);
 264                        }
 265                        break;
 266                case F_GETFD:
 267                        err = get_close_on_exec(fd);
 268                        break;
 269                case F_SETFD:
 270                        err = 0;
 271                        set_close_on_exec(fd, arg&1);
 272                        break;
 273                case F_GETFL:
 274                        err = filp->f_flags;
 275                        break;
 276                case F_SETFL:
 277                        lock_kernel();
 278                        err = setfl(fd, filp, arg);
 279                        unlock_kernel();
 280                        break;
 281                case F_GETLK:
 282                        err = fcntl_getlk(fd, (struct flock *) arg);
 283                        break;
 284                case F_SETLK:
 285                case F_SETLKW:
 286                        err = fcntl_setlk(fd, cmd, (struct flock *) arg);
 287                        break;
 288                case F_GETOWN:
 289                        /*
 290                         * XXX If f_owner is a process group, the
 291                         * negative return value will get converted
 292                         * into an error.  Oops.  If we keep the
 293                         * current syscall conventions, the only way
 294                         * to fix this will be in libc.
 295                         */
 296                        err = filp->f_owner.pid;
 297                        force_successful_syscall_return();
 298                        break;
 299                case F_SETOWN:
 300                        lock_kernel();
 301                        filp->f_owner.pid = arg;
 302                        filp->f_owner.uid = current->uid;
 303                        filp->f_owner.euid = current->euid;
 304                        err = 0;
 305                        if (S_ISSOCK (filp->f_dentry->d_inode->i_mode))
 306                                err = sock_fcntl (filp, F_SETOWN, arg);
 307                        unlock_kernel();
 308                        break;
 309                case F_GETSIG:
 310                        err = filp->f_owner.signum;
 311                        break;
 312                case F_SETSIG:
 313                        /* arg == 0 restores default behaviour. */
 314                        if (arg < 0 || arg > _NSIG) {
 315                                break;
 316                        }
 317                        err = 0;
 318                        filp->f_owner.signum = arg;
 319                        break;
 320                case F_GETLEASE:
 321                        err = fcntl_getlease(filp);
 322                        break;
 323                case F_SETLEASE:
 324                        err = fcntl_setlease(fd, filp, arg);
 325                        break;
 326                case F_NOTIFY:
 327                        err = fcntl_dirnotify(fd, filp, arg);
 328                        break;
 329                default:
 330                        /* sockets need a few special fcntls. */
 331                        err = -EINVAL;
 332                        if (S_ISSOCK (filp->f_dentry->d_inode->i_mode))
 333                                err = sock_fcntl (filp, cmd, arg);
 334                        break;
 335        }
 336
 337        return err;
 338}
 339
 340asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg)
 341{       
 342        struct file * filp;
 343        long err = -EBADF;
 344
 345        filp = fget(fd);
 346        if (!filp)
 347                goto out;
 348
 349        err = do_fcntl(fd, cmd, arg, filp);
 350
 351        fput(filp);
 352out:
 353        return err;
 354}
 355
 356#if BITS_PER_LONG == 32
 357asmlinkage long sys_fcntl64(unsigned int fd, unsigned int cmd, unsigned long arg)
 358{       
 359        struct file * filp;
 360        long err;
 361
 362        err = -EBADF;
 363        filp = fget(fd);
 364        if (!filp)
 365                goto out;
 366
 367        switch (cmd) {
 368                case F_GETLK64:
 369                        err = fcntl_getlk64(fd, (struct flock64 *) arg);
 370                        break;
 371                case F_SETLK64:
 372                        err = fcntl_setlk64(fd, cmd, (struct flock64 *) arg);
 373                        break;
 374                case F_SETLKW64:
 375                        err = fcntl_setlk64(fd, cmd, (struct flock64 *) arg);
 376                        break;
 377                default:
 378                        err = do_fcntl(fd, cmd, arg, filp);
 379                        break;
 380        }
 381        fput(filp);
 382out:
 383        return err;
 384}
 385#endif
 386
 387/* Table to convert sigio signal codes into poll band bitmaps */
 388
 389static long band_table[NSIGPOLL] = {
 390        POLLIN | POLLRDNORM,                    /* POLL_IN */
 391        POLLOUT | POLLWRNORM | POLLWRBAND,      /* POLL_OUT */
 392        POLLIN | POLLRDNORM | POLLMSG,          /* POLL_MSG */
 393        POLLERR,                                /* POLL_ERR */
 394        POLLPRI | POLLRDBAND,                   /* POLL_PRI */
 395        POLLHUP | POLLERR                       /* POLL_HUP */
 396};
 397
 398static void send_sigio_to_task(struct task_struct *p,
 399                               struct fown_struct *fown, 
 400                               int fd,
 401                               int reason)
 402{
 403        if ((fown->euid != 0) &&
 404            (fown->euid ^ p->suid) && (fown->euid ^ p->uid) &&
 405            (fown->uid ^ p->suid) && (fown->uid ^ p->uid))
 406                return;
 407        switch (fown->signum) {
 408                siginfo_t si;
 409                default:
 410                        /* Queue a rt signal with the appropriate fd as its
 411                           value.  We use SI_SIGIO as the source, not 
 412                           SI_KERNEL, since kernel signals always get 
 413                           delivered even if we can't queue.  Failure to
 414                           queue in this case _should_ be reported; we fall
 415                           back to SIGIO in that case. --sct */
 416                        si.si_signo = fown->signum;
 417                        si.si_errno = 0;
 418                        si.si_code  = reason;
 419                        /* Make sure we are called with one of the POLL_*
 420                           reasons, otherwise we could leak kernel stack into
 421                           userspace.  */
 422                        if ((reason & __SI_MASK) != __SI_POLL)
 423                                BUG();
 424                        if (reason - POLL_IN >= NSIGPOLL)
 425                                si.si_band  = ~0L;
 426                        else
 427                                si.si_band = band_table[reason - POLL_IN];
 428                        si.si_fd    = fd;
 429                        if (!send_sig_info(fown->signum, &si, p))
 430                                break;
 431                /* fall-through: fall back on the old plain SIGIO signal */
 432                case 0:
 433                        send_sig(SIGIO, p, 1);
 434        }
 435}
 436
 437void send_sigio(struct fown_struct *fown, int fd, int band)
 438{
 439        struct task_struct * p;
 440        int   pid       = fown->pid;
 441        
 442        read_lock(&tasklist_lock);
 443        if ( (pid > 0) && (p = find_task_by_pid(pid)) ) {
 444                send_sigio_to_task(p, fown, fd, band);
 445                goto out;
 446        }
 447        for_each_task(p) {
 448                int match = p->pid;
 449                if (pid < 0)
 450                        match = -p->pgrp;
 451                if (pid != match)
 452                        continue;
 453                send_sigio_to_task(p, fown, fd, band);
 454        }
 455out:
 456        read_unlock(&tasklist_lock);
 457}
 458
 459static rwlock_t fasync_lock = RW_LOCK_UNLOCKED;
 460static kmem_cache_t *fasync_cache;
 461
 462/*
 463 * fasync_helper() is used by some character device drivers (mainly mice)
 464 * to set up the fasync queue. It returns negative on error, 0 if it did
 465 * no changes and positive if it added/deleted the entry.
 466 */
 467int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp)
 468{
 469        struct fasync_struct *fa, **fp;
 470        struct fasync_struct *new = NULL;
 471        int result = 0;
 472
 473        if (on) {
 474                new = kmem_cache_alloc(fasync_cache, SLAB_KERNEL);
 475                if (!new)
 476                        return -ENOMEM;
 477        }
 478        write_lock_irq(&fasync_lock);
 479        for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
 480                if (fa->fa_file == filp) {
 481                        if(on) {
 482                                fa->fa_fd = fd;
 483                                kmem_cache_free(fasync_cache, new);
 484                        } else {
 485                                *fp = fa->fa_next;
 486                                kmem_cache_free(fasync_cache, fa);
 487                                result = 1;
 488                        }
 489                        goto out;
 490                }
 491        }
 492
 493        if (on) {
 494                new->magic = FASYNC_MAGIC;
 495                new->fa_file = filp;
 496                new->fa_fd = fd;
 497                new->fa_next = *fapp;
 498                *fapp = new;
 499                result = 1;
 500        }
 501out:
 502        write_unlock_irq(&fasync_lock);
 503        return result;
 504}
 505
 506void __kill_fasync(struct fasync_struct *fa, int sig, int band)
 507{
 508        while (fa) {
 509                struct fown_struct * fown;
 510                if (fa->magic != FASYNC_MAGIC) {
 511                        printk(KERN_ERR "kill_fasync: bad magic number in "
 512                               "fasync_struct!\n");
 513                        return;
 514                }
 515                fown = &fa->fa_file->f_owner;
 516                /* Don't send SIGURG to processes which have not set a
 517                   queued signum: SIGURG has its own default signalling
 518                   mechanism. */
 519                if (fown->pid && !(sig == SIGURG && fown->signum == 0))
 520                        send_sigio(fown, fa->fa_fd, band);
 521                fa = fa->fa_next;
 522        }
 523}
 524
 525void kill_fasync(struct fasync_struct **fp, int sig, int band)
 526{
 527        read_lock(&fasync_lock);
 528        __kill_fasync(*fp, sig, band);
 529        read_unlock(&fasync_lock);
 530}
 531
 532static int __init fasync_init(void)
 533{
 534        fasync_cache = kmem_cache_create("fasync_cache",
 535                sizeof(struct fasync_struct), 0, 0, NULL, NULL);
 536        if (!fasync_cache)
 537                panic("cannot create fasync slab cache");
 538        return 0;
 539}
 540
 541module_init(fasync_init)
 542
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.