linux-bk/fs/fcntl.c
<<
>>
Prefs
   1/*
   2 *  linux/fs/fcntl.c
   3 *
   4 *  Copyright (C) 1991, 1992  Linus Torvalds
   5 */
   6
   7#include <linux/init.h>
   8#include <linux/mm.h>
   9#include <linux/file.h>
  10#include <linux/dnotify.h>
  11#include <linux/smp_lock.h>
  12#include <linux/slab.h>
  13#include <linux/iobuf.h>
  14#include <linux/module.h>
  15#include <linux/security.h>
  16
  17#include <asm/poll.h>
  18#include <asm/siginfo.h>
  19#include <asm/uaccess.h>
  20
  21extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg);
  22extern int fcntl_getlease(struct file *filp);
  23
  24void set_close_on_exec(unsigned int fd, int flag)
  25{
  26        struct files_struct *files = current->files;
  27        write_lock(&files->file_lock);
  28        if (flag)
  29                FD_SET(fd, files->close_on_exec);
  30        else
  31                FD_CLR(fd, files->close_on_exec);
  32        write_unlock(&files->file_lock);
  33}
  34
  35static inline int get_close_on_exec(unsigned int fd)
  36{
  37        struct files_struct *files = current->files;
  38        int res;
  39        read_lock(&files->file_lock);
  40        res = FD_ISSET(fd, files->close_on_exec);
  41        read_unlock(&files->file_lock);
  42        return res;
  43}
  44
  45
  46/* Expand files.  Return <0 on error; 0 nothing done; 1 files expanded,
  47 * we may have blocked. 
  48 *
  49 * Should be called with the files->file_lock spinlock held for write.
  50 */
  51static int expand_files(struct files_struct *files, int nr)
  52{
  53        int err, expand = 0;
  54#ifdef FDSET_DEBUG      
  55        printk (KERN_ERR "%s %d: nr = %d\n", __FUNCTION__, current->pid, nr);
  56#endif
  57        
  58        if (nr >= files->max_fdset) {
  59                expand = 1;
  60                if ((err = expand_fdset(files, nr)))
  61                        goto out;
  62        }
  63        if (nr >= files->max_fds) {
  64                expand = 1;
  65                if ((err = expand_fd_array(files, nr)))
  66                        goto out;
  67        }
  68        err = expand;
  69 out:
  70#ifdef FDSET_DEBUG      
  71        if (err)
  72                printk (KERN_ERR "%s %d: return %d\n", __FUNCTION__, current->pid, err);
  73#endif
  74        return err;
  75}
  76
  77/*
  78 * locate_fd finds a free file descriptor in the open_fds fdset,
  79 * expanding the fd arrays if necessary.  The files write lock will be
  80 * held on exit to ensure that the fd can be entered atomically.
  81 */
  82
  83static int locate_fd(struct files_struct *files, 
  84                            struct file *file, int orig_start)
  85{
  86        unsigned int newfd;
  87        int error;
  88        int start;
  89
  90        write_lock(&files->file_lock);
  91        
  92        error = -EINVAL;
  93        if (orig_start >= current->rlim[RLIMIT_NOFILE].rlim_cur)
  94                goto out;
  95
  96repeat:
  97        /*
  98         * Someone might have closed fd's in the range
  99         * orig_start..files->next_fd
 100         */
 101        start = orig_start;
 102        if (start < files->next_fd)
 103                start = files->next_fd;
 104
 105        newfd = start;
 106        if (start < files->max_fdset) {
 107                newfd = find_next_zero_bit(files->open_fds->fds_bits,
 108                        files->max_fdset, start);
 109        }
 110        
 111        error = -EMFILE;
 112        if (newfd >= current->rlim[RLIMIT_NOFILE].rlim_cur)
 113                goto out;
 114
 115        error = expand_files(files, newfd);
 116        if (error < 0)
 117                goto out;
 118
 119        /*
 120         * If we needed to expand the fs array we
 121         * might have blocked - try again.
 122         */
 123        if (error)
 124                goto repeat;
 125
 126        if (start <= files->next_fd)
 127                files->next_fd = newfd + 1;
 128        
 129        error = newfd;
 130        
 131out:
 132        return error;
 133}
 134
 135static inline void allocate_fd(struct files_struct *files, 
 136                                        struct file *file, int fd)
 137{
 138        FD_SET(fd, files->open_fds);
 139        FD_CLR(fd, files->close_on_exec);
 140        write_unlock(&files->file_lock);
 141        fd_install(fd, file);
 142}
 143
 144static int dupfd(struct file *file, int start)
 145{
 146        struct files_struct * files = current->files;
 147        int ret;
 148
 149        ret = locate_fd(files, file, start);
 150        if (ret < 0) 
 151                goto out_putf;
 152        allocate_fd(files, file, ret);
 153        return ret;
 154
 155out_putf:
 156        write_unlock(&files->file_lock);
 157        fput(file);
 158        return ret;
 159}
 160
 161asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
 162{
 163        int err = -EBADF;
 164        struct file * file, *tofree;
 165        struct files_struct * files = current->files;
 166
 167        write_lock(&files->file_lock);
 168        if (!(file = fcheck(oldfd)))
 169                goto out_unlock;
 170        err = newfd;
 171        if (newfd == oldfd)
 172                goto out_unlock;
 173        err = -EBADF;
 174        if (newfd >= current->rlim[RLIMIT_NOFILE].rlim_cur)
 175                goto out_unlock;
 176        get_file(file);                 /* We are now finished with oldfd */
 177
 178        err = expand_files(files, newfd);
 179        if (err < 0)
 180                goto out_fput;
 181
 182        /* To avoid races with open() and dup(), we will mark the fd as
 183         * in-use in the open-file bitmap throughout the entire dup2()
 184         * process.  This is quite safe: do_close() uses the fd array
 185         * entry, not the bitmap, to decide what work needs to be
 186         * done.  --sct */
 187        /* Doesn't work. open() might be there first. --AV */
 188
 189        /* Yes. It's a race. In user space. Nothing sane to do */
 190        err = -EBUSY;
 191        tofree = files->fd[newfd];
 192        if (!tofree && FD_ISSET(newfd, files->open_fds))
 193                goto out_fput;
 194
 195        files->fd[newfd] = file;
 196        FD_SET(newfd, files->open_fds);
 197        FD_CLR(newfd, files->close_on_exec);
 198        write_unlock(&files->file_lock);
 199
 200        if (tofree)
 201                filp_close(tofree, files);
 202        err = newfd;
 203out:
 204        return err;
 205out_unlock:
 206        write_unlock(&files->file_lock);
 207        goto out;
 208
 209out_fput:
 210        write_unlock(&files->file_lock);
 211        fput(file);
 212        goto out;
 213}
 214
 215asmlinkage long sys_dup(unsigned int fildes)
 216{
 217        int ret = -EBADF;
 218        struct file * file = fget(fildes);
 219
 220        if (file)
 221                ret = dupfd(file, 0);
 222        return ret;
 223}
 224
 225#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | FASYNC | O_DIRECT)
 226
 227static int setfl(int fd, struct file * filp, unsigned long arg)
 228{
 229        struct inode * inode = filp->f_dentry->d_inode;
 230        int error = 0;
 231
 232        /* O_APPEND cannot be cleared if the file is marked as append-only */
 233        if (!(arg & O_APPEND) && IS_APPEND(inode))
 234                return -EPERM;
 235
 236        /* required for strict SunOS emulation */
 237        if (O_NONBLOCK != O_NDELAY)
 238               if (arg & O_NDELAY)
 239                   arg |= O_NONBLOCK;
 240
 241        if (arg & O_DIRECT) {
 242                if (!inode->i_mapping || !inode->i_mapping->a_ops ||
 243                        !inode->i_mapping->a_ops->direct_IO)
 244                                return -EINVAL;
 245        }
 246
 247        lock_kernel();
 248        if ((arg ^ filp->f_flags) & FASYNC) {
 249                if (filp->f_op && filp->f_op->fasync) {
 250                        error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
 251                        if (error < 0)
 252                                goto out;
 253                }
 254        }
 255
 256        filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
 257 out:
 258        unlock_kernel();
 259        return error;
 260}
 261
 262static void f_modown(struct file *filp, unsigned long pid,
 263                     uid_t uid, uid_t euid, int force)
 264{
 265        write_lock_irq(&filp->f_owner.lock);
 266        if (force || !filp->f_owner.pid) {
 267                filp->f_owner.pid = pid;
 268                filp->f_owner.uid = uid;
 269                filp->f_owner.euid = euid;
 270        }
 271        write_unlock_irq(&filp->f_owner.lock);
 272}
 273
 274int f_setown(struct file *filp, unsigned long arg, int force)
 275{
 276        int err;
 277        
 278        err = security_ops->file_set_fowner(filp);
 279        if (err)
 280                return err;
 281
 282        f_modown(filp, arg, current->uid, current->euid, force);
 283        return 0;
 284}
 285
 286void f_delown(struct file *filp)
 287{
 288        f_modown(filp, 0, 0, 0, 1);
 289}
 290
 291static long do_fcntl(unsigned int fd, unsigned int cmd,
 292                     unsigned long arg, struct file * filp)
 293{
 294        long err = -EINVAL;
 295
 296        switch (cmd) {
 297                case F_DUPFD:
 298                        if (arg < NR_OPEN) {
 299                                get_file(filp);
 300                                err = dupfd(filp, arg);
 301                        }
 302                        break;
 303                case F_GETFD:
 304                        err = get_close_on_exec(fd);
 305                        break;
 306                case F_SETFD:
 307                        err = 0;
 308                        set_close_on_exec(fd, arg&1);
 309                        break;
 310                case F_GETFL:
 311                        err = filp->f_flags;
 312                        break;
 313                case F_SETFL:
 314                        err = setfl(fd, filp, arg);
 315                        break;
 316                case F_GETLK:
 317                        err = fcntl_getlk(filp, (struct flock *) arg);
 318                        break;
 319                case F_SETLK:
 320                case F_SETLKW:
 321                        err = fcntl_setlk(filp, cmd, (struct flock *) arg);
 322                        break;
 323                case F_GETOWN:
 324                        /*
 325                         * XXX If f_owner is a process group, the
 326                         * negative return value will get converted
 327                         * into an error.  Oops.  If we keep the
 328                         * current syscall conventions, the only way
 329                         * to fix this will be in libc.
 330                         */
 331                        err = filp->f_owner.pid;
 332                        break;
 333                case F_SETOWN:
 334                        err = f_setown(filp, arg, 1);
 335                        break;
 336                case F_GETSIG:
 337                        err = filp->f_owner.signum;
 338                        break;
 339                case F_SETSIG:
 340                        /* arg == 0 restores default behaviour. */
 341                        if (arg < 0 || arg > _NSIG) {
 342                                break;
 343                        }
 344                        err = 0;
 345                        filp->f_owner.signum = arg;
 346                        break;
 347                case F_GETLEASE:
 348                        err = fcntl_getlease(filp);
 349                        break;
 350                case F_SETLEASE:
 351                        err = fcntl_setlease(fd, filp, arg);
 352                        break;
 353                case F_NOTIFY:
 354                        err = fcntl_dirnotify(fd, filp, arg);
 355                        break;
 356                default:
 357                        break;
 358        }
 359
 360        return err;
 361}
 362
 363asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg)
 364{       
 365        struct file * filp;
 366        long err = -EBADF;
 367
 368        filp = fget(fd);
 369        if (!filp)
 370                goto out;
 371
 372        err = security_ops->file_fcntl(filp, cmd, arg);
 373        if (err) {
 374                fput(filp);
 375                return err;
 376        }
 377
 378        err = do_fcntl(fd, cmd, arg, filp);
 379
 380        fput(filp);
 381out:
 382        return err;
 383}
 384
 385#if BITS_PER_LONG == 32
 386asmlinkage long sys_fcntl64(unsigned int fd, unsigned int cmd, unsigned long arg)
 387{       
 388        struct file * filp;
 389        long err;
 390
 391        err = -EBADF;
 392        filp = fget(fd);
 393        if (!filp)
 394                goto out;
 395
 396        err = security_ops->file_fcntl(filp, cmd, arg);
 397        if (err) {
 398                fput(filp);
 399                return err;
 400        }
 401        err = -EBADF;
 402        
 403        switch (cmd) {
 404                case F_GETLK64:
 405                        err = fcntl_getlk64(filp, (struct flock64 *) arg);
 406                        break;
 407                case F_SETLK64:
 408                case F_SETLKW64:
 409                        err = fcntl_setlk64(filp, cmd, (struct flock64 *) arg);
 410                        break;
 411                default:
 412                        err = do_fcntl(fd, cmd, arg, filp);
 413                        break;
 414        }
 415        fput(filp);
 416out:
 417        return err;
 418}
 419#endif
 420
 421/* Table to convert sigio signal codes into poll band bitmaps */
 422
 423static long band_table[NSIGPOLL] = {
 424        POLLIN | POLLRDNORM,                    /* POLL_IN */
 425        POLLOUT | POLLWRNORM | POLLWRBAND,      /* POLL_OUT */
 426        POLLIN | POLLRDNORM | POLLMSG,          /* POLL_MSG */
 427        POLLERR,                                /* POLL_ERR */
 428        POLLPRI | POLLRDBAND,                   /* POLL_PRI */
 429        POLLHUP | POLLERR                       /* POLL_HUP */
 430};
 431
 432static inline int sigio_perm(struct task_struct *p,
 433                             struct fown_struct *fown)
 434{
 435        return ((fown->euid == 0) ||
 436                (fown->euid == p->suid) || (fown->euid == p->uid) ||
 437                (fown->uid == p->suid) || (fown->uid == p->uid));
 438}
 439
 440static void send_sigio_to_task(struct task_struct *p,
 441                               struct fown_struct *fown, 
 442                               int fd,
 443                               int reason)
 444{
 445        if (!sigio_perm(p, fown))
 446                return;
 447
 448        if (security_ops->file_send_sigiotask(p, fown, fd, reason))
 449                return;
 450
 451        switch (fown->signum) {
 452                siginfo_t si;
 453                default:
 454                        /* Queue a rt signal with the appropriate fd as its
 455                           value.  We use SI_SIGIO as the source, not 
 456                           SI_KERNEL, since kernel signals always get 
 457                           delivered even if we can't queue.  Failure to
 458                           queue in this case _should_ be reported; we fall
 459                           back to SIGIO in that case. --sct */
 460                        si.si_signo = fown->signum;
 461                        si.si_errno = 0;
 462                        si.si_code  = reason;
 463                        /* Make sure we are called with one of the POLL_*
 464                           reasons, otherwise we could leak kernel stack into
 465                           userspace.  */
 466                        if ((reason & __SI_MASK) != __SI_POLL)
 467                                BUG();
 468                        if (reason - POLL_IN >= NSIGPOLL)
 469                                si.si_band  = ~0L;
 470                        else
 471                                si.si_band = band_table[reason - POLL_IN];
 472                        si.si_fd    = fd;
 473                        if (!send_sig_info(fown->signum, &si, p))
 474                                break;
 475                /* fall-through: fall back on the old plain SIGIO signal */
 476                case 0:
 477                        send_sig(SIGIO, p, 1);
 478        }
 479}
 480
 481void send_sigio(struct fown_struct *fown, int fd, int band)
 482{
 483        struct task_struct *p;
 484        struct list_head *l;
 485        struct pid *pidptr;
 486        int pid;
 487        
 488        read_lock(&fown->lock);
 489        pid = fown->pid;
 490        if (!pid)
 491                goto out_unlock_fown;
 492        
 493        read_lock(&tasklist_lock);
 494        if ( (pid > 0) && (p = find_task_by_pid(pid)) ) {
 495                send_sigio_to_task(p, fown, fd, band);
 496                goto out_unlock_task;
 497        }
 498        for_each_task_pid(-pid, PIDTYPE_PGID, p, l, pidptr)
 499                send_sigio_to_task(p, fown,fd,band);
 500out_unlock_task:
 501        read_unlock(&tasklist_lock);
 502out_unlock_fown:
 503        read_unlock(&fown->lock);
 504}
 505
 506static void send_sigurg_to_task(struct task_struct *p,
 507                                struct fown_struct *fown)
 508{
 509        if (sigio_perm(p, fown))
 510                send_sig(SIGURG, p, 1);
 511}
 512
 513int send_sigurg(struct fown_struct *fown)
 514{
 515        struct task_struct *p;
 516        int pid, ret = 0;
 517        
 518        read_lock(&fown->lock);
 519        pid = fown->pid;
 520        if (!pid)
 521                goto out_unlock_fown;
 522
 523        ret = 1;
 524        
 525        read_lock(&tasklist_lock);
 526        if ((pid > 0) && (p = find_task_by_pid(pid))) {
 527                send_sigurg_to_task(p, fown);
 528                goto out_unlock_task;
 529        }
 530        for_each_process(p) {
 531                int match = p->pid;
 532                if (pid < 0)
 533                        match = -p->pgrp;
 534                if (pid != match)
 535                        continue;
 536                send_sigurg_to_task(p, fown);
 537        }
 538out_unlock_task:
 539        read_unlock(&tasklist_lock);
 540out_unlock_fown:
 541        read_unlock(&fown->lock);
 542        return ret;
 543}
 544
 545static rwlock_t fasync_lock = RW_LOCK_UNLOCKED;
 546static kmem_cache_t *fasync_cache;
 547
 548/*
 549 * fasync_helper() is used by some character device drivers (mainly mice)
 550 * to set up the fasync queue. It returns negative on error, 0 if it did
 551 * no changes and positive if it added/deleted the entry.
 552 */
 553int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp)
 554{
 555        struct fasync_struct *fa, **fp;
 556        struct fasync_struct *new = NULL;
 557        int result = 0;
 558
 559        if (on) {
 560                new = kmem_cache_alloc(fasync_cache, SLAB_KERNEL);
 561                if (!new)
 562                        return -ENOMEM;
 563        }
 564        write_lock_irq(&fasync_lock);
 565        for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
 566                if (fa->fa_file == filp) {
 567                        if(on) {
 568                                fa->fa_fd = fd;
 569                                kmem_cache_free(fasync_cache, new);
 570                        } else {
 571                                *fp = fa->fa_next;
 572                                kmem_cache_free(fasync_cache, fa);
 573                                result = 1;
 574                        }
 575                        goto out;
 576                }
 577        }
 578
 579        if (on) {
 580                new->magic = FASYNC_MAGIC;
 581                new->fa_file = filp;
 582                new->fa_fd = fd;
 583                new->fa_next = *fapp;
 584                *fapp = new;
 585                result = 1;
 586        }
 587out:
 588        write_unlock_irq(&fasync_lock);
 589        return result;
 590}
 591
 592void __kill_fasync(struct fasync_struct *fa, int sig, int band)
 593{
 594        while (fa) {
 595                struct fown_struct * fown;
 596                if (fa->magic != FASYNC_MAGIC) {
 597                        printk(KERN_ERR "kill_fasync: bad magic number in "
 598                               "fasync_struct!\n");
 599                        return;
 600                }
 601                fown = &fa->fa_file->f_owner;
 602                /* Don't send SIGURG to processes which have not set a
 603                   queued signum: SIGURG has its own default signalling
 604                   mechanism. */
 605                if (!(sig == SIGURG && fown->signum == 0))
 606                        send_sigio(fown, fa->fa_fd, band);
 607                fa = fa->fa_next;
 608        }
 609}
 610
 611void kill_fasync(struct fasync_struct **fp, int sig, int band)
 612{
 613        read_lock(&fasync_lock);
 614        __kill_fasync(*fp, sig, band);
 615        read_unlock(&fasync_lock);
 616}
 617
 618static int __init fasync_init(void)
 619{
 620        fasync_cache = kmem_cache_create("fasync_cache",
 621                sizeof(struct fasync_struct), 0, 0, NULL, NULL);
 622        if (!fasync_cache)
 623                panic("cannot create fasync slab cache");
 624        return 0;
 625}
 626
 627module_init(fasync_init)
 628
 629EXPORT_SYMBOL(f_setown);
 630EXPORT_SYMBOL(f_delown);
 631
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.