linux-old/fs/select.c
<<
>>
Prefs
   1/*
   2 * This file contains the procedures for the handling of select and poll
   3 *
   4 * Created for Linux based loosely upon Mathius Lattner's minix
   5 * patches by Peter MacDonald. Heavily edited by Linus.
   6 *
   7 *  4 February 1994
   8 *     COFF/ELF binary emulation. If the process has the STICKY_TIMEOUTS
   9 *     flag set in its personality we do *not* modify the given timeout
  10 *     parameter to reflect time remaining.
  11 */
  12
  13#include <linux/malloc.h>
  14#include <linux/smp_lock.h>
  15#include <linux/poll.h>
  16#include <linux/file.h>
  17
  18#include <asm/uaccess.h>
  19
  20#define ROUND_UP(x,y) (((x)+(y)-1)/(y))
  21#define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)
  22
/*
 * Ok, Peter made a complicated, but straightforward multiple_wait() function.
 * I have rewritten this, taking some shortcuts: This code may not be easy to
 * follow, but it should be free of race-conditions, and it's practical. If you
 * understand what I'm doing here, then you understand how the linux
 * sleep/wakeup mechanism works.
 *
 * Two very simple procedures, poll_wait() and free_wait(), do all the
 * work.  poll_wait() is an inline function defined in <linux/poll.h>,
 * as all select/poll functions have to call it to add an entry to the
 * poll table.
 */
  35
  36/*
  37 * I rewrote this again to make the poll_table size variable, take some
  38 * more shortcuts, improve responsiveness, and remove another race that
  39 * Linus noticed.  -- jrs
  40 */
  41
  42static poll_table* alloc_wait(int nfds)
  43{
  44        poll_table* out;
  45        poll_table* walk;
  46
  47        out = (poll_table *) __get_free_page(GFP_KERNEL);
  48        if(out==NULL)
  49                return NULL;
  50        out->nr = 0;
  51        out->entry = (struct poll_table_entry *)(out + 1);
  52        out->next = NULL;
  53        nfds -=__MAX_POLL_TABLE_ENTRIES;
  54        walk = out;
  55        while(nfds > 0) {
  56                poll_table *tmp = (poll_table *) __get_free_page(GFP_KERNEL);
  57                if (!tmp) {
  58                        while(out != NULL) {
  59                                tmp = out->next;
  60                                free_page((unsigned long)out);
  61                                out = tmp;
  62                        }
  63                        return NULL;
  64                }
  65                tmp->nr = 0;
  66                tmp->entry = (struct poll_table_entry *)(tmp + 1);
  67                tmp->next = NULL;
  68                walk->next = tmp;
  69                walk = tmp;
  70                nfds -=__MAX_POLL_TABLE_ENTRIES;
  71        }
  72        return out;
  73}
  74
  75static void free_wait(poll_table * p)
  76{
  77        struct poll_table_entry * entry;
  78        poll_table *old;
  79
  80        while (p) {
  81                entry = p->entry + p->nr;
  82                while (p->nr > 0) {
  83                        p->nr--;
  84                        entry--;
  85                        remove_wait_queue(entry->wait_address,&entry->wait);
  86                        fput(entry->filp);
  87                }
  88                old = p;
  89                p = p->next;
  90                free_page((unsigned long) old);
  91        }
  92}
  93
  94void __pollwait(struct file * filp, struct wait_queue ** wait_address, poll_table *p)
  95{
  96        for (;;) {
  97                if (p->nr < __MAX_POLL_TABLE_ENTRIES) {
  98                        struct poll_table_entry * entry;
  99                        entry = p->entry + p->nr;
 100                        entry->filp = filp;
 101                        filp->f_count++;
 102                        entry->wait_address = wait_address;
 103                        entry->wait.task = current;
 104                        entry->wait.next = NULL;
 105                        add_wait_queue(wait_address,&entry->wait);
 106                        p->nr++;
 107                        return;
 108                }
 109                p = p->next;
 110        }
 111}
 112
 113#define __IN(fds, n)            (fds->in + n)
 114#define __OUT(fds, n)           (fds->out + n)
 115#define __EX(fds, n)            (fds->ex + n)
 116#define __RES_IN(fds, n)        (fds->res_in + n)
 117#define __RES_OUT(fds, n)       (fds->res_out + n)
 118#define __RES_EX(fds, n)        (fds->res_ex + n)
 119
 120#define BITS(fds, n)            (*__IN(fds, n)|*__OUT(fds, n)|*__EX(fds, n))
 121
 122static int max_select_fd(unsigned long n, fd_set_bits *fds)
 123{
 124        unsigned long *open_fds;
 125        unsigned long set;
 126        int max;
 127
 128        /* handle last in-complete long-word first */
 129        set = ~(~0UL << (n & (__NFDBITS-1)));
 130        n /= __NFDBITS;
 131        open_fds = current->files->open_fds->fds_bits+n;
 132        max = 0;
 133        if (set) {
 134                set &= BITS(fds, n);
 135                if (set) {
 136                        if (!(set & ~*open_fds))
 137                                goto get_max;
 138                        return -EBADF;
 139                }
 140        }
 141        while (n) {
 142                open_fds--;
 143                n--;
 144                set = BITS(fds, n);
 145                if (!set)
 146                        continue;
 147                if (set & ~*open_fds)
 148                        return -EBADF;
 149                if (max)
 150                        continue;
 151get_max:
 152                do {
 153                        max++;
 154                        set >>= 1;
 155                } while (set);
 156                max += n * __NFDBITS;
 157        }
 158
 159        return max;
 160}
 161
 162#define BIT(i)          (1UL << ((i)&(__NFDBITS-1)))
 163#define MEM(i,m)        ((m)+(unsigned)(i)/__NFDBITS)
 164#define ISSET(i,m)      (((i)&*(m)) != 0)
 165#define SET(i,m)        (*(m) |= (i))
 166
 167#define POLLIN_SET (POLLRDNORM | POLLRDBAND | POLLIN | POLLHUP | POLLERR)
 168#define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR)
 169#define POLLEX_SET (POLLPRI)
 170
 171int do_select(int n, fd_set_bits *fds, long *timeout)
 172{
 173        poll_table *wait_table, *wait;
 174        int retval, i, off;
 175        long __timeout = *timeout;
 176
 177        wait = wait_table = NULL;
 178        lock_kernel();
 179
 180        retval = max_select_fd(n, fds);
 181        if (retval < 0)
 182                goto out;
 183        n = retval;
 184        if (__timeout) {
 185                retval = -ENOMEM;
 186                wait_table = alloc_wait(n);
 187                if (!wait_table)
 188                        goto out;
 189
 190                wait = wait_table;
 191        }
 192
 193        retval = 0;
 194        for (;;) {
 195                current->state = TASK_INTERRUPTIBLE;
 196                for (i = 0 ; i < n; i++) {
 197                        unsigned long bit = BIT(i);
 198                        unsigned long mask;
 199                        struct file *file;
 200
 201                        off = i / __NFDBITS;
 202                        if (!(bit & BITS(fds, off)))
 203                                continue;
 204                        /*
 205                         * The poll_wait routine will increment f_count if
 206                         * the file is added to the wait table, so we don't
 207                         * need to increment it now.
 208                         */
 209                        file = fcheck(i);
 210                        mask = POLLNVAL;
 211                        if (file) {
 212                                mask = DEFAULT_POLLMASK;
 213                                if (file->f_op && file->f_op->poll)
 214                                        mask = file->f_op->poll(file, wait);
 215                        }
 216                        if ((mask & POLLIN_SET) && ISSET(bit, __IN(fds,off))) {
 217                                SET(bit, __RES_IN(fds,off));
 218                                retval++;
 219                                wait = NULL;
 220                        }
 221                        if ((mask & POLLOUT_SET) && ISSET(bit, __OUT(fds,off))) {
 222                                SET(bit, __RES_OUT(fds,off));
 223                                retval++;
 224                                wait = NULL;
 225                        }
 226                        if ((mask & POLLEX_SET) && ISSET(bit, __EX(fds,off))) {
 227                                SET(bit, __RES_EX(fds,off));
 228                                retval++;
 229                                wait = NULL;
 230                        }
 231                }
 232                wait = NULL;
 233                if (retval || !__timeout || signal_pending(current))
 234                        break;
 235                __timeout = schedule_timeout(__timeout);
 236        }
 237        current->state = TASK_RUNNING;
 238
 239out:
 240        if (*timeout)
 241                free_wait(wait_table);
 242
 243        /*
 244         * Up-to-date the caller timeout.
 245         */
 246        *timeout = __timeout;
 247        unlock_kernel();
 248        return retval;
 249}
 250
/*
 * We can actually return ERESTARTSYS instead of EINTR, but I'd
 * like to be certain this leads to no problems. So I return
 * EINTR just for safety.
 *
 * Update: ERESTARTSYS breaks at least the xview clock binary, so
 * I'm trying ERESTARTNOHAND, which restarts only when you want it to.
 */
 259#define MAX_SELECT_SECONDS \
 260        ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
 261
 262asmlinkage int
 263sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval *tvp)
 264{
 265        fd_set_bits fds;
 266        char *bits;
 267        long timeout;
 268        int ret, size, max_fdset;
 269
 270        timeout = MAX_SCHEDULE_TIMEOUT;
 271        if (tvp) {
 272                time_t sec, usec;
 273
 274                if ((ret = verify_area(VERIFY_READ, tvp, sizeof(*tvp)))
 275                    || (ret = __get_user(sec, &tvp->tv_sec))
 276                    || (ret = __get_user(usec, &tvp->tv_usec)))
 277                        goto out_nofds;
 278
 279                ret = -EINVAL;
 280                if (sec < 0 || usec < 0)
 281                        goto out_nofds;
 282
 283                if ((unsigned long) sec < MAX_SELECT_SECONDS) {
 284                        timeout = ROUND_UP(usec, 1000000/HZ);
 285                        timeout += sec * (unsigned long) HZ;
 286
 287                        if (timeout < 0) {
 288                                ret = -EINVAL;
 289                                goto out_nofds;
 290                        }
 291                }
 292        }
 293
 294        ret = -EINVAL;
 295        
 296        /*
 297         * We ought to optimise the n=0 case - it is used enough..
 298         */
 299         
 300        if (n < 0)
 301                goto out_nofds;
 302        max_fdset = current->files->max_fdset;
 303        if (n > max_fdset)
 304                n = max_fdset;
 305        if (n > NR_OPEN)
 306                n = NR_OPEN;
 307                
 308        /*
 309         * We need 6 bitmaps (in/out/ex for both incoming and outgoing),
 310         * since we used fdset we need to allocate memory in units of
 311         * long-words.
 312         */
 313
 314        ret = -ENOMEM;
 315        size = FDS_BYTES(n);
 316        bits = kmalloc(6 * size, GFP_KERNEL);
 317        if (!bits)
 318                goto out_nofds;
 319        fds.in      = (unsigned long *)  bits;
 320        fds.out     = (unsigned long *) (bits +   size);
 321        fds.ex      = (unsigned long *) (bits + 2*size);
 322        fds.res_in  = (unsigned long *) (bits + 3*size);
 323        fds.res_out = (unsigned long *) (bits + 4*size);
 324        fds.res_ex  = (unsigned long *) (bits + 5*size);
 325
 326        if ((ret = get_fd_set(n, inp, fds.in)) ||
 327            (ret = get_fd_set(n, outp, fds.out)) ||
 328            (ret = get_fd_set(n, exp, fds.ex)))
 329                goto out;
 330        zero_fd_set(n, fds.res_in);
 331        zero_fd_set(n, fds.res_out);
 332        zero_fd_set(n, fds.res_ex);
 333
 334        ret = do_select(n, &fds, &timeout);
 335
 336        if (tvp && !(current->personality & STICKY_TIMEOUTS)) {
 337                time_t sec = 0, usec = 0;
 338                if (timeout) {
 339                        sec = timeout / HZ;
 340                        usec = timeout % HZ;
 341                        usec *= (1000000/HZ);
 342                }
 343                put_user(sec, &tvp->tv_sec);
 344                put_user(usec, &tvp->tv_usec);
 345        }
 346
 347        if (ret < 0)
 348                goto out;
 349        if (!ret) {
 350                ret = -ERESTARTNOHAND;
 351                if (signal_pending(current))
 352                        goto out;
 353                ret = 0;
 354        }
 355
 356        set_fd_set(n, inp, fds.res_in);
 357        set_fd_set(n, outp, fds.res_out);
 358        set_fd_set(n, exp, fds.res_ex);
 359
 360out:
 361        kfree(bits);
 362out_nofds:
 363        return ret;
 364}
 365
 366static int do_poll(unsigned int nfds, struct pollfd *fds, poll_table *wait,
 367                   long timeout)
 368{
 369        int count = 0;
 370
 371        for (;;) {
 372                unsigned int j;
 373                struct pollfd * fdpnt;
 374
 375                current->state = TASK_INTERRUPTIBLE;
 376                for (fdpnt = fds, j = 0; j < nfds; j++, fdpnt++) {
 377                        int fd;
 378                        unsigned int mask;
 379
 380                        mask = 0;
 381                        fd = fdpnt->fd;
 382                        if (fd >= 0) {
 383                                /* poll_wait increments f_count if needed */
 384                                struct file * file = fcheck(fd);
 385                                mask = POLLNVAL;
 386                                if (file != NULL) {
 387                                        mask = DEFAULT_POLLMASK;
 388                                        if (file->f_op && file->f_op->poll)
 389                                                mask = file->f_op->poll(file, wait);
 390                                        mask &= fdpnt->events | POLLERR | POLLHUP;
 391                                }
 392                                if (mask) {
 393                                        wait = NULL;
 394                                        count++;
 395                                }
 396                        }
 397                        fdpnt->revents = mask;
 398                }
 399
 400                wait = NULL;
 401                if (count || !timeout || signal_pending(current))
 402                        break;
 403                timeout = schedule_timeout(timeout);
 404        }
 405        current->state = TASK_RUNNING;
 406        return count;
 407}
 408
 409asmlinkage int sys_poll(struct pollfd * ufds, unsigned int nfds, long timeout)
 410{
 411        int i, fdcount, err, size;
 412        struct pollfd * fds, *fds1;
 413        poll_table *wait_table = NULL, *wait = NULL;
 414
 415        lock_kernel();
 416        /* Do a sanity check on nfds ... */
 417        err = -EINVAL;
 418        if (nfds > current->files->max_fds || nfds > NR_OPEN)
 419                goto out;
 420
 421        if (timeout) {
 422                /* Careful about overflow in the intermediate values */
 423                if ((unsigned long) timeout < MAX_SCHEDULE_TIMEOUT / HZ)
 424                        timeout = (timeout*HZ+999)/1000+1;
 425                else /* Negative or overflow */
 426                        timeout = MAX_SCHEDULE_TIMEOUT;
 427        }
 428
 429        err = -ENOMEM;
 430        if (timeout) {
 431                wait_table = alloc_wait(nfds);
 432                if (!wait_table)
 433                        goto out;
 434
 435                wait = wait_table;
 436        }
 437
 438        size = nfds * sizeof(struct pollfd);
 439        fds = (struct pollfd *) kmalloc(size, GFP_KERNEL);
 440        if (!fds)
 441                goto out;
 442
 443        err = -EFAULT;
 444        if (copy_from_user(fds, ufds, size))
 445                goto out_fds;
 446
 447        fdcount = do_poll(nfds, fds, wait, timeout);
 448
 449        /* OK, now copy the revents fields back to user space. */
 450        fds1 = fds;
 451        for(i=0; i < (int)nfds; i++, ufds++, fds1++) {
 452                __put_user(fds1->revents, &ufds->revents);
 453        }
 454
 455        err = fdcount;
 456        if (!fdcount && signal_pending(current))
 457                err = -EINTR;
 458
 459out_fds:
 460        kfree(fds);
 461out:
 462        if (wait)
 463                free_wait(wait_table);
 464        unlock_kernel();
 465        return err;
 466}
 467
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.