1
2
3
4
5
6
7
8
9
10
11
12
13
14#include <linux/init.h>
15#include <linux/kernel.h>
16#include <linux/sched.h>
17#include <linux/fs.h>
18#include <linux/file.h>
19#include <linux/signal.h>
20#include <linux/errno.h>
21#include <linux/mm.h>
22#include <linux/slab.h>
23#include <linux/poll.h>
24#include <linux/string.h>
25#include <linux/list.h>
26#include <linux/hash.h>
27#include <linux/spinlock.h>
28#include <linux/syscalls.h>
29#include <linux/rbtree.h>
30#include <linux/wait.h>
31#include <linux/eventpoll.h>
32#include <linux/mount.h>
33#include <linux/bitops.h>
34#include <linux/mutex.h>
35#include <linux/anon_inodes.h>
36#include <asm/uaccess.h>
37#include <asm/system.h>
38#include <asm/io.h>
39#include <asm/mman.h>
40#include <asm/atomic.h>
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
/*
 * Compile-time debug tracing level.  With DEBUG_EPOLL at 0 (the default)
 * both tracing macros expand to a no-op expression.  With a positive value
 * DPRINTK() always prints, while DNPRINTK(n, ...) prints only when its
 * level "n" does not exceed DEBUG_EPOLL.  The message argument is a fully
 * parenthesized printk() argument list: DPRINTK((KERN_INFO "x=%d\n", x)).
 */
#define DEBUG_EPOLL 0

#if DEBUG_EPOLL <= 0
#define DPRINTK(x) (void) 0
#define DNPRINTK(n, x) (void) 0
#else
#define DPRINTK(x) printk x
#define DNPRINTK(n, x) do { if ((n) <= DEBUG_EPOLL) printk x; } while (0)
#endif
83
/*
 * Slab debugging switch for the epoll caches.  When DEBUG_EPI is non-zero
 * the caches are created with free-debugging and red-zoning; otherwise no
 * additional slab flags are requested.
 */
#define DEBUG_EPI 0

#if DEBUG_EPI == 0
#define EPI_SLAB_DEBUG 0
#else
#define EPI_SLAB_DEBUG (SLAB_DEBUG_FREE | SLAB_RED_ZONE )
#endif
91
92
/*
 * Bits of the event mask that are epoll control flags rather than events
 * to be reported.  An item whose mask has nothing set outside of these
 * bits is treated as disabled by the wakeup callback.
 */
#define EP_PRIVATE_BITS (EPOLLONESHOT | EPOLLET)

/* Upper bound on nested wakeups performed by one task in ep_poll_safewake() */
#define EP_MAX_POLLWAKE_NESTS 4

/*
 * Largest millisecond timeout that ep_poll() converts to jiffies; anything
 * at or above this value is replaced with MAX_SCHEDULE_TIMEOUT.
 */
#define EP_MAX_MSTIMEO min(1000ULL * MAX_SCHEDULE_TIMEOUT / HZ, (LONG_MAX - 999ULL) / HZ)

/* Largest "maxevents" accepted by sys_epoll_wait() */
#define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event))

/*
 * Sentinel stored in eventpoll.ovflist and epitem.next to mean
 * "not in use / not chained" (NULL is a valid end-of-chain marker).
 */
#define EP_UNACTIVE_PTR ((void *) -1L)
104
/*
 * Identifies a monitored target as a (file pointer, descriptor number)
 * pair.  It is the key used to order items inside the eventpoll RB tree.
 */
struct epoll_filefd {
	struct file *file;	/* target file */
	int fd;			/* descriptor number the target was added with */
};
109
110
111
112
113
114
115
116
117struct wake_task_node {
118 struct list_head llink;
119 struct task_struct *task;
120 wait_queue_head_t *wq;
121};
122
123
124
125
126
127struct poll_safewake {
128 struct list_head wake_task_list;
129 spinlock_t lock;
130};
131
132
133
134
135
136struct epitem {
137
138 struct rb_node rbn;
139
140
141 struct list_head rdllink;
142
143
144
145
146
147 struct epitem *next;
148
149
150 struct epoll_filefd ffd;
151
152
153 int nwait;
154
155
156 struct list_head pwqlist;
157
158
159 struct eventpoll *ep;
160
161
162 struct list_head fllink;
163
164
165 struct epoll_event event;
166};
167
168
169
170
171
172
173struct eventpoll {
174
175 spinlock_t lock;
176
177
178
179
180
181
182
183 struct mutex mtx;
184
185
186 wait_queue_head_t wq;
187
188
189 wait_queue_head_t poll_wait;
190
191
192 struct list_head rdllist;
193
194
195 struct rb_root rbr;
196
197
198
199
200
201
202 struct epitem *ovflist;
203};
204
205
206struct eppoll_entry {
207
208 struct list_head llink;
209
210
211 void *base;
212
213
214
215
216
217 wait_queue_t wait;
218
219
220 wait_queue_head_t *whead;
221};
222
223
224struct ep_pqueue {
225 poll_table pt;
226 struct epitem *epi;
227};
228
229
230
231
232static struct mutex epmutex;
233
234
235static struct poll_safewake psw;
236
237
238static struct kmem_cache *epi_cache __read_mostly;
239
240
241static struct kmem_cache *pwq_cache __read_mostly;
242
243
244
245static inline void ep_set_ffd(struct epoll_filefd *ffd,
246 struct file *file, int fd)
247{
248 ffd->file = file;
249 ffd->fd = fd;
250}
251
252
253static inline int ep_cmp_ffd(struct epoll_filefd *p1,
254 struct epoll_filefd *p2)
255{
256 return (p1->file > p2->file ? +1:
257 (p1->file < p2->file ? -1 : p1->fd - p2->fd));
258}
259
260
/*
 * Mark an RB node as "not in any tree" by making it its own parent;
 * ep_rb_linked() tests for exactly this state.
 */
static inline void ep_rb_initnode(struct rb_node *n)
{
	rb_set_parent(n, n);
}
265
266
/*
 * Remove node "n" from the tree rooted at "r" and put it back into the
 * self-parented "unlinked" state recognised by ep_rb_linked().
 */
static inline void ep_rb_erase(struct rb_node *n, struct rb_root *r)
{
	rb_erase(n, r);
	/* re-mark as unlinked */
	rb_set_parent(n, n);
}
272
273
/*
 * Tell whether an RB node is part of a tree.  A node that is its own
 * parent (see ep_rb_initnode()) is considered unlinked.
 */
static inline int ep_rb_linked(struct rb_node *n)
{
	struct rb_node *parent = rb_parent(n);

	return parent != n;
}
278
279
/* Return non-zero when list node "p" is currently linked into a list */
static inline int ep_is_linked(struct list_head *p)
{
	return list_empty(p) ? 0 : 1;
}
284
285
286static inline struct epitem * ep_item_from_wait(wait_queue_t *p)
287{
288 return container_of(p, struct eppoll_entry, wait)->base;
289}
290
291
292static inline struct epitem * ep_item_from_epqueue(poll_table *p)
293{
294 return container_of(p, struct ep_pqueue, pt)->epi;
295}
296
297
/*
 * Tell whether an epoll_ctl() operation carries an event structure that
 * must be copied from user space.  Only EPOLL_CTL_DEL does not.
 */
static inline int ep_op_has_event(int op)
{
	return (op == EPOLL_CTL_DEL) ? 0 : 1;
}
302
303
304static void ep_poll_safewake_init(struct poll_safewake *psw)
305{
306
307 INIT_LIST_HEAD(&psw->wake_task_list);
308 spin_lock_init(&psw->lock);
309}
310
311
312
313
314
315
316
317
318
319
320
321
322
323static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq)
324{
325 int wake_nests = 0;
326 unsigned long flags;
327 struct task_struct *this_task = current;
328 struct list_head *lsthead = &psw->wake_task_list, *lnk;
329 struct wake_task_node *tncur;
330 struct wake_task_node tnode;
331
332 spin_lock_irqsave(&psw->lock, flags);
333
334
335 list_for_each(lnk, lsthead) {
336 tncur = list_entry(lnk, struct wake_task_node, llink);
337
338 if (tncur->wq == wq ||
339 (tncur->task == this_task && ++wake_nests > EP_MAX_POLLWAKE_NESTS)) {
340
341
342
343
344 spin_unlock_irqrestore(&psw->lock, flags);
345 return;
346 }
347 }
348
349
350 tnode.task = this_task;
351 tnode.wq = wq;
352 list_add(&tnode.llink, lsthead);
353
354 spin_unlock_irqrestore(&psw->lock, flags);
355
356
357 wake_up(wq);
358
359
360 spin_lock_irqsave(&psw->lock, flags);
361 list_del(&tnode.llink);
362 spin_unlock_irqrestore(&psw->lock, flags);
363}
364
365
366
367
368
369
370static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
371{
372 int nwait;
373 struct list_head *lsthead = &epi->pwqlist;
374 struct eppoll_entry *pwq;
375
376
377 nwait = xchg(&epi->nwait, 0);
378
379 if (nwait) {
380 while (!list_empty(lsthead)) {
381 pwq = list_first_entry(lsthead, struct eppoll_entry, llink);
382
383 list_del_init(&pwq->llink);
384 remove_wait_queue(pwq->whead, &pwq->wait);
385 kmem_cache_free(pwq_cache, pwq);
386 }
387 }
388}
389
390
391
392
393
394static int ep_remove(struct eventpoll *ep, struct epitem *epi)
395{
396 unsigned long flags;
397 struct file *file = epi->ffd.file;
398
399
400
401
402
403
404
405
406
407 ep_unregister_pollwait(ep, epi);
408
409
410 spin_lock(&file->f_ep_lock);
411 if (ep_is_linked(&epi->fllink))
412 list_del_init(&epi->fllink);
413 spin_unlock(&file->f_ep_lock);
414
415 if (ep_rb_linked(&epi->rbn))
416 ep_rb_erase(&epi->rbn, &ep->rbr);
417
418 spin_lock_irqsave(&ep->lock, flags);
419 if (ep_is_linked(&epi->rdllink))
420 list_del_init(&epi->rdllink);
421 spin_unlock_irqrestore(&ep->lock, flags);
422
423
424 kmem_cache_free(epi_cache, epi);
425
426 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_remove(%p, %p)\n",
427 current, ep, file));
428
429 return 0;
430}
431
432static void ep_free(struct eventpoll *ep)
433{
434 struct rb_node *rbp;
435 struct epitem *epi;
436
437
438 if (waitqueue_active(&ep->poll_wait))
439 ep_poll_safewake(&psw, &ep->poll_wait);
440
441
442
443
444
445
446
447
448
449 mutex_lock(&epmutex);
450
451
452
453
454 for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
455 epi = rb_entry(rbp, struct epitem, rbn);
456
457 ep_unregister_pollwait(ep, epi);
458 }
459
460
461
462
463
464
465
466 while ((rbp = rb_first(&ep->rbr)) != 0) {
467 epi = rb_entry(rbp, struct epitem, rbn);
468 ep_remove(ep, epi);
469 }
470
471 mutex_unlock(&epmutex);
472 mutex_destroy(&ep->mtx);
473 kfree(ep);
474}
475
476static int ep_eventpoll_release(struct inode *inode, struct file *file)
477{
478 struct eventpoll *ep = file->private_data;
479
480 if (ep)
481 ep_free(ep);
482
483 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: close() ep=%p\n", current, ep));
484 return 0;
485}
486
487static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
488{
489 unsigned int pollflags = 0;
490 unsigned long flags;
491 struct eventpoll *ep = file->private_data;
492
493
494 poll_wait(file, &ep->poll_wait, wait);
495
496
497 spin_lock_irqsave(&ep->lock, flags);
498 if (!list_empty(&ep->rdllist))
499 pollflags = POLLIN | POLLRDNORM;
500 spin_unlock_irqrestore(&ep->lock, flags);
501
502 return pollflags;
503}
504
505
506static const struct file_operations eventpoll_fops = {
507 .release = ep_eventpoll_release,
508 .poll = ep_eventpoll_poll
509};
510
511
512static inline int is_file_epoll(struct file *f)
513{
514 return f->f_op == &eventpoll_fops;
515}
516
517
518
519
520
521
522void eventpoll_release_file(struct file *file)
523{
524 struct list_head *lsthead = &file->f_ep_links;
525 struct eventpoll *ep;
526 struct epitem *epi;
527
528
529
530
531
532
533
534
535
536
537
538
539 mutex_lock(&epmutex);
540
541 while (!list_empty(lsthead)) {
542 epi = list_first_entry(lsthead, struct epitem, fllink);
543
544 ep = epi->ep;
545 list_del_init(&epi->fllink);
546 mutex_lock(&ep->mtx);
547 ep_remove(ep, epi);
548 mutex_unlock(&ep->mtx);
549 }
550
551 mutex_unlock(&epmutex);
552}
553
554static int ep_alloc(struct eventpoll **pep)
555{
556 struct eventpoll *ep = kzalloc(sizeof(*ep), GFP_KERNEL);
557
558 if (!ep)
559 return -ENOMEM;
560
561 spin_lock_init(&ep->lock);
562 mutex_init(&ep->mtx);
563 init_waitqueue_head(&ep->wq);
564 init_waitqueue_head(&ep->poll_wait);
565 INIT_LIST_HEAD(&ep->rdllist);
566 ep->rbr = RB_ROOT;
567 ep->ovflist = EP_UNACTIVE_PTR;
568
569 *pep = ep;
570
571 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_alloc() ep=%p\n",
572 current, ep));
573 return 0;
574}
575
576
577
578
579
580
581static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
582{
583 int kcmp;
584 struct rb_node *rbp;
585 struct epitem *epi, *epir = NULL;
586 struct epoll_filefd ffd;
587
588 ep_set_ffd(&ffd, file, fd);
589 for (rbp = ep->rbr.rb_node; rbp; ) {
590 epi = rb_entry(rbp, struct epitem, rbn);
591 kcmp = ep_cmp_ffd(&ffd, &epi->ffd);
592 if (kcmp > 0)
593 rbp = rbp->rb_right;
594 else if (kcmp < 0)
595 rbp = rbp->rb_left;
596 else {
597 epir = epi;
598 break;
599 }
600 }
601
602 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_find(%p) -> %p\n",
603 current, file, epir));
604
605 return epir;
606}
607
608
609
610
611
612
613static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *key)
614{
615 int pwake = 0;
616 unsigned long flags;
617 struct epitem *epi = ep_item_from_wait(wait);
618 struct eventpoll *ep = epi->ep;
619
620 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) epi=%p ep=%p\n",
621 current, epi->ffd.file, epi, ep));
622
623 spin_lock_irqsave(&ep->lock, flags);
624
625
626
627
628
629
630
631 if (!(epi->event.events & ~EP_PRIVATE_BITS))
632 goto out_unlock;
633
634
635
636
637
638
639
640 if (unlikely(ep->ovflist != EP_UNACTIVE_PTR)) {
641 if (epi->next == EP_UNACTIVE_PTR) {
642 epi->next = ep->ovflist;
643 ep->ovflist = epi;
644 }
645 goto out_unlock;
646 }
647
648
649 if (ep_is_linked(&epi->rdllink))
650 goto is_linked;
651
652 list_add_tail(&epi->rdllink, &ep->rdllist);
653
654is_linked:
655
656
657
658
659 if (waitqueue_active(&ep->wq))
660 __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE |
661 TASK_INTERRUPTIBLE);
662 if (waitqueue_active(&ep->poll_wait))
663 pwake++;
664
665out_unlock:
666 spin_unlock_irqrestore(&ep->lock, flags);
667
668
669 if (pwake)
670 ep_poll_safewake(&psw, &ep->poll_wait);
671
672 return 1;
673}
674
675
676
677
678
679static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
680 poll_table *pt)
681{
682 struct epitem *epi = ep_item_from_epqueue(pt);
683 struct eppoll_entry *pwq;
684
685 if (epi->nwait >= 0 && (pwq = kmem_cache_alloc(pwq_cache, GFP_KERNEL))) {
686 init_waitqueue_func_entry(&pwq->wait, ep_poll_callback);
687 pwq->whead = whead;
688 pwq->base = epi;
689 add_wait_queue(whead, &pwq->wait);
690 list_add_tail(&pwq->llink, &epi->pwqlist);
691 epi->nwait++;
692 } else {
693
694 epi->nwait = -1;
695 }
696}
697
698static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi)
699{
700 int kcmp;
701 struct rb_node **p = &ep->rbr.rb_node, *parent = NULL;
702 struct epitem *epic;
703
704 while (*p) {
705 parent = *p;
706 epic = rb_entry(parent, struct epitem, rbn);
707 kcmp = ep_cmp_ffd(&epi->ffd, &epic->ffd);
708 if (kcmp > 0)
709 p = &parent->rb_right;
710 else
711 p = &parent->rb_left;
712 }
713 rb_link_node(&epi->rbn, parent, p);
714 rb_insert_color(&epi->rbn, &ep->rbr);
715}
716
717
718
719
720static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
721 struct file *tfile, int fd)
722{
723 int error, revents, pwake = 0;
724 unsigned long flags;
725 struct epitem *epi;
726 struct ep_pqueue epq;
727
728 error = -ENOMEM;
729 if (!(epi = kmem_cache_alloc(epi_cache, GFP_KERNEL)))
730 goto error_return;
731
732
733 ep_rb_initnode(&epi->rbn);
734 INIT_LIST_HEAD(&epi->rdllink);
735 INIT_LIST_HEAD(&epi->fllink);
736 INIT_LIST_HEAD(&epi->pwqlist);
737 epi->ep = ep;
738 ep_set_ffd(&epi->ffd, tfile, fd);
739 epi->event = *event;
740 epi->nwait = 0;
741 epi->next = EP_UNACTIVE_PTR;
742
743
744 epq.epi = epi;
745 init_poll_funcptr(&epq.pt, ep_ptable_queue_proc);
746
747
748
749
750
751
752
753
754 revents = tfile->f_op->poll(tfile, &epq.pt);
755
756
757
758
759
760
761 if (epi->nwait < 0)
762 goto error_unregister;
763
764
765 spin_lock(&tfile->f_ep_lock);
766 list_add_tail(&epi->fllink, &tfile->f_ep_links);
767 spin_unlock(&tfile->f_ep_lock);
768
769
770
771
772
773 ep_rbtree_insert(ep, epi);
774
775
776 spin_lock_irqsave(&ep->lock, flags);
777
778
779 if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) {
780 list_add_tail(&epi->rdllink, &ep->rdllist);
781
782
783 if (waitqueue_active(&ep->wq))
784 __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE);
785 if (waitqueue_active(&ep->poll_wait))
786 pwake++;
787 }
788
789 spin_unlock_irqrestore(&ep->lock, flags);
790
791
792 if (pwake)
793 ep_poll_safewake(&psw, &ep->poll_wait);
794
795 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_insert(%p, %p, %d)\n",
796 current, ep, tfile, fd));
797
798 return 0;
799
800error_unregister:
801 ep_unregister_pollwait(ep, epi);
802
803
804
805
806
807
808
809 spin_lock_irqsave(&ep->lock, flags);
810 if (ep_is_linked(&epi->rdllink))
811 list_del_init(&epi->rdllink);
812 spin_unlock_irqrestore(&ep->lock, flags);
813
814 kmem_cache_free(epi_cache, epi);
815error_return:
816 return error;
817}
818
819
820
821
822
823static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_event *event)
824{
825 int pwake = 0;
826 unsigned int revents;
827 unsigned long flags;
828
829
830
831
832
833
834
835 epi->event.events = event->events;
836
837
838
839
840
841 revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL);
842
843 spin_lock_irqsave(&ep->lock, flags);
844
845
846 epi->event.data = event->data;
847
848
849
850
851
852 if (revents & event->events) {
853 if (!ep_is_linked(&epi->rdllink)) {
854 list_add_tail(&epi->rdllink, &ep->rdllist);
855
856
857 if (waitqueue_active(&ep->wq))
858 __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE |
859 TASK_INTERRUPTIBLE);
860 if (waitqueue_active(&ep->poll_wait))
861 pwake++;
862 }
863 }
864 spin_unlock_irqrestore(&ep->lock, flags);
865
866
867 if (pwake)
868 ep_poll_safewake(&psw, &ep->poll_wait);
869
870 return 0;
871}
872
873static int ep_send_events(struct eventpoll *ep, struct epoll_event __user *events,
874 int maxevents)
875{
876 int eventcnt, error = -EFAULT, pwake = 0;
877 unsigned int revents;
878 unsigned long flags;
879 struct epitem *epi, *nepi;
880 struct list_head txlist;
881
882 INIT_LIST_HEAD(&txlist);
883
884
885
886
887
888 mutex_lock(&ep->mtx);
889
890
891
892
893
894
895
896
897 spin_lock_irqsave(&ep->lock, flags);
898 list_splice(&ep->rdllist, &txlist);
899 INIT_LIST_HEAD(&ep->rdllist);
900 ep->ovflist = NULL;
901 spin_unlock_irqrestore(&ep->lock, flags);
902
903
904
905
906
907
908 for (eventcnt = 0; !list_empty(&txlist) && eventcnt < maxevents;) {
909 epi = list_first_entry(&txlist, struct epitem, rdllink);
910
911 list_del_init(&epi->rdllink);
912
913
914
915
916
917
918 revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL);
919 revents &= epi->event.events;
920
921
922
923
924
925
926
927 if (revents) {
928 if (__put_user(revents,
929 &events[eventcnt].events) ||
930 __put_user(epi->event.data,
931 &events[eventcnt].data))
932 goto errxit;
933 if (epi->event.events & EPOLLONESHOT)
934 epi->event.events &= EP_PRIVATE_BITS;
935 eventcnt++;
936 }
937
938
939
940
941
942 if (!(epi->event.events & EPOLLET) &&
943 (revents & epi->event.events))
944 list_add_tail(&epi->rdllink, &ep->rdllist);
945 }
946 error = 0;
947
948errxit:
949
950 spin_lock_irqsave(&ep->lock, flags);
951
952
953
954
955
956 for (nepi = ep->ovflist; (epi = nepi) != NULL;
957 nepi = epi->next, epi->next = EP_UNACTIVE_PTR) {
958 if (!ep_is_linked(&epi->rdllink) &&
959 (epi->event.events & ~EP_PRIVATE_BITS))
960 list_add_tail(&epi->rdllink, &ep->rdllist);
961 }
962
963
964
965
966
967 ep->ovflist = EP_UNACTIVE_PTR;
968
969
970
971
972
973
974 list_splice(&txlist, &ep->rdllist);
975
976 if (!list_empty(&ep->rdllist)) {
977
978
979
980
981 if (waitqueue_active(&ep->wq))
982 __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE |
983 TASK_INTERRUPTIBLE);
984 if (waitqueue_active(&ep->poll_wait))
985 pwake++;
986 }
987 spin_unlock_irqrestore(&ep->lock, flags);
988
989 mutex_unlock(&ep->mtx);
990
991
992 if (pwake)
993 ep_poll_safewake(&psw, &ep->poll_wait);
994
995 return eventcnt == 0 ? error: eventcnt;
996}
997
998static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
999 int maxevents, long timeout)
1000{
1001 int res, eavail;
1002 unsigned long flags;
1003 long jtimeout;
1004 wait_queue_t wait;
1005
1006
1007
1008
1009
1010
1011 jtimeout = (timeout < 0 || timeout >= EP_MAX_MSTIMEO) ?
1012 MAX_SCHEDULE_TIMEOUT : (timeout * HZ + 999) / 1000;
1013
1014retry:
1015 spin_lock_irqsave(&ep->lock, flags);
1016
1017 res = 0;
1018 if (list_empty(&ep->rdllist)) {
1019
1020
1021
1022
1023
1024 init_waitqueue_entry(&wait, current);
1025 wait.flags |= WQ_FLAG_EXCLUSIVE;
1026 __add_wait_queue(&ep->wq, &wait);
1027
1028 for (;;) {
1029
1030
1031
1032
1033
1034 set_current_state(TASK_INTERRUPTIBLE);
1035 if (!list_empty(&ep->rdllist) || !jtimeout)
1036 break;
1037 if (signal_pending(current)) {
1038 res = -EINTR;
1039 break;
1040 }
1041
1042 spin_unlock_irqrestore(&ep->lock, flags);
1043 jtimeout = schedule_timeout(jtimeout);
1044 spin_lock_irqsave(&ep->lock, flags);
1045 }
1046 __remove_wait_queue(&ep->wq, &wait);
1047
1048 set_current_state(TASK_RUNNING);
1049 }
1050
1051
1052 eavail = !list_empty(&ep->rdllist);
1053
1054 spin_unlock_irqrestore(&ep->lock, flags);
1055
1056
1057
1058
1059
1060
1061 if (!res && eavail &&
1062 !(res = ep_send_events(ep, events, maxevents)) && jtimeout)
1063 goto retry;
1064
1065 return res;
1066}
1067
1068
1069
1070
1071
1072
1073
1074asmlinkage long sys_epoll_create(int size)
1075{
1076 int error, fd = -1;
1077 struct eventpoll *ep;
1078 struct inode *inode;
1079 struct file *file;
1080
1081 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n",
1082 current, size));
1083
1084
1085
1086
1087
1088 error = -EINVAL;
1089 if (size <= 0 || (error = ep_alloc(&ep)) != 0)
1090 goto error_return;
1091
1092
1093
1094
1095
1096 error = anon_inode_getfd(&fd, &inode, &file, "[eventpoll]",
1097 &eventpoll_fops, ep);
1098 if (error)
1099 goto error_free;
1100
1101 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
1102 current, size, fd));
1103
1104 return fd;
1105
1106error_free:
1107 ep_free(ep);
1108error_return:
1109 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
1110 current, size, error));
1111 return error;
1112}
1113
1114
1115
1116
1117
1118
1119asmlinkage long sys_epoll_ctl(int epfd, int op, int fd,
1120 struct epoll_event __user *event)
1121{
1122 int error;
1123 struct file *file, *tfile;
1124 struct eventpoll *ep;
1125 struct epitem *epi;
1126 struct epoll_event epds;
1127
1128 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_ctl(%d, %d, %d, %p)\n",
1129 current, epfd, op, fd, event));
1130
1131 error = -EFAULT;
1132 if (ep_op_has_event(op) &&
1133 copy_from_user(&epds, event, sizeof(struct epoll_event)))
1134 goto error_return;
1135
1136
1137 error = -EBADF;
1138 file = fget(epfd);
1139 if (!file)
1140 goto error_return;
1141
1142
1143 tfile = fget(fd);
1144 if (!tfile)
1145 goto error_fput;
1146
1147
1148 error = -EPERM;
1149 if (!tfile->f_op || !tfile->f_op->poll)
1150 goto error_tgt_fput;
1151
1152
1153
1154
1155
1156
1157 error = -EINVAL;
1158 if (file == tfile || !is_file_epoll(file))
1159 goto error_tgt_fput;
1160
1161
1162
1163
1164
1165 ep = file->private_data;
1166
1167 mutex_lock(&ep->mtx);
1168
1169
1170
1171
1172
1173
1174 epi = ep_find(ep, tfile, fd);
1175
1176 error = -EINVAL;
1177 switch (op) {
1178 case EPOLL_CTL_ADD:
1179 if (!epi) {
1180 epds.events |= POLLERR | POLLHUP;
1181
1182 error = ep_insert(ep, &epds, tfile, fd);
1183 } else
1184 error = -EEXIST;
1185 break;
1186 case EPOLL_CTL_DEL:
1187 if (epi)
1188 error = ep_remove(ep, epi);
1189 else
1190 error = -ENOENT;
1191 break;
1192 case EPOLL_CTL_MOD:
1193 if (epi) {
1194 epds.events |= POLLERR | POLLHUP;
1195 error = ep_modify(ep, epi, &epds);
1196 } else
1197 error = -ENOENT;
1198 break;
1199 }
1200 mutex_unlock(&ep->mtx);
1201
1202error_tgt_fput:
1203 fput(tfile);
1204error_fput:
1205 fput(file);
1206error_return:
1207 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_ctl(%d, %d, %d, %p) = %d\n",
1208 current, epfd, op, fd, event, error));
1209
1210 return error;
1211}
1212
1213
1214
1215
1216
1217asmlinkage long sys_epoll_wait(int epfd, struct epoll_event __user *events,
1218 int maxevents, int timeout)
1219{
1220 int error;
1221 struct file *file;
1222 struct eventpoll *ep;
1223
1224 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_wait(%d, %p, %d, %d)\n",
1225 current, epfd, events, maxevents, timeout));
1226
1227
1228 if (maxevents <= 0 || maxevents > EP_MAX_EVENTS)
1229 return -EINVAL;
1230
1231
1232 if (!access_ok(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event))) {
1233 error = -EFAULT;
1234 goto error_return;
1235 }
1236
1237
1238 error = -EBADF;
1239 file = fget(epfd);
1240 if (!file)
1241 goto error_return;
1242
1243
1244
1245
1246
1247 error = -EINVAL;
1248 if (!is_file_epoll(file))
1249 goto error_fput;
1250
1251
1252
1253
1254
1255 ep = file->private_data;
1256
1257
1258 error = ep_poll(ep, events, maxevents, timeout);
1259
1260error_fput:
1261 fput(file);
1262error_return:
1263 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_wait(%d, %p, %d, %d) = %d\n",
1264 current, epfd, events, maxevents, timeout, error));
1265
1266 return error;
1267}
1268
1269#ifdef TIF_RESTORE_SIGMASK
1270
1271
1272
1273
1274
1275asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events,
1276 int maxevents, int timeout, const sigset_t __user *sigmask,
1277 size_t sigsetsize)
1278{
1279 int error;
1280 sigset_t ksigmask, sigsaved;
1281
1282
1283
1284
1285
1286 if (sigmask) {
1287 if (sigsetsize != sizeof(sigset_t))
1288 return -EINVAL;
1289 if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
1290 return -EFAULT;
1291 sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
1292 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
1293 }
1294
1295 error = sys_epoll_wait(epfd, events, maxevents, timeout);
1296
1297
1298
1299
1300
1301
1302
1303 if (sigmask) {
1304 if (error == -EINTR) {
1305 memcpy(¤t->saved_sigmask, &sigsaved,
1306 sizeof(sigsaved));
1307 set_thread_flag(TIF_RESTORE_SIGMASK);
1308 } else
1309 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
1310 }
1311
1312 return error;
1313}
1314
1315#endif
1316
1317static int __init eventpoll_init(void)
1318{
1319 mutex_init(&epmutex);
1320
1321
1322 ep_poll_safewake_init(&psw);
1323
1324
1325 epi_cache = kmem_cache_create("eventpoll_epi", sizeof(struct epitem),
1326 0, SLAB_HWCACHE_ALIGN|EPI_SLAB_DEBUG|SLAB_PANIC,
1327 NULL, NULL);
1328
1329
1330 pwq_cache = kmem_cache_create("eventpoll_pwq",
1331 sizeof(struct eppoll_entry), 0,
1332 EPI_SLAB_DEBUG|SLAB_PANIC, NULL, NULL);
1333
1334 return 0;
1335}
1336fs_initcall(eventpoll_init);
1337
1338