1
2
3
4
5
6
7
8
9
10
11
12
13
14#include <linux/init.h>
15#include <linux/kernel.h>
16#include <linux/sched.h>
17#include <linux/fs.h>
18#include <linux/file.h>
19#include <linux/signal.h>
20#include <linux/errno.h>
21#include <linux/mm.h>
22#include <linux/slab.h>
23#include <linux/poll.h>
24#include <linux/string.h>
25#include <linux/list.h>
26#include <linux/hash.h>
27#include <linux/spinlock.h>
28#include <linux/syscalls.h>
29#include <linux/rbtree.h>
30#include <linux/wait.h>
31#include <linux/eventpoll.h>
32#include <linux/mount.h>
33#include <linux/bitops.h>
34#include <linux/mutex.h>
35#include <linux/anon_inodes.h>
36#include <asm/uaccess.h>
37#include <asm/system.h>
38#include <asm/io.h>
39#include <asm/mman.h>
40#include <asm/atomic.h>
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
/* Bits in epoll_event.events that are epoll control flags, not poll events. */
#define EP_PRIVATE_BITS (EPOLLET | EPOLLONESHOT)

/* Nesting limit handed to ep_call_nested() by the wakeup and poll paths. */
#define EP_MAX_NESTS 4

/* Millisecond timeouts at or above this value are treated by ep_poll() as infinite. */
#define EP_MAX_MSTIMEO min(1000ULL * MAX_SCHEDULE_TIMEOUT / HZ, (LONG_MAX - 999ULL) / HZ)

/* Largest maxevents accepted by epoll_wait(): the user buffer size stays within INT_MAX bytes. */
#define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event))

/* Sentinel stored in eventpoll.ovflist and epitem.next meaning "not in use". */
#define EP_UNACTIVE_PTR ((void *) -1L)

/* Bytes accounted per watch when eventpoll_init() sizes max_user_watches. */
#define EP_ITEM_COST (sizeof(struct epitem) + sizeof(struct eppoll_entry))
88
/*
 * Key identifying a watched target: the (file, fd) pair. ep_cmp_ffd()
 * orders items by file pointer first and by fd second.
 */
struct epoll_filefd {
	struct file	*file;
	int		fd;
};
93
94
95
96
97
98struct nested_call_node {
99 struct list_head llink;
100 void *cookie;
101 void *ctx;
102};
103
104
105
106
107
108struct nested_calls {
109 struct list_head tasks_call_list;
110 spinlock_t lock;
111};
112
113
114
115
116
117struct epitem {
118
119 struct rb_node rbn;
120
121
122 struct list_head rdllink;
123
124
125
126
127
128 struct epitem *next;
129
130
131 struct epoll_filefd ffd;
132
133
134 int nwait;
135
136
137 struct list_head pwqlist;
138
139
140 struct eventpoll *ep;
141
142
143 struct list_head fllink;
144
145
146 struct epoll_event event;
147};
148
149
150
151
152
153
154struct eventpoll {
155
156 spinlock_t lock;
157
158
159
160
161
162
163
164 struct mutex mtx;
165
166
167 wait_queue_head_t wq;
168
169
170 wait_queue_head_t poll_wait;
171
172
173 struct list_head rdllist;
174
175
176 struct rb_root rbr;
177
178
179
180
181
182
183 struct epitem *ovflist;
184
185
186 struct user_struct *user;
187};
188
189
190struct eppoll_entry {
191
192 struct list_head llink;
193
194
195 struct epitem *base;
196
197
198
199
200
201 wait_queue_t wait;
202
203
204 wait_queue_head_t *whead;
205};
206
207
208struct ep_pqueue {
209 poll_table pt;
210 struct epitem *epi;
211};
212
213
214struct ep_send_events_data {
215 int maxevents;
216 struct epoll_event __user *events;
217};
218
219
220
221
222
223static int max_user_watches __read_mostly;
224
225
226
227
228static DEFINE_MUTEX(epmutex);
229
230
231static struct nested_calls poll_safewake_ncalls;
232
233
234static struct nested_calls poll_readywalk_ncalls;
235
236
237static struct kmem_cache *epi_cache __read_mostly;
238
239
240static struct kmem_cache *pwq_cache __read_mostly;
241
#ifdef CONFIG_SYSCTL

#include <linux/sysctl.h>

/* Lower bound (.extra1) for the max_user_watches sysctl. */
static int zero;

/*
 * sysctl table exposing max_user_watches as a mode-0644 integer that may
 * not be set below zero.
 */
ctl_table epoll_table[] = {
	{
		.procname	= "max_user_watches",
		.data		= &max_user_watches,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
	},
	{ .ctl_name = 0 }	/* terminating entry */
};
#endif
260
261
262
263static inline void ep_set_ffd(struct epoll_filefd *ffd,
264 struct file *file, int fd)
265{
266 ffd->file = file;
267 ffd->fd = fd;
268}
269
270
271static inline int ep_cmp_ffd(struct epoll_filefd *p1,
272 struct epoll_filefd *p2)
273{
274 return (p1->file > p2->file ? +1:
275 (p1->file < p2->file ? -1 : p1->fd - p2->fd));
276}
277
278
/* Non-zero when the list node is currently linked into some list. */
static inline int ep_is_linked(struct list_head *p)
{
	return list_empty(p) ? 0 : 1;
}
283
284
285static inline struct epitem *ep_item_from_wait(wait_queue_t *p)
286{
287 return container_of(p, struct eppoll_entry, wait)->base;
288}
289
290
291static inline struct epitem *ep_item_from_epqueue(poll_table *p)
292{
293 return container_of(p, struct ep_pqueue, pt)->epi;
294}
295
296
297static inline int ep_op_has_event(int op)
298{
299 return op != EPOLL_CTL_DEL;
300}
301
302
303static void ep_nested_calls_init(struct nested_calls *ncalls)
304{
305 INIT_LIST_HEAD(&ncalls->tasks_call_list);
306 spin_lock_init(&ncalls->lock);
307}
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325static int ep_call_nested(struct nested_calls *ncalls, int max_nests,
326 int (*nproc)(void *, void *, int), void *priv,
327 void *cookie, void *ctx)
328{
329 int error, call_nests = 0;
330 unsigned long flags;
331 struct list_head *lsthead = &ncalls->tasks_call_list;
332 struct nested_call_node *tncur;
333 struct nested_call_node tnode;
334
335 spin_lock_irqsave(&ncalls->lock, flags);
336
337
338
339
340
341
342 list_for_each_entry(tncur, lsthead, llink) {
343 if (tncur->ctx == ctx &&
344 (tncur->cookie == cookie || ++call_nests > max_nests)) {
345
346
347
348
349 error = -1;
350 goto out_unlock;
351 }
352 }
353
354
355 tnode.ctx = ctx;
356 tnode.cookie = cookie;
357 list_add(&tnode.llink, lsthead);
358
359 spin_unlock_irqrestore(&ncalls->lock, flags);
360
361
362 error = (*nproc)(priv, cookie, call_nests);
363
364
365 spin_lock_irqsave(&ncalls->lock, flags);
366 list_del(&tnode.llink);
367out_unlock:
368 spin_unlock_irqrestore(&ncalls->lock, flags);
369
370 return error;
371}
372
373#ifdef CONFIG_DEBUG_LOCK_ALLOC
374static inline void ep_wake_up_nested(wait_queue_head_t *wqueue,
375 unsigned long events, int subclass)
376{
377 unsigned long flags;
378
379 spin_lock_irqsave_nested(&wqueue->lock, flags, subclass);
380 wake_up_locked_poll(wqueue, events);
381 spin_unlock_irqrestore(&wqueue->lock, flags);
382}
383#else
384static inline void ep_wake_up_nested(wait_queue_head_t *wqueue,
385 unsigned long events, int subclass)
386{
387 wake_up_poll(wqueue, events);
388}
389#endif
390
391static int ep_poll_wakeup_proc(void *priv, void *cookie, int call_nests)
392{
393 ep_wake_up_nested((wait_queue_head_t *) cookie, POLLIN,
394 1 + call_nests);
395 return 0;
396}
397
398
399
400
401
402
403
404
405
406
407
408static void ep_poll_safewake(wait_queue_head_t *wq)
409{
410 int this_cpu = get_cpu();
411
412 ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS,
413 ep_poll_wakeup_proc, NULL, wq, (void *) (long) this_cpu);
414
415 put_cpu();
416}
417
418
419
420
421
422
423static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
424{
425 struct list_head *lsthead = &epi->pwqlist;
426 struct eppoll_entry *pwq;
427
428 while (!list_empty(lsthead)) {
429 pwq = list_first_entry(lsthead, struct eppoll_entry, llink);
430
431 list_del(&pwq->llink);
432 remove_wait_queue(pwq->whead, &pwq->wait);
433 kmem_cache_free(pwq_cache, pwq);
434 }
435}
436
437
438
439
440
441
442
443
444
445
446
447
448static int ep_scan_ready_list(struct eventpoll *ep,
449 int (*sproc)(struct eventpoll *,
450 struct list_head *, void *),
451 void *priv)
452{
453 int error, pwake = 0;
454 unsigned long flags;
455 struct epitem *epi, *nepi;
456 LIST_HEAD(txlist);
457
458
459
460
461
462 mutex_lock(&ep->mtx);
463
464
465
466
467
468
469
470
471
472 spin_lock_irqsave(&ep->lock, flags);
473 list_splice_init(&ep->rdllist, &txlist);
474 ep->ovflist = NULL;
475 spin_unlock_irqrestore(&ep->lock, flags);
476
477
478
479
480 error = (*sproc)(ep, &txlist, priv);
481
482 spin_lock_irqsave(&ep->lock, flags);
483
484
485
486
487
488 for (nepi = ep->ovflist; (epi = nepi) != NULL;
489 nepi = epi->next, epi->next = EP_UNACTIVE_PTR) {
490
491
492
493
494
495
496 if (!ep_is_linked(&epi->rdllink))
497 list_add_tail(&epi->rdllink, &ep->rdllist);
498 }
499
500
501
502
503
504 ep->ovflist = EP_UNACTIVE_PTR;
505
506
507
508
509 list_splice(&txlist, &ep->rdllist);
510
511 if (!list_empty(&ep->rdllist)) {
512
513
514
515
516 if (waitqueue_active(&ep->wq))
517 wake_up_locked(&ep->wq);
518 if (waitqueue_active(&ep->poll_wait))
519 pwake++;
520 }
521 spin_unlock_irqrestore(&ep->lock, flags);
522
523 mutex_unlock(&ep->mtx);
524
525
526 if (pwake)
527 ep_poll_safewake(&ep->poll_wait);
528
529 return error;
530}
531
532
533
534
535
536static int ep_remove(struct eventpoll *ep, struct epitem *epi)
537{
538 unsigned long flags;
539 struct file *file = epi->ffd.file;
540
541
542
543
544
545
546
547
548
549 ep_unregister_pollwait(ep, epi);
550
551
552 spin_lock(&file->f_lock);
553 if (ep_is_linked(&epi->fllink))
554 list_del_init(&epi->fllink);
555 spin_unlock(&file->f_lock);
556
557 rb_erase(&epi->rbn, &ep->rbr);
558
559 spin_lock_irqsave(&ep->lock, flags);
560 if (ep_is_linked(&epi->rdllink))
561 list_del_init(&epi->rdllink);
562 spin_unlock_irqrestore(&ep->lock, flags);
563
564
565 kmem_cache_free(epi_cache, epi);
566
567 atomic_dec(&ep->user->epoll_watches);
568
569 return 0;
570}
571
572static void ep_free(struct eventpoll *ep)
573{
574 struct rb_node *rbp;
575 struct epitem *epi;
576
577
578 if (waitqueue_active(&ep->poll_wait))
579 ep_poll_safewake(&ep->poll_wait);
580
581
582
583
584
585
586
587
588
589 mutex_lock(&epmutex);
590
591
592
593
594 for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
595 epi = rb_entry(rbp, struct epitem, rbn);
596
597 ep_unregister_pollwait(ep, epi);
598 }
599
600
601
602
603
604
605
606 while ((rbp = rb_first(&ep->rbr)) != NULL) {
607 epi = rb_entry(rbp, struct epitem, rbn);
608 ep_remove(ep, epi);
609 }
610
611 mutex_unlock(&epmutex);
612 mutex_destroy(&ep->mtx);
613 free_uid(ep->user);
614 kfree(ep);
615}
616
617static int ep_eventpoll_release(struct inode *inode, struct file *file)
618{
619 struct eventpoll *ep = file->private_data;
620
621 if (ep)
622 ep_free(ep);
623
624 return 0;
625}
626
627static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
628 void *priv)
629{
630 struct epitem *epi, *tmp;
631
632 list_for_each_entry_safe(epi, tmp, head, rdllink) {
633 if (epi->ffd.file->f_op->poll(epi->ffd.file, NULL) &
634 epi->event.events)
635 return POLLIN | POLLRDNORM;
636 else {
637
638
639
640
641
642 list_del_init(&epi->rdllink);
643 }
644 }
645
646 return 0;
647}
648
649static int ep_poll_readyevents_proc(void *priv, void *cookie, int call_nests)
650{
651 return ep_scan_ready_list(priv, ep_read_events_proc, NULL);
652}
653
654static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
655{
656 int pollflags;
657 struct eventpoll *ep = file->private_data;
658
659
660 poll_wait(file, &ep->poll_wait, wait);
661
662
663
664
665
666
667
668 pollflags = ep_call_nested(&poll_readywalk_ncalls, EP_MAX_NESTS,
669 ep_poll_readyevents_proc, ep, ep, current);
670
671 return pollflags != -1 ? pollflags : 0;
672}
673
674
675static const struct file_operations eventpoll_fops = {
676 .release = ep_eventpoll_release,
677 .poll = ep_eventpoll_poll
678};
679
680
681static inline int is_file_epoll(struct file *f)
682{
683 return f->f_op == &eventpoll_fops;
684}
685
686
687
688
689
690
691void eventpoll_release_file(struct file *file)
692{
693 struct list_head *lsthead = &file->f_ep_links;
694 struct eventpoll *ep;
695 struct epitem *epi;
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710 mutex_lock(&epmutex);
711
712 while (!list_empty(lsthead)) {
713 epi = list_first_entry(lsthead, struct epitem, fllink);
714
715 ep = epi->ep;
716 list_del_init(&epi->fllink);
717 mutex_lock(&ep->mtx);
718 ep_remove(ep, epi);
719 mutex_unlock(&ep->mtx);
720 }
721
722 mutex_unlock(&epmutex);
723}
724
725static int ep_alloc(struct eventpoll **pep)
726{
727 int error;
728 struct user_struct *user;
729 struct eventpoll *ep;
730
731 user = get_current_user();
732 error = -ENOMEM;
733 ep = kzalloc(sizeof(*ep), GFP_KERNEL);
734 if (unlikely(!ep))
735 goto free_uid;
736
737 spin_lock_init(&ep->lock);
738 mutex_init(&ep->mtx);
739 init_waitqueue_head(&ep->wq);
740 init_waitqueue_head(&ep->poll_wait);
741 INIT_LIST_HEAD(&ep->rdllist);
742 ep->rbr = RB_ROOT;
743 ep->ovflist = EP_UNACTIVE_PTR;
744 ep->user = user;
745
746 *pep = ep;
747
748 return 0;
749
750free_uid:
751 free_uid(user);
752 return error;
753}
754
755
756
757
758
759
760static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
761{
762 int kcmp;
763 struct rb_node *rbp;
764 struct epitem *epi, *epir = NULL;
765 struct epoll_filefd ffd;
766
767 ep_set_ffd(&ffd, file, fd);
768 for (rbp = ep->rbr.rb_node; rbp; ) {
769 epi = rb_entry(rbp, struct epitem, rbn);
770 kcmp = ep_cmp_ffd(&ffd, &epi->ffd);
771 if (kcmp > 0)
772 rbp = rbp->rb_right;
773 else if (kcmp < 0)
774 rbp = rbp->rb_left;
775 else {
776 epir = epi;
777 break;
778 }
779 }
780
781 return epir;
782}
783
784
785
786
787
788
789static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *key)
790{
791 int pwake = 0;
792 unsigned long flags;
793 struct epitem *epi = ep_item_from_wait(wait);
794 struct eventpoll *ep = epi->ep;
795
796 spin_lock_irqsave(&ep->lock, flags);
797
798
799
800
801
802
803
804 if (!(epi->event.events & ~EP_PRIVATE_BITS))
805 goto out_unlock;
806
807
808
809
810
811
812
813 if (key && !((unsigned long) key & epi->event.events))
814 goto out_unlock;
815
816
817
818
819
820
821
822 if (unlikely(ep->ovflist != EP_UNACTIVE_PTR)) {
823 if (epi->next == EP_UNACTIVE_PTR) {
824 epi->next = ep->ovflist;
825 ep->ovflist = epi;
826 }
827 goto out_unlock;
828 }
829
830
831 if (!ep_is_linked(&epi->rdllink))
832 list_add_tail(&epi->rdllink, &ep->rdllist);
833
834
835
836
837
838 if (waitqueue_active(&ep->wq))
839 wake_up_locked(&ep->wq);
840 if (waitqueue_active(&ep->poll_wait))
841 pwake++;
842
843out_unlock:
844 spin_unlock_irqrestore(&ep->lock, flags);
845
846
847 if (pwake)
848 ep_poll_safewake(&ep->poll_wait);
849
850 return 1;
851}
852
853
854
855
856
857static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
858 poll_table *pt)
859{
860 struct epitem *epi = ep_item_from_epqueue(pt);
861 struct eppoll_entry *pwq;
862
863 if (epi->nwait >= 0 && (pwq = kmem_cache_alloc(pwq_cache, GFP_KERNEL))) {
864 init_waitqueue_func_entry(&pwq->wait, ep_poll_callback);
865 pwq->whead = whead;
866 pwq->base = epi;
867 add_wait_queue(whead, &pwq->wait);
868 list_add_tail(&pwq->llink, &epi->pwqlist);
869 epi->nwait++;
870 } else {
871
872 epi->nwait = -1;
873 }
874}
875
876static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi)
877{
878 int kcmp;
879 struct rb_node **p = &ep->rbr.rb_node, *parent = NULL;
880 struct epitem *epic;
881
882 while (*p) {
883 parent = *p;
884 epic = rb_entry(parent, struct epitem, rbn);
885 kcmp = ep_cmp_ffd(&epi->ffd, &epic->ffd);
886 if (kcmp > 0)
887 p = &parent->rb_right;
888 else
889 p = &parent->rb_left;
890 }
891 rb_link_node(&epi->rbn, parent, p);
892 rb_insert_color(&epi->rbn, &ep->rbr);
893}
894
895
896
897
898static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
899 struct file *tfile, int fd)
900{
901 int error, revents, pwake = 0;
902 unsigned long flags;
903 struct epitem *epi;
904 struct ep_pqueue epq;
905
906 if (unlikely(atomic_read(&ep->user->epoll_watches) >=
907 max_user_watches))
908 return -ENOSPC;
909 if (!(epi = kmem_cache_alloc(epi_cache, GFP_KERNEL)))
910 return -ENOMEM;
911
912
913 INIT_LIST_HEAD(&epi->rdllink);
914 INIT_LIST_HEAD(&epi->fllink);
915 INIT_LIST_HEAD(&epi->pwqlist);
916 epi->ep = ep;
917 ep_set_ffd(&epi->ffd, tfile, fd);
918 epi->event = *event;
919 epi->nwait = 0;
920 epi->next = EP_UNACTIVE_PTR;
921
922
923 epq.epi = epi;
924 init_poll_funcptr(&epq.pt, ep_ptable_queue_proc);
925
926
927
928
929
930
931
932
933 revents = tfile->f_op->poll(tfile, &epq.pt);
934
935
936
937
938
939
940 error = -ENOMEM;
941 if (epi->nwait < 0)
942 goto error_unregister;
943
944
945 spin_lock(&tfile->f_lock);
946 list_add_tail(&epi->fllink, &tfile->f_ep_links);
947 spin_unlock(&tfile->f_lock);
948
949
950
951
952
953 ep_rbtree_insert(ep, epi);
954
955
956 spin_lock_irqsave(&ep->lock, flags);
957
958
959 if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) {
960 list_add_tail(&epi->rdllink, &ep->rdllist);
961
962
963 if (waitqueue_active(&ep->wq))
964 wake_up_locked(&ep->wq);
965 if (waitqueue_active(&ep->poll_wait))
966 pwake++;
967 }
968
969 spin_unlock_irqrestore(&ep->lock, flags);
970
971 atomic_inc(&ep->user->epoll_watches);
972
973
974 if (pwake)
975 ep_poll_safewake(&ep->poll_wait);
976
977 return 0;
978
979error_unregister:
980 ep_unregister_pollwait(ep, epi);
981
982
983
984
985
986
987
988 spin_lock_irqsave(&ep->lock, flags);
989 if (ep_is_linked(&epi->rdllink))
990 list_del_init(&epi->rdllink);
991 spin_unlock_irqrestore(&ep->lock, flags);
992
993 kmem_cache_free(epi_cache, epi);
994
995 return error;
996}
997
998
999
1000
1001
1002static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_event *event)
1003{
1004 int pwake = 0;
1005 unsigned int revents;
1006
1007
1008
1009
1010
1011
1012 epi->event.events = event->events;
1013 epi->event.data = event->data;
1014
1015
1016
1017
1018
1019 revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL);
1020
1021
1022
1023
1024
1025 if (revents & event->events) {
1026 spin_lock_irq(&ep->lock);
1027 if (!ep_is_linked(&epi->rdllink)) {
1028 list_add_tail(&epi->rdllink, &ep->rdllist);
1029
1030
1031 if (waitqueue_active(&ep->wq))
1032 wake_up_locked(&ep->wq);
1033 if (waitqueue_active(&ep->poll_wait))
1034 pwake++;
1035 }
1036 spin_unlock_irq(&ep->lock);
1037 }
1038
1039
1040 if (pwake)
1041 ep_poll_safewake(&ep->poll_wait);
1042
1043 return 0;
1044}
1045
1046static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
1047 void *priv)
1048{
1049 struct ep_send_events_data *esed = priv;
1050 int eventcnt;
1051 unsigned int revents;
1052 struct epitem *epi;
1053 struct epoll_event __user *uevent;
1054
1055
1056
1057
1058
1059
1060 for (eventcnt = 0, uevent = esed->events;
1061 !list_empty(head) && eventcnt < esed->maxevents;) {
1062 epi = list_first_entry(head, struct epitem, rdllink);
1063
1064 list_del_init(&epi->rdllink);
1065
1066 revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL) &
1067 epi->event.events;
1068
1069
1070
1071
1072
1073
1074
1075 if (revents) {
1076 if (__put_user(revents, &uevent->events) ||
1077 __put_user(epi->event.data, &uevent->data)) {
1078 list_add(&epi->rdllink, head);
1079 return eventcnt ? eventcnt : -EFAULT;
1080 }
1081 eventcnt++;
1082 uevent++;
1083 if (epi->event.events & EPOLLONESHOT)
1084 epi->event.events &= EP_PRIVATE_BITS;
1085 else if (!(epi->event.events & EPOLLET)) {
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097 list_add_tail(&epi->rdllink, &ep->rdllist);
1098 }
1099 }
1100 }
1101
1102 return eventcnt;
1103}
1104
1105static int ep_send_events(struct eventpoll *ep,
1106 struct epoll_event __user *events, int maxevents)
1107{
1108 struct ep_send_events_data esed;
1109
1110 esed.maxevents = maxevents;
1111 esed.events = events;
1112
1113 return ep_scan_ready_list(ep, ep_send_events_proc, &esed);
1114}
1115
1116static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
1117 int maxevents, long timeout)
1118{
1119 int res, eavail;
1120 unsigned long flags;
1121 long jtimeout;
1122 wait_queue_t wait;
1123
1124
1125
1126
1127
1128
1129 jtimeout = (timeout < 0 || timeout >= EP_MAX_MSTIMEO) ?
1130 MAX_SCHEDULE_TIMEOUT : (timeout * HZ + 999) / 1000;
1131
1132retry:
1133 spin_lock_irqsave(&ep->lock, flags);
1134
1135 res = 0;
1136 if (list_empty(&ep->rdllist)) {
1137
1138
1139
1140
1141
1142 init_waitqueue_entry(&wait, current);
1143 wait.flags |= WQ_FLAG_EXCLUSIVE;
1144 __add_wait_queue(&ep->wq, &wait);
1145
1146 for (;;) {
1147
1148
1149
1150
1151
1152 set_current_state(TASK_INTERRUPTIBLE);
1153 if (!list_empty(&ep->rdllist) || !jtimeout)
1154 break;
1155 if (signal_pending(current)) {
1156 res = -EINTR;
1157 break;
1158 }
1159
1160 spin_unlock_irqrestore(&ep->lock, flags);
1161 jtimeout = schedule_timeout(jtimeout);
1162 spin_lock_irqsave(&ep->lock, flags);
1163 }
1164 __remove_wait_queue(&ep->wq, &wait);
1165
1166 set_current_state(TASK_RUNNING);
1167 }
1168
1169 eavail = !list_empty(&ep->rdllist) || ep->ovflist != EP_UNACTIVE_PTR;
1170
1171 spin_unlock_irqrestore(&ep->lock, flags);
1172
1173
1174
1175
1176
1177
1178 if (!res && eavail &&
1179 !(res = ep_send_events(ep, events, maxevents)) && jtimeout)
1180 goto retry;
1181
1182 return res;
1183}
1184
1185
1186
1187
1188SYSCALL_DEFINE1(epoll_create1, int, flags)
1189{
1190 int error;
1191 struct eventpoll *ep = NULL;
1192
1193
1194 BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC);
1195
1196 if (flags & ~EPOLL_CLOEXEC)
1197 return -EINVAL;
1198
1199
1200
1201 error = ep_alloc(&ep);
1202 if (error < 0)
1203 return error;
1204
1205
1206
1207
1208 error = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep,
1209 flags & O_CLOEXEC);
1210 if (error < 0)
1211 ep_free(ep);
1212
1213 return error;
1214}
1215
1216SYSCALL_DEFINE1(epoll_create, int, size)
1217{
1218 if (size <= 0)
1219 return -EINVAL;
1220
1221 return sys_epoll_create1(0);
1222}
1223
1224
1225
1226
1227
1228
1229SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1230 struct epoll_event __user *, event)
1231{
1232 int error;
1233 struct file *file, *tfile;
1234 struct eventpoll *ep;
1235 struct epitem *epi;
1236 struct epoll_event epds;
1237
1238 error = -EFAULT;
1239 if (ep_op_has_event(op) &&
1240 copy_from_user(&epds, event, sizeof(struct epoll_event)))
1241 goto error_return;
1242
1243
1244 error = -EBADF;
1245 file = fget(epfd);
1246 if (!file)
1247 goto error_return;
1248
1249
1250 tfile = fget(fd);
1251 if (!tfile)
1252 goto error_fput;
1253
1254
1255 error = -EPERM;
1256 if (!tfile->f_op || !tfile->f_op->poll)
1257 goto error_tgt_fput;
1258
1259
1260
1261
1262
1263
1264 error = -EINVAL;
1265 if (file == tfile || !is_file_epoll(file))
1266 goto error_tgt_fput;
1267
1268
1269
1270
1271
1272 ep = file->private_data;
1273
1274 mutex_lock(&ep->mtx);
1275
1276
1277
1278
1279
1280
1281 epi = ep_find(ep, tfile, fd);
1282
1283 error = -EINVAL;
1284 switch (op) {
1285 case EPOLL_CTL_ADD:
1286 if (!epi) {
1287 epds.events |= POLLERR | POLLHUP;
1288 error = ep_insert(ep, &epds, tfile, fd);
1289 } else
1290 error = -EEXIST;
1291 break;
1292 case EPOLL_CTL_DEL:
1293 if (epi)
1294 error = ep_remove(ep, epi);
1295 else
1296 error = -ENOENT;
1297 break;
1298 case EPOLL_CTL_MOD:
1299 if (epi) {
1300 epds.events |= POLLERR | POLLHUP;
1301 error = ep_modify(ep, epi, &epds);
1302 } else
1303 error = -ENOENT;
1304 break;
1305 }
1306 mutex_unlock(&ep->mtx);
1307
1308error_tgt_fput:
1309 fput(tfile);
1310error_fput:
1311 fput(file);
1312error_return:
1313
1314 return error;
1315}
1316
1317
1318
1319
1320
1321SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events,
1322 int, maxevents, int, timeout)
1323{
1324 int error;
1325 struct file *file;
1326 struct eventpoll *ep;
1327
1328
1329 if (maxevents <= 0 || maxevents > EP_MAX_EVENTS)
1330 return -EINVAL;
1331
1332
1333 if (!access_ok(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event))) {
1334 error = -EFAULT;
1335 goto error_return;
1336 }
1337
1338
1339 error = -EBADF;
1340 file = fget(epfd);
1341 if (!file)
1342 goto error_return;
1343
1344
1345
1346
1347
1348 error = -EINVAL;
1349 if (!is_file_epoll(file))
1350 goto error_fput;
1351
1352
1353
1354
1355
1356 ep = file->private_data;
1357
1358
1359 error = ep_poll(ep, events, maxevents, timeout);
1360
1361error_fput:
1362 fput(file);
1363error_return:
1364
1365 return error;
1366}
1367
#ifdef HAVE_SET_RESTORE_SIGMASK

/*
 * epoll_pwait() system call: epoll_wait() with a temporary signal mask.
 *
 * @epfd, @events, @maxevents, @timeout: as for epoll_wait()
 * @sigmask:    user pointer to the signal mask to install while waiting, or
 *              NULL to behave exactly like epoll_wait()
 * @sigsetsize: must equal sizeof(sigset_t) when @sigmask is given
 *
 * Returns the result of epoll_wait(), or -EINVAL for a bad @sigsetsize,
 * -EFAULT when @sigmask cannot be read.
 */
SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events,
		int, maxevents, int, timeout, const sigset_t __user *, sigmask,
		size_t, sigsetsize)
{
	int error;
	sigset_t ksigmask, sigsaved;

	/*
	 * If the caller wants a certain signal mask to be set during the wait,
	 * install it here, remembering the old one in sigsaved.
	 */
	if (sigmask) {
		if (sigsetsize != sizeof(sigset_t))
			return -EINVAL;
		if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
			return -EFAULT;
		/* SIGKILL and SIGSTOP can never be blocked */
		sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
	}

	error = sys_epoll_wait(epfd, events, maxevents, timeout);

	/*
	 * If the wait was interrupted, the original mask is not restored here:
	 * it is stashed in current->saved_sigmask and set_restore_sigmask() is
	 * called instead. Otherwise the old mask is put back right away.
	 */
	if (sigmask) {
		if (error == -EINTR) {
			memcpy(&current->saved_sigmask, &sigsaved,
			       sizeof(sigsaved));
			set_restore_sigmask();
		} else
			sigprocmask(SIG_SETMASK, &sigsaved, NULL);
	}

	return error;
}

#endif
1415
1416static int __init eventpoll_init(void)
1417{
1418 struct sysinfo si;
1419
1420 si_meminfo(&si);
1421
1422
1423
1424 max_user_watches = (((si.totalram - si.totalhigh) / 25) << PAGE_SHIFT) /
1425 EP_ITEM_COST;
1426
1427
1428 ep_nested_calls_init(&poll_safewake_ncalls);
1429
1430
1431 ep_nested_calls_init(&poll_readywalk_ncalls);
1432
1433
1434 epi_cache = kmem_cache_create("eventpoll_epi", sizeof(struct epitem),
1435 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
1436
1437
1438 pwq_cache = kmem_cache_create("eventpoll_pwq",
1439 sizeof(struct eppoll_entry), 0, SLAB_PANIC, NULL);
1440
1441 return 0;
1442}
1443fs_initcall(eventpoll_init);
1444