1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47#include <linux/slab.h>
48#include <linux/poll.h>
49#include <linux/fs.h>
50#include <linux/file.h>
51#include <linux/jhash.h>
52#include <linux/init.h>
53#include <linux/futex.h>
54#include <linux/mount.h>
55#include <linux/pagemap.h>
56#include <linux/syscalls.h>
57#include <linux/signal.h>
58#include <linux/export.h>
59#include <linux/magic.h>
60#include <linux/pid.h>
61#include <linux/nsproxy.h>
62#include <linux/ptrace.h>
63
64#include <asm/futex.h>
65
66#include "rtmutex_common.h"
67
/*
 * Global switch consulted by exit_pi_state_list(): when it is zero that
 * function returns without touching any PI state.
 * NOTE(review): it is presumably set during futex initialisation, which is
 * outside this view - confirm.
 */
int __read_mostly futex_cmpxchg_enabled;

/* log2 of the number of buckets in the futex_queues[] hash table. */
#define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)

/*
 * Internal flag bits carried in the "flags" argument of the futex helpers:
 *  FLAGS_SHARED      - passed as "fshared" to get_futex_key(); when clear the
 *                      futex is treated as process-private.
 *  FLAGS_CLOCKRT     - futex_wait() arms its timer on CLOCK_REALTIME instead
 *                      of CLOCK_MONOTONIC.
 *  FLAGS_HAS_TIMEOUT - stored in the restart block so futex_wait_restart()
 *                      knows a timeout value was saved.
 */
#define FLAGS_SHARED 0x01
#define FLAGS_CLOCKRT 0x02
#define FLAGS_HAS_TIMEOUT 0x04
79
80
81
82
/*
 * Priority-inheritance state attached to a PI futex.
 */
struct futex_pi_state {
	/*
	 * Link in the owning task's pi_state_list. exit_pi_state_list()
	 * walks that list to release everything a dying task still owns.
	 */
	struct list_head list;

	/*
	 * The rt_mutex that backs the futex inside the kernel.
	 */
	struct rt_mutex pi_mutex;

	/* Task currently recorded as owner; NULL once the owner has gone. */
	struct task_struct *owner;
	/* Reference count; the state is recycled/freed by free_pi_state(). */
	atomic_t refcount;

	/* Key of the futex this state belongs to (used to find its bucket). */
	union futex_key key;
};
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
/*
 * One queued waiter. Instances in this file live on the waiter's stack
 * (see futex_wait()) and are linked into a hash bucket by queue_me().
 */
struct futex_q {
	/* Priority-sorted node on the bucket's chain. */
	struct plist_node list;

	/* The waiting task; set to current in queue_me(). */
	struct task_struct *task;
	/*
	 * Lock of the bucket the entry is queued on. wake_futex() sets it to
	 * NULL after dequeueing; unqueue_me() uses that to detect a wakeup.
	 */
	spinlock_t *lock_ptr;
	/* Key of the futex being waited on. */
	union futex_key key;
	/* PI state for PI waiters, otherwise NULL. */
	struct futex_pi_state *pi_state;
	/* rt_mutex waiter used by requeue-PI, otherwise NULL. */
	struct rt_mutex_waiter *rt_waiter;
	/* Key the waiter expects to be requeued to (checked in futex_requeue()). */
	union futex_key *requeue_pi_key;
	/* Wake-up filter; futex_wake() only wakes entries whose bitset overlaps. */
	u32 bitset;
};
134
/*
 * Template used to initialise an on-stack futex_q: empty key and a bitset
 * that matches every waker. All other members are zero/NULL.
 */
static const struct futex_q futex_q_init = {
	/* .list is initialised later, by plist_node_init() in queue_me() */
	.key = FUTEX_KEY_INIT,
	.bitset = FUTEX_BITSET_MATCH_ANY
};
140
141
142
143
144
145
/*
 * One hash bucket: a spinlock protecting a priority-sorted list of the
 * futex_q entries that hash to it. Different futexes may share a bucket,
 * so walkers always re-check the key with match_futex().
 */
struct futex_hash_bucket {
	spinlock_t lock;
	struct plist_head chain;
};

/* The global hash table, indexed by hash_futex(). */
static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];
152
153
154
155
156static struct futex_hash_bucket *hash_futex(union futex_key *key)
157{
158 u32 hash = jhash2((u32*)&key->both.word,
159 (sizeof(key->both.word)+sizeof(key->both.ptr))/4,
160 key->both.offset);
161 return &futex_queues[hash & ((1 << FUTEX_HASHBITS)-1)];
162}
163
164
165
166
167static inline int match_futex(union futex_key *key1, union futex_key *key2)
168{
169 return (key1 && key2
170 && key1->both.word == key2->both.word
171 && key1->both.ptr == key2->both.ptr
172 && key1->both.offset == key2->both.offset);
173}
174
175
176
177
178
179
180static void get_futex_key_refs(union futex_key *key)
181{
182 if (!key->both.ptr)
183 return;
184
185 switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
186 case FUT_OFF_INODE:
187 ihold(key->shared.inode);
188 break;
189 case FUT_OFF_MMSHARED:
190 atomic_inc(&key->private.mm->mm_count);
191 break;
192 }
193}
194
195
196
197
198
199static void drop_futex_key_refs(union futex_key *key)
200{
201 if (!key->both.ptr) {
202
203 WARN_ON_ONCE(1);
204 return;
205 }
206
207 switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
208 case FUT_OFF_INODE:
209 iput(key->shared.inode);
210 break;
211 case FUT_OFF_MMSHARED:
212 mmdrop(key->private.mm);
213 break;
214 }
215}
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234static int
235get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
236{
237 unsigned long address = (unsigned long)uaddr;
238 struct mm_struct *mm = current->mm;
239 struct page *page, *page_head;
240 int err, ro = 0;
241
242
243
244
245 key->both.offset = address % PAGE_SIZE;
246 if (unlikely((address % sizeof(u32)) != 0))
247 return -EINVAL;
248 address -= key->both.offset;
249
250
251
252
253
254
255
256
257 if (!fshared) {
258 if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))))
259 return -EFAULT;
260 key->private.mm = mm;
261 key->private.address = address;
262 get_futex_key_refs(key);
263 return 0;
264 }
265
266again:
267 err = get_user_pages_fast(address, 1, 1, &page);
268
269
270
271
272 if (err == -EFAULT && rw == VERIFY_READ) {
273 err = get_user_pages_fast(address, 1, 0, &page);
274 ro = 1;
275 }
276 if (err < 0)
277 return err;
278 else
279 err = 0;
280
281#ifdef CONFIG_TRANSPARENT_HUGEPAGE
282 page_head = page;
283 if (unlikely(PageTail(page))) {
284 put_page(page);
285
286 local_irq_disable();
287 if (likely(__get_user_pages_fast(address, 1, 1, &page) == 1)) {
288 page_head = compound_head(page);
289
290
291
292
293
294
295
296
297
298
299 if (page != page_head) {
300 get_page(page_head);
301 put_page(page);
302 }
303 local_irq_enable();
304 } else {
305 local_irq_enable();
306 goto again;
307 }
308 }
309#else
310 page_head = compound_head(page);
311 if (page != page_head) {
312 get_page(page_head);
313 put_page(page);
314 }
315#endif
316
317 lock_page(page_head);
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334 if (!page_head->mapping) {
335 int shmem_swizzled = PageSwapCache(page_head);
336 unlock_page(page_head);
337 put_page(page_head);
338 if (shmem_swizzled)
339 goto again;
340 return -EFAULT;
341 }
342
343
344
345
346
347
348
349
350 if (PageAnon(page_head)) {
351
352
353
354
355 if (ro) {
356 err = -EFAULT;
357 goto out;
358 }
359
360 key->both.offset |= FUT_OFF_MMSHARED;
361 key->private.mm = mm;
362 key->private.address = address;
363 } else {
364 key->both.offset |= FUT_OFF_INODE;
365 key->shared.inode = page_head->mapping->host;
366 key->shared.pgoff = page_head->index;
367 }
368
369 get_futex_key_refs(key);
370
371out:
372 unlock_page(page_head);
373 put_page(page_head);
374 return err;
375}
376
/*
 * Counterpart of get_futex_key(): release whatever reference the key holds.
 */
static inline void put_futex_key(union futex_key *key)
{
	drop_futex_key_refs(key);
}
381
382
383
384
385
386
387
388
389
390
391
392
393
394static int fault_in_user_writeable(u32 __user *uaddr)
395{
396 struct mm_struct *mm = current->mm;
397 int ret;
398
399 down_read(&mm->mmap_sem);
400 ret = fixup_user_fault(current, mm, (unsigned long)uaddr,
401 FAULT_FLAG_WRITE);
402 up_read(&mm->mmap_sem);
403
404 return ret < 0 ? ret : 0;
405}
406
407
408
409
410
411
412
413
414static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb,
415 union futex_key *key)
416{
417 struct futex_q *this;
418
419 plist_for_each_entry(this, &hb->chain, list) {
420 if (match_futex(&this->key, key))
421 return this;
422 }
423 return NULL;
424}
425
426static int cmpxchg_futex_value_locked(u32 *curval, u32 __user *uaddr,
427 u32 uval, u32 newval)
428{
429 int ret;
430
431 pagefault_disable();
432 ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
433 pagefault_enable();
434
435 return ret;
436}
437
438static int get_futex_value_locked(u32 *dest, u32 __user *from)
439{
440 int ret;
441
442 pagefault_disable();
443 ret = __copy_from_user_inatomic(dest, from, sizeof(u32));
444 pagefault_enable();
445
446 return ret ? -EFAULT : 0;
447}
448
449
450
451
452
453static int refill_pi_state_cache(void)
454{
455 struct futex_pi_state *pi_state;
456
457 if (likely(current->pi_state_cache))
458 return 0;
459
460 pi_state = kzalloc(sizeof(*pi_state), GFP_KERNEL);
461
462 if (!pi_state)
463 return -ENOMEM;
464
465 INIT_LIST_HEAD(&pi_state->list);
466
467 pi_state->owner = NULL;
468 atomic_set(&pi_state->refcount, 1);
469 pi_state->key = FUTEX_KEY_INIT;
470
471 current->pi_state_cache = pi_state;
472
473 return 0;
474}
475
476static struct futex_pi_state * alloc_pi_state(void)
477{
478 struct futex_pi_state *pi_state = current->pi_state_cache;
479
480 WARN_ON(!pi_state);
481 current->pi_state_cache = NULL;
482
483 return pi_state;
484}
485
486static void free_pi_state(struct futex_pi_state *pi_state)
487{
488 if (!atomic_dec_and_test(&pi_state->refcount))
489 return;
490
491
492
493
494
495 if (pi_state->owner) {
496 raw_spin_lock_irq(&pi_state->owner->pi_lock);
497 list_del_init(&pi_state->list);
498 raw_spin_unlock_irq(&pi_state->owner->pi_lock);
499
500 rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner);
501 }
502
503 if (current->pi_state_cache)
504 kfree(pi_state);
505 else {
506
507
508
509
510
511 pi_state->owner = NULL;
512 atomic_set(&pi_state->refcount, 1);
513 current->pi_state_cache = pi_state;
514 }
515}
516
517
518
519
520
521static struct task_struct * futex_find_get_task(pid_t pid)
522{
523 struct task_struct *p;
524
525 rcu_read_lock();
526 p = find_task_by_vpid(pid);
527 if (p)
528 get_task_struct(p);
529
530 rcu_read_unlock();
531
532 return p;
533}
534
535
536
537
538
539
/*
 * Release every pi_state still linked on @curr's pi_state_list: each one
 * is unlinked, its owner is cleared and its rt_mutex is unlocked.
 * NOTE(review): presumably called from the task exit path - confirm
 * against the caller.
 */
void exit_pi_state_list(struct task_struct *curr)
{
	struct list_head *next, *head = &curr->pi_state_list;
	struct futex_pi_state *pi_state;
	struct futex_hash_bucket *hb;
	union futex_key key = FUTEX_KEY_INIT;

	if (!futex_cmpxchg_enabled)
		return;

	/*
	 * The list is protected by curr->pi_lock, but the hash bucket lock
	 * has to be taken before pi_lock. So for each entry pi_lock is
	 * dropped, the bucket lock is taken, and pi_lock is retaken.
	 */
	raw_spin_lock_irq(&curr->pi_lock);
	while (!list_empty(head)) {

		next = head->next;
		pi_state = list_entry(next, struct futex_pi_state, list);
		/* Copy the key so the bucket can be found without pi_lock. */
		key = pi_state->key;
		hb = hash_futex(&key);
		raw_spin_unlock_irq(&curr->pi_lock);

		spin_lock(&hb->lock);

		raw_spin_lock_irq(&curr->pi_lock);
		/*
		 * pi_lock was dropped above, so the entry may have been
		 * removed in the meantime; if the list head changed, start
		 * over with whatever is first now.
		 */
		if (head->next != next) {
			spin_unlock(&hb->lock);
			continue;
		}

		WARN_ON(pi_state->owner != curr);
		WARN_ON(list_empty(&pi_state->list));
		list_del_init(&pi_state->list);
		pi_state->owner = NULL;
		raw_spin_unlock_irq(&curr->pi_lock);

		/* Release the rt_mutex while still holding the bucket lock. */
		rt_mutex_unlock(&pi_state->pi_mutex);

		spin_unlock(&hb->lock);

		raw_spin_lock_irq(&curr->pi_lock);
	}
	raw_spin_unlock_irq(&curr->pi_lock);
}
589
590static int
591lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
592 union futex_key *key, struct futex_pi_state **ps)
593{
594 struct futex_pi_state *pi_state = NULL;
595 struct futex_q *this, *next;
596 struct plist_head *head;
597 struct task_struct *p;
598 pid_t pid = uval & FUTEX_TID_MASK;
599
600 head = &hb->chain;
601
602 plist_for_each_entry_safe(this, next, head, list) {
603 if (match_futex(&this->key, key)) {
604
605
606
607
608 pi_state = this->pi_state;
609
610
611
612 if (unlikely(!pi_state))
613 return -EINVAL;
614
615 WARN_ON(!atomic_read(&pi_state->refcount));
616
617
618
619
620
621
622
623
624
625
626
627 if (pid && pi_state->owner) {
628
629
630
631
632 if (pid != task_pid_vnr(pi_state->owner))
633 return -EINVAL;
634 }
635
636 atomic_inc(&pi_state->refcount);
637 *ps = pi_state;
638
639 return 0;
640 }
641 }
642
643
644
645
646
647 if (!pid)
648 return -ESRCH;
649 p = futex_find_get_task(pid);
650 if (!p)
651 return -ESRCH;
652
653
654
655
656
657
658
659 raw_spin_lock_irq(&p->pi_lock);
660 if (unlikely(p->flags & PF_EXITING)) {
661
662
663
664
665
666 int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;
667
668 raw_spin_unlock_irq(&p->pi_lock);
669 put_task_struct(p);
670 return ret;
671 }
672
673 pi_state = alloc_pi_state();
674
675
676
677
678
679 rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);
680
681
682 pi_state->key = *key;
683
684 WARN_ON(!list_empty(&pi_state->list));
685 list_add(&pi_state->list, &p->pi_state_list);
686 pi_state->owner = p;
687 raw_spin_unlock_irq(&p->pi_lock);
688
689 put_task_struct(p);
690
691 *ps = pi_state;
692
693 return 0;
694}
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
/**
 * futex_lock_pi_atomic() - Try to take a PI futex on behalf of @task
 * @uaddr:		the PI futex word in user space
 * @hb:			hash bucket of the futex (callers in this file take
 *			hb->lock before calling)
 * @key:		key of the futex
 * @ps:			receives the pi_state when the lock is contended
 * @task:		task that should become the owner
 * @set_waiters:	non-zero to set FUTEX_WAITERS even on an
 *			uncontended acquisition
 *
 * Return:
 *  0 - the lock is held by somebody else; *@ps is set up for waiting
 *  1 - the lock was acquired for @task
 * <0 - error (-EFAULT, -EDEADLK, or an error from lookup_pi_state())
 */
static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
				union futex_key *key,
				struct futex_pi_state **ps,
				struct task_struct *task, int set_waiters)
{
	int lock_taken, ret, ownerdied = 0;
	u32 uval, newval, curval, vpid = task_pid_vnr(task);

retry:
	ret = lock_taken = 0;

	/*
	 * First attempt the uncontended transition 0 -> TID, optionally
	 * with the waiters bit set right away.
	 */
	newval = vpid;
	if (set_waiters)
		newval |= FUTEX_WAITERS;

	if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, 0, newval)))
		return -EFAULT;

	/*
	 * The word already carries our own TID: taking it again would
	 * deadlock.
	 */
	if ((unlikely((curval & FUTEX_TID_MASK) == vpid)))
		return -EDEADLK;

	/*
	 * The word was 0, so the cmpxchg above succeeded: lock acquired.
	 */
	if (unlikely(!curval))
		return 1;

	uval = curval;

	/*
	 * The lock is held. By default just add FUTEX_WAITERS so the owner
	 * knows it must go through the kernel on unlock.
	 */
	newval = curval | FUTEX_WAITERS;

	/*
	 * Take the futex over when it has no owner TID, or when a previous
	 * pass found the owner dead. The non-TID bits of the current value
	 * are preserved.
	 */
	if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) {
		/* Keep the flag bits, replace the TID with ours. */
		newval = (curval & ~FUTEX_TID_MASK) | vpid;
		ownerdied = 0;
		lock_taken = 1;
	}

	if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)))
		return -EFAULT;
	/* The word changed under us: start again from the top. */
	if (unlikely(curval != uval))
		goto retry;

	/*
	 * The take-over cmpxchg succeeded: we own the lock.
	 */
	if (unlikely(lock_taken))
		return 1;

	/*
	 * The lock is owned by someone else. Find the pi_state, or create
	 * it if we are the first waiter.
	 */
	ret = lookup_pi_state(uval, hb, key, ps);

	if (unlikely(ret)) {
		switch (ret) {
		case -ESRCH:
			/*
			 * No owner task was found. Re-read the word to see
			 * whether the owner died.
			 */
			if (get_futex_value_locked(&curval, uaddr))
				return -EFAULT;

			/*
			 * FUTEX_OWNER_DIED is set: retry, this time forcing
			 * the take-over path. Otherwise -ESRCH is returned.
			 */
			if (curval & FUTEX_OWNER_DIED) {
				ownerdied = 1;
				goto retry;
			}
			/* fall through */
		default:
			break;
		}
	}

	return ret;
}
816
817
818
819
820
821
822
823static void __unqueue_futex(struct futex_q *q)
824{
825 struct futex_hash_bucket *hb;
826
827 if (WARN_ON_SMP(!q->lock_ptr || !spin_is_locked(q->lock_ptr))
828 || WARN_ON(plist_node_empty(&q->list)))
829 return;
830
831 hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
832 plist_del(&q->list, &hb->chain);
833}
834
835
836
837
838
839static void wake_futex(struct futex_q *q)
840{
841 struct task_struct *p = q->task;
842
843
844
845
846
847
848
849
850 get_task_struct(p);
851
852 __unqueue_futex(q);
853
854
855
856
857
858
859 smp_wmb();
860 q->lock_ptr = NULL;
861
862 wake_up_state(p, TASK_NORMAL);
863 put_task_struct(p);
864}
865
866static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
867{
868 struct task_struct *new_owner;
869 struct futex_pi_state *pi_state = this->pi_state;
870 u32 uninitialized_var(curval), newval;
871
872 if (!pi_state)
873 return -EINVAL;
874
875
876
877
878
879 if (pi_state->owner != current)
880 return -EINVAL;
881
882 raw_spin_lock(&pi_state->pi_mutex.wait_lock);
883 new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
884
885
886
887
888
889
890 if (!new_owner)
891 new_owner = this->task;
892
893
894
895
896
897
898 if (!(uval & FUTEX_OWNER_DIED)) {
899 int ret = 0;
900
901 newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
902
903 if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
904 ret = -EFAULT;
905 else if (curval != uval)
906 ret = -EINVAL;
907 if (ret) {
908 raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
909 return ret;
910 }
911 }
912
913 raw_spin_lock_irq(&pi_state->owner->pi_lock);
914 WARN_ON(list_empty(&pi_state->list));
915 list_del_init(&pi_state->list);
916 raw_spin_unlock_irq(&pi_state->owner->pi_lock);
917
918 raw_spin_lock_irq(&new_owner->pi_lock);
919 WARN_ON(!list_empty(&pi_state->list));
920 list_add(&pi_state->list, &new_owner->pi_state_list);
921 pi_state->owner = new_owner;
922 raw_spin_unlock_irq(&new_owner->pi_lock);
923
924 raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
925 rt_mutex_unlock(&pi_state->pi_mutex);
926
927 return 0;
928}
929
930static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
931{
932 u32 uninitialized_var(oldval);
933
934
935
936
937
938 if (cmpxchg_futex_value_locked(&oldval, uaddr, uval, 0))
939 return -EFAULT;
940 if (oldval != uval)
941 return -EAGAIN;
942
943 return 0;
944}
945
946
947
948
949static inline void
950double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
951{
952 if (hb1 <= hb2) {
953 spin_lock(&hb1->lock);
954 if (hb1 < hb2)
955 spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
956 } else {
957 spin_lock(&hb2->lock);
958 spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING);
959 }
960}
961
962static inline void
963double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
964{
965 spin_unlock(&hb1->lock);
966 if (hb1 != hb2)
967 spin_unlock(&hb2->lock);
968}
969
970
971
972
973static int
974futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
975{
976 struct futex_hash_bucket *hb;
977 struct futex_q *this, *next;
978 struct plist_head *head;
979 union futex_key key = FUTEX_KEY_INIT;
980 int ret;
981
982 if (!bitset)
983 return -EINVAL;
984
985 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_READ);
986 if (unlikely(ret != 0))
987 goto out;
988
989 hb = hash_futex(&key);
990 spin_lock(&hb->lock);
991 head = &hb->chain;
992
993 plist_for_each_entry_safe(this, next, head, list) {
994 if (match_futex (&this->key, &key)) {
995 if (this->pi_state || this->rt_waiter) {
996 ret = -EINVAL;
997 break;
998 }
999
1000
1001 if (!(this->bitset & bitset))
1002 continue;
1003
1004 wake_futex(this);
1005 if (++ret >= nr_wake)
1006 break;
1007 }
1008 }
1009
1010 spin_unlock(&hb->lock);
1011 put_futex_key(&key);
1012out:
1013 return ret;
1014}
1015
1016
1017
1018
1019
1020static int
1021futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
1022 int nr_wake, int nr_wake2, int op)
1023{
1024 union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
1025 struct futex_hash_bucket *hb1, *hb2;
1026 struct plist_head *head;
1027 struct futex_q *this, *next;
1028 int ret, op_ret;
1029
1030retry:
1031 ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
1032 if (unlikely(ret != 0))
1033 goto out;
1034 ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
1035 if (unlikely(ret != 0))
1036 goto out_put_key1;
1037
1038 hb1 = hash_futex(&key1);
1039 hb2 = hash_futex(&key2);
1040
1041retry_private:
1042 double_lock_hb(hb1, hb2);
1043 op_ret = futex_atomic_op_inuser(op, uaddr2);
1044 if (unlikely(op_ret < 0)) {
1045
1046 double_unlock_hb(hb1, hb2);
1047
1048#ifndef CONFIG_MMU
1049
1050
1051
1052
1053 ret = op_ret;
1054 goto out_put_keys;
1055#endif
1056
1057 if (unlikely(op_ret != -EFAULT)) {
1058 ret = op_ret;
1059 goto out_put_keys;
1060 }
1061
1062 ret = fault_in_user_writeable(uaddr2);
1063 if (ret)
1064 goto out_put_keys;
1065
1066 if (!(flags & FLAGS_SHARED))
1067 goto retry_private;
1068
1069 put_futex_key(&key2);
1070 put_futex_key(&key1);
1071 goto retry;
1072 }
1073
1074 head = &hb1->chain;
1075
1076 plist_for_each_entry_safe(this, next, head, list) {
1077 if (match_futex (&this->key, &key1)) {
1078 wake_futex(this);
1079 if (++ret >= nr_wake)
1080 break;
1081 }
1082 }
1083
1084 if (op_ret > 0) {
1085 head = &hb2->chain;
1086
1087 op_ret = 0;
1088 plist_for_each_entry_safe(this, next, head, list) {
1089 if (match_futex (&this->key, &key2)) {
1090 wake_futex(this);
1091 if (++op_ret >= nr_wake2)
1092 break;
1093 }
1094 }
1095 ret += op_ret;
1096 }
1097
1098 double_unlock_hb(hb1, hb2);
1099out_put_keys:
1100 put_futex_key(&key2);
1101out_put_key1:
1102 put_futex_key(&key1);
1103out:
1104 return ret;
1105}
1106
1107
1108
1109
1110
1111
1112
1113
1114static inline
1115void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
1116 struct futex_hash_bucket *hb2, union futex_key *key2)
1117{
1118
1119
1120
1121
1122
1123 if (likely(&hb1->chain != &hb2->chain)) {
1124 plist_del(&q->list, &hb1->chain);
1125 plist_add(&q->list, &hb2->chain);
1126 q->lock_ptr = &hb2->lock;
1127 }
1128 get_futex_key_refs(key2);
1129 q->key = *key2;
1130}
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146static inline
1147void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
1148 struct futex_hash_bucket *hb)
1149{
1150 get_futex_key_refs(key);
1151 q->key = *key;
1152
1153 __unqueue_futex(q);
1154
1155 WARN_ON(!q->rt_waiter);
1156 q->rt_waiter = NULL;
1157
1158 q->lock_ptr = &hb->lock;
1159
1160 wake_up_state(q->task, TASK_NORMAL);
1161}
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183static int futex_proxy_trylock_atomic(u32 __user *pifutex,
1184 struct futex_hash_bucket *hb1,
1185 struct futex_hash_bucket *hb2,
1186 union futex_key *key1, union futex_key *key2,
1187 struct futex_pi_state **ps, int set_waiters)
1188{
1189 struct futex_q *top_waiter = NULL;
1190 u32 curval;
1191 int ret;
1192
1193 if (get_futex_value_locked(&curval, pifutex))
1194 return -EFAULT;
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204 top_waiter = futex_top_waiter(hb1, key1);
1205
1206
1207 if (!top_waiter)
1208 return 0;
1209
1210
1211 if (!match_futex(top_waiter->requeue_pi_key, key2))
1212 return -EINVAL;
1213
1214
1215
1216
1217
1218
1219 ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
1220 set_waiters);
1221 if (ret == 1)
1222 requeue_pi_wake_futex(top_waiter, key2, hb2);
1223
1224 return ret;
1225}
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
1246 u32 __user *uaddr2, int nr_wake, int nr_requeue,
1247 u32 *cmpval, int requeue_pi)
1248{
1249 union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
1250 int drop_count = 0, task_count = 0, ret;
1251 struct futex_pi_state *pi_state = NULL;
1252 struct futex_hash_bucket *hb1, *hb2;
1253 struct plist_head *head1;
1254 struct futex_q *this, *next;
1255 u32 curval2;
1256
1257 if (requeue_pi) {
1258
1259
1260
1261
1262 if (refill_pi_state_cache())
1263 return -ENOMEM;
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274 if (nr_wake != 1)
1275 return -EINVAL;
1276 }
1277
1278retry:
1279 if (pi_state != NULL) {
1280
1281
1282
1283
1284 free_pi_state(pi_state);
1285 pi_state = NULL;
1286 }
1287
1288 ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
1289 if (unlikely(ret != 0))
1290 goto out;
1291 ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2,
1292 requeue_pi ? VERIFY_WRITE : VERIFY_READ);
1293 if (unlikely(ret != 0))
1294 goto out_put_key1;
1295
1296 hb1 = hash_futex(&key1);
1297 hb2 = hash_futex(&key2);
1298
1299retry_private:
1300 double_lock_hb(hb1, hb2);
1301
1302 if (likely(cmpval != NULL)) {
1303 u32 curval;
1304
1305 ret = get_futex_value_locked(&curval, uaddr1);
1306
1307 if (unlikely(ret)) {
1308 double_unlock_hb(hb1, hb2);
1309
1310 ret = get_user(curval, uaddr1);
1311 if (ret)
1312 goto out_put_keys;
1313
1314 if (!(flags & FLAGS_SHARED))
1315 goto retry_private;
1316
1317 put_futex_key(&key2);
1318 put_futex_key(&key1);
1319 goto retry;
1320 }
1321 if (curval != *cmpval) {
1322 ret = -EAGAIN;
1323 goto out_unlock;
1324 }
1325 }
1326
1327 if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
1328
1329
1330
1331
1332
1333
1334 ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
1335 &key2, &pi_state, nr_requeue);
1336
1337
1338
1339
1340
1341
1342
1343 if (ret == 1) {
1344 WARN_ON(pi_state);
1345 drop_count++;
1346 task_count++;
1347 ret = get_futex_value_locked(&curval2, uaddr2);
1348 if (!ret)
1349 ret = lookup_pi_state(curval2, hb2, &key2,
1350 &pi_state);
1351 }
1352
1353 switch (ret) {
1354 case 0:
1355 break;
1356 case -EFAULT:
1357 double_unlock_hb(hb1, hb2);
1358 put_futex_key(&key2);
1359 put_futex_key(&key1);
1360 ret = fault_in_user_writeable(uaddr2);
1361 if (!ret)
1362 goto retry;
1363 goto out;
1364 case -EAGAIN:
1365
1366 double_unlock_hb(hb1, hb2);
1367 put_futex_key(&key2);
1368 put_futex_key(&key1);
1369 cond_resched();
1370 goto retry;
1371 default:
1372 goto out_unlock;
1373 }
1374 }
1375
1376 head1 = &hb1->chain;
1377 plist_for_each_entry_safe(this, next, head1, list) {
1378 if (task_count - nr_wake >= nr_requeue)
1379 break;
1380
1381 if (!match_futex(&this->key, &key1))
1382 continue;
1383
1384
1385
1386
1387
1388 if ((requeue_pi && !this->rt_waiter) ||
1389 (!requeue_pi && this->rt_waiter)) {
1390 ret = -EINVAL;
1391 break;
1392 }
1393
1394
1395
1396
1397
1398
1399 if (++task_count <= nr_wake && !requeue_pi) {
1400 wake_futex(this);
1401 continue;
1402 }
1403
1404
1405 if (requeue_pi && !match_futex(this->requeue_pi_key, &key2)) {
1406 ret = -EINVAL;
1407 break;
1408 }
1409
1410
1411
1412
1413
1414 if (requeue_pi) {
1415
1416 atomic_inc(&pi_state->refcount);
1417 this->pi_state = pi_state;
1418 ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
1419 this->rt_waiter,
1420 this->task, 1);
1421 if (ret == 1) {
1422
1423 requeue_pi_wake_futex(this, &key2, hb2);
1424 drop_count++;
1425 continue;
1426 } else if (ret) {
1427
1428 this->pi_state = NULL;
1429 free_pi_state(pi_state);
1430 goto out_unlock;
1431 }
1432 }
1433 requeue_futex(this, hb1, hb2, &key2);
1434 drop_count++;
1435 }
1436
1437out_unlock:
1438 double_unlock_hb(hb1, hb2);
1439
1440
1441
1442
1443
1444
1445
1446 while (--drop_count >= 0)
1447 drop_futex_key_refs(&key1);
1448
1449out_put_keys:
1450 put_futex_key(&key2);
1451out_put_key1:
1452 put_futex_key(&key1);
1453out:
1454 if (pi_state != NULL)
1455 free_pi_state(pi_state);
1456 return ret ? ret : task_count;
1457}
1458
1459
1460static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
1461 __acquires(&hb->lock)
1462{
1463 struct futex_hash_bucket *hb;
1464
1465 hb = hash_futex(&q->key);
1466 q->lock_ptr = &hb->lock;
1467
1468 spin_lock(&hb->lock);
1469 return hb;
1470}
1471
/*
 * Release the bucket lock taken by queue_lock() without queueing @q.
 * @q itself is not touched.
 */
static inline void
queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb)
	__releases(&hb->lock)
{
	spin_unlock(&hb->lock);
}
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
1492 __releases(&hb->lock)
1493{
1494 int prio;
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504 prio = min(current->normal_prio, MAX_RT_PRIO);
1505
1506 plist_node_init(&q->list, prio);
1507 plist_add(&q->list, &hb->chain);
1508 q->task = current;
1509 spin_unlock(&hb->lock);
1510}
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523static int unqueue_me(struct futex_q *q)
1524{
1525 spinlock_t *lock_ptr;
1526 int ret = 0;
1527
1528
1529retry:
1530 lock_ptr = q->lock_ptr;
1531 barrier();
1532 if (lock_ptr != NULL) {
1533 spin_lock(lock_ptr);
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547 if (unlikely(lock_ptr != q->lock_ptr)) {
1548 spin_unlock(lock_ptr);
1549 goto retry;
1550 }
1551 __unqueue_futex(q);
1552
1553 BUG_ON(q->pi_state);
1554
1555 spin_unlock(lock_ptr);
1556 ret = 1;
1557 }
1558
1559 drop_futex_key_refs(&q->key);
1560 return ret;
1561}
1562
1563
1564
1565
1566
1567
1568static void unqueue_me_pi(struct futex_q *q)
1569 __releases(q->lock_ptr)
1570{
1571 __unqueue_futex(q);
1572
1573 BUG_ON(!q->pi_state);
1574 free_pi_state(q->pi_state);
1575 q->pi_state = NULL;
1576
1577 spin_unlock(q->lock_ptr);
1578}
1579
1580
1581
1582
1583
1584
1585
/*
 * Make @newowner the owner recorded for the PI futex @q is attached to:
 * first in the user-space word at @uaddr, then in the pi_state.
 *
 * Called with q->lock_ptr held. The lock is dropped and retaken if the
 * user-space access faults.
 *
 * Return: 0 on success (or if somebody else changed the pi_state owner
 * while the lock was dropped), otherwise the error from
 * fault_in_user_writeable().
 */
static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
				struct task_struct *newowner)
{
	u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
	struct futex_pi_state *pi_state = q->pi_state;
	struct task_struct *oldowner = pi_state->owner;
	u32 uval, uninitialized_var(curval), newval;
	int ret;

	/* No recorded owner: flag the word as owner-died. */
	if (!pi_state->owner)
		newtid |= FUTEX_OWNER_DIED;

	/*
	 * The user-space word is updated before the pi_state. The update
	 * can fault, and handling the fault requires dropping the bucket
	 * lock; changing the pi_state first would leave it visible in an
	 * inconsistent state during that window.
	 *
	 * The update is a cmpxchg loop so that a FUTEX_OWNER_DIED bit
	 * already present in the word is preserved.
	 */
retry:
	if (get_futex_value_locked(&uval, uaddr))
		goto handle_fault;

	while (1) {
		newval = (uval & FUTEX_OWNER_DIED) | newtid;

		if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
			goto handle_fault;
		if (curval == uval)
			break;
		uval = curval;
	}

	/*
	 * User space is fixed up; now move the pi_state from the old
	 * owner's list (if there is an old owner) to the new owner's list.
	 */
	if (pi_state->owner != NULL) {
		raw_spin_lock_irq(&pi_state->owner->pi_lock);
		WARN_ON(list_empty(&pi_state->list));
		list_del_init(&pi_state->list);
		raw_spin_unlock_irq(&pi_state->owner->pi_lock);
	}

	pi_state->owner = newowner;

	raw_spin_lock_irq(&newowner->pi_lock);
	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &newowner->pi_state_list);
	raw_spin_unlock_irq(&newowner->pi_lock);
	return 0;

	/*
	 * Faulting the page in requires dropping the bucket lock. While it
	 * is dropped another task may perform the fixup itself, so the
	 * pi_state owner is re-checked after the lock is retaken.
	 */
handle_fault:
	spin_unlock(q->lock_ptr);

	ret = fault_in_user_writeable(uaddr);

	spin_lock(q->lock_ptr);

	/*
	 * The owner changed while the lock was dropped: someone else did
	 * the fixup, nothing left to do.
	 */
	if (pi_state->owner != oldowner)
		return 0;

	if (ret)
		return ret;

	goto retry;
}
1677
1678static long futex_wait_restart(struct restart_block *restart);
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
1696{
1697 struct task_struct *owner;
1698 int ret = 0;
1699
1700 if (locked) {
1701
1702
1703
1704
1705 if (q->pi_state->owner != current)
1706 ret = fixup_pi_state_owner(uaddr, q, current);
1707 goto out;
1708 }
1709
1710
1711
1712
1713
1714 if (q->pi_state->owner == current) {
1715
1716
1717
1718
1719
1720 if (rt_mutex_trylock(&q->pi_state->pi_mutex)) {
1721 locked = 1;
1722 goto out;
1723 }
1724
1725
1726
1727
1728
1729
1730 raw_spin_lock(&q->pi_state->pi_mutex.wait_lock);
1731 owner = rt_mutex_owner(&q->pi_state->pi_mutex);
1732 if (!owner)
1733 owner = rt_mutex_next_owner(&q->pi_state->pi_mutex);
1734 raw_spin_unlock(&q->pi_state->pi_mutex.wait_lock);
1735 ret = fixup_pi_state_owner(uaddr, q, owner);
1736 goto out;
1737 }
1738
1739
1740
1741
1742
1743 if (rt_mutex_owner(&q->pi_state->pi_mutex) == current)
1744 printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
1745 "pi-state %p\n", ret,
1746 q->pi_state->pi_mutex.owner,
1747 q->pi_state->owner);
1748
1749out:
1750 return ret ? ret : locked;
1751}
1752
1753
1754
1755
1756
1757
1758
1759static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
1760 struct hrtimer_sleeper *timeout)
1761{
1762
1763
1764
1765
1766
1767
1768 set_current_state(TASK_INTERRUPTIBLE);
1769 queue_me(q, hb);
1770
1771
1772 if (timeout) {
1773 hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
1774 if (!hrtimer_active(&timeout->timer))
1775 timeout->task = NULL;
1776 }
1777
1778
1779
1780
1781
1782 if (likely(!plist_node_empty(&q->list))) {
1783
1784
1785
1786
1787
1788 if (!timeout || timeout->task)
1789 schedule();
1790 }
1791 __set_current_state(TASK_RUNNING);
1792}
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
1812 struct futex_q *q, struct futex_hash_bucket **hb)
1813{
1814 u32 uval;
1815 int ret;
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835retry:
1836 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, VERIFY_READ);
1837 if (unlikely(ret != 0))
1838 return ret;
1839
1840retry_private:
1841 *hb = queue_lock(q);
1842
1843 ret = get_futex_value_locked(&uval, uaddr);
1844
1845 if (ret) {
1846 queue_unlock(q, *hb);
1847
1848 ret = get_user(uval, uaddr);
1849 if (ret)
1850 goto out;
1851
1852 if (!(flags & FLAGS_SHARED))
1853 goto retry_private;
1854
1855 put_futex_key(&q->key);
1856 goto retry;
1857 }
1858
1859 if (uval != val) {
1860 queue_unlock(q, *hb);
1861 ret = -EWOULDBLOCK;
1862 }
1863
1864out:
1865 if (ret)
1866 put_futex_key(&q->key);
1867 return ret;
1868}
1869
1870static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
1871 ktime_t *abs_time, u32 bitset)
1872{
1873 struct hrtimer_sleeper timeout, *to = NULL;
1874 struct restart_block *restart;
1875 struct futex_hash_bucket *hb;
1876 struct futex_q q = futex_q_init;
1877 int ret;
1878
1879 if (!bitset)
1880 return -EINVAL;
1881 q.bitset = bitset;
1882
1883 if (abs_time) {
1884 to = &timeout;
1885
1886 hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
1887 CLOCK_REALTIME : CLOCK_MONOTONIC,
1888 HRTIMER_MODE_ABS);
1889 hrtimer_init_sleeper(to, current);
1890 hrtimer_set_expires_range_ns(&to->timer, *abs_time,
1891 current->timer_slack_ns);
1892 }
1893
1894retry:
1895
1896
1897
1898
1899 ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
1900 if (ret)
1901 goto out;
1902
1903
1904 futex_wait_queue_me(hb, &q, to);
1905
1906
1907 ret = 0;
1908
1909 if (!unqueue_me(&q))
1910 goto out;
1911 ret = -ETIMEDOUT;
1912 if (to && !to->task)
1913 goto out;
1914
1915
1916
1917
1918
1919 if (!signal_pending(current))
1920 goto retry;
1921
1922 ret = -ERESTARTSYS;
1923 if (!abs_time)
1924 goto out;
1925
1926 restart = ¤t_thread_info()->restart_block;
1927 restart->fn = futex_wait_restart;
1928 restart->futex.uaddr = uaddr;
1929 restart->futex.val = val;
1930 restart->futex.time = abs_time->tv64;
1931 restart->futex.bitset = bitset;
1932 restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;
1933
1934 ret = -ERESTART_RESTARTBLOCK;
1935
1936out:
1937 if (to) {
1938 hrtimer_cancel(&to->timer);
1939 destroy_hrtimer_on_stack(&to->timer);
1940 }
1941 return ret;
1942}
1943
1944
1945static long futex_wait_restart(struct restart_block *restart)
1946{
1947 u32 __user *uaddr = restart->futex.uaddr;
1948 ktime_t t, *tp = NULL;
1949
1950 if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
1951 t.tv64 = restart->futex.time;
1952 tp = &t;
1953 }
1954 restart->fn = do_no_restart_syscall;
1955
1956 return (long)futex_wait(uaddr, restart->futex.flags,
1957 restart->futex.val, tp, restart->futex.bitset);
1958}
1959
1960
1961
1962
1963
1964
1965
1966
1967static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, int detect,
1968 ktime_t *time, int trylock)
1969{
1970 struct hrtimer_sleeper timeout, *to = NULL;
1971 struct futex_hash_bucket *hb;
1972 struct futex_q q = futex_q_init;
1973 int res, ret;
1974
1975 if (refill_pi_state_cache())
1976 return -ENOMEM;
1977
1978 if (time) {
1979 to = &timeout;
1980 hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME,
1981 HRTIMER_MODE_ABS);
1982 hrtimer_init_sleeper(to, current);
1983 hrtimer_set_expires(&to->timer, *time);
1984 }
1985
1986retry:
1987 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, VERIFY_WRITE);
1988 if (unlikely(ret != 0))
1989 goto out;
1990
1991retry_private:
1992 hb = queue_lock(&q);
1993
1994 ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0);
1995 if (unlikely(ret)) {
1996 switch (ret) {
1997 case 1:
1998
1999 ret = 0;
2000 goto out_unlock_put_key;
2001 case -EFAULT:
2002 goto uaddr_faulted;
2003 case -EAGAIN:
2004
2005
2006
2007
2008 queue_unlock(&q, hb);
2009 put_futex_key(&q.key);
2010 cond_resched();
2011 goto retry;
2012 default:
2013 goto out_unlock_put_key;
2014 }
2015 }
2016
2017
2018
2019
2020 queue_me(&q, hb);
2021
2022 WARN_ON(!q.pi_state);
2023
2024
2025
2026 if (!trylock)
2027 ret = rt_mutex_timed_lock(&q.pi_state->pi_mutex, to, 1);
2028 else {
2029 ret = rt_mutex_trylock(&q.pi_state->pi_mutex);
2030
2031 ret = ret ? 0 : -EWOULDBLOCK;
2032 }
2033
2034 spin_lock(q.lock_ptr);
2035
2036
2037
2038
2039 res = fixup_owner(uaddr, &q, !ret);
2040
2041
2042
2043
2044 if (res)
2045 ret = (res < 0) ? res : 0;
2046
2047
2048
2049
2050
2051 if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current))
2052 rt_mutex_unlock(&q.pi_state->pi_mutex);
2053
2054
2055 unqueue_me_pi(&q);
2056
2057 goto out_put_key;
2058
2059out_unlock_put_key:
2060 queue_unlock(&q, hb);
2061
2062out_put_key:
2063 put_futex_key(&q.key);
2064out:
2065 if (to)
2066 destroy_hrtimer_on_stack(&to->timer);
2067 return ret != -EINTR ? ret : -ERESTARTNOINTR;
2068
2069uaddr_faulted:
2070 queue_unlock(&q, hb);
2071
2072 ret = fault_in_user_writeable(uaddr);
2073 if (ret)
2074 goto out_put_key;
2075
2076 if (!(flags & FLAGS_SHARED))
2077 goto retry_private;
2078
2079 put_futex_key(&q.key);
2080 goto retry;
2081}
2082
2083
2084
2085
2086
2087
2088static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
2089{
2090 struct futex_hash_bucket *hb;
2091 struct futex_q *this, *next;
2092 struct plist_head *head;
2093 union futex_key key = FUTEX_KEY_INIT;
2094 u32 uval, vpid = task_pid_vnr(current);
2095 int ret;
2096
2097retry:
2098 if (get_user(uval, uaddr))
2099 return -EFAULT;
2100
2101
2102
2103 if ((uval & FUTEX_TID_MASK) != vpid)
2104 return -EPERM;
2105
2106 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_WRITE);
2107 if (unlikely(ret != 0))
2108 goto out;
2109
2110 hb = hash_futex(&key);
2111 spin_lock(&hb->lock);
2112
2113
2114
2115
2116
2117
2118 if (!(uval & FUTEX_OWNER_DIED) &&
2119 cmpxchg_futex_value_locked(&uval, uaddr, vpid, 0))
2120 goto pi_faulted;
2121
2122
2123
2124
2125 if (unlikely(uval == vpid))
2126 goto out_unlock;
2127
2128
2129
2130
2131
2132 head = &hb->chain;
2133
2134 plist_for_each_entry_safe(this, next, head, list) {
2135 if (!match_futex (&this->key, &key))
2136 continue;
2137 ret = wake_futex_pi(uaddr, uval, this);
2138
2139
2140
2141
2142
2143 if (ret == -EFAULT)
2144 goto pi_faulted;
2145 goto out_unlock;
2146 }
2147
2148
2149
2150 if (!(uval & FUTEX_OWNER_DIED)) {
2151 ret = unlock_futex_pi(uaddr, uval);
2152 if (ret == -EFAULT)
2153 goto pi_faulted;
2154 }
2155
2156out_unlock:
2157 spin_unlock(&hb->lock);
2158 put_futex_key(&key);
2159
2160out:
2161 return ret;
2162
2163pi_faulted:
2164 spin_unlock(&hb->lock);
2165 put_futex_key(&key);
2166
2167 ret = fault_in_user_writeable(uaddr);
2168 if (!ret)
2169 goto retry;
2170
2171 return ret;
2172}
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190static inline
2191int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
2192 struct futex_q *q, union futex_key *key2,
2193 struct hrtimer_sleeper *timeout)
2194{
2195 int ret = 0;
2196
2197
2198
2199
2200
2201
2202
2203
2204 if (!match_futex(&q->key, key2)) {
2205 WARN_ON(q->lock_ptr && (&hb->lock != q->lock_ptr));
2206
2207
2208
2209
2210 plist_del(&q->list, &hb->chain);
2211
2212
2213 ret = -EWOULDBLOCK;
2214 if (timeout && !timeout->task)
2215 ret = -ETIMEDOUT;
2216 else if (signal_pending(current))
2217 ret = -ERESTARTNOINTR;
2218 }
2219 return ret;
2220}
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
2264 u32 val, ktime_t *abs_time, u32 bitset,
2265 u32 __user *uaddr2)
2266{
2267 struct hrtimer_sleeper timeout, *to = NULL;
2268 struct rt_mutex_waiter rt_waiter;
2269 struct rt_mutex *pi_mutex = NULL;
2270 struct futex_hash_bucket *hb;
2271 union futex_key key2 = FUTEX_KEY_INIT;
2272 struct futex_q q = futex_q_init;
2273 int res, ret;
2274
2275 if (uaddr == uaddr2)
2276 return -EINVAL;
2277
2278 if (!bitset)
2279 return -EINVAL;
2280
2281 if (abs_time) {
2282 to = &timeout;
2283 hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
2284 CLOCK_REALTIME : CLOCK_MONOTONIC,
2285 HRTIMER_MODE_ABS);
2286 hrtimer_init_sleeper(to, current);
2287 hrtimer_set_expires_range_ns(&to->timer, *abs_time,
2288 current->timer_slack_ns);
2289 }
2290
2291
2292
2293
2294
2295 debug_rt_mutex_init_waiter(&rt_waiter);
2296 rt_waiter.task = NULL;
2297
2298 ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
2299 if (unlikely(ret != 0))
2300 goto out;
2301
2302 q.bitset = bitset;
2303 q.rt_waiter = &rt_waiter;
2304 q.requeue_pi_key = &key2;
2305
2306
2307
2308
2309
2310 ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
2311 if (ret)
2312 goto out_key2;
2313
2314
2315 futex_wait_queue_me(hb, &q, to);
2316
2317 spin_lock(&hb->lock);
2318 ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
2319 spin_unlock(&hb->lock);
2320 if (ret)
2321 goto out_put_keys;
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333 if (!q.rt_waiter) {
2334
2335
2336
2337
2338 if (q.pi_state && (q.pi_state->owner != current)) {
2339 spin_lock(q.lock_ptr);
2340 ret = fixup_pi_state_owner(uaddr2, &q, current);
2341 spin_unlock(q.lock_ptr);
2342 }
2343 } else {
2344
2345
2346
2347
2348
2349 WARN_ON(!q.pi_state);
2350 pi_mutex = &q.pi_state->pi_mutex;
2351 ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1);
2352 debug_rt_mutex_free_waiter(&rt_waiter);
2353
2354 spin_lock(q.lock_ptr);
2355
2356
2357
2358
2359 res = fixup_owner(uaddr2, &q, !ret);
2360
2361
2362
2363
2364 if (res)
2365 ret = (res < 0) ? res : 0;
2366
2367
2368 unqueue_me_pi(&q);
2369 }
2370
2371
2372
2373
2374
2375 if (ret == -EFAULT) {
2376 if (pi_mutex && rt_mutex_owner(pi_mutex) == current)
2377 rt_mutex_unlock(pi_mutex);
2378 } else if (ret == -EINTR) {
2379
2380
2381
2382
2383
2384
2385
2386 ret = -EWOULDBLOCK;
2387 }
2388
2389out_put_keys:
2390 put_futex_key(&q.key);
2391out_key2:
2392 put_futex_key(&key2);
2393
2394out:
2395 if (to) {
2396 hrtimer_cancel(&to->timer);
2397 destroy_hrtimer_on_stack(&to->timer);
2398 }
2399 return ret;
2400}
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
2423 size_t, len)
2424{
2425 if (!futex_cmpxchg_enabled)
2426 return -ENOSYS;
2427
2428
2429
2430 if (unlikely(len != sizeof(*head)))
2431 return -EINVAL;
2432
2433 current->robust_list = head;
2434
2435 return 0;
2436}
2437
2438
2439
2440
2441
2442
2443
2444SYSCALL_DEFINE3(get_robust_list, int, pid,
2445 struct robust_list_head __user * __user *, head_ptr,
2446 size_t __user *, len_ptr)
2447{
2448 struct robust_list_head __user *head;
2449 unsigned long ret;
2450 struct task_struct *p;
2451
2452 if (!futex_cmpxchg_enabled)
2453 return -ENOSYS;
2454
2455 WARN_ONCE(1, "deprecated: get_robust_list will be deleted in 2013.\n");
2456
2457 rcu_read_lock();
2458
2459 ret = -ESRCH;
2460 if (!pid)
2461 p = current;
2462 else {
2463 p = find_task_by_vpid(pid);
2464 if (!p)
2465 goto err_unlock;
2466 }
2467
2468 ret = -EPERM;
2469 if (!ptrace_may_access(p, PTRACE_MODE_READ))
2470 goto err_unlock;
2471
2472 head = p->robust_list;
2473 rcu_read_unlock();
2474
2475 if (put_user(sizeof(*head), len_ptr))
2476 return -EFAULT;
2477 return put_user(head, head_ptr);
2478
2479err_unlock:
2480 rcu_read_unlock();
2481
2482 return ret;
2483}
2484
2485
2486
2487
2488
2489int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
2490{
2491 u32 uval, uninitialized_var(nval), mval;
2492
2493retry:
2494 if (get_user(uval, uaddr))
2495 return -1;
2496
2497 if ((uval & FUTEX_TID_MASK) == task_pid_vnr(curr)) {
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508 mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518 if (cmpxchg_futex_value_locked(&nval, uaddr, uval, mval)) {
2519 if (fault_in_user_writeable(uaddr))
2520 return -1;
2521 goto retry;
2522 }
2523 if (nval != uval)
2524 goto retry;
2525
2526
2527
2528
2529
2530 if (!pi && (uval & FUTEX_WAITERS))
2531 futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
2532 }
2533 return 0;
2534}
2535
2536
2537
2538
2539static inline int fetch_robust_entry(struct robust_list __user **entry,
2540 struct robust_list __user * __user *head,
2541 unsigned int *pi)
2542{
2543 unsigned long uentry;
2544
2545 if (get_user(uentry, (unsigned long __user *)head))
2546 return -EFAULT;
2547
2548 *entry = (void __user *)(uentry & ~1UL);
2549 *pi = uentry & 1;
2550
2551 return 0;
2552}
2553
2554
2555
2556
2557
2558
2559
2560void exit_robust_list(struct task_struct *curr)
2561{
2562 struct robust_list_head __user *head = curr->robust_list;
2563 struct robust_list __user *entry, *next_entry, *pending;
2564 unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
2565 unsigned int uninitialized_var(next_pi);
2566 unsigned long futex_offset;
2567 int rc;
2568
2569 if (!futex_cmpxchg_enabled)
2570 return;
2571
2572
2573
2574
2575
2576 if (fetch_robust_entry(&entry, &head->list.next, &pi))
2577 return;
2578
2579
2580
2581 if (get_user(futex_offset, &head->futex_offset))
2582 return;
2583
2584
2585
2586
2587 if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
2588 return;
2589
2590 next_entry = NULL;
2591 while (entry != &head->list) {
2592
2593
2594
2595
2596 rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi);
2597
2598
2599
2600
2601 if (entry != pending)
2602 if (handle_futex_death((void __user *)entry + futex_offset,
2603 curr, pi))
2604 return;
2605 if (rc)
2606 return;
2607 entry = next_entry;
2608 pi = next_pi;
2609
2610
2611
2612 if (!--limit)
2613 break;
2614
2615 cond_resched();
2616 }
2617
2618 if (pending)
2619 handle_futex_death((void __user *)pending + futex_offset,
2620 curr, pip);
2621}
2622
2623long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
2624 u32 __user *uaddr2, u32 val2, u32 val3)
2625{
2626 int cmd = op & FUTEX_CMD_MASK;
2627 unsigned int flags = 0;
2628
2629 if (!(op & FUTEX_PRIVATE_FLAG))
2630 flags |= FLAGS_SHARED;
2631
2632 if (op & FUTEX_CLOCK_REALTIME) {
2633 flags |= FLAGS_CLOCKRT;
2634 if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI)
2635 return -ENOSYS;
2636 }
2637
2638 switch (cmd) {
2639 case FUTEX_LOCK_PI:
2640 case FUTEX_UNLOCK_PI:
2641 case FUTEX_TRYLOCK_PI:
2642 case FUTEX_WAIT_REQUEUE_PI:
2643 case FUTEX_CMP_REQUEUE_PI:
2644 if (!futex_cmpxchg_enabled)
2645 return -ENOSYS;
2646 }
2647
2648 switch (cmd) {
2649 case FUTEX_WAIT:
2650 val3 = FUTEX_BITSET_MATCH_ANY;
2651 case FUTEX_WAIT_BITSET:
2652 return futex_wait(uaddr, flags, val, timeout, val3);
2653 case FUTEX_WAKE:
2654 val3 = FUTEX_BITSET_MATCH_ANY;
2655 case FUTEX_WAKE_BITSET:
2656 return futex_wake(uaddr, flags, val, val3);
2657 case FUTEX_REQUEUE:
2658 return futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0);
2659 case FUTEX_CMP_REQUEUE:
2660 return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0);
2661 case FUTEX_WAKE_OP:
2662 return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
2663 case FUTEX_LOCK_PI:
2664 return futex_lock_pi(uaddr, flags, val, timeout, 0);
2665 case FUTEX_UNLOCK_PI:
2666 return futex_unlock_pi(uaddr, flags);
2667 case FUTEX_TRYLOCK_PI:
2668 return futex_lock_pi(uaddr, flags, 0, timeout, 1);
2669 case FUTEX_WAIT_REQUEUE_PI:
2670 val3 = FUTEX_BITSET_MATCH_ANY;
2671 return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
2672 uaddr2);
2673 case FUTEX_CMP_REQUEUE_PI:
2674 return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1);
2675 }
2676 return -ENOSYS;
2677}
2678
2679
2680SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
2681 struct timespec __user *, utime, u32 __user *, uaddr2,
2682 u32, val3)
2683{
2684 struct timespec ts;
2685 ktime_t t, *tp = NULL;
2686 u32 val2 = 0;
2687 int cmd = op & FUTEX_CMD_MASK;
2688
2689 if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
2690 cmd == FUTEX_WAIT_BITSET ||
2691 cmd == FUTEX_WAIT_REQUEUE_PI)) {
2692 if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
2693 return -EFAULT;
2694 if (!timespec_valid(&ts))
2695 return -EINVAL;
2696
2697 t = timespec_to_ktime(ts);
2698 if (cmd == FUTEX_WAIT)
2699 t = ktime_add_safe(ktime_get(), t);
2700 tp = &t;
2701 }
2702
2703
2704
2705
2706 if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
2707 cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
2708 val2 = (u32) (unsigned long) utime;
2709
2710 return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
2711}
2712
2713static int __init futex_init(void)
2714{
2715 u32 curval;
2716 int i;
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728 if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
2729 futex_cmpxchg_enabled = 1;
2730
2731 for (i = 0; i < ARRAY_SIZE(futex_queues); i++) {
2732 plist_head_init(&futex_queues[i].chain);
2733 spin_lock_init(&futex_queues[i].lock);
2734 }
2735
2736 return 0;
2737}
2738__initcall(futex_init);
2739