1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47#include <linux/slab.h>
48#include <linux/poll.h>
49#include <linux/fs.h>
50#include <linux/file.h>
51#include <linux/jhash.h>
52#include <linux/init.h>
53#include <linux/futex.h>
54#include <linux/mount.h>
55#include <linux/pagemap.h>
56#include <linux/syscalls.h>
57#include <linux/signal.h>
58#include <linux/export.h>
59#include <linux/magic.h>
60#include <linux/pid.h>
61#include <linux/nsproxy.h>
62#include <linux/ptrace.h>
63
64#include <asm/futex.h>
65
66#include "rtmutex_common.h"
67
68int __read_mostly futex_cmpxchg_enabled;
69
70#define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
71
72
73
74
75
76#define FLAGS_SHARED 0x01
77#define FLAGS_CLOCKRT 0x02
78#define FLAGS_HAS_TIMEOUT 0x04
79
80
81
82
83struct futex_pi_state {
84
85
86
87
88 struct list_head list;
89
90
91
92
93 struct rt_mutex pi_mutex;
94
95 struct task_struct *owner;
96 atomic_t refcount;
97
98 union futex_key key;
99};
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123struct futex_q {
124 struct plist_node list;
125
126 struct task_struct *task;
127 spinlock_t *lock_ptr;
128 union futex_key key;
129 struct futex_pi_state *pi_state;
130 struct rt_mutex_waiter *rt_waiter;
131 union futex_key *requeue_pi_key;
132 u32 bitset;
133};
134
135static const struct futex_q futex_q_init = {
136
137 .key = FUTEX_KEY_INIT,
138 .bitset = FUTEX_BITSET_MATCH_ANY
139};
140
141
142
143
144
145
146struct futex_hash_bucket {
147 spinlock_t lock;
148 struct plist_head chain;
149};
150
151static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];
152
153
154
155
156static struct futex_hash_bucket *hash_futex(union futex_key *key)
157{
158 u32 hash = jhash2((u32*)&key->both.word,
159 (sizeof(key->both.word)+sizeof(key->both.ptr))/4,
160 key->both.offset);
161 return &futex_queues[hash & ((1 << FUTEX_HASHBITS)-1)];
162}
163
164
165
166
167static inline int match_futex(union futex_key *key1, union futex_key *key2)
168{
169 return (key1 && key2
170 && key1->both.word == key2->both.word
171 && key1->both.ptr == key2->both.ptr
172 && key1->both.offset == key2->both.offset);
173}
174
175
176
177
178
179
180static void get_futex_key_refs(union futex_key *key)
181{
182 if (!key->both.ptr)
183 return;
184
185 switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
186 case FUT_OFF_INODE:
187 ihold(key->shared.inode);
188 break;
189 case FUT_OFF_MMSHARED:
190 atomic_inc(&key->private.mm->mm_count);
191 break;
192 }
193}
194
195
196
197
198
199static void drop_futex_key_refs(union futex_key *key)
200{
201 if (!key->both.ptr) {
202
203 WARN_ON_ONCE(1);
204 return;
205 }
206
207 switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
208 case FUT_OFF_INODE:
209 iput(key->shared.inode);
210 break;
211 case FUT_OFF_MMSHARED:
212 mmdrop(key->private.mm);
213 break;
214 }
215}
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234static int
235get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
236{
237 unsigned long address = (unsigned long)uaddr;
238 struct mm_struct *mm = current->mm;
239 struct page *page, *page_head;
240 int err, ro = 0;
241
242
243
244
245 key->both.offset = address % PAGE_SIZE;
246 if (unlikely((address % sizeof(u32)) != 0))
247 return -EINVAL;
248 address -= key->both.offset;
249
250
251
252
253
254
255
256
257 if (!fshared) {
258 if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))))
259 return -EFAULT;
260 key->private.mm = mm;
261 key->private.address = address;
262 get_futex_key_refs(key);
263 return 0;
264 }
265
266again:
267 err = get_user_pages_fast(address, 1, 1, &page);
268
269
270
271
272 if (err == -EFAULT && rw == VERIFY_READ) {
273 err = get_user_pages_fast(address, 1, 0, &page);
274 ro = 1;
275 }
276 if (err < 0)
277 return err;
278 else
279 err = 0;
280
281#ifdef CONFIG_TRANSPARENT_HUGEPAGE
282 page_head = page;
283 if (unlikely(PageTail(page))) {
284 put_page(page);
285
286 local_irq_disable();
287 if (likely(__get_user_pages_fast(address, 1, 1, &page) == 1)) {
288 page_head = compound_head(page);
289
290
291
292
293
294
295
296
297
298
299 if (page != page_head) {
300 get_page(page_head);
301 put_page(page);
302 }
303 local_irq_enable();
304 } else {
305 local_irq_enable();
306 goto again;
307 }
308 }
309#else
310 page_head = compound_head(page);
311 if (page != page_head) {
312 get_page(page_head);
313 put_page(page);
314 }
315#endif
316
317 lock_page(page_head);
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334 if (!page_head->mapping) {
335 int shmem_swizzled = PageSwapCache(page_head);
336 unlock_page(page_head);
337 put_page(page_head);
338 if (shmem_swizzled)
339 goto again;
340 return -EFAULT;
341 }
342
343
344
345
346
347
348
349
350 if (PageAnon(page_head)) {
351
352
353
354
355 if (ro) {
356 err = -EFAULT;
357 goto out;
358 }
359
360 key->both.offset |= FUT_OFF_MMSHARED;
361 key->private.mm = mm;
362 key->private.address = address;
363 } else {
364 key->both.offset |= FUT_OFF_INODE;
365 key->shared.inode = page_head->mapping->host;
366 key->shared.pgoff = page_head->index;
367 }
368
369 get_futex_key_refs(key);
370
371out:
372 unlock_page(page_head);
373 put_page(page_head);
374 return err;
375}
376
/* Release the reference held by a key obtained from get_futex_key(). */
static inline void put_futex_key(union futex_key *key)
{
	drop_futex_key_refs(key);
}
381
382
383
384
385
386
387
388
389
390
391
392
393
394static int fault_in_user_writeable(u32 __user *uaddr)
395{
396 struct mm_struct *mm = current->mm;
397 int ret;
398
399 down_read(&mm->mmap_sem);
400 ret = fixup_user_fault(current, mm, (unsigned long)uaddr,
401 FAULT_FLAG_WRITE);
402 up_read(&mm->mmap_sem);
403
404 return ret < 0 ? ret : 0;
405}
406
407
408
409
410
411
412
413
414static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb,
415 union futex_key *key)
416{
417 struct futex_q *this;
418
419 plist_for_each_entry(this, &hb->chain, list) {
420 if (match_futex(&this->key, key))
421 return this;
422 }
423 return NULL;
424}
425
426static int cmpxchg_futex_value_locked(u32 *curval, u32 __user *uaddr,
427 u32 uval, u32 newval)
428{
429 int ret;
430
431 pagefault_disable();
432 ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
433 pagefault_enable();
434
435 return ret;
436}
437
438static int get_futex_value_locked(u32 *dest, u32 __user *from)
439{
440 int ret;
441
442 pagefault_disable();
443 ret = __copy_from_user_inatomic(dest, from, sizeof(u32));
444 pagefault_enable();
445
446 return ret ? -EFAULT : 0;
447}
448
449
450
451
452
453static int refill_pi_state_cache(void)
454{
455 struct futex_pi_state *pi_state;
456
457 if (likely(current->pi_state_cache))
458 return 0;
459
460 pi_state = kzalloc(sizeof(*pi_state), GFP_KERNEL);
461
462 if (!pi_state)
463 return -ENOMEM;
464
465 INIT_LIST_HEAD(&pi_state->list);
466
467 pi_state->owner = NULL;
468 atomic_set(&pi_state->refcount, 1);
469 pi_state->key = FUTEX_KEY_INIT;
470
471 current->pi_state_cache = pi_state;
472
473 return 0;
474}
475
476static struct futex_pi_state * alloc_pi_state(void)
477{
478 struct futex_pi_state *pi_state = current->pi_state_cache;
479
480 WARN_ON(!pi_state);
481 current->pi_state_cache = NULL;
482
483 return pi_state;
484}
485
486static void free_pi_state(struct futex_pi_state *pi_state)
487{
488 if (!atomic_dec_and_test(&pi_state->refcount))
489 return;
490
491
492
493
494
495 if (pi_state->owner) {
496 raw_spin_lock_irq(&pi_state->owner->pi_lock);
497 list_del_init(&pi_state->list);
498 raw_spin_unlock_irq(&pi_state->owner->pi_lock);
499
500 rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner);
501 }
502
503 if (current->pi_state_cache)
504 kfree(pi_state);
505 else {
506
507
508
509
510
511 pi_state->owner = NULL;
512 atomic_set(&pi_state->refcount, 1);
513 current->pi_state_cache = pi_state;
514 }
515}
516
517
518
519
520
521static struct task_struct * futex_find_get_task(pid_t pid)
522{
523 struct task_struct *p;
524
525 rcu_read_lock();
526 p = find_task_by_vpid(pid);
527 if (p)
528 get_task_struct(p);
529
530 rcu_read_unlock();
531
532 return p;
533}
534
535
536
537
538
539
540void exit_pi_state_list(struct task_struct *curr)
541{
542 struct list_head *next, *head = &curr->pi_state_list;
543 struct futex_pi_state *pi_state;
544 struct futex_hash_bucket *hb;
545 union futex_key key = FUTEX_KEY_INIT;
546
547 if (!futex_cmpxchg_enabled)
548 return;
549
550
551
552
553
554 raw_spin_lock_irq(&curr->pi_lock);
555 while (!list_empty(head)) {
556
557 next = head->next;
558 pi_state = list_entry(next, struct futex_pi_state, list);
559 key = pi_state->key;
560 hb = hash_futex(&key);
561 raw_spin_unlock_irq(&curr->pi_lock);
562
563 spin_lock(&hb->lock);
564
565 raw_spin_lock_irq(&curr->pi_lock);
566
567
568
569
570 if (head->next != next) {
571 spin_unlock(&hb->lock);
572 continue;
573 }
574
575 WARN_ON(pi_state->owner != curr);
576 WARN_ON(list_empty(&pi_state->list));
577 list_del_init(&pi_state->list);
578 pi_state->owner = NULL;
579 raw_spin_unlock_irq(&curr->pi_lock);
580
581 rt_mutex_unlock(&pi_state->pi_mutex);
582
583 spin_unlock(&hb->lock);
584
585 raw_spin_lock_irq(&curr->pi_lock);
586 }
587 raw_spin_unlock_irq(&curr->pi_lock);
588}
589
590static int
591lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
592 union futex_key *key, struct futex_pi_state **ps)
593{
594 struct futex_pi_state *pi_state = NULL;
595 struct futex_q *this, *next;
596 struct plist_head *head;
597 struct task_struct *p;
598 pid_t pid = uval & FUTEX_TID_MASK;
599
600 head = &hb->chain;
601
602 plist_for_each_entry_safe(this, next, head, list) {
603 if (match_futex(&this->key, key)) {
604
605
606
607
608 pi_state = this->pi_state;
609
610
611
612 if (unlikely(!pi_state))
613 return -EINVAL;
614
615 WARN_ON(!atomic_read(&pi_state->refcount));
616
617
618
619
620
621
622
623
624
625
626
627 if (pid && pi_state->owner) {
628
629
630
631
632 if (pid != task_pid_vnr(pi_state->owner))
633 return -EINVAL;
634 }
635
636 atomic_inc(&pi_state->refcount);
637 *ps = pi_state;
638
639 return 0;
640 }
641 }
642
643
644
645
646
647 if (!pid)
648 return -ESRCH;
649 p = futex_find_get_task(pid);
650 if (!p)
651 return -ESRCH;
652
653
654
655
656
657
658
659 raw_spin_lock_irq(&p->pi_lock);
660 if (unlikely(p->flags & PF_EXITING)) {
661
662
663
664
665
666 int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;
667
668 raw_spin_unlock_irq(&p->pi_lock);
669 put_task_struct(p);
670 return ret;
671 }
672
673 pi_state = alloc_pi_state();
674
675
676
677
678
679 rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);
680
681
682 pi_state->key = *key;
683
684 WARN_ON(!list_empty(&pi_state->list));
685 list_add(&pi_state->list, &p->pi_state_list);
686 pi_state->owner = p;
687 raw_spin_unlock_irq(&p->pi_lock);
688
689 put_task_struct(p);
690
691 *ps = pi_state;
692
693 return 0;
694}
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
715 union futex_key *key,
716 struct futex_pi_state **ps,
717 struct task_struct *task, int set_waiters)
718{
719 int lock_taken, ret, force_take = 0;
720 u32 uval, newval, curval, vpid = task_pid_vnr(task);
721
722retry:
723 ret = lock_taken = 0;
724
725
726
727
728
729
730 newval = vpid;
731 if (set_waiters)
732 newval |= FUTEX_WAITERS;
733
734 if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, 0, newval)))
735 return -EFAULT;
736
737
738
739
740 if ((unlikely((curval & FUTEX_TID_MASK) == vpid)))
741 return -EDEADLK;
742
743
744
745
746 if (unlikely(!curval))
747 return 1;
748
749 uval = curval;
750
751
752
753
754
755 newval = curval | FUTEX_WAITERS;
756
757
758
759
760 if (unlikely(force_take)) {
761
762
763
764
765 newval = (curval & ~FUTEX_TID_MASK) | vpid;
766 force_take = 0;
767 lock_taken = 1;
768 }
769
770 if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)))
771 return -EFAULT;
772 if (unlikely(curval != uval))
773 goto retry;
774
775
776
777
778 if (unlikely(lock_taken))
779 return 1;
780
781
782
783
784
785 ret = lookup_pi_state(uval, hb, key, ps);
786
787 if (unlikely(ret)) {
788 switch (ret) {
789 case -ESRCH:
790
791
792
793
794
795
796
797
798
799
800 if (get_futex_value_locked(&curval, uaddr))
801 return -EFAULT;
802
803
804
805
806
807
808 if (!(curval & FUTEX_TID_MASK)) {
809 force_take = 1;
810 goto retry;
811 }
812 default:
813 break;
814 }
815 }
816
817 return ret;
818}
819
820
821
822
823
824
825
826static void __unqueue_futex(struct futex_q *q)
827{
828 struct futex_hash_bucket *hb;
829
830 if (WARN_ON_SMP(!q->lock_ptr || !spin_is_locked(q->lock_ptr))
831 || WARN_ON(plist_node_empty(&q->list)))
832 return;
833
834 hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
835 plist_del(&q->list, &hb->chain);
836}
837
838
839
840
841
842static void wake_futex(struct futex_q *q)
843{
844 struct task_struct *p = q->task;
845
846 if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n"))
847 return;
848
849
850
851
852
853
854
855
856 get_task_struct(p);
857
858 __unqueue_futex(q);
859
860
861
862
863
864
865 smp_wmb();
866 q->lock_ptr = NULL;
867
868 wake_up_state(p, TASK_NORMAL);
869 put_task_struct(p);
870}
871
872static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
873{
874 struct task_struct *new_owner;
875 struct futex_pi_state *pi_state = this->pi_state;
876 u32 uninitialized_var(curval), newval;
877
878 if (!pi_state)
879 return -EINVAL;
880
881
882
883
884
885 if (pi_state->owner != current)
886 return -EINVAL;
887
888 raw_spin_lock(&pi_state->pi_mutex.wait_lock);
889 new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
890
891
892
893
894
895
896 if (!new_owner)
897 new_owner = this->task;
898
899
900
901
902
903
904 if (!(uval & FUTEX_OWNER_DIED)) {
905 int ret = 0;
906
907 newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
908
909 if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
910 ret = -EFAULT;
911 else if (curval != uval)
912 ret = -EINVAL;
913 if (ret) {
914 raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
915 return ret;
916 }
917 }
918
919 raw_spin_lock_irq(&pi_state->owner->pi_lock);
920 WARN_ON(list_empty(&pi_state->list));
921 list_del_init(&pi_state->list);
922 raw_spin_unlock_irq(&pi_state->owner->pi_lock);
923
924 raw_spin_lock_irq(&new_owner->pi_lock);
925 WARN_ON(!list_empty(&pi_state->list));
926 list_add(&pi_state->list, &new_owner->pi_state_list);
927 pi_state->owner = new_owner;
928 raw_spin_unlock_irq(&new_owner->pi_lock);
929
930 raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
931 rt_mutex_unlock(&pi_state->pi_mutex);
932
933 return 0;
934}
935
936static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
937{
938 u32 uninitialized_var(oldval);
939
940
941
942
943
944 if (cmpxchg_futex_value_locked(&oldval, uaddr, uval, 0))
945 return -EFAULT;
946 if (oldval != uval)
947 return -EAGAIN;
948
949 return 0;
950}
951
952
953
954
955static inline void
956double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
957{
958 if (hb1 <= hb2) {
959 spin_lock(&hb1->lock);
960 if (hb1 < hb2)
961 spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
962 } else {
963 spin_lock(&hb2->lock);
964 spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING);
965 }
966}
967
968static inline void
969double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
970{
971 spin_unlock(&hb1->lock);
972 if (hb1 != hb2)
973 spin_unlock(&hb2->lock);
974}
975
976
977
978
979static int
980futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
981{
982 struct futex_hash_bucket *hb;
983 struct futex_q *this, *next;
984 struct plist_head *head;
985 union futex_key key = FUTEX_KEY_INIT;
986 int ret;
987
988 if (!bitset)
989 return -EINVAL;
990
991 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_READ);
992 if (unlikely(ret != 0))
993 goto out;
994
995 hb = hash_futex(&key);
996 spin_lock(&hb->lock);
997 head = &hb->chain;
998
999 plist_for_each_entry_safe(this, next, head, list) {
1000 if (match_futex (&this->key, &key)) {
1001 if (this->pi_state || this->rt_waiter) {
1002 ret = -EINVAL;
1003 break;
1004 }
1005
1006
1007 if (!(this->bitset & bitset))
1008 continue;
1009
1010 wake_futex(this);
1011 if (++ret >= nr_wake)
1012 break;
1013 }
1014 }
1015
1016 spin_unlock(&hb->lock);
1017 put_futex_key(&key);
1018out:
1019 return ret;
1020}
1021
1022
1023
1024
1025
1026static int
1027futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
1028 int nr_wake, int nr_wake2, int op)
1029{
1030 union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
1031 struct futex_hash_bucket *hb1, *hb2;
1032 struct plist_head *head;
1033 struct futex_q *this, *next;
1034 int ret, op_ret;
1035
1036retry:
1037 ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
1038 if (unlikely(ret != 0))
1039 goto out;
1040 ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
1041 if (unlikely(ret != 0))
1042 goto out_put_key1;
1043
1044 hb1 = hash_futex(&key1);
1045 hb2 = hash_futex(&key2);
1046
1047retry_private:
1048 double_lock_hb(hb1, hb2);
1049 op_ret = futex_atomic_op_inuser(op, uaddr2);
1050 if (unlikely(op_ret < 0)) {
1051
1052 double_unlock_hb(hb1, hb2);
1053
1054#ifndef CONFIG_MMU
1055
1056
1057
1058
1059 ret = op_ret;
1060 goto out_put_keys;
1061#endif
1062
1063 if (unlikely(op_ret != -EFAULT)) {
1064 ret = op_ret;
1065 goto out_put_keys;
1066 }
1067
1068 ret = fault_in_user_writeable(uaddr2);
1069 if (ret)
1070 goto out_put_keys;
1071
1072 if (!(flags & FLAGS_SHARED))
1073 goto retry_private;
1074
1075 put_futex_key(&key2);
1076 put_futex_key(&key1);
1077 goto retry;
1078 }
1079
1080 head = &hb1->chain;
1081
1082 plist_for_each_entry_safe(this, next, head, list) {
1083 if (match_futex (&this->key, &key1)) {
1084 if (this->pi_state || this->rt_waiter) {
1085 ret = -EINVAL;
1086 goto out_unlock;
1087 }
1088 wake_futex(this);
1089 if (++ret >= nr_wake)
1090 break;
1091 }
1092 }
1093
1094 if (op_ret > 0) {
1095 head = &hb2->chain;
1096
1097 op_ret = 0;
1098 plist_for_each_entry_safe(this, next, head, list) {
1099 if (match_futex (&this->key, &key2)) {
1100 if (this->pi_state || this->rt_waiter) {
1101 ret = -EINVAL;
1102 goto out_unlock;
1103 }
1104 wake_futex(this);
1105 if (++op_ret >= nr_wake2)
1106 break;
1107 }
1108 }
1109 ret += op_ret;
1110 }
1111
1112out_unlock:
1113 double_unlock_hb(hb1, hb2);
1114out_put_keys:
1115 put_futex_key(&key2);
1116out_put_key1:
1117 put_futex_key(&key1);
1118out:
1119 return ret;
1120}
1121
1122
1123
1124
1125
1126
1127
1128
1129static inline
1130void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
1131 struct futex_hash_bucket *hb2, union futex_key *key2)
1132{
1133
1134
1135
1136
1137
1138 if (likely(&hb1->chain != &hb2->chain)) {
1139 plist_del(&q->list, &hb1->chain);
1140 plist_add(&q->list, &hb2->chain);
1141 q->lock_ptr = &hb2->lock;
1142 }
1143 get_futex_key_refs(key2);
1144 q->key = *key2;
1145}
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161static inline
1162void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
1163 struct futex_hash_bucket *hb)
1164{
1165 get_futex_key_refs(key);
1166 q->key = *key;
1167
1168 __unqueue_futex(q);
1169
1170 WARN_ON(!q->rt_waiter);
1171 q->rt_waiter = NULL;
1172
1173 q->lock_ptr = &hb->lock;
1174
1175 wake_up_state(q->task, TASK_NORMAL);
1176}
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198static int futex_proxy_trylock_atomic(u32 __user *pifutex,
1199 struct futex_hash_bucket *hb1,
1200 struct futex_hash_bucket *hb2,
1201 union futex_key *key1, union futex_key *key2,
1202 struct futex_pi_state **ps, int set_waiters)
1203{
1204 struct futex_q *top_waiter = NULL;
1205 u32 curval;
1206 int ret;
1207
1208 if (get_futex_value_locked(&curval, pifutex))
1209 return -EFAULT;
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219 top_waiter = futex_top_waiter(hb1, key1);
1220
1221
1222 if (!top_waiter)
1223 return 0;
1224
1225
1226 if (!match_futex(top_waiter->requeue_pi_key, key2))
1227 return -EINVAL;
1228
1229
1230
1231
1232
1233
1234 ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
1235 set_waiters);
1236 if (ret == 1)
1237 requeue_pi_wake_futex(top_waiter, key2, hb2);
1238
1239 return ret;
1240}
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
1261 u32 __user *uaddr2, int nr_wake, int nr_requeue,
1262 u32 *cmpval, int requeue_pi)
1263{
1264 union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
1265 int drop_count = 0, task_count = 0, ret;
1266 struct futex_pi_state *pi_state = NULL;
1267 struct futex_hash_bucket *hb1, *hb2;
1268 struct plist_head *head1;
1269 struct futex_q *this, *next;
1270 u32 curval2;
1271
1272 if (requeue_pi) {
1273
1274
1275
1276
1277 if (refill_pi_state_cache())
1278 return -ENOMEM;
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289 if (nr_wake != 1)
1290 return -EINVAL;
1291 }
1292
1293retry:
1294 if (pi_state != NULL) {
1295
1296
1297
1298
1299 free_pi_state(pi_state);
1300 pi_state = NULL;
1301 }
1302
1303 ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
1304 if (unlikely(ret != 0))
1305 goto out;
1306 ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2,
1307 requeue_pi ? VERIFY_WRITE : VERIFY_READ);
1308 if (unlikely(ret != 0))
1309 goto out_put_key1;
1310
1311 hb1 = hash_futex(&key1);
1312 hb2 = hash_futex(&key2);
1313
1314retry_private:
1315 double_lock_hb(hb1, hb2);
1316
1317 if (likely(cmpval != NULL)) {
1318 u32 curval;
1319
1320 ret = get_futex_value_locked(&curval, uaddr1);
1321
1322 if (unlikely(ret)) {
1323 double_unlock_hb(hb1, hb2);
1324
1325 ret = get_user(curval, uaddr1);
1326 if (ret)
1327 goto out_put_keys;
1328
1329 if (!(flags & FLAGS_SHARED))
1330 goto retry_private;
1331
1332 put_futex_key(&key2);
1333 put_futex_key(&key1);
1334 goto retry;
1335 }
1336 if (curval != *cmpval) {
1337 ret = -EAGAIN;
1338 goto out_unlock;
1339 }
1340 }
1341
1342 if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
1343
1344
1345
1346
1347
1348
1349 ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
1350 &key2, &pi_state, nr_requeue);
1351
1352
1353
1354
1355
1356
1357
1358 if (ret == 1) {
1359 WARN_ON(pi_state);
1360 drop_count++;
1361 task_count++;
1362 ret = get_futex_value_locked(&curval2, uaddr2);
1363 if (!ret)
1364 ret = lookup_pi_state(curval2, hb2, &key2,
1365 &pi_state);
1366 }
1367
1368 switch (ret) {
1369 case 0:
1370 break;
1371 case -EFAULT:
1372 double_unlock_hb(hb1, hb2);
1373 put_futex_key(&key2);
1374 put_futex_key(&key1);
1375 ret = fault_in_user_writeable(uaddr2);
1376 if (!ret)
1377 goto retry;
1378 goto out;
1379 case -EAGAIN:
1380
1381 double_unlock_hb(hb1, hb2);
1382 put_futex_key(&key2);
1383 put_futex_key(&key1);
1384 cond_resched();
1385 goto retry;
1386 default:
1387 goto out_unlock;
1388 }
1389 }
1390
1391 head1 = &hb1->chain;
1392 plist_for_each_entry_safe(this, next, head1, list) {
1393 if (task_count - nr_wake >= nr_requeue)
1394 break;
1395
1396 if (!match_futex(&this->key, &key1))
1397 continue;
1398
1399
1400
1401
1402
1403
1404
1405
1406 if ((requeue_pi && !this->rt_waiter) ||
1407 (!requeue_pi && this->rt_waiter) ||
1408 this->pi_state) {
1409 ret = -EINVAL;
1410 break;
1411 }
1412
1413
1414
1415
1416
1417
1418 if (++task_count <= nr_wake && !requeue_pi) {
1419 wake_futex(this);
1420 continue;
1421 }
1422
1423
1424 if (requeue_pi && !match_futex(this->requeue_pi_key, &key2)) {
1425 ret = -EINVAL;
1426 break;
1427 }
1428
1429
1430
1431
1432
1433 if (requeue_pi) {
1434
1435 atomic_inc(&pi_state->refcount);
1436 this->pi_state = pi_state;
1437 ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
1438 this->rt_waiter,
1439 this->task, 1);
1440 if (ret == 1) {
1441
1442 requeue_pi_wake_futex(this, &key2, hb2);
1443 drop_count++;
1444 continue;
1445 } else if (ret) {
1446
1447 this->pi_state = NULL;
1448 free_pi_state(pi_state);
1449 goto out_unlock;
1450 }
1451 }
1452 requeue_futex(this, hb1, hb2, &key2);
1453 drop_count++;
1454 }
1455
1456out_unlock:
1457 double_unlock_hb(hb1, hb2);
1458
1459
1460
1461
1462
1463
1464
1465 while (--drop_count >= 0)
1466 drop_futex_key_refs(&key1);
1467
1468out_put_keys:
1469 put_futex_key(&key2);
1470out_put_key1:
1471 put_futex_key(&key1);
1472out:
1473 if (pi_state != NULL)
1474 free_pi_state(pi_state);
1475 return ret ? ret : task_count;
1476}
1477
1478
1479static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
1480 __acquires(&hb->lock)
1481{
1482 struct futex_hash_bucket *hb;
1483
1484 hb = hash_futex(&q->key);
1485 q->lock_ptr = &hb->lock;
1486
1487 spin_lock(&hb->lock);
1488 return hb;
1489}
1490
1491static inline void
1492queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb)
1493 __releases(&hb->lock)
1494{
1495 spin_unlock(&hb->lock);
1496}
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
1511 __releases(&hb->lock)
1512{
1513 int prio;
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523 prio = min(current->normal_prio, MAX_RT_PRIO);
1524
1525 plist_node_init(&q->list, prio);
1526 plist_add(&q->list, &hb->chain);
1527 q->task = current;
1528 spin_unlock(&hb->lock);
1529}
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542static int unqueue_me(struct futex_q *q)
1543{
1544 spinlock_t *lock_ptr;
1545 int ret = 0;
1546
1547
1548retry:
1549 lock_ptr = q->lock_ptr;
1550 barrier();
1551 if (lock_ptr != NULL) {
1552 spin_lock(lock_ptr);
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566 if (unlikely(lock_ptr != q->lock_ptr)) {
1567 spin_unlock(lock_ptr);
1568 goto retry;
1569 }
1570 __unqueue_futex(q);
1571
1572 BUG_ON(q->pi_state);
1573
1574 spin_unlock(lock_ptr);
1575 ret = 1;
1576 }
1577
1578 drop_futex_key_refs(&q->key);
1579 return ret;
1580}
1581
1582
1583
1584
1585
1586
1587static void unqueue_me_pi(struct futex_q *q)
1588 __releases(q->lock_ptr)
1589{
1590 __unqueue_futex(q);
1591
1592 BUG_ON(!q->pi_state);
1593 free_pi_state(q->pi_state);
1594 q->pi_state = NULL;
1595
1596 spin_unlock(q->lock_ptr);
1597}
1598
1599
1600
1601
1602
1603
1604
1605static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
1606 struct task_struct *newowner)
1607{
1608 u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
1609 struct futex_pi_state *pi_state = q->pi_state;
1610 struct task_struct *oldowner = pi_state->owner;
1611 u32 uval, uninitialized_var(curval), newval;
1612 int ret;
1613
1614
1615 if (!pi_state->owner)
1616 newtid |= FUTEX_OWNER_DIED;
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635retry:
1636 if (get_futex_value_locked(&uval, uaddr))
1637 goto handle_fault;
1638
1639 while (1) {
1640 newval = (uval & FUTEX_OWNER_DIED) | newtid;
1641
1642 if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
1643 goto handle_fault;
1644 if (curval == uval)
1645 break;
1646 uval = curval;
1647 }
1648
1649
1650
1651
1652
1653 if (pi_state->owner != NULL) {
1654 raw_spin_lock_irq(&pi_state->owner->pi_lock);
1655 WARN_ON(list_empty(&pi_state->list));
1656 list_del_init(&pi_state->list);
1657 raw_spin_unlock_irq(&pi_state->owner->pi_lock);
1658 }
1659
1660 pi_state->owner = newowner;
1661
1662 raw_spin_lock_irq(&newowner->pi_lock);
1663 WARN_ON(!list_empty(&pi_state->list));
1664 list_add(&pi_state->list, &newowner->pi_state_list);
1665 raw_spin_unlock_irq(&newowner->pi_lock);
1666 return 0;
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678handle_fault:
1679 spin_unlock(q->lock_ptr);
1680
1681 ret = fault_in_user_writeable(uaddr);
1682
1683 spin_lock(q->lock_ptr);
1684
1685
1686
1687
1688 if (pi_state->owner != oldowner)
1689 return 0;
1690
1691 if (ret)
1692 return ret;
1693
1694 goto retry;
1695}
1696
/* Restart handler for an interrupted futex_wait(); defined after it. */
static long futex_wait_restart(struct restart_block *restart);
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
1715{
1716 struct task_struct *owner;
1717 int ret = 0;
1718
1719 if (locked) {
1720
1721
1722
1723
1724 if (q->pi_state->owner != current)
1725 ret = fixup_pi_state_owner(uaddr, q, current);
1726 goto out;
1727 }
1728
1729
1730
1731
1732
1733 if (q->pi_state->owner == current) {
1734
1735
1736
1737
1738
1739 if (rt_mutex_trylock(&q->pi_state->pi_mutex)) {
1740 locked = 1;
1741 goto out;
1742 }
1743
1744
1745
1746
1747
1748
1749 raw_spin_lock(&q->pi_state->pi_mutex.wait_lock);
1750 owner = rt_mutex_owner(&q->pi_state->pi_mutex);
1751 if (!owner)
1752 owner = rt_mutex_next_owner(&q->pi_state->pi_mutex);
1753 raw_spin_unlock(&q->pi_state->pi_mutex.wait_lock);
1754 ret = fixup_pi_state_owner(uaddr, q, owner);
1755 goto out;
1756 }
1757
1758
1759
1760
1761
1762 if (rt_mutex_owner(&q->pi_state->pi_mutex) == current)
1763 printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
1764 "pi-state %p\n", ret,
1765 q->pi_state->pi_mutex.owner,
1766 q->pi_state->owner);
1767
1768out:
1769 return ret ? ret : locked;
1770}
1771
1772
1773
1774
1775
1776
1777
1778static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
1779 struct hrtimer_sleeper *timeout)
1780{
1781
1782
1783
1784
1785
1786
1787 set_current_state(TASK_INTERRUPTIBLE);
1788 queue_me(q, hb);
1789
1790
1791 if (timeout) {
1792 hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
1793 if (!hrtimer_active(&timeout->timer))
1794 timeout->task = NULL;
1795 }
1796
1797
1798
1799
1800
1801 if (likely(!plist_node_empty(&q->list))) {
1802
1803
1804
1805
1806
1807 if (!timeout || timeout->task)
1808 schedule();
1809 }
1810 __set_current_state(TASK_RUNNING);
1811}
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
1831 struct futex_q *q, struct futex_hash_bucket **hb)
1832{
1833 u32 uval;
1834 int ret;
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854retry:
1855 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, VERIFY_READ);
1856 if (unlikely(ret != 0))
1857 return ret;
1858
1859retry_private:
1860 *hb = queue_lock(q);
1861
1862 ret = get_futex_value_locked(&uval, uaddr);
1863
1864 if (ret) {
1865 queue_unlock(q, *hb);
1866
1867 ret = get_user(uval, uaddr);
1868 if (ret)
1869 goto out;
1870
1871 if (!(flags & FLAGS_SHARED))
1872 goto retry_private;
1873
1874 put_futex_key(&q->key);
1875 goto retry;
1876 }
1877
1878 if (uval != val) {
1879 queue_unlock(q, *hb);
1880 ret = -EWOULDBLOCK;
1881 }
1882
1883out:
1884 if (ret)
1885 put_futex_key(&q->key);
1886 return ret;
1887}
1888
1889static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
1890 ktime_t *abs_time, u32 bitset)
1891{
1892 struct hrtimer_sleeper timeout, *to = NULL;
1893 struct restart_block *restart;
1894 struct futex_hash_bucket *hb;
1895 struct futex_q q = futex_q_init;
1896 int ret;
1897
1898 if (!bitset)
1899 return -EINVAL;
1900 q.bitset = bitset;
1901
1902 if (abs_time) {
1903 to = &timeout;
1904
1905 hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
1906 CLOCK_REALTIME : CLOCK_MONOTONIC,
1907 HRTIMER_MODE_ABS);
1908 hrtimer_init_sleeper(to, current);
1909 hrtimer_set_expires_range_ns(&to->timer, *abs_time,
1910 current->timer_slack_ns);
1911 }
1912
1913retry:
1914
1915
1916
1917
1918 ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
1919 if (ret)
1920 goto out;
1921
1922
1923 futex_wait_queue_me(hb, &q, to);
1924
1925
1926 ret = 0;
1927
1928 if (!unqueue_me(&q))
1929 goto out;
1930 ret = -ETIMEDOUT;
1931 if (to && !to->task)
1932 goto out;
1933
1934
1935
1936
1937
1938 if (!signal_pending(current))
1939 goto retry;
1940
1941 ret = -ERESTARTSYS;
1942 if (!abs_time)
1943 goto out;
1944
1945 restart = ¤t_thread_info()->restart_block;
1946 restart->fn = futex_wait_restart;
1947 restart->futex.uaddr = uaddr;
1948 restart->futex.val = val;
1949 restart->futex.time = abs_time->tv64;
1950 restart->futex.bitset = bitset;
1951 restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;
1952
1953 ret = -ERESTART_RESTARTBLOCK;
1954
1955out:
1956 if (to) {
1957 hrtimer_cancel(&to->timer);
1958 destroy_hrtimer_on_stack(&to->timer);
1959 }
1960 return ret;
1961}
1962
1963
1964static long futex_wait_restart(struct restart_block *restart)
1965{
1966 u32 __user *uaddr = restart->futex.uaddr;
1967 ktime_t t, *tp = NULL;
1968
1969 if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
1970 t.tv64 = restart->futex.time;
1971 tp = &t;
1972 }
1973 restart->fn = do_no_restart_syscall;
1974
1975 return (long)futex_wait(uaddr, restart->futex.flags,
1976 restart->futex.val, tp, restart->futex.bitset);
1977}
1978
1979
1980
1981
1982
1983
1984
1985
1986static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, int detect,
1987 ktime_t *time, int trylock)
1988{
1989 struct hrtimer_sleeper timeout, *to = NULL;
1990 struct futex_hash_bucket *hb;
1991 struct futex_q q = futex_q_init;
1992 int res, ret;
1993
1994 if (refill_pi_state_cache())
1995 return -ENOMEM;
1996
1997 if (time) {
1998 to = &timeout;
1999 hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME,
2000 HRTIMER_MODE_ABS);
2001 hrtimer_init_sleeper(to, current);
2002 hrtimer_set_expires(&to->timer, *time);
2003 }
2004
2005retry:
2006 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, VERIFY_WRITE);
2007 if (unlikely(ret != 0))
2008 goto out;
2009
2010retry_private:
2011 hb = queue_lock(&q);
2012
2013 ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0);
2014 if (unlikely(ret)) {
2015 switch (ret) {
2016 case 1:
2017
2018 ret = 0;
2019 goto out_unlock_put_key;
2020 case -EFAULT:
2021 goto uaddr_faulted;
2022 case -EAGAIN:
2023
2024
2025
2026
2027 queue_unlock(&q, hb);
2028 put_futex_key(&q.key);
2029 cond_resched();
2030 goto retry;
2031 default:
2032 goto out_unlock_put_key;
2033 }
2034 }
2035
2036
2037
2038
2039 queue_me(&q, hb);
2040
2041 WARN_ON(!q.pi_state);
2042
2043
2044
2045 if (!trylock)
2046 ret = rt_mutex_timed_lock(&q.pi_state->pi_mutex, to, 1);
2047 else {
2048 ret = rt_mutex_trylock(&q.pi_state->pi_mutex);
2049
2050 ret = ret ? 0 : -EWOULDBLOCK;
2051 }
2052
2053 spin_lock(q.lock_ptr);
2054
2055
2056
2057
2058 res = fixup_owner(uaddr, &q, !ret);
2059
2060
2061
2062
2063 if (res)
2064 ret = (res < 0) ? res : 0;
2065
2066
2067
2068
2069
2070 if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current))
2071 rt_mutex_unlock(&q.pi_state->pi_mutex);
2072
2073
2074 unqueue_me_pi(&q);
2075
2076 goto out_put_key;
2077
2078out_unlock_put_key:
2079 queue_unlock(&q, hb);
2080
2081out_put_key:
2082 put_futex_key(&q.key);
2083out:
2084 if (to)
2085 destroy_hrtimer_on_stack(&to->timer);
2086 return ret != -EINTR ? ret : -ERESTARTNOINTR;
2087
2088uaddr_faulted:
2089 queue_unlock(&q, hb);
2090
2091 ret = fault_in_user_writeable(uaddr);
2092 if (ret)
2093 goto out_put_key;
2094
2095 if (!(flags & FLAGS_SHARED))
2096 goto retry_private;
2097
2098 put_futex_key(&q.key);
2099 goto retry;
2100}
2101
2102
2103
2104
2105
2106
2107static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
2108{
2109 struct futex_hash_bucket *hb;
2110 struct futex_q *this, *next;
2111 struct plist_head *head;
2112 union futex_key key = FUTEX_KEY_INIT;
2113 u32 uval, vpid = task_pid_vnr(current);
2114 int ret;
2115
2116retry:
2117 if (get_user(uval, uaddr))
2118 return -EFAULT;
2119
2120
2121
2122 if ((uval & FUTEX_TID_MASK) != vpid)
2123 return -EPERM;
2124
2125 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_WRITE);
2126 if (unlikely(ret != 0))
2127 goto out;
2128
2129 hb = hash_futex(&key);
2130 spin_lock(&hb->lock);
2131
2132
2133
2134
2135
2136
2137 if (!(uval & FUTEX_OWNER_DIED) &&
2138 cmpxchg_futex_value_locked(&uval, uaddr, vpid, 0))
2139 goto pi_faulted;
2140
2141
2142
2143
2144 if (unlikely(uval == vpid))
2145 goto out_unlock;
2146
2147
2148
2149
2150
2151 head = &hb->chain;
2152
2153 plist_for_each_entry_safe(this, next, head, list) {
2154 if (!match_futex (&this->key, &key))
2155 continue;
2156 ret = wake_futex_pi(uaddr, uval, this);
2157
2158
2159
2160
2161
2162 if (ret == -EFAULT)
2163 goto pi_faulted;
2164 goto out_unlock;
2165 }
2166
2167
2168
2169 if (!(uval & FUTEX_OWNER_DIED)) {
2170 ret = unlock_futex_pi(uaddr, uval);
2171 if (ret == -EFAULT)
2172 goto pi_faulted;
2173 }
2174
2175out_unlock:
2176 spin_unlock(&hb->lock);
2177 put_futex_key(&key);
2178
2179out:
2180 return ret;
2181
2182pi_faulted:
2183 spin_unlock(&hb->lock);
2184 put_futex_key(&key);
2185
2186 ret = fault_in_user_writeable(uaddr);
2187 if (!ret)
2188 goto retry;
2189
2190 return ret;
2191}
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209static inline
2210int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
2211 struct futex_q *q, union futex_key *key2,
2212 struct hrtimer_sleeper *timeout)
2213{
2214 int ret = 0;
2215
2216
2217
2218
2219
2220
2221
2222
2223 if (!match_futex(&q->key, key2)) {
2224 WARN_ON(q->lock_ptr && (&hb->lock != q->lock_ptr));
2225
2226
2227
2228
2229 plist_del(&q->list, &hb->chain);
2230
2231
2232 ret = -EWOULDBLOCK;
2233 if (timeout && !timeout->task)
2234 ret = -ETIMEDOUT;
2235 else if (signal_pending(current))
2236 ret = -ERESTARTNOINTR;
2237 }
2238 return ret;
2239}
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
2283 u32 val, ktime_t *abs_time, u32 bitset,
2284 u32 __user *uaddr2)
2285{
2286 struct hrtimer_sleeper timeout, *to = NULL;
2287 struct rt_mutex_waiter rt_waiter;
2288 struct rt_mutex *pi_mutex = NULL;
2289 struct futex_hash_bucket *hb;
2290 union futex_key key2 = FUTEX_KEY_INIT;
2291 struct futex_q q = futex_q_init;
2292 int res, ret;
2293
2294 if (uaddr == uaddr2)
2295 return -EINVAL;
2296
2297 if (!bitset)
2298 return -EINVAL;
2299
2300 if (abs_time) {
2301 to = &timeout;
2302 hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
2303 CLOCK_REALTIME : CLOCK_MONOTONIC,
2304 HRTIMER_MODE_ABS);
2305 hrtimer_init_sleeper(to, current);
2306 hrtimer_set_expires_range_ns(&to->timer, *abs_time,
2307 current->timer_slack_ns);
2308 }
2309
2310
2311
2312
2313
2314 debug_rt_mutex_init_waiter(&rt_waiter);
2315 rt_waiter.task = NULL;
2316
2317 ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
2318 if (unlikely(ret != 0))
2319 goto out;
2320
2321 q.bitset = bitset;
2322 q.rt_waiter = &rt_waiter;
2323 q.requeue_pi_key = &key2;
2324
2325
2326
2327
2328
2329 ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
2330 if (ret)
2331 goto out_key2;
2332
2333
2334 futex_wait_queue_me(hb, &q, to);
2335
2336 spin_lock(&hb->lock);
2337 ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
2338 spin_unlock(&hb->lock);
2339 if (ret)
2340 goto out_put_keys;
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352 if (!q.rt_waiter) {
2353
2354
2355
2356
2357 if (q.pi_state && (q.pi_state->owner != current)) {
2358 spin_lock(q.lock_ptr);
2359 ret = fixup_pi_state_owner(uaddr2, &q, current);
2360 spin_unlock(q.lock_ptr);
2361 }
2362 } else {
2363
2364
2365
2366
2367
2368 WARN_ON(!q.pi_state);
2369 pi_mutex = &q.pi_state->pi_mutex;
2370 ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1);
2371 debug_rt_mutex_free_waiter(&rt_waiter);
2372
2373 spin_lock(q.lock_ptr);
2374
2375
2376
2377
2378 res = fixup_owner(uaddr2, &q, !ret);
2379
2380
2381
2382
2383 if (res)
2384 ret = (res < 0) ? res : 0;
2385
2386
2387 unqueue_me_pi(&q);
2388 }
2389
2390
2391
2392
2393
2394 if (ret == -EFAULT) {
2395 if (pi_mutex && rt_mutex_owner(pi_mutex) == current)
2396 rt_mutex_unlock(pi_mutex);
2397 } else if (ret == -EINTR) {
2398
2399
2400
2401
2402
2403
2404
2405 ret = -EWOULDBLOCK;
2406 }
2407
2408out_put_keys:
2409 put_futex_key(&q.key);
2410out_key2:
2411 put_futex_key(&key2);
2412
2413out:
2414 if (to) {
2415 hrtimer_cancel(&to->timer);
2416 destroy_hrtimer_on_stack(&to->timer);
2417 }
2418 return ret;
2419}
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
2442 size_t, len)
2443{
2444 if (!futex_cmpxchg_enabled)
2445 return -ENOSYS;
2446
2447
2448
2449 if (unlikely(len != sizeof(*head)))
2450 return -EINVAL;
2451
2452 current->robust_list = head;
2453
2454 return 0;
2455}
2456
2457
2458
2459
2460
2461
2462
2463SYSCALL_DEFINE3(get_robust_list, int, pid,
2464 struct robust_list_head __user * __user *, head_ptr,
2465 size_t __user *, len_ptr)
2466{
2467 struct robust_list_head __user *head;
2468 unsigned long ret;
2469 struct task_struct *p;
2470
2471 if (!futex_cmpxchg_enabled)
2472 return -ENOSYS;
2473
2474 WARN_ONCE(1, "deprecated: get_robust_list will be deleted in 2013.\n");
2475
2476 rcu_read_lock();
2477
2478 ret = -ESRCH;
2479 if (!pid)
2480 p = current;
2481 else {
2482 p = find_task_by_vpid(pid);
2483 if (!p)
2484 goto err_unlock;
2485 }
2486
2487 ret = -EPERM;
2488 if (!ptrace_may_access(p, PTRACE_MODE_READ))
2489 goto err_unlock;
2490
2491 head = p->robust_list;
2492 rcu_read_unlock();
2493
2494 if (put_user(sizeof(*head), len_ptr))
2495 return -EFAULT;
2496 return put_user(head, head_ptr);
2497
2498err_unlock:
2499 rcu_read_unlock();
2500
2501 return ret;
2502}
2503
2504
2505
2506
2507
2508int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
2509{
2510 u32 uval, uninitialized_var(nval), mval;
2511
2512retry:
2513 if (get_user(uval, uaddr))
2514 return -1;
2515
2516 if ((uval & FUTEX_TID_MASK) == task_pid_vnr(curr)) {
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527 mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537 if (cmpxchg_futex_value_locked(&nval, uaddr, uval, mval)) {
2538 if (fault_in_user_writeable(uaddr))
2539 return -1;
2540 goto retry;
2541 }
2542 if (nval != uval)
2543 goto retry;
2544
2545
2546
2547
2548
2549 if (!pi && (uval & FUTEX_WAITERS))
2550 futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
2551 }
2552 return 0;
2553}
2554
2555
2556
2557
2558static inline int fetch_robust_entry(struct robust_list __user **entry,
2559 struct robust_list __user * __user *head,
2560 unsigned int *pi)
2561{
2562 unsigned long uentry;
2563
2564 if (get_user(uentry, (unsigned long __user *)head))
2565 return -EFAULT;
2566
2567 *entry = (void __user *)(uentry & ~1UL);
2568 *pi = uentry & 1;
2569
2570 return 0;
2571}
2572
2573
2574
2575
2576
2577
2578
2579void exit_robust_list(struct task_struct *curr)
2580{
2581 struct robust_list_head __user *head = curr->robust_list;
2582 struct robust_list __user *entry, *next_entry, *pending;
2583 unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
2584 unsigned int uninitialized_var(next_pi);
2585 unsigned long futex_offset;
2586 int rc;
2587
2588 if (!futex_cmpxchg_enabled)
2589 return;
2590
2591
2592
2593
2594
2595 if (fetch_robust_entry(&entry, &head->list.next, &pi))
2596 return;
2597
2598
2599
2600 if (get_user(futex_offset, &head->futex_offset))
2601 return;
2602
2603
2604
2605
2606 if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
2607 return;
2608
2609 next_entry = NULL;
2610 while (entry != &head->list) {
2611
2612
2613
2614
2615 rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi);
2616
2617
2618
2619
2620 if (entry != pending)
2621 if (handle_futex_death((void __user *)entry + futex_offset,
2622 curr, pi))
2623 return;
2624 if (rc)
2625 return;
2626 entry = next_entry;
2627 pi = next_pi;
2628
2629
2630
2631 if (!--limit)
2632 break;
2633
2634 cond_resched();
2635 }
2636
2637 if (pending)
2638 handle_futex_death((void __user *)pending + futex_offset,
2639 curr, pip);
2640}
2641
2642long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
2643 u32 __user *uaddr2, u32 val2, u32 val3)
2644{
2645 int cmd = op & FUTEX_CMD_MASK;
2646 unsigned int flags = 0;
2647
2648 if (!(op & FUTEX_PRIVATE_FLAG))
2649 flags |= FLAGS_SHARED;
2650
2651 if (op & FUTEX_CLOCK_REALTIME) {
2652 flags |= FLAGS_CLOCKRT;
2653 if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI)
2654 return -ENOSYS;
2655 }
2656
2657 switch (cmd) {
2658 case FUTEX_LOCK_PI:
2659 case FUTEX_UNLOCK_PI:
2660 case FUTEX_TRYLOCK_PI:
2661 case FUTEX_WAIT_REQUEUE_PI:
2662 case FUTEX_CMP_REQUEUE_PI:
2663 if (!futex_cmpxchg_enabled)
2664 return -ENOSYS;
2665 }
2666
2667 switch (cmd) {
2668 case FUTEX_WAIT:
2669 val3 = FUTEX_BITSET_MATCH_ANY;
2670 case FUTEX_WAIT_BITSET:
2671 return futex_wait(uaddr, flags, val, timeout, val3);
2672 case FUTEX_WAKE:
2673 val3 = FUTEX_BITSET_MATCH_ANY;
2674 case FUTEX_WAKE_BITSET:
2675 return futex_wake(uaddr, flags, val, val3);
2676 case FUTEX_REQUEUE:
2677 return futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0);
2678 case FUTEX_CMP_REQUEUE:
2679 return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0);
2680 case FUTEX_WAKE_OP:
2681 return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
2682 case FUTEX_LOCK_PI:
2683 return futex_lock_pi(uaddr, flags, val, timeout, 0);
2684 case FUTEX_UNLOCK_PI:
2685 return futex_unlock_pi(uaddr, flags);
2686 case FUTEX_TRYLOCK_PI:
2687 return futex_lock_pi(uaddr, flags, 0, timeout, 1);
2688 case FUTEX_WAIT_REQUEUE_PI:
2689 val3 = FUTEX_BITSET_MATCH_ANY;
2690 return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
2691 uaddr2);
2692 case FUTEX_CMP_REQUEUE_PI:
2693 return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1);
2694 }
2695 return -ENOSYS;
2696}
2697
2698
2699SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
2700 struct timespec __user *, utime, u32 __user *, uaddr2,
2701 u32, val3)
2702{
2703 struct timespec ts;
2704 ktime_t t, *tp = NULL;
2705 u32 val2 = 0;
2706 int cmd = op & FUTEX_CMD_MASK;
2707
2708 if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
2709 cmd == FUTEX_WAIT_BITSET ||
2710 cmd == FUTEX_WAIT_REQUEUE_PI)) {
2711 if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
2712 return -EFAULT;
2713 if (!timespec_valid(&ts))
2714 return -EINVAL;
2715
2716 t = timespec_to_ktime(ts);
2717 if (cmd == FUTEX_WAIT)
2718 t = ktime_add_safe(ktime_get(), t);
2719 tp = &t;
2720 }
2721
2722
2723
2724
2725 if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
2726 cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
2727 val2 = (u32) (unsigned long) utime;
2728
2729 return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
2730}
2731
2732static int __init futex_init(void)
2733{
2734 u32 curval;
2735 int i;
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747 if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
2748 futex_cmpxchg_enabled = 1;
2749
2750 for (i = 0; i < ARRAY_SIZE(futex_queues); i++) {
2751 plist_head_init(&futex_queues[i].chain);
2752 spin_lock_init(&futex_queues[i].lock);
2753 }
2754
2755 return 0;
2756}
2757__initcall(futex_init);
2758