1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47#include <linux/slab.h>
48#include <linux/poll.h>
49#include <linux/fs.h>
50#include <linux/file.h>
51#include <linux/jhash.h>
52#include <linux/init.h>
53#include <linux/futex.h>
54#include <linux/mount.h>
55#include <linux/pagemap.h>
56#include <linux/syscalls.h>
57#include <linux/signal.h>
58#include <linux/export.h>
59#include <linux/magic.h>
60#include <linux/pid.h>
61#include <linux/nsproxy.h>
62#include <linux/ptrace.h>
63
64#include <asm/futex.h>
65
66#include "rtmutex_common.h"
67
/*
 * Global switch consulted by exit_pi_state_list(), which does nothing while
 * this is zero.  It is not assigned anywhere in this part of the file.
 * NOTE(review): presumably set at init time once the architecture's futex
 * cmpxchg has been probed - confirm against the init code.
 */
int __read_mostly futex_cmpxchg_enabled;

/* log2 of the number of hash buckets in futex_queues[]. */
#define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
71
72
73
74
75
/*
 * Bits of the internal 'flags' argument passed between the futex operations:
 *  FLAGS_SHARED      - the futex is not process-private (selects the
 *                      fshared path of get_futex_key())
 *  FLAGS_CLOCKRT     - futex_wait() timeout is on CLOCK_REALTIME instead of
 *                      CLOCK_MONOTONIC
 *  FLAGS_HAS_TIMEOUT - set in the restart block by futex_wait() so that
 *                      futex_wait_restart() knows a timeout was saved
 */
#define FLAGS_SHARED 0x01
#define FLAGS_CLOCKRT 0x02
#define FLAGS_HAS_TIMEOUT 0x04
79
80
81
82
/*
 * Priority-inheritance state shared by all waiters of one PI futex.
 */
struct futex_pi_state {
	/*
	 * Node in the owning task's pi_state_list; added and removed under
	 * that task's pi_lock.
	 */
	struct list_head list;

	/*
	 * The rt_mutex backing the PI futex.
	 */
	struct rt_mutex pi_mutex;

	/* Task recorded as owner of the futex, or NULL if there is none. */
	struct task_struct *owner;
	/* Reference count; free_pi_state() releases the state on the last put. */
	atomic_t refcount;

	/* Key of the futex; exit_pi_state_list() uses it to find the bucket. */
	union futex_key key;
};
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
/*
 * One entry per task waiting on a futex.  Entries are linked into the
 * priority list of the hash bucket their key hashes to.
 */
struct futex_q {
	/* Priority-sorted node in futex_hash_bucket::chain. */
	struct plist_node list;

	/* The waiting task (set in queue_me()). */
	struct task_struct *task;
	/*
	 * Lock of the bucket this entry is queued on.  wake_futex() sets it
	 * to NULL after dequeueing; requeue moves it to the new bucket's lock.
	 */
	spinlock_t *lock_ptr;
	/* Key of the futex being waited on. */
	union futex_key key;
	/* PI state for PI futexes, NULL otherwise. */
	struct futex_pi_state *pi_state;
	/* rt_mutex waiter used by requeue-PI; NULL for ordinary waiters. */
	struct rt_mutex_waiter *rt_waiter;
	/* Key of the only futex this waiter may be requeued to (requeue-PI). */
	union futex_key *requeue_pi_key;
	/* Wake-up filter; futex_wake() only wakes entries whose bitset intersects. */
	u32 bitset;
};
134
/*
 * Template for on-stack futex_q objects: empty key, match-any bitset, and
 * every other member zero/NULL through static initialisation.
 */
static const struct futex_q futex_q_init = {
	/* list, task, lock_ptr, pi_state, rt_waiter, requeue_pi_key are zero */
	.key = FUTEX_KEY_INIT,
	.bitset = FUTEX_BITSET_MATCH_ANY
};
140
141
142
143
144
145
/*
 * A hash bucket: the priority list of waiters whose key hashes here, and
 * the spinlock protecting that list.
 */
struct futex_hash_bucket {
	spinlock_t lock;
	struct plist_head chain;
};

/* The global hash table, indexed by hash_futex(). */
static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];
152
153
154
155
156static struct futex_hash_bucket *hash_futex(union futex_key *key)
157{
158 u32 hash = jhash2((u32*)&key->both.word,
159 (sizeof(key->both.word)+sizeof(key->both.ptr))/4,
160 key->both.offset);
161 return &futex_queues[hash & ((1 << FUTEX_HASHBITS)-1)];
162}
163
164
165
166
167static inline int match_futex(union futex_key *key1, union futex_key *key2)
168{
169 return (key1 && key2
170 && key1->both.word == key2->both.word
171 && key1->both.ptr == key2->both.ptr
172 && key1->both.offset == key2->both.offset);
173}
174
175
176
177
178
179
180static void get_futex_key_refs(union futex_key *key)
181{
182 if (!key->both.ptr)
183 return;
184
185 switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
186 case FUT_OFF_INODE:
187 ihold(key->shared.inode);
188 break;
189 case FUT_OFF_MMSHARED:
190 atomic_inc(&key->private.mm->mm_count);
191 break;
192 }
193}
194
195
196
197
198
199static void drop_futex_key_refs(union futex_key *key)
200{
201 if (!key->both.ptr) {
202
203 WARN_ON_ONCE(1);
204 return;
205 }
206
207 switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
208 case FUT_OFF_INODE:
209 iput(key->shared.inode);
210 break;
211 case FUT_OFF_MMSHARED:
212 mmdrop(key->private.mm);
213 break;
214 }
215}
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
/*
 * get_futex_key() - compute the key identifying the futex at @uaddr
 * @uaddr:	user address of the futex word
 * @fshared:	zero for a process-private futex, non-zero otherwise
 * @key:	where the resulting key is stored
 * @rw:		VERIFY_READ or VERIFY_WRITE; with VERIFY_READ a page that can
 *		only be pinned read-only is still accepted
 *
 * Returns 0 on success, -EINVAL if @uaddr is not u32 aligned, or a negative
 * error (typically -EFAULT) if the address cannot be accessed.
 *
 * Private futexes are keyed on (current->mm, page address, offset).  Shared
 * futexes are keyed on (mm, page address, offset) when the backing page is
 * anonymous and on (inode, page index, offset) otherwise.  On success
 * get_futex_key_refs() has been called on the key.
 */
static int
get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
{
	unsigned long address = (unsigned long)uaddr;
	struct mm_struct *mm = current->mm;
	struct page *page, *page_head;
	int err, ro = 0;

	/*
	 * Split the address into page base and in-page offset; the futex
	 * word has to be naturally aligned.
	 */
	key->both.offset = address % PAGE_SIZE;
	if (unlikely((address % sizeof(u32)) != 0))
		return -EINVAL;
	address -= key->both.offset;

	/*
	 * Private futex: no page lookup needed, the key is the mm plus the
	 * virtual address.  No FUT_OFF_* bit is set in the offset, so
	 * get_futex_key_refs() takes no reference for this flavour.
	 */
	if (!fshared) {
		if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))))
			return -EFAULT;
		key->private.mm = mm;
		key->private.address = address;
		get_futex_key_refs(key);
		return 0;
	}

again:
	/* First try to pin the page writable. */
	err = get_user_pages_fast(address, 1, 1, &page);
	/*
	 * Callers that only need read access may fall back to a read-only
	 * pin; remember that in 'ro'.
	 */
	if (err == -EFAULT && rw == VERIFY_READ) {
		err = get_user_pages_fast(address, 1, 0, &page);
		ro = 1;
	}
	if (err < 0)
		return err;
	else
		err = 0;

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	page_head = page;
	if (unlikely(PageTail(page))) {
		put_page(page);
		/*
		 * Look the page up again with interrupts disabled and move
		 * our reference to the compound head.
		 * NOTE(review): presumably disabling interrupts keeps the huge
		 * page from being split while the head is resolved - confirm.
		 */
		local_irq_disable();
		if (likely(__get_user_pages_fast(address, 1, 1, &page) == 1)) {
			page_head = compound_head(page);
			/* Hold the head instead of the tail page. */
			if (page != page_head) {
				get_page(page_head);
				put_page(page);
			}
			local_irq_enable();
		} else {
			local_irq_enable();
			goto again;
		}
	}
#else
	/* Without THP simply move the reference to the compound head. */
	page_head = compound_head(page);
	if (page != page_head) {
		get_page(page_head);
		put_page(page);
	}
#endif

	lock_page(page_head);

	/*
	 * A page without a mapping cannot be keyed.  If it is in the swap
	 * cache, retry the lookup; otherwise fail with -EFAULT.
	 */
	if (!page_head->mapping) {
		int shmem_swizzled = PageSwapCache(page_head);
		unlock_page(page_head);
		put_page(page_head);
		if (shmem_swizzled)
			goto again;
		return -EFAULT;
	}

	/*
	 * Anonymous page: key on (mm, address).  File-backed page: key on
	 * (inode, page index).
	 */
	if (PageAnon(page_head)) {
		/*
		 * An anonymous page that could only be pinned read-only is
		 * rejected.
		 */
		if (ro) {
			err = -EFAULT;
			goto out;
		}

		key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
		key->private.mm = mm;
		key->private.address = address;
	} else {
		key->both.offset |= FUT_OFF_INODE; /* inode-based key */
		key->shared.inode = page_head->mapping->host;
		key->shared.pgoff = page_head->index;
	}

	get_futex_key_refs(key);

out:
	unlock_page(page_head);
	put_page(page_head);
	return err;
}
376
/*
 * Counterpart of get_futex_key(): give back the reference held by @key.
 */
static inline void put_futex_key(union futex_key *key)
{
	drop_futex_key_refs(key);
}
381
382
383
384
385
386
387
388
389
390
391
392
393
394static int fault_in_user_writeable(u32 __user *uaddr)
395{
396 struct mm_struct *mm = current->mm;
397 int ret;
398
399 down_read(&mm->mmap_sem);
400 ret = fixup_user_fault(current, mm, (unsigned long)uaddr,
401 FAULT_FLAG_WRITE);
402 up_read(&mm->mmap_sem);
403
404 return ret < 0 ? ret : 0;
405}
406
407
408
409
410
411
412
413
414static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb,
415 union futex_key *key)
416{
417 struct futex_q *this;
418
419 plist_for_each_entry(this, &hb->chain, list) {
420 if (match_futex(&this->key, key))
421 return this;
422 }
423 return NULL;
424}
425
426static int cmpxchg_futex_value_locked(u32 *curval, u32 __user *uaddr,
427 u32 uval, u32 newval)
428{
429 int ret;
430
431 pagefault_disable();
432 ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
433 pagefault_enable();
434
435 return ret;
436}
437
438static int get_futex_value_locked(u32 *dest, u32 __user *from)
439{
440 int ret;
441
442 pagefault_disable();
443 ret = __copy_from_user_inatomic(dest, from, sizeof(u32));
444 pagefault_enable();
445
446 return ret ? -EFAULT : 0;
447}
448
449
450
451
452
453static int refill_pi_state_cache(void)
454{
455 struct futex_pi_state *pi_state;
456
457 if (likely(current->pi_state_cache))
458 return 0;
459
460 pi_state = kzalloc(sizeof(*pi_state), GFP_KERNEL);
461
462 if (!pi_state)
463 return -ENOMEM;
464
465 INIT_LIST_HEAD(&pi_state->list);
466
467 pi_state->owner = NULL;
468 atomic_set(&pi_state->refcount, 1);
469 pi_state->key = FUTEX_KEY_INIT;
470
471 current->pi_state_cache = pi_state;
472
473 return 0;
474}
475
476static struct futex_pi_state * alloc_pi_state(void)
477{
478 struct futex_pi_state *pi_state = current->pi_state_cache;
479
480 WARN_ON(!pi_state);
481 current->pi_state_cache = NULL;
482
483 return pi_state;
484}
485
486static void free_pi_state(struct futex_pi_state *pi_state)
487{
488 if (!atomic_dec_and_test(&pi_state->refcount))
489 return;
490
491
492
493
494
495 if (pi_state->owner) {
496 raw_spin_lock_irq(&pi_state->owner->pi_lock);
497 list_del_init(&pi_state->list);
498 raw_spin_unlock_irq(&pi_state->owner->pi_lock);
499
500 rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner);
501 }
502
503 if (current->pi_state_cache)
504 kfree(pi_state);
505 else {
506
507
508
509
510
511 pi_state->owner = NULL;
512 atomic_set(&pi_state->refcount, 1);
513 current->pi_state_cache = pi_state;
514 }
515}
516
517
518
519
520
521static struct task_struct * futex_find_get_task(pid_t pid)
522{
523 struct task_struct *p;
524
525 rcu_read_lock();
526 p = find_task_by_vpid(pid);
527 if (p)
528 get_task_struct(p);
529
530 rcu_read_unlock();
531
532 return p;
533}
534
535
536
537
538
539
/*
 * exit_pi_state_list() - release every PI state still owned by @curr
 * @curr:	the task whose pi_state_list is to be emptied
 *
 * For each entry the owner is cleared and the rt_mutex is unlocked.
 * Does nothing when futex_cmpxchg_enabled is zero.
 */
void exit_pi_state_list(struct task_struct *curr)
{
	struct list_head *next, *head = &curr->pi_state_list;
	struct futex_pi_state *pi_state;
	struct futex_hash_bucket *hb;
	union futex_key key = FUTEX_KEY_INIT;

	if (!futex_cmpxchg_enabled)
		return;
	/*
	 * The hash bucket lock has to be taken before pi_lock, so pi_lock
	 * is dropped and retaken around spin_lock(&hb->lock) below.
	 */
	raw_spin_lock_irq(&curr->pi_lock);
	while (!list_empty(head)) {

		next = head->next;
		pi_state = list_entry(next, struct futex_pi_state, list);
		key = pi_state->key;
		hb = hash_futex(&key);
		raw_spin_unlock_irq(&curr->pi_lock);

		spin_lock(&hb->lock);

		raw_spin_lock_irq(&curr->pi_lock);
		/*
		 * pi_lock was dropped: if the list head changed meanwhile,
		 * this entry was handled by someone else - start over.
		 */
		if (head->next != next) {
			spin_unlock(&hb->lock);
			continue;
		}

		WARN_ON(pi_state->owner != curr);
		WARN_ON(list_empty(&pi_state->list));
		list_del_init(&pi_state->list);
		pi_state->owner = NULL;
		raw_spin_unlock_irq(&curr->pi_lock);

		rt_mutex_unlock(&pi_state->pi_mutex);

		spin_unlock(&hb->lock);

		raw_spin_lock_irq(&curr->pi_lock);
	}
	raw_spin_unlock_irq(&curr->pi_lock);
}
589
/*
 * lookup_pi_state() - find or create the PI state for a futex
 * @uval:	current user-space value of the futex word
 * @hb:		hash bucket of the futex (the caller is expected to hold its
 *		lock)
 * @key:	key of the futex
 * @ps:		receives the PI state on success
 *
 * If a waiter is already queued on @key, its pi_state is reused and a
 * reference is taken.  Otherwise a new state is set up from the per-task
 * cache, with the task named by the TID in @uval as owner.
 *
 * Returns 0 on success, -EINVAL on inconsistent state, -ESRCH if no owner
 * task can be found (or it has finished exiting), -EAGAIN if the owner is
 * in the middle of exiting.
 */
static int
lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
		union futex_key *key, struct futex_pi_state **ps)
{
	struct futex_pi_state *pi_state = NULL;
	struct futex_q *this, *next;
	struct plist_head *head;
	struct task_struct *p;
	pid_t pid = uval & FUTEX_TID_MASK;

	head = &hb->chain;

	plist_for_each_entry_safe(this, next, head, list) {
		if (match_futex(&this->key, key)) {
			/*
			 * An existing waiter: every waiter on a PI futex
			 * must carry a pi_state.
			 */
			pi_state = this->pi_state;
			/*
			 * A waiter without pi_state means user space mixed
			 * PI and non-PI operations on the same futex.
			 */
			if (unlikely(!pi_state))
				return -EINVAL;

			WARN_ON(!atomic_read(&pi_state->refcount));

			/*
			 * When the futex word names a TID and the state has
			 * an owner, the two must agree.  A zero TID or an
			 * ownerless state skips the check.
			 */
			if (pid && pi_state->owner) {
				/*
				 * Reject a user-space value that does not
				 * match the kernel's view of the owner.
				 */
				if (pid != task_pid_vnr(pi_state->owner))
					return -EINVAL;
			}

			atomic_inc(&pi_state->refcount);
			*ps = pi_state;

			return 0;
		}
	}

	/*
	 * No waiter queued yet: we are the first one and have to attach a
	 * new pi_state to the owner named in the futex word.
	 */
	if (!pid)
		return -ESRCH;
	p = futex_find_get_task(pid);
	if (!p)
		return -ESRCH;

	/*
	 * Check the exit state under pi_lock so the state is never attached
	 * to a task that is past the point where exit_pi_state_list() runs.
	 */
	raw_spin_lock_irq(&p->pi_lock);
	if (unlikely(p->flags & PF_EXITING)) {
		/*
		 * The task is exiting: once PF_EXITPIDONE is set it is gone
		 * for good (-ESRCH); before that the caller should retry
		 * (-EAGAIN).
		 */
		int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;

		raw_spin_unlock_irq(&p->pi_lock);
		put_task_struct(p);
		return ret;
	}

	pi_state = alloc_pi_state();

	/*
	 * Initialise the rt_mutex as already locked by the owner task.
	 */
	rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);

	/* Remember the key so exit_pi_state_list() can find the bucket. */
	pi_state->key = *key;

	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &p->pi_state_list);
	pi_state->owner = p;
	raw_spin_unlock_irq(&p->pi_lock);

	put_task_struct(p);

	*ps = pi_state;

	return 0;
}
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
/*
 * futex_lock_pi_atomic() - atomic part of acquiring a PI futex
 * @uaddr:		the PI futex user address
 * @hb:			hash bucket of the futex
 * @key:		key of the futex
 * @ps:			receives the pi_state when the lock is contended
 * @task:		the task to acquire the lock for (not necessarily
 *			current)
 * @set_waiters:	non-zero to set FUTEX_WAITERS even on an uncontended
 *			acquisition
 *
 * Returns:
 *   1 - the lock was acquired for @task
 *   0 - the lock is held by someone else; *@ps has been set up and the
 *       caller should block
 *  <0 - error (-EFAULT, -EDEADLK, or whatever lookup_pi_state() returned)
 */
static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
				union futex_key *key,
				struct futex_pi_state **ps,
				struct task_struct *task, int set_waiters)
{
	int lock_taken, ret, force_take = 0;
	u32 uval, newval, curval, vpid = task_pid_vnr(task);

retry:
	ret = lock_taken = 0;

	/*
	 * Try the uncontended transition 0 -> TID first, optionally with
	 * the waiters bit already set.
	 */
	newval = vpid;
	if (set_waiters)
		newval |= FUTEX_WAITERS;

	if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, 0, newval)))
		return -EFAULT;

	/*
	 * The futex word already names @task as owner: deadlock.
	 */
	if ((unlikely((curval & FUTEX_TID_MASK) == vpid)))
		return -EDEADLK;

	/*
	 * The old value was 0, so the cmpxchg succeeded and @task owns it.
	 */
	if (unlikely(!curval))
		return 1;

	uval = curval;

	/*
	 * Contended: announce a waiter by setting FUTEX_WAITERS while
	 * keeping the current owner.
	 */
	newval = curval | FUTEX_WAITERS;

	/*
	 * force_take is set below when the futex word turned out to have no
	 * owner TID; in that case the word is taken over for @task.
	 */
	if (unlikely(force_take)) {
		/*
		 * Replace the TID with ours and keep the remaining bits.
		 */
		newval = (curval & ~FUTEX_TID_MASK) | vpid;
		force_take = 0;
		lock_taken = 1;
	}

	if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)))
		return -EFAULT;
	if (unlikely(curval != uval))
		goto retry;

	/*
	 * The forced takeover went through.
	 */
	if (unlikely(lock_taken))
		return 1;

	/*
	 * The waiters bit is set; now find or create the pi_state for the
	 * current owner.
	 */
	ret = lookup_pi_state(uval, hb, key, ps);

	if (unlikely(ret)) {
		switch (ret) {
		case -ESRCH:
			/*
			 * No owner task was found.  Re-read the futex word:
			 * if the owner TID has been cleared in the meantime,
			 * retry and take the futex by force.
			 */
			if (get_futex_value_locked(&curval, uaddr))
				return -EFAULT;

			/*
			 * A zero TID field means the futex has no owner any
			 * more.  Otherwise fall through and return -ESRCH.
			 */
			if (!(curval & FUTEX_TID_MASK)) {
				force_take = 1;
				goto retry;
			}
		default:
			break;
		}
	}

	return ret;
}
819
820
821
822
823
824
825
826static void __unqueue_futex(struct futex_q *q)
827{
828 struct futex_hash_bucket *hb;
829
830 if (WARN_ON_SMP(!q->lock_ptr || !spin_is_locked(q->lock_ptr))
831 || WARN_ON(plist_node_empty(&q->list)))
832 return;
833
834 hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
835 plist_del(&q->list, &hb->chain);
836}
837
838
839
840
841
842static void wake_futex(struct futex_q *q)
843{
844 struct task_struct *p = q->task;
845
846
847
848
849
850
851
852
853 get_task_struct(p);
854
855 __unqueue_futex(q);
856
857
858
859
860
861
862 smp_wmb();
863 q->lock_ptr = NULL;
864
865 wake_up_state(p, TASK_NORMAL);
866 put_task_struct(p);
867}
868
/*
 * wake_futex_pi() - hand a PI futex over to the next owner
 * @uaddr:	the futex user address
 * @uval:	the value of the futex word as read by the caller
 * @this:	the top waiter on the futex
 *
 * Returns 0 on success, -EINVAL if @this has no pi_state, the caller is
 * not the recorded owner, or the futex word changed under us, and -EFAULT
 * if the futex word could not be accessed.
 */
static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
{
	struct task_struct *new_owner;
	struct futex_pi_state *pi_state = this->pi_state;
	u32 uninitialized_var(curval), newval;

	if (!pi_state)
		return -EINVAL;

	/*
	 * Only the task recorded as owner may unlock.
	 */
	if (pi_state->owner != current)
		return -EINVAL;

	raw_spin_lock(&pi_state->pi_mutex.wait_lock);
	new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);

	/*
	 * If the rt_mutex has no next owner, fall back to the task of the
	 * futex waiter we were handed.
	 */
	if (!new_owner)
		new_owner = this->task;

	/*
	 * Write the new owner's TID (with the waiters bit) into the futex
	 * word, unless FUTEX_OWNER_DIED is set in @uval, in which case the
	 * word is left untouched here.
	 */
	if (!(uval & FUTEX_OWNER_DIED)) {
		int ret = 0;

		newval = FUTEX_WAITERS | task_pid_vnr(new_owner);

		if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
			ret = -EFAULT;
		else if (curval != uval)
			ret = -EINVAL;
		if (ret) {
			raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
			return ret;
		}
	}

	/* Move the pi_state from the old owner's list to the new owner's. */
	raw_spin_lock_irq(&pi_state->owner->pi_lock);
	WARN_ON(list_empty(&pi_state->list));
	list_del_init(&pi_state->list);
	raw_spin_unlock_irq(&pi_state->owner->pi_lock);

	raw_spin_lock_irq(&new_owner->pi_lock);
	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &new_owner->pi_state_list);
	pi_state->owner = new_owner;
	raw_spin_unlock_irq(&new_owner->pi_lock);

	raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
	rt_mutex_unlock(&pi_state->pi_mutex);

	return 0;
}
932
933static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
934{
935 u32 uninitialized_var(oldval);
936
937
938
939
940
941 if (cmpxchg_futex_value_locked(&oldval, uaddr, uval, 0))
942 return -EFAULT;
943 if (oldval != uval)
944 return -EAGAIN;
945
946 return 0;
947}
948
949
950
951
952static inline void
953double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
954{
955 if (hb1 <= hb2) {
956 spin_lock(&hb1->lock);
957 if (hb1 < hb2)
958 spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
959 } else {
960 spin_lock(&hb2->lock);
961 spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING);
962 }
963}
964
965static inline void
966double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
967{
968 spin_unlock(&hb1->lock);
969 if (hb1 != hb2)
970 spin_unlock(&hb2->lock);
971}
972
973
974
975
976static int
977futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
978{
979 struct futex_hash_bucket *hb;
980 struct futex_q *this, *next;
981 struct plist_head *head;
982 union futex_key key = FUTEX_KEY_INIT;
983 int ret;
984
985 if (!bitset)
986 return -EINVAL;
987
988 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_READ);
989 if (unlikely(ret != 0))
990 goto out;
991
992 hb = hash_futex(&key);
993 spin_lock(&hb->lock);
994 head = &hb->chain;
995
996 plist_for_each_entry_safe(this, next, head, list) {
997 if (match_futex (&this->key, &key)) {
998 if (this->pi_state || this->rt_waiter) {
999 ret = -EINVAL;
1000 break;
1001 }
1002
1003
1004 if (!(this->bitset & bitset))
1005 continue;
1006
1007 wake_futex(this);
1008 if (++ret >= nr_wake)
1009 break;
1010 }
1011 }
1012
1013 spin_unlock(&hb->lock);
1014 put_futex_key(&key);
1015out:
1016 return ret;
1017}
1018
1019
1020
1021
1022
1023static int
1024futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
1025 int nr_wake, int nr_wake2, int op)
1026{
1027 union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
1028 struct futex_hash_bucket *hb1, *hb2;
1029 struct plist_head *head;
1030 struct futex_q *this, *next;
1031 int ret, op_ret;
1032
1033retry:
1034 ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
1035 if (unlikely(ret != 0))
1036 goto out;
1037 ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
1038 if (unlikely(ret != 0))
1039 goto out_put_key1;
1040
1041 hb1 = hash_futex(&key1);
1042 hb2 = hash_futex(&key2);
1043
1044retry_private:
1045 double_lock_hb(hb1, hb2);
1046 op_ret = futex_atomic_op_inuser(op, uaddr2);
1047 if (unlikely(op_ret < 0)) {
1048
1049 double_unlock_hb(hb1, hb2);
1050
1051#ifndef CONFIG_MMU
1052
1053
1054
1055
1056 ret = op_ret;
1057 goto out_put_keys;
1058#endif
1059
1060 if (unlikely(op_ret != -EFAULT)) {
1061 ret = op_ret;
1062 goto out_put_keys;
1063 }
1064
1065 ret = fault_in_user_writeable(uaddr2);
1066 if (ret)
1067 goto out_put_keys;
1068
1069 if (!(flags & FLAGS_SHARED))
1070 goto retry_private;
1071
1072 put_futex_key(&key2);
1073 put_futex_key(&key1);
1074 goto retry;
1075 }
1076
1077 head = &hb1->chain;
1078
1079 plist_for_each_entry_safe(this, next, head, list) {
1080 if (match_futex (&this->key, &key1)) {
1081 wake_futex(this);
1082 if (++ret >= nr_wake)
1083 break;
1084 }
1085 }
1086
1087 if (op_ret > 0) {
1088 head = &hb2->chain;
1089
1090 op_ret = 0;
1091 plist_for_each_entry_safe(this, next, head, list) {
1092 if (match_futex (&this->key, &key2)) {
1093 wake_futex(this);
1094 if (++op_ret >= nr_wake2)
1095 break;
1096 }
1097 }
1098 ret += op_ret;
1099 }
1100
1101 double_unlock_hb(hb1, hb2);
1102out_put_keys:
1103 put_futex_key(&key2);
1104out_put_key1:
1105 put_futex_key(&key1);
1106out:
1107 return ret;
1108}
1109
1110
1111
1112
1113
1114
1115
1116
1117static inline
1118void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
1119 struct futex_hash_bucket *hb2, union futex_key *key2)
1120{
1121
1122
1123
1124
1125
1126 if (likely(&hb1->chain != &hb2->chain)) {
1127 plist_del(&q->list, &hb1->chain);
1128 plist_add(&q->list, &hb2->chain);
1129 q->lock_ptr = &hb2->lock;
1130 }
1131 get_futex_key_refs(key2);
1132 q->key = *key2;
1133}
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149static inline
1150void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
1151 struct futex_hash_bucket *hb)
1152{
1153 get_futex_key_refs(key);
1154 q->key = *key;
1155
1156 __unqueue_futex(q);
1157
1158 WARN_ON(!q->rt_waiter);
1159 q->rt_waiter = NULL;
1160
1161 q->lock_ptr = &hb->lock;
1162
1163 wake_up_state(q->task, TASK_NORMAL);
1164}
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186static int futex_proxy_trylock_atomic(u32 __user *pifutex,
1187 struct futex_hash_bucket *hb1,
1188 struct futex_hash_bucket *hb2,
1189 union futex_key *key1, union futex_key *key2,
1190 struct futex_pi_state **ps, int set_waiters)
1191{
1192 struct futex_q *top_waiter = NULL;
1193 u32 curval;
1194 int ret;
1195
1196 if (get_futex_value_locked(&curval, pifutex))
1197 return -EFAULT;
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207 top_waiter = futex_top_waiter(hb1, key1);
1208
1209
1210 if (!top_waiter)
1211 return 0;
1212
1213
1214 if (!match_futex(top_waiter->requeue_pi_key, key2))
1215 return -EINVAL;
1216
1217
1218
1219
1220
1221
1222 ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
1223 set_waiters);
1224 if (ret == 1)
1225 requeue_pi_wake_futex(top_waiter, key2, hb2);
1226
1227 return ret;
1228}
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
1249 u32 __user *uaddr2, int nr_wake, int nr_requeue,
1250 u32 *cmpval, int requeue_pi)
1251{
1252 union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
1253 int drop_count = 0, task_count = 0, ret;
1254 struct futex_pi_state *pi_state = NULL;
1255 struct futex_hash_bucket *hb1, *hb2;
1256 struct plist_head *head1;
1257 struct futex_q *this, *next;
1258 u32 curval2;
1259
1260 if (requeue_pi) {
1261
1262
1263
1264
1265 if (refill_pi_state_cache())
1266 return -ENOMEM;
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277 if (nr_wake != 1)
1278 return -EINVAL;
1279 }
1280
1281retry:
1282 if (pi_state != NULL) {
1283
1284
1285
1286
1287 free_pi_state(pi_state);
1288 pi_state = NULL;
1289 }
1290
1291 ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
1292 if (unlikely(ret != 0))
1293 goto out;
1294 ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2,
1295 requeue_pi ? VERIFY_WRITE : VERIFY_READ);
1296 if (unlikely(ret != 0))
1297 goto out_put_key1;
1298
1299 hb1 = hash_futex(&key1);
1300 hb2 = hash_futex(&key2);
1301
1302retry_private:
1303 double_lock_hb(hb1, hb2);
1304
1305 if (likely(cmpval != NULL)) {
1306 u32 curval;
1307
1308 ret = get_futex_value_locked(&curval, uaddr1);
1309
1310 if (unlikely(ret)) {
1311 double_unlock_hb(hb1, hb2);
1312
1313 ret = get_user(curval, uaddr1);
1314 if (ret)
1315 goto out_put_keys;
1316
1317 if (!(flags & FLAGS_SHARED))
1318 goto retry_private;
1319
1320 put_futex_key(&key2);
1321 put_futex_key(&key1);
1322 goto retry;
1323 }
1324 if (curval != *cmpval) {
1325 ret = -EAGAIN;
1326 goto out_unlock;
1327 }
1328 }
1329
1330 if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
1331
1332
1333
1334
1335
1336
1337 ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
1338 &key2, &pi_state, nr_requeue);
1339
1340
1341
1342
1343
1344
1345
1346 if (ret == 1) {
1347 WARN_ON(pi_state);
1348 drop_count++;
1349 task_count++;
1350 ret = get_futex_value_locked(&curval2, uaddr2);
1351 if (!ret)
1352 ret = lookup_pi_state(curval2, hb2, &key2,
1353 &pi_state);
1354 }
1355
1356 switch (ret) {
1357 case 0:
1358 break;
1359 case -EFAULT:
1360 double_unlock_hb(hb1, hb2);
1361 put_futex_key(&key2);
1362 put_futex_key(&key1);
1363 ret = fault_in_user_writeable(uaddr2);
1364 if (!ret)
1365 goto retry;
1366 goto out;
1367 case -EAGAIN:
1368
1369 double_unlock_hb(hb1, hb2);
1370 put_futex_key(&key2);
1371 put_futex_key(&key1);
1372 cond_resched();
1373 goto retry;
1374 default:
1375 goto out_unlock;
1376 }
1377 }
1378
1379 head1 = &hb1->chain;
1380 plist_for_each_entry_safe(this, next, head1, list) {
1381 if (task_count - nr_wake >= nr_requeue)
1382 break;
1383
1384 if (!match_futex(&this->key, &key1))
1385 continue;
1386
1387
1388
1389
1390
1391 if ((requeue_pi && !this->rt_waiter) ||
1392 (!requeue_pi && this->rt_waiter)) {
1393 ret = -EINVAL;
1394 break;
1395 }
1396
1397
1398
1399
1400
1401
1402 if (++task_count <= nr_wake && !requeue_pi) {
1403 wake_futex(this);
1404 continue;
1405 }
1406
1407
1408 if (requeue_pi && !match_futex(this->requeue_pi_key, &key2)) {
1409 ret = -EINVAL;
1410 break;
1411 }
1412
1413
1414
1415
1416
1417 if (requeue_pi) {
1418
1419 atomic_inc(&pi_state->refcount);
1420 this->pi_state = pi_state;
1421 ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
1422 this->rt_waiter,
1423 this->task, 1);
1424 if (ret == 1) {
1425
1426 requeue_pi_wake_futex(this, &key2, hb2);
1427 drop_count++;
1428 continue;
1429 } else if (ret) {
1430
1431 this->pi_state = NULL;
1432 free_pi_state(pi_state);
1433 goto out_unlock;
1434 }
1435 }
1436 requeue_futex(this, hb1, hb2, &key2);
1437 drop_count++;
1438 }
1439
1440out_unlock:
1441 double_unlock_hb(hb1, hb2);
1442
1443
1444
1445
1446
1447
1448
1449 while (--drop_count >= 0)
1450 drop_futex_key_refs(&key1);
1451
1452out_put_keys:
1453 put_futex_key(&key2);
1454out_put_key1:
1455 put_futex_key(&key1);
1456out:
1457 if (pi_state != NULL)
1458 free_pi_state(pi_state);
1459 return ret ? ret : task_count;
1460}
1461
1462
1463static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
1464 __acquires(&hb->lock)
1465{
1466 struct futex_hash_bucket *hb;
1467
1468 hb = hash_futex(&q->key);
1469 q->lock_ptr = &hb->lock;
1470
1471 spin_lock(&hb->lock);
1472 return hb;
1473}
1474
1475static inline void
1476queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb)
1477 __releases(&hb->lock)
1478{
1479 spin_unlock(&hb->lock);
1480}
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
1495 __releases(&hb->lock)
1496{
1497 int prio;
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507 prio = min(current->normal_prio, MAX_RT_PRIO);
1508
1509 plist_node_init(&q->list, prio);
1510 plist_add(&q->list, &hb->chain);
1511 q->task = current;
1512 spin_unlock(&hb->lock);
1513}
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
/*
 * unqueue_me() - remove a (non-PI) waiter from its hash bucket
 * @q:	the waiter
 *
 * Must be called without the bucket lock held.  Drops the key reference
 * held by @q in every case.
 *
 * Returns 1 if @q was still queued and has been removed here, 0 if it had
 * already been dequeued by a waker (q->lock_ptr was NULL).
 */
static int unqueue_me(struct futex_q *q)
{
	spinlock_t *lock_ptr;
	int ret = 0;

	/* q->lock_ptr may be changed by a waker or requeuer at any time. */
retry:
	lock_ptr = q->lock_ptr;
	/* Keep the compiler from re-reading q->lock_ptr after the test. */
	barrier();
	if (lock_ptr != NULL) {
		spin_lock(lock_ptr);
		/*
		 * q->lock_ptr can change between reading it and taking the
		 * lock: a requeue moves it to another bucket's lock and a
		 * wakeup clears it.  If that happened we hold the wrong
		 * lock; drop it and start over.  The local copy is used for
		 * the unlock because q->lock_ptr itself may be NULL by now.
		 */
		if (unlikely(lock_ptr != q->lock_ptr)) {
			spin_unlock(lock_ptr);
			goto retry;
		}
		__unqueue_futex(q);

		BUG_ON(q->pi_state);

		spin_unlock(lock_ptr);
		ret = 1;
	}

	drop_futex_key_refs(&q->key);
	return ret;
}
1565
1566
1567
1568
1569
1570
1571static void unqueue_me_pi(struct futex_q *q)
1572 __releases(q->lock_ptr)
1573{
1574 __unqueue_futex(q);
1575
1576 BUG_ON(!q->pi_state);
1577 free_pi_state(q->pi_state);
1578 q->pi_state = NULL;
1579
1580 spin_unlock(q->lock_ptr);
1581}
1582
1583
1584
1585
1586
1587
1588
/*
 * fixup_pi_state_owner() - make @newowner the owner of q->pi_state
 * @uaddr:	user address of the futex
 * @q:		the waiter whose pi_state is fixed up; the caller must hold
 *		q->lock_ptr
 * @newowner:	the task to record as owner
 *
 * Rewrites the TID in the user-space futex word and moves the pi_state to
 * @newowner's list.  q->lock_ptr is dropped and retaken if the user word
 * has to be faulted in.
 *
 * Returns 0 on success (or when somebody else fixed the owner up while the
 * lock was dropped), otherwise the error from fault_in_user_writeable().
 */
static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
				struct task_struct *newowner)
{
	u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
	struct futex_pi_state *pi_state = q->pi_state;
	struct task_struct *oldowner = pi_state->owner;
	u32 uval, uninitialized_var(curval), newval;
	int ret;

	/* No recorded owner: flag the futex word with FUTEX_OWNER_DIED. */
	if (!pi_state->owner)
		newtid |= FUTEX_OWNER_DIED;

	/*
	 * Update the user-space word first.  The accesses below run with
	 * page faults disabled; if one of them faults we go to handle_fault,
	 * which drops the bucket lock to fault the page in.
	 */
retry:
	if (get_futex_value_locked(&uval, uaddr))
		goto handle_fault;

	while (1) {
		/* Keep an existing OWNER_DIED bit, replace everything else. */
		newval = (uval & FUTEX_OWNER_DIED) | newtid;

		if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
			goto handle_fault;
		if (curval == uval)
			break;
		uval = curval;
	}

	/*
	 * The user word is updated; now move the pi_state from the old
	 * owner's list (if there was an owner) to the new owner's list.
	 */
	if (pi_state->owner != NULL) {
		raw_spin_lock_irq(&pi_state->owner->pi_lock);
		WARN_ON(list_empty(&pi_state->list));
		list_del_init(&pi_state->list);
		raw_spin_unlock_irq(&pi_state->owner->pi_lock);
	}

	pi_state->owner = newowner;

	raw_spin_lock_irq(&newowner->pi_lock);
	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &newowner->pi_state_list);
	raw_spin_unlock_irq(&newowner->pi_lock);
	return 0;

	/*
	 * Faulting the page in requires dropping the bucket lock.  While it
	 * is dropped another task may fix up the owner, which is detected
	 * below by comparing against the owner sampled on entry.
	 */
handle_fault:
	spin_unlock(q->lock_ptr);

	ret = fault_in_user_writeable(uaddr);

	spin_lock(q->lock_ptr);

	/*
	 * The owner changed while the lock was dropped: nothing left to do.
	 */
	if (pi_state->owner != oldowner)
		return 0;

	if (ret)
		return ret;

	goto retry;
}
1680
/* Forward declaration: futex_wait() installs this as its restart handler. */
static long futex_wait_restart(struct restart_block *restart);
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
/*
 * fixup_owner() - reconcile pi_state ownership after a lock attempt
 * @uaddr:	user address of the futex
 * @q:		the waiter; q->pi_state must be set
 * @locked:	non-zero if the caller acquired the rt_mutex, 0 otherwise
 *
 * Returns:
 *   1 - the lock is (now) held by current
 *   0 - the lock is not held by current
 *  <0 - error from fixup_pi_state_owner()
 */
static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
{
	struct task_struct *owner;
	int ret = 0;

	if (locked) {
		/*
		 * We hold the rt_mutex; if the pi_state does not name us as
		 * owner yet, fix that up.
		 */
		if (q->pi_state->owner != current)
			ret = fixup_pi_state_owner(uaddr, q, current);
		goto out;
	}

	/*
	 * We did not get the rt_mutex, yet the pi_state names us as owner:
	 * the two views disagree and have to be reconciled.
	 */
	if (q->pi_state->owner == current) {
		/*
		 * Try once more to take the rt_mutex; if that works the
		 * pi_state is already correct.
		 */
		if (rt_mutex_trylock(&q->pi_state->pi_mutex)) {
			locked = 1;
			goto out;
		}

		/*
		 * Somebody else holds (or is about to get) the rt_mutex.
		 * Record the current rt_mutex owner, or failing that the
		 * next owner in line, as pi_state owner.
		 */
		raw_spin_lock(&q->pi_state->pi_mutex.wait_lock);
		owner = rt_mutex_owner(&q->pi_state->pi_mutex);
		if (!owner)
			owner = rt_mutex_next_owner(&q->pi_state->pi_mutex);
		raw_spin_unlock(&q->pi_state->pi_mutex.wait_lock);
		ret = fixup_pi_state_owner(uaddr, q, owner);
		goto out;
	}

	/*
	 * Sanity check: we were told the lock was not acquired and the
	 * pi_state does not name us, so the rt_mutex must not be ours
	 * either.  Log the inconsistency if it is.
	 */
	if (rt_mutex_owner(&q->pi_state->pi_mutex) == current)
		printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
				"pi-state %p\n", ret,
				q->pi_state->pi_mutex.owner,
				q->pi_state->owner);

out:
	return ret ? ret : locked;
}
1755
1756
1757
1758
1759
1760
1761
/*
 * futex_wait_queue_me() - queue the current task on @hb and sleep
 * @hb:		the hash bucket; its lock must be held and is released here
 *		(by queue_me())
 * @q:		the futex_q to enqueue
 * @timeout:	optional prepared hrtimer sleeper, or NULL for no timeout
 *
 * Returns with the task in TASK_RUNNING state.  The caller has to find out
 * why it woke up (wakeup, timeout or signal).
 */
static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
				struct hrtimer_sleeper *timeout)
{
	/*
	 * The task state is set before queueing: queue_me() releases the
	 * bucket lock, and a wakeup that arrives right after that must not
	 * be lost by setting the state afterwards.
	 */
	set_current_state(TASK_INTERRUPTIBLE);
	queue_me(q, hb);

	/* Arm the timer; if it is not active afterwards, mark it expired. */
	if (timeout) {
		hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
		if (!hrtimer_active(&timeout->timer))
			timeout->task = NULL;
	}

	/*
	 * If we have already been dequeued, a wakeup happened between
	 * queue_me() and here and there is no need to sleep.
	 */
	if (likely(!plist_node_empty(&q->list))) {
		/*
		 * Only sleep if there is no timeout or it has not fired yet
		 * (timeout->task is cleared once the timer is done).
		 */
		if (!timeout || timeout->task)
			schedule();
	}
	__set_current_state(TASK_RUNNING);
}
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
/*
 * futex_wait_setup() - prepare to wait on a futex
 * @uaddr:	the futex user address
 * @val:	the value the futex word is expected to hold
 * @flags:	FLAGS_* (only FLAGS_SHARED is used here)
 * @q:		the futex_q; q->key is filled in
 * @hb:		receives the hash bucket
 *
 * Returns:
 *   0 - *@uaddr == @val; the key reference is held and (*@hb)->lock is
 *       held on return
 *  <0 - -EFAULT or -EWOULDBLOCK (*@uaddr != @val) or another error; no
 *       lock and no key reference is held
 */
static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
			   struct futex_q *q, struct futex_hash_bucket **hb)
{
	u32 uval;
	int ret;

	/*
	 * The futex word is read while holding the bucket lock, so that a
	 * waker which changes the word and then takes the bucket lock to
	 * look for waiters cannot slip in between our check of the value
	 * and our enqueueing.
	 */
retry:
	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, VERIFY_READ);
	if (unlikely(ret != 0))
		return ret;

retry_private:
	*hb = queue_lock(q);

	ret = get_futex_value_locked(&uval, uaddr);

	if (ret) {
		queue_unlock(q, *hb);

		/* Fault the word in with the lock dropped, then retry. */
		ret = get_user(uval, uaddr);
		if (ret)
			goto out;

		/* Private keys stay valid; shared keys must be recomputed. */
		if (!(flags & FLAGS_SHARED))
			goto retry_private;

		put_futex_key(&q->key);
		goto retry;
	}

	if (uval != val) {
		queue_unlock(q, *hb);
		ret = -EWOULDBLOCK;
	}

out:
	if (ret)
		put_futex_key(&q->key);
	return ret;
}
1872
1873static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
1874 ktime_t *abs_time, u32 bitset)
1875{
1876 struct hrtimer_sleeper timeout, *to = NULL;
1877 struct restart_block *restart;
1878 struct futex_hash_bucket *hb;
1879 struct futex_q q = futex_q_init;
1880 int ret;
1881
1882 if (!bitset)
1883 return -EINVAL;
1884 q.bitset = bitset;
1885
1886 if (abs_time) {
1887 to = &timeout;
1888
1889 hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
1890 CLOCK_REALTIME : CLOCK_MONOTONIC,
1891 HRTIMER_MODE_ABS);
1892 hrtimer_init_sleeper(to, current);
1893 hrtimer_set_expires_range_ns(&to->timer, *abs_time,
1894 current->timer_slack_ns);
1895 }
1896
1897retry:
1898
1899
1900
1901
1902 ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
1903 if (ret)
1904 goto out;
1905
1906
1907 futex_wait_queue_me(hb, &q, to);
1908
1909
1910 ret = 0;
1911
1912 if (!unqueue_me(&q))
1913 goto out;
1914 ret = -ETIMEDOUT;
1915 if (to && !to->task)
1916 goto out;
1917
1918
1919
1920
1921
1922 if (!signal_pending(current))
1923 goto retry;
1924
1925 ret = -ERESTARTSYS;
1926 if (!abs_time)
1927 goto out;
1928
1929 restart = ¤t_thread_info()->restart_block;
1930 restart->fn = futex_wait_restart;
1931 restart->futex.uaddr = uaddr;
1932 restart->futex.val = val;
1933 restart->futex.time = abs_time->tv64;
1934 restart->futex.bitset = bitset;
1935 restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;
1936
1937 ret = -ERESTART_RESTARTBLOCK;
1938
1939out:
1940 if (to) {
1941 hrtimer_cancel(&to->timer);
1942 destroy_hrtimer_on_stack(&to->timer);
1943 }
1944 return ret;
1945}
1946
1947
1948static long futex_wait_restart(struct restart_block *restart)
1949{
1950 u32 __user *uaddr = restart->futex.uaddr;
1951 ktime_t t, *tp = NULL;
1952
1953 if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
1954 t.tv64 = restart->futex.time;
1955 tp = &t;
1956 }
1957 restart->fn = do_no_restart_syscall;
1958
1959 return (long)futex_wait(uaddr, restart->futex.flags,
1960 restart->futex.val, tp, restart->futex.bitset);
1961}
1962
1963
1964
1965
1966
1967
1968
1969
1970static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, int detect,
1971 ktime_t *time, int trylock)
1972{
1973 struct hrtimer_sleeper timeout, *to = NULL;
1974 struct futex_hash_bucket *hb;
1975 struct futex_q q = futex_q_init;
1976 int res, ret;
1977
1978 if (refill_pi_state_cache())
1979 return -ENOMEM;
1980
1981 if (time) {
1982 to = &timeout;
1983 hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME,
1984 HRTIMER_MODE_ABS);
1985 hrtimer_init_sleeper(to, current);
1986 hrtimer_set_expires(&to->timer, *time);
1987 }
1988
1989retry:
1990 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, VERIFY_WRITE);
1991 if (unlikely(ret != 0))
1992 goto out;
1993
1994retry_private:
1995 hb = queue_lock(&q);
1996
1997 ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0);
1998 if (unlikely(ret)) {
1999 switch (ret) {
2000 case 1:
2001
2002 ret = 0;
2003 goto out_unlock_put_key;
2004 case -EFAULT:
2005 goto uaddr_faulted;
2006 case -EAGAIN:
2007
2008
2009
2010
2011 queue_unlock(&q, hb);
2012 put_futex_key(&q.key);
2013 cond_resched();
2014 goto retry;
2015 default:
2016 goto out_unlock_put_key;
2017 }
2018 }
2019
2020
2021
2022
2023 queue_me(&q, hb);
2024
2025 WARN_ON(!q.pi_state);
2026
2027
2028
2029 if (!trylock)
2030 ret = rt_mutex_timed_lock(&q.pi_state->pi_mutex, to, 1);
2031 else {
2032 ret = rt_mutex_trylock(&q.pi_state->pi_mutex);
2033
2034 ret = ret ? 0 : -EWOULDBLOCK;
2035 }
2036
2037 spin_lock(q.lock_ptr);
2038
2039
2040
2041
2042 res = fixup_owner(uaddr, &q, !ret);
2043
2044
2045
2046
2047 if (res)
2048 ret = (res < 0) ? res : 0;
2049
2050
2051
2052
2053
2054 if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current))
2055 rt_mutex_unlock(&q.pi_state->pi_mutex);
2056
2057
2058 unqueue_me_pi(&q);
2059
2060 goto out_put_key;
2061
2062out_unlock_put_key:
2063 queue_unlock(&q, hb);
2064
2065out_put_key:
2066 put_futex_key(&q.key);
2067out:
2068 if (to)
2069 destroy_hrtimer_on_stack(&to->timer);
2070 return ret != -EINTR ? ret : -ERESTARTNOINTR;
2071
2072uaddr_faulted:
2073 queue_unlock(&q, hb);
2074
2075 ret = fault_in_user_writeable(uaddr);
2076 if (ret)
2077 goto out_put_key;
2078
2079 if (!(flags & FLAGS_SHARED))
2080 goto retry_private;
2081
2082 put_futex_key(&q.key);
2083 goto retry;
2084}
2085
2086
2087
2088
2089
2090
2091static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
2092{
2093 struct futex_hash_bucket *hb;
2094 struct futex_q *this, *next;
2095 struct plist_head *head;
2096 union futex_key key = FUTEX_KEY_INIT;
2097 u32 uval, vpid = task_pid_vnr(current);
2098 int ret;
2099
2100retry:
2101 if (get_user(uval, uaddr))
2102 return -EFAULT;
2103
2104
2105
2106 if ((uval & FUTEX_TID_MASK) != vpid)
2107 return -EPERM;
2108
2109 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_WRITE);
2110 if (unlikely(ret != 0))
2111 goto out;
2112
2113 hb = hash_futex(&key);
2114 spin_lock(&hb->lock);
2115
2116
2117
2118
2119
2120
2121 if (!(uval & FUTEX_OWNER_DIED) &&
2122 cmpxchg_futex_value_locked(&uval, uaddr, vpid, 0))
2123 goto pi_faulted;
2124
2125
2126
2127
2128 if (unlikely(uval == vpid))
2129 goto out_unlock;
2130
2131
2132
2133
2134
2135 head = &hb->chain;
2136
2137 plist_for_each_entry_safe(this, next, head, list) {
2138 if (!match_futex (&this->key, &key))
2139 continue;
2140 ret = wake_futex_pi(uaddr, uval, this);
2141
2142
2143
2144
2145
2146 if (ret == -EFAULT)
2147 goto pi_faulted;
2148 goto out_unlock;
2149 }
2150
2151
2152
2153 if (!(uval & FUTEX_OWNER_DIED)) {
2154 ret = unlock_futex_pi(uaddr, uval);
2155 if (ret == -EFAULT)
2156 goto pi_faulted;
2157 }
2158
2159out_unlock:
2160 spin_unlock(&hb->lock);
2161 put_futex_key(&key);
2162
2163out:
2164 return ret;
2165
2166pi_faulted:
2167 spin_unlock(&hb->lock);
2168 put_futex_key(&key);
2169
2170 ret = fault_in_user_writeable(uaddr);
2171 if (!ret)
2172 goto retry;
2173
2174 return ret;
2175}
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193static inline
2194int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
2195 struct futex_q *q, union futex_key *key2,
2196 struct hrtimer_sleeper *timeout)
2197{
2198 int ret = 0;
2199
2200
2201
2202
2203
2204
2205
2206
2207 if (!match_futex(&q->key, key2)) {
2208 WARN_ON(q->lock_ptr && (&hb->lock != q->lock_ptr));
2209
2210
2211
2212
2213 plist_del(&q->list, &hb->chain);
2214
2215
2216 ret = -EWOULDBLOCK;
2217 if (timeout && !timeout->task)
2218 ret = -ETIMEDOUT;
2219 else if (signal_pending(current))
2220 ret = -ERESTARTNOINTR;
2221 }
2222 return ret;
2223}
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
2267 u32 val, ktime_t *abs_time, u32 bitset,
2268 u32 __user *uaddr2)
2269{
2270 struct hrtimer_sleeper timeout, *to = NULL;
2271 struct rt_mutex_waiter rt_waiter;
2272 struct rt_mutex *pi_mutex = NULL;
2273 struct futex_hash_bucket *hb;
2274 union futex_key key2 = FUTEX_KEY_INIT;
2275 struct futex_q q = futex_q_init;
2276 int res, ret;
2277
2278 if (uaddr == uaddr2)
2279 return -EINVAL;
2280
2281 if (!bitset)
2282 return -EINVAL;
2283
2284 if (abs_time) {
2285 to = &timeout;
2286 hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
2287 CLOCK_REALTIME : CLOCK_MONOTONIC,
2288 HRTIMER_MODE_ABS);
2289 hrtimer_init_sleeper(to, current);
2290 hrtimer_set_expires_range_ns(&to->timer, *abs_time,
2291 current->timer_slack_ns);
2292 }
2293
2294
2295
2296
2297
2298 debug_rt_mutex_init_waiter(&rt_waiter);
2299 rt_waiter.task = NULL;
2300
2301 ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
2302 if (unlikely(ret != 0))
2303 goto out;
2304
2305 q.bitset = bitset;
2306 q.rt_waiter = &rt_waiter;
2307 q.requeue_pi_key = &key2;
2308
2309
2310
2311
2312
2313 ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
2314 if (ret)
2315 goto out_key2;
2316
2317
2318 futex_wait_queue_me(hb, &q, to);
2319
2320 spin_lock(&hb->lock);
2321 ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
2322 spin_unlock(&hb->lock);
2323 if (ret)
2324 goto out_put_keys;
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336 if (!q.rt_waiter) {
2337
2338
2339
2340
2341 if (q.pi_state && (q.pi_state->owner != current)) {
2342 spin_lock(q.lock_ptr);
2343 ret = fixup_pi_state_owner(uaddr2, &q, current);
2344 spin_unlock(q.lock_ptr);
2345 }
2346 } else {
2347
2348
2349
2350
2351
2352 WARN_ON(!q.pi_state);
2353 pi_mutex = &q.pi_state->pi_mutex;
2354 ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1);
2355 debug_rt_mutex_free_waiter(&rt_waiter);
2356
2357 spin_lock(q.lock_ptr);
2358
2359
2360
2361
2362 res = fixup_owner(uaddr2, &q, !ret);
2363
2364
2365
2366
2367 if (res)
2368 ret = (res < 0) ? res : 0;
2369
2370
2371 unqueue_me_pi(&q);
2372 }
2373
2374
2375
2376
2377
2378 if (ret == -EFAULT) {
2379 if (pi_mutex && rt_mutex_owner(pi_mutex) == current)
2380 rt_mutex_unlock(pi_mutex);
2381 } else if (ret == -EINTR) {
2382
2383
2384
2385
2386
2387
2388
2389 ret = -EWOULDBLOCK;
2390 }
2391
2392out_put_keys:
2393 put_futex_key(&q.key);
2394out_key2:
2395 put_futex_key(&key2);
2396
2397out:
2398 if (to) {
2399 hrtimer_cancel(&to->timer);
2400 destroy_hrtimer_on_stack(&to->timer);
2401 }
2402 return ret;
2403}
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
2426 size_t, len)
2427{
2428 if (!futex_cmpxchg_enabled)
2429 return -ENOSYS;
2430
2431
2432
2433 if (unlikely(len != sizeof(*head)))
2434 return -EINVAL;
2435
2436 current->robust_list = head;
2437
2438 return 0;
2439}
2440
2441
2442
2443
2444
2445
2446
2447SYSCALL_DEFINE3(get_robust_list, int, pid,
2448 struct robust_list_head __user * __user *, head_ptr,
2449 size_t __user *, len_ptr)
2450{
2451 struct robust_list_head __user *head;
2452 unsigned long ret;
2453 struct task_struct *p;
2454
2455 if (!futex_cmpxchg_enabled)
2456 return -ENOSYS;
2457
2458 WARN_ONCE(1, "deprecated: get_robust_list will be deleted in 2013.\n");
2459
2460 rcu_read_lock();
2461
2462 ret = -ESRCH;
2463 if (!pid)
2464 p = current;
2465 else {
2466 p = find_task_by_vpid(pid);
2467 if (!p)
2468 goto err_unlock;
2469 }
2470
2471 ret = -EPERM;
2472 if (!ptrace_may_access(p, PTRACE_MODE_READ))
2473 goto err_unlock;
2474
2475 head = p->robust_list;
2476 rcu_read_unlock();
2477
2478 if (put_user(sizeof(*head), len_ptr))
2479 return -EFAULT;
2480 return put_user(head, head_ptr);
2481
2482err_unlock:
2483 rcu_read_unlock();
2484
2485 return ret;
2486}
2487
2488
2489
2490
2491
2492int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
2493{
2494 u32 uval, uninitialized_var(nval), mval;
2495
2496retry:
2497 if (get_user(uval, uaddr))
2498 return -1;
2499
2500 if ((uval & FUTEX_TID_MASK) == task_pid_vnr(curr)) {
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511 mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521 if (cmpxchg_futex_value_locked(&nval, uaddr, uval, mval)) {
2522 if (fault_in_user_writeable(uaddr))
2523 return -1;
2524 goto retry;
2525 }
2526 if (nval != uval)
2527 goto retry;
2528
2529
2530
2531
2532
2533 if (!pi && (uval & FUTEX_WAITERS))
2534 futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
2535 }
2536 return 0;
2537}
2538
2539
2540
2541
2542static inline int fetch_robust_entry(struct robust_list __user **entry,
2543 struct robust_list __user * __user *head,
2544 unsigned int *pi)
2545{
2546 unsigned long uentry;
2547
2548 if (get_user(uentry, (unsigned long __user *)head))
2549 return -EFAULT;
2550
2551 *entry = (void __user *)(uentry & ~1UL);
2552 *pi = uentry & 1;
2553
2554 return 0;
2555}
2556
2557
2558
2559
2560
2561
2562
2563void exit_robust_list(struct task_struct *curr)
2564{
2565 struct robust_list_head __user *head = curr->robust_list;
2566 struct robust_list __user *entry, *next_entry, *pending;
2567 unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
2568 unsigned int uninitialized_var(next_pi);
2569 unsigned long futex_offset;
2570 int rc;
2571
2572 if (!futex_cmpxchg_enabled)
2573 return;
2574
2575
2576
2577
2578
2579 if (fetch_robust_entry(&entry, &head->list.next, &pi))
2580 return;
2581
2582
2583
2584 if (get_user(futex_offset, &head->futex_offset))
2585 return;
2586
2587
2588
2589
2590 if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
2591 return;
2592
2593 next_entry = NULL;
2594 while (entry != &head->list) {
2595
2596
2597
2598
2599 rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi);
2600
2601
2602
2603
2604 if (entry != pending)
2605 if (handle_futex_death((void __user *)entry + futex_offset,
2606 curr, pi))
2607 return;
2608 if (rc)
2609 return;
2610 entry = next_entry;
2611 pi = next_pi;
2612
2613
2614
2615 if (!--limit)
2616 break;
2617
2618 cond_resched();
2619 }
2620
2621 if (pending)
2622 handle_futex_death((void __user *)pending + futex_offset,
2623 curr, pip);
2624}
2625
2626long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
2627 u32 __user *uaddr2, u32 val2, u32 val3)
2628{
2629 int cmd = op & FUTEX_CMD_MASK;
2630 unsigned int flags = 0;
2631
2632 if (!(op & FUTEX_PRIVATE_FLAG))
2633 flags |= FLAGS_SHARED;
2634
2635 if (op & FUTEX_CLOCK_REALTIME) {
2636 flags |= FLAGS_CLOCKRT;
2637 if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI)
2638 return -ENOSYS;
2639 }
2640
2641 switch (cmd) {
2642 case FUTEX_LOCK_PI:
2643 case FUTEX_UNLOCK_PI:
2644 case FUTEX_TRYLOCK_PI:
2645 case FUTEX_WAIT_REQUEUE_PI:
2646 case FUTEX_CMP_REQUEUE_PI:
2647 if (!futex_cmpxchg_enabled)
2648 return -ENOSYS;
2649 }
2650
2651 switch (cmd) {
2652 case FUTEX_WAIT:
2653 val3 = FUTEX_BITSET_MATCH_ANY;
2654 case FUTEX_WAIT_BITSET:
2655 return futex_wait(uaddr, flags, val, timeout, val3);
2656 case FUTEX_WAKE:
2657 val3 = FUTEX_BITSET_MATCH_ANY;
2658 case FUTEX_WAKE_BITSET:
2659 return futex_wake(uaddr, flags, val, val3);
2660 case FUTEX_REQUEUE:
2661 return futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0);
2662 case FUTEX_CMP_REQUEUE:
2663 return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0);
2664 case FUTEX_WAKE_OP:
2665 return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
2666 case FUTEX_LOCK_PI:
2667 return futex_lock_pi(uaddr, flags, val, timeout, 0);
2668 case FUTEX_UNLOCK_PI:
2669 return futex_unlock_pi(uaddr, flags);
2670 case FUTEX_TRYLOCK_PI:
2671 return futex_lock_pi(uaddr, flags, 0, timeout, 1);
2672 case FUTEX_WAIT_REQUEUE_PI:
2673 val3 = FUTEX_BITSET_MATCH_ANY;
2674 return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
2675 uaddr2);
2676 case FUTEX_CMP_REQUEUE_PI:
2677 return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1);
2678 }
2679 return -ENOSYS;
2680}
2681
2682
2683SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
2684 struct timespec __user *, utime, u32 __user *, uaddr2,
2685 u32, val3)
2686{
2687 struct timespec ts;
2688 ktime_t t, *tp = NULL;
2689 u32 val2 = 0;
2690 int cmd = op & FUTEX_CMD_MASK;
2691
2692 if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
2693 cmd == FUTEX_WAIT_BITSET ||
2694 cmd == FUTEX_WAIT_REQUEUE_PI)) {
2695 if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
2696 return -EFAULT;
2697 if (!timespec_valid(&ts))
2698 return -EINVAL;
2699
2700 t = timespec_to_ktime(ts);
2701 if (cmd == FUTEX_WAIT)
2702 t = ktime_add_safe(ktime_get(), t);
2703 tp = &t;
2704 }
2705
2706
2707
2708
2709 if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
2710 cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
2711 val2 = (u32) (unsigned long) utime;
2712
2713 return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
2714}
2715
2716static int __init futex_init(void)
2717{
2718 u32 curval;
2719 int i;
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731 if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
2732 futex_cmpxchg_enabled = 1;
2733
2734 for (i = 0; i < ARRAY_SIZE(futex_queues); i++) {
2735 plist_head_init(&futex_queues[i].chain);
2736 spin_lock_init(&futex_queues[i].lock);
2737 }
2738
2739 return 0;
2740}
2741__initcall(futex_init);
2742