1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43#include <linux/slab.h>
44#include <linux/poll.h>
45#include <linux/fs.h>
46#include <linux/file.h>
47#include <linux/jhash.h>
48#include <linux/init.h>
49#include <linux/futex.h>
50#include <linux/mount.h>
51#include <linux/pagemap.h>
52#include <linux/syscalls.h>
53#include <linux/signal.h>
54#include <linux/module.h>
55#include <linux/magic.h>
56#include <linux/pid.h>
57#include <linux/nsproxy.h>
58
59#include <asm/futex.h>
60
61#include "rtmutex_common.h"
62
/*
 * Non-zero when the PI-futex and robust-list paths may be used.  The code in
 * this file only reads it (exit_pi_state_list(), exit_robust_list() and the
 * robust-list syscalls bail out when it is zero); it is not assigned in the
 * part of the file visible here.
 */
int __read_mostly futex_cmpxchg_enabled;

/* log2 of the number of hash buckets: 4 (16 buckets) with CONFIG_BASE_SMALL, else 8 (256). */
#define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
66
67
68
69
/*
 * Priority-inheritance state shared by all waiters of one PI futex.
 * Instances are allocated by refill_pi_state_cache(), handed out by
 * alloc_pi_state() and released through free_pi_state().
 */
struct futex_pi_state {
	/*
	 * Link in the owner task's pi_state_list.  Added and removed while
	 * holding that task's pi_lock.
	 */
	struct list_head list;

	/*
	 * The rt_mutex that futex_lock_pi() blocks on.
	 */
	struct rt_mutex pi_mutex;

	/* Task currently recorded as owner; NULL when there is none. */
	struct task_struct *owner;
	/* Reference count; free_pi_state() frees or recycles on the last put. */
	atomic_t refcount;

	/* Key of the futex this state belongs to (used to find the hash bucket). */
	union futex_key key;
};
87
88
89
90
91
92
93
94
95
96
/*
 * One queued waiter.  futex_wait() and futex_lock_pi() declare it on their
 * own stack and link it into a hash bucket's chain with queue_me().
 */
struct futex_q {
	/* Priority-sorted link in futex_hash_bucket.chain. */
	struct plist_node list;
	/* Wait queue the waker calls wake_up() on (see wake_futex()). */
	wait_queue_head_t waiter;

	/*
	 * Lock of the bucket this entry is queued on.  wake_futex() sets it
	 * to NULL once the entry has been dequeued; futex_requeue() repoints
	 * it when moving the entry to another bucket.
	 */
	spinlock_t *lock_ptr;

	/* Key this waiter is queued on; compared with match_futex(). */
	union futex_key key;

	/* PI state for PI futexes; NULL for plain futex_wait() waiters. */
	struct futex_pi_state *pi_state;
	/* The waiting task; set to current in queue_me(). */
	struct task_struct *task;

	/* Wake-up bitset; futex_wake() skips waiters whose bitset does not intersect. */
	u32 bitset;
};
115
116
117
118
/*
 * One hash bucket: a spinlock protecting a priority-sorted list of the
 * futex_q entries whose keys hash to this bucket.
 */
struct futex_hash_bucket {
	spinlock_t lock;
	struct plist_head chain;
};

/* The global hash table, indexed by hash_futex(). */
static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];
125
126
127
128
129static struct futex_hash_bucket *hash_futex(union futex_key *key)
130{
131 u32 hash = jhash2((u32*)&key->both.word,
132 (sizeof(key->both.word)+sizeof(key->both.ptr))/4,
133 key->both.offset);
134 return &futex_queues[hash & ((1 << FUTEX_HASHBITS)-1)];
135}
136
137
138
139
140static inline int match_futex(union futex_key *key1, union futex_key *key2)
141{
142 return (key1->both.word == key2->both.word
143 && key1->both.ptr == key2->both.ptr
144 && key1->both.offset == key2->both.offset);
145}
146
147
148
149
150
151
152static void get_futex_key_refs(union futex_key *key)
153{
154 if (!key->both.ptr)
155 return;
156
157 switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
158 case FUT_OFF_INODE:
159 atomic_inc(&key->shared.inode->i_count);
160 break;
161 case FUT_OFF_MMSHARED:
162 atomic_inc(&key->private.mm->mm_count);
163 break;
164 }
165}
166
167
168
169
170
171static void drop_futex_key_refs(union futex_key *key)
172{
173 if (!key->both.ptr) {
174
175 WARN_ON_ONCE(1);
176 return;
177 }
178
179 switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
180 case FUT_OFF_INODE:
181 iput(key->shared.inode);
182 break;
183 case FUT_OFF_MMSHARED:
184 mmdrop(key->private.mm);
185 break;
186 }
187}
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
/*
 * get_futex_key - compute the key identifying the futex word at @uaddr
 * @uaddr:   user address of the futex word; must be u32 aligned
 * @fshared: zero for a process-private futex, non-zero if it may be shared
 * @key:     key to fill in
 * @rw:      VERIFY_READ or VERIFY_WRITE; passed to access_ok() and used to
 *           decide whether the page is pinned for writing
 *
 * Returns 0 on success, -EINVAL if @uaddr is misaligned, -EFAULT if a
 * private address fails access_ok(), or the negative error returned by
 * get_user_pages_fast().
 *
 * Private keys are (mm, page address, offset).  Shared keys are either
 * (mm, page address, offset | FUT_OFF_MMSHARED) for anonymous pages or
 * (inode, page index, offset | FUT_OFF_INODE) for file-backed pages.
 *
 * On success get_futex_key_refs() has been called on @key; callers in this
 * file balance that with put_futex_key().
 */
static int
get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
{
	unsigned long address = (unsigned long)uaddr;
	struct mm_struct *mm = current->mm;
	struct page *page;
	int err;

	/*
	 * Split the address into page base and in-page offset, rejecting
	 * words that are not naturally aligned.
	 */
	key->both.offset = address % PAGE_SIZE;
	if (unlikely((address % sizeof(u32)) != 0))
		return -EINVAL;
	address -= key->both.offset;

	/*
	 * Private futex: the key is just the mm and the virtual address, so
	 * no page lookup is needed.  No FUT_OFF_* flag is set, hence
	 * get_futex_key_refs() takes no reference for such keys.
	 */
	if (!fshared) {
		if (unlikely(!access_ok(rw, uaddr, sizeof(u32))))
			return -EFAULT;
		key->private.mm = mm;
		key->private.address = address;
		get_futex_key_refs(key);
		return 0;
	}

again:
	err = get_user_pages_fast(address, 1, rw == VERIFY_WRITE, &page);
	if (err < 0)
		return err;

	/*
	 * The page lost its mapping between being pinned and being locked;
	 * drop it and look it up again.
	 */
	lock_page(page);
	if (!page->mapping) {
		unlock_page(page);
		put_page(page);
		goto again;
	}

	/*
	 * Anonymous pages are keyed on (mm, address); file-backed pages are
	 * keyed on (inode, page index) so that all mappers of the file agree
	 * on the key.
	 */
	if (PageAnon(page)) {
		key->both.offset |= FUT_OFF_MMSHARED;
		key->private.mm = mm;
		key->private.address = address;
	} else {
		key->both.offset |= FUT_OFF_INODE;
		key->shared.inode = page->mapping->host;
		key->shared.pgoff = page->index;
	}

	/* Pin the inode/mm before the page itself is released. */
	get_futex_key_refs(key);

	unlock_page(page);
	put_page(page);
	return 0;
}
275
/*
 * Counterpart of get_futex_key(): drop the reference held through @key.
 * @fshared is accepted for symmetry with get_futex_key() but is not used.
 */
static inline void put_futex_key(int fshared, union futex_key *key)
{
	drop_futex_key_refs(key);
}
281
282static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval)
283{
284 u32 curval;
285
286 pagefault_disable();
287 curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
288 pagefault_enable();
289
290 return curval;
291}
292
293static int get_futex_value_locked(u32 *dest, u32 __user *from)
294{
295 int ret;
296
297 pagefault_disable();
298 ret = __copy_from_user_inatomic(dest, from, sizeof(u32));
299 pagefault_enable();
300
301 return ret ? -EFAULT : 0;
302}
303
304
305
306
307static int futex_handle_fault(unsigned long address, int attempt)
308{
309 struct vm_area_struct * vma;
310 struct mm_struct *mm = current->mm;
311 int ret = -EFAULT;
312
313 if (attempt > 2)
314 return ret;
315
316 down_read(&mm->mmap_sem);
317 vma = find_vma(mm, address);
318 if (vma && address >= vma->vm_start &&
319 (vma->vm_flags & VM_WRITE)) {
320 int fault;
321 fault = handle_mm_fault(mm, vma, address, 1);
322 if (unlikely((fault & VM_FAULT_ERROR))) {
323#if 0
324
325 if (ret & VM_FAULT_OOM)
326 ret = -ENOMEM;
327#endif
328 } else {
329 ret = 0;
330 if (fault & VM_FAULT_MAJOR)
331 current->maj_flt++;
332 else
333 current->min_flt++;
334 }
335 }
336 up_read(&mm->mmap_sem);
337 return ret;
338}
339
340
341
342
343static int refill_pi_state_cache(void)
344{
345 struct futex_pi_state *pi_state;
346
347 if (likely(current->pi_state_cache))
348 return 0;
349
350 pi_state = kzalloc(sizeof(*pi_state), GFP_KERNEL);
351
352 if (!pi_state)
353 return -ENOMEM;
354
355 INIT_LIST_HEAD(&pi_state->list);
356
357 pi_state->owner = NULL;
358 atomic_set(&pi_state->refcount, 1);
359 pi_state->key = FUTEX_KEY_INIT;
360
361 current->pi_state_cache = pi_state;
362
363 return 0;
364}
365
366static struct futex_pi_state * alloc_pi_state(void)
367{
368 struct futex_pi_state *pi_state = current->pi_state_cache;
369
370 WARN_ON(!pi_state);
371 current->pi_state_cache = NULL;
372
373 return pi_state;
374}
375
376static void free_pi_state(struct futex_pi_state *pi_state)
377{
378 if (!atomic_dec_and_test(&pi_state->refcount))
379 return;
380
381
382
383
384
385 if (pi_state->owner) {
386 spin_lock_irq(&pi_state->owner->pi_lock);
387 list_del_init(&pi_state->list);
388 spin_unlock_irq(&pi_state->owner->pi_lock);
389
390 rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner);
391 }
392
393 if (current->pi_state_cache)
394 kfree(pi_state);
395 else {
396
397
398
399
400
401 pi_state->owner = NULL;
402 atomic_set(&pi_state->refcount, 1);
403 current->pi_state_cache = pi_state;
404 }
405}
406
407
408
409
410
411static struct task_struct * futex_find_get_task(pid_t pid)
412{
413 struct task_struct *p;
414 const struct cred *cred = current_cred(), *pcred;
415
416 rcu_read_lock();
417 p = find_task_by_vpid(pid);
418 if (!p) {
419 p = ERR_PTR(-ESRCH);
420 } else {
421 pcred = __task_cred(p);
422 if (cred->euid != pcred->euid &&
423 cred->euid != pcred->uid)
424 p = ERR_PTR(-ESRCH);
425 else
426 get_task_struct(p);
427 }
428
429 rcu_read_unlock();
430
431 return p;
432}
433
434
435
436
437
438
/*
 * exit_pi_state_list - release every PI state still owned by @curr
 * @curr: the task whose pi_state_list is to be emptied
 *
 * For each entry the owner is cleared and the associated rt_mutex is
 * unlocked.  Does nothing when futex_cmpxchg_enabled is zero.
 */
void exit_pi_state_list(struct task_struct *curr)
{
	struct list_head *next, *head = &curr->pi_state_list;
	struct futex_pi_state *pi_state;
	struct futex_hash_bucket *hb;
	union futex_key key = FUTEX_KEY_INIT;

	if (!futex_cmpxchg_enabled)
		return;

	/*
	 * The list is protected by curr->pi_lock, but the hash bucket lock
	 * must be taken before pi_lock.  So for every entry: peek at it under
	 * pi_lock, drop pi_lock, take the bucket lock, retake pi_lock and
	 * re-check that the entry is still first on the list.
	 */
	spin_lock_irq(&curr->pi_lock);
	while (!list_empty(head)) {

		next = head->next;
		pi_state = list_entry(next, struct futex_pi_state, list);
		/* Copy the key while pi_lock still pins the entry. */
		key = pi_state->key;
		hb = hash_futex(&key);
		spin_unlock_irq(&curr->pi_lock);

		spin_lock(&hb->lock);

		spin_lock_irq(&curr->pi_lock);
		/*
		 * The list changed while no lock was held: start over with
		 * the new head (pi_lock is still held for the loop test).
		 */
		if (head->next != next) {
			spin_unlock(&hb->lock);
			continue;
		}

		WARN_ON(pi_state->owner != curr);
		WARN_ON(list_empty(&pi_state->list));
		list_del_init(&pi_state->list);
		pi_state->owner = NULL;
		spin_unlock_irq(&curr->pi_lock);

		rt_mutex_unlock(&pi_state->pi_mutex);

		spin_unlock(&hb->lock);

		/* Retake pi_lock for the next loop-condition check. */
		spin_lock_irq(&curr->pi_lock);
	}
	spin_unlock_irq(&curr->pi_lock);
}
488
/*
 * lookup_pi_state - find or create the PI state for a contended PI futex
 * @uval: current user-space value of the futex word (owner TID in the low bits)
 * @hb:   hash bucket for @key; futex_lock_pi() calls this with hb->lock held
 * @key:  key of the futex
 * @ps:   on success, receives the pi_state with a reference held
 *
 * Returns 0 on success, -EINVAL if a non-PI waiter is queued on the same
 * key, -ESRCH if the owner cannot be found (or has finished exiting), or
 * -EAGAIN if the owner is in the middle of exiting.
 */
static int
lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
		union futex_key *key, struct futex_pi_state **ps)
{
	struct futex_pi_state *pi_state = NULL;
	struct futex_q *this, *next;
	struct plist_head *head;
	struct task_struct *p;
	pid_t pid = uval & FUTEX_TID_MASK;

	head = &hb->chain;

	plist_for_each_entry_safe(this, next, head, list) {
		if (match_futex(&this->key, key)) {
			/*
			 * Another waiter is already queued on this futex:
			 * share its pi_state.
			 */
			pi_state = this->pi_state;
			/*
			 * A waiter without pi_state is a plain futex_wait()
			 * waiter; mixing the two on one word is refused.
			 */
			if (unlikely(!pi_state))
				return -EINVAL;

			WARN_ON(!atomic_read(&pi_state->refcount));
			WARN_ON(pid && pi_state->owner &&
				pi_state->owner->pid != pid);

			atomic_inc(&pi_state->refcount);
			*ps = pi_state;

			return 0;
		}
	}

	/*
	 * We are the first waiter: create the pi_state on behalf of the
	 * owner named by the TID in the futex value.
	 */
	if (!pid)
		return -ESRCH;
	p = futex_find_get_task(pid);
	if (IS_ERR(p))
		return PTR_ERR(p);

	/*
	 * The PF_EXITING check and the attach to p->pi_state_list below are
	 * both done under p->pi_lock.
	 */
	spin_lock_irq(&p->pi_lock);
	if (unlikely(p->flags & PF_EXITING)) {
		/*
		 * PF_EXITPIDONE set: report -ESRCH.  Otherwise the owner is
		 * still exiting: report -EAGAIN so the caller retries.
		 */
		int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;

		spin_unlock_irq(&p->pi_lock);
		put_task_struct(p);
		return ret;
	}

	pi_state = alloc_pi_state();

	/*
	 * Initialize the rt_mutex as already locked by the owner p.
	 */
	rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);

	/* Remember the key so exit_pi_state_list() can find the bucket. */
	pi_state->key = *key;

	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &p->pi_state_list);
	pi_state->owner = p;
	spin_unlock_irq(&p->pi_lock);

	put_task_struct(p);

	*ps = pi_state;

	return 0;
}
577
578
579
580
581
/*
 * wake_futex - dequeue one waiter and wake it
 * @q: the waiter; callers in this file hold the lock of the bucket it is
 *     queued on
 */
static void wake_futex(struct futex_q *q)
{
	plist_del(&q->list, &q->list.plist);
	/*
	 * Wake the task only after the entry has been removed from the
	 * bucket's list.
	 */
	wake_up(&q->waiter);
	/*
	 * Clearing lock_ptr is what tells unqueue_me() that the entry has
	 * already been dequeued, after which the waiter may return and its
	 * on-stack futex_q may vanish.  So q must not be touched after this
	 * store, and the write barrier orders the dequeue and wake-up stores
	 * above before it.
	 */
	smp_wmb();
	q->lock_ptr = NULL;
}
602
/*
 * wake_futex_pi - hand a PI futex over to its next owner
 * @uaddr: user address of the futex word
 * @uval:  value the futex word is expected to hold
 * @this:  the top waiter found on the hash chain
 *
 * Writes the new owner's TID (with FUTEX_WAITERS) to user space, moves the
 * pi_state from the old owner's list to the new owner's list and unlocks
 * the rt_mutex.  Called from futex_unlock_pi() with the bucket lock held.
 *
 * Returns 0 on success, -EINVAL if @this has no pi_state or the user value
 * changed underneath us, -EFAULT if the user word could not be accessed.
 */
static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
{
	struct task_struct *new_owner;
	struct futex_pi_state *pi_state = this->pi_state;
	u32 curval, newval;

	if (!pi_state)
		return -EINVAL;

	spin_lock(&pi_state->pi_mutex.wait_lock);
	new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);

	/*
	 * The rt_mutex has no waiter to name as next owner; fall back to the
	 * task of the futex_q we were given.
	 */
	if (!new_owner)
		new_owner = this->task;

	/*
	 * Publish the new owner in user space, unless FUTEX_OWNER_DIED is
	 * set, in which case the user value is left untouched here.
	 */
	if (!(uval & FUTEX_OWNER_DIED)) {
		int ret = 0;

		newval = FUTEX_WAITERS | task_pid_vnr(new_owner);

		curval = cmpxchg_futex_value_locked(uaddr, uval, newval);

		if (curval == -EFAULT)
			ret = -EFAULT;
		else if (curval != uval)
			ret = -EINVAL;
		if (ret) {
			spin_unlock(&pi_state->pi_mutex.wait_lock);
			return ret;
		}
	}

	/* Unlink from the old owner's list ... */
	spin_lock_irq(&pi_state->owner->pi_lock);
	WARN_ON(list_empty(&pi_state->list));
	list_del_init(&pi_state->list);
	spin_unlock_irq(&pi_state->owner->pi_lock);

	/* ... and attach to the new owner's, each under the respective pi_lock. */
	spin_lock_irq(&new_owner->pi_lock);
	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &new_owner->pi_state_list);
	pi_state->owner = new_owner;
	spin_unlock_irq(&new_owner->pi_lock);

	spin_unlock(&pi_state->pi_mutex.wait_lock);
	rt_mutex_unlock(&pi_state->pi_mutex);

	return 0;
}
662
663static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
664{
665 u32 oldval;
666
667
668
669
670
671 oldval = cmpxchg_futex_value_locked(uaddr, uval, 0);
672
673 if (oldval == -EFAULT)
674 return oldval;
675 if (oldval != uval)
676 return -EAGAIN;
677
678 return 0;
679}
680
681
682
683
684static inline void
685double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
686{
687 if (hb1 <= hb2) {
688 spin_lock(&hb1->lock);
689 if (hb1 < hb2)
690 spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
691 } else {
692 spin_lock(&hb2->lock);
693 spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING);
694 }
695}
696
697
698
699
700
701static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
702{
703 struct futex_hash_bucket *hb;
704 struct futex_q *this, *next;
705 struct plist_head *head;
706 union futex_key key = FUTEX_KEY_INIT;
707 int ret;
708
709 if (!bitset)
710 return -EINVAL;
711
712 ret = get_futex_key(uaddr, fshared, &key, VERIFY_READ);
713 if (unlikely(ret != 0))
714 goto out;
715
716 hb = hash_futex(&key);
717 spin_lock(&hb->lock);
718 head = &hb->chain;
719
720 plist_for_each_entry_safe(this, next, head, list) {
721 if (match_futex (&this->key, &key)) {
722 if (this->pi_state) {
723 ret = -EINVAL;
724 break;
725 }
726
727
728 if (!(this->bitset & bitset))
729 continue;
730
731 wake_futex(this);
732 if (++ret >= nr_wake)
733 break;
734 }
735 }
736
737 spin_unlock(&hb->lock);
738 put_futex_key(fshared, &key);
739out:
740 return ret;
741}
742
743
744
745
746
747static int
748futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
749 int nr_wake, int nr_wake2, int op)
750{
751 union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
752 struct futex_hash_bucket *hb1, *hb2;
753 struct plist_head *head;
754 struct futex_q *this, *next;
755 int ret, op_ret, attempt = 0;
756
757retryfull:
758 ret = get_futex_key(uaddr1, fshared, &key1, VERIFY_READ);
759 if (unlikely(ret != 0))
760 goto out;
761 ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_WRITE);
762 if (unlikely(ret != 0))
763 goto out_put_key1;
764
765 hb1 = hash_futex(&key1);
766 hb2 = hash_futex(&key2);
767
768retry:
769 double_lock_hb(hb1, hb2);
770
771 op_ret = futex_atomic_op_inuser(op, uaddr2);
772 if (unlikely(op_ret < 0)) {
773 u32 dummy;
774
775 spin_unlock(&hb1->lock);
776 if (hb1 != hb2)
777 spin_unlock(&hb2->lock);
778
779#ifndef CONFIG_MMU
780
781
782
783
784 ret = op_ret;
785 goto out_put_keys;
786#endif
787
788 if (unlikely(op_ret != -EFAULT)) {
789 ret = op_ret;
790 goto out_put_keys;
791 }
792
793
794
795
796
797
798
799
800 if (attempt++) {
801 ret = futex_handle_fault((unsigned long)uaddr2,
802 attempt);
803 if (ret)
804 goto out_put_keys;
805 goto retry;
806 }
807
808 ret = get_user(dummy, uaddr2);
809 if (ret)
810 return ret;
811
812 goto retryfull;
813 }
814
815 head = &hb1->chain;
816
817 plist_for_each_entry_safe(this, next, head, list) {
818 if (match_futex (&this->key, &key1)) {
819 wake_futex(this);
820 if (++ret >= nr_wake)
821 break;
822 }
823 }
824
825 if (op_ret > 0) {
826 head = &hb2->chain;
827
828 op_ret = 0;
829 plist_for_each_entry_safe(this, next, head, list) {
830 if (match_futex (&this->key, &key2)) {
831 wake_futex(this);
832 if (++op_ret >= nr_wake2)
833 break;
834 }
835 }
836 ret += op_ret;
837 }
838
839 spin_unlock(&hb1->lock);
840 if (hb1 != hb2)
841 spin_unlock(&hb2->lock);
842out_put_keys:
843 put_futex_key(fshared, &key2);
844out_put_key1:
845 put_futex_key(fshared, &key1);
846out:
847 return ret;
848}
849
850
851
852
853
854static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
855 int nr_wake, int nr_requeue, u32 *cmpval)
856{
857 union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
858 struct futex_hash_bucket *hb1, *hb2;
859 struct plist_head *head1;
860 struct futex_q *this, *next;
861 int ret, drop_count = 0;
862
863retry:
864 ret = get_futex_key(uaddr1, fshared, &key1, VERIFY_READ);
865 if (unlikely(ret != 0))
866 goto out;
867 ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_WRITE);
868 if (unlikely(ret != 0))
869 goto out_put_key1;
870
871 hb1 = hash_futex(&key1);
872 hb2 = hash_futex(&key2);
873
874 double_lock_hb(hb1, hb2);
875
876 if (likely(cmpval != NULL)) {
877 u32 curval;
878
879 ret = get_futex_value_locked(&curval, uaddr1);
880
881 if (unlikely(ret)) {
882 spin_unlock(&hb1->lock);
883 if (hb1 != hb2)
884 spin_unlock(&hb2->lock);
885
886 ret = get_user(curval, uaddr1);
887
888 if (!ret)
889 goto retry;
890
891 goto out_put_keys;
892 }
893 if (curval != *cmpval) {
894 ret = -EAGAIN;
895 goto out_unlock;
896 }
897 }
898
899 head1 = &hb1->chain;
900 plist_for_each_entry_safe(this, next, head1, list) {
901 if (!match_futex (&this->key, &key1))
902 continue;
903 if (++ret <= nr_wake) {
904 wake_futex(this);
905 } else {
906
907
908
909
910 if (likely(head1 != &hb2->chain)) {
911 plist_del(&this->list, &hb1->chain);
912 plist_add(&this->list, &hb2->chain);
913 this->lock_ptr = &hb2->lock;
914#ifdef CONFIG_DEBUG_PI_LIST
915 this->list.plist.lock = &hb2->lock;
916#endif
917 }
918 this->key = key2;
919 get_futex_key_refs(&key2);
920 drop_count++;
921
922 if (ret - nr_wake >= nr_requeue)
923 break;
924 }
925 }
926
927out_unlock:
928 spin_unlock(&hb1->lock);
929 if (hb1 != hb2)
930 spin_unlock(&hb2->lock);
931
932
933 while (--drop_count >= 0)
934 drop_futex_key_refs(&key1);
935
936out_put_keys:
937 put_futex_key(fshared, &key2);
938out_put_key1:
939 put_futex_key(fshared, &key1);
940out:
941 return ret;
942}
943
944
945static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
946{
947 struct futex_hash_bucket *hb;
948
949 init_waitqueue_head(&q->waiter);
950
951 get_futex_key_refs(&q->key);
952 hb = hash_futex(&q->key);
953 q->lock_ptr = &hb->lock;
954
955 spin_lock(&hb->lock);
956 return hb;
957}
958
959static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
960{
961 int prio;
962
963
964
965
966
967
968
969
970
971 prio = min(current->normal_prio, MAX_RT_PRIO);
972
973 plist_node_init(&q->list, prio);
974#ifdef CONFIG_DEBUG_PI_LIST
975 q->list.plist.lock = &hb->lock;
976#endif
977 plist_add(&q->list, &hb->chain);
978 q->task = current;
979 spin_unlock(&hb->lock);
980}
981
982static inline void
983queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb)
984{
985 spin_unlock(&hb->lock);
986 drop_futex_key_refs(&q->key);
987}
988
989
990
991
992
993
994
/*
 * unqueue_me - remove a non-PI waiter from its hash bucket, if still queued
 * @q: the waiter
 *
 * Returns 1 if @q was still queued and has been removed here, 0 if it had
 * already been dequeued (q->lock_ptr was NULL).  In both cases the key
 * reference taken by queue_lock() is dropped.
 */
static int unqueue_me(struct futex_q *q)
{
	spinlock_t *lock_ptr;
	int ret = 0;

	/* In the common case the waker has already removed us. */
retry:
	lock_ptr = q->lock_ptr;
	/* Make sure the value read above is the one tested and locked below. */
	barrier();
	if (lock_ptr != NULL) {
		spin_lock(lock_ptr);
		/*
		 * q->lock_ptr can change between reading it and taking the
		 * lock: wake_futex() sets it to NULL and futex_requeue()
		 * points it at another bucket.  If it changed, the lock we
		 * hold is the wrong one; drop it and try again.
		 */
		if (unlikely(lock_ptr != q->lock_ptr)) {
			spin_unlock(lock_ptr);
			goto retry;
		}
		WARN_ON(plist_node_empty(&q->list));
		plist_del(&q->list, &q->list.plist);

		/* PI waiters must go through unqueue_me_pi() instead. */
		BUG_ON(q->pi_state);

		spin_unlock(lock_ptr);
		ret = 1;
	}

	drop_futex_key_refs(&q->key);
	return ret;
}
1035
1036
1037
1038
1039
1040
/*
 * unqueue_me_pi - remove a PI waiter from its hash bucket
 * @q: the waiter; must still be queued and have a pi_state
 *
 * Called with q->lock_ptr held; the lock is released here.  Also drops the
 * waiter's pi_state reference and the key reference taken by queue_lock().
 */
static void unqueue_me_pi(struct futex_q *q)
{
	WARN_ON(plist_node_empty(&q->list));
	plist_del(&q->list, &q->list.plist);

	BUG_ON(!q->pi_state);
	free_pi_state(q->pi_state);
	q->pi_state = NULL;

	spin_unlock(q->lock_ptr);

	drop_futex_key_refs(&q->key);
}
1054
1055
1056
1057
1058
1059
1060
/*
 * fixup_pi_state_owner - make user space and pi_state agree on the owner
 * @uaddr:    user address of the futex word
 * @q:        the caller's queue entry; q->pi_state is the state to fix
 * @newowner: task that should be recorded as owner
 * @fshared:  not used in this function
 *
 * Rewrites the TID part of the futex word to @newowner's TID (keeping
 * FUTEX_WAITERS set and preserving FUTEX_OWNER_DIED) and moves the pi_state
 * to @newowner's pi_state_list.
 *
 * Called with q->lock_ptr held.  The lock is dropped and retaken if the
 * user word has to be faulted in.
 *
 * Returns 0 on success, or if someone else changed the owner while the
 * lock was dropped; otherwise the error from futex_handle_fault().
 */
static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
				struct task_struct *newowner, int fshared)
{
	u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
	struct futex_pi_state *pi_state = q->pi_state;
	struct task_struct *oldowner = pi_state->owner;
	u32 uval, curval, newval;
	int ret, attempt = 0;

	/* No recorded owner: mark the futex word with FUTEX_OWNER_DIED. */
	if (!pi_state->owner)
		newtid |= FUTEX_OWNER_DIED;

	/*
	 * The user word is updated before pi_state itself is touched, so
	 * that a fault can be handled (with the bucket lock dropped) while
	 * pi_state is still unchanged.
	 */
retry:
	if (get_futex_value_locked(&uval, uaddr))
		goto handle_fault;

	/* cmpxchg loop: retry with the freshly observed value until it sticks. */
	while (1) {
		newval = (uval & FUTEX_OWNER_DIED) | newtid;

		curval = cmpxchg_futex_value_locked(uaddr, uval, newval);

		if (curval == -EFAULT)
			goto handle_fault;
		if (curval == uval)
			break;
		uval = curval;
	}

	/*
	 * User space is updated; now move pi_state from the old owner's
	 * list (if there is an old owner) to the new owner's list.
	 */
	if (pi_state->owner != NULL) {
		spin_lock_irq(&pi_state->owner->pi_lock);
		WARN_ON(list_empty(&pi_state->list));
		list_del_init(&pi_state->list);
		spin_unlock_irq(&pi_state->owner->pi_lock);
	}

	pi_state->owner = newowner;

	spin_lock_irq(&newowner->pi_lock);
	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &newowner->pi_state_list);
	spin_unlock_irq(&newowner->pi_lock);
	return 0;

	/*
	 * The user word could not be accessed with page faults disabled.
	 * Drop the bucket lock, fault the page in, and retake the lock.
	 */
handle_fault:
	spin_unlock(q->lock_ptr);

	ret = futex_handle_fault((unsigned long)uaddr, attempt++);

	spin_lock(q->lock_ptr);

	/*
	 * While the lock was dropped somebody else may have fixed up the
	 * owner already; in that case there is nothing left to do.
	 */
	if (pi_state->owner != oldowner)
		return 0;

	if (ret)
		return ret;

	goto retry;
}
1156
1157
1158
1159
1160
/*
 * Bits stored in restart_block.futex.flags by futex_wait() and decoded
 * again by futex_wait_restart().
 */
#define FLAGS_SHARED 0x01
#define FLAGS_CLOCKRT 0x02

/* Forward declaration: futex_wait() installs this as its restart handler. */
static long futex_wait_restart(struct restart_block *restart);
1165
1166static int futex_wait(u32 __user *uaddr, int fshared,
1167 u32 val, ktime_t *abs_time, u32 bitset, int clockrt)
1168{
1169 struct task_struct *curr = current;
1170 struct restart_block *restart;
1171 DECLARE_WAITQUEUE(wait, curr);
1172 struct futex_hash_bucket *hb;
1173 struct futex_q q;
1174 u32 uval;
1175 int ret;
1176 struct hrtimer_sleeper t;
1177 int rem = 0;
1178
1179 if (!bitset)
1180 return -EINVAL;
1181
1182 q.pi_state = NULL;
1183 q.bitset = bitset;
1184retry:
1185 q.key = FUTEX_KEY_INIT;
1186 ret = get_futex_key(uaddr, fshared, &q.key, VERIFY_READ);
1187 if (unlikely(ret != 0))
1188 goto out;
1189
1190 hb = queue_lock(&q);
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212 ret = get_futex_value_locked(&uval, uaddr);
1213
1214 if (unlikely(ret)) {
1215 queue_unlock(&q, hb);
1216 put_futex_key(fshared, &q.key);
1217
1218 ret = get_user(uval, uaddr);
1219
1220 if (!ret)
1221 goto retry;
1222 goto out;
1223 }
1224 ret = -EWOULDBLOCK;
1225 if (unlikely(uval != val)) {
1226 queue_unlock(&q, hb);
1227 goto out_put_key;
1228 }
1229
1230
1231 queue_me(&q, hb);
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243 __set_current_state(TASK_INTERRUPTIBLE);
1244 add_wait_queue(&q.waiter, &wait);
1245
1246
1247
1248
1249 if (likely(!plist_node_empty(&q.list))) {
1250 if (!abs_time)
1251 schedule();
1252 else {
1253 unsigned long slack;
1254 slack = current->timer_slack_ns;
1255 if (rt_task(current))
1256 slack = 0;
1257 hrtimer_init_on_stack(&t.timer,
1258 clockrt ? CLOCK_REALTIME :
1259 CLOCK_MONOTONIC,
1260 HRTIMER_MODE_ABS);
1261 hrtimer_init_sleeper(&t, current);
1262 hrtimer_set_expires_range_ns(&t.timer, *abs_time, slack);
1263
1264 hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS);
1265 if (!hrtimer_active(&t.timer))
1266 t.task = NULL;
1267
1268
1269
1270
1271
1272
1273 if (likely(t.task))
1274 schedule();
1275
1276 hrtimer_cancel(&t.timer);
1277
1278
1279 rem = (t.task == NULL);
1280
1281 destroy_hrtimer_on_stack(&t.timer);
1282 }
1283 }
1284 __set_current_state(TASK_RUNNING);
1285
1286
1287
1288
1289
1290
1291
1292 ret = 0;
1293 if (!unqueue_me(&q))
1294 goto out_put_key;
1295 ret = -ETIMEDOUT;
1296 if (rem)
1297 goto out_put_key;
1298
1299
1300
1301
1302
1303 ret = -ERESTARTSYS;
1304 if (!abs_time)
1305 goto out_put_key;
1306
1307 restart = ¤t_thread_info()->restart_block;
1308 restart->fn = futex_wait_restart;
1309 restart->futex.uaddr = (u32 *)uaddr;
1310 restart->futex.val = val;
1311 restart->futex.time = abs_time->tv64;
1312 restart->futex.bitset = bitset;
1313 restart->futex.flags = 0;
1314
1315 if (fshared)
1316 restart->futex.flags |= FLAGS_SHARED;
1317 if (clockrt)
1318 restart->futex.flags |= FLAGS_CLOCKRT;
1319
1320 ret = -ERESTART_RESTARTBLOCK;
1321
1322out_put_key:
1323 put_futex_key(fshared, &q.key);
1324out:
1325 return ret;
1326}
1327
1328
1329static long futex_wait_restart(struct restart_block *restart)
1330{
1331 u32 __user *uaddr = (u32 __user *)restart->futex.uaddr;
1332 int fshared = 0;
1333 ktime_t t;
1334
1335 t.tv64 = restart->futex.time;
1336 restart->fn = do_no_restart_syscall;
1337 if (restart->futex.flags & FLAGS_SHARED)
1338 fshared = 1;
1339 return (long)futex_wait(uaddr, fshared, restart->futex.val, &t,
1340 restart->futex.bitset,
1341 restart->futex.flags & FLAGS_CLOCKRT);
1342}
1343
1344
1345
1346
1347
1348
1349
1350
1351static int futex_lock_pi(u32 __user *uaddr, int fshared,
1352 int detect, ktime_t *time, int trylock)
1353{
1354 struct hrtimer_sleeper timeout, *to = NULL;
1355 struct task_struct *curr = current;
1356 struct futex_hash_bucket *hb;
1357 u32 uval, newval, curval;
1358 struct futex_q q;
1359 int ret, lock_taken, ownerdied = 0, attempt = 0;
1360
1361 if (refill_pi_state_cache())
1362 return -ENOMEM;
1363
1364 if (time) {
1365 to = &timeout;
1366 hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME,
1367 HRTIMER_MODE_ABS);
1368 hrtimer_init_sleeper(to, current);
1369 hrtimer_set_expires(&to->timer, *time);
1370 }
1371
1372 q.pi_state = NULL;
1373retry:
1374 q.key = FUTEX_KEY_INIT;
1375 ret = get_futex_key(uaddr, fshared, &q.key, VERIFY_WRITE);
1376 if (unlikely(ret != 0))
1377 goto out;
1378
1379retry_unlocked:
1380 hb = queue_lock(&q);
1381
1382retry_locked:
1383 ret = lock_taken = 0;
1384
1385
1386
1387
1388
1389
1390 newval = task_pid_vnr(current);
1391
1392 curval = cmpxchg_futex_value_locked(uaddr, 0, newval);
1393
1394 if (unlikely(curval == -EFAULT))
1395 goto uaddr_faulted;
1396
1397
1398
1399
1400
1401 if (unlikely((curval & FUTEX_TID_MASK) == task_pid_vnr(current))) {
1402 ret = -EDEADLK;
1403 goto out_unlock_put_key;
1404 }
1405
1406
1407
1408
1409 if (unlikely(!curval))
1410 goto out_unlock_put_key;
1411
1412 uval = curval;
1413
1414
1415
1416
1417
1418 newval = curval | FUTEX_WAITERS;
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428 if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) {
1429
1430 newval = (curval & ~FUTEX_TID_MASK) | task_pid_vnr(current);
1431 ownerdied = 0;
1432 lock_taken = 1;
1433 }
1434
1435 curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
1436
1437 if (unlikely(curval == -EFAULT))
1438 goto uaddr_faulted;
1439 if (unlikely(curval != uval))
1440 goto retry_locked;
1441
1442
1443
1444
1445 if (unlikely(lock_taken))
1446 goto out_unlock_put_key;
1447
1448
1449
1450
1451
1452 ret = lookup_pi_state(uval, hb, &q.key, &q.pi_state);
1453
1454 if (unlikely(ret)) {
1455 switch (ret) {
1456
1457 case -EAGAIN:
1458
1459
1460
1461
1462 queue_unlock(&q, hb);
1463 cond_resched();
1464 goto retry;
1465
1466 case -ESRCH:
1467
1468
1469
1470
1471
1472 if (get_futex_value_locked(&curval, uaddr))
1473 goto uaddr_faulted;
1474
1475
1476
1477
1478
1479
1480 if (curval & FUTEX_OWNER_DIED) {
1481 ownerdied = 1;
1482 goto retry_locked;
1483 }
1484 default:
1485 goto out_unlock_put_key;
1486 }
1487 }
1488
1489
1490
1491
1492 queue_me(&q, hb);
1493
1494 WARN_ON(!q.pi_state);
1495
1496
1497
1498 if (!trylock)
1499 ret = rt_mutex_timed_lock(&q.pi_state->pi_mutex, to, 1);
1500 else {
1501 ret = rt_mutex_trylock(&q.pi_state->pi_mutex);
1502
1503 ret = ret ? 0 : -EWOULDBLOCK;
1504 }
1505
1506 spin_lock(q.lock_ptr);
1507
1508 if (!ret) {
1509
1510
1511
1512
1513
1514 if (q.pi_state->owner != curr)
1515 ret = fixup_pi_state_owner(uaddr, &q, curr, fshared);
1516 } else {
1517
1518
1519
1520
1521
1522 if (q.pi_state->owner == curr) {
1523
1524
1525
1526
1527
1528
1529 if (rt_mutex_trylock(&q.pi_state->pi_mutex))
1530 ret = 0;
1531 else {
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543 struct task_struct *owner;
1544 int res;
1545
1546 owner = rt_mutex_owner(&q.pi_state->pi_mutex);
1547 res = fixup_pi_state_owner(uaddr, &q, owner,
1548 fshared);
1549
1550
1551 if (res)
1552 ret = res;
1553 }
1554 } else {
1555
1556
1557
1558
1559
1560
1561 if (rt_mutex_owner(&q.pi_state->pi_mutex) == curr)
1562 printk(KERN_ERR "futex_lock_pi: ret = %d "
1563 "pi-mutex: %p pi-state %p\n", ret,
1564 q.pi_state->pi_mutex.owner,
1565 q.pi_state->owner);
1566 }
1567 }
1568
1569
1570 unqueue_me_pi(&q);
1571
1572 if (to)
1573 destroy_hrtimer_on_stack(&to->timer);
1574 return ret != -EINTR ? ret : -ERESTARTNOINTR;
1575
1576out_unlock_put_key:
1577 queue_unlock(&q, hb);
1578
1579out_put_key:
1580 put_futex_key(fshared, &q.key);
1581out:
1582 if (to)
1583 destroy_hrtimer_on_stack(&to->timer);
1584 return ret;
1585
1586uaddr_faulted:
1587
1588
1589
1590
1591
1592
1593
1594 queue_unlock(&q, hb);
1595
1596 if (attempt++) {
1597 ret = futex_handle_fault((unsigned long)uaddr, attempt);
1598 if (ret)
1599 goto out_put_key;
1600 goto retry_unlocked;
1601 }
1602
1603 ret = get_user(uval, uaddr);
1604 if (!ret)
1605 goto retry;
1606
1607 if (to)
1608 destroy_hrtimer_on_stack(&to->timer);
1609 return ret;
1610}
1611
1612
1613
1614
1615
1616
1617static int futex_unlock_pi(u32 __user *uaddr, int fshared)
1618{
1619 struct futex_hash_bucket *hb;
1620 struct futex_q *this, *next;
1621 u32 uval;
1622 struct plist_head *head;
1623 union futex_key key = FUTEX_KEY_INIT;
1624 int ret, attempt = 0;
1625
1626retry:
1627 if (get_user(uval, uaddr))
1628 return -EFAULT;
1629
1630
1631
1632 if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current))
1633 return -EPERM;
1634
1635 ret = get_futex_key(uaddr, fshared, &key, VERIFY_WRITE);
1636 if (unlikely(ret != 0))
1637 goto out;
1638
1639 hb = hash_futex(&key);
1640retry_unlocked:
1641 spin_lock(&hb->lock);
1642
1643
1644
1645
1646
1647
1648 if (!(uval & FUTEX_OWNER_DIED))
1649 uval = cmpxchg_futex_value_locked(uaddr, task_pid_vnr(current), 0);
1650
1651
1652 if (unlikely(uval == -EFAULT))
1653 goto pi_faulted;
1654
1655
1656
1657
1658 if (unlikely(uval == task_pid_vnr(current)))
1659 goto out_unlock;
1660
1661
1662
1663
1664
1665 head = &hb->chain;
1666
1667 plist_for_each_entry_safe(this, next, head, list) {
1668 if (!match_futex (&this->key, &key))
1669 continue;
1670 ret = wake_futex_pi(uaddr, uval, this);
1671
1672
1673
1674
1675
1676 if (ret == -EFAULT)
1677 goto pi_faulted;
1678 goto out_unlock;
1679 }
1680
1681
1682
1683 if (!(uval & FUTEX_OWNER_DIED)) {
1684 ret = unlock_futex_pi(uaddr, uval);
1685 if (ret == -EFAULT)
1686 goto pi_faulted;
1687 }
1688
1689out_unlock:
1690 spin_unlock(&hb->lock);
1691 put_futex_key(fshared, &key);
1692
1693out:
1694 return ret;
1695
1696pi_faulted:
1697
1698
1699
1700
1701
1702
1703
1704 spin_unlock(&hb->lock);
1705
1706 if (attempt++) {
1707 ret = futex_handle_fault((unsigned long)uaddr, attempt);
1708 if (ret)
1709 goto out;
1710 uval = 0;
1711 goto retry_unlocked;
1712 }
1713
1714 ret = get_user(uval, uaddr);
1715 if (!ret)
1716 goto retry;
1717
1718 return ret;
1719}
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
1742 size_t, len)
1743{
1744 if (!futex_cmpxchg_enabled)
1745 return -ENOSYS;
1746
1747
1748
1749 if (unlikely(len != sizeof(*head)))
1750 return -EINVAL;
1751
1752 current->robust_list = head;
1753
1754 return 0;
1755}
1756
1757
1758
1759
1760
1761
1762
1763SYSCALL_DEFINE3(get_robust_list, int, pid,
1764 struct robust_list_head __user * __user *, head_ptr,
1765 size_t __user *, len_ptr)
1766{
1767 struct robust_list_head __user *head;
1768 unsigned long ret;
1769 const struct cred *cred = current_cred(), *pcred;
1770
1771 if (!futex_cmpxchg_enabled)
1772 return -ENOSYS;
1773
1774 if (!pid)
1775 head = current->robust_list;
1776 else {
1777 struct task_struct *p;
1778
1779 ret = -ESRCH;
1780 rcu_read_lock();
1781 p = find_task_by_vpid(pid);
1782 if (!p)
1783 goto err_unlock;
1784 ret = -EPERM;
1785 pcred = __task_cred(p);
1786 if (cred->euid != pcred->euid &&
1787 cred->euid != pcred->uid &&
1788 !capable(CAP_SYS_PTRACE))
1789 goto err_unlock;
1790 head = p->robust_list;
1791 rcu_read_unlock();
1792 }
1793
1794 if (put_user(sizeof(*head), len_ptr))
1795 return -EFAULT;
1796 return put_user(head, head_ptr);
1797
1798err_unlock:
1799 rcu_read_unlock();
1800
1801 return ret;
1802}
1803
1804
1805
1806
1807
1808int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
1809{
1810 u32 uval, nval, mval;
1811
1812retry:
1813 if (get_user(uval, uaddr))
1814 return -1;
1815
1816 if ((uval & FUTEX_TID_MASK) == task_pid_vnr(curr)) {
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827 mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
1828 nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, mval);
1829
1830 if (nval == -EFAULT)
1831 return -1;
1832
1833 if (nval != uval)
1834 goto retry;
1835
1836
1837
1838
1839
1840 if (!pi && (uval & FUTEX_WAITERS))
1841 futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
1842 }
1843 return 0;
1844}
1845
1846
1847
1848
1849static inline int fetch_robust_entry(struct robust_list __user **entry,
1850 struct robust_list __user * __user *head,
1851 int *pi)
1852{
1853 unsigned long uentry;
1854
1855 if (get_user(uentry, (unsigned long __user *)head))
1856 return -EFAULT;
1857
1858 *entry = (void __user *)(uentry & ~1UL);
1859 *pi = uentry & 1;
1860
1861 return 0;
1862}
1863
1864
1865
1866
1867
1868
1869
1870void exit_robust_list(struct task_struct *curr)
1871{
1872 struct robust_list_head __user *head = curr->robust_list;
1873 struct robust_list __user *entry, *next_entry, *pending;
1874 unsigned int limit = ROBUST_LIST_LIMIT, pi, next_pi, pip;
1875 unsigned long futex_offset;
1876 int rc;
1877
1878 if (!futex_cmpxchg_enabled)
1879 return;
1880
1881
1882
1883
1884
1885 if (fetch_robust_entry(&entry, &head->list.next, &pi))
1886 return;
1887
1888
1889
1890 if (get_user(futex_offset, &head->futex_offset))
1891 return;
1892
1893
1894
1895
1896 if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
1897 return;
1898
1899 next_entry = NULL;
1900 while (entry != &head->list) {
1901
1902
1903
1904
1905 rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi);
1906
1907
1908
1909
1910 if (entry != pending)
1911 if (handle_futex_death((void __user *)entry + futex_offset,
1912 curr, pi))
1913 return;
1914 if (rc)
1915 return;
1916 entry = next_entry;
1917 pi = next_pi;
1918
1919
1920
1921 if (!--limit)
1922 break;
1923
1924 cond_resched();
1925 }
1926
1927 if (pending)
1928 handle_futex_death((void __user *)pending + futex_offset,
1929 curr, pip);
1930}
1931
1932long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
1933 u32 __user *uaddr2, u32 val2, u32 val3)
1934{
1935 int clockrt, ret = -ENOSYS;
1936 int cmd = op & FUTEX_CMD_MASK;
1937 int fshared = 0;
1938
1939 if (!(op & FUTEX_PRIVATE_FLAG))
1940 fshared = 1;
1941
1942 clockrt = op & FUTEX_CLOCK_REALTIME;
1943 if (clockrt && cmd != FUTEX_WAIT_BITSET)
1944 return -ENOSYS;
1945
1946 switch (cmd) {
1947 case FUTEX_WAIT:
1948 val3 = FUTEX_BITSET_MATCH_ANY;
1949 case FUTEX_WAIT_BITSET:
1950 ret = futex_wait(uaddr, fshared, val, timeout, val3, clockrt);
1951 break;
1952 case FUTEX_WAKE:
1953 val3 = FUTEX_BITSET_MATCH_ANY;
1954 case FUTEX_WAKE_BITSET:
1955 ret = futex_wake(uaddr, fshared, val, val3);
1956 break;
1957 case FUTEX_REQUEUE:
1958 ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL);
1959 break;
1960 case FUTEX_CMP_REQUEUE:
1961 ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3);
1962 break;
1963 case FUTEX_WAKE_OP:
1964 ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3);
1965 break;
1966 case FUTEX_LOCK_PI:
1967 if (futex_cmpxchg_enabled)
1968 ret = futex_lock_pi(uaddr, fshared, val, timeout, 0);
1969 break;
1970 case FUTEX_UNLOCK_PI:
1971 if (futex_cmpxchg_enabled)
1972 ret = futex_unlock_pi(uaddr, fshared);
1973 break;
1974 case FUTEX_TRYLOCK_PI:
1975 if (futex_cmpxchg_enabled)
1976 ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1);
1977 break;
1978 default:
1979 ret = -ENOSYS;
1980 }
1981 return ret;
1982}
1983
1984
1985SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
1986 struct timespec __user *, utime, u32 __user *, uaddr2,
1987 u32, val3)
1988{
1989 struct timespec ts;
1990 ktime_t t, *tp = NULL;
1991 u32 val2 = 0;
1992 int cmd = op & FUTEX_CMD_MASK;
1993
1994 if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
1995 cmd == FUTEX_WAIT_BITSET)) {
1996 if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
1997 return -EFAULT;
1998 if (!timespec_valid(&ts))
1999 return -EINVAL;
2000
2001 t = timespec_to_ktime(ts);
2002 if (cmd == FUTEX_WAIT)
2003 t = ktime_add_safe(ktime_get(), t);
2004 tp = &t;
2005 }
2006
2007
2008
2009
2010 if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
2011 cmd == FUTEX_WAKE_OP)
2012 val2 = (u32) (unsigned long) utime;
2013
2014 return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
2015}
2016
2017static int __init futex_init(void)
2018{
2019 u32 curval;
2020 int i;
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032 curval = cmpxchg_futex_value_locked(NULL, 0, 0);
2033 if (curval == -EFAULT)
2034 futex_cmpxchg_enabled = 1;
2035
2036 for (i = 0; i < ARRAY_SIZE(futex_queues); i++) {
2037 plist_head_init(&futex_queues[i].chain, &futex_queues[i].lock);
2038 spin_lock_init(&futex_queues[i].lock);
2039 }
2040
2041 return 0;
2042}
2043__initcall(futex_init);
2044