// SPDX-License-Identifier: GPL-2.0
/* kernel/locking/rwsem.c: R/W semaphores, public implementation
 *
 * Written by David Howells (dhowells@redhat.com).
 * Derived from asm-i386/semaphore.h
 *
 * Writer lock-stealing by Alex Shi <alex.shi@intel.com>
 * and Michel Lespinasse <walken@google.com>
 *
 * Optimistic spinning by Tim Chen <tim.c.chen@intel.com>
 * and Davidlohr Bueso <davidlohr@hp.com>. Based on mutexes.
 *
 * Rwsem count bit fields re-definition and rwsem rearchitecture by
 * Waiman Long <longman@redhat.com> and
 * Peter Zijlstra <peterz@infradead.org>.
 */

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>
#include <linux/sched/task.h>
#include <linux/sched/debug.h>
#include <linux/sched/wake_q.h>
#include <linux/sched/signal.h>
#include <linux/sched/clock.h>
#include <linux/export.h>
#include <linux/rwsem.h>
#include <linux/atomic.h>

#include "lock_events.h"

/*
 * The least significant 2 bits of the owner value have the following
 * meanings when set.
 *  - Bit 0: RWSEM_READER_OWNED - The rwsem is owned by readers
 *  - Bit 1: RWSEM_NONSPINNABLE - Cannot spin on a reader-owned lock
 *
 * When the rwsem is reader-owned and a spinning writer has timed out,
 * the nonspinnable bit will be set to disable optimistic spinning.
 *
 * When a writer acquires a rwsem, it puts its task_struct pointer
 * into the owner field. It is cleared after an unlock.
 *
 * When a reader acquires a rwsem, it will also put its task_struct
 * pointer into the owner field with the RWSEM_READER_OWNED bit set.
 * On unlock, the owner field will largely be left untouched. So
 * for a free or reader-owned rwsem, the owner value may contain
 * information about the last reader that acquires the rwsem.
 *
 * That information may be helpful in debugging cases where the system
 * seems to hang on a reader-owned rwsem especially if only one reader
 * is involved. Ideally we would like to track all the readers that own
 * a rwsem, but the overhead is simply too big.
 *
 * A fast path reader optimistic lock stealing is supported when the
 * rwsem is previously owned by a writer and the following conditions
 * are met:
 *  - OSQ is empty
 *  - rwsem is not currently writer owned
 *  - the handoff isn't set.
 */
#define RWSEM_READER_OWNED      (1UL << 0)
#define RWSEM_NONSPINNABLE      (1UL << 1)
#define RWSEM_OWNER_FLAGS_MASK  (RWSEM_READER_OWNED | RWSEM_NONSPINNABLE)
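
/*
 * Illustrative example (not from the original source): on a 64-bit
 * kernel, a reader whose task_struct sits at 0xffff888123456000 leaves
 * the owner field holding 0xffff888123456001, i.e. the task pointer
 * with RWSEM_READER_OWNED set in bit 0. rwsem_owner() masks off
 * RWSEM_OWNER_FLAGS_MASK to recover the pointer, while
 * rwsem_owner_flags() returns both halves separately.
 */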

#ifdef CONFIG_DEBUG_RWSEMS
# define DEBUG_RWSEMS_WARN_ON(c, sem)   do {                    \
        if (!debug_locks_silent &&                              \
            WARN_ONCE(c, "DEBUG_RWSEMS_WARN_ON(%s): count = 0x%lx, magic = 0x%lx, owner = 0x%lx, curr 0x%lx, list %sempty\n",\
                #c, atomic_long_read(&(sem)->count),            \
                (unsigned long) sem->magic,                     \
                atomic_long_read(&(sem)->owner), (long)current, \
                list_empty(&(sem)->wait_list) ? "" : "not "))   \
                        debug_locks_off();                      \
        } while (0)
#else
# define DEBUG_RWSEMS_WARN_ON(c, sem)
#endif

/*
 * On 64-bit architectures, the bit definitions of the count are:
 *
 * Bit  0    - writer locked bit
 * Bit  1    - waiters present bit
 * Bit  2    - lock handoff bit
 * Bits 3-7  - reserved
 * Bits 8-62 - 55-bit reader count
 * Bit  63   - read fail bit
 *
 * On 32-bit architectures, the bit definitions of the count are:
 *
 * Bit  0    - writer locked bit
 * Bit  1    - waiters present bit
 * Bit  2    - lock handoff bit
 * Bits 3-7  - reserved
 * Bits 8-30 - 23-bit reader count
 * Bit  31   - read fail bit
 *
 * It is not likely that the most significant bit (read fail bit) will
 * ever be set. This guard bit is still checked anyway in the down_read()
 * fastpath just in case we need to use up more of the reader count bits
 * for other purposes in the future.
 *
 * atomic_long_fetch_add() is used to obtain reader lock, whereas
 * atomic_long_cmpxchg() will be used to obtain writer lock.
 *
 * There are three places where the lock handoff bit may be set or cleared.
 * 1) rwsem_mark_wake() for readers.
 * 2) rwsem_try_write_lock() for writers.
 * 3) Error path of rwsem_down_write_slowpath().
 *
 * For all the above cases, wait_lock will be held when setting or clearing
 * the handoff bit, making sure that race condition will not occur.
 */
#define RWSEM_WRITER_LOCKED     (1UL << 0)
#define RWSEM_FLAG_WAITERS      (1UL << 1)
#define RWSEM_FLAG_HANDOFF      (1UL << 2)
#define RWSEM_FLAG_READFAIL     (1UL << (BITS_PER_LONG - 1))

#define RWSEM_READER_SHIFT      8
#define RWSEM_READER_BIAS       (1UL << RWSEM_READER_SHIFT)
#define RWSEM_READER_MASK       (~(RWSEM_READER_BIAS - 1))
#define RWSEM_WRITER_MASK       RWSEM_WRITER_LOCKED
#define RWSEM_LOCK_MASK         (RWSEM_WRITER_MASK|RWSEM_READER_MASK)
#define RWSEM_READ_FAILED_MASK  (RWSEM_WRITER_MASK|RWSEM_FLAG_WAITERS|\
                                 RWSEM_FLAG_HANDOFF|RWSEM_FLAG_READFAIL)

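/*
 * Worked example (illustrative, not from the original source): a count
 * value of 0x0000000000000502 on a 64-bit kernel decodes as a reader
 * count of 5 (0x500 >> RWSEM_READER_SHIFT) with RWSEM_FLAG_WAITERS set,
 * i.e. five readers hold the lock while at least one task is queued on
 * the wait list. A value of 0x1 means a writer holds the lock with an
 * empty wait list.
 */
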
/*
 * Store the current task pointer in the owner field when a writer takes
 * the lock, and clear it again on write unlock. Reader ownership is
 * handled separately below.
 */
static inline void rwsem_set_owner(struct rw_semaphore *sem)
{
        atomic_long_set(&sem->owner, (long)current);
}

static inline void rwsem_clear_owner(struct rw_semaphore *sem)
{
        atomic_long_set(&sem->owner, 0);
}

/*
 * Test the flags in the owner field.
 */
static inline bool rwsem_test_oflags(struct rw_semaphore *sem, long flags)
{
        return atomic_long_read(&sem->owner) & flags;
}

/*
 * The task_struct pointer of the last owning reader will be left in
 * the owner field.
 *
 * Note that the owner value just indicates the task has owned the rwsem
 * previously, it may not be the real owner or one of the real owners
 * anymore when that field is examined, so take it with a grain of salt.
 *
 * The reader non-spinnable bit is preserved.
 */
static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem,
                                            struct task_struct *owner)
{
        unsigned long val = (unsigned long)owner | RWSEM_READER_OWNED |
                (atomic_long_read(&sem->owner) & RWSEM_NONSPINNABLE);

        atomic_long_set(&sem->owner, val);
}

static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
{
        __rwsem_set_reader_owned(sem, current);
}

/*
 * Return true if the rwsem is owned by a reader.
 */
static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem)
{
#ifdef CONFIG_DEBUG_RWSEMS
        /*
         * Check the count to see if it is write-locked.
         */
        long count = atomic_long_read(&sem->count);

        if (count & RWSEM_WRITER_MASK)
                return false;
#endif
        return rwsem_test_oflags(sem, RWSEM_READER_OWNED);
}

#ifdef CONFIG_DEBUG_RWSEMS
/*
 * With CONFIG_DEBUG_RWSEMS configured, it will make sure that if there
 * is a task pointer in owner of a reader-owned rwsem, it will be the
 * real owner or one of the real owners. The only exception is when the
 * unlock is done by up_read_non_owner().
 */
static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
{
        unsigned long val = atomic_long_read(&sem->owner);

        while ((val & ~RWSEM_OWNER_FLAGS_MASK) == (unsigned long)current) {
                if (atomic_long_try_cmpxchg(&sem->owner, &val,
                                            val & RWSEM_OWNER_FLAGS_MASK))
                        return;
        }
}
#else
static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
{
}
#endif

/*
 * Set the RWSEM_NONSPINNABLE bits if the RWSEM_READER_OWNED flag
 * remains set. Otherwise, the operation will be aborted.
 */
static inline void rwsem_set_nonspinnable(struct rw_semaphore *sem)
{
        unsigned long owner = atomic_long_read(&sem->owner);

        do {
                if (!(owner & RWSEM_READER_OWNED))
                        break;
                if (owner & RWSEM_NONSPINNABLE)
                        break;
        } while (!atomic_long_try_cmpxchg(&sem->owner, &owner,
                                          owner | RWSEM_NONSPINNABLE));
}

static inline bool rwsem_read_trylock(struct rw_semaphore *sem, long *cntp)
{
        *cntp = atomic_long_add_return_acquire(RWSEM_READER_BIAS, &sem->count);

        if (WARN_ON_ONCE(*cntp < 0))
                rwsem_set_nonspinnable(sem);

        if (!(*cntp & RWSEM_READ_FAILED_MASK)) {
                rwsem_set_reader_owned(sem);
                return true;
        }

        return false;
}
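
/*
 * Illustrative fast-path walkthrough (not from the original source): if
 * the rwsem is free, the count is 0 and rwsem_read_trylock() bumps it to
 * RWSEM_READER_BIAS (0x100). None of the RWSEM_READ_FAILED_MASK bits
 * (writer locked, waiters, handoff, read-fail) are set in the result, so
 * the caller owns a read lock without ever touching the wait_lock. If a
 * writer already held the lock, the result would have RWSEM_WRITER_LOCKED
 * set and the caller would fall back to rwsem_down_read_slowpath().
 */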

static inline bool rwsem_write_trylock(struct rw_semaphore *sem)
{
        long tmp = RWSEM_UNLOCKED_VALUE;

        if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp, RWSEM_WRITER_LOCKED)) {
                rwsem_set_owner(sem);
                return true;
        }

        return false;
}

/*
 * Return just the real task structure pointer of the owner
 */
static inline struct task_struct *rwsem_owner(struct rw_semaphore *sem)
{
        return (struct task_struct *)
                (atomic_long_read(&sem->owner) & ~RWSEM_OWNER_FLAGS_MASK);
}

/*
 * Return the real task structure pointer of the owner and the embedded
 * flags in the owner. pflags must be non-NULL.
 */
static inline struct task_struct *
rwsem_owner_flags(struct rw_semaphore *sem, unsigned long *pflags)
{
        unsigned long owner = atomic_long_read(&sem->owner);

        *pflags = owner & RWSEM_OWNER_FLAGS_MASK;
        return (struct task_struct *)(owner & ~RWSEM_OWNER_FLAGS_MASK);
}

/*
 * Guide to the rw_semaphore's count field.
 *
 * When the RWSEM_WRITER_LOCKED bit in count is set, the lock is owned
 * by a writer.
 *
 * The lock is owned by readers when
 * (1) the RWSEM_WRITER_LOCKED isn't set in count,
 * (2) some of the reader bits are set in count, and
 * (3) the owner field has RWSEM_READ_OWNED bit set.
 *
 * Having some reader bits set is not enough to guarantee a readers owned
 * lock as the readers may be in the process of backing out from the count
 * and a writer has just released the lock. So another writer may steal
 * the lock immediately after the release.
 */

/*
 * Initialize an rwsem:
 */
void __init_rwsem(struct rw_semaphore *sem, const char *name,
                  struct lock_class_key *key)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
        /*
         * Make sure we are not reinitializing a held semaphore:
         */
        debug_check_no_locks_freed((void *)sem, sizeof(*sem));
        lockdep_init_map_wait(&sem->dep_map, name, key, 0, LD_WAIT_SLEEP);
#endif
#ifdef CONFIG_DEBUG_RWSEMS
        sem->magic = sem;
#endif
        atomic_long_set(&sem->count, RWSEM_UNLOCKED_VALUE);
        raw_spin_lock_init(&sem->wait_lock);
        INIT_LIST_HEAD(&sem->wait_list);
        atomic_long_set(&sem->owner, 0L);
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
        osq_lock_init(&sem->osq);
#endif
}
EXPORT_SYMBOL(__init_rwsem);
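
/*
 * Usage sketch (illustrative, not part of the original file): callers
 * normally do not invoke __init_rwsem() directly. init_rwsem() expands to
 * a call to this function with a static lockdep class key, while
 * DECLARE_RWSEM(my_sem) provides an equivalent static initializer for a
 * file-scope semaphore.
 */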

enum rwsem_waiter_type {
        RWSEM_WAITING_FOR_WRITE,
        RWSEM_WAITING_FOR_READ
};

struct rwsem_waiter {
        struct list_head list;
        struct task_struct *task;
        enum rwsem_waiter_type type;
        unsigned long timeout;
};
#define rwsem_first_waiter(sem) \
        list_first_entry(&sem->wait_list, struct rwsem_waiter, list)

enum rwsem_wake_type {
        RWSEM_WAKE_ANY,         /* Wake whatever's at head of wait list */
        RWSEM_WAKE_READERS,     /* Wake readers only */
        RWSEM_WAKE_READ_OWNED   /* Waker thread holds the read lock */
};

enum writer_wait_state {
        WRITER_NOT_FIRST,       /* Writer is not first in wait list */
        WRITER_FIRST,           /* Writer is first in wait list     */
        WRITER_HANDOFF          /* Writer is first & handoff needed */
};

/*
 * The typical HZ value is either 250 or 1000. So set the minimum waiting
 * time to at least 4ms or 1 jiffy (if it is higher than 4ms) in the wait
 * queue before initiating the handoff protocol.
 */
#define RWSEM_WAIT_TIMEOUT      DIV_ROUND_UP(HZ, 250)
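
/*
 * Worked example (illustrative): with HZ=250, DIV_ROUND_UP(250, 250) = 1
 * jiffy, i.e. a 4ms wait before a queued waiter may request handoff; with
 * HZ=1000 it is DIV_ROUND_UP(1000, 250) = 4 jiffies, which is also 4ms.
 * With HZ=100 it rounds up to 1 jiffy, i.e. 10ms.
 */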

/*
 * Magic number to batch-wakeup waiting readers, even when writers are
 * also present in the queue. This both limits the amount of work the
 * waking thread must do and also prevents any potential counter overflow,
 * however unlikely.
 */
#define MAX_READERS_WAKEUP      0x100

/*
 * handle the lock release when processes blocked on it that can now run
 * - if we come here from up_xxxx(), then the RWSEM_FLAG_WAITERS bit must
 *   have been set.
 * - there must be someone on the queue
 * - the wait_lock must be held by the caller
 * - tasks are marked for wakeup, the caller must later invoke wake_up_q()
 *   to actually wakeup the blocked task(s), preferably when the wait_lock
 *   is released
 * - woken process blocks are discarded from the list after having task zeroed
 * - writers are only marked woken if downgrading is false
 */
static void rwsem_mark_wake(struct rw_semaphore *sem,
                            enum rwsem_wake_type wake_type,
                            struct wake_q_head *wake_q)
{
        struct rwsem_waiter *waiter, *tmp;
        long oldcount, woken = 0, adjustment = 0;
        struct list_head wlist;

        lockdep_assert_held(&sem->wait_lock);

        /*
         * Take a peek at the queue head waiter such that we can determine
         * the wakeup(s) to perform.
         */
        waiter = rwsem_first_waiter(sem);

        if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
                if (wake_type == RWSEM_WAKE_ANY) {
                        /*
                         * Mark writer at the front of the queue for wakeup.
                         * Until the task is actually awoken later by the
                         * caller, other writers are able to steal it.
                         * Readers, on the other hand, will block as they
                         * will notice the queued writer.
                         */
                        wake_q_add(wake_q, waiter->task);
                        lockevent_inc(rwsem_wake_writer);
                }

                return;
        }

        /*
         * No reader wakeup if there are too many of them already.
         */
        if (unlikely(atomic_long_read(&sem->count) < 0))
                return;

        /*
         * Readers cannot be granted the lock unconditionally: the reader
         * at the head of the queue is granted its bias first, and then we
         * check whether a writer snuck in ahead of it.
         */
        if (wake_type != RWSEM_WAKE_READ_OWNED) {
                struct task_struct *owner;

                adjustment = RWSEM_READER_BIAS;
                oldcount = atomic_long_fetch_add(adjustment, &sem->count);
                if (unlikely(oldcount & RWSEM_WRITER_MASK)) {
                        /*
                         * When we've been waiting "too" long (for writers
                         * to give up the lock), request a HANDOFF to
                         * force the issue.
                         */
                        if (!(oldcount & RWSEM_FLAG_HANDOFF) &&
                            time_after(jiffies, waiter->timeout)) {
                                adjustment -= RWSEM_FLAG_HANDOFF;
                                lockevent_inc(rwsem_rlock_handoff);
                        }

                        atomic_long_add(-adjustment, &sem->count);
                        return;
                }
                /*
                 * Set it to reader-owned to give spinners an early
                 * indication that readers now have the lock.
                 */
                owner = waiter->task;
                __rwsem_set_reader_owned(sem, owner);
        }

        /*
         * Grant up to MAX_READERS_WAKEUP read locks to all the readers in the
         * queue. We know that the woken will be at least 1 as we accounted
         * for above. Note we increment the 'active part' of the count by the
         * number of readers before waking any processes up.
         *
         * This is an adaptation of the phase-fair R/W locks where at the
         * reader phase (first waiter is a reader), all readers are eligible
         * to acquire the lock at the same time irrespective of their order
         * in the queue. The writers acquire the lock according to their
         * order in the queue.
         *
         * We have to do wakeup in 2 passes to prevent the possibility that
         * the reader count may be decremented before it is incremented. It
         * is because the to-be-woken waiter may not have slept yet. So it
         * may see the waiter->task cleared and call schedule() prematurely
         * before the reader count increment.
         *
         * 1) Collect the read-waiters in a separate list, count them and
         *    fully increment the reader count in rwsem.
         * 2) For each waiter in the new list, clear the waiter->task and
         *    put them into wake_q to be woken up later.
         */
        INIT_LIST_HEAD(&wlist);
        list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) {
                if (waiter->type == RWSEM_WAITING_FOR_WRITE)
                        continue;

                woken++;
                list_move_tail(&waiter->list, &wlist);

                /*
                 * Limit # of readers that can be woken up per wakeup call.
                 */
                if (woken >= MAX_READERS_WAKEUP)
                        break;
        }

        adjustment = woken * RWSEM_READER_BIAS - adjustment;
        lockevent_cond_inc(rwsem_wake_reader, woken);
        if (list_empty(&sem->wait_list)) {
                /* hit end of list above */
                adjustment -= RWSEM_FLAG_WAITERS;
        }

        /*
         * When we've woken a reader, we no longer need to force writers
         * to give up the lock and we can clear HANDOFF.
         */
        if (woken && (atomic_long_read(&sem->count) & RWSEM_FLAG_HANDOFF))
                adjustment -= RWSEM_FLAG_HANDOFF;

        if (adjustment)
                atomic_long_add(adjustment, &sem->count);

        /* 2nd pass */
        list_for_each_entry_safe(waiter, tmp, &wlist, list) {
                struct task_struct *tsk;

                tsk = waiter->task;
                get_task_struct(tsk);

                /*
                 * Ensure calling get_task_struct() before setting the reader
                 * waiter to nil such that rwsem_down_read_slowpath() cannot
                 * race with do_exit() by always holding a reference count
                 * to the task to wakeup.
                 */
                smp_store_release(&waiter->task, NULL);
                /*
                 * Ensure issuing the wakeup (either by us or someone else)
                 * after setting the reader waiter to nil.
                 */
                wake_q_add_safe(wake_q, tsk);
        }
}

/*
 * This function must be called with the sem->wait_lock held to prevent
 * race conditions between checking the rwsem wait list and setting the
 * sem->count accordingly.
 *
 * If wstate is WRITER_HANDOFF, it will make sure that either the handoff
 * bit is set or the lock is acquired with handoff bit cleared.
 */
static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
                                        enum writer_wait_state wstate)
{
        long count, new;

        lockdep_assert_held(&sem->wait_lock);

        count = atomic_long_read(&sem->count);
        do {
                bool has_handoff = !!(count & RWSEM_FLAG_HANDOFF);

                if (has_handoff && wstate == WRITER_NOT_FIRST)
                        return false;

                new = count;

                if (count & RWSEM_LOCK_MASK) {
                        if (has_handoff || (wstate != WRITER_HANDOFF))
                                return false;

                        new |= RWSEM_FLAG_HANDOFF;
                } else {
                        new |= RWSEM_WRITER_LOCKED;
                        new &= ~RWSEM_FLAG_HANDOFF;

                        if (list_is_singular(&sem->wait_list))
                                new &= ~RWSEM_FLAG_WAITERS;
                }
        } while (!atomic_long_try_cmpxchg_acquire(&sem->count, &count, new));

        /*
         * We have either acquired the lock with handoff bit cleared or
         * set the handoff bit.
         */
        if (new & RWSEM_FLAG_HANDOFF)
                return false;

        rwsem_set_owner(sem);
        return true;
}

#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
/*
 * Try to acquire write lock before the writer has been put on wait queue.
 */
static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
{
        long count = atomic_long_read(&sem->count);

        while (!(count & (RWSEM_LOCK_MASK|RWSEM_FLAG_HANDOFF))) {
                if (atomic_long_try_cmpxchg_acquire(&sem->count, &count,
                                                    count | RWSEM_WRITER_LOCKED)) {
                        rwsem_set_owner(sem);
                        lockevent_inc(rwsem_opt_lock);
                        return true;
                }
        }
        return false;
}

static inline bool owner_on_cpu(struct task_struct *owner)
{
        /*
         * As lock holder preemption issue, we both skip spinning if
         * task is not on cpu or its cpu is preempted
         */
        return owner->on_cpu && !vcpu_is_preempted(task_cpu(owner));
}

static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
{
        struct task_struct *owner;
        unsigned long flags;
        bool ret = true;

        if (need_resched()) {
                lockevent_inc(rwsem_opt_fail);
                return false;
        }

        preempt_disable();
        rcu_read_lock();
        owner = rwsem_owner_flags(sem, &flags);
        /*
         * Don't check the read-owner as the entry may be stale.
         */
        if ((flags & RWSEM_NONSPINNABLE) ||
            (owner && !(flags & RWSEM_READER_OWNED) && !owner_on_cpu(owner)))
                ret = false;
        rcu_read_unlock();
        preempt_enable();

        lockevent_cond_inc(rwsem_opt_fail, !ret);
        return ret;
}

/*
 * The rwsem_spin_on_owner() function returns the following 4 values
 * depending on the lock owner state.
 *   OWNER_NULL  : owner is currently NULL
 *   OWNER_WRITER: when owner changes and is a writer
 *   OWNER_READER: when owner changes and the new owner may be a reader.
 *   OWNER_NONSPINNABLE:
 *                 when optimistic spinning has to stop because either the
 *                 owner stops running, is unknown, or its timeslice has
 *                 been used up.
 */
enum owner_state {
        OWNER_NULL              = 1 << 0,
        OWNER_WRITER            = 1 << 1,
        OWNER_READER            = 1 << 2,
        OWNER_NONSPINNABLE      = 1 << 3,
};
#define OWNER_SPINNABLE         (OWNER_NULL | OWNER_WRITER | OWNER_READER)

static inline enum owner_state
rwsem_owner_state(struct task_struct *owner, unsigned long flags)
{
        if (flags & RWSEM_NONSPINNABLE)
                return OWNER_NONSPINNABLE;

        if (flags & RWSEM_READER_OWNED)
                return OWNER_READER;

        return owner ? OWNER_WRITER : OWNER_NULL;
}

static noinline enum owner_state
rwsem_spin_on_owner(struct rw_semaphore *sem)
{
        struct task_struct *new, *owner;
        unsigned long flags, new_flags;
        enum owner_state state;

        owner = rwsem_owner_flags(sem, &flags);
        state = rwsem_owner_state(owner, flags);
        if (state != OWNER_WRITER)
                return state;

        rcu_read_lock();
        for (;;) {
                /*
                 * When a waiting writer set the handoff flag, it may spin
                 * on the owner as well. Once that writer acquires the lock,
                 * we can spin on it. So we don't need to quit even when the
                 * handoff bit is set.
                 */
                new = rwsem_owner_flags(sem, &new_flags);
                if ((new != owner) || (new_flags != flags)) {
                        state = rwsem_owner_state(new, new_flags);
                        break;
                }

                /*
                 * Ensure we emit the owner->on_cpu, dereference _after_
                 * checking sem->owner still matches owner, if that fails,
                 * owner might point to free()d memory, if it still matches,
                 * the rcu_read_lock() ensures the memory stays valid.
                 */
                barrier();

                if (need_resched() || !owner_on_cpu(owner)) {
                        state = OWNER_NONSPINNABLE;
                        break;
                }

                cpu_relax();
        }
        rcu_read_unlock();

        return state;
}

/*
 * Calculate reader-owned rwsem spinning threshold for writer
 *
 * The more readers own the rwsem, the longer it will take for them to
 * wind down and free the rwsem. So the empirical formula used to
 * determine the actual spinning time limit here is:
 *
 *   Spinning threshold = (10 + nr_readers/2)us
 *
 * The limit is capped to a maximum of 25us (30 readers). This is just
 * a heuristic and is not an exact number.
 */
static inline u64 rwsem_rspin_threshold(struct rw_semaphore *sem)
{
        long count = atomic_long_read(&sem->count);
        int readers = count >> RWSEM_READER_SHIFT;
        u64 delta;

        if (readers > 30)
                readers = 30;
        delta = (20 + readers) * NSEC_PER_USEC / 2;

        return sched_clock() + delta;
}
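
/*
 * Worked example (illustrative): with 6 readers holding the lock, delta =
 * (20 + 6) * NSEC_PER_USEC / 2 = 13us, i.e. the (10 + nr_readers/2)us
 * formula above. With 30 or more readers the delta saturates at
 * (20 + 30)/2 = 25us past the current sched_clock() value.
 */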

static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
{
        bool taken = false;
        int prev_owner_state = OWNER_NULL;
        int loop = 0;
        u64 rspin_threshold = 0;

        preempt_disable();

        /* sem->wait_lock should not be held when doing optimistic spinning */
        if (!osq_lock(&sem->osq))
                goto done;

        /*
         * Optimistically spin on the owner field and attempt to acquire the
         * lock whenever the owner changes. Spinning will be stopped when:
         *  1) the owning writer isn't running; or
         *  2) readers own the lock and spinning time has exceeded limit.
         */
        for (;;) {
                enum owner_state owner_state;

                owner_state = rwsem_spin_on_owner(sem);
                if (!(owner_state & OWNER_SPINNABLE))
                        break;

                /*
                 * Try to acquire the lock
                 */
                taken = rwsem_try_write_lock_unqueued(sem);

                if (taken)
                        break;

                /*
                 * Time-based reader-owned rwsem optimistic spinning
                 */
                if (owner_state == OWNER_READER) {
                        /*
                         * Re-initialize rspin_threshold every time when
                         * the owner state changes from non-reader to reader.
                         * This allows a writer to steal the lock in between
                         * 2 reader phases and have the threshold reset at
                         * the beginning of the 2nd reader phase.
                         */
                        if (prev_owner_state != OWNER_READER) {
                                if (rwsem_test_oflags(sem, RWSEM_NONSPINNABLE))
                                        break;
                                rspin_threshold = rwsem_rspin_threshold(sem);
                                loop = 0;
                        }

                        /*
                         * Check time threshold once every 16 iterations to
                         * avoid calling sched_clock() too frequently so
                         * as to reduce the average latency between the times
                         * when the lock becomes free and when the spinner
                         * is ready to do a trylock.
                         */
                        else if (!(++loop & 0xf) && (sched_clock() > rspin_threshold)) {
                                rwsem_set_nonspinnable(sem);
                                lockevent_inc(rwsem_opt_nospin);
                                break;
                        }
                }

                /*
                 * An RT task cannot do optimistic spinning if it cannot
                 * be sure the lock holder is running or live-lock may
                 * happen if the current task and the lock holder happen
                 * to run in the same CPU. However, aborting optimistic
                 * spinning while a NULL owner is detected may miss some
                 * opportunity where spinning can continue without causing
                 * problem.
                 *
                 * There are 2 possible cases where an RT task may be able
                 * to continue spinning.
                 *
                 * 1) The lock owner is in the process of releasing the
                 *    lock, sem->owner is cleared but the lock has not
                 *    been released yet.
                 * 2) The lock was free and owner cleared, but another
                 *    task just comes in and acquire the lock before
                 *    we try to get it. The new owner may be a spinnable
                 *    writer.
                 *
                 * To take advantage of two scenarios listed above, the RT
                 * task is made to retry one more time to see if it can
                 * acquire the lock or continue spinning on the new owning
                 * writer. Of course, if the time lag is long enough or the
                 * new owner is not a writer or spinnable, the RT task will
                 * go back to sleep.
                 *
                 * If the owner is a writer, the need_resched() check is
                 * done inside rwsem_spin_on_owner(). If the owner is not
                 * a writer, need_resched() check needs to be done here.
                 */
                if (owner_state != OWNER_WRITER) {
                        if (need_resched())
                                break;
                        if (rt_task(current) &&
                            (prev_owner_state != OWNER_WRITER))
                                break;
                }
                prev_owner_state = owner_state;

                /*
                 * The cpu_relax() call is a compiler barrier which forces
                 * everything in this loop to be re-loaded. We don't need
                 * memory barriers as we'll eventually observe the right
                 * values at the cost of a few extra spins.
                 */
                cpu_relax();
        }
        osq_unlock(&sem->osq);
done:
        preempt_enable();
        lockevent_cond_inc(rwsem_opt_fail, !taken);
        return taken;
}

/*
 * Clear the owner's RWSEM_NONSPINNABLE bit if it is set. This should
 * only be called when the reader count reaches 0.
 */
static inline void clear_nonspinnable(struct rw_semaphore *sem)
{
        if (rwsem_test_oflags(sem, RWSEM_NONSPINNABLE))
                atomic_long_andnot(RWSEM_NONSPINNABLE, &sem->owner);
}

#else
static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
{
        return false;
}

static inline bool rwsem_optimistic_spin(struct rw_semaphore *sem)
{
        return false;
}

static inline void clear_nonspinnable(struct rw_semaphore *sem) { }

static inline int
rwsem_spin_on_owner(struct rw_semaphore *sem)
{
        return 0;
}
#define OWNER_NULL      1
#endif

/*
 * Wait for the read lock to be granted
 */
static struct rw_semaphore __sched *
rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, unsigned int state)
{
        long adjustment = -RWSEM_READER_BIAS;
        long rcnt = (count >> RWSEM_READER_SHIFT);
        struct rwsem_waiter waiter;
        DEFINE_WAKE_Q(wake_q);
        bool wake = false;

        /*
         * To prevent a constant stream of readers from starving a sleeping
         * waiter, don't attempt optimistic lock stealing if the lock is
         * currently owned by readers.
         */
        if ((atomic_long_read(&sem->owner) & RWSEM_READER_OWNED) &&
            (rcnt > 1) && !(count & RWSEM_WRITER_LOCKED))
                goto queue;

        /*
         * Reader optimistic lock stealing.
         */
        if (!(count & (RWSEM_WRITER_LOCKED | RWSEM_FLAG_HANDOFF))) {
                rwsem_set_reader_owned(sem);
                lockevent_inc(rwsem_rlock_steal);

                /*
                 * Wake up other readers in the wait queue if it is
                 * the first reader.
                 */
                if ((rcnt == 1) && (count & RWSEM_FLAG_WAITERS)) {
                        raw_spin_lock_irq(&sem->wait_lock);
                        if (!list_empty(&sem->wait_list))
                                rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED,
                                                &wake_q);
                        raw_spin_unlock_irq(&sem->wait_lock);
                        wake_up_q(&wake_q);
                }
                return sem;
        }

queue:
        waiter.task = current;
        waiter.type = RWSEM_WAITING_FOR_READ;
        waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;

        raw_spin_lock_irq(&sem->wait_lock);
        if (list_empty(&sem->wait_list)) {
                /*
                 * In case the wait queue is empty and the lock isn't owned
                 * by a writer or has the handoff bit set, it can actually
                 * get the read lock and return immediately as its
                 * RWSEM_READER_BIAS has already been set in the count.
                 */
                if (!(atomic_long_read(&sem->count) &
                     (RWSEM_WRITER_MASK | RWSEM_FLAG_HANDOFF))) {
                        /* Provide lock ACQUIRE */
                        smp_acquire__after_ctrl_dep();
                        raw_spin_unlock_irq(&sem->wait_lock);
                        rwsem_set_reader_owned(sem);
                        lockevent_inc(rwsem_rlock_fast);
                        return sem;
                }
                adjustment += RWSEM_FLAG_WAITERS;
        }
        list_add_tail(&waiter.list, &sem->wait_list);

        /* we're now waiting on the lock, but no longer actively locking */
        count = atomic_long_add_return(adjustment, &sem->count);

        /*
         * If there are no active locks, wake the front queued process(es).
         *
         * If there are no writers and we are first in the queue,
         * wake our own waiter to join the existing active readers !
         */
        if (!(count & RWSEM_LOCK_MASK)) {
                clear_nonspinnable(sem);
                wake = true;
        }
        if (wake || (!(count & RWSEM_WRITER_MASK) &&
                    (adjustment & RWSEM_FLAG_WAITERS)))
                rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);

        raw_spin_unlock_irq(&sem->wait_lock);
        wake_up_q(&wake_q);

        /* wait to be given the lock */
        for (;;) {
                set_current_state(state);
                if (!smp_load_acquire(&waiter.task)) {
                        /* Matches rwsem_mark_wake()'s smp_store_release(). */
                        break;
                }
                if (signal_pending_state(state, current)) {
                        raw_spin_lock_irq(&sem->wait_lock);
                        if (waiter.task)
                                goto out_nolock;
                        raw_spin_unlock_irq(&sem->wait_lock);
                        /* Ordered against rwsem_mark_wake()'s smp_store_release(). */
                        break;
                }
                schedule();
                lockevent_inc(rwsem_sleep_reader);
        }

        __set_current_state(TASK_RUNNING);
        lockevent_inc(rwsem_rlock);
        return sem;

out_nolock:
        list_del(&waiter.list);
        if (list_empty(&sem->wait_list)) {
                atomic_long_andnot(RWSEM_FLAG_WAITERS|RWSEM_FLAG_HANDOFF,
                                   &sem->count);
        }
        raw_spin_unlock_irq(&sem->wait_lock);
        __set_current_state(TASK_RUNNING);
        lockevent_inc(rwsem_rlock_fail);
        return ERR_PTR(-EINTR);
}

/*
 * Wait until we successfully acquire the write lock
 */
static struct rw_semaphore *
rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
{
        long count;
        enum writer_wait_state wstate;
        struct rwsem_waiter waiter;
        struct rw_semaphore *ret = sem;
        DEFINE_WAKE_Q(wake_q);

        /* do optimistic spinning and steal lock if possible */
        if (rwsem_can_spin_on_owner(sem) && rwsem_optimistic_spin(sem)) {
                /* rwsem_optimistic_spin() implies ACQUIRE on success */
                return sem;
        }

        /*
         * Optimistic spinning failed, proceed to the slowpath
         * and block until we can acquire the sem.
         */
        waiter.task = current;
        waiter.type = RWSEM_WAITING_FOR_WRITE;
        waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;

        raw_spin_lock_irq(&sem->wait_lock);

        /* account for this before adding a new element to the list */
        wstate = list_empty(&sem->wait_list) ? WRITER_FIRST : WRITER_NOT_FIRST;

        list_add_tail(&waiter.list, &sem->wait_list);

        /* we're now waiting on the lock */
        if (wstate == WRITER_NOT_FIRST) {
                count = atomic_long_read(&sem->count);

                /*
                 * If there were already threads queued before us and:
                 *  there are no active locks, wake the front
                 *  queued process(es) as the handoff bit might be set.
                 *  there are no active writers and some readers, the lock
                 *  must be read owned; so we try to wake any read lock
                 *  waiters that were queued ahead of us.
                 */
                if (count & RWSEM_WRITER_MASK)
                        goto wait;

                rwsem_mark_wake(sem, (count & RWSEM_READER_MASK)
                                        ? RWSEM_WAKE_READERS
                                        : RWSEM_WAKE_ANY, &wake_q);

                if (!wake_q_empty(&wake_q)) {
                        /*
                         * We want to minimize wait_lock hold time especially
                         * when a large number of readers are to be woken up.
                         */
                        raw_spin_unlock_irq(&sem->wait_lock);
                        wake_up_q(&wake_q);
                        wake_q_init(&wake_q);   /* Used again, reinit */
                        raw_spin_lock_irq(&sem->wait_lock);
                }
        } else {
                atomic_long_or(RWSEM_FLAG_WAITERS, &sem->count);
        }

wait:
        /* wait until we successfully acquire the lock */
        set_current_state(state);
        for (;;) {
                if (rwsem_try_write_lock(sem, wstate)) {
                        /* rwsem_try_write_lock() implies ACQUIRE on success */
                        break;
                }

                raw_spin_unlock_irq(&sem->wait_lock);

                /*
                 * After setting the handoff bit and failing to acquire
                 * the lock, attempt to spin on owner to accelerate lock
                 * transfer. If the previous owner is a on-cpu writer and it
                 * has just released the lock, OWNER_NULL will be returned.
                 * In this case, we attempt to acquire the lock again
                 * without sleeping.
                 */
                if (wstate == WRITER_HANDOFF &&
                    rwsem_spin_on_owner(sem) == OWNER_NULL)
                        goto trylock_again;

                /* Block until there are no active lockers. */
                for (;;) {
                        if (signal_pending_state(state, current))
                                goto out_nolock;

                        schedule();
                        lockevent_inc(rwsem_sleep_writer);
                        set_current_state(state);
                        /*
                         * If HANDOFF bit is set, unconditionally do
                         * a trylock.
                         */
                        if (wstate == WRITER_HANDOFF)
                                break;

                        if ((wstate == WRITER_NOT_FIRST) &&
                            (rwsem_first_waiter(sem) == &waiter))
                                wstate = WRITER_FIRST;

                        count = atomic_long_read(&sem->count);
                        if (!(count & RWSEM_LOCK_MASK))
                                break;

                        /*
                         * The setting of the handoff bit is deferred
                         * until rwsem_try_write_lock() is called.
                         */
                        if ((wstate == WRITER_FIRST) && (rt_task(current) ||
                            time_after(jiffies, waiter.timeout))) {
                                wstate = WRITER_HANDOFF;
                                lockevent_inc(rwsem_wlock_handoff);
                                break;
                        }
                }
trylock_again:
                raw_spin_lock_irq(&sem->wait_lock);
        }
        __set_current_state(TASK_RUNNING);
        list_del(&waiter.list);
        raw_spin_unlock_irq(&sem->wait_lock);
        lockevent_inc(rwsem_wlock);

        return ret;

out_nolock:
        __set_current_state(TASK_RUNNING);
        raw_spin_lock_irq(&sem->wait_lock);
        list_del(&waiter.list);

        if (unlikely(wstate == WRITER_HANDOFF))
                atomic_long_add(-RWSEM_FLAG_HANDOFF, &sem->count);

        if (list_empty(&sem->wait_list))
                atomic_long_andnot(RWSEM_FLAG_WAITERS, &sem->count);
        else
                rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
        raw_spin_unlock_irq(&sem->wait_lock);
        wake_up_q(&wake_q);
        lockevent_inc(rwsem_wlock_fail);

        return ERR_PTR(-EINTR);
}

/*
 * handle waking up a waiter on the semaphore
 * - up_read/up_write has decremented the active part of count if we come here
 */
static struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem, long count)
{
        unsigned long flags;
        DEFINE_WAKE_Q(wake_q);

        raw_spin_lock_irqsave(&sem->wait_lock, flags);

        if (!list_empty(&sem->wait_list))
                rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);

        raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
        wake_up_q(&wake_q);

        return sem;
}

/*
 * downgrade a write lock into a read lock
 * - caller incremented waiting part of count and discovered it still negative
 * - just wake up any readers at the front of the queue
 */
static struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
{
        unsigned long flags;
        DEFINE_WAKE_Q(wake_q);

        raw_spin_lock_irqsave(&sem->wait_lock, flags);

        if (!list_empty(&sem->wait_list))
                rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q);

        raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
        wake_up_q(&wake_q);

        return sem;
}

/*
 * lock for reading
 */
static inline int __down_read_common(struct rw_semaphore *sem, int state)
{
        long count;

        if (!rwsem_read_trylock(sem, &count)) {
                if (IS_ERR(rwsem_down_read_slowpath(sem, count, state)))
                        return -EINTR;
                DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
        }
        return 0;
}

static inline void __down_read(struct rw_semaphore *sem)
{
        __down_read_common(sem, TASK_UNINTERRUPTIBLE);
}

static inline int __down_read_interruptible(struct rw_semaphore *sem)
{
        return __down_read_common(sem, TASK_INTERRUPTIBLE);
}

static inline int __down_read_killable(struct rw_semaphore *sem)
{
        return __down_read_common(sem, TASK_KILLABLE);
}

static inline int __down_read_trylock(struct rw_semaphore *sem)
{
        long tmp;

        DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);

        /*
         * Optimize for the case when the rwsem is not locked at all.
         */
        tmp = RWSEM_UNLOCKED_VALUE;
        do {
                if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
                                                    tmp + RWSEM_READER_BIAS)) {
                        rwsem_set_reader_owned(sem);
                        return 1;
                }
        } while (!(tmp & RWSEM_READ_FAILED_MASK));
        return 0;
}

/*
 * lock for writing
 */
static inline int __down_write_common(struct rw_semaphore *sem, int state)
{
        if (unlikely(!rwsem_write_trylock(sem))) {
                if (IS_ERR(rwsem_down_write_slowpath(sem, state)))
                        return -EINTR;
        }

        return 0;
}

static inline void __down_write(struct rw_semaphore *sem)
{
        __down_write_common(sem, TASK_UNINTERRUPTIBLE);
}

static inline int __down_write_killable(struct rw_semaphore *sem)
{
        return __down_write_common(sem, TASK_KILLABLE);
}

static inline int __down_write_trylock(struct rw_semaphore *sem)
{
        DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
        return rwsem_write_trylock(sem);
}

/*
 * unlock after reading
 */
static inline void __up_read(struct rw_semaphore *sem)
{
        long tmp;

        DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
        DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);

        rwsem_clear_reader_owned(sem);
        tmp = atomic_long_add_return_release(-RWSEM_READER_BIAS, &sem->count);
        DEBUG_RWSEMS_WARN_ON(tmp < 0, sem);
        if (unlikely((tmp & (RWSEM_LOCK_MASK|RWSEM_FLAG_WAITERS)) ==
                      RWSEM_FLAG_WAITERS)) {
                clear_nonspinnable(sem);
                rwsem_wake(sem, tmp);
        }
}

/*
 * unlock after writing
 */
static inline void __up_write(struct rw_semaphore *sem)
{
        long tmp;

        DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
        /*
         * sem->owner may differ from current if the ownership is transferred
         * to an anonymous writer by setting the RWSEM_NONSPINNABLE bits.
         */
        DEBUG_RWSEMS_WARN_ON((rwsem_owner(sem) != current) &&
                            !rwsem_test_oflags(sem, RWSEM_NONSPINNABLE), sem);

        rwsem_clear_owner(sem);
        tmp = atomic_long_fetch_add_release(-RWSEM_WRITER_LOCKED, &sem->count);
        if (unlikely(tmp & RWSEM_FLAG_WAITERS))
                rwsem_wake(sem, tmp);
}

/*
 * downgrade write lock to read lock
 */
static inline void __downgrade_write(struct rw_semaphore *sem)
{
        long tmp;

        /*
         * When downgrading from exclusive to shared ownership,
         * anything inside the write-locked region cannot leak
         * into the read side. In contrast, anything in the
         * read-locked region is ok to be re-ordered into the
         * write side. As such, rely on RELEASE semantics.
         */
        DEBUG_RWSEMS_WARN_ON(rwsem_owner(sem) != current, sem);
        tmp = atomic_long_fetch_add_release(
                -RWSEM_WRITER_LOCKED+RWSEM_READER_BIAS, &sem->count);
        rwsem_set_reader_owned(sem);
        if (tmp & RWSEM_FLAG_WAITERS)
                rwsem_downgrade_wake(sem);
}

/*
 * lock for reading
 */
void __sched down_read(struct rw_semaphore *sem)
{
        might_sleep();
        rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);

        LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
}
EXPORT_SYMBOL(down_read);

int __sched down_read_interruptible(struct rw_semaphore *sem)
{
        might_sleep();
        rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);

        if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_interruptible)) {
                rwsem_release(&sem->dep_map, _RET_IP_);
                return -EINTR;
        }

        return 0;
}
EXPORT_SYMBOL(down_read_interruptible);

int __sched down_read_killable(struct rw_semaphore *sem)
{
        might_sleep();
        rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);

        if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_killable)) {
                rwsem_release(&sem->dep_map, _RET_IP_);
                return -EINTR;
        }

        return 0;
}
EXPORT_SYMBOL(down_read_killable);

/*
 * trylock for reading -- returns 1 if successful, 0 if contention
 */
int down_read_trylock(struct rw_semaphore *sem)
{
        int ret = __down_read_trylock(sem);

        if (ret == 1)
                rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_);
        return ret;
}
EXPORT_SYMBOL(down_read_trylock);

/*
 * lock for writing
 */
void __sched down_write(struct rw_semaphore *sem)
{
        might_sleep();
        rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
        LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
}
EXPORT_SYMBOL(down_write);

/*
 * lock for writing
 */
int __sched down_write_killable(struct rw_semaphore *sem)
{
        might_sleep();
        rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);

        if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock,
                                  __down_write_killable)) {
                rwsem_release(&sem->dep_map, _RET_IP_);
                return -EINTR;
        }

        return 0;
}
EXPORT_SYMBOL(down_write_killable);

/*
 * trylock for writing -- returns 1 if successful, 0 if contention
 */
int down_write_trylock(struct rw_semaphore *sem)
{
        int ret = __down_write_trylock(sem);

        if (ret == 1)
                rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_);

        return ret;
}
EXPORT_SYMBOL(down_write_trylock);

/*
 * release a read lock
 */
void up_read(struct rw_semaphore *sem)
{
        rwsem_release(&sem->dep_map, _RET_IP_);
        __up_read(sem);
}
EXPORT_SYMBOL(up_read);

/*
 * release a write lock
 */
void up_write(struct rw_semaphore *sem)
{
        rwsem_release(&sem->dep_map, _RET_IP_);
        __up_write(sem);
}
EXPORT_SYMBOL(up_write);

/*
 * downgrade write lock to read lock
 */
void downgrade_write(struct rw_semaphore *sem)
{
        lock_downgrade(&sem->dep_map, _RET_IP_);
        __downgrade_write(sem);
}
EXPORT_SYMBOL(downgrade_write);
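
/*
 * Usage sketch (illustrative, not part of the original file): a typical
 * caller pairs these entry points around its critical sections, e.g.
 *
 *      static DECLARE_RWSEM(my_sem);
 *
 *      down_read(&my_sem);
 *      ... read shared state ...
 *      up_read(&my_sem);
 *
 *      down_write(&my_sem);
 *      ... modify shared state ...
 *      downgrade_write(&my_sem);   // keep reading without excluding readers
 *      ... read shared state ...
 *      up_read(&my_sem);
 *
 * After downgrade_write(), the lock must be released with up_read(), not
 * up_write().
 */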

#ifdef CONFIG_DEBUG_LOCK_ALLOC

void down_read_nested(struct rw_semaphore *sem, int subclass)
{
        might_sleep();
        rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);
        LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
}
EXPORT_SYMBOL(down_read_nested);

int down_read_killable_nested(struct rw_semaphore *sem, int subclass)
{
        might_sleep();
        rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);

        if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_killable)) {
                rwsem_release(&sem->dep_map, _RET_IP_);
                return -EINTR;
        }

        return 0;
}
EXPORT_SYMBOL(down_read_killable_nested);

void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest)
{
        might_sleep();
        rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_);
        LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
}
EXPORT_SYMBOL(_down_write_nest_lock);

void down_read_non_owner(struct rw_semaphore *sem)
{
        might_sleep();
        __down_read(sem);
        __rwsem_set_reader_owned(sem, NULL);
}
EXPORT_SYMBOL(down_read_non_owner);

void down_write_nested(struct rw_semaphore *sem, int subclass)
{
        might_sleep();
        rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
        LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
}
EXPORT_SYMBOL(down_write_nested);

int __sched down_write_killable_nested(struct rw_semaphore *sem, int subclass)
{
        might_sleep();
        rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);

        if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock,
                                  __down_write_killable)) {
                rwsem_release(&sem->dep_map, _RET_IP_);
                return -EINTR;
        }

        return 0;
}
EXPORT_SYMBOL(down_write_killable_nested);

void up_read_non_owner(struct rw_semaphore *sem)
{
        DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
        __up_read(sem);
}
EXPORT_SYMBOL(up_read_non_owner);

#endif