rwbase_rt.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * RT-specific reader/writer semaphores and reader/writer locks
 *
 * down_write/write_lock()
 *  1) Lock rtmutex
 *  2) Remove the reader BIAS to force readers into the slow path
 *  3) Wait until all readers have left the critical section
 *  4) Mark it write locked
 *
 * up_write/write_unlock()
 *  1) Remove the write locked marker
 *  2) Set the reader BIAS, so readers can use the fast path again
 *  3) Unlock rtmutex, to release blocked readers
 *
 * down_read/read_lock()
 *  1) Try fast path acquisition (reader BIAS is set)
 *  2) Take rtmutex::wait_lock, which protects the writelocked flag
 *  3) If !writelocked, acquire it for read
 *  4) If writelocked, block on the rtmutex
 *  5) Unlock the rtmutex, goto 1)
 *
 * up_read/read_unlock()
 *  1) Try fast path release (reader count != 1)
 *  2) Wake the writer waiting in down_write()/write_lock() #3
 *
 * down_read/read_lock() #3 has the consequence that rw semaphores and rw
 * locks on RT are not writer fair, but writers, which should be avoided in
 * RT tasks (think mmap_sem), are subject to the rtmutex priority/DL
 * inheritance mechanism.
 *
 * It's possible to make the rw primitives writer fair by keeping a list of
 * active readers. A blocked writer would force all newly incoming readers
 * to block on the rtmutex, but the rtmutex would have to be proxy locked
 * for one reader after the other. We can't use multi-reader inheritance
 * because there is no way to support that with SCHED_DEADLINE.
 * Implementing the one-by-one reader boosting/handover mechanism would be
 * major surgery for very dubious value.
 *
 * The risk of writer starvation is there, but the pathological use cases
 * which trigger it are not necessarily the typical RT workloads.
 *
 * Fast-path orderings:
 * The lock/unlock of readers can run in fast paths: lock and unlock are only
 * atomic ops, and there is no inner lock to provide the ACQUIRE and RELEASE
 * semantics of rwbase_rt. The atomic ops must therefore provide _acquire()
 * and _release() (or stronger) ordering.
 *
 * Common code shared between RT rw_semaphore and rwlock
 */
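/*
 * Illustrative usage sketch, not part of the original file: on PREEMPT_RT
 * the regular rwsem API is built on top of the rwbase_* helpers below, so
 * the steps documented above are what down_read()/up_read() and
 * down_write()/up_write() end up doing. The example_* names are
 * hypothetical.
 */
#include <linux/rwsem.h>

static DECLARE_RWSEM(example_sem);

static void example_usage(void)
{
	down_read(&example_sem);	/* fast path: atomic increment while the reader BIAS is set */
	/* ... read-side critical section ... */
	up_read(&example_sem);		/* fast path: atomic decrement, no writer waiting */

	down_write(&example_sem);	/* lock rtmutex, remove the BIAS, wait for readers to drain */
	/* ... write-side critical section ... */
	up_write(&example_sem);		/* restore the BIAS, unlock the rtmutex */
}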
static __always_inline int rwbase_read_trylock(struct rwbase_rt *rwb)
{
	int r;

	/*
	 * Increment reader count, if sem->readers < 0, i.e. READER_BIAS is
	 * set.
	 */
	for (r = atomic_read(&rwb->readers); r < 0;) {
		if (likely(atomic_try_cmpxchg_acquire(&rwb->readers, &r, r + 1)))
			return 1;
	}
	return 0;
}
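/*
 * Standalone sketch (not kernel code) of the same fast-path pattern with
 * C11 atomics, to show how the signed counter encodes "BIAS present, no
 * writer": the counter starts at a large negative bias, so "value < 0"
 * means a reader may simply increment it. EXAMPLE_READER_BIAS and the
 * example_* names are assumptions that merely mirror the rwbase_rt scheme;
 * the magnitude is illustrative.
 */
#include <stdatomic.h>
#include <stdbool.h>

#define EXAMPLE_READER_BIAS	(-(1 << 30))	/* counter starts here when unlocked */

static bool example_read_trylock(atomic_int *readers)
{
	int r = atomic_load_explicit(readers, memory_order_relaxed);

	while (r < 0) {	/* bias still present => no writer owns the lock */
		/* On failure r is reloaded, just like atomic_try_cmpxchg_acquire() */
		if (atomic_compare_exchange_weak_explicit(readers, &r, r + 1,
							  memory_order_acquire,
							  memory_order_relaxed))
			return true;	/* reader accounted with ACQUIRE ordering */
	}
	return false;	/* bias removed: a writer is around, take the slow path */
}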
static int __sched __rwbase_read_lock(struct rwbase_rt *rwb,
				      unsigned int state)
{
	struct rt_mutex_base *rtm = &rwb->rtmutex;
	int ret;

	raw_spin_lock_irq(&rtm->wait_lock);

	/*
	 * Allow readers, as long as the writer has not completely
	 * acquired the semaphore for write.
	 */
	if (atomic_read(&rwb->readers) != WRITER_BIAS) {
		atomic_inc(&rwb->readers);
		raw_spin_unlock_irq(&rtm->wait_lock);
		return 0;
	}

	/*
	 * Call into the slow lock path with the rtmutex->wait_lock
	 * held, so this can't result in the following race:
	 *
	 * Reader1		Reader2		Writer
	 *			down_read()
	 *					down_write()
	 *					rtmutex_lock(m)
	 *					wait()
	 * down_read()
	 * unlock(m->wait_lock)
	 *			up_read()
	 *			wake(Writer)
	 *					lock(m->wait_lock)
	 *					sem->writelocked=true
	 *					unlock(m->wait_lock)
	 *
	 *					up_write()
	 *					sem->writelocked=false
	 *					rtmutex_unlock(m)
	 *			down_read()
	 *					down_write()
	 *					rtmutex_lock(m)
	 *					wait()
	 * rtmutex_lock(m)
	 *
	 * That would put Reader1 behind the writer waiting on
	 * Reader2 to call up_read(), which might be unbounded.
	 */
	trace_contention_begin(rwb, LCB_F_RT | LCB_F_READ);

	/*
	 * For rwlocks this returns 0 unconditionally, so the below
	 * !ret conditionals are optimized out.
	 */
	ret = rwbase_rtmutex_slowlock_locked(rtm, state);

	/*
	 * On success the rtmutex is held, so there can't be a writer
	 * active. Increment the reader count and immediately drop the
	 * rtmutex again.
	 *
	 * rtmutex->wait_lock has to be unlocked in any case of course.
	 */
	if (!ret)
		atomic_inc(&rwb->readers);
	raw_spin_unlock_irq(&rtm->wait_lock);
	if (!ret)
		rwbase_rtmutex_unlock(rtm);

	trace_contention_end(rwb, ret);
	return ret;
}
static __always_inline int rwbase_read_lock(struct rwbase_rt *rwb,
					    unsigned int state)
{
	if (rwbase_read_trylock(rwb))
		return 0;

	return __rwbase_read_lock(rwb, state);
}

static void __sched __rwbase_read_unlock(struct rwbase_rt *rwb,
					 unsigned int state)
{
	struct rt_mutex_base *rtm = &rwb->rtmutex;
	struct task_struct *owner;
	DEFINE_RT_WAKE_Q(wqh);

	raw_spin_lock_irq(&rtm->wait_lock);
	/*
	 * Wake the writer, i.e. the rtmutex owner. It might release the
	 * rtmutex concurrently in the fast path (due to a signal), but to
	 * clean up rwb->readers it needs to acquire rtm->wait_lock. The
	 * worst case which can happen is a spurious wakeup.
	 */
	owner = rt_mutex_owner(rtm);
	if (owner)
		rt_mutex_wake_q_add_task(&wqh, owner, state);

	/* Pairs with the preempt_enable in rt_mutex_wake_up_q() */
	preempt_disable();
	raw_spin_unlock_irq(&rtm->wait_lock);
	rt_mutex_wake_up_q(&wqh);
}

static __always_inline void rwbase_read_unlock(struct rwbase_rt *rwb,
					       unsigned int state)
{
	/*
	 * rwb->readers can only hit 0 when a writer is waiting for the
	 * active readers to leave the critical section.
	 *
	 * dec_and_test() is fully ordered, provides RELEASE.
	 */
	if (unlikely(atomic_dec_and_test(&rwb->readers)))
		__rwbase_read_unlock(rwb, state);
}
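/*
 * Worked counter trace (illustrative; assumes rwb->readers starts at
 * READER_BIAS when the lock is idle, as the rwbase initializer sets it up,
 * and that two readers are active when a writer arrives):
 *
 *	event					rwb->readers
 *	idle					READER_BIAS
 *	R1 down_read()  fast path		READER_BIAS + 1
 *	R2 down_read()  fast path		READER_BIAS + 2
 *	W  atomic_sub(READER_BIAS)		2
 *	R1 up_read()				1	(fast path, no wakeup)
 *	R2 up_read()				0	(dec_and_test -> wake writer)
 *	W  __rwbase_write_trylock()		WRITER_BIAS
 */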
static inline void __rwbase_write_unlock(struct rwbase_rt *rwb, int bias,
					 unsigned long flags)
{
	struct rt_mutex_base *rtm = &rwb->rtmutex;

	/*
	 * _release() is needed in case that reader is in fast path, pairing
	 * with atomic_try_cmpxchg_acquire() in rwbase_read_trylock().
	 */
	(void)atomic_add_return_release(READER_BIAS - bias, &rwb->readers);
	raw_spin_unlock_irqrestore(&rtm->wait_lock, flags);
	rwbase_rtmutex_unlock(rtm);
}

static inline void rwbase_write_unlock(struct rwbase_rt *rwb)
{
	struct rt_mutex_base *rtm = &rwb->rtmutex;
	unsigned long flags;

	raw_spin_lock_irqsave(&rtm->wait_lock, flags);
	__rwbase_write_unlock(rwb, WRITER_BIAS, flags);
}

static inline void rwbase_write_downgrade(struct rwbase_rt *rwb)
{
	struct rt_mutex_base *rtm = &rwb->rtmutex;
	unsigned long flags;

	raw_spin_lock_irqsave(&rtm->wait_lock, flags);
	/* Release it and account current as reader */
	__rwbase_write_unlock(rwb, WRITER_BIAS - 1, flags);
}
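/*
 * Worked arithmetic for the bias argument of __rwbase_write_unlock()
 * (illustrative; assumes rwb->readers sits at WRITER_BIAS while write
 * locked and at READER_BIAS when fully unlocked):
 *
 *	caller					bias		resulting readers
 *	rwbase_write_unlock()			WRITER_BIAS	READER_BIAS
 *	rwbase_write_downgrade()		WRITER_BIAS - 1	READER_BIAS + 1
 *	failed lock paths (see below)		0		READER_BIAS + n
 *
 * i.e. the downgrade path accounts the current task as one active reader,
 * and the failure paths in rwbase_write_lock()/rwbase_write_trylock()
 * simply give the removed READER_BIAS back on top of whatever readers (n)
 * accumulated in the meantime.
 */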
static inline bool __rwbase_write_trylock(struct rwbase_rt *rwb)
{
	/* Can do without CAS because we're serialized by wait_lock. */
	lockdep_assert_held(&rwb->rtmutex.wait_lock);

	/*
	 * _acquire is needed in case the reader is in the fast path, pairing
	 * with rwbase_read_unlock(), provides ACQUIRE.
	 */
	if (!atomic_read_acquire(&rwb->readers)) {
		atomic_set(&rwb->readers, WRITER_BIAS);
		return 1;
	}

	return 0;
}

static int __sched rwbase_write_lock(struct rwbase_rt *rwb,
				     unsigned int state)
{
	struct rt_mutex_base *rtm = &rwb->rtmutex;
	unsigned long flags;

	/* Take the rtmutex as a first step */
	if (rwbase_rtmutex_lock_state(rtm, state))
		return -EINTR;

	/* Force readers into slow path */
	atomic_sub(READER_BIAS, &rwb->readers);

	raw_spin_lock_irqsave(&rtm->wait_lock, flags);
	if (__rwbase_write_trylock(rwb))
		goto out_unlock;

	rwbase_set_and_save_current_state(state);
	trace_contention_begin(rwb, LCB_F_RT | LCB_F_WRITE);
	for (;;) {
		/* Optimized out for rwlocks */
		if (rwbase_signal_pending_state(state, current)) {
			rwbase_restore_current_state();
			__rwbase_write_unlock(rwb, 0, flags);
			trace_contention_end(rwb, -EINTR);
			return -EINTR;
		}

		if (__rwbase_write_trylock(rwb))
			break;

		raw_spin_unlock_irqrestore(&rtm->wait_lock, flags);
		rwbase_schedule();
		raw_spin_lock_irqsave(&rtm->wait_lock, flags);

		set_current_state(state);
	}
	rwbase_restore_current_state();
	trace_contention_end(rwb, 0);

out_unlock:
	raw_spin_unlock_irqrestore(&rtm->wait_lock, flags);
	return 0;
}
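/*
 * Illustrative caller, not part of the original file: down_write_killable()
 * reaches rwbase_write_lock() with TASK_KILLABLE on PREEMPT_RT, so a fatal
 * signal received while blocked above surfaces as -EINTR. The example_*
 * name is hypothetical; assumes <linux/rwsem.h>.
 */
static int example_write_killable(struct rw_semaphore *sem)
{
	if (down_write_killable(sem))
		return -EINTR;	/* interrupted in the loop above by a fatal signal */

	/* ... write-side critical section ... */
	up_write(sem);
	return 0;
}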
static inline int rwbase_write_trylock(struct rwbase_rt *rwb)
{
	struct rt_mutex_base *rtm = &rwb->rtmutex;
	unsigned long flags;

	if (!rwbase_rtmutex_trylock(rtm))
		return 0;

	atomic_sub(READER_BIAS, &rwb->readers);

	raw_spin_lock_irqsave(&rtm->wait_lock, flags);
	if (__rwbase_write_trylock(rwb)) {
		raw_spin_unlock_irqrestore(&rtm->wait_lock, flags);
		return 1;
	}
	__rwbase_write_unlock(rwb, 0, flags);
	return 0;
}
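/*
 * Standalone sketch (not kernel code) of the trylock shape above with C11
 * atomics, reusing EXAMPLE_READER_BIAS and the headers from the earlier
 * reader sketch: remove the reader bias, claim the lock if no readers are
 * left, otherwise give the bias back. The EXAMPLE_* constants and the
 * example_* name are assumptions mirroring the scheme; the real code is
 * serialized by rtmutex::wait_lock and therefore needs no CAS, whereas this
 * lock-free model uses one instead.
 */
#define EXAMPLE_WRITER_BIAS	(1 << 29)	/* positive => reader fast path (value < 0) fails */

static bool example_write_trylock(atomic_int *readers)
{
	/* Remove the (negative) reader bias: leaves the active reader count */
	atomic_fetch_sub_explicit(readers, EXAMPLE_READER_BIAS,
				  memory_order_relaxed);

	int expected = 0;	/* no active readers left */
	if (atomic_compare_exchange_strong_explicit(readers, &expected,
						    EXAMPLE_WRITER_BIAS,
						    memory_order_acquire,
						    memory_order_relaxed))
		return true;	/* write locked */

	/* Readers still active: restore the bias and report failure. */
	atomic_fetch_add_explicit(readers, EXAMPLE_READER_BIAS,
				  memory_order_release);
	return false;
}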