rtmutex.c 48 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * RT-Mutexes: simple blocking mutual exclusion locks with PI support
  4. *
  5. * started by Ingo Molnar and Thomas Gleixner.
  6. *
  7. * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <[email protected]>
  8. * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <[email protected]>
  9. * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
  10. * Copyright (C) 2006 Esben Nielsen
  11. * Adaptive Spinlocks:
  12. * Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich,
  13. * and Peter Morreale,
  14. * Adaptive Spinlocks simplification:
  15. * Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <[email protected]>
  16. *
  17. * See Documentation/locking/rt-mutex-design.rst for details.
  18. */
  19. #include <linux/sched.h>
  20. #include <linux/sched/debug.h>
  21. #include <linux/sched/deadline.h>
  22. #include <linux/sched/signal.h>
  23. #include <linux/sched/rt.h>
  24. #include <linux/sched/wake_q.h>
  25. #include <linux/ww_mutex.h>
  26. #include <trace/events/lock.h>
  27. #include <trace/hooks/dtask.h>
  28. #include "rtmutex_common.h"
  /*
   * ww_mutex glue.
   *
   * With WW_RT defined, build_ww_mutex() is true, ww_container_of() maps an
   * rt_mutex to its enclosing struct ww_mutex through the 'base' member, and
   * the ww helpers are pulled in from "ww_mutex.h".
   *
   * Without WW_RT, build_ww_mutex() is false, ww_container_of() yields NULL
   * and the helpers below are empty stubs (returning 0 where a value is
   * expected).
   */
  #ifdef WW_RT
  # define build_ww_mutex()        (true)
  # define ww_container_of(rtm)    container_of(rtm, struct ww_mutex, base)
  # include "ww_mutex.h"
  #else
  # define build_ww_mutex()        (false)
  # define ww_container_of(rtm)    NULL

  /* Stub: nothing to add, always reports success (0). */
  static inline int __ww_mutex_add_waiter(struct rt_mutex_waiter *waiter,
                                          struct rt_mutex *lock,
                                          struct ww_acquire_ctx *ww_ctx)
  {
      return 0;
  }

  /* Stub: no waiters to check. */
  static inline void __ww_mutex_check_waiters(struct rt_mutex *lock,
                                              struct ww_acquire_ctx *ww_ctx)
  {
  }

  /* Stub: no acquire bookkeeping. */
  static inline void ww_mutex_lock_acquired(struct ww_mutex *lock,
                                            struct ww_acquire_ctx *ww_ctx)
  {
  }

  /* Stub: never asks the waiter to back off, always returns 0. */
  static inline int __ww_mutex_check_kill(struct rt_mutex *lock,
                                          struct rt_mutex_waiter *waiter,
                                          struct ww_acquire_ctx *ww_ctx)
  {
      return 0;
  }
  #endif
  57. /*
  58. * lock->owner state tracking:
  59. *
  60. * lock->owner holds the task_struct pointer of the owner. Bit 0
  61. * is used to keep track of the "lock has waiters" state.
  62. *
  63. * owner        bit0
  64. * NULL         0     lock is free (fast acquire possible)
  65. * NULL         1     lock is free and has waiters and the top waiter
  66. *                    is going to take the lock*
  67. * taskpointer  0     lock is held (fast release possible)
  68. * taskpointer  1     lock is held and has waiters**
  69. *
  70. * The fast atomic compare exchange based acquire and release is only
  71. * possible when bit 0 of lock->owner is 0.
  72. *
  73. * (*) It also can be a transitional state when grabbing the lock
  74. * while ->wait_lock is held. To prevent any fast path cmpxchg to the lock,
  75. * we need to set the bit0 before looking at the lock, and the owner may be
  76. * NULL in this small time, hence this can be a transitional state.
  77. *
  78. * (**) There is a small time when bit 0 is set but there are no
  79. * waiters. This can happen when grabbing the lock in the slow path.
  80. * To prevent a cmpxchg of the owner releasing the lock, we need to
  81. * set this bit before looking at the lock.
  82. */
  83. static __always_inline struct task_struct *
  84. rt_mutex_owner_encode(struct rt_mutex_base *lock, struct task_struct *owner)
  85. {
  86. unsigned long val = (unsigned long)owner;
  87. if (rt_mutex_has_waiters(lock))
  88. val |= RT_MUTEX_HAS_WAITERS;
  89. return (struct task_struct *)val;
  90. }
  91. static __always_inline void
  92. rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner)
  93. {
  94. /*
  95. * lock->wait_lock is held but explicit acquire semantics are needed
  96. * for a new lock owner so WRITE_ONCE is insufficient.
  97. */
  98. xchg_acquire(&lock->owner, rt_mutex_owner_encode(lock, owner));
  99. }
  100. static __always_inline void rt_mutex_clear_owner(struct rt_mutex_base *lock)
  101. {
  102. /* lock->wait_lock is held so the unlock provides release semantics. */
  103. WRITE_ONCE(lock->owner, rt_mutex_owner_encode(lock, NULL));
  104. }
  105. static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock)
  106. {
  107. lock->owner = (struct task_struct *)
  108. ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
  109. }
  110. static __always_inline void
  111. fixup_rt_mutex_waiters(struct rt_mutex_base *lock, bool acquire_lock)
  112. {
  113. unsigned long owner, *p = (unsigned long *) &lock->owner;
  114. if (rt_mutex_has_waiters(lock))
  115. return;
  116. /*
  117. * The rbtree has no waiters enqueued, now make sure that the
  118. * lock->owner still has the waiters bit set, otherwise the
  119. * following can happen:
  120. *
  121. * CPU 0 CPU 1 CPU2
  122. * l->owner=T1
  123. * rt_mutex_lock(l)
  124. * lock(l->lock)
  125. * l->owner = T1 | HAS_WAITERS;
  126. * enqueue(T2)
  127. * boost()
  128. * unlock(l->lock)
  129. * block()
  130. *
  131. * rt_mutex_lock(l)
  132. * lock(l->lock)
  133. * l->owner = T1 | HAS_WAITERS;
  134. * enqueue(T3)
  135. * boost()
  136. * unlock(l->lock)
  137. * block()
  138. * signal(->T2) signal(->T3)
  139. * lock(l->lock)
  140. * dequeue(T2)
  141. * deboost()
  142. * unlock(l->lock)
  143. * lock(l->lock)
  144. * dequeue(T3)
  145. * ==> wait list is empty
  146. * deboost()
  147. * unlock(l->lock)
  148. * lock(l->lock)
  149. * fixup_rt_mutex_waiters()
  150. * if (wait_list_empty(l) {
  151. * l->owner = owner
  152. * owner = l->owner & ~HAS_WAITERS;
  153. * ==> l->owner = T1
  154. * }
  155. * lock(l->lock)
  156. * rt_mutex_unlock(l) fixup_rt_mutex_waiters()
  157. * if (wait_list_empty(l) {
  158. * owner = l->owner & ~HAS_WAITERS;
  159. * cmpxchg(l->owner, T1, NULL)
  160. * ===> Success (l->owner = NULL)
  161. *
  162. * l->owner = owner
  163. * ==> l->owner = T1
  164. * }
  165. *
  166. * With the check for the waiter bit in place T3 on CPU2 will not
  167. * overwrite. All tasks fiddling with the waiters bit are
  168. * serialized by l->lock, so nothing else can modify the waiters
  169. * bit. If the bit is set then nothing can change l->owner either
  170. * so the simple RMW is safe. The cmpxchg() will simply fail if it
  171. * happens in the middle of the RMW because the waiters bit is
  172. * still set.
  173. */
  174. owner = READ_ONCE(*p);
  175. if (owner & RT_MUTEX_HAS_WAITERS) {
  176. /*
  177. * See rt_mutex_set_owner() and rt_mutex_clear_owner() on
  178. * why xchg_acquire() is used for updating owner for
  179. * locking and WRITE_ONCE() for unlocking.
  180. *
  181. * WRITE_ONCE() would work for the acquire case too, but
  182. * in case that the lock acquisition failed it might
  183. * force other lockers into the slow path unnecessarily.
  184. */
  185. if (acquire_lock)
  186. xchg_acquire(p, owner & ~RT_MUTEX_HAS_WAITERS);
  187. else
  188. WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
  189. }
  190. }
  191. /*
  192. * We can speed up the acquire/release, if there's no debugging state to be
  193. * set up.
  194. */
  195. #ifndef CONFIG_DEBUG_RT_MUTEXES
  196. static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock,
  197. struct task_struct *old,
  198. struct task_struct *new)
  199. {
  200. return try_cmpxchg_acquire(&lock->owner, &old, new);
  201. }
  202. static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
  203. struct task_struct *old,
  204. struct task_struct *new)
  205. {
  206. return try_cmpxchg_release(&lock->owner, &old, new);
  207. }
  208. /*
  209. * Callers must hold the ->wait_lock -- which is the whole purpose as we force
  210. * all future threads that attempt to [Rmw] the lock to the slowpath. As such
  211. * relaxed semantics suffice.
  212. */
  213. static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
  214. {
  215. unsigned long owner, *p = (unsigned long *) &lock->owner;
  216. do {
  217. owner = *p;
  218. } while (cmpxchg_relaxed(p, owner,
  219. owner | RT_MUTEX_HAS_WAITERS) != owner);
  220. /*
  221. * The cmpxchg loop above is relaxed to avoid back-to-back ACQUIRE
  222. * operations in the event of contention. Ensure the successful
  223. * cmpxchg is visible.
  224. */
  225. smp_mb__after_atomic();
  226. }
  227. /*
  228. * Safe fastpath aware unlock:
  229. * 1) Clear the waiters bit
  230. * 2) Drop lock->wait_lock
  231. * 3) Try to unlock the lock with cmpxchg
  232. */
  233. static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex_base *lock,
  234. unsigned long flags)
  235. __releases(lock->wait_lock)
  236. {
  237. struct task_struct *owner = rt_mutex_owner(lock);
  238. clear_rt_mutex_waiters(lock);
  239. raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
  240. /*
  241. * If a new waiter comes in between the unlock and the cmpxchg
  242. * we have two situations:
  243. *
  244. * unlock(wait_lock);
  245. * lock(wait_lock);
  246. * cmpxchg(p, owner, 0) == owner
  247. * mark_rt_mutex_waiters(lock);
  248. * acquire(lock);
  249. * or:
  250. *
  251. * unlock(wait_lock);
  252. * lock(wait_lock);
  253. * mark_rt_mutex_waiters(lock);
  254. *
  255. * cmpxchg(p, owner, 0) != owner
  256. * enqueue_waiter();
  257. * unlock(wait_lock);
  258. * lock(wait_lock);
  259. * wake waiter();
  260. * unlock(wait_lock);
  261. * lock(wait_lock);
  262. * acquire(lock);
  263. */
  264. return rt_mutex_cmpxchg_release(lock, owner, NULL);
  265. }
  266. #else
  267. static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock,
  268. struct task_struct *old,
  269. struct task_struct *new)
  270. {
  271. return false;
  272. }
  273. static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
  274. struct task_struct *old,
  275. struct task_struct *new)
  276. {
  277. return false;
  278. }
  279. static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
  280. {
  281. lock->owner = (struct task_struct *)
  282. ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
  283. }
  284. /*
  285. * Simple slow path only version: lock->owner is protected by lock->wait_lock.
  286. */
  287. static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex_base *lock,
  288. unsigned long flags)
  289. __releases(lock->wait_lock)
  290. {
  291. lock->owner = NULL;
  292. raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
  293. return true;
  294. }
  295. #endif
  296. static __always_inline int __waiter_prio(struct task_struct *task)
  297. {
  298. int prio = task->prio;
  299. int waiter_prio = 0;
  300. trace_android_vh_rtmutex_waiter_prio(task, &waiter_prio);
  301. if (waiter_prio > 0)
  302. return waiter_prio;
  303. if (!rt_prio(prio))
  304. return DEFAULT_PRIO;
  305. return prio;
  306. }
  307. static __always_inline void
  308. waiter_update_prio(struct rt_mutex_waiter *waiter, struct task_struct *task)
  309. {
  310. waiter->prio = __waiter_prio(task);
  311. waiter->deadline = task->dl.deadline;
  312. }
  /*
   * Build a temporary rt_mutex_waiter carrying only the .prio and .deadline
   * of task @p and yield a pointer to it.
   *
   * Only use with rt_mutex_waiter_{less,equal}(): every other member is
   * zero-initialized, and the compound literal only lives as long as the
   * enclosing block.
   *
   * The expansion is fully parenthesized so that it behaves as a single
   * pointer expression wherever it is used. Note that @p is evaluated
   * twice, so it must not have side effects.
   */
  #define task_to_waiter(p) \
      (&(struct rt_mutex_waiter){ .prio = __waiter_prio(p), .deadline = (p)->dl.deadline })
  318. static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left,
  319. struct rt_mutex_waiter *right)
  320. {
  321. if (left->prio < right->prio)
  322. return 1;
  323. /*
  324. * If both waiters have dl_prio(), we check the deadlines of the
  325. * associated tasks.
  326. * If left waiter has a dl_prio(), and we didn't return 1 above,
  327. * then right waiter has a dl_prio() too.
  328. */
  329. if (dl_prio(left->prio))
  330. return dl_time_before(left->deadline, right->deadline);
  331. return 0;
  332. }
  333. static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left,
  334. struct rt_mutex_waiter *right)
  335. {
  336. if (left->prio != right->prio)
  337. return 0;
  338. /*
  339. * If both waiters have dl_prio(), we check the deadlines of the
  340. * associated tasks.
  341. * If left waiter has a dl_prio(), and we didn't return 0 above,
  342. * then right waiter has a dl_prio() too.
  343. */
  344. if (dl_prio(left->prio))
  345. return left->deadline == right->deadline;
  346. return 1;
  347. }
  348. static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter,
  349. struct rt_mutex_waiter *top_waiter)
  350. {
  351. bool ret = false;
  352. if (rt_mutex_waiter_less(waiter, top_waiter))
  353. return true;
  354. trace_android_vh_rt_mutex_steal(waiter->prio, top_waiter->prio, &ret);
  355. if (ret)
  356. return true;
  357. #ifdef RT_MUTEX_BUILD_SPINLOCKS
  358. /*
  359. * Note that RT tasks are excluded from same priority (lateral)
  360. * steals to prevent the introduction of an unbounded latency.
  361. */
  362. if (rt_prio(waiter->prio) || dl_prio(waiter->prio))
  363. return false;
  364. return rt_mutex_waiter_equal(waiter, top_waiter);
  365. #else
  366. return false;
  367. #endif
  368. }
  369. #define __node_2_waiter(node) \
  370. rb_entry((node), struct rt_mutex_waiter, tree_entry)
  371. static __always_inline bool __waiter_less(struct rb_node *a, const struct rb_node *b)
  372. {
  373. struct rt_mutex_waiter *aw = __node_2_waiter(a);
  374. struct rt_mutex_waiter *bw = __node_2_waiter(b);
  375. if (rt_mutex_waiter_less(aw, bw))
  376. return 1;
  377. if (!build_ww_mutex())
  378. return 0;
  379. if (rt_mutex_waiter_less(bw, aw))
  380. return 0;
  381. /* NOTE: relies on waiter->ww_ctx being set before insertion */
  382. if (aw->ww_ctx) {
  383. if (!bw->ww_ctx)
  384. return 1;
  385. return (signed long)(aw->ww_ctx->stamp -
  386. bw->ww_ctx->stamp) < 0;
  387. }
  388. return 0;
  389. }
  390. static __always_inline void
  391. rt_mutex_enqueue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
  392. {
  393. rb_add_cached(&waiter->tree_entry, &lock->waiters, __waiter_less);
  394. }
  395. static __always_inline void
  396. rt_mutex_dequeue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
  397. {
  398. if (RB_EMPTY_NODE(&waiter->tree_entry))
  399. return;
  400. rb_erase_cached(&waiter->tree_entry, &lock->waiters);
  401. RB_CLEAR_NODE(&waiter->tree_entry);
  402. }
  403. #define __node_2_pi_waiter(node) \
  404. rb_entry((node), struct rt_mutex_waiter, pi_tree_entry)
  405. static __always_inline bool
  406. __pi_waiter_less(struct rb_node *a, const struct rb_node *b)
  407. {
  408. return rt_mutex_waiter_less(__node_2_pi_waiter(a), __node_2_pi_waiter(b));
  409. }
  410. static __always_inline void
  411. rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
  412. {
  413. rb_add_cached(&waiter->pi_tree_entry, &task->pi_waiters, __pi_waiter_less);
  414. }
  415. static __always_inline void
  416. rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
  417. {
  418. if (RB_EMPTY_NODE(&waiter->pi_tree_entry))
  419. return;
  420. rb_erase_cached(&waiter->pi_tree_entry, &task->pi_waiters);
  421. RB_CLEAR_NODE(&waiter->pi_tree_entry);
  422. }
  423. static __always_inline void rt_mutex_adjust_prio(struct task_struct *p)
  424. {
  425. struct task_struct *pi_task = NULL;
  426. lockdep_assert_held(&p->pi_lock);
  427. if (task_has_pi_waiters(p))
  428. pi_task = task_top_pi_waiter(p)->task;
  429. rt_mutex_setprio(p, pi_task);
  430. }
  431. /* RT mutex specific wake_q wrappers */
  432. static __always_inline void rt_mutex_wake_q_add_task(struct rt_wake_q_head *wqh,
  433. struct task_struct *task,
  434. unsigned int wake_state)
  435. {
  436. if (IS_ENABLED(CONFIG_PREEMPT_RT) && wake_state == TASK_RTLOCK_WAIT) {
  437. if (IS_ENABLED(CONFIG_PROVE_LOCKING))
  438. WARN_ON_ONCE(wqh->rtlock_task);
  439. get_task_struct(task);
  440. wqh->rtlock_task = task;
  441. } else {
  442. wake_q_add(&wqh->head, task);
  443. }
  444. }
  445. static __always_inline void rt_mutex_wake_q_add(struct rt_wake_q_head *wqh,
  446. struct rt_mutex_waiter *w)
  447. {
  448. rt_mutex_wake_q_add_task(wqh, w->task, w->wake_state);
  449. }
  450. static __always_inline void rt_mutex_wake_up_q(struct rt_wake_q_head *wqh)
  451. {
  452. if (IS_ENABLED(CONFIG_PREEMPT_RT) && wqh->rtlock_task) {
  453. wake_up_state(wqh->rtlock_task, TASK_RTLOCK_WAIT);
  454. put_task_struct(wqh->rtlock_task);
  455. wqh->rtlock_task = NULL;
  456. }
  457. if (!wake_q_empty(&wqh->head))
  458. wake_up_q(&wqh->head);
  459. /* Pairs with preempt_disable() in mark_wakeup_next_waiter() */
  460. preempt_enable();
  461. }
  462. /*
  463. * Deadlock detection is conditional:
  464. *
  465. * If CONFIG_DEBUG_RT_MUTEXES=n, deadlock detection is only conducted
  466. * if the detect argument is == RT_MUTEX_FULL_CHAINWALK.
  467. *
  468. * If CONFIG_DEBUG_RT_MUTEXES=y, deadlock detection is always
  469. * conducted independent of the detect argument.
  470. *
  471. * If the waiter argument is NULL this indicates the deboost path and
  472. * deadlock detection is disabled independent of the detect argument
  473. * and the config settings.
  474. */
  475. static __always_inline bool
  476. rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter,
  477. enum rtmutex_chainwalk chwalk)
  478. {
  479. if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES))
  480. return waiter != NULL;
  481. return chwalk == RT_MUTEX_FULL_CHAINWALK;
  482. }
  483. static __always_inline struct rt_mutex_base *task_blocked_on_lock(struct task_struct *p)
  484. {
  485. return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
  486. }
  487. /*
  488. * Adjust the priority chain. Also used for deadlock detection.
  489. * Decreases task's usage by one - may thus free the task.
  490. *
  491. * @task: the task owning the mutex (owner) for which a chain walk is
  492. * probably needed
  493. * @chwalk: do we have to carry out deadlock detection?
  494. * @orig_lock: the mutex (can be NULL if we are walking the chain to recheck
  495. * things for a task that has just got its priority adjusted, and
  496. * is waiting on a mutex)
  497. * @next_lock: the mutex on which the owner of @orig_lock was blocked before
  498. * we dropped its pi_lock. Is never dereferenced, only used for
  499. * comparison to detect lock chain changes.
  500. * @orig_waiter: rt_mutex_waiter struct for the task that has just donated
  501. * its priority to the mutex owner (can be NULL in the case
  502. * depicted above or if the top waiter is gone away and we are
  503. * actually deboosting the owner)
  504. * @top_task: the current top waiter
  505. *
  506. * Returns 0 or -EDEADLK.
  507. *
  508. * Chain walk basics and protection scope
  509. *
  510. * [R] refcount on task
  511. * [P] task->pi_lock held
  512. * [L] rtmutex->wait_lock held
  513. *
  514. * Step Description Protected by
  515. * function arguments:
  516. * @task [R]
  517. * @orig_lock if != NULL @top_task is blocked on it
  518. * @next_lock Unprotected. Cannot be
  519. * dereferenced. Only used for
  520. * comparison.
  521. * @orig_waiter if != NULL @top_task is blocked on it
  522. * @top_task current, or in case of proxy
  523. * locking protected by calling
  524. * code
  525. * again:
  526. * loop_sanity_check();
  527. * retry:
  528. * [1] lock(task->pi_lock); [R] acquire [P]
  529. * [2] waiter = task->pi_blocked_on; [P]
  530. * [3] check_exit_conditions_1(); [P]
  531. * [4] lock = waiter->lock; [P]
  532. * [5] if (!try_lock(lock->wait_lock)) { [P] try to acquire [L]
  533. * unlock(task->pi_lock); release [P]
  534. * goto retry;
  535. * }
  536. * [6] check_exit_conditions_2(); [P] + [L]
  537. * [7] requeue_lock_waiter(lock, waiter); [P] + [L]
  538. * [8] unlock(task->pi_lock); release [P]
  539. * put_task_struct(task); release [R]
  540. * [9] check_exit_conditions_3(); [L]
  541. * [10] task = owner(lock); [L]
  542. * get_task_struct(task); [L] acquire [R]
  543. * lock(task->pi_lock); [L] acquire [P]
  544. * [11] requeue_pi_waiter(tsk, waiters(lock));[P] + [L]
  545. * [12] check_exit_conditions_4(); [P] + [L]
  546. * [13] unlock(task->pi_lock); release [P]
  547. * unlock(lock->wait_lock); release [L]
  548. * goto again;
  549. */
  550. static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
  551. enum rtmutex_chainwalk chwalk,
  552. struct rt_mutex_base *orig_lock,
  553. struct rt_mutex_base *next_lock,
  554. struct rt_mutex_waiter *orig_waiter,
  555. struct task_struct *top_task)
  556. {
  557. struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
  558. struct rt_mutex_waiter *prerequeue_top_waiter;
  559. int ret = 0, depth = 0;
  560. struct rt_mutex_base *lock;
  561. bool detect_deadlock;
  562. bool requeue = true;
  563. detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk);
  564. /*
  565. * The (de)boosting is a step by step approach with a lot of
  566. * pitfalls. We want this to be preemptible and we want to hold a
  567. * maximum of two locks per step. So we have to check
  568. * carefully whether things change under us.
  569. */
  570. again:
  571. /*
  572. * We limit the lock chain length for each invocation.
  573. */
  574. if (++depth > max_lock_depth) {
  575. static int prev_max;
  576. /*
  577. * Print this only once. If the admin changes the limit,
  578. * print a new message when reaching the limit again.
  579. */
  580. if (prev_max != max_lock_depth) {
  581. prev_max = max_lock_depth;
  582. printk(KERN_WARNING "Maximum lock depth %d reached "
  583. "task: %s (%d)\n", max_lock_depth,
  584. top_task->comm, task_pid_nr(top_task));
  585. }
  586. put_task_struct(task);
  587. return -EDEADLK;
  588. }
  589. /*
  590. * We are fully preemptible here and only hold the refcount on
  591. * @task. So everything can have changed under us since the
  592. * caller or our own code below (goto retry/again) dropped all
  593. * locks.
  594. */
  595. retry:
  596. /*
  597. * [1] Task cannot go away as we did a get_task() before !
  598. */
  599. raw_spin_lock_irq(&task->pi_lock);
  600. /*
  601. * [2] Get the waiter on which @task is blocked on.
  602. */
  603. waiter = task->pi_blocked_on;
  604. /*
  605. * [3] check_exit_conditions_1() protected by task->pi_lock.
  606. */
  607. /*
  608. * Check whether the end of the boosting chain has been
  609. * reached or the state of the chain has changed while we
  610. * dropped the locks.
  611. */
  612. if (!waiter)
  613. goto out_unlock_pi;
  614. /*
  615. * Check the orig_waiter state. After we dropped the locks,
  616. * the previous owner of the lock might have released the lock.
  617. */
  618. if (orig_waiter && !rt_mutex_owner(orig_lock))
  619. goto out_unlock_pi;
  620. /*
  621. * We dropped all locks after taking a refcount on @task, so
  622. * the task might have moved on in the lock chain or even left
  623. * the chain completely and blocks now on an unrelated lock or
  624. * on @orig_lock.
  625. *
  626. * We stored the lock on which @task was blocked in @next_lock,
  627. * so we can detect the chain change.
  628. */
  629. if (next_lock != waiter->lock)
  630. goto out_unlock_pi;
  631. /*
  632. * There could be 'spurious' loops in the lock graph due to ww_mutex,
  633. * consider:
  634. *
  635. * P1: A, ww_A, ww_B
  636. * P2: ww_B, ww_A
  637. * P3: A
  638. *
  639. * P3 should not return -EDEADLK because it gets trapped in the cycle
  640. * created by P1 and P2 (which will resolve -- and runs into
  641. * max_lock_depth above). Therefore disable detect_deadlock such that
  642. * the below termination condition can trigger once all relevant tasks
  643. * are boosted.
  644. *
  645. * Even when we start with ww_mutex we can disable deadlock detection,
  646. * since we would suppress a ww_mutex induced deadlock at [6] anyway.
  647. * Suppressing it here however is not sufficient since we might still
  648. * hit [6] due to adjustment driven iteration.
  649. *
  650. * NOTE: if someone were to create a deadlock between 2 ww_classes we'd
  651. * utterly fail to report it; lockdep should.
  652. */
  653. if (IS_ENABLED(CONFIG_PREEMPT_RT) && waiter->ww_ctx && detect_deadlock)
  654. detect_deadlock = false;
  655. /*
  656. * Drop out, when the task has no waiters. Note,
  657. * top_waiter can be NULL, when we are in the deboosting
  658. * mode!
  659. */
  660. if (top_waiter) {
  661. if (!task_has_pi_waiters(task))
  662. goto out_unlock_pi;
  663. /*
  664. * If deadlock detection is off, we stop here if we
  665. * are not the top pi waiter of the task. If deadlock
  666. * detection is enabled we continue, but stop the
  667. * requeueing in the chain walk.
  668. */
  669. if (top_waiter != task_top_pi_waiter(task)) {
  670. if (!detect_deadlock)
  671. goto out_unlock_pi;
  672. else
  673. requeue = false;
  674. }
  675. }
  676. /*
  677. * If the waiter priority is the same as the task priority
  678. * then there is no further priority adjustment necessary. If
  679. * deadlock detection is off, we stop the chain walk. If it's
  680. * enabled we continue, but stop the requeueing in the chain
  681. * walk.
  682. */
  683. if (rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
  684. if (!detect_deadlock)
  685. goto out_unlock_pi;
  686. else
  687. requeue = false;
  688. }
  689. /*
  690. * [4] Get the next lock
  691. */
  692. lock = waiter->lock;
  693. /*
  694. * [5] We need to trylock here as we are holding task->pi_lock,
  695. * which is the reverse lock order versus the other rtmutex
  696. * operations.
  697. */
  698. if (!raw_spin_trylock(&lock->wait_lock)) {
  699. raw_spin_unlock_irq(&task->pi_lock);
  700. cpu_relax();
  701. goto retry;
  702. }
  703. /*
  704. * [6] check_exit_conditions_2() protected by task->pi_lock and
  705. * lock->wait_lock.
  706. *
  707. * Deadlock detection. If the lock is the same as the original
  708. * lock which caused us to walk the lock chain or if the
  709. * current lock is owned by the task which initiated the chain
  710. * walk, we detected a deadlock.
  711. */
  712. if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
  713. ret = -EDEADLK;
  714. /*
  715. * When the deadlock is due to ww_mutex; also see above. Don't
  716. * report the deadlock and instead let the ww_mutex wound/die
  717. * logic pick which of the contending threads gets -EDEADLK.
  718. *
  719. * NOTE: assumes the cycle only contains a single ww_class; any
  720. * other configuration and we fail to report; also, see
  721. * lockdep.
  722. */
  723. if (IS_ENABLED(CONFIG_PREEMPT_RT) && orig_waiter && orig_waiter->ww_ctx)
  724. ret = 0;
  725. raw_spin_unlock(&lock->wait_lock);
  726. goto out_unlock_pi;
  727. }
  728. /*
  729. * If we just follow the lock chain for deadlock detection, no
  730. * need to do all the requeue operations. To avoid a truckload
  731. * of conditionals around the various places below, just do the
  732. * minimum chain walk checks.
  733. */
  734. if (!requeue) {
  735. /*
  736. * No requeue[7] here. Just release @task [8]
  737. */
  738. raw_spin_unlock(&task->pi_lock);
  739. put_task_struct(task);
  740. /*
  741. * [9] check_exit_conditions_3 protected by lock->wait_lock.
  742. * If there is no owner of the lock, end of chain.
  743. */
  744. if (!rt_mutex_owner(lock)) {
  745. raw_spin_unlock_irq(&lock->wait_lock);
  746. return 0;
  747. }
  748. /* [10] Grab the next task, i.e. owner of @lock */
  749. task = get_task_struct(rt_mutex_owner(lock));
  750. raw_spin_lock(&task->pi_lock);
  751. /*
  752. * No requeue [11] here. We just do deadlock detection.
  753. *
  754. * [12] Store whether owner is blocked
  755. * itself. Decision is made after dropping the locks
  756. */
  757. next_lock = task_blocked_on_lock(task);
  758. /*
  759. * Get the top waiter for the next iteration
  760. */
  761. top_waiter = rt_mutex_top_waiter(lock);
  762. /* [13] Drop locks */
  763. raw_spin_unlock(&task->pi_lock);
  764. raw_spin_unlock_irq(&lock->wait_lock);
  765. /* If owner is not blocked, end of chain. */
  766. if (!next_lock)
  767. goto out_put_task;
  768. goto again;
  769. }
  770. /*
  771. * Store the current top waiter before doing the requeue
  772. * operation on @lock. We need it for the boost/deboost
  773. * decision below.
  774. */
  775. prerequeue_top_waiter = rt_mutex_top_waiter(lock);
  776. /* [7] Requeue the waiter in the lock waiter tree. */
  777. rt_mutex_dequeue(lock, waiter);
  778. /*
  779. * Update the waiter prio fields now that we're dequeued.
  780. *
  781. * These values can have changed through either:
  782. *
  783. * sys_sched_set_scheduler() / sys_sched_setattr()
  784. *
  785. * or
  786. *
  787. * DL CBS enforcement advancing the effective deadline.
  788. *
  789. * Even though pi_waiters also uses these fields, and that tree is only
  790. * updated in [11], we can do this here, since we hold [L], which
  791. * serializes all pi_waiters access and rb_erase() does not care about
  792. * the values of the node being removed.
  793. */
  794. waiter_update_prio(waiter, task);
  795. rt_mutex_enqueue(lock, waiter);
  796. /* [8] Release the task */
  797. raw_spin_unlock(&task->pi_lock);
  798. put_task_struct(task);
  799. /*
  800. * [9] check_exit_conditions_3 protected by lock->wait_lock.
  801. *
  802. * We must abort the chain walk if there is no lock owner even
  803. * in the dead lock detection case, as we have nothing to
  804. * follow here. This is the end of the chain we are walking.
  805. */
  806. if (!rt_mutex_owner(lock)) {
  807. /*
  808. * If the requeue [7] above changed the top waiter,
  809. * then we need to wake the new top waiter up to try
  810. * to get the lock.
  811. */
  812. top_waiter = rt_mutex_top_waiter(lock);
  813. if (prerequeue_top_waiter != top_waiter)
  814. wake_up_state(top_waiter->task, top_waiter->wake_state);
  815. raw_spin_unlock_irq(&lock->wait_lock);
  816. return 0;
  817. }
  818. /* [10] Grab the next task, i.e. the owner of @lock */
  819. task = get_task_struct(rt_mutex_owner(lock));
  820. raw_spin_lock(&task->pi_lock);
  821. /* [11] requeue the pi waiters if necessary */
  822. if (waiter == rt_mutex_top_waiter(lock)) {
  823. /*
  824. * The waiter became the new top (highest priority)
  825. * waiter on the lock. Replace the previous top waiter
  826. * in the owner tasks pi waiters tree with this waiter
  827. * and adjust the priority of the owner.
  828. */
  829. rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
  830. rt_mutex_enqueue_pi(task, waiter);
  831. rt_mutex_adjust_prio(task);
  832. } else if (prerequeue_top_waiter == waiter) {
  833. /*
  834. * The waiter was the top waiter on the lock, but is
  835. * no longer the top priority waiter. Replace waiter in
  836. * the owner tasks pi waiters tree with the new top
  837. * (highest priority) waiter and adjust the priority
  838. * of the owner.
  839. * The new top waiter is stored in @waiter so that
  840. * @waiter == @top_waiter evaluates to true below and
  841. * we continue to deboost the rest of the chain.
  842. */
  843. rt_mutex_dequeue_pi(task, waiter);
  844. waiter = rt_mutex_top_waiter(lock);
  845. rt_mutex_enqueue_pi(task, waiter);
  846. rt_mutex_adjust_prio(task);
  847. } else {
  848. /*
  849. * Nothing changed. No need to do any priority
  850. * adjustment.
  851. */
  852. }
  853. /*
  854. * [12] check_exit_conditions_4() protected by task->pi_lock
  855. * and lock->wait_lock. The actual decisions are made after we
  856. * dropped the locks.
  857. *
  858. * Check whether the task which owns the current lock is pi
  859. * blocked itself. If yes we store a pointer to the lock for
  860. * the lock chain change detection above. After we dropped
  861. * task->pi_lock next_lock cannot be dereferenced anymore.
  862. */
  863. next_lock = task_blocked_on_lock(task);
  864. /*
  865. * Store the top waiter of @lock for the end of chain walk
  866. * decision below.
  867. */
  868. top_waiter = rt_mutex_top_waiter(lock);
  869. /* [13] Drop the locks */
  870. raw_spin_unlock(&task->pi_lock);
  871. raw_spin_unlock_irq(&lock->wait_lock);
  872. /*
  873. * Make the actual exit decisions [12], based on the stored
  874. * values.
  875. *
  876. * We reached the end of the lock chain. Stop right here. No
  877. * point to go back just to figure that out.
  878. */
  879. if (!next_lock)
  880. goto out_put_task;
  881. /*
  882. * If the current waiter is not the top waiter on the lock,
  883. * then we can stop the chain walk here if we are not in full
  884. * deadlock detection mode.
  885. */
  886. if (!detect_deadlock && waiter != top_waiter)
  887. goto out_put_task;
  888. goto again;
  889. out_unlock_pi:
  890. raw_spin_unlock_irq(&task->pi_lock);
  891. out_put_task:
  892. put_task_struct(task);
  893. return ret;
  894. }
  895. /*
  896. * Try to take an rt-mutex
  897. *
  898. * Must be called with lock->wait_lock held and interrupts disabled
  899. *
  900. * @lock: The lock to be acquired.
  901. * @task: The task which wants to acquire the lock
  902. * @waiter: The waiter that is queued to the lock's wait tree if the
  903. * callsite called task_blocked_on_lock(), otherwise NULL
  904. */
  905. static int __sched
  906. try_to_take_rt_mutex(struct rt_mutex_base *lock, struct task_struct *task,
  907. struct rt_mutex_waiter *waiter)
  908. {
  909. lockdep_assert_held(&lock->wait_lock);
  910. /*
  911. * Before testing whether we can acquire @lock, we set the
  912. * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
  913. * other tasks which try to modify @lock into the slow path
  914. * and they serialize on @lock->wait_lock.
  915. *
  916. * The RT_MUTEX_HAS_WAITERS bit can have a transitional state
  917. * as explained at the top of this file if and only if:
  918. *
  919. * - There is a lock owner. The caller must fixup the
  920. * transient state if it does a trylock or leaves the lock
  921. * function due to a signal or timeout.
  922. *
  923. * - @task acquires the lock and there are no other
  924. * waiters. This is undone in rt_mutex_set_owner(@task) at
  925. * the end of this function.
  926. */
  927. mark_rt_mutex_waiters(lock);
  928. /*
  929. * If @lock has an owner, give up.
  930. */
  931. if (rt_mutex_owner(lock))
  932. return 0;
  933. /*
  934. * If @waiter != NULL, @task has already enqueued the waiter
  935. * into @lock waiter tree. If @waiter == NULL then this is a
  936. * trylock attempt.
  937. */
  938. if (waiter) {
  939. struct rt_mutex_waiter *top_waiter = rt_mutex_top_waiter(lock);
  940. /*
  941. * If waiter is the highest priority waiter of @lock,
  942. * or allowed to steal it, take it over.
  943. */
  944. if (waiter == top_waiter || rt_mutex_steal(waiter, top_waiter)) {
  945. /*
  946. * We can acquire the lock. Remove the waiter from the
  947. * lock waiters tree.
  948. */
  949. rt_mutex_dequeue(lock, waiter);
  950. } else {
  951. return 0;
  952. }
  953. } else {
  954. /*
  955. * If the lock has waiters already we check whether @task is
  956. * eligible to take over the lock.
  957. *
  958. * If there are no other waiters, @task can acquire
  959. * the lock. @task->pi_blocked_on is NULL, so it does
  960. * not need to be dequeued.
  961. */
  962. if (rt_mutex_has_waiters(lock)) {
  963. /* Check whether the trylock can steal it. */
  964. if (!rt_mutex_steal(task_to_waiter(task),
  965. rt_mutex_top_waiter(lock)))
  966. return 0;
  967. /*
  968. * The current top waiter stays enqueued. We
  969. * don't have to change anything in the lock
  970. * waiters order.
  971. */
  972. } else {
  973. /*
  974. * No waiters. Take the lock without the
  975. * pi_lock dance.@task->pi_blocked_on is NULL
  976. * and we have no waiters to enqueue in @task
  977. * pi waiters tree.
  978. */
  979. goto takeit;
  980. }
  981. }
  982. /*
  983. * Clear @task->pi_blocked_on. Requires protection by
  984. * @task->pi_lock. Redundant operation for the @waiter == NULL
  985. * case, but conditionals are more expensive than a redundant
  986. * store.
  987. */
  988. raw_spin_lock(&task->pi_lock);
  989. task->pi_blocked_on = NULL;
  990. /*
  991. * Finish the lock acquisition. @task is the new owner. If
  992. * other waiters exist we have to insert the highest priority
  993. * waiter into @task->pi_waiters tree.
  994. */
  995. if (rt_mutex_has_waiters(lock))
  996. rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock));
  997. raw_spin_unlock(&task->pi_lock);
  998. takeit:
  999. /*
  1000. * This either preserves the RT_MUTEX_HAS_WAITERS bit if there
  1001. * are still waiters or clears it.
  1002. */
  1003. rt_mutex_set_owner(lock, task);
  1004. return 1;
  1005. }
  1006. /*
  1007. * Task blocks on lock.
  1008. *
  1009. * Prepare waiter and propagate pi chain
  1010. *
  1011. * This must be called with lock->wait_lock held and interrupts disabled
  1012. */
  1013. static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
  1014. struct rt_mutex_waiter *waiter,
  1015. struct task_struct *task,
  1016. struct ww_acquire_ctx *ww_ctx,
  1017. enum rtmutex_chainwalk chwalk)
  1018. {
  1019. struct task_struct *owner = rt_mutex_owner(lock);
  1020. struct rt_mutex_waiter *top_waiter = waiter;
  1021. struct rt_mutex_base *next_lock;
  1022. int chain_walk = 0, res;
  1023. lockdep_assert_held(&lock->wait_lock);
  1024. /*
  1025. * Early deadlock detection. We really don't want the task to
  1026. * enqueue on itself just to untangle the mess later. It's not
  1027. * only an optimization. We drop the locks, so another waiter
  1028. * can come in before the chain walk detects the deadlock. So
  1029. * the other will detect the deadlock and return -EDEADLOCK,
  1030. * which is wrong, as the other waiter is not in a deadlock
  1031. * situation.
  1032. *
  1033. * Except for ww_mutex, in that case the chain walk must already deal
  1034. * with spurious cycles, see the comments at [3] and [6].
  1035. */
  1036. if (owner == task && !(build_ww_mutex() && ww_ctx))
  1037. return -EDEADLK;
  1038. trace_android_vh_task_blocks_on_rtmutex(lock, waiter, task, ww_ctx, &chwalk);
  1039. raw_spin_lock(&task->pi_lock);
  1040. waiter->task = task;
  1041. waiter->lock = lock;
  1042. waiter_update_prio(waiter, task);
  1043. /* Get the top priority waiter on the lock */
  1044. if (rt_mutex_has_waiters(lock))
  1045. top_waiter = rt_mutex_top_waiter(lock);
  1046. rt_mutex_enqueue(lock, waiter);
  1047. task->pi_blocked_on = waiter;
  1048. raw_spin_unlock(&task->pi_lock);
  1049. if (build_ww_mutex() && ww_ctx) {
  1050. struct rt_mutex *rtm;
  1051. /* Check whether the waiter should back out immediately */
  1052. rtm = container_of(lock, struct rt_mutex, rtmutex);
  1053. res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx);
  1054. if (res) {
  1055. raw_spin_lock(&task->pi_lock);
  1056. rt_mutex_dequeue(lock, waiter);
  1057. task->pi_blocked_on = NULL;
  1058. raw_spin_unlock(&task->pi_lock);
  1059. return res;
  1060. }
  1061. }
  1062. if (!owner)
  1063. return 0;
  1064. raw_spin_lock(&owner->pi_lock);
  1065. if (waiter == rt_mutex_top_waiter(lock)) {
  1066. rt_mutex_dequeue_pi(owner, top_waiter);
  1067. rt_mutex_enqueue_pi(owner, waiter);
  1068. rt_mutex_adjust_prio(owner);
  1069. if (owner->pi_blocked_on)
  1070. chain_walk = 1;
  1071. } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
  1072. chain_walk = 1;
  1073. }
  1074. /* Store the lock on which owner is blocked or NULL */
  1075. next_lock = task_blocked_on_lock(owner);
  1076. raw_spin_unlock(&owner->pi_lock);
  1077. /*
  1078. * Even if full deadlock detection is on, if the owner is not
  1079. * blocked itself, we can avoid finding this out in the chain
  1080. * walk.
  1081. */
  1082. if (!chain_walk || !next_lock)
  1083. return 0;
  1084. /*
  1085. * The owner can't disappear while holding a lock,
  1086. * so the owner struct is protected by wait_lock.
  1087. * Gets dropped in rt_mutex_adjust_prio_chain()!
  1088. */
  1089. get_task_struct(owner);
  1090. raw_spin_unlock_irq(&lock->wait_lock);
  1091. res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,
  1092. next_lock, waiter, task);
  1093. raw_spin_lock_irq(&lock->wait_lock);
  1094. return res;
  1095. }
  1096. /*
  1097. * Remove the top waiter from the current tasks pi waiter tree and
  1098. * queue it up.
  1099. *
  1100. * Called with lock->wait_lock held and interrupts disabled.
  1101. */
  1102. static void __sched mark_wakeup_next_waiter(struct rt_wake_q_head *wqh,
  1103. struct rt_mutex_base *lock)
  1104. {
  1105. struct rt_mutex_waiter *waiter;
  1106. raw_spin_lock(&current->pi_lock);
  1107. waiter = rt_mutex_top_waiter(lock);
  1108. /*
  1109. * Remove it from current->pi_waiters and deboost.
  1110. *
  1111. * We must in fact deboost here in order to ensure we call
  1112. * rt_mutex_setprio() to update p->pi_top_task before the
  1113. * task unblocks.
  1114. */
  1115. rt_mutex_dequeue_pi(current, waiter);
  1116. rt_mutex_adjust_prio(current);
  1117. /*
  1118. * As we are waking up the top waiter, and the waiter stays
  1119. * queued on the lock until it gets the lock, this lock
  1120. * obviously has waiters. Just set the bit here and this has
  1121. * the added benefit of forcing all new tasks into the
  1122. * slow path making sure no task of lower priority than
  1123. * the top waiter can steal this lock.
  1124. */
  1125. lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
  1126. /*
  1127. * We deboosted before waking the top waiter task such that we don't
  1128. * run two tasks with the 'same' priority (and ensure the
  1129. * p->pi_top_task pointer points to a blocked task). This however can
  1130. * lead to priority inversion if we would get preempted after the
  1131. * deboost but before waking our donor task, hence the preempt_disable()
  1132. * before unlock.
  1133. *
  1134. * Pairs with preempt_enable() in rt_mutex_wake_up_q();
  1135. */
  1136. preempt_disable();
  1137. rt_mutex_wake_q_add(wqh, waiter);
  1138. raw_spin_unlock(&current->pi_lock);
  1139. }
  1140. static int __sched __rt_mutex_slowtrylock(struct rt_mutex_base *lock)
  1141. {
  1142. int ret = try_to_take_rt_mutex(lock, current, NULL);
  1143. /*
  1144. * try_to_take_rt_mutex() sets the lock waiters bit
  1145. * unconditionally. Clean this up.
  1146. */
  1147. fixup_rt_mutex_waiters(lock, true);
  1148. return ret;
  1149. }
  1150. /*
  1151. * Slow path try-lock function:
  1152. */
  1153. static int __sched rt_mutex_slowtrylock(struct rt_mutex_base *lock)
  1154. {
  1155. unsigned long flags;
  1156. int ret;
  1157. /*
  1158. * If the lock already has an owner we fail to get the lock.
  1159. * This can be done without taking the @lock->wait_lock as
  1160. * it is only being read, and this is a trylock anyway.
  1161. */
  1162. if (rt_mutex_owner(lock))
  1163. return 0;
  1164. /*
  1165. * The mutex has currently no owner. Lock the wait lock and try to
  1166. * acquire the lock. We use irqsave here to support early boot calls.
  1167. */
  1168. raw_spin_lock_irqsave(&lock->wait_lock, flags);
  1169. ret = __rt_mutex_slowtrylock(lock);
  1170. raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
  1171. return ret;
  1172. }
  1173. static __always_inline int __rt_mutex_trylock(struct rt_mutex_base *lock)
  1174. {
  1175. if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
  1176. return 1;
  1177. return rt_mutex_slowtrylock(lock);
  1178. }
  1179. /*
  1180. * Slow path to release a rt-mutex.
  1181. */
  1182. static void __sched rt_mutex_slowunlock(struct rt_mutex_base *lock)
  1183. {
  1184. DEFINE_RT_WAKE_Q(wqh);
  1185. unsigned long flags;
  1186. /* irqsave required to support early boot calls */
  1187. raw_spin_lock_irqsave(&lock->wait_lock, flags);
  1188. debug_rt_mutex_unlock(lock);
  1189. /*
  1190. * We must be careful here if the fast path is enabled. If we
  1191. * have no waiters queued we cannot set owner to NULL here
  1192. * because of:
  1193. *
  1194. * foo->lock->owner = NULL;
  1195. * rtmutex_lock(foo->lock); <- fast path
  1196. * free = atomic_dec_and_test(foo->refcnt);
  1197. * rtmutex_unlock(foo->lock); <- fast path
  1198. * if (free)
  1199. * kfree(foo);
  1200. * raw_spin_unlock(foo->lock->wait_lock);
  1201. *
  1202. * So for the fastpath enabled kernel:
  1203. *
  1204. * Nothing can set the waiters bit as long as we hold
  1205. * lock->wait_lock. So we do the following sequence:
  1206. *
  1207. * owner = rt_mutex_owner(lock);
  1208. * clear_rt_mutex_waiters(lock);
  1209. * raw_spin_unlock(&lock->wait_lock);
  1210. * if (cmpxchg(&lock->owner, owner, 0) == owner)
  1211. * return;
  1212. * goto retry;
  1213. *
  1214. * The fastpath disabled variant is simple as all access to
  1215. * lock->owner is serialized by lock->wait_lock:
  1216. *
  1217. * lock->owner = NULL;
  1218. * raw_spin_unlock(&lock->wait_lock);
  1219. */
  1220. while (!rt_mutex_has_waiters(lock)) {
  1221. /* Drops lock->wait_lock ! */
  1222. if (unlock_rt_mutex_safe(lock, flags) == true)
  1223. return;
  1224. /* Relock the rtmutex and try again */
  1225. raw_spin_lock_irqsave(&lock->wait_lock, flags);
  1226. }
  1227. /*
  1228. * The wakeup next waiter path does not suffer from the above
  1229. * race. See the comments there.
  1230. *
  1231. * Queue the next waiter for wakeup once we release the wait_lock.
  1232. */
  1233. mark_wakeup_next_waiter(&wqh, lock);
  1234. raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
  1235. rt_mutex_wake_up_q(&wqh);
  1236. }
  1237. static __always_inline void __rt_mutex_unlock(struct rt_mutex_base *lock)
  1238. {
  1239. if (likely(rt_mutex_cmpxchg_release(lock, current, NULL)))
  1240. return;
  1241. rt_mutex_slowunlock(lock);
  1242. }
  1243. #ifdef CONFIG_SMP
  1244. static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
  1245. struct rt_mutex_waiter *waiter,
  1246. struct task_struct *owner)
  1247. {
  1248. bool res = true;
  1249. rcu_read_lock();
  1250. for (;;) {
  1251. /* If owner changed, trylock again. */
  1252. if (owner != rt_mutex_owner(lock))
  1253. break;
  1254. /*
  1255. * Ensure that @owner is dereferenced after checking that
  1256. * the lock owner still matches @owner. If that fails,
  1257. * @owner might point to freed memory. If it still matches,
  1258. * the rcu_read_lock() ensures the memory stays valid.
  1259. */
  1260. barrier();
  1261. /*
  1262. * Stop spinning when:
  1263. * - the lock owner has been scheduled out
  1264. * - current is not longer the top waiter
  1265. * - current is requested to reschedule (redundant
  1266. * for CONFIG_PREEMPT_RCU=y)
  1267. * - the VCPU on which owner runs is preempted
  1268. */
  1269. if (!owner_on_cpu(owner) || need_resched() ||
  1270. !rt_mutex_waiter_is_top_waiter(lock, waiter)) {
  1271. res = false;
  1272. break;
  1273. }
  1274. cpu_relax();
  1275. }
  1276. rcu_read_unlock();
  1277. return res;
  1278. }
  1279. #else
  1280. static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
  1281. struct rt_mutex_waiter *waiter,
  1282. struct task_struct *owner)
  1283. {
  1284. return false;
  1285. }
  1286. #endif
  1287. #ifdef RT_MUTEX_BUILD_MUTEX
  1288. /*
  1289. * Functions required for:
  1290. * - rtmutex, futex on all kernels
  1291. * - mutex and rwsem substitutions on RT kernels
  1292. */
  1293. /*
  1294. * Remove a waiter from a lock and give up
  1295. *
  1296. * Must be called with lock->wait_lock held and interrupts disabled. It must
  1297. * have just failed to try_to_take_rt_mutex().
  1298. */
  1299. static void __sched remove_waiter(struct rt_mutex_base *lock,
  1300. struct rt_mutex_waiter *waiter)
  1301. {
  1302. bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
  1303. struct task_struct *owner = rt_mutex_owner(lock);
  1304. struct rt_mutex_base *next_lock;
  1305. lockdep_assert_held(&lock->wait_lock);
  1306. raw_spin_lock(&current->pi_lock);
  1307. rt_mutex_dequeue(lock, waiter);
  1308. current->pi_blocked_on = NULL;
  1309. raw_spin_unlock(&current->pi_lock);
  1310. /*
  1311. * Only update priority if the waiter was the highest priority
  1312. * waiter of the lock and there is an owner to update.
  1313. */
  1314. if (!owner || !is_top_waiter)
  1315. return;
  1316. raw_spin_lock(&owner->pi_lock);
  1317. rt_mutex_dequeue_pi(owner, waiter);
  1318. if (rt_mutex_has_waiters(lock))
  1319. rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));
  1320. rt_mutex_adjust_prio(owner);
  1321. /* Store the lock on which owner is blocked or NULL */
  1322. next_lock = task_blocked_on_lock(owner);
  1323. raw_spin_unlock(&owner->pi_lock);
  1324. /*
  1325. * Don't walk the chain, if the owner task is not blocked
  1326. * itself.
  1327. */
  1328. if (!next_lock)
  1329. return;
  1330. /* gets dropped in rt_mutex_adjust_prio_chain()! */
  1331. get_task_struct(owner);
  1332. raw_spin_unlock_irq(&lock->wait_lock);
  1333. rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock,
  1334. next_lock, NULL, current);
  1335. raw_spin_lock_irq(&lock->wait_lock);
  1336. }
  1337. /**
  1338. * rt_mutex_slowlock_block() - Perform the wait-wake-try-to-take loop
  1339. * @lock: the rt_mutex to take
  1340. * @ww_ctx: WW mutex context pointer
  1341. * @state: the state the task should block in (TASK_INTERRUPTIBLE
  1342. * or TASK_UNINTERRUPTIBLE)
  1343. * @timeout: the pre-initialized and started timer, or NULL for none
  1344. * @waiter: the pre-initialized rt_mutex_waiter
  1345. *
  1346. * Must be called with lock->wait_lock held and interrupts disabled
  1347. */
  1348. static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock,
  1349. struct ww_acquire_ctx *ww_ctx,
  1350. unsigned int state,
  1351. struct hrtimer_sleeper *timeout,
  1352. struct rt_mutex_waiter *waiter)
  1353. {
  1354. struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
  1355. struct task_struct *owner;
  1356. int ret = 0;
  1357. trace_android_vh_rtmutex_wait_start(lock);
  1358. for (;;) {
  1359. /* Try to acquire the lock: */
  1360. if (try_to_take_rt_mutex(lock, current, waiter))
  1361. break;
  1362. if (timeout && !timeout->task) {
  1363. ret = -ETIMEDOUT;
  1364. break;
  1365. }
  1366. if (signal_pending_state(state, current)) {
  1367. ret = -EINTR;
  1368. break;
  1369. }
  1370. if (build_ww_mutex() && ww_ctx) {
  1371. ret = __ww_mutex_check_kill(rtm, waiter, ww_ctx);
  1372. if (ret)
  1373. break;
  1374. }
  1375. if (waiter == rt_mutex_top_waiter(lock))
  1376. owner = rt_mutex_owner(lock);
  1377. else
  1378. owner = NULL;
  1379. raw_spin_unlock_irq(&lock->wait_lock);
  1380. if (!owner || !rtmutex_spin_on_owner(lock, waiter, owner))
  1381. schedule();
  1382. raw_spin_lock_irq(&lock->wait_lock);
  1383. set_current_state(state);
  1384. }
  1385. trace_android_vh_rtmutex_wait_finish(lock);
  1386. __set_current_state(TASK_RUNNING);
  1387. return ret;
  1388. }
  1389. static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock,
  1390. struct rt_mutex_waiter *w)
  1391. {
  1392. /*
  1393. * If the result is not -EDEADLOCK or the caller requested
  1394. * deadlock detection, nothing to do here.
  1395. */
  1396. if (res != -EDEADLOCK || detect_deadlock)
  1397. return;
  1398. if (build_ww_mutex() && w->ww_ctx)
  1399. return;
  1400. /*
  1401. * Yell loudly and stop the task right here.
  1402. */
  1403. WARN(1, "rtmutex deadlock detected\n");
  1404. while (1) {
  1405. set_current_state(TASK_INTERRUPTIBLE);
  1406. schedule();
  1407. }
  1408. }
  1409. /**
  1410. * __rt_mutex_slowlock - Locking slowpath invoked with lock::wait_lock held
  1411. * @lock: The rtmutex to block lock
  1412. * @ww_ctx: WW mutex context pointer
  1413. * @state: The task state for sleeping
  1414. * @chwalk: Indicator whether full or partial chainwalk is requested
  1415. * @waiter: Initializer waiter for blocking
  1416. */
  1417. static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
  1418. struct ww_acquire_ctx *ww_ctx,
  1419. unsigned int state,
  1420. enum rtmutex_chainwalk chwalk,
  1421. struct rt_mutex_waiter *waiter)
  1422. {
  1423. struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
  1424. struct ww_mutex *ww = ww_container_of(rtm);
  1425. int ret;
  1426. lockdep_assert_held(&lock->wait_lock);
  1427. /* Try to acquire the lock again: */
  1428. if (try_to_take_rt_mutex(lock, current, NULL)) {
  1429. if (build_ww_mutex() && ww_ctx) {
  1430. __ww_mutex_check_waiters(rtm, ww_ctx);
  1431. ww_mutex_lock_acquired(ww, ww_ctx);
  1432. }
  1433. return 0;
  1434. }
  1435. set_current_state(state);
  1436. trace_contention_begin(lock, LCB_F_RT);
  1437. ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk);
  1438. if (likely(!ret))
  1439. ret = rt_mutex_slowlock_block(lock, ww_ctx, state, NULL, waiter);
  1440. if (likely(!ret)) {
  1441. /* acquired the lock */
  1442. if (build_ww_mutex() && ww_ctx) {
  1443. if (!ww_ctx->is_wait_die)
  1444. __ww_mutex_check_waiters(rtm, ww_ctx);
  1445. ww_mutex_lock_acquired(ww, ww_ctx);
  1446. }
  1447. } else {
  1448. __set_current_state(TASK_RUNNING);
  1449. remove_waiter(lock, waiter);
  1450. rt_mutex_handle_deadlock(ret, chwalk, waiter);
  1451. }
  1452. /*
  1453. * try_to_take_rt_mutex() sets the waiter bit
  1454. * unconditionally. We might have to fix that up.
  1455. */
  1456. fixup_rt_mutex_waiters(lock, true);
  1457. trace_contention_end(lock, ret);
  1458. return ret;
  1459. }
  1460. static inline int __rt_mutex_slowlock_locked(struct rt_mutex_base *lock,
  1461. struct ww_acquire_ctx *ww_ctx,
  1462. unsigned int state)
  1463. {
  1464. struct rt_mutex_waiter waiter;
  1465. int ret;
  1466. rt_mutex_init_waiter(&waiter);
  1467. waiter.ww_ctx = ww_ctx;
  1468. ret = __rt_mutex_slowlock(lock, ww_ctx, state, RT_MUTEX_MIN_CHAINWALK,
  1469. &waiter);
  1470. debug_rt_mutex_free_waiter(&waiter);
  1471. return ret;
  1472. }
  1473. /*
  1474. * rt_mutex_slowlock - Locking slowpath invoked when fast path fails
  1475. * @lock: The rtmutex to block lock
  1476. * @ww_ctx: WW mutex context pointer
  1477. * @state: The task state for sleeping
  1478. */
  1479. static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock,
  1480. struct ww_acquire_ctx *ww_ctx,
  1481. unsigned int state)
  1482. {
  1483. unsigned long flags;
  1484. int ret;
  1485. /*
  1486. * Technically we could use raw_spin_[un]lock_irq() here, but this can
  1487. * be called in early boot if the cmpxchg() fast path is disabled
  1488. * (debug, no architecture support). In this case we will acquire the
  1489. * rtmutex with lock->wait_lock held. But we cannot unconditionally
  1490. * enable interrupts in that early boot case. So we need to use the
  1491. * irqsave/restore variants.
  1492. */
  1493. raw_spin_lock_irqsave(&lock->wait_lock, flags);
  1494. ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state);
  1495. raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
  1496. return ret;
  1497. }
  1498. static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock,
  1499. unsigned int state)
  1500. {
  1501. if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
  1502. return 0;
  1503. return rt_mutex_slowlock(lock, NULL, state);
  1504. }
  1505. #endif /* RT_MUTEX_BUILD_MUTEX */
  1506. #ifdef RT_MUTEX_BUILD_SPINLOCKS
  1507. /*
  1508. * Functions required for spin/rw_lock substitution on RT kernels
  1509. */
  1510. /**
  1511. * rtlock_slowlock_locked - Slow path lock acquisition for RT locks
  1512. * @lock: The underlying RT mutex
  1513. */
  1514. static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
  1515. {
  1516. struct rt_mutex_waiter waiter;
  1517. struct task_struct *owner;
  1518. lockdep_assert_held(&lock->wait_lock);
  1519. if (try_to_take_rt_mutex(lock, current, NULL))
  1520. return;
  1521. rt_mutex_init_rtlock_waiter(&waiter);
  1522. /* Save current state and set state to TASK_RTLOCK_WAIT */
  1523. current_save_and_set_rtlock_wait_state();
  1524. trace_contention_begin(lock, LCB_F_RT);
  1525. task_blocks_on_rt_mutex(lock, &waiter, current, NULL, RT_MUTEX_MIN_CHAINWALK);
  1526. for (;;) {
  1527. /* Try to acquire the lock again */
  1528. if (try_to_take_rt_mutex(lock, current, &waiter))
  1529. break;
  1530. if (&waiter == rt_mutex_top_waiter(lock))
  1531. owner = rt_mutex_owner(lock);
  1532. else
  1533. owner = NULL;
  1534. raw_spin_unlock_irq(&lock->wait_lock);
  1535. if (!owner || !rtmutex_spin_on_owner(lock, &waiter, owner))
  1536. schedule_rtlock();
  1537. raw_spin_lock_irq(&lock->wait_lock);
  1538. set_current_state(TASK_RTLOCK_WAIT);
  1539. }
  1540. /* Restore the task state */
  1541. current_restore_rtlock_saved_state();
  1542. /*
  1543. * try_to_take_rt_mutex() sets the waiter bit unconditionally.
  1544. * We might have to fix that up:
  1545. */
  1546. fixup_rt_mutex_waiters(lock, true);
  1547. debug_rt_mutex_free_waiter(&waiter);
  1548. trace_contention_end(lock, 0);
  1549. }
  1550. static __always_inline void __sched rtlock_slowlock(struct rt_mutex_base *lock)
  1551. {
  1552. unsigned long flags;
  1553. raw_spin_lock_irqsave(&lock->wait_lock, flags);
  1554. rtlock_slowlock_locked(lock);
  1555. raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
  1556. }
  1557. #endif /* RT_MUTEX_BUILD_SPINLOCKS */