rwsem.c

  1. // SPDX-License-Identifier: GPL-2.0
  2. /* kernel/rwsem.c: R/W semaphores, public implementation
  3. *
  4. * Written by David Howells ([email protected]).
  5. * Derived from asm-i386/semaphore.h
  6. *
  7. * Writer lock-stealing by Alex Shi <[email protected]>
  8. * and Michel Lespinasse <[email protected]>
  9. *
  10. * Optimistic spinning by Tim Chen <[email protected]>
  11. * and Davidlohr Bueso <[email protected]>. Based on mutexes.
  12. *
  13. * Rwsem count bit fields re-definition and rwsem rearchitecture by
  14. * Waiman Long <[email protected]> and
  15. * Peter Zijlstra <[email protected]>.
  16. */
  17. #include <linux/types.h>
  18. #include <linux/kernel.h>
  19. #include <linux/sched.h>
  20. #include <linux/sched/rt.h>
  21. #include <linux/sched/task.h>
  22. #include <linux/sched/debug.h>
  23. #include <linux/sched/wake_q.h>
  24. #include <linux/sched/signal.h>
  25. #include <linux/sched/clock.h>
  26. #include <linux/export.h>
  27. #include <linux/rwsem.h>
  28. #include <linux/atomic.h>
  29. #include <trace/events/lock.h>
  30. #ifndef CONFIG_PREEMPT_RT
  31. #include "lock_events.h"
  32. #include <trace/hooks/dtask.h>
  33. #include <trace/hooks/rwsem.h>
  34. /*
  35. * The least significant 2 bits of the owner value have the following
  36. * meanings when set.
  37. * - Bit 0: RWSEM_READER_OWNED - The rwsem is owned by readers
  38. * - Bit 1: RWSEM_NONSPINNABLE - Cannot spin on a reader-owned lock
  39. *
  40. * When the rwsem is reader-owned and a spinning writer has timed out,
  41. * the nonspinnable bit will be set to disable optimistic spinning.
  42. * When a writer acquires a rwsem, it puts its task_struct pointer
  43. * into the owner field. It is cleared after an unlock.
  44. *
  45. * When a reader acquires a rwsem, it will also put its task_struct
  46. * pointer into the owner field with the RWSEM_READER_OWNED bit set.
  47. * On unlock, the owner field will largely be left untouched. So
  48. * for a free or reader-owned rwsem, the owner value may contain
  49. * information about the last reader that acquired the rwsem.
  50. *
  51. * That information may be helpful in debugging cases where the system
  52. * seems to hang on a reader-owned rwsem, especially if only one reader
  53. * is involved. Ideally we would like to track all the readers that own
  54. * a rwsem, but the overhead is simply too big.
  55. *
  56. * Fast-path reader optimistic lock stealing is supported when the rwsem
  57. * was previously owned by a writer and the following conditions are met:
  58. * - rwsem is not currently writer owned
  59. * - the handoff isn't set.
  60. */
  61. #define RWSEM_READER_OWNED (1UL << 0)
  62. #define RWSEM_NONSPINNABLE (1UL << 1)
  63. #define RWSEM_OWNER_FLAGS_MASK (RWSEM_READER_OWNED | RWSEM_NONSPINNABLE)
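/*
 * Illustrative owner field encodings derived from the flag bits above
 * (the task_struct address 0xffff888012345600 is a made-up example):
 *
 *   writer-owned:               owner == 0xffff888012345600
 *   reader-owned, spinnable:    owner == 0xffff888012345600 | RWSEM_READER_OWNED
 *   reader-owned, nonspinnable: owner == 0xffff888012345600 | RWSEM_READER_OWNED
 *                                                           | RWSEM_NONSPINNABLE
 */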
  64. #ifdef CONFIG_DEBUG_RWSEMS
  65. # define DEBUG_RWSEMS_WARN_ON(c, sem) do { \
  66. if (!debug_locks_silent && \
  67. WARN_ONCE(c, "DEBUG_RWSEMS_WARN_ON(%s): count = 0x%lx, magic = 0x%lx, owner = 0x%lx, curr 0x%lx, list %sempty\n",\
  68. #c, atomic_long_read(&(sem)->count), \
  69. (unsigned long) sem->magic, \
  70. atomic_long_read(&(sem)->owner), (long)current, \
  71. list_empty(&(sem)->wait_list) ? "" : "not ")) \
  72. debug_locks_off(); \
  73. } while (0)
  74. #else
  75. # define DEBUG_RWSEMS_WARN_ON(c, sem)
  76. #endif
  77. /*
  78. * On 64-bit architectures, the bit definitions of the count are:
  79. *
  80. * Bit 0 - writer locked bit
  81. * Bit 1 - waiters present bit
  82. * Bit 2 - lock handoff bit
  83. * Bits 3-7 - reserved
  84. * Bits 8-62 - 55-bit reader count
  85. * Bit 63 - read fail bit
  86. *
  87. * On 32-bit architectures, the bit definitions of the count are:
  88. *
  89. * Bit 0 - writer locked bit
  90. * Bit 1 - waiters present bit
  91. * Bit 2 - lock handoff bit
  92. * Bits 3-7 - reserved
  93. * Bits 8-30 - 23-bit reader count
  94. * Bit 31 - read fail bit
  95. *
  96. * It is not likely that the most significant bit (read fail bit) will ever
  97. * be set. This guard bit is still checked anyway in the down_read() fastpath
  98. * just in case we need to use up more of the reader bits for other purposes
  99. * in the future.
  100. *
  101. * atomic_long_fetch_add() is used to obtain the reader lock, whereas
  102. * atomic_long_cmpxchg() is used to obtain the writer lock.
  103. *
  104. * There are three places where the lock handoff bit may be set or cleared.
  105. * 1) rwsem_mark_wake() for readers -- set, clear
  106. * 2) rwsem_try_write_lock() for writers -- set, clear
  107. * 3) rwsem_del_waiter() -- clear
  108. *
  109. * For all the above cases, wait_lock will be held. A writer must also
  110. * be the first one in the wait_list to be eligible for setting the handoff
  111. * bit. So concurrent setting/clearing of handoff bit is not possible.
  112. */
  113. #define RWSEM_WRITER_LOCKED (1UL << 0)
  114. #define RWSEM_FLAG_WAITERS (1UL << 1)
  115. #define RWSEM_FLAG_HANDOFF (1UL << 2)
  116. #define RWSEM_FLAG_READFAIL (1UL << (BITS_PER_LONG - 1))
  117. #define RWSEM_READER_SHIFT 8
  118. #define RWSEM_READER_BIAS (1UL << RWSEM_READER_SHIFT)
  119. #define RWSEM_READER_MASK (~(RWSEM_READER_BIAS - 1))
  120. #define RWSEM_WRITER_MASK RWSEM_WRITER_LOCKED
  121. #define RWSEM_LOCK_MASK (RWSEM_WRITER_MASK|RWSEM_READER_MASK)
  122. #define RWSEM_READ_FAILED_MASK (RWSEM_WRITER_MASK|RWSEM_FLAG_WAITERS|\
  123. RWSEM_FLAG_HANDOFF|RWSEM_FLAG_READFAIL)
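/*
 * Illustrative count encodings derived from the bit definitions above:
 *
 *   unlocked, no waiters:         count == 0x0
 *   one writer:                   count == RWSEM_WRITER_LOCKED           (0x1)
 *   one reader:                   count == 1 * RWSEM_READER_BIAS       (0x100)
 *   three readers, with waiters:  count == 3 * RWSEM_READER_BIAS |
 *                                          RWSEM_FLAG_WAITERS           (0x302)
 */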
  124. /*
  125. * All writes to owner are protected by WRITE_ONCE() to make sure that
  126. * store tearing can't happen as optimistic spinners may read and use
  127. * the owner value concurrently without lock. Read from owner, however,
  128. * may not need READ_ONCE() as long as the pointer value is only used
  129. * for comparison and isn't being dereferenced.
  130. *
  131. * Both rwsem_{set,clear}_owner() functions should be in the same
  132. * preempt disable section as the atomic op that changes sem->count.
  133. */
  134. static inline void rwsem_set_owner(struct rw_semaphore *sem)
  135. {
  136. lockdep_assert_preemption_disabled();
  137. atomic_long_set(&sem->owner, (long)current);
  138. }
  139. static inline void rwsem_clear_owner(struct rw_semaphore *sem)
  140. {
  141. lockdep_assert_preemption_disabled();
  142. atomic_long_set(&sem->owner, 0);
  143. }
  144. /*
  145. * Test the flags in the owner field.
  146. */
  147. static inline bool rwsem_test_oflags(struct rw_semaphore *sem, long flags)
  148. {
  149. return atomic_long_read(&sem->owner) & flags;
  150. }
  151. /*
  152. * The task_struct pointer of the last owning reader will be left in
  153. * the owner field.
  154. *
  155. * Note that the owner value just indicates the task has owned the rwsem
  156. * previously; it may not be the real owner or one of the real owners
  157. * anymore when that field is examined, so take it with a grain of salt.
  158. *
  159. * The reader non-spinnable bit is preserved.
  160. */
  161. static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem,
  162. struct task_struct *owner)
  163. {
  164. unsigned long val = (unsigned long)owner | RWSEM_READER_OWNED |
  165. (atomic_long_read(&sem->owner) & RWSEM_NONSPINNABLE);
  166. atomic_long_set(&sem->owner, val);
  167. }
  168. static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
  169. {
  170. __rwsem_set_reader_owned(sem, current);
  171. }
  172. /*
  173. * Return true if the rwsem is owned by a reader.
  174. */
  175. static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem)
  176. {
  177. #ifdef CONFIG_DEBUG_RWSEMS
  178. /*
  179. * Check the count to see if it is write-locked.
  180. */
  181. long count = atomic_long_read(&sem->count);
  182. if (count & RWSEM_WRITER_MASK)
  183. return false;
  184. #endif
  185. return rwsem_test_oflags(sem, RWSEM_READER_OWNED);
  186. }
  187. #ifdef CONFIG_DEBUG_RWSEMS
  188. /*
  189. * With CONFIG_DEBUG_RWSEMS configured, this makes sure that any task
  190. * pointer left in the owner field of a reader-owned rwsem is the real
  191. * owner or one of the real owners. The only exception is when the
  192. * unlock is done by up_read_non_owner().
  193. */
  194. static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
  195. {
  196. unsigned long val = atomic_long_read(&sem->owner);
  197. while ((val & ~RWSEM_OWNER_FLAGS_MASK) == (unsigned long)current) {
  198. if (atomic_long_try_cmpxchg(&sem->owner, &val,
  199. val & RWSEM_OWNER_FLAGS_MASK))
  200. return;
  201. }
  202. }
  203. #else
  204. static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
  205. {
  206. }
  207. #endif
  208. /*
  209. * Set the RWSEM_NONSPINNABLE bit if the RWSEM_READER_OWNED flag
  210. * remains set. Otherwise, the operation will be aborted.
  211. */
  212. static inline void rwsem_set_nonspinnable(struct rw_semaphore *sem)
  213. {
  214. unsigned long owner = atomic_long_read(&sem->owner);
  215. do {
  216. if (!(owner & RWSEM_READER_OWNED))
  217. break;
  218. if (owner & RWSEM_NONSPINNABLE)
  219. break;
  220. } while (!atomic_long_try_cmpxchg(&sem->owner, &owner,
  221. owner | RWSEM_NONSPINNABLE));
  222. }
  223. static inline bool rwsem_read_trylock(struct rw_semaphore *sem, long *cntp)
  224. {
  225. *cntp = atomic_long_add_return_acquire(RWSEM_READER_BIAS, &sem->count);
  226. if (WARN_ON_ONCE(*cntp < 0))
  227. rwsem_set_nonspinnable(sem);
  228. if (!(*cntp & RWSEM_READ_FAILED_MASK)) {
  229. rwsem_set_reader_owned(sem);
  230. trace_android_vh_record_rwsem_lock_starttime(current, jiffies);
  231. return true;
  232. }
  233. return false;
  234. }
  235. static inline bool rwsem_write_trylock(struct rw_semaphore *sem)
  236. {
  237. long tmp = RWSEM_UNLOCKED_VALUE;
  238. if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp, RWSEM_WRITER_LOCKED)) {
  239. trace_android_vh_record_rwsem_lock_starttime(current, jiffies);
  240. rwsem_set_owner(sem);
  241. return true;
  242. }
  243. return false;
  244. }
  245. /*
  246. * Return just the real task structure pointer of the owner
  247. */
  248. static inline struct task_struct *rwsem_owner(struct rw_semaphore *sem)
  249. {
  250. return (struct task_struct *)
  251. (atomic_long_read(&sem->owner) & ~RWSEM_OWNER_FLAGS_MASK);
  252. }
  253. /*
  254. * Return the real task structure pointer of the owner and the embedded
  255. * flags in the owner. pflags must be non-NULL.
  256. */
  257. static inline struct task_struct *
  258. rwsem_owner_flags(struct rw_semaphore *sem, unsigned long *pflags)
  259. {
  260. unsigned long owner = atomic_long_read(&sem->owner);
  261. *pflags = owner & RWSEM_OWNER_FLAGS_MASK;
  262. return (struct task_struct *)(owner & ~RWSEM_OWNER_FLAGS_MASK);
  263. }
  264. /*
  265. * Guide to the rw_semaphore's count field.
  266. *
  267. * When the RWSEM_WRITER_LOCKED bit in count is set, the lock is owned
  268. * by a writer.
  269. *
  270. * The lock is owned by readers when
  271. * (1) the RWSEM_WRITER_LOCKED isn't set in count,
  272. * (2) some of the reader bits are set in count, and
  273. * (3) the owner field has the RWSEM_READER_OWNED bit set.
  274. *
  275. * Having some reader bits set is not enough to guarantee a reader-owned
  276. * lock as the readers may be in the process of backing out from the count
  277. * and a writer has just released the lock. So another writer may steal
  278. * the lock immediately after that.
  279. */
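/*
 * For example, count == 2 * RWSEM_READER_BIAS combined with an owner value
 * that has RWSEM_READER_OWNED set indicates a reader-owned lock, whereas
 * count == 2 * RWSEM_READER_BIAS alone is not a guarantee, for the reason
 * described in the preceding paragraph.
 */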
  280. /*
  281. * Initialize an rwsem:
  282. */
  283. void __init_rwsem(struct rw_semaphore *sem, const char *name,
  284. struct lock_class_key *key)
  285. {
  286. #ifdef CONFIG_DEBUG_LOCK_ALLOC
  287. /*
  288. * Make sure we are not reinitializing a held semaphore:
  289. */
  290. debug_check_no_locks_freed((void *)sem, sizeof(*sem));
  291. lockdep_init_map_wait(&sem->dep_map, name, key, 0, LD_WAIT_SLEEP);
  292. #endif
  293. #ifdef CONFIG_DEBUG_RWSEMS
  294. sem->magic = sem;
  295. #endif
  296. atomic_long_set(&sem->count, RWSEM_UNLOCKED_VALUE);
  297. raw_spin_lock_init(&sem->wait_lock);
  298. INIT_LIST_HEAD(&sem->wait_list);
  299. atomic_long_set(&sem->owner, 0L);
  300. #ifdef CONFIG_RWSEM_SPIN_ON_OWNER
  301. osq_lock_init(&sem->osq);
  302. #endif
  303. trace_android_vh_rwsem_init(sem);
  304. }
  305. EXPORT_SYMBOL(__init_rwsem);
  306. enum rwsem_waiter_type {
  307. RWSEM_WAITING_FOR_WRITE,
  308. RWSEM_WAITING_FOR_READ
  309. };
  310. struct rwsem_waiter {
  311. struct list_head list;
  312. struct task_struct *task;
  313. enum rwsem_waiter_type type;
  314. unsigned long timeout;
  315. bool handoff_set;
  316. };
  317. #define rwsem_first_waiter(sem) \
  318. list_first_entry(&sem->wait_list, struct rwsem_waiter, list)
  319. enum rwsem_wake_type {
  320. RWSEM_WAKE_ANY, /* Wake whatever's at head of wait list */
  321. RWSEM_WAKE_READERS, /* Wake readers only */
  322. RWSEM_WAKE_READ_OWNED /* Waker thread holds the read lock */
  323. };
  324. /*
  325. * The typical HZ value is either 250 or 1000. So set the minimum waiting
  326. * time in the wait queue to at least 4ms, or 1 jiffy if that is longer,
  327. * before initiating the handoff protocol.
  328. */
  329. #define RWSEM_WAIT_TIMEOUT DIV_ROUND_UP(HZ, 250)
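/*
 * For example, DIV_ROUND_UP(HZ, 250) is 4 jiffies (4ms) with HZ=1000,
 * 1 jiffy (4ms) with HZ=250, and 1 jiffy (10ms) with HZ=100, i.e. the
 * longer of 4ms and 1 jiffy.
 */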
  330. /*
  331. * Magic number to batch-wakeup waiting readers, even when writers are
  332. * also present in the queue. This both limits the amount of work the
  333. * waking thread must do and also prevents any potential counter overflow,
  334. * however unlikely.
  335. */
  336. #define MAX_READERS_WAKEUP 0x100
  337. static inline void
  338. rwsem_add_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
  339. {
  340. lockdep_assert_held(&sem->wait_lock);
  341. list_add_tail(&waiter->list, &sem->wait_list);
  342. /* caller will set RWSEM_FLAG_WAITERS */
  343. }
  344. /*
  345. * Remove a waiter from the wait_list and clear flags.
  346. *
  347. * Both rwsem_mark_wake() and rwsem_try_write_lock() contain a full 'copy' of
  348. * this function. Modify with care.
  349. *
  350. * Return: true if wait_list isn't empty and false otherwise
  351. */
  352. static inline bool
  353. rwsem_del_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
  354. {
  355. lockdep_assert_held(&sem->wait_lock);
  356. list_del(&waiter->list);
  357. if (likely(!list_empty(&sem->wait_list)))
  358. return true;
  359. atomic_long_andnot(RWSEM_FLAG_HANDOFF | RWSEM_FLAG_WAITERS, &sem->count);
  360. return false;
  361. }
  362. /*
  363. * handle the lock release when processes blocked on it can now run
  364. * - if we come here from up_xxxx(), then the RWSEM_FLAG_WAITERS bit must
  365. * have been set.
  366. * - there must be someone on the queue
  367. * - the wait_lock must be held by the caller
  368. * - tasks are marked for wakeup, the caller must later invoke wake_up_q()
  369. * to actually wake up the blocked task(s) and drop the reference count,
  370. * preferably when the wait_lock is released
  371. * - woken process blocks are discarded from the list after having task zeroed
  372. * - writers are only marked woken if downgrading is false
  373. *
  374. * Implies rwsem_del_waiter() for all woken readers.
  375. */
  376. static void rwsem_mark_wake(struct rw_semaphore *sem,
  377. enum rwsem_wake_type wake_type,
  378. struct wake_q_head *wake_q)
  379. {
  380. struct rwsem_waiter *waiter, *tmp;
  381. long oldcount, woken = 0, adjustment = 0;
  382. struct list_head wlist;
  383. lockdep_assert_held(&sem->wait_lock);
  384. /*
  385. * Take a peek at the queue head waiter such that we can determine
  386. * the wakeup(s) to perform.
  387. */
  388. waiter = rwsem_first_waiter(sem);
  389. if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
  390. if (wake_type == RWSEM_WAKE_ANY) {
  391. /*
  392. * Mark writer at the front of the queue for wakeup.
  393. * Until the task is actually awoken later by
  394. * the caller, other writers are able to steal it.
  395. * Readers, on the other hand, will block as they
  396. * will notice the queued writer.
  397. */
  398. wake_q_add(wake_q, waiter->task);
  399. lockevent_inc(rwsem_wake_writer);
  400. }
  401. return;
  402. }
  403. /*
  404. * No reader wakeup if there are too many of them already.
  405. */
  406. if (unlikely(atomic_long_read(&sem->count) < 0))
  407. return;
  408. /*
  409. * Writers might steal the lock before we grant it to the next reader.
  410. * We prefer to do the first reader grant before counting readers
  411. * so we can bail out early if a writer stole the lock.
  412. */
  413. if (wake_type != RWSEM_WAKE_READ_OWNED) {
  414. struct task_struct *owner;
  415. adjustment = RWSEM_READER_BIAS;
  416. oldcount = atomic_long_fetch_add(adjustment, &sem->count);
  417. if (unlikely(oldcount & RWSEM_WRITER_MASK)) {
  418. /*
  419. * When we've been waiting "too" long (for writers
  420. * to give up the lock), request a HANDOFF to
  421. * force the issue.
  422. */
  423. if (time_after(jiffies, waiter->timeout)) {
  424. if (!(oldcount & RWSEM_FLAG_HANDOFF)) {
  425. adjustment -= RWSEM_FLAG_HANDOFF;
  426. lockevent_inc(rwsem_rlock_handoff);
  427. }
  428. waiter->handoff_set = true;
  429. }
  430. atomic_long_add(-adjustment, &sem->count);
  431. return;
  432. }
  433. /*
  434. * Set it to reader-owned to give spinners an early
  435. * indication that readers now have the lock.
  436. * The reader nonspinnable bit seen at slowpath entry of
  437. * the reader is copied over.
  438. */
  439. owner = waiter->task;
  440. __rwsem_set_reader_owned(sem, owner);
  441. }
  442. /*
  443. * Grant up to MAX_READERS_WAKEUP read locks to all the readers in the
  444. * queue. We know that woken will be at least 1 as we accounted
  445. * for above. Note we increment the 'active part' of the count by the
  446. * number of readers before waking any processes up.
  447. *
  448. * This is an adaptation of the phase-fair R/W locks where at the
  449. * reader phase (first waiter is a reader), all readers are eligible
  450. * to acquire the lock at the same time irrespective of their order
  451. * in the queue. The writers acquire the lock according to their
  452. * order in the queue.
  453. *
  454. * We have to do wakeup in 2 passes to prevent the possibility that
  455. * the reader count may be decremented before it is incremented. This
  456. * is because the to-be-woken waiter may not have slept yet. So it
  457. * may see waiter->task cleared, finish its critical section and
  458. * do an unlock before the reader count is incremented.
  459. *
  460. * 1) Collect the read-waiters in a separate list, count them and
  461. * fully increment the reader count in rwsem.
  462. * 2) For each waiter in the new list, clear waiter->task and
  463. * put them into wake_q to be woken up later.
  464. */
  465. INIT_LIST_HEAD(&wlist);
  466. list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) {
  467. if (waiter->type == RWSEM_WAITING_FOR_WRITE)
  468. continue;
  469. woken++;
  470. list_move_tail(&waiter->list, &wlist);
  471. /*
  472. * Limit # of readers that can be woken up per wakeup call.
  473. */
  474. if (unlikely(woken >= MAX_READERS_WAKEUP))
  475. break;
  476. }
  477. adjustment = woken * RWSEM_READER_BIAS - adjustment;
  478. lockevent_cond_inc(rwsem_wake_reader, woken);
  479. oldcount = atomic_long_read(&sem->count);
  480. if (list_empty(&sem->wait_list)) {
  481. /*
  482. * Combined with list_move_tail() above, this implies
  483. * rwsem_del_waiter().
  484. */
  485. adjustment -= RWSEM_FLAG_WAITERS;
  486. if (oldcount & RWSEM_FLAG_HANDOFF)
  487. adjustment -= RWSEM_FLAG_HANDOFF;
  488. } else if (woken) {
  489. /*
  490. * When we've woken a reader, we no longer need to force
  491. * writers to give up the lock and we can clear HANDOFF.
  492. */
  493. if (oldcount & RWSEM_FLAG_HANDOFF)
  494. adjustment -= RWSEM_FLAG_HANDOFF;
  495. }
  496. if (adjustment)
  497. atomic_long_add(adjustment, &sem->count);
  498. /* 2nd pass */
  499. list_for_each_entry_safe(waiter, tmp, &wlist, list) {
  500. struct task_struct *tsk;
  501. tsk = waiter->task;
  502. get_task_struct(tsk);
  503. /*
  504. * Ensure calling get_task_struct() before setting the reader
  505. * waiter to nil such that rwsem_down_read_slowpath() cannot
  506. * race with do_exit() by always holding a reference count
  507. * to the task to wakeup.
  508. */
  509. smp_store_release(&waiter->task, NULL);
  510. /*
  511. * Ensure issuing the wakeup (either by us or someone else)
  512. * after setting the reader waiter to nil.
  513. */
  514. wake_q_add_safe(wake_q, tsk);
  515. }
  516. }
  517. /*
  518. * Remove a waiter and try to wake up other waiters in the wait queue.
  519. * This function is called from the out_nolock path of both the reader and
  520. * writer slowpaths with wait_lock held. It releases the wait_lock and
  521. * optionally wakes up waiters before it returns.
  522. */
  523. static inline void
  524. rwsem_del_wake_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter,
  525. struct wake_q_head *wake_q)
  526. __releases(&sem->wait_lock)
  527. {
  528. bool first = rwsem_first_waiter(sem) == waiter;
  529. wake_q_init(wake_q);
  530. /*
  531. * If the wait_list isn't empty and the waiter to be deleted is
  532. * the first waiter, we wake up the remaining waiters as they may
  533. * be eligible to acquire or spin on the lock.
  534. */
  535. if (rwsem_del_waiter(sem, waiter) && first)
  536. rwsem_mark_wake(sem, RWSEM_WAKE_ANY, wake_q);
  537. raw_spin_unlock_irq(&sem->wait_lock);
  538. if (!wake_q_empty(wake_q))
  539. wake_up_q(wake_q);
  540. }
  541. /*
  542. * This function must be called with the sem->wait_lock held to prevent
  543. * race conditions between checking the rwsem wait list and setting the
  544. * sem->count accordingly.
  545. *
  546. * Implies rwsem_del_waiter() on success.
  547. */
  548. static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
  549. struct rwsem_waiter *waiter)
  550. {
  551. struct rwsem_waiter *first = rwsem_first_waiter(sem);
  552. long count, new;
  553. lockdep_assert_held(&sem->wait_lock);
  554. count = atomic_long_read(&sem->count);
  555. do {
  556. bool has_handoff = !!(count & RWSEM_FLAG_HANDOFF);
  557. if (has_handoff) {
  558. /*
  559. * Honor handoff bit and yield only when the first
  560. * waiter is the one that set it. Otherwise, we
  561. * still try to acquire the rwsem.
  562. */
  563. if (first->handoff_set && (waiter != first))
  564. return false;
  565. }
  566. new = count;
  567. if (count & RWSEM_LOCK_MASK) {
  568. /*
  569. * A waiter (first or not) can set the handoff bit
  570. * if it is an RT task or has waited in the wait
  571. * queue for too long.
  572. */
  573. if (has_handoff || (!rt_task(waiter->task) &&
  574. !time_after(jiffies, waiter->timeout)))
  575. return false;
  576. new |= RWSEM_FLAG_HANDOFF;
  577. } else {
  578. new |= RWSEM_WRITER_LOCKED;
  579. new &= ~RWSEM_FLAG_HANDOFF;
  580. if (list_is_singular(&sem->wait_list))
  581. new &= ~RWSEM_FLAG_WAITERS;
  582. }
  583. } while (!atomic_long_try_cmpxchg_acquire(&sem->count, &count, new));
  584. /*
  585. * We have either acquired the lock with handoff bit cleared or set
  586. * the handoff bit. Only the first waiter can have its handoff_set
  587. * set here to enable optimistic spinning in slowpath loop.
  588. */
  589. if (new & RWSEM_FLAG_HANDOFF) {
  590. first->handoff_set = true;
  591. lockevent_inc(rwsem_wlock_handoff);
  592. return false;
  593. }
  594. /*
  595. * Have rwsem_try_write_lock() fully imply rwsem_del_waiter() on
  596. * success.
  597. */
  598. list_del(&waiter->list);
  599. rwsem_set_owner(sem);
  600. return true;
  601. }
  602. /*
  603. * The rwsem_spin_on_owner() function returns the following 4 values
  604. * depending on the lock owner state.
  605. * OWNER_NULL : owner is currently NULL
  606. * OWNER_WRITER: when owner changes and is a writer
  607. * OWNER_READER: when owner changes and the new owner may be a reader.
  608. * OWNER_NONSPINNABLE:
  609. * when optimistic spinning has to stop because either the
  610. * owner stops running, is unknown, or its timeslice has
  611. * been used up.
  612. */
  613. enum owner_state {
  614. OWNER_NULL = 1 << 0,
  615. OWNER_WRITER = 1 << 1,
  616. OWNER_READER = 1 << 2,
  617. OWNER_NONSPINNABLE = 1 << 3,
  618. };
  619. #ifdef CONFIG_RWSEM_SPIN_ON_OWNER
  620. /*
  621. * Try to acquire the write lock before the writer has been put on the wait queue.
  622. */
  623. static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
  624. {
  625. long count = atomic_long_read(&sem->count);
  626. while (!(count & (RWSEM_LOCK_MASK|RWSEM_FLAG_HANDOFF))) {
  627. if (atomic_long_try_cmpxchg_acquire(&sem->count, &count,
  628. count | RWSEM_WRITER_LOCKED)) {
  629. rwsem_set_owner(sem);
  630. lockevent_inc(rwsem_opt_lock);
  631. return true;
  632. }
  633. }
  634. return false;
  635. }
  636. static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
  637. {
  638. struct task_struct *owner;
  639. unsigned long flags;
  640. bool ret = true;
  641. if (need_resched()) {
  642. lockevent_inc(rwsem_opt_fail);
  643. return false;
  644. }
  645. /*
  646. * Disabling preemption is equivalent to an RCU read-side critical
  647. * section, thus the task_struct structure won't go away.
  648. */
  649. owner = rwsem_owner_flags(sem, &flags);
  650. /*
  651. * Don't check the read-owner as the entry may be stale.
  652. */
  653. if ((flags & RWSEM_NONSPINNABLE) ||
  654. (owner && !(flags & RWSEM_READER_OWNED) && !owner_on_cpu(owner)))
  655. ret = false;
  656. trace_android_vh_rwsem_can_spin_on_owner(sem, &ret);
  657. lockevent_cond_inc(rwsem_opt_fail, !ret);
  658. return ret;
  659. }
  660. #define OWNER_SPINNABLE (OWNER_NULL | OWNER_WRITER | OWNER_READER)
  661. static inline enum owner_state
  662. rwsem_owner_state(struct task_struct *owner, unsigned long flags)
  663. {
  664. if (flags & RWSEM_NONSPINNABLE)
  665. return OWNER_NONSPINNABLE;
  666. if (flags & RWSEM_READER_OWNED)
  667. return OWNER_READER;
  668. return owner ? OWNER_WRITER : OWNER_NULL;
  669. }
  670. static noinline enum owner_state
  671. rwsem_spin_on_owner(struct rw_semaphore *sem)
  672. {
  673. struct task_struct *new, *owner;
  674. unsigned long flags, new_flags;
  675. enum owner_state state;
  676. int cnt = 0;
  677. bool time_out = false;
  678. lockdep_assert_preemption_disabled();
  679. owner = rwsem_owner_flags(sem, &flags);
  680. state = rwsem_owner_state(owner, flags);
  681. if (state != OWNER_WRITER)
  682. return state;
  683. for (;;) {
  684. trace_android_vh_rwsem_opt_spin_start(sem, &time_out, &cnt, true);
  685. if (time_out)
  686. break;
  687. /*
  688. * When a waiting writer sets the handoff flag, it may spin
  689. * on the owner as well. Once that writer acquires the lock,
  690. * we can spin on it. So we don't need to quit even when the
  691. * handoff bit is set.
  692. */
  693. new = rwsem_owner_flags(sem, &new_flags);
  694. if ((new != owner) || (new_flags != flags)) {
  695. state = rwsem_owner_state(new, new_flags);
  696. break;
  697. }
  698. /*
  699. * Ensure we emit the owner->on_cpu dereference _after_
  700. * checking that sem->owner still matches owner. If that fails,
  701. * owner might point to free()d memory. If it still matches,
  702. * our spinning context has already disabled preemption, which
  703. * is equivalent to an RCU read-side critical section and
  704. * ensures the memory stays valid.
  705. */
  706. barrier();
  707. if (need_resched() || !owner_on_cpu(owner)) {
  708. state = OWNER_NONSPINNABLE;
  709. break;
  710. }
  711. cpu_relax();
  712. }
  713. return state;
  714. }
  715. /*
  716. * Calculate reader-owned rwsem spinning threshold for writer
  717. *
  718. * The more readers own the rwsem, the longer it will take for them to
  719. * wind down and free the rwsem. So the empirical formula used to
  720. * determine the actual spinning time limit here is:
  721. *
  722. * Spinning threshold = (10 + nr_readers/2)us
  723. *
  724. * The limit is capped to a maximum of 25us (30 readers). This is just
  725. * a heuristic and is subject to change in the future.
  726. */
  727. static inline u64 rwsem_rspin_threshold(struct rw_semaphore *sem)
  728. {
  729. long count = atomic_long_read(&sem->count);
  730. int readers = count >> RWSEM_READER_SHIFT;
  731. u64 delta;
  732. if (readers > 30)
  733. readers = 30;
  734. delta = (20 + readers) * NSEC_PER_USEC / 2;
  735. return sched_clock() + delta;
  736. }
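/*
 * Worked example of the formula above: with 4 readers,
 * delta = (20 + 4) * NSEC_PER_USEC / 2 = 12000ns = 12us, i.e. (10 + 4/2)us;
 * with 30 or more readers the cap gives (20 + 30) * NSEC_PER_USEC / 2 = 25us.
 */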
  737. static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
  738. {
  739. bool taken = false;
  740. int prev_owner_state = OWNER_NULL;
  741. int loop = 0;
  742. u64 rspin_threshold = 0;
  743. int cnt = 0;
  744. bool time_out = false;
  745. /* sem->wait_lock should not be held when doing optimistic spinning */
  746. if (!osq_lock(&sem->osq))
  747. goto done;
  748. /*
  749. * Optimistically spin on the owner field and attempt to acquire the
  750. * lock whenever the owner changes. Spinning will be stopped when:
  751. * 1) the owning writer isn't running; or
  752. * 2) readers own the lock and spinning time has exceeded the limit.
  753. */
  754. for (;;) {
  755. enum owner_state owner_state;
  756. trace_android_vh_rwsem_opt_spin_start(sem, &time_out, &cnt, false);
  757. if (time_out)
  758. break;
  759. owner_state = rwsem_spin_on_owner(sem);
  760. if (!(owner_state & OWNER_SPINNABLE))
  761. break;
  762. /*
  763. * Try to acquire the lock
  764. */
  765. taken = rwsem_try_write_lock_unqueued(sem);
  766. if (taken)
  767. break;
  768. /*
  769. * Time-based reader-owned rwsem optimistic spinning
  770. */
  771. if (owner_state == OWNER_READER) {
  772. /*
  773. * Re-initialize rspin_threshold every time
  774. * the owner state changes from non-reader to reader.
  775. * This allows a writer to steal the lock in between
  776. * 2 reader phases and have the threshold reset at
  777. * the beginning of the 2nd reader phase.
  778. */
  779. if (prev_owner_state != OWNER_READER) {
  780. if (rwsem_test_oflags(sem, RWSEM_NONSPINNABLE))
  781. break;
  782. rspin_threshold = rwsem_rspin_threshold(sem);
  783. loop = 0;
  784. }
  785. /*
  786. * Check time threshold once every 16 iterations to
  787. * avoid calling sched_clock() too frequently so
  788. * as to reduce the average latency between the times
  789. * when the lock becomes free and when the spinner
  790. * is ready to do a trylock.
  791. */
  792. else if (!(++loop & 0xf) && (sched_clock() > rspin_threshold)) {
  793. rwsem_set_nonspinnable(sem);
  794. lockevent_inc(rwsem_opt_nospin);
  795. break;
  796. }
  797. }
  798. /*
  799. * An RT task cannot do optimistic spinning if it cannot
  800. * be sure the lock holder is running or live-lock may
  801. * happen if the current task and the lock holder happen
  802. * to run on the same CPU. However, aborting optimistic
  803. * spinning while a NULL owner is detected may miss some
  804. * opportunity where spinning can continue without causing
  805. * a problem.
  806. *
  807. * There are 2 possible cases where an RT task may be able
  808. * to continue spinning.
  809. *
  810. * 1) The lock owner is in the process of releasing the
  811. * lock, sem->owner is cleared but the lock has not
  812. * been released yet.
  813. * 2) The lock was free and owner cleared, but another
  814. * task just comes in and acquires the lock before
  815. * we try to get it. The new owner may be a spinnable
  816. * writer.
  817. *
  818. * To take advantage of the two scenarios listed above, the RT
  819. * task is made to retry one more time to see if it can
  820. * acquire the lock or continue spinning on the new owning
  821. * writer. Of course, if the time lag is long enough or the
  822. * new owner is not a writer or spinnable, the RT task will
  823. * quit spinning.
  824. *
  825. * If the owner is a writer, the need_resched() check is
  826. * done inside rwsem_spin_on_owner(). If the owner is not
  827. * a writer, the need_resched() check needs to be done here.
  828. */
  829. if (owner_state != OWNER_WRITER) {
  830. if (need_resched())
  831. break;
  832. if (rt_task(current) &&
  833. (prev_owner_state != OWNER_WRITER))
  834. break;
  835. }
  836. prev_owner_state = owner_state;
  837. /*
  838. * The cpu_relax() call is a compiler barrier which forces
  839. * everything in this loop to be re-loaded. We don't need
  840. * memory barriers as we'll eventually observe the right
  841. * values at the cost of a few extra spins.
  842. */
  843. cpu_relax();
  844. }
  845. osq_unlock(&sem->osq);
  846. trace_android_vh_rwsem_opt_spin_finish(sem, taken);
  847. done:
  848. lockevent_cond_inc(rwsem_opt_fail, !taken);
  849. return taken;
  850. }
  851. /*
  852. * Clear the owner's RWSEM_NONSPINNABLE bit if it is set. This should
  853. * only be called when the reader count reaches 0.
  854. */
  855. static inline void clear_nonspinnable(struct rw_semaphore *sem)
  856. {
  857. if (unlikely(rwsem_test_oflags(sem, RWSEM_NONSPINNABLE)))
  858. atomic_long_andnot(RWSEM_NONSPINNABLE, &sem->owner);
  859. }
  860. #else
  861. static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
  862. {
  863. return false;
  864. }
  865. static inline bool rwsem_optimistic_spin(struct rw_semaphore *sem)
  866. {
  867. return false;
  868. }
  869. static inline void clear_nonspinnable(struct rw_semaphore *sem) { }
  870. static inline enum owner_state
  871. rwsem_spin_on_owner(struct rw_semaphore *sem)
  872. {
  873. return OWNER_NONSPINNABLE;
  874. }
  875. #endif
  876. /*
  877. * Prepare to wake up waiter(s) in the wait queue by putting them into the
  878. * given wake_q if the rwsem lock owner isn't a writer. If the rwsem is
  879. * likely reader-owned, wake up the read lock waiters at the front of the
  880. * queue; otherwise, wake up any front waiter.
  881. * This is being called from both reader and writer slow paths.
  882. */
  883. static inline void rwsem_cond_wake_waiter(struct rw_semaphore *sem, long count,
  884. struct wake_q_head *wake_q)
  885. {
  886. enum rwsem_wake_type wake_type;
  887. if (count & RWSEM_WRITER_MASK)
  888. return;
  889. if (count & RWSEM_READER_MASK) {
  890. wake_type = RWSEM_WAKE_READERS;
  891. } else {
  892. wake_type = RWSEM_WAKE_ANY;
  893. clear_nonspinnable(sem);
  894. }
  895. rwsem_mark_wake(sem, wake_type, wake_q);
  896. }
  897. /*
  898. * Wait for the read lock to be granted
  899. */
  900. static struct rw_semaphore __sched *
  901. rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, unsigned int state)
  902. {
  903. long adjustment = -RWSEM_READER_BIAS;
  904. long rcnt = (count >> RWSEM_READER_SHIFT);
  905. struct rwsem_waiter waiter;
  906. DEFINE_WAKE_Q(wake_q);
  907. bool already_on_list = false;
  908. /*
  909. * To prevent a constant stream of readers from starving a sleeping
  910. * waiter, don't attempt optimistic lock stealing if the lock is
  911. * currently owned by readers.
  912. */
  913. if ((atomic_long_read(&sem->owner) & RWSEM_READER_OWNED) &&
  914. (rcnt > 1) && !(count & RWSEM_WRITER_LOCKED))
  915. goto queue;
  916. /*
  917. * Reader optimistic lock stealing.
  918. */
  919. if (!(count & (RWSEM_WRITER_LOCKED | RWSEM_FLAG_HANDOFF))) {
  920. rwsem_set_reader_owned(sem);
  921. lockevent_inc(rwsem_rlock_steal);
  922. /*
  923. * Wake up other readers in the wait queue if it is
  924. * the first reader.
  925. */
  926. if ((rcnt == 1) && (count & RWSEM_FLAG_WAITERS)) {
  927. raw_spin_lock_irq(&sem->wait_lock);
  928. if (!list_empty(&sem->wait_list))
  929. rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED,
  930. &wake_q);
  931. raw_spin_unlock_irq(&sem->wait_lock);
  932. wake_up_q(&wake_q);
  933. }
  934. trace_android_vh_record_rwsem_lock_starttime(current, jiffies);
  935. return sem;
  936. }
  937. queue:
  938. waiter.task = current;
  939. waiter.type = RWSEM_WAITING_FOR_READ;
  940. waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;
  941. waiter.handoff_set = false;
  942. raw_spin_lock_irq(&sem->wait_lock);
  943. if (list_empty(&sem->wait_list)) {
  944. /*
  945. * In case the wait queue is empty and the lock isn't owned
  946. * by a writer, this reader can exit the slowpath and return
  947. * immediately as its RWSEM_READER_BIAS has already been set
  948. * in the count.
  949. */
  950. if (!(atomic_long_read(&sem->count) & RWSEM_WRITER_MASK)) {
  951. /* Provide lock ACQUIRE */
  952. smp_acquire__after_ctrl_dep();
  953. raw_spin_unlock_irq(&sem->wait_lock);
  954. rwsem_set_reader_owned(sem);
  955. lockevent_inc(rwsem_rlock_fast);
  956. trace_android_vh_record_rwsem_lock_starttime(
  957. current, jiffies);
  958. return sem;
  959. }
  960. adjustment += RWSEM_FLAG_WAITERS;
  961. }
  962. trace_android_vh_alter_rwsem_list_add(
  963. &waiter,
  964. sem, &already_on_list);
  965. if (!already_on_list)
  966. rwsem_add_waiter(sem, &waiter);
  967. /* we're now waiting on the lock, but no longer actively locking */
  968. count = atomic_long_add_return(adjustment, &sem->count);
  969. rwsem_cond_wake_waiter(sem, count, &wake_q);
  970. trace_android_vh_rwsem_wake(sem);
  971. raw_spin_unlock_irq(&sem->wait_lock);
  972. if (!wake_q_empty(&wake_q))
  973. wake_up_q(&wake_q);
  974. trace_contention_begin(sem, LCB_F_READ);
  975. /* wait to be given the lock */
  976. trace_android_vh_rwsem_read_wait_start(sem);
  977. for (;;) {
  978. set_current_state(state);
  979. if (!smp_load_acquire(&waiter.task)) {
  980. /* Matches rwsem_mark_wake()'s smp_store_release(). */
  981. break;
  982. }
  983. if (signal_pending_state(state, current)) {
  984. raw_spin_lock_irq(&sem->wait_lock);
  985. if (waiter.task)
  986. goto out_nolock;
  987. raw_spin_unlock_irq(&sem->wait_lock);
  988. /* Ordered by sem->wait_lock against rwsem_mark_wake(). */
  989. break;
  990. }
  991. schedule_preempt_disabled();
  992. lockevent_inc(rwsem_sleep_reader);
  993. }
  994. __set_current_state(TASK_RUNNING);
  995. trace_android_vh_rwsem_read_wait_finish(sem);
  996. lockevent_inc(rwsem_rlock);
  997. trace_contention_end(sem, 0);
  998. trace_android_vh_record_rwsem_lock_starttime(current, jiffies);
  999. return sem;
  1000. out_nolock:
  1001. rwsem_del_wake_waiter(sem, &waiter, &wake_q);
  1002. __set_current_state(TASK_RUNNING);
  1003. trace_android_vh_rwsem_read_wait_finish(sem);
  1004. lockevent_inc(rwsem_rlock_fail);
  1005. trace_contention_end(sem, -EINTR);
  1006. return ERR_PTR(-EINTR);
  1007. }
  1008. /*
  1009. * Wait until we successfully acquire the write lock
  1010. */
  1011. static struct rw_semaphore __sched *
  1012. rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
  1013. {
  1014. struct rwsem_waiter waiter;
  1015. DEFINE_WAKE_Q(wake_q);
  1016. bool already_on_list = false;
  1017. /* do optimistic spinning and steal lock if possible */
  1018. if (rwsem_can_spin_on_owner(sem) && rwsem_optimistic_spin(sem)) {
  1019. /* rwsem_optimistic_spin() implies ACQUIRE on success */
  1020. trace_android_vh_record_rwsem_lock_starttime(current, jiffies);
  1021. return sem;
  1022. }
  1023. /*
  1024. * Optimistic spinning failed, proceed to the slowpath
  1025. * and block until we can acquire the sem.
  1026. */
  1027. waiter.task = current;
  1028. waiter.type = RWSEM_WAITING_FOR_WRITE;
  1029. waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;
  1030. waiter.handoff_set = false;
  1031. raw_spin_lock_irq(&sem->wait_lock);
  1032. trace_android_vh_alter_rwsem_list_add(
  1033. &waiter,
  1034. sem, &already_on_list);
  1035. if (!already_on_list)
  1036. rwsem_add_waiter(sem, &waiter);
  1037. /* we're now waiting on the lock */
  1038. if (rwsem_first_waiter(sem) != &waiter) {
  1039. rwsem_cond_wake_waiter(sem, atomic_long_read(&sem->count),
  1040. &wake_q);
  1041. if (!wake_q_empty(&wake_q)) {
  1042. /*
  1043. * We want to minimize wait_lock hold time especially
  1044. * when a large number of readers are to be woken up.
  1045. */
  1046. raw_spin_unlock_irq(&sem->wait_lock);
  1047. wake_up_q(&wake_q);
  1048. raw_spin_lock_irq(&sem->wait_lock);
  1049. }
  1050. } else {
  1051. atomic_long_or(RWSEM_FLAG_WAITERS, &sem->count);
  1052. }
  1053. trace_android_vh_rwsem_wake(sem);
  1054. /* wait until we successfully acquire the lock */
  1055. trace_android_vh_rwsem_write_wait_start(sem);
  1056. set_current_state(state);
  1057. trace_contention_begin(sem, LCB_F_WRITE);
  1058. for (;;) {
  1059. if (rwsem_try_write_lock(sem, &waiter)) {
  1060. /* rwsem_try_write_lock() implies ACQUIRE on success */
  1061. break;
  1062. }
  1063. raw_spin_unlock_irq(&sem->wait_lock);
  1064. if (signal_pending_state(state, current))
  1065. goto out_nolock;
  1066. /*
  1067. * After setting the handoff bit and failing to acquire
  1068. * the lock, attempt to spin on owner to accelerate lock
  1069. * transfer. If the previous owner is an on-cpu writer and it
  1070. * has just released the lock, OWNER_NULL will be returned.
  1071. * In this case, we attempt to acquire the lock again
  1072. * without sleeping.
  1073. */
  1074. if (waiter.handoff_set) {
  1075. enum owner_state owner_state;
  1076. owner_state = rwsem_spin_on_owner(sem);
  1077. if (owner_state == OWNER_NULL)
  1078. goto trylock_again;
  1079. }
  1080. schedule_preempt_disabled();
  1081. lockevent_inc(rwsem_sleep_writer);
  1082. set_current_state(state);
  1083. trylock_again:
  1084. raw_spin_lock_irq(&sem->wait_lock);
  1085. }
  1086. __set_current_state(TASK_RUNNING);
  1087. trace_android_vh_rwsem_write_wait_finish(sem);
  1088. raw_spin_unlock_irq(&sem->wait_lock);
  1089. lockevent_inc(rwsem_wlock);
  1090. trace_contention_end(sem, 0);
  1091. trace_android_vh_record_rwsem_lock_starttime(current, jiffies);
  1092. return sem;
  1093. out_nolock:
  1094. __set_current_state(TASK_RUNNING);
  1095. trace_android_vh_rwsem_write_wait_finish(sem);
  1096. raw_spin_lock_irq(&sem->wait_lock);
  1097. rwsem_del_wake_waiter(sem, &waiter, &wake_q);
  1098. lockevent_inc(rwsem_wlock_fail);
  1099. trace_contention_end(sem, -EINTR);
  1100. return ERR_PTR(-EINTR);
  1101. }
  1102. /*
  1103. * handle waking up a waiter on the semaphore
  1104. * - up_read/up_write has decremented the active part of count if we come here
  1105. */
  1106. static struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
  1107. {
  1108. unsigned long flags;
  1109. DEFINE_WAKE_Q(wake_q);
  1110. raw_spin_lock_irqsave(&sem->wait_lock, flags);
  1111. if (!list_empty(&sem->wait_list))
  1112. rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
  1113. trace_android_vh_rwsem_wake_finish(sem);
  1114. raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
  1115. wake_up_q(&wake_q);
  1116. return sem;
  1117. }
  1118. /*
  1119. * downgrade a write lock into a read lock
  1120. * - caller incremented waiting part of count and discovered it still negative
  1121. * - just wake up any readers at the front of the queue
  1122. */
  1123. static struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
  1124. {
  1125. unsigned long flags;
  1126. DEFINE_WAKE_Q(wake_q);
  1127. raw_spin_lock_irqsave(&sem->wait_lock, flags);
  1128. if (!list_empty(&sem->wait_list))
  1129. rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q);
  1130. raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
  1131. wake_up_q(&wake_q);
  1132. return sem;
  1133. }
  1134. /*
  1135. * lock for reading
  1136. */
  1137. static __always_inline int __down_read_common(struct rw_semaphore *sem, int state)
  1138. {
  1139. int ret = 0;
  1140. long count;
  1141. preempt_disable();
  1142. if (!rwsem_read_trylock(sem, &count)) {
  1143. if (IS_ERR(rwsem_down_read_slowpath(sem, count, state))) {
  1144. ret = -EINTR;
  1145. goto out;
  1146. }
  1147. DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
  1148. }
  1149. out:
  1150. preempt_enable();
  1151. return ret;
  1152. }
  1153. static __always_inline void __down_read(struct rw_semaphore *sem)
  1154. {
  1155. __down_read_common(sem, TASK_UNINTERRUPTIBLE);
  1156. }
  1157. static __always_inline int __down_read_interruptible(struct rw_semaphore *sem)
  1158. {
  1159. return __down_read_common(sem, TASK_INTERRUPTIBLE);
  1160. }
  1161. static __always_inline int __down_read_killable(struct rw_semaphore *sem)
  1162. {
  1163. return __down_read_common(sem, TASK_KILLABLE);
  1164. }
  1165. static inline int __down_read_trylock(struct rw_semaphore *sem)
  1166. {
  1167. int ret = 0;
  1168. long tmp;
  1169. DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
  1170. preempt_disable();
  1171. tmp = atomic_long_read(&sem->count);
  1172. while (!(tmp & RWSEM_READ_FAILED_MASK)) {
  1173. if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
  1174. tmp + RWSEM_READER_BIAS)) {
  1175. rwsem_set_reader_owned(sem);
  1176. ret = 1;
  1177. trace_android_vh_record_rwsem_lock_starttime(current, jiffies);
  1178. break;
  1179. }
  1180. }
  1181. preempt_enable();
  1182. return ret;
  1183. }
  1184. /*
  1185. * lock for writing
  1186. */
  1187. static inline int __down_write_common(struct rw_semaphore *sem, int state)
  1188. {
  1189. int ret = 0;
  1190. preempt_disable();
  1191. if (unlikely(!rwsem_write_trylock(sem))) {
  1192. if (IS_ERR(rwsem_down_write_slowpath(sem, state)))
  1193. ret = -EINTR;
  1194. }
  1195. preempt_enable();
  1196. return ret;
  1197. }
  1198. static inline void __down_write(struct rw_semaphore *sem)
  1199. {
  1200. __down_write_common(sem, TASK_UNINTERRUPTIBLE);
  1201. }
  1202. static inline int __down_write_killable(struct rw_semaphore *sem)
  1203. {
  1204. return __down_write_common(sem, TASK_KILLABLE);
  1205. }
  1206. static inline int __down_write_trylock(struct rw_semaphore *sem)
  1207. {
  1208. int ret;
  1209. preempt_disable();
  1210. DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
  1211. ret = rwsem_write_trylock(sem);
  1212. preempt_enable();
  1213. return ret;
  1214. }
  1215. /*
  1216. * unlock after reading
  1217. */
  1218. static inline void __up_read(struct rw_semaphore *sem)
  1219. {
  1220. long tmp;
  1221. DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
  1222. DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
  1223. preempt_disable();
  1224. rwsem_clear_reader_owned(sem);
  1225. tmp = atomic_long_add_return_release(-RWSEM_READER_BIAS, &sem->count);
  1226. DEBUG_RWSEMS_WARN_ON(tmp < 0, sem);
  1227. if (unlikely((tmp & (RWSEM_LOCK_MASK|RWSEM_FLAG_WAITERS)) ==
  1228. RWSEM_FLAG_WAITERS)) {
  1229. clear_nonspinnable(sem);
  1230. rwsem_wake(sem);
  1231. }
  1232. trace_android_vh_record_rwsem_lock_starttime(current, 0);
  1233. preempt_enable();
  1234. }
  1235. /*
  1236. * unlock after writing
  1237. */
  1238. static inline void __up_write(struct rw_semaphore *sem)
  1239. {
  1240. long tmp;
  1241. DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
  1242. /*
  1243. * sem->owner may differ from current if the ownership is transferred
  1244. * to an anonymous writer by setting the RWSEM_NONSPINNABLE bit.
  1245. */
  1246. DEBUG_RWSEMS_WARN_ON((rwsem_owner(sem) != current) &&
  1247. !rwsem_test_oflags(sem, RWSEM_NONSPINNABLE), sem);
  1248. preempt_disable();
  1249. rwsem_clear_owner(sem);
  1250. tmp = atomic_long_fetch_add_release(-RWSEM_WRITER_LOCKED, &sem->count);
  1251. if (unlikely(tmp & RWSEM_FLAG_WAITERS))
  1252. rwsem_wake(sem);
  1253. trace_android_vh_record_rwsem_lock_starttime(current, 0);
  1254. preempt_enable();
  1255. }
  1256. /*
  1257. * downgrade write lock to read lock
  1258. */
  1259. static inline void __downgrade_write(struct rw_semaphore *sem)
  1260. {
  1261. long tmp;
  1262. /*
  1263. * When downgrading from exclusive to shared ownership,
  1264. * anything inside the write-locked region cannot leak
  1265. * into the read side. In contrast, anything in the
  1266. * read-locked region is ok to be re-ordered into the
  1267. * write side. As such, rely on RELEASE semantics.
  1268. */
  1269. DEBUG_RWSEMS_WARN_ON(rwsem_owner(sem) != current, sem);
  1270. preempt_disable();
  1271. tmp = atomic_long_fetch_add_release(
  1272. -RWSEM_WRITER_LOCKED+RWSEM_READER_BIAS, &sem->count);
  1273. rwsem_set_reader_owned(sem);
  1274. if (tmp & RWSEM_FLAG_WAITERS)
  1275. rwsem_downgrade_wake(sem);
  1276. preempt_enable();
  1277. }
  1278. #else /* !CONFIG_PREEMPT_RT */
  1279. #define RT_MUTEX_BUILD_MUTEX
  1280. #include "rtmutex.c"
  1281. #define rwbase_set_and_save_current_state(state) \
  1282. set_current_state(state)
  1283. #define rwbase_restore_current_state() \
  1284. __set_current_state(TASK_RUNNING)
  1285. #define rwbase_rtmutex_lock_state(rtm, state) \
  1286. __rt_mutex_lock(rtm, state)
  1287. #define rwbase_rtmutex_slowlock_locked(rtm, state) \
  1288. __rt_mutex_slowlock_locked(rtm, NULL, state)
  1289. #define rwbase_rtmutex_unlock(rtm) \
  1290. __rt_mutex_unlock(rtm)
  1291. #define rwbase_rtmutex_trylock(rtm) \
  1292. __rt_mutex_trylock(rtm)
  1293. #define rwbase_signal_pending_state(state, current) \
  1294. signal_pending_state(state, current)
  1295. #define rwbase_schedule() \
  1296. schedule()
  1297. #include "rwbase_rt.c"
  1298. void __init_rwsem(struct rw_semaphore *sem, const char *name,
  1299. struct lock_class_key *key)
  1300. {
  1301. init_rwbase_rt(&(sem)->rwbase);
  1302. #ifdef CONFIG_DEBUG_LOCK_ALLOC
  1303. debug_check_no_locks_freed((void *)sem, sizeof(*sem));
  1304. lockdep_init_map_wait(&sem->dep_map, name, key, 0, LD_WAIT_SLEEP);
  1305. #endif
  1306. }
  1307. EXPORT_SYMBOL(__init_rwsem);
  1308. static inline void __down_read(struct rw_semaphore *sem)
  1309. {
  1310. rwbase_read_lock(&sem->rwbase, TASK_UNINTERRUPTIBLE);
  1311. }
  1312. static inline int __down_read_interruptible(struct rw_semaphore *sem)
  1313. {
  1314. return rwbase_read_lock(&sem->rwbase, TASK_INTERRUPTIBLE);
  1315. }
  1316. static inline int __down_read_killable(struct rw_semaphore *sem)
  1317. {
  1318. return rwbase_read_lock(&sem->rwbase, TASK_KILLABLE);
  1319. }
  1320. static inline int __down_read_trylock(struct rw_semaphore *sem)
  1321. {
  1322. return rwbase_read_trylock(&sem->rwbase);
  1323. }
  1324. static inline void __up_read(struct rw_semaphore *sem)
  1325. {
  1326. rwbase_read_unlock(&sem->rwbase, TASK_NORMAL);
  1327. }
  1328. static inline void __sched __down_write(struct rw_semaphore *sem)
  1329. {
  1330. rwbase_write_lock(&sem->rwbase, TASK_UNINTERRUPTIBLE);
  1331. }
  1332. static inline int __sched __down_write_killable(struct rw_semaphore *sem)
  1333. {
  1334. return rwbase_write_lock(&sem->rwbase, TASK_KILLABLE);
  1335. }
  1336. static inline int __down_write_trylock(struct rw_semaphore *sem)
  1337. {
  1338. return rwbase_write_trylock(&sem->rwbase);
  1339. }
  1340. static inline void __up_write(struct rw_semaphore *sem)
  1341. {
  1342. rwbase_write_unlock(&sem->rwbase);
  1343. }
  1344. static inline void __downgrade_write(struct rw_semaphore *sem)
  1345. {
  1346. rwbase_write_downgrade(&sem->rwbase);
  1347. }
  1348. /* Debug stubs for the common API */
  1349. #define DEBUG_RWSEMS_WARN_ON(c, sem)
  1350. static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem,
  1351. struct task_struct *owner)
  1352. {
  1353. }
  1354. static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem)
  1355. {
  1356. int count = atomic_read(&sem->rwbase.readers);
  1357. return count < 0 && count != READER_BIAS;
  1358. }
  1359. #endif /* CONFIG_PREEMPT_RT */
  1360. /*
  1361. * lock for reading
  1362. */
  1363. void __sched down_read(struct rw_semaphore *sem)
  1364. {
  1365. might_sleep();
  1366. rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
  1367. LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
  1368. }
  1369. EXPORT_SYMBOL(down_read);
  1370. int __sched down_read_interruptible(struct rw_semaphore *sem)
  1371. {
  1372. might_sleep();
  1373. rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
  1374. if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_interruptible)) {
  1375. rwsem_release(&sem->dep_map, _RET_IP_);
  1376. return -EINTR;
  1377. }
  1378. return 0;
  1379. }
  1380. EXPORT_SYMBOL(down_read_interruptible);
  1381. int __sched down_read_killable(struct rw_semaphore *sem)
  1382. {
  1383. might_sleep();
  1384. rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
  1385. if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_killable)) {
  1386. rwsem_release(&sem->dep_map, _RET_IP_);
  1387. return -EINTR;
  1388. }
  1389. return 0;
  1390. }
  1391. EXPORT_SYMBOL(down_read_killable);
  1392. /*
  1393. * trylock for reading -- returns 1 if successful, 0 if contention
  1394. */
  1395. int down_read_trylock(struct rw_semaphore *sem)
  1396. {
  1397. int ret = __down_read_trylock(sem);
  1398. if (ret == 1)
  1399. rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_);
  1400. return ret;
  1401. }
  1402. EXPORT_SYMBOL(down_read_trylock);
  1403. /*
  1404. * lock for writing
  1405. */
  1406. void __sched down_write(struct rw_semaphore *sem)
  1407. {
  1408. might_sleep();
  1409. rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
  1410. LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
  1411. }
  1412. EXPORT_SYMBOL(down_write);
  1413. /*
  1414. * lock for writing
  1415. */
  1416. int __sched down_write_killable(struct rw_semaphore *sem)
  1417. {
  1418. might_sleep();
  1419. rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
  1420. if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock,
  1421. __down_write_killable)) {
  1422. rwsem_release(&sem->dep_map, _RET_IP_);
  1423. return -EINTR;
  1424. }
  1425. return 0;
  1426. }
  1427. EXPORT_SYMBOL(down_write_killable);
  1428. /*
  1429. * trylock for writing -- returns 1 if successful, 0 if contention
  1430. */
  1431. int down_write_trylock(struct rw_semaphore *sem)
  1432. {
  1433. int ret = __down_write_trylock(sem);
  1434. if (ret == 1)
  1435. rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_);
  1436. return ret;
  1437. }
  1438. EXPORT_SYMBOL(down_write_trylock);
  1439. /*
  1440. * release a read lock
  1441. */
  1442. void up_read(struct rw_semaphore *sem)
  1443. {
  1444. rwsem_release(&sem->dep_map, _RET_IP_);
  1445. __up_read(sem);
  1446. }
  1447. EXPORT_SYMBOL(up_read);
  1448. /*
  1449. * release a write lock
  1450. */
  1451. void up_write(struct rw_semaphore *sem)
  1452. {
  1453. rwsem_release(&sem->dep_map, _RET_IP_);
  1454. trace_android_vh_rwsem_write_finished(sem);
  1455. __up_write(sem);
  1456. }
  1457. EXPORT_SYMBOL(up_write);
  1458. /*
  1459. * downgrade write lock to read lock
  1460. */
  1461. void downgrade_write(struct rw_semaphore *sem)
  1462. {
  1463. lock_downgrade(&sem->dep_map, _RET_IP_);
  1464. trace_android_vh_rwsem_write_finished(sem);
  1465. __downgrade_write(sem);
  1466. }
  1467. EXPORT_SYMBOL(downgrade_write);
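/*
 * Illustrative usage of the public API defined above (a sketch only;
 * my_sem and the critical sections are hypothetical):
 *
 *	static DECLARE_RWSEM(my_sem);
 *
 *	down_read(&my_sem);
 *	... read-side critical section, may run concurrently with other readers ...
 *	up_read(&my_sem);
 *
 *	down_write(&my_sem);
 *	... exclusive write-side critical section ...
 *	downgrade_write(&my_sem);
 *	... continue as a reader without ever dropping the lock ...
 *	up_read(&my_sem);
 */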
  1468. #ifdef CONFIG_DEBUG_LOCK_ALLOC
  1469. void down_read_nested(struct rw_semaphore *sem, int subclass)
  1470. {
  1471. might_sleep();
  1472. rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);
  1473. LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
  1474. }
  1475. EXPORT_SYMBOL(down_read_nested);
  1476. int down_read_killable_nested(struct rw_semaphore *sem, int subclass)
  1477. {
  1478. might_sleep();
  1479. rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);
  1480. if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_killable)) {
  1481. rwsem_release(&sem->dep_map, _RET_IP_);
  1482. return -EINTR;
  1483. }
  1484. return 0;
  1485. }
  1486. EXPORT_SYMBOL(down_read_killable_nested);
  1487. void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest)
  1488. {
  1489. might_sleep();
  1490. rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_);
  1491. LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
  1492. }
  1493. EXPORT_SYMBOL(_down_write_nest_lock);
  1494. void down_read_non_owner(struct rw_semaphore *sem)
  1495. {
  1496. might_sleep();
  1497. __down_read(sem);
  1498. /*
  1499. * The owner value for a reader-owned lock is mostly for debugging
  1500. * purposes only and is not critical to the correct functioning of
  1501. * rwsem. So it is perfectly fine to set it in a preempt-enabled
  1502. * context here.
  1503. */
  1504. __rwsem_set_reader_owned(sem, NULL);
  1505. }
  1506. EXPORT_SYMBOL(down_read_non_owner);
  1507. void down_write_nested(struct rw_semaphore *sem, int subclass)
  1508. {
  1509. might_sleep();
  1510. rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
  1511. LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
  1512. }
  1513. EXPORT_SYMBOL(down_write_nested);
  1514. int __sched down_write_killable_nested(struct rw_semaphore *sem, int subclass)
  1515. {
  1516. might_sleep();
  1517. rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
  1518. if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock,
  1519. __down_write_killable)) {
  1520. rwsem_release(&sem->dep_map, _RET_IP_);
  1521. return -EINTR;
  1522. }
  1523. return 0;
  1524. }
  1525. EXPORT_SYMBOL(down_write_killable_nested);
  1526. void up_read_non_owner(struct rw_semaphore *sem)
  1527. {
  1528. DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
  1529. __up_read(sem);
  1530. }
  1531. EXPORT_SYMBOL(up_read_non_owner);
  1532. #endif