percpu-rwsem.c

// SPDX-License-Identifier: GPL-2.0-only
#include <linux/atomic.h>
#include <linux/percpu.h>
#include <linux/wait.h>
#include <linux/lockdep.h>
#include <linux/percpu-rwsem.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/sched/debug.h>
#include <linux/errno.h>
#include <trace/events/lock.h>
#include <trace/hooks/dtask.h>
/*
 * trace_android_vh_record_pcpu_rwsem_starttime is called from
 * include/linux/percpu-rwsem.h; including trace/hooks/dtask.h there would
 * cause a build error, so we provide the wrapper
 * _trace_android_vh_record_pcpu_rwsem_starttime() for percpu-rwsem.h to call.
 */
void _trace_android_vh_record_pcpu_rwsem_starttime(struct task_struct *tsk,
                unsigned long settime)
{
        trace_android_vh_record_pcpu_rwsem_starttime(tsk, settime);
}
EXPORT_SYMBOL_GPL(_trace_android_vh_record_pcpu_rwsem_starttime);
/*
 * trace_android_vh_record_pcpu_rwsem_time_early is called from
 * include/linux/percpu-rwsem.h; including trace/hooks/dtask.h there would
 * cause a build error, so we provide the wrapper
 * _trace_android_vh_record_pcpu_rwsem_time_early() for percpu-rwsem.h to call.
 */
void _trace_android_vh_record_pcpu_rwsem_time_early(
                unsigned long settime, struct percpu_rw_semaphore *sem)
{
        trace_android_vh_record_pcpu_rwsem_time_early(settime, sem);
}
EXPORT_SYMBOL_GPL(_trace_android_vh_record_pcpu_rwsem_time_early);
int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
                        const char *name, struct lock_class_key *key)
{
        sem->read_count = alloc_percpu(int);
        if (unlikely(!sem->read_count))
                return -ENOMEM;

        rcu_sync_init(&sem->rss);
        rcuwait_init(&sem->writer);
        init_waitqueue_head(&sem->waiters);
        atomic_set(&sem->block, 0);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
        debug_check_no_locks_freed((void *)sem, sizeof(*sem));
        lockdep_init_map(&sem->dep_map, name, key, 0);
#endif
        return 0;
}
EXPORT_SYMBOL_GPL(__percpu_init_rwsem);
void percpu_free_rwsem(struct percpu_rw_semaphore *sem)
{
        /*
         * XXX: temporary kludge. The error path in alloc_super()
         * assumes that percpu_free_rwsem() is safe after kzalloc().
         */
        if (!sem->read_count)
                return;

        rcu_sync_dtor(&sem->rss);
        free_percpu(sem->read_count);
        sem->read_count = NULL; /* catch use after free bugs */
}
EXPORT_SYMBOL_GPL(percpu_free_rwsem);
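/*
 * Illustrative usage sketch (added note, not part of the original file): a
 * typical dynamic init/teardown pairing. percpu_init_rwsem() is the wrapper
 * in include/linux/percpu-rwsem.h that supplies the lockdep name/key for
 * __percpu_init_rwsem() above; the names my_object, my_object_init and
 * my_object_destroy below are hypothetical:
 *
 *	struct my_object {
 *		struct percpu_rw_semaphore sem;
 *	};
 *
 *	static int my_object_init(struct my_object *obj)
 *	{
 *		return percpu_init_rwsem(&obj->sem);	// -ENOMEM on failure
 *	}
 *
 *	static void my_object_destroy(struct my_object *obj)
 *	{
 *		percpu_free_rwsem(&obj->sem);
 *	}
 */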
static bool __percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
{
        this_cpu_inc(*sem->read_count);

        /*
         * Due to having preemption disabled the decrement happens on
         * the same CPU as the increment, avoiding the
         * increment-on-one-CPU-and-decrement-on-another problem.
         *
         * If the reader misses the writer's assignment of sem->block, then the
         * writer is guaranteed to see the reader's increment.
         *
         * Conversely, any readers that increment their sem->read_count after
         * the writer looks are guaranteed to see the sem->block value, which
         * in turn means that they are guaranteed to immediately decrement
         * their sem->read_count, so that it doesn't matter that the writer
         * missed them.
         */
        smp_mb(); /* A matches D */

        /*
         * If !sem->block the critical section starts here, matched by the
         * release in percpu_up_write().
         */
        if (likely(!atomic_read_acquire(&sem->block)))
                return true;

        this_cpu_dec(*sem->read_count);

        /* Prod writer to re-evaluate readers_active_check() */
        rcuwait_wake_up(&sem->writer);

        return false;
}
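/*
 * Added note (not in the original file), summarizing the barrier pairings
 * used above and below as a store-buffering-style sketch; the "B" barrier is
 * assumed to live in percpu_up_read()'s slow path in
 * include/linux/percpu-rwsem.h:
 *
 *	reader fast path                     writer
 *	--------------------------------     --------------------------------
 *	this_cpu_inc(*read_count)            atomic_xchg(&block, 1)      // D
 *	smp_mb()                      // A   per_cpu_sum(*read_count)?
 *	atomic_read_acquire(&block)          smp_mb()                    // C
 *
 * A pairs with D: either the reader observes block == 1 and backs out, or the
 * writer observes the reader's read_count increment and waits for it.
 * C pairs with B: once the writer sees the aggregate read_count reach zero,
 * it also sees the readers' completed critical sections.
 */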
static inline bool __percpu_down_write_trylock(struct percpu_rw_semaphore *sem)
{
        if (atomic_read(&sem->block))
                return false;

        return atomic_xchg(&sem->block, 1) == 0;
}
static bool __percpu_rwsem_trylock(struct percpu_rw_semaphore *sem, bool reader)
{
        if (reader) {
                bool ret;

                preempt_disable();
                ret = __percpu_down_read_trylock(sem);
                preempt_enable();

                return ret;
        }
        return __percpu_down_write_trylock(sem);
}
/*
 * The return value of wait_queue_entry::func means:
 *
 *  <0 - error, wakeup is terminated and the error is returned
 *   0 - no wakeup, a next waiter is tried
 *  >0 - woken, if EXCLUSIVE, counted towards @nr_exclusive.
 *
 * We use EXCLUSIVE for both readers and writers to preserve FIFO order,
 * and play games with the return value to allow waking multiple readers.
 *
 * Specifically, we wake readers until we've woken a single writer, or until a
 * trylock fails.
 */
static int percpu_rwsem_wake_function(struct wait_queue_entry *wq_entry,
                                      unsigned int mode, int wake_flags,
                                      void *key)
{
        bool reader = wq_entry->flags & WQ_FLAG_CUSTOM;
        struct percpu_rw_semaphore *sem = key;
        struct task_struct *p;

        /* concurrent against percpu_down_write(), can get stolen */
        if (!__percpu_rwsem_trylock(sem, reader))
                return 1;

        p = get_task_struct(wq_entry->private);
        list_del_init(&wq_entry->entry);
        smp_store_release(&wq_entry->private, NULL);

        wake_up_process(p);
        put_task_struct(p);

        return !reader; /* wake (readers until) 1 writer */
}
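/*
 * Illustrative example (added note, not part of the original file): with the
 * wait queue FIFO-ordered as [R1, R2, W1, R3], the single exclusive wakeup
 * issued by percpu_up_write() wakes R1 and R2 (each returns 0, so the scan
 * continues), then wakes W1 and returns 1, which satisfies nr_exclusive == 1
 * and ends the scan; R3 stays queued behind W1, preserving FIFO order. If a
 * trylock fails because the lock was stolen concurrently, the function
 * returns 1 and the scan stops with that waiter still queued.
 */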
static void percpu_rwsem_wait(struct percpu_rw_semaphore *sem, bool reader)
{
        DEFINE_WAIT_FUNC(wq_entry, percpu_rwsem_wake_function);
        bool wait;

        spin_lock_irq(&sem->waiters.lock);
        /*
         * Serialize against the wakeup in percpu_up_write(), if we fail
         * the trylock, the wakeup must see us on the list.
         */
        wait = !__percpu_rwsem_trylock(sem, reader);
        if (wait) {
                wq_entry.flags |= WQ_FLAG_EXCLUSIVE | reader * WQ_FLAG_CUSTOM;
                __add_wait_queue_entry_tail(&sem->waiters, &wq_entry);
                trace_android_vh_percpu_rwsem_wq_add(sem, reader);
        }
        spin_unlock_irq(&sem->waiters.lock);

        while (wait) {
                set_current_state(TASK_UNINTERRUPTIBLE);
                if (!smp_load_acquire(&wq_entry.private))
                        break;
                schedule();
        }
        __set_current_state(TASK_RUNNING);
}
bool __sched __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
{
        if (__percpu_down_read_trylock(sem))
                return true;

        if (try)
                return false;

        trace_contention_begin(sem, LCB_F_PERCPU | LCB_F_READ);
        preempt_enable();
        percpu_rwsem_wait(sem, /* .reader = */ true);
        preempt_disable();
        trace_contention_end(sem, 0);

        return true;
}
EXPORT_SYMBOL_GPL(__percpu_down_read);
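/*
 * Illustrative usage sketch (added note, not part of the original file):
 * readers normally enter through percpu_down_read()/percpu_up_read() in
 * include/linux/percpu-rwsem.h and only reach __percpu_down_read() above when
 * a writer holds or is acquiring the lock. The function name my_read_side is
 * hypothetical:
 *
 *	static void my_read_side(struct percpu_rw_semaphore *sem)
 *	{
 *		percpu_down_read(sem);
 *		... read-side critical section, may sleep, shared with other readers ...
 *		percpu_up_read(sem);
 *	}
 */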
#define per_cpu_sum(var)                                                \
({                                                                      \
        typeof(var) __sum = 0;                                          \
        int cpu;                                                        \
        compiletime_assert_atomic_type(__sum);                          \
        for_each_possible_cpu(cpu)                                      \
                __sum += per_cpu(var, cpu);                             \
        __sum;                                                          \
})
bool percpu_is_read_locked(struct percpu_rw_semaphore *sem)
{
        return per_cpu_sum(*sem->read_count) != 0 && !atomic_read(&sem->block);
}
EXPORT_SYMBOL_GPL(percpu_is_read_locked);
/*
 * Return true if the modular sum of the sem->read_count per-CPU variable is
 * zero.  If this sum is zero, then it is stable due to the fact that if any
 * newly arriving readers increment a given counter, they will immediately
 * decrement that same counter.
 *
 * Assumes sem->block is set.
 */
static bool readers_active_check(struct percpu_rw_semaphore *sem)
{
        if (per_cpu_sum(*sem->read_count) != 0)
                return false;

        /*
         * If we observed the decrement; ensure we see the entire critical
         * section.
         */
        smp_mb(); /* C matches B */

        return true;
}
void __sched percpu_down_write(struct percpu_rw_semaphore *sem)
{
        might_sleep();
        rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);

        trace_contention_begin(sem, LCB_F_PERCPU | LCB_F_WRITE);
        trace_android_vh_record_pcpu_rwsem_time_early(jiffies, sem);

        /* Notify readers to take the slow path. */
        rcu_sync_enter(&sem->rss);

        /*
         * Try set sem->block; this provides writer-writer exclusion.
         * Having sem->block set makes new readers block.
         */
        if (!__percpu_down_write_trylock(sem))
                percpu_rwsem_wait(sem, /* .reader = */ false);

        /* smp_mb() implied by __percpu_down_write_trylock() on success -- D matches A */

        /*
         * If they don't see our store of sem->block, then we are guaranteed to
         * see their sem->read_count increment, and therefore will wait for
         * them.
         */

        /* Wait for all active readers to complete. */
        rcuwait_wait_event(&sem->writer, readers_active_check(sem), TASK_UNINTERRUPTIBLE);
        trace_contention_end(sem, 0);
        trace_android_vh_record_pcpu_rwsem_starttime(current, jiffies);
}
EXPORT_SYMBOL_GPL(percpu_down_write);
void percpu_up_write(struct percpu_rw_semaphore *sem)
{
        rwsem_release(&sem->dep_map, _RET_IP_);

        /*
         * Signal the writer is done, no fast path yet.
         *
         * One reason that we cannot just immediately flip to readers_fast is
         * that new readers might fail to see the results of this writer's
         * critical section.
         *
         * Therefore we force it through the slow path which guarantees an
         * acquire and thereby guarantees the critical section's consistency.
         */
        atomic_set_release(&sem->block, 0);

        /*
         * Prod any pending reader/writer to make progress.
         */
        __wake_up(&sem->waiters, TASK_NORMAL, 1, sem);

        /*
         * Once this completes (at least one RCU-sched grace period hence) the
         * reader fast path will be available again.  Safe to use outside the
         * exclusive write lock because it's counting.
         */
        rcu_sync_exit(&sem->rss);

        trace_android_vh_record_pcpu_rwsem_time_early(0, sem);
        trace_android_vh_record_pcpu_rwsem_starttime(current, 0);
}
EXPORT_SYMBOL_GPL(percpu_up_write);
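/*
 * Illustrative usage sketch (added note, not part of the original file): the
 * writer side pairs percpu_down_write()/percpu_up_write() and excludes all
 * readers for the duration of the critical section. The function name
 * my_write_side is hypothetical:
 *
 *	static void my_write_side(struct percpu_rw_semaphore *sem)
 *	{
 *		percpu_down_write(sem);
 *		... exclusive critical section, may sleep ...
 *		percpu_up_write(sem);
 *	}
 */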