// SPDX-License-Identifier: GPL-2.0-only
#define pr_fmt(fmt) "%s: " fmt, __func__

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/percpu-refcount.h>

/*
 * A percpu refcount starts out as just a set of percpu counters. Initially, we
 * don't try to detect the ref hitting 0 - which means that get/put can just
 * increment or decrement the local counter. Note that the counter on a
 * particular cpu can (and will) wrap - this is fine; when we go to shut down,
 * the percpu counters will all sum to the correct value.
 *
 * (More precisely: because modular arithmetic is commutative, the sum of all
 * the percpu_count vars will be equal to what it would have been if all the
 * gets and puts were done to a single integer, even if some of the percpu
 * integers overflow or underflow.)
 *
 * The real trick to implementing percpu refcounts is shutdown. We can't detect
 * the ref hitting 0 on every put - this would require global synchronization
 * and defeat the whole purpose of using percpu refs.
 *
 * What we do is require the user to keep track of the initial refcount; we know
 * the ref can't hit 0 before the user drops the initial ref, so as long as we
 * convert to non-percpu mode before the initial ref is dropped, everything
 * works.
 *
 * Converting to non-percpu mode is done with some RCUish stuff in
 * percpu_ref_kill(). Additionally, we need a bias value so that the
 * atomic_long_t can't hit 0 before we've added up all the percpu refs.
 */
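
/*
 * Illustrative sketch (not taken from this file): a typical lifecycle of an
 * embedded percpu_ref. "struct foo" and foo_release() are hypothetical names
 * used only for illustration; the release callback must not sleep.
 *
 *	struct foo {
 *		struct percpu_ref ref;
 *		...
 *	};
 *
 *	static void foo_release(struct percpu_ref *ref)
 *	{
 *		struct foo *foo = container_of(ref, struct foo, ref);
 *
 *		percpu_ref_exit(&foo->ref);
 *		kfree(foo);
 *	}
 *
 *	// setup: starts live, in percpu mode, with a refcount of 1
 *	percpu_ref_init(&foo->ref, foo_release, 0, GFP_KERNEL);
 *
 *	// hot path: cheap per-cpu increments/decrements
 *	percpu_ref_get(&foo->ref);
 *	percpu_ref_put(&foo->ref);
 *
 *	// teardown: drop the initial ref; release() runs once the count hits 0
 *	percpu_ref_kill(&foo->ref);
 */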

#define PERCPU_COUNT_BIAS	(1LU << (BITS_PER_LONG - 1))

static DEFINE_SPINLOCK(percpu_ref_switch_lock);
static DECLARE_WAIT_QUEUE_HEAD(percpu_ref_switch_waitq);

static unsigned long __percpu *percpu_count_ptr(struct percpu_ref *ref)
{
	return (unsigned long __percpu *)
		(ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC_DEAD);
}

/**
 * percpu_ref_init - initialize a percpu refcount
 * @ref: percpu_ref to initialize
 * @release: function which will be called when refcount hits 0
 * @flags: PERCPU_REF_INIT_* flags
 * @gfp: allocation mask to use
 *
 * Initializes @ref. @ref starts out in percpu mode with a refcount of 1 unless
 * @flags contains PERCPU_REF_INIT_ATOMIC or PERCPU_REF_INIT_DEAD. These flags
 * change the start state to atomic with the latter setting the initial refcount
 * to 0. See the definitions of PERCPU_REF_INIT_* flags for flag behaviors.
 *
 * Note that @release must not sleep - it may potentially be called from RCU
 * callback context by percpu_ref_kill().
 */
int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release,
		    unsigned int flags, gfp_t gfp)
{
	size_t align = max_t(size_t, 1 << __PERCPU_REF_FLAG_BITS,
			     __alignof__(unsigned long));
	unsigned long start_count = 0;
	struct percpu_ref_data *data;

	ref->percpu_count_ptr = (unsigned long)
		__alloc_percpu_gfp(sizeof(unsigned long), align, gfp);
	if (!ref->percpu_count_ptr)
		return -ENOMEM;

	data = kzalloc(sizeof(*ref->data), gfp);
	if (!data) {
		free_percpu((void __percpu *)ref->percpu_count_ptr);
		ref->percpu_count_ptr = 0;
		return -ENOMEM;
	}

	data->force_atomic = flags & PERCPU_REF_INIT_ATOMIC;
	data->allow_reinit = flags & PERCPU_REF_ALLOW_REINIT;

	if (flags & (PERCPU_REF_INIT_ATOMIC | PERCPU_REF_INIT_DEAD)) {
		ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC;
		data->allow_reinit = true;
	} else {
		start_count += PERCPU_COUNT_BIAS;
	}

	if (flags & PERCPU_REF_INIT_DEAD)
		ref->percpu_count_ptr |= __PERCPU_REF_DEAD;
	else
		start_count++;

	atomic_long_set(&data->count, start_count);

	data->release = release;
	data->confirm_switch = NULL;
	data->ref = ref;
	ref->data = data;
	return 0;
}
EXPORT_SYMBOL_GPL(percpu_ref_init);
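
/*
 * Illustrative sketch (hypothetical caller): picking init flags. The names
 * other than the percpu_ref API itself are made up for illustration.
 *
 *	// default: live, percpu mode, refcount of 1
 *	percpu_ref_init(&foo->ref, foo_release, 0, GFP_KERNEL);
 *
 *	// start in sticky atomic mode; the percpu counters stay allocated so
 *	// the ref can later be switched to percpu mode or reinitialized
 *	percpu_ref_init(&foo->ref, foo_release, PERCPU_REF_INIT_ATOMIC,
 *			GFP_KERNEL);
 *
 *	// start dead with a refcount of 0; bring the ref to life later with
 *	// percpu_ref_reinit() / percpu_ref_resurrect()
 *	percpu_ref_init(&foo->ref, foo_release, PERCPU_REF_INIT_DEAD,
 *			GFP_KERNEL);
 */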

static void __percpu_ref_exit(struct percpu_ref *ref)
{
	unsigned long __percpu *percpu_count = percpu_count_ptr(ref);

	if (percpu_count) {
		/* non-NULL confirm_switch indicates switching in progress */
		WARN_ON_ONCE(ref->data && ref->data->confirm_switch);
		free_percpu(percpu_count);
		ref->percpu_count_ptr = __PERCPU_REF_ATOMIC_DEAD;
	}
}

/**
 * percpu_ref_exit - undo percpu_ref_init()
 * @ref: percpu_ref to exit
 *
 * This function exits @ref. The caller is responsible for ensuring that
 * @ref is no longer in active use. The usual places to invoke this
 * function from are the @ref->release() callback or the init failure path
 * where percpu_ref_init() succeeded but other parts of the initialization
 * of the embedding object failed.
 */
void percpu_ref_exit(struct percpu_ref *ref)
{
	struct percpu_ref_data *data = ref->data;
	unsigned long flags;

	__percpu_ref_exit(ref);

	if (!data)
		return;

	spin_lock_irqsave(&percpu_ref_switch_lock, flags);
	/*
	 * Stash the atomic count in the spare bits of ->percpu_count_ptr so
	 * that percpu_ref_is_zero() keeps working after ->data is freed.
	 */
	ref->percpu_count_ptr |= atomic_long_read(&ref->data->count) <<
		__PERCPU_REF_FLAG_BITS;
	ref->data = NULL;
	spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);

	kfree(data);
}
EXPORT_SYMBOL_GPL(percpu_ref_exit);
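
/*
 * Illustrative sketch (hypothetical caller): the init-failure path mentioned
 * above, where percpu_ref_init() succeeded but later setup of the embedding
 * object did not. foo_init_rest() is a made-up name.
 *
 *	int foo_setup(struct foo *foo)
 *	{
 *		int ret;
 *
 *		ret = percpu_ref_init(&foo->ref, foo_release, 0, GFP_KERNEL);
 *		if (ret)
 *			return ret;
 *
 *		ret = foo_init_rest(foo);
 *		if (ret) {
 *			percpu_ref_exit(&foo->ref);	// undo percpu_ref_init()
 *			return ret;
 *		}
 *		return 0;
 *	}
 */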

static void percpu_ref_call_confirm_rcu(struct rcu_head *rcu)
{
	struct percpu_ref_data *data = container_of(rcu,
			struct percpu_ref_data, rcu);
	struct percpu_ref *ref = data->ref;

	data->confirm_switch(ref);
	data->confirm_switch = NULL;
	wake_up_all(&percpu_ref_switch_waitq);

	if (!data->allow_reinit)
		__percpu_ref_exit(ref);

	/* drop ref from percpu_ref_switch_to_atomic() */
	percpu_ref_put(ref);
}

static void percpu_ref_switch_to_atomic_rcu(struct rcu_head *rcu)
{
	struct percpu_ref_data *data = container_of(rcu,
			struct percpu_ref_data, rcu);
	struct percpu_ref *ref = data->ref;
	unsigned long __percpu *percpu_count = percpu_count_ptr(ref);
	static atomic_t underflows;
	unsigned long count = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		count += *per_cpu_ptr(percpu_count, cpu);

	pr_debug("global %lu percpu %lu\n",
		 atomic_long_read(&data->count), count);

	/*
	 * It's crucial that we sum the percpu counters _before_ adding the sum
	 * to &ref->count; since gets could be happening on one cpu while puts
	 * happen on another, adding a single cpu's count could cause
	 * @ref->count to hit 0 before we've got a consistent value - but the
	 * sum of all the counts will be consistent and correct.
	 *
	 * Subtracting the bias value then has to happen _after_ adding count to
	 * &ref->count; we need the bias value to prevent &ref->count from
	 * reaching 0 before we add the percpu counts. But doing it at the same
	 * time is equivalent and saves us atomic operations:
	 */
	atomic_long_add((long)count - PERCPU_COUNT_BIAS, &data->count);

	if (WARN_ONCE(atomic_long_read(&data->count) <= 0,
		      "percpu ref (%ps) <= 0 (%ld) after switching to atomic",
		      data->release, atomic_long_read(&data->count)) &&
	    atomic_inc_return(&underflows) < 4) {
		pr_err("%s(): percpu_ref underflow", __func__);
		mem_dump_obj(data);
	}

	/* @ref is viewed as dead on all CPUs, send out switch confirmation */
	percpu_ref_call_confirm_rcu(rcu);
}
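
/*
 * Worked example of the above (a sketch, with BITS_PER_LONG == 64 and the
 * temporary ref taken by __percpu_ref_switch_to_atomic() ignored):
 *
 *	PERCPU_COUNT_BIAS == 2^63 and the atomic count starts at BIAS + 1
 *	(the bias plus the initial ref). If 5 gets and 4 puts happened in
 *	percpu mode, the percpu counters sum to 1 modulo 2^64, no matter how
 *	the gets and puts were spread across CPUs or whether individual
 *	counters wrapped. atomic_long_add(1 - BIAS) then leaves
 *
 *		(BIAS + 1) + 1 - BIAS == 2
 *
 *	i.e. the initial ref plus the one still-outstanding get.
 */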

static void percpu_ref_noop_confirm_switch(struct percpu_ref *ref)
{
}

static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref,
					  percpu_ref_func_t *confirm_switch)
{
	if (ref->percpu_count_ptr & __PERCPU_REF_ATOMIC) {
		if (confirm_switch)
			confirm_switch(ref);
		return;
	}

	/* switching from percpu to atomic */
	ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC;

	/*
	 * Non-NULL ->confirm_switch is used to indicate that switching is
	 * in progress. Use noop one if unspecified.
	 */
	ref->data->confirm_switch = confirm_switch ?:
		percpu_ref_noop_confirm_switch;

	percpu_ref_get(ref);	/* put after confirmation */
	call_rcu_hurry(&ref->data->rcu,
		       percpu_ref_switch_to_atomic_rcu);
}

static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref)
{
	unsigned long __percpu *percpu_count = percpu_count_ptr(ref);
	int cpu;

	BUG_ON(!percpu_count);

	if (!(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC))
		return;

	if (WARN_ON_ONCE(!ref->data->allow_reinit))
		return;

	atomic_long_add(PERCPU_COUNT_BIAS, &ref->data->count);

	/*
	 * Restore per-cpu operation. smp_store_release() is paired
	 * with READ_ONCE() in __ref_is_percpu() and guarantees that the
	 * zeroing is visible to all percpu accesses which can see the
	 * following __PERCPU_REF_ATOMIC clearing.
	 */
	for_each_possible_cpu(cpu)
		*per_cpu_ptr(percpu_count, cpu) = 0;

	smp_store_release(&ref->percpu_count_ptr,
			  ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC);
}

static void __percpu_ref_switch_mode(struct percpu_ref *ref,
				     percpu_ref_func_t *confirm_switch)
{
	struct percpu_ref_data *data = ref->data;

	lockdep_assert_held(&percpu_ref_switch_lock);

	/*
	 * If the previous ATOMIC switching hasn't finished yet, wait for
	 * its completion. If the caller ensures that ATOMIC switching
	 * isn't in progress, this function can be called from any context.
	 */
	wait_event_lock_irq(percpu_ref_switch_waitq, !data->confirm_switch,
			    percpu_ref_switch_lock);

	if (data->force_atomic || percpu_ref_is_dying(ref))
		__percpu_ref_switch_to_atomic(ref, confirm_switch);
	else
		__percpu_ref_switch_to_percpu(ref);
}

/**
 * percpu_ref_switch_to_atomic - switch a percpu_ref to atomic mode
 * @ref: percpu_ref to switch to atomic mode
 * @confirm_switch: optional confirmation callback
 *
 * There's no reason to use this function for the usual reference counting.
 * Use percpu_ref_kill[_and_confirm]().
 *
 * Schedule switching of @ref to atomic mode. All its percpu counts will
 * be collected to the main atomic counter. On completion, when all CPUs
 * are guaranteed to be in atomic mode, @confirm_switch, which may not
 * block, is invoked. This function may be invoked concurrently with all
 * the get/put operations and can safely be mixed with kill and reinit
 * operations. Note that @ref will stay in atomic mode across kill/reinit
 * cycles until percpu_ref_switch_to_percpu() is called.
 *
 * This function may block if @ref is in the process of switching to atomic
 * mode. If the caller ensures that @ref is not in the process of
 * switching to atomic mode, this function can be called from any context.
 */
void percpu_ref_switch_to_atomic(struct percpu_ref *ref,
				 percpu_ref_func_t *confirm_switch)
{
	unsigned long flags;

	spin_lock_irqsave(&percpu_ref_switch_lock, flags);

	ref->data->force_atomic = true;
	__percpu_ref_switch_mode(ref, confirm_switch);

	spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
}
EXPORT_SYMBOL_GPL(percpu_ref_switch_to_atomic);

/**
 * percpu_ref_switch_to_atomic_sync - switch a percpu_ref to atomic mode
 * @ref: percpu_ref to switch to atomic mode
 *
 * Schedule switching the ref to atomic mode, and wait for the
 * switch to complete. Caller must ensure that no other thread
 * will switch back to percpu mode.
 */
void percpu_ref_switch_to_atomic_sync(struct percpu_ref *ref)
{
	percpu_ref_switch_to_atomic(ref, NULL);
	wait_event(percpu_ref_switch_waitq, !ref->data->confirm_switch);
}
EXPORT_SYMBOL_GPL(percpu_ref_switch_to_atomic_sync);
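
/*
 * Illustrative sketch (hypothetical caller): a "freeze"-style operation that
 * needs exact, globally visible counting for a while and then goes back to
 * cheap percpu counting.
 *
 *	void foo_freeze(struct foo *foo)
 *	{
 *		// may sleep while the switch completes
 *		percpu_ref_switch_to_atomic_sync(&foo->ref);
 *		...	// e.g. wait for outstanding users to drain
 *	}
 *
 *	void foo_unfreeze(struct foo *foo)
 *	{
 *		// clears the sticky atomic state set above
 *		percpu_ref_switch_to_percpu(&foo->ref);
 *	}
 */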

/**
 * percpu_ref_switch_to_percpu - switch a percpu_ref to percpu mode
 * @ref: percpu_ref to switch to percpu mode
 *
 * There's no reason to use this function for the usual reference counting.
 * To re-use an expired ref, use percpu_ref_reinit().
 *
 * Switch @ref to percpu mode. This function may be invoked concurrently
 * with all the get/put operations and can safely be mixed with kill and
 * reinit operations. This function reverses the sticky atomic state set
 * by PERCPU_REF_INIT_ATOMIC or percpu_ref_switch_to_atomic(). If @ref is
 * dying or dead, the actual switching takes place on the following
 * percpu_ref_reinit().
 *
 * This function may block if @ref is in the process of switching to atomic
 * mode. If the caller ensures that @ref is not in the process of
 * switching to atomic mode, this function can be called from any context.
 */
void percpu_ref_switch_to_percpu(struct percpu_ref *ref)
{
	unsigned long flags;

	spin_lock_irqsave(&percpu_ref_switch_lock, flags);

	ref->data->force_atomic = false;
	__percpu_ref_switch_mode(ref, NULL);

	spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
}
EXPORT_SYMBOL_GPL(percpu_ref_switch_to_percpu);

/**
 * percpu_ref_kill_and_confirm - drop the initial ref and schedule confirmation
 * @ref: percpu_ref to kill
 * @confirm_kill: optional confirmation callback
 *
 * Equivalent to percpu_ref_kill() but also schedules kill confirmation if
 * @confirm_kill is not NULL. @confirm_kill, which may not block, will be
 * called after @ref is seen as dead from all CPUs at which point all
 * further invocations of percpu_ref_tryget_live() will fail. See
 * percpu_ref_tryget_live() for details.
 *
 * This function normally doesn't block and can be called from any context
 * but it may block if @confirm_kill is specified and @ref is in the
 * process of switching to atomic mode by percpu_ref_switch_to_atomic().
 *
 * There are no implied RCU grace periods between kill and release.
 */
void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
				 percpu_ref_func_t *confirm_kill)
{
	unsigned long flags;

	spin_lock_irqsave(&percpu_ref_switch_lock, flags);

	WARN_ONCE(percpu_ref_is_dying(ref),
		  "%s called more than once on %ps!", __func__,
		  ref->data->release);

	ref->percpu_count_ptr |= __PERCPU_REF_DEAD;
	__percpu_ref_switch_mode(ref, confirm_kill);
	percpu_ref_put(ref);

	spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
}
EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);
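
/*
 * Illustrative sketch (hypothetical caller): using the confirmation callback
 * to learn when no CPU can percpu_ref_tryget_live() the ref any more.
 * foo->kill_done is a made-up struct completion member.
 *
 *	static void foo_confirm_kill(struct percpu_ref *ref)
 *	{
 *		struct foo *foo = container_of(ref, struct foo, ref);
 *
 *		complete(&foo->kill_done);	// must not block
 *	}
 *
 *	void foo_shutdown(struct foo *foo)
 *	{
 *		percpu_ref_kill_and_confirm(&foo->ref, foo_confirm_kill);
 *		wait_for_completion(&foo->kill_done);
 *		// from here on, percpu_ref_tryget_live() is guaranteed to fail
 *	}
 */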

/**
 * percpu_ref_is_zero - test whether a percpu refcount reached zero
 * @ref: percpu_ref to test
 *
 * Returns %true if @ref reached zero.
 *
 * This function is safe to call as long as @ref is between init and exit.
 */
bool percpu_ref_is_zero(struct percpu_ref *ref)
{
	unsigned long __percpu *percpu_count;
	unsigned long count, flags;

	if (__ref_is_percpu(ref, &percpu_count))
		return false;

	/* protect us from being destroyed */
	spin_lock_irqsave(&percpu_ref_switch_lock, flags);
	if (ref->data)
		count = atomic_long_read(&ref->data->count);
	else
		count = ref->percpu_count_ptr >> __PERCPU_REF_FLAG_BITS;
	spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);

	return count == 0;
}
EXPORT_SYMBOL_GPL(percpu_ref_is_zero);
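
/*
 * Illustrative sketch (hypothetical caller): percpu_ref_is_zero() used as a
 * wait condition after killing a ref, with the release callback waking the
 * waiter. foo->zero_waitq is a made-up waitqueue member.
 *
 *	static void foo_release(struct percpu_ref *ref)
 *	{
 *		struct foo *foo = container_of(ref, struct foo, ref);
 *
 *		wake_up(&foo->zero_waitq);
 *	}
 *
 *	void foo_drain(struct foo *foo)
 *	{
 *		percpu_ref_kill(&foo->ref);
 *		wait_event(foo->zero_waitq, percpu_ref_is_zero(&foo->ref));
 *	}
 */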

/**
 * percpu_ref_reinit - re-initialize a percpu refcount
 * @ref: percpu_ref to re-initialize
 *
 * Re-initialize @ref so that it's in the same state as when it finished
 * percpu_ref_init() ignoring %PERCPU_REF_INIT_DEAD. @ref must have been
 * initialized successfully and reached 0 but not exited.
 *
 * Note that percpu_ref_tryget[_live]() are safe to perform on @ref while
 * this function is in progress.
 */
void percpu_ref_reinit(struct percpu_ref *ref)
{
	WARN_ON_ONCE(!percpu_ref_is_zero(ref));

	percpu_ref_resurrect(ref);
}
EXPORT_SYMBOL_GPL(percpu_ref_reinit);

/**
 * percpu_ref_resurrect - modify a percpu refcount from dead to live
 * @ref: percpu_ref to resurrect
 *
 * Modify @ref so that it's in the same state as before percpu_ref_kill() was
 * called. @ref must be dead but must not yet have exited.
 *
 * If @ref->release() frees @ref then the caller is responsible for
 * guaranteeing that @ref->release() does not get called while this
 * function is in progress.
 *
 * Note that percpu_ref_tryget[_live]() are safe to perform on @ref while
 * this function is in progress.
 */
void percpu_ref_resurrect(struct percpu_ref *ref)
{
	unsigned long __percpu *percpu_count;
	unsigned long flags;

	spin_lock_irqsave(&percpu_ref_switch_lock, flags);

	WARN_ON_ONCE(!percpu_ref_is_dying(ref));
	WARN_ON_ONCE(__ref_is_percpu(ref, &percpu_count));

	ref->percpu_count_ptr &= ~__PERCPU_REF_DEAD;
	percpu_ref_get(ref);
	__percpu_ref_switch_mode(ref, NULL);

	spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
}
EXPORT_SYMBOL_GPL(percpu_ref_resurrect);
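
/*
 * Illustrative sketch (hypothetical caller): a kill/reinit cycle. The ref
 * must have been created with PERCPU_REF_ALLOW_REINIT (or have started in
 * atomic/dead mode) so the percpu counters survive the kill.
 *
 *	percpu_ref_init(&foo->ref, foo_release, PERCPU_REF_ALLOW_REINIT,
 *			GFP_KERNEL);
 *	...
 *	percpu_ref_kill(&foo->ref);	// drop the initial ref
 *	...				// wait until the ref reaches zero
 *	percpu_ref_reinit(&foo->ref);	// back to live, percpu mode
 */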