volume.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /* Volume-level cache cookie handling.
  3. *
  4. * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
  5. * Written by David Howells (dhowells@redhat.com)
  6. */
  7. #define FSCACHE_DEBUG_LEVEL COOKIE
  8. #include <linux/export.h>
  9. #include <linux/slab.h>
  10. #include "internal.h"
/* log2 of the number of buckets in the volume hash table. */
#define fscache_volume_hash_shift 10
/* Hash table of volume cookies; a volume's bucket is picked from its key_hash. */
static struct hlist_bl_head fscache_volume_hash[1 << fscache_volume_hash_shift];
/* Counter from which each newly allocated volume draws its debug_id. */
static atomic_t fscache_volume_debug_id;
/* List of all allocated volumes, linked through fscache_volume::proc_link. */
static LIST_HEAD(fscache_volumes);
/* Forward declaration: the work function is referenced before it is defined. */
static void fscache_create_volume_work(struct work_struct *work);
  16. struct fscache_volume *fscache_get_volume(struct fscache_volume *volume,
  17. enum fscache_volume_trace where)
  18. {
  19. int ref;
  20. __refcount_inc(&volume->ref, &ref);
  21. trace_fscache_volume(volume->debug_id, ref + 1, where);
  22. return volume;
  23. }
  24. static void fscache_see_volume(struct fscache_volume *volume,
  25. enum fscache_volume_trace where)
  26. {
  27. int ref = refcount_read(&volume->ref);
  28. trace_fscache_volume(volume->debug_id, ref, where);
  29. }
  30. /*
  31. * Pin the cache behind a volume so that we can access it.
  32. */
  33. static void __fscache_begin_volume_access(struct fscache_volume *volume,
  34. struct fscache_cookie *cookie,
  35. enum fscache_access_trace why)
  36. {
  37. int n_accesses;
  38. n_accesses = atomic_inc_return(&volume->n_accesses);
  39. smp_mb__after_atomic();
  40. trace_fscache_access_volume(volume->debug_id, cookie ? cookie->debug_id : 0,
  41. refcount_read(&volume->ref),
  42. n_accesses, why);
  43. }
  44. /**
  45. * fscache_begin_volume_access - Pin a cache so a volume can be accessed
  46. * @volume: The volume cookie
  47. * @cookie: A datafile cookie for a tracing reference (or NULL)
  48. * @why: An indication of the circumstances of the access for tracing
  49. *
  50. * Attempt to pin the cache to prevent it from going away whilst we're
  51. * accessing a volume and returns true if successful. This works as follows:
  52. *
  53. * (1) If the cache tests as not live (state is not FSCACHE_CACHE_IS_ACTIVE),
  54. * then we return false to indicate access was not permitted.
  55. *
  56. * (2) If the cache tests as live, then we increment the volume's n_accesses
  57. * count and then recheck the cache liveness, ending the access if it
  58. * ceased to be live.
  59. *
  60. * (3) When we end the access, we decrement the volume's n_accesses and wake
  61. * up the any waiters if it reaches 0.
  62. *
  63. * (4) Whilst the cache is caching, the volume's n_accesses is kept
  64. * artificially incremented to prevent wakeups from happening.
  65. *
  66. * (5) When the cache is taken offline, the state is changed to prevent new
  67. * accesses, the volume's n_accesses is decremented and we wait for it to
  68. * become 0.
  69. *
  70. * The datafile @cookie and the @why indicator are merely provided for tracing
  71. * purposes.
  72. */
  73. bool fscache_begin_volume_access(struct fscache_volume *volume,
  74. struct fscache_cookie *cookie,
  75. enum fscache_access_trace why)
  76. {
  77. if (!fscache_cache_is_live(volume->cache))
  78. return false;
  79. __fscache_begin_volume_access(volume, cookie, why);
  80. if (!fscache_cache_is_live(volume->cache)) {
  81. fscache_end_volume_access(volume, cookie, fscache_access_unlive);
  82. return false;
  83. }
  84. return true;
  85. }
  86. /**
  87. * fscache_end_volume_access - Unpin a cache at the end of an access.
  88. * @volume: The volume cookie
  89. * @cookie: A datafile cookie for a tracing reference (or NULL)
  90. * @why: An indication of the circumstances of the access for tracing
  91. *
  92. * Unpin a cache volume after we've accessed it. The datafile @cookie and the
  93. * @why indicator are merely provided for tracing purposes.
  94. */
  95. void fscache_end_volume_access(struct fscache_volume *volume,
  96. struct fscache_cookie *cookie,
  97. enum fscache_access_trace why)
  98. {
  99. int n_accesses;
  100. smp_mb__before_atomic();
  101. n_accesses = atomic_dec_return(&volume->n_accesses);
  102. trace_fscache_access_volume(volume->debug_id, cookie ? cookie->debug_id : 0,
  103. refcount_read(&volume->ref),
  104. n_accesses, why);
  105. if (n_accesses == 0)
  106. wake_up_var(&volume->n_accesses);
  107. }
  108. EXPORT_SYMBOL(fscache_end_volume_access);
  109. static bool fscache_volume_same(const struct fscache_volume *a,
  110. const struct fscache_volume *b)
  111. {
  112. size_t klen;
  113. if (a->key_hash != b->key_hash ||
  114. a->cache != b->cache ||
  115. a->key[0] != b->key[0])
  116. return false;
  117. klen = round_up(a->key[0] + 1, sizeof(__le32));
  118. return memcmp(a->key, b->key, klen) == 0;
  119. }
  120. static bool fscache_is_acquire_pending(struct fscache_volume *volume)
  121. {
  122. return test_bit(FSCACHE_VOLUME_ACQUIRE_PENDING, &volume->flags);
  123. }
  124. static void fscache_wait_on_volume_collision(struct fscache_volume *candidate,
  125. unsigned int collidee_debug_id)
  126. {
  127. wait_on_bit_timeout(&candidate->flags, FSCACHE_VOLUME_ACQUIRE_PENDING,
  128. TASK_UNINTERRUPTIBLE, 20 * HZ);
  129. if (fscache_is_acquire_pending(candidate)) {
  130. pr_notice("Potential volume collision new=%08x old=%08x",
  131. candidate->debug_id, collidee_debug_id);
  132. fscache_stat(&fscache_n_volumes_collision);
  133. wait_on_bit(&candidate->flags, FSCACHE_VOLUME_ACQUIRE_PENDING,
  134. TASK_UNINTERRUPTIBLE);
  135. }
  136. }
  137. /*
  138. * Attempt to insert the new volume into the hash. If there's a collision, we
  139. * wait for the old volume to complete if it's being relinquished and an error
  140. * otherwise.
  141. */
  142. static bool fscache_hash_volume(struct fscache_volume *candidate)
  143. {
  144. struct fscache_volume *cursor;
  145. struct hlist_bl_head *h;
  146. struct hlist_bl_node *p;
  147. unsigned int bucket, collidee_debug_id = 0;
  148. bucket = candidate->key_hash & (ARRAY_SIZE(fscache_volume_hash) - 1);
  149. h = &fscache_volume_hash[bucket];
  150. hlist_bl_lock(h);
  151. hlist_bl_for_each_entry(cursor, p, h, hash_link) {
  152. if (fscache_volume_same(candidate, cursor)) {
  153. if (!test_bit(FSCACHE_VOLUME_RELINQUISHED, &cursor->flags))
  154. goto collision;
  155. fscache_see_volume(cursor, fscache_volume_get_hash_collision);
  156. set_bit(FSCACHE_VOLUME_COLLIDED_WITH, &cursor->flags);
  157. set_bit(FSCACHE_VOLUME_ACQUIRE_PENDING, &candidate->flags);
  158. collidee_debug_id = cursor->debug_id;
  159. break;
  160. }
  161. }
  162. hlist_bl_add_head(&candidate->hash_link, h);
  163. hlist_bl_unlock(h);
  164. if (fscache_is_acquire_pending(candidate))
  165. fscache_wait_on_volume_collision(candidate, collidee_debug_id);
  166. return true;
  167. collision:
  168. fscache_see_volume(cursor, fscache_volume_collision);
  169. hlist_bl_unlock(h);
  170. return false;
  171. }
  172. /*
  173. * Allocate and initialise a volume representation cookie.
  174. */
  175. static struct fscache_volume *fscache_alloc_volume(const char *volume_key,
  176. const char *cache_name,
  177. const void *coherency_data,
  178. size_t coherency_len)
  179. {
  180. struct fscache_volume *volume;
  181. struct fscache_cache *cache;
  182. size_t klen, hlen;
  183. u8 *key;
  184. klen = strlen(volume_key);
  185. if (klen > NAME_MAX)
  186. return NULL;
  187. if (!coherency_data)
  188. coherency_len = 0;
  189. cache = fscache_lookup_cache(cache_name, false);
  190. if (IS_ERR(cache))
  191. return NULL;
  192. volume = kzalloc(struct_size(volume, coherency, coherency_len),
  193. GFP_KERNEL);
  194. if (!volume)
  195. goto err_cache;
  196. volume->cache = cache;
  197. volume->coherency_len = coherency_len;
  198. if (coherency_data)
  199. memcpy(volume->coherency, coherency_data, coherency_len);
  200. INIT_LIST_HEAD(&volume->proc_link);
  201. INIT_WORK(&volume->work, fscache_create_volume_work);
  202. refcount_set(&volume->ref, 1);
  203. spin_lock_init(&volume->lock);
  204. /* Stick the length on the front of the key and pad it out to make
  205. * hashing easier.
  206. */
  207. hlen = round_up(1 + klen + 1, sizeof(__le32));
  208. key = kzalloc(hlen, GFP_KERNEL);
  209. if (!key)
  210. goto err_vol;
  211. key[0] = klen;
  212. memcpy(key + 1, volume_key, klen);
  213. volume->key = key;
  214. volume->key_hash = fscache_hash(0, key, hlen);
  215. volume->debug_id = atomic_inc_return(&fscache_volume_debug_id);
  216. down_write(&fscache_addremove_sem);
  217. atomic_inc(&cache->n_volumes);
  218. list_add_tail(&volume->proc_link, &fscache_volumes);
  219. fscache_see_volume(volume, fscache_volume_new_acquire);
  220. fscache_stat(&fscache_n_volumes);
  221. up_write(&fscache_addremove_sem);
  222. _leave(" = v=%x", volume->debug_id);
  223. return volume;
  224. err_vol:
  225. kfree(volume);
  226. err_cache:
  227. fscache_put_cache(cache, fscache_cache_put_alloc_volume);
  228. fscache_stat(&fscache_n_volumes_nomem);
  229. return NULL;
  230. }
  231. /*
  232. * Create a volume's representation on disk. Have a volume ref and a cache
  233. * access we have to release.
  234. */
  235. static void fscache_create_volume_work(struct work_struct *work)
  236. {
  237. const struct fscache_cache_ops *ops;
  238. struct fscache_volume *volume =
  239. container_of(work, struct fscache_volume, work);
  240. fscache_see_volume(volume, fscache_volume_see_create_work);
  241. ops = volume->cache->ops;
  242. if (ops->acquire_volume)
  243. ops->acquire_volume(volume);
  244. fscache_end_cache_access(volume->cache,
  245. fscache_access_acquire_volume_end);
  246. clear_and_wake_up_bit(FSCACHE_VOLUME_CREATING, &volume->flags);
  247. fscache_put_volume(volume, fscache_volume_put_create_work);
  248. }
  249. /*
  250. * Dispatch a worker thread to create a volume's representation on disk.
  251. */
  252. void fscache_create_volume(struct fscache_volume *volume, bool wait)
  253. {
  254. if (test_and_set_bit(FSCACHE_VOLUME_CREATING, &volume->flags))
  255. goto maybe_wait;
  256. if (volume->cache_priv)
  257. goto no_wait; /* We raced */
  258. if (!fscache_begin_cache_access(volume->cache,
  259. fscache_access_acquire_volume))
  260. goto no_wait;
  261. fscache_get_volume(volume, fscache_volume_get_create_work);
  262. if (!schedule_work(&volume->work))
  263. fscache_put_volume(volume, fscache_volume_put_create_work);
  264. maybe_wait:
  265. if (wait) {
  266. fscache_see_volume(volume, fscache_volume_wait_create_work);
  267. wait_on_bit(&volume->flags, FSCACHE_VOLUME_CREATING,
  268. TASK_UNINTERRUPTIBLE);
  269. }
  270. return;
  271. no_wait:
  272. clear_bit_unlock(FSCACHE_VOLUME_CREATING, &volume->flags);
  273. wake_up_bit(&volume->flags, FSCACHE_VOLUME_CREATING);
  274. }
  275. /*
  276. * Acquire a volume representation cookie and link it to a (proposed) cache.
  277. */
  278. struct fscache_volume *__fscache_acquire_volume(const char *volume_key,
  279. const char *cache_name,
  280. const void *coherency_data,
  281. size_t coherency_len)
  282. {
  283. struct fscache_volume *volume;
  284. volume = fscache_alloc_volume(volume_key, cache_name,
  285. coherency_data, coherency_len);
  286. if (!volume)
  287. return ERR_PTR(-ENOMEM);
  288. if (!fscache_hash_volume(volume)) {
  289. fscache_put_volume(volume, fscache_volume_put_hash_collision);
  290. return ERR_PTR(-EBUSY);
  291. }
  292. fscache_create_volume(volume, false);
  293. return volume;
  294. }
  295. EXPORT_SYMBOL(__fscache_acquire_volume);
  296. static void fscache_wake_pending_volume(struct fscache_volume *volume,
  297. struct hlist_bl_head *h)
  298. {
  299. struct fscache_volume *cursor;
  300. struct hlist_bl_node *p;
  301. hlist_bl_for_each_entry(cursor, p, h, hash_link) {
  302. if (fscache_volume_same(cursor, volume)) {
  303. fscache_see_volume(cursor, fscache_volume_see_hash_wake);
  304. clear_and_wake_up_bit(FSCACHE_VOLUME_ACQUIRE_PENDING,
  305. &cursor->flags);
  306. return;
  307. }
  308. }
  309. }
  310. /*
  311. * Remove a volume cookie from the hash table.
  312. */
  313. static void fscache_unhash_volume(struct fscache_volume *volume)
  314. {
  315. struct hlist_bl_head *h;
  316. unsigned int bucket;
  317. bucket = volume->key_hash & (ARRAY_SIZE(fscache_volume_hash) - 1);
  318. h = &fscache_volume_hash[bucket];
  319. hlist_bl_lock(h);
  320. hlist_bl_del(&volume->hash_link);
  321. if (test_bit(FSCACHE_VOLUME_COLLIDED_WITH, &volume->flags))
  322. fscache_wake_pending_volume(volume, h);
  323. hlist_bl_unlock(h);
  324. }
/*
 * Drop a cache's volume attachments.
 *
 * Final teardown of a volume, called from fscache_put_volume() when the last
 * reference is dropped.  The cache backend is asked to free its side of the
 * volume (if any), the volume is removed from the fscache_volumes list and
 * from the hash table, its memory is freed and the ref on the cache is put.
 */
static void fscache_free_volume(struct fscache_volume *volume)
{
	/* Saved now because it is still needed after the volume is freed. */
	struct fscache_cache *cache = volume->cache;

	if (volume->cache_priv) {
		__fscache_begin_volume_access(volume, NULL,
					      fscache_access_relinquish_volume);
		/* Checked again now that the access has been counted. */
		if (volume->cache_priv)
			cache->ops->free_volume(volume);
		fscache_end_volume_access(volume, NULL,
					  fscache_access_relinquish_volume_end);
	}

	/* Take the volume off the global list and uncount it from its cache. */
	down_write(&fscache_addremove_sem);
	list_del_init(&volume->proc_link);
	atomic_dec(&volume->cache->n_volumes);
	up_write(&fscache_addremove_sem);

	/* Only volumes that actually made it into the hash need unhashing. */
	if (!hlist_bl_unhashed(&volume->hash_link))
		fscache_unhash_volume(volume);

	trace_fscache_volume(volume->debug_id, 0, fscache_volume_free);
	kfree(volume->key);
	kfree(volume);
	fscache_stat_d(&fscache_n_volumes);
	fscache_put_cache(cache, fscache_cache_put_volume);
}
  351. /*
  352. * Drop a reference to a volume cookie.
  353. */
  354. void fscache_put_volume(struct fscache_volume *volume,
  355. enum fscache_volume_trace where)
  356. {
  357. if (volume) {
  358. unsigned int debug_id = volume->debug_id;
  359. bool zero;
  360. int ref;
  361. zero = __refcount_dec_and_test(&volume->ref, &ref);
  362. trace_fscache_volume(debug_id, ref - 1, where);
  363. if (zero)
  364. fscache_free_volume(volume);
  365. }
  366. }
  367. /*
  368. * Relinquish a volume representation cookie.
  369. */
  370. void __fscache_relinquish_volume(struct fscache_volume *volume,
  371. const void *coherency_data,
  372. bool invalidate)
  373. {
  374. if (WARN_ON(test_and_set_bit(FSCACHE_VOLUME_RELINQUISHED, &volume->flags)))
  375. return;
  376. if (invalidate) {
  377. set_bit(FSCACHE_VOLUME_INVALIDATE, &volume->flags);
  378. } else if (coherency_data) {
  379. memcpy(volume->coherency, coherency_data, volume->coherency_len);
  380. }
  381. fscache_put_volume(volume, fscache_volume_put_relinquish);
  382. }
  383. EXPORT_SYMBOL(__fscache_relinquish_volume);
  384. /**
  385. * fscache_withdraw_volume - Withdraw a volume from being cached
  386. * @volume: Volume cookie
  387. *
  388. * Withdraw a cache volume from service, waiting for all accesses to complete
  389. * before returning.
  390. */
  391. void fscache_withdraw_volume(struct fscache_volume *volume)
  392. {
  393. int n_accesses;
  394. _debug("withdraw V=%x", volume->debug_id);
  395. /* Allow wakeups on dec-to-0 */
  396. n_accesses = atomic_dec_return(&volume->n_accesses);
  397. trace_fscache_access_volume(volume->debug_id, 0,
  398. refcount_read(&volume->ref),
  399. n_accesses, fscache_access_cache_unpin);
  400. wait_var_event(&volume->n_accesses,
  401. atomic_read(&volume->n_accesses) == 0);
  402. }
  403. EXPORT_SYMBOL(fscache_withdraw_volume);
  404. #ifdef CONFIG_PROC_FS
  405. /*
  406. * Generate a list of volumes in /proc/fs/fscache/volumes
  407. */
  408. static int fscache_volumes_seq_show(struct seq_file *m, void *v)
  409. {
  410. struct fscache_volume *volume;
  411. if (v == &fscache_volumes) {
  412. seq_puts(m,
  413. "VOLUME REF nCOOK ACC FL CACHE KEY\n"
  414. "======== ===== ===== === == =============== ================\n");
  415. return 0;
  416. }
  417. volume = list_entry(v, struct fscache_volume, proc_link);
  418. seq_printf(m,
  419. "%08x %5d %5d %3d %02lx %-15.15s %s\n",
  420. volume->debug_id,
  421. refcount_read(&volume->ref),
  422. atomic_read(&volume->n_cookies),
  423. atomic_read(&volume->n_accesses),
  424. volume->flags,
  425. volume->cache->name ?: "-",
  426. volume->key + 1);
  427. return 0;
  428. }
  429. static void *fscache_volumes_seq_start(struct seq_file *m, loff_t *_pos)
  430. __acquires(&fscache_addremove_sem)
  431. {
  432. down_read(&fscache_addremove_sem);
  433. return seq_list_start_head(&fscache_volumes, *_pos);
  434. }
  435. static void *fscache_volumes_seq_next(struct seq_file *m, void *v, loff_t *_pos)
  436. {
  437. return seq_list_next(v, &fscache_volumes, _pos);
  438. }
/*
 * Finish a walk of the volume list: release the read lock on
 * fscache_addremove_sem that fscache_volumes_seq_start() took.
 */
static void fscache_volumes_seq_stop(struct seq_file *m, void *v)
	__releases(&fscache_addremove_sem)
{
	up_read(&fscache_addremove_sem);
}
/*
 * seq_file operations that iterate over the fscache_volumes list and print
 * one line per volume.
 */
const struct seq_operations fscache_volumes_seq_ops = {
	.start = fscache_volumes_seq_start,
	.next = fscache_volumes_seq_next,
	.stop = fscache_volumes_seq_stop,
	.show = fscache_volumes_seq_show,
};
  450. #endif /* CONFIG_PROC_FS */