volume.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /* AFS volume management
  3. *
  4. * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
  5. * Written by David Howells (dhowells@redhat.com)
  6. */
  7. #include <linux/kernel.h>
  8. #include <linux/slab.h>
  9. #include "internal.h"
  10. static unsigned __read_mostly afs_volume_record_life = 60 * 60;
  11. /*
  12. * Insert a volume into a cell. If there's an existing volume record, that is
  13. * returned instead with a ref held.
  14. */
  15. static struct afs_volume *afs_insert_volume_into_cell(struct afs_cell *cell,
  16. struct afs_volume *volume)
  17. {
  18. struct afs_volume *p;
  19. struct rb_node *parent = NULL, **pp;
  20. write_seqlock(&cell->volume_lock);
  21. pp = &cell->volumes.rb_node;
  22. while (*pp) {
  23. parent = *pp;
  24. p = rb_entry(parent, struct afs_volume, cell_node);
  25. if (p->vid < volume->vid) {
  26. pp = &(*pp)->rb_left;
  27. } else if (p->vid > volume->vid) {
  28. pp = &(*pp)->rb_right;
  29. } else {
  30. volume = afs_get_volume(p, afs_volume_trace_get_cell_insert);
  31. goto found;
  32. }
  33. }
  34. rb_link_node_rcu(&volume->cell_node, parent, pp);
  35. rb_insert_color(&volume->cell_node, &cell->volumes);
  36. hlist_add_head_rcu(&volume->proc_link, &cell->proc_volumes);
  37. found:
  38. write_sequnlock(&cell->volume_lock);
  39. return volume;
  40. }
  41. static void afs_remove_volume_from_cell(struct afs_volume *volume)
  42. {
  43. struct afs_cell *cell = volume->cell;
  44. if (!hlist_unhashed(&volume->proc_link)) {
  45. trace_afs_volume(volume->vid, refcount_read(&cell->ref),
  46. afs_volume_trace_remove);
  47. write_seqlock(&cell->volume_lock);
  48. hlist_del_rcu(&volume->proc_link);
  49. rb_erase(&volume->cell_node, &cell->volumes);
  50. write_sequnlock(&cell->volume_lock);
  51. }
  52. }
  53. /*
  54. * Allocate a volume record and load it up from a vldb record.
  55. */
  56. static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params,
  57. struct afs_vldb_entry *vldb,
  58. unsigned long type_mask)
  59. {
  60. struct afs_server_list *slist;
  61. struct afs_volume *volume;
  62. int ret = -ENOMEM, nr_servers = 0, i;
  63. for (i = 0; i < vldb->nr_servers; i++)
  64. if (vldb->fs_mask[i] & type_mask)
  65. nr_servers++;
  66. volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL);
  67. if (!volume)
  68. goto error_0;
  69. volume->vid = vldb->vid[params->type];
  70. volume->update_at = ktime_get_real_seconds() + afs_volume_record_life;
  71. volume->cell = afs_get_cell(params->cell, afs_cell_trace_get_vol);
  72. volume->type = params->type;
  73. volume->type_force = params->force;
  74. volume->name_len = vldb->name_len;
  75. refcount_set(&volume->ref, 1);
  76. INIT_HLIST_NODE(&volume->proc_link);
  77. rwlock_init(&volume->servers_lock);
  78. rwlock_init(&volume->cb_v_break_lock);
  79. memcpy(volume->name, vldb->name, vldb->name_len + 1);
  80. slist = afs_alloc_server_list(params->cell, params->key, vldb, type_mask);
  81. if (IS_ERR(slist)) {
  82. ret = PTR_ERR(slist);
  83. goto error_1;
  84. }
  85. refcount_set(&slist->usage, 1);
  86. rcu_assign_pointer(volume->servers, slist);
  87. trace_afs_volume(volume->vid, 1, afs_volume_trace_alloc);
  88. return volume;
  89. error_1:
  90. afs_put_cell(volume->cell, afs_cell_trace_put_vol);
  91. kfree(volume);
  92. error_0:
  93. return ERR_PTR(ret);
  94. }
  95. /*
  96. * Look up or allocate a volume record.
  97. */
  98. static struct afs_volume *afs_lookup_volume(struct afs_fs_context *params,
  99. struct afs_vldb_entry *vldb,
  100. unsigned long type_mask)
  101. {
  102. struct afs_volume *candidate, *volume;
  103. candidate = afs_alloc_volume(params, vldb, type_mask);
  104. if (IS_ERR(candidate))
  105. return candidate;
  106. volume = afs_insert_volume_into_cell(params->cell, candidate);
  107. if (volume != candidate)
  108. afs_put_volume(params->net, candidate, afs_volume_trace_put_cell_dup);
  109. return volume;
  110. }
  111. /*
  112. * Look up a VLDB record for a volume.
  113. */
  114. static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell,
  115. struct key *key,
  116. const char *volname,
  117. size_t volnamesz)
  118. {
  119. struct afs_vldb_entry *vldb = ERR_PTR(-EDESTADDRREQ);
  120. struct afs_vl_cursor vc;
  121. int ret;
  122. if (!afs_begin_vlserver_operation(&vc, cell, key))
  123. return ERR_PTR(-ERESTARTSYS);
  124. while (afs_select_vlserver(&vc)) {
  125. vldb = afs_vl_get_entry_by_name_u(&vc, volname, volnamesz);
  126. }
  127. ret = afs_end_vlserver_operation(&vc);
  128. return ret < 0 ? ERR_PTR(ret) : vldb;
  129. }
  130. /*
  131. * Look up a volume in the VL server and create a candidate volume record for
  132. * it.
  133. *
  134. * The volume name can be one of the following:
  135. * "%[cell:]volume[.]" R/W volume
  136. * "#[cell:]volume[.]" R/O or R/W volume (rwparent=0),
  137. * or R/W (rwparent=1) volume
  138. * "%[cell:]volume.readonly" R/O volume
  139. * "#[cell:]volume.readonly" R/O volume
  140. * "%[cell:]volume.backup" Backup volume
  141. * "#[cell:]volume.backup" Backup volume
  142. *
  143. * The cell name is optional, and defaults to the current cell.
  144. *
  145. * See "The Rules of Mount Point Traversal" in Chapter 5 of the AFS SysAdmin
  146. * Guide
  147. * - Rule 1: Explicit type suffix forces access of that type or nothing
  148. * (no suffix, then use Rule 2 & 3)
  149. * - Rule 2: If parent volume is R/O, then mount R/O volume by preference, R/W
  150. * if not available
  151. * - Rule 3: If parent volume is R/W, then only mount R/W volume unless
  152. * explicitly told otherwise
  153. */
  154. struct afs_volume *afs_create_volume(struct afs_fs_context *params)
  155. {
  156. struct afs_vldb_entry *vldb;
  157. struct afs_volume *volume;
  158. unsigned long type_mask = 1UL << params->type;
  159. vldb = afs_vl_lookup_vldb(params->cell, params->key,
  160. params->volname, params->volnamesz);
  161. if (IS_ERR(vldb))
  162. return ERR_CAST(vldb);
  163. if (test_bit(AFS_VLDB_QUERY_ERROR, &vldb->flags)) {
  164. volume = ERR_PTR(vldb->error);
  165. goto error;
  166. }
  167. /* Make the final decision on the type we want */
  168. volume = ERR_PTR(-ENOMEDIUM);
  169. if (params->force) {
  170. if (!(vldb->flags & type_mask))
  171. goto error;
  172. } else if (test_bit(AFS_VLDB_HAS_RO, &vldb->flags)) {
  173. params->type = AFSVL_ROVOL;
  174. } else if (test_bit(AFS_VLDB_HAS_RW, &vldb->flags)) {
  175. params->type = AFSVL_RWVOL;
  176. } else {
  177. goto error;
  178. }
  179. type_mask = 1UL << params->type;
  180. volume = afs_lookup_volume(params, vldb, type_mask);
  181. error:
  182. kfree(vldb);
  183. return volume;
  184. }
  185. /*
  186. * Destroy a volume record
  187. */
  188. static void afs_destroy_volume(struct afs_net *net, struct afs_volume *volume)
  189. {
  190. _enter("%p", volume);
  191. #ifdef CONFIG_AFS_FSCACHE
  192. ASSERTCMP(volume->cache, ==, NULL);
  193. #endif
  194. afs_remove_volume_from_cell(volume);
  195. afs_put_serverlist(net, rcu_access_pointer(volume->servers));
  196. afs_put_cell(volume->cell, afs_cell_trace_put_vol);
  197. trace_afs_volume(volume->vid, refcount_read(&volume->ref),
  198. afs_volume_trace_free);
  199. kfree_rcu(volume, rcu);
  200. _leave(" [destroyed]");
  201. }
  202. /*
  203. * Get a reference on a volume record.
  204. */
  205. struct afs_volume *afs_get_volume(struct afs_volume *volume,
  206. enum afs_volume_trace reason)
  207. {
  208. if (volume) {
  209. int r;
  210. __refcount_inc(&volume->ref, &r);
  211. trace_afs_volume(volume->vid, r + 1, reason);
  212. }
  213. return volume;
  214. }
  215. /*
  216. * Drop a reference on a volume record.
  217. */
  218. void afs_put_volume(struct afs_net *net, struct afs_volume *volume,
  219. enum afs_volume_trace reason)
  220. {
  221. if (volume) {
  222. afs_volid_t vid = volume->vid;
  223. bool zero;
  224. int r;
  225. zero = __refcount_dec_and_test(&volume->ref, &r);
  226. trace_afs_volume(vid, r - 1, reason);
  227. if (zero)
  228. afs_destroy_volume(net, volume);
  229. }
  230. }
  231. /*
  232. * Activate a volume.
  233. */
  234. int afs_activate_volume(struct afs_volume *volume)
  235. {
  236. #ifdef CONFIG_AFS_FSCACHE
  237. struct fscache_volume *vcookie;
  238. char *name;
  239. name = kasprintf(GFP_KERNEL, "afs,%s,%llx",
  240. volume->cell->name, volume->vid);
  241. if (!name)
  242. return -ENOMEM;
  243. vcookie = fscache_acquire_volume(name, NULL, NULL, 0);
  244. if (IS_ERR(vcookie)) {
  245. if (vcookie != ERR_PTR(-EBUSY)) {
  246. kfree(name);
  247. return PTR_ERR(vcookie);
  248. }
  249. pr_err("AFS: Cache volume key already in use (%s)\n", name);
  250. vcookie = NULL;
  251. }
  252. volume->cache = vcookie;
  253. kfree(name);
  254. #endif
  255. return 0;
  256. }
  257. /*
  258. * Deactivate a volume.
  259. */
  260. void afs_deactivate_volume(struct afs_volume *volume)
  261. {
  262. _enter("%s", volume->name);
  263. #ifdef CONFIG_AFS_FSCACHE
  264. fscache_relinquish_volume(volume->cache, NULL,
  265. test_bit(AFS_VOLUME_DELETED, &volume->flags));
  266. volume->cache = NULL;
  267. #endif
  268. _leave("");
  269. }
  270. /*
  271. * Query the VL service to update the volume status.
  272. */
  273. static int afs_update_volume_status(struct afs_volume *volume, struct key *key)
  274. {
  275. struct afs_server_list *new, *old, *discard;
  276. struct afs_vldb_entry *vldb;
  277. char idbuf[16];
  278. int ret, idsz;
  279. _enter("");
  280. /* We look up an ID by passing it as a decimal string in the
  281. * operation's name parameter.
  282. */
  283. idsz = sprintf(idbuf, "%llu", volume->vid);
  284. vldb = afs_vl_lookup_vldb(volume->cell, key, idbuf, idsz);
  285. if (IS_ERR(vldb)) {
  286. ret = PTR_ERR(vldb);
  287. goto error;
  288. }
  289. /* See if the volume got renamed. */
  290. if (vldb->name_len != volume->name_len ||
  291. memcmp(vldb->name, volume->name, vldb->name_len) != 0) {
  292. /* TODO: Use RCU'd string. */
  293. memcpy(volume->name, vldb->name, AFS_MAXVOLNAME);
  294. volume->name_len = vldb->name_len;
  295. }
  296. /* See if the volume's server list got updated. */
  297. new = afs_alloc_server_list(volume->cell, key,
  298. vldb, (1 << volume->type));
  299. if (IS_ERR(new)) {
  300. ret = PTR_ERR(new);
  301. goto error_vldb;
  302. }
  303. write_lock(&volume->servers_lock);
  304. discard = new;
  305. old = rcu_dereference_protected(volume->servers,
  306. lockdep_is_held(&volume->servers_lock));
  307. if (afs_annotate_server_list(new, old)) {
  308. new->seq = volume->servers_seq + 1;
  309. rcu_assign_pointer(volume->servers, new);
  310. smp_wmb();
  311. volume->servers_seq++;
  312. discard = old;
  313. }
  314. volume->update_at = ktime_get_real_seconds() + afs_volume_record_life;
  315. write_unlock(&volume->servers_lock);
  316. ret = 0;
  317. afs_put_serverlist(volume->cell->net, discard);
  318. error_vldb:
  319. kfree(vldb);
  320. error:
  321. _leave(" = %d", ret);
  322. return ret;
  323. }
  324. /*
  325. * Make sure the volume record is up to date.
  326. */
  327. int afs_check_volume_status(struct afs_volume *volume, struct afs_operation *op)
  328. {
  329. int ret, retries = 0;
  330. _enter("");
  331. retry:
  332. if (test_bit(AFS_VOLUME_WAIT, &volume->flags))
  333. goto wait;
  334. if (volume->update_at <= ktime_get_real_seconds() ||
  335. test_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags))
  336. goto update;
  337. _leave(" = 0");
  338. return 0;
  339. update:
  340. if (!test_and_set_bit_lock(AFS_VOLUME_UPDATING, &volume->flags)) {
  341. clear_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags);
  342. ret = afs_update_volume_status(volume, op->key);
  343. if (ret < 0)
  344. set_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags);
  345. clear_bit_unlock(AFS_VOLUME_WAIT, &volume->flags);
  346. clear_bit_unlock(AFS_VOLUME_UPDATING, &volume->flags);
  347. wake_up_bit(&volume->flags, AFS_VOLUME_WAIT);
  348. _leave(" = %d", ret);
  349. return ret;
  350. }
  351. wait:
  352. if (!test_bit(AFS_VOLUME_WAIT, &volume->flags)) {
  353. _leave(" = 0 [no wait]");
  354. return 0;
  355. }
  356. ret = wait_on_bit(&volume->flags, AFS_VOLUME_WAIT,
  357. (op->flags & AFS_OPERATION_UNINTR) ?
  358. TASK_UNINTERRUPTIBLE : TASK_INTERRUPTIBLE);
  359. if (ret == -ERESTARTSYS) {
  360. _leave(" = %d", ret);
  361. return ret;
  362. }
  363. retries++;
  364. if (retries == 4) {
  365. _leave(" = -ESTALE");
  366. return -ESTALE;
  367. }
  368. goto retry;
  369. }