vl_rotate.c 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /* Handle vlserver selection and rotation.
  3. *
  4. * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
  5. * Written by David Howells ([email protected])
  6. */
  7. #include <linux/kernel.h>
  8. #include <linux/sched.h>
  9. #include <linux/sched/signal.h>
  10. #include "internal.h"
  11. #include "afs_vl.h"
  12. /*
  13. * Begin an operation on a volume location server.
  14. */
  15. bool afs_begin_vlserver_operation(struct afs_vl_cursor *vc, struct afs_cell *cell,
  16. struct key *key)
  17. {
  18. memset(vc, 0, sizeof(*vc));
  19. vc->cell = cell;
  20. vc->key = key;
  21. vc->error = -EDESTADDRREQ;
  22. vc->ac.error = SHRT_MAX;
  23. if (signal_pending(current)) {
  24. vc->error = -EINTR;
  25. vc->flags |= AFS_VL_CURSOR_STOP;
  26. return false;
  27. }
  28. return true;
  29. }
  30. /*
  31. * Begin iteration through a server list, starting with the last used server if
  32. * possible, or the last recorded good server if not.
  33. */
  34. static bool afs_start_vl_iteration(struct afs_vl_cursor *vc)
  35. {
  36. struct afs_cell *cell = vc->cell;
  37. unsigned int dns_lookup_count;
  38. if (cell->dns_source == DNS_RECORD_UNAVAILABLE ||
  39. cell->dns_expiry <= ktime_get_real_seconds()) {
  40. dns_lookup_count = smp_load_acquire(&cell->dns_lookup_count);
  41. set_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags);
  42. afs_queue_cell(cell, afs_cell_trace_get_queue_dns);
  43. if (cell->dns_source == DNS_RECORD_UNAVAILABLE) {
  44. if (wait_var_event_interruptible(
  45. &cell->dns_lookup_count,
  46. smp_load_acquire(&cell->dns_lookup_count)
  47. != dns_lookup_count) < 0) {
  48. vc->error = -ERESTARTSYS;
  49. return false;
  50. }
  51. }
  52. /* Status load is ordered after lookup counter load */
  53. if (cell->dns_status == DNS_LOOKUP_GOT_NOT_FOUND) {
  54. pr_warn("No record of cell %s\n", cell->name);
  55. vc->error = -ENOENT;
  56. return false;
  57. }
  58. if (cell->dns_source == DNS_RECORD_UNAVAILABLE) {
  59. vc->error = -EDESTADDRREQ;
  60. return false;
  61. }
  62. }
  63. read_lock(&cell->vl_servers_lock);
  64. vc->server_list = afs_get_vlserverlist(
  65. rcu_dereference_protected(cell->vl_servers,
  66. lockdep_is_held(&cell->vl_servers_lock)));
  67. read_unlock(&cell->vl_servers_lock);
  68. if (!vc->server_list->nr_servers)
  69. return false;
  70. vc->untried = (1UL << vc->server_list->nr_servers) - 1;
  71. vc->index = -1;
  72. return true;
  73. }
  74. /*
  75. * Select the vlserver to use. May be called multiple times to rotate
  76. * through the vlservers.
  77. */
  78. bool afs_select_vlserver(struct afs_vl_cursor *vc)
  79. {
  80. struct afs_addr_list *alist;
  81. struct afs_vlserver *vlserver;
  82. struct afs_error e;
  83. u32 rtt;
  84. int error = vc->ac.error, i;
  85. _enter("%lx[%d],%lx[%d],%d,%d",
  86. vc->untried, vc->index,
  87. vc->ac.tried, vc->ac.index,
  88. error, vc->ac.abort_code);
  89. if (vc->flags & AFS_VL_CURSOR_STOP) {
  90. _leave(" = f [stopped]");
  91. return false;
  92. }
  93. vc->nr_iterations++;
  94. /* Evaluate the result of the previous operation, if there was one. */
  95. switch (error) {
  96. case SHRT_MAX:
  97. goto start;
  98. default:
  99. case 0:
  100. /* Success or local failure. Stop. */
  101. vc->error = error;
  102. vc->flags |= AFS_VL_CURSOR_STOP;
  103. _leave(" = f [okay/local %d]", vc->ac.error);
  104. return false;
  105. case -ECONNABORTED:
  106. /* The far side rejected the operation on some grounds. This
  107. * might involve the server being busy or the volume having been moved.
  108. */
  109. switch (vc->ac.abort_code) {
  110. case AFSVL_IO:
  111. case AFSVL_BADVOLOPER:
  112. case AFSVL_NOMEM:
  113. /* The server went weird. */
  114. vc->error = -EREMOTEIO;
  115. //write_lock(&vc->cell->vl_servers_lock);
  116. //vc->server_list->weird_mask |= 1 << vc->index;
  117. //write_unlock(&vc->cell->vl_servers_lock);
  118. goto next_server;
  119. default:
  120. vc->error = afs_abort_to_error(vc->ac.abort_code);
  121. goto failed;
  122. }
  123. case -ERFKILL:
  124. case -EADDRNOTAVAIL:
  125. case -ENETUNREACH:
  126. case -EHOSTUNREACH:
  127. case -EHOSTDOWN:
  128. case -ECONNREFUSED:
  129. case -ETIMEDOUT:
  130. case -ETIME:
  131. _debug("no conn %d", error);
  132. vc->error = error;
  133. goto iterate_address;
  134. case -ECONNRESET:
  135. _debug("call reset");
  136. vc->error = error;
  137. vc->flags |= AFS_VL_CURSOR_RETRY;
  138. goto next_server;
  139. case -EOPNOTSUPP:
  140. _debug("notsupp");
  141. goto next_server;
  142. }
  143. restart_from_beginning:
  144. _debug("restart");
  145. afs_end_cursor(&vc->ac);
  146. afs_put_vlserverlist(vc->cell->net, vc->server_list);
  147. vc->server_list = NULL;
  148. if (vc->flags & AFS_VL_CURSOR_RETRIED)
  149. goto failed;
  150. vc->flags |= AFS_VL_CURSOR_RETRIED;
  151. start:
  152. _debug("start");
  153. if (!afs_start_vl_iteration(vc))
  154. goto failed;
  155. error = afs_send_vl_probes(vc->cell->net, vc->key, vc->server_list);
  156. if (error < 0)
  157. goto failed_set_error;
  158. pick_server:
  159. _debug("pick [%lx]", vc->untried);
  160. error = afs_wait_for_vl_probes(vc->server_list, vc->untried);
  161. if (error < 0)
  162. goto failed_set_error;
  163. /* Pick the untried server with the lowest RTT. */
  164. vc->index = vc->server_list->preferred;
  165. if (test_bit(vc->index, &vc->untried))
  166. goto selected_server;
  167. vc->index = -1;
  168. rtt = U32_MAX;
  169. for (i = 0; i < vc->server_list->nr_servers; i++) {
  170. struct afs_vlserver *s = vc->server_list->servers[i].server;
  171. if (!test_bit(i, &vc->untried) ||
  172. !test_bit(AFS_VLSERVER_FL_RESPONDING, &s->flags))
  173. continue;
  174. if (s->probe.rtt < rtt) {
  175. vc->index = i;
  176. rtt = s->probe.rtt;
  177. }
  178. }
  179. if (vc->index == -1)
  180. goto no_more_servers;
  181. selected_server:
  182. _debug("use %d", vc->index);
  183. __clear_bit(vc->index, &vc->untried);
  184. /* We're starting on a different vlserver from the list. We need to
  185. * check it, find its address list and probe its capabilities before we
  186. * use it.
  187. */
  188. ASSERTCMP(vc->ac.alist, ==, NULL);
  189. vlserver = vc->server_list->servers[vc->index].server;
  190. vc->server = vlserver;
  191. _debug("USING VLSERVER: %s", vlserver->name);
  192. read_lock(&vlserver->lock);
  193. alist = rcu_dereference_protected(vlserver->addresses,
  194. lockdep_is_held(&vlserver->lock));
  195. afs_get_addrlist(alist);
  196. read_unlock(&vlserver->lock);
  197. memset(&vc->ac, 0, sizeof(vc->ac));
  198. if (!vc->ac.alist)
  199. vc->ac.alist = alist;
  200. else
  201. afs_put_addrlist(alist);
  202. vc->ac.index = -1;
  203. iterate_address:
  204. ASSERT(vc->ac.alist);
  205. /* Iterate over the current server's address list to try and find an
  206. * address on which it will respond to us.
  207. */
  208. if (!afs_iterate_addresses(&vc->ac))
  209. goto next_server;
  210. _debug("VL address %d/%d", vc->ac.index, vc->ac.alist->nr_addrs);
  211. _leave(" = t %pISpc", &vc->ac.alist->addrs[vc->ac.index].transport);
  212. return true;
  213. next_server:
  214. _debug("next");
  215. afs_end_cursor(&vc->ac);
  216. goto pick_server;
  217. no_more_servers:
  218. /* That's all the servers poked to no good effect. Try again if some
  219. * of them were busy.
  220. */
  221. if (vc->flags & AFS_VL_CURSOR_RETRY)
  222. goto restart_from_beginning;
  223. e.error = -EDESTADDRREQ;
  224. e.responded = false;
  225. for (i = 0; i < vc->server_list->nr_servers; i++) {
  226. struct afs_vlserver *s = vc->server_list->servers[i].server;
  227. if (test_bit(AFS_VLSERVER_FL_RESPONDING, &s->flags))
  228. e.responded = true;
  229. afs_prioritise_error(&e, READ_ONCE(s->probe.error),
  230. s->probe.abort_code);
  231. }
  232. error = e.error;
  233. failed_set_error:
  234. vc->error = error;
  235. failed:
  236. vc->flags |= AFS_VL_CURSOR_STOP;
  237. afs_end_cursor(&vc->ac);
  238. _leave(" = f [failed %d]", vc->error);
  239. return false;
  240. }
  241. /*
  242. * Dump cursor state in the case of the error being EDESTADDRREQ.
  243. */
  244. static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc)
  245. {
  246. struct afs_cell *cell = vc->cell;
  247. static int count;
  248. int i;
  249. if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3)
  250. return;
  251. count++;
  252. rcu_read_lock();
  253. pr_notice("EDESTADDR occurred\n");
  254. pr_notice("CELL: %s err=%d\n", cell->name, cell->error);
  255. pr_notice("DNS: src=%u st=%u lc=%x\n",
  256. cell->dns_source, cell->dns_status, cell->dns_lookup_count);
  257. pr_notice("VC: ut=%lx ix=%u ni=%hu fl=%hx err=%hd\n",
  258. vc->untried, vc->index, vc->nr_iterations, vc->flags, vc->error);
  259. if (vc->server_list) {
  260. const struct afs_vlserver_list *sl = vc->server_list;
  261. pr_notice("VC: SL nr=%u ix=%u\n",
  262. sl->nr_servers, sl->index);
  263. for (i = 0; i < sl->nr_servers; i++) {
  264. const struct afs_vlserver *s = sl->servers[i].server;
  265. pr_notice("VC: server %s+%hu fl=%lx E=%hd\n",
  266. s->name, s->port, s->flags, s->probe.error);
  267. if (s->addresses) {
  268. const struct afs_addr_list *a =
  269. rcu_dereference(s->addresses);
  270. pr_notice("VC: - nr=%u/%u/%u pf=%u\n",
  271. a->nr_ipv4, a->nr_addrs, a->max_addrs,
  272. a->preferred);
  273. pr_notice("VC: - R=%lx F=%lx\n",
  274. a->responded, a->failed);
  275. if (a == vc->ac.alist)
  276. pr_notice("VC: - current\n");
  277. }
  278. }
  279. }
  280. pr_notice("AC: t=%lx ax=%u ac=%d er=%d r=%u ni=%u\n",
  281. vc->ac.tried, vc->ac.index, vc->ac.abort_code, vc->ac.error,
  282. vc->ac.responded, vc->ac.nr_iterations);
  283. rcu_read_unlock();
  284. }
  285. /*
  286. * Tidy up a volume location server cursor and unlock the vnode.
  287. */
  288. int afs_end_vlserver_operation(struct afs_vl_cursor *vc)
  289. {
  290. struct afs_net *net = vc->cell->net;
  291. if (vc->error == -EDESTADDRREQ ||
  292. vc->error == -EADDRNOTAVAIL ||
  293. vc->error == -ENETUNREACH ||
  294. vc->error == -EHOSTUNREACH)
  295. afs_vl_dump_edestaddrreq(vc);
  296. afs_end_cursor(&vc->ac);
  297. afs_put_vlserverlist(net, vc->server_list);
  298. if (vc->error == -ECONNABORTED)
  299. vc->error = afs_abort_to_error(vc->ac.abort_code);
  300. return vc->error;
  301. }