pnfs_nfs.c 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Common NFS I/O operations for the pnfs file based
  4. * layout drivers.
  5. *
  6. * Copyright (c) 2014, Primary Data, Inc. All rights reserved.
  7. *
  8. * Tom Haynes <[email protected]>
  9. */
  10. #include <linux/nfs_fs.h>
  11. #include <linux/nfs_page.h>
  12. #include <linux/sunrpc/addr.h>
  13. #include <linux/module.h>
  14. #include "nfs4session.h"
  15. #include "internal.h"
  16. #include "pnfs.h"
  17. #define NFSDBG_FACILITY NFSDBG_PNFS
  18. void pnfs_generic_rw_release(void *data)
  19. {
  20. struct nfs_pgio_header *hdr = data;
  21. nfs_put_client(hdr->ds_clp);
  22. hdr->mds_ops->rpc_release(data);
  23. }
  24. EXPORT_SYMBOL_GPL(pnfs_generic_rw_release);
  25. /* Fake up some data that will cause nfs_commit_release to retry the writes. */
  26. void pnfs_generic_prepare_to_resend_writes(struct nfs_commit_data *data)
  27. {
  28. struct nfs_writeverf *verf = data->res.verf;
  29. data->task.tk_status = 0;
  30. memset(&verf->verifier, 0, sizeof(verf->verifier));
  31. verf->committed = NFS_UNSTABLE;
  32. }
  33. EXPORT_SYMBOL_GPL(pnfs_generic_prepare_to_resend_writes);
  34. void pnfs_generic_write_commit_done(struct rpc_task *task, void *data)
  35. {
  36. struct nfs_commit_data *wdata = data;
  37. /* Note this may cause RPC to be resent */
  38. wdata->mds_ops->rpc_call_done(task, data);
  39. }
  40. EXPORT_SYMBOL_GPL(pnfs_generic_write_commit_done);
  41. void pnfs_generic_commit_release(void *calldata)
  42. {
  43. struct nfs_commit_data *data = calldata;
  44. data->completion_ops->completion(data);
  45. pnfs_put_lseg(data->lseg);
  46. nfs_put_client(data->ds_clp);
  47. nfs_commitdata_release(data);
  48. }
  49. EXPORT_SYMBOL_GPL(pnfs_generic_commit_release);
  50. static struct pnfs_layout_segment *
  51. pnfs_free_bucket_lseg(struct pnfs_commit_bucket *bucket)
  52. {
  53. if (list_empty(&bucket->committing) && list_empty(&bucket->written)) {
  54. struct pnfs_layout_segment *freeme = bucket->lseg;
  55. bucket->lseg = NULL;
  56. return freeme;
  57. }
  58. return NULL;
  59. }
  60. /* The generic layer is about to remove the req from the commit list.
  61. * If this will make the bucket empty, it will need to put the lseg reference.
  62. * Note this must be called holding nfsi->commit_mutex
  63. */
  64. void
  65. pnfs_generic_clear_request_commit(struct nfs_page *req,
  66. struct nfs_commit_info *cinfo)
  67. {
  68. struct pnfs_commit_bucket *bucket = NULL;
  69. if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags))
  70. goto out;
  71. cinfo->ds->nwritten--;
  72. if (list_is_singular(&req->wb_list))
  73. bucket = list_first_entry(&req->wb_list,
  74. struct pnfs_commit_bucket, written);
  75. out:
  76. nfs_request_remove_commit_list(req, cinfo);
  77. if (bucket)
  78. pnfs_put_lseg(pnfs_free_bucket_lseg(bucket));
  79. }
  80. EXPORT_SYMBOL_GPL(pnfs_generic_clear_request_commit);
  81. struct pnfs_commit_array *
  82. pnfs_alloc_commit_array(size_t n, gfp_t gfp_flags)
  83. {
  84. struct pnfs_commit_array *p;
  85. struct pnfs_commit_bucket *b;
  86. p = kmalloc(struct_size(p, buckets, n), gfp_flags);
  87. if (!p)
  88. return NULL;
  89. p->nbuckets = n;
  90. INIT_LIST_HEAD(&p->cinfo_list);
  91. INIT_LIST_HEAD(&p->lseg_list);
  92. p->lseg = NULL;
  93. for (b = &p->buckets[0]; n != 0; b++, n--) {
  94. INIT_LIST_HEAD(&b->written);
  95. INIT_LIST_HEAD(&b->committing);
  96. b->lseg = NULL;
  97. b->direct_verf.committed = NFS_INVALID_STABLE_HOW;
  98. }
  99. return p;
  100. }
  101. EXPORT_SYMBOL_GPL(pnfs_alloc_commit_array);
  102. void
  103. pnfs_free_commit_array(struct pnfs_commit_array *p)
  104. {
  105. kfree_rcu(p, rcu);
  106. }
  107. EXPORT_SYMBOL_GPL(pnfs_free_commit_array);
  108. static struct pnfs_commit_array *
  109. pnfs_find_commit_array_by_lseg(struct pnfs_ds_commit_info *fl_cinfo,
  110. struct pnfs_layout_segment *lseg)
  111. {
  112. struct pnfs_commit_array *array;
  113. list_for_each_entry_rcu(array, &fl_cinfo->commits, cinfo_list) {
  114. if (array->lseg == lseg)
  115. return array;
  116. }
  117. return NULL;
  118. }
  119. struct pnfs_commit_array *
  120. pnfs_add_commit_array(struct pnfs_ds_commit_info *fl_cinfo,
  121. struct pnfs_commit_array *new,
  122. struct pnfs_layout_segment *lseg)
  123. {
  124. struct pnfs_commit_array *array;
  125. array = pnfs_find_commit_array_by_lseg(fl_cinfo, lseg);
  126. if (array)
  127. return array;
  128. new->lseg = lseg;
  129. refcount_set(&new->refcount, 1);
  130. list_add_rcu(&new->cinfo_list, &fl_cinfo->commits);
  131. list_add(&new->lseg_list, &lseg->pls_commits);
  132. return new;
  133. }
  134. EXPORT_SYMBOL_GPL(pnfs_add_commit_array);
  135. static struct pnfs_commit_array *
  136. pnfs_lookup_commit_array(struct pnfs_ds_commit_info *fl_cinfo,
  137. struct pnfs_layout_segment *lseg)
  138. {
  139. struct pnfs_commit_array *array;
  140. rcu_read_lock();
  141. array = pnfs_find_commit_array_by_lseg(fl_cinfo, lseg);
  142. if (!array) {
  143. rcu_read_unlock();
  144. fl_cinfo->ops->setup_ds_info(fl_cinfo, lseg);
  145. rcu_read_lock();
  146. array = pnfs_find_commit_array_by_lseg(fl_cinfo, lseg);
  147. }
  148. rcu_read_unlock();
  149. return array;
  150. }
  151. static void
  152. pnfs_release_commit_array_locked(struct pnfs_commit_array *array)
  153. {
  154. list_del_rcu(&array->cinfo_list);
  155. list_del(&array->lseg_list);
  156. pnfs_free_commit_array(array);
  157. }
  158. static void
  159. pnfs_put_commit_array_locked(struct pnfs_commit_array *array)
  160. {
  161. if (refcount_dec_and_test(&array->refcount))
  162. pnfs_release_commit_array_locked(array);
  163. }
  164. static void
  165. pnfs_put_commit_array(struct pnfs_commit_array *array, struct inode *inode)
  166. {
  167. if (refcount_dec_and_lock(&array->refcount, &inode->i_lock)) {
  168. pnfs_release_commit_array_locked(array);
  169. spin_unlock(&inode->i_lock);
  170. }
  171. }
  172. static struct pnfs_commit_array *
  173. pnfs_get_commit_array(struct pnfs_commit_array *array)
  174. {
  175. if (refcount_inc_not_zero(&array->refcount))
  176. return array;
  177. return NULL;
  178. }
  179. static void
  180. pnfs_remove_and_free_commit_array(struct pnfs_commit_array *array)
  181. {
  182. array->lseg = NULL;
  183. list_del_init(&array->lseg_list);
  184. pnfs_put_commit_array_locked(array);
  185. }
  186. void
  187. pnfs_generic_ds_cinfo_release_lseg(struct pnfs_ds_commit_info *fl_cinfo,
  188. struct pnfs_layout_segment *lseg)
  189. {
  190. struct pnfs_commit_array *array, *tmp;
  191. list_for_each_entry_safe(array, tmp, &lseg->pls_commits, lseg_list)
  192. pnfs_remove_and_free_commit_array(array);
  193. }
  194. EXPORT_SYMBOL_GPL(pnfs_generic_ds_cinfo_release_lseg);
  195. void
  196. pnfs_generic_ds_cinfo_destroy(struct pnfs_ds_commit_info *fl_cinfo)
  197. {
  198. struct pnfs_commit_array *array, *tmp;
  199. list_for_each_entry_safe(array, tmp, &fl_cinfo->commits, cinfo_list)
  200. pnfs_remove_and_free_commit_array(array);
  201. }
  202. EXPORT_SYMBOL_GPL(pnfs_generic_ds_cinfo_destroy);
  203. /*
  204. * Locks the nfs_page requests for commit and moves them to
  205. * @bucket->committing.
  206. */
  207. static int
  208. pnfs_bucket_scan_ds_commit_list(struct pnfs_commit_bucket *bucket,
  209. struct nfs_commit_info *cinfo,
  210. int max)
  211. {
  212. struct list_head *src = &bucket->written;
  213. struct list_head *dst = &bucket->committing;
  214. int ret;
  215. lockdep_assert_held(&NFS_I(cinfo->inode)->commit_mutex);
  216. ret = nfs_scan_commit_list(src, dst, cinfo, max);
  217. if (ret) {
  218. cinfo->ds->nwritten -= ret;
  219. cinfo->ds->ncommitting += ret;
  220. }
  221. return ret;
  222. }
  223. static int pnfs_bucket_scan_array(struct nfs_commit_info *cinfo,
  224. struct pnfs_commit_bucket *buckets,
  225. unsigned int nbuckets,
  226. int max)
  227. {
  228. unsigned int i;
  229. int rv = 0, cnt;
  230. for (i = 0; i < nbuckets && max != 0; i++) {
  231. cnt = pnfs_bucket_scan_ds_commit_list(&buckets[i], cinfo, max);
  232. rv += cnt;
  233. max -= cnt;
  234. }
  235. return rv;
  236. }
  237. /* Move reqs from written to committing lists, returning count
  238. * of number moved.
  239. */
  240. int pnfs_generic_scan_commit_lists(struct nfs_commit_info *cinfo, int max)
  241. {
  242. struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
  243. struct pnfs_commit_array *array;
  244. int rv = 0, cnt;
  245. rcu_read_lock();
  246. list_for_each_entry_rcu(array, &fl_cinfo->commits, cinfo_list) {
  247. if (!array->lseg || !pnfs_get_commit_array(array))
  248. continue;
  249. rcu_read_unlock();
  250. cnt = pnfs_bucket_scan_array(cinfo, array->buckets,
  251. array->nbuckets, max);
  252. rcu_read_lock();
  253. pnfs_put_commit_array(array, cinfo->inode);
  254. rv += cnt;
  255. max -= cnt;
  256. if (!max)
  257. break;
  258. }
  259. rcu_read_unlock();
  260. return rv;
  261. }
  262. EXPORT_SYMBOL_GPL(pnfs_generic_scan_commit_lists);
  263. static unsigned int
  264. pnfs_bucket_recover_commit_reqs(struct list_head *dst,
  265. struct pnfs_commit_bucket *buckets,
  266. unsigned int nbuckets,
  267. struct nfs_commit_info *cinfo)
  268. {
  269. struct pnfs_commit_bucket *b;
  270. struct pnfs_layout_segment *freeme;
  271. unsigned int nwritten, ret = 0;
  272. unsigned int i;
  273. restart:
  274. for (i = 0, b = buckets; i < nbuckets; i++, b++) {
  275. nwritten = nfs_scan_commit_list(&b->written, dst, cinfo, 0);
  276. if (!nwritten)
  277. continue;
  278. ret += nwritten;
  279. freeme = pnfs_free_bucket_lseg(b);
  280. if (freeme) {
  281. pnfs_put_lseg(freeme);
  282. goto restart;
  283. }
  284. }
  285. return ret;
  286. }
  287. /* Pull everything off the committing lists and dump into @dst. */
  288. void pnfs_generic_recover_commit_reqs(struct list_head *dst,
  289. struct nfs_commit_info *cinfo)
  290. {
  291. struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
  292. struct pnfs_commit_array *array;
  293. unsigned int nwritten;
  294. lockdep_assert_held(&NFS_I(cinfo->inode)->commit_mutex);
  295. rcu_read_lock();
  296. list_for_each_entry_rcu(array, &fl_cinfo->commits, cinfo_list) {
  297. if (!array->lseg || !pnfs_get_commit_array(array))
  298. continue;
  299. rcu_read_unlock();
  300. nwritten = pnfs_bucket_recover_commit_reqs(dst,
  301. array->buckets,
  302. array->nbuckets,
  303. cinfo);
  304. rcu_read_lock();
  305. pnfs_put_commit_array(array, cinfo->inode);
  306. fl_cinfo->nwritten -= nwritten;
  307. }
  308. rcu_read_unlock();
  309. }
  310. EXPORT_SYMBOL_GPL(pnfs_generic_recover_commit_reqs);
  311. static struct nfs_page *
  312. pnfs_bucket_search_commit_reqs(struct pnfs_commit_bucket *buckets,
  313. unsigned int nbuckets, struct page *page)
  314. {
  315. struct nfs_page *req;
  316. struct pnfs_commit_bucket *b;
  317. unsigned int i;
  318. /* Linearly search the commit lists for each bucket until a matching
  319. * request is found */
  320. for (i = 0, b = buckets; i < nbuckets; i++, b++) {
  321. list_for_each_entry(req, &b->written, wb_list) {
  322. if (req->wb_page == page)
  323. return req->wb_head;
  324. }
  325. list_for_each_entry(req, &b->committing, wb_list) {
  326. if (req->wb_page == page)
  327. return req->wb_head;
  328. }
  329. }
  330. return NULL;
  331. }
  332. /* pnfs_generic_search_commit_reqs - Search lists in @cinfo for the head request
  333. * for @page
  334. * @cinfo - commit info for current inode
  335. * @page - page to search for matching head request
  336. *
  337. * Return: the head request if one is found, otherwise %NULL.
  338. */
  339. struct nfs_page *
  340. pnfs_generic_search_commit_reqs(struct nfs_commit_info *cinfo, struct page *page)
  341. {
  342. struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
  343. struct pnfs_commit_array *array;
  344. struct nfs_page *req;
  345. list_for_each_entry(array, &fl_cinfo->commits, cinfo_list) {
  346. req = pnfs_bucket_search_commit_reqs(array->buckets,
  347. array->nbuckets, page);
  348. if (req)
  349. return req;
  350. }
  351. return NULL;
  352. }
  353. EXPORT_SYMBOL_GPL(pnfs_generic_search_commit_reqs);
  354. static struct pnfs_layout_segment *
  355. pnfs_bucket_get_committing(struct list_head *head,
  356. struct pnfs_commit_bucket *bucket,
  357. struct nfs_commit_info *cinfo)
  358. {
  359. struct pnfs_layout_segment *lseg;
  360. struct list_head *pos;
  361. list_for_each(pos, &bucket->committing)
  362. cinfo->ds->ncommitting--;
  363. list_splice_init(&bucket->committing, head);
  364. lseg = pnfs_free_bucket_lseg(bucket);
  365. if (!lseg)
  366. lseg = pnfs_get_lseg(bucket->lseg);
  367. return lseg;
  368. }
  369. static struct nfs_commit_data *
  370. pnfs_bucket_fetch_commitdata(struct pnfs_commit_bucket *bucket,
  371. struct nfs_commit_info *cinfo)
  372. {
  373. struct nfs_commit_data *data = nfs_commitdata_alloc();
  374. if (!data)
  375. return NULL;
  376. data->lseg = pnfs_bucket_get_committing(&data->pages, bucket, cinfo);
  377. return data;
  378. }
  379. static void pnfs_generic_retry_commit(struct pnfs_commit_bucket *buckets,
  380. unsigned int nbuckets,
  381. struct nfs_commit_info *cinfo,
  382. unsigned int idx)
  383. {
  384. struct pnfs_commit_bucket *bucket;
  385. struct pnfs_layout_segment *freeme;
  386. LIST_HEAD(pages);
  387. for (bucket = buckets; idx < nbuckets; bucket++, idx++) {
  388. if (list_empty(&bucket->committing))
  389. continue;
  390. mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
  391. freeme = pnfs_bucket_get_committing(&pages, bucket, cinfo);
  392. mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
  393. nfs_retry_commit(&pages, freeme, cinfo, idx);
  394. pnfs_put_lseg(freeme);
  395. }
  396. }
  397. static unsigned int
  398. pnfs_bucket_alloc_ds_commits(struct list_head *list,
  399. struct pnfs_commit_bucket *buckets,
  400. unsigned int nbuckets,
  401. struct nfs_commit_info *cinfo)
  402. {
  403. struct pnfs_commit_bucket *bucket;
  404. struct nfs_commit_data *data;
  405. unsigned int i;
  406. unsigned int nreq = 0;
  407. for (i = 0, bucket = buckets; i < nbuckets; i++, bucket++) {
  408. if (list_empty(&bucket->committing))
  409. continue;
  410. mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
  411. if (!list_empty(&bucket->committing)) {
  412. data = pnfs_bucket_fetch_commitdata(bucket, cinfo);
  413. if (!data)
  414. goto out_error;
  415. data->ds_commit_index = i;
  416. list_add_tail(&data->list, list);
  417. nreq++;
  418. }
  419. mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
  420. }
  421. return nreq;
  422. out_error:
  423. mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
  424. /* Clean up on error */
  425. pnfs_generic_retry_commit(buckets, nbuckets, cinfo, i);
  426. return nreq;
  427. }
  428. static unsigned int
  429. pnfs_alloc_ds_commits_list(struct list_head *list,
  430. struct pnfs_ds_commit_info *fl_cinfo,
  431. struct nfs_commit_info *cinfo)
  432. {
  433. struct pnfs_commit_array *array;
  434. unsigned int ret = 0;
  435. rcu_read_lock();
  436. list_for_each_entry_rcu(array, &fl_cinfo->commits, cinfo_list) {
  437. if (!array->lseg || !pnfs_get_commit_array(array))
  438. continue;
  439. rcu_read_unlock();
  440. ret += pnfs_bucket_alloc_ds_commits(list, array->buckets,
  441. array->nbuckets, cinfo);
  442. rcu_read_lock();
  443. pnfs_put_commit_array(array, cinfo->inode);
  444. }
  445. rcu_read_unlock();
  446. return ret;
  447. }
  448. /* This follows nfs_commit_list pretty closely */
  449. int
  450. pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
  451. int how, struct nfs_commit_info *cinfo,
  452. int (*initiate_commit)(struct nfs_commit_data *data,
  453. int how))
  454. {
  455. struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
  456. struct nfs_commit_data *data, *tmp;
  457. LIST_HEAD(list);
  458. unsigned int nreq = 0;
  459. if (!list_empty(mds_pages)) {
  460. data = nfs_commitdata_alloc();
  461. if (!data) {
  462. nfs_retry_commit(mds_pages, NULL, cinfo, -1);
  463. return -ENOMEM;
  464. }
  465. data->ds_commit_index = -1;
  466. list_splice_init(mds_pages, &data->pages);
  467. list_add_tail(&data->list, &list);
  468. nreq++;
  469. }
  470. nreq += pnfs_alloc_ds_commits_list(&list, fl_cinfo, cinfo);
  471. if (nreq == 0)
  472. goto out;
  473. list_for_each_entry_safe(data, tmp, &list, list) {
  474. list_del(&data->list);
  475. if (data->ds_commit_index < 0) {
  476. nfs_init_commit(data, NULL, NULL, cinfo);
  477. nfs_initiate_commit(NFS_CLIENT(inode), data,
  478. NFS_PROTO(data->inode),
  479. data->mds_ops, how,
  480. RPC_TASK_CRED_NOREF);
  481. } else {
  482. nfs_init_commit(data, NULL, data->lseg, cinfo);
  483. initiate_commit(data, how);
  484. }
  485. }
  486. out:
  487. return PNFS_ATTEMPTED;
  488. }
  489. EXPORT_SYMBOL_GPL(pnfs_generic_commit_pagelist);
  /*
   * Data server cache
   *
   * Data servers can be mapped to different device ids.
   * Reference counting of struct nfs4_pnfs_ds:
   *   - set to 1 on allocation
   *   - incremented when a device id maps a data server that is already in
   *     the cache
   *   - decremented when the deviceid is removed from the cache
   */
  static LIST_HEAD(nfs4_data_server_cache);
  static DEFINE_SPINLOCK(nfs4_ds_cache_lock);
  501. /* Debug routines */
  502. static void
  503. print_ds(struct nfs4_pnfs_ds *ds)
  504. {
  505. if (ds == NULL) {
  506. printk(KERN_WARNING "%s NULL device\n", __func__);
  507. return;
  508. }
  509. printk(KERN_WARNING " ds %s\n"
  510. " ref count %d\n"
  511. " client %p\n"
  512. " cl_exchange_flags %x\n",
  513. ds->ds_remotestr,
  514. refcount_read(&ds->ds_count), ds->ds_clp,
  515. ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0);
  516. }
  517. static bool
  518. same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2)
  519. {
  520. struct sockaddr_in *a, *b;
  521. struct sockaddr_in6 *a6, *b6;
  522. if (addr1->sa_family != addr2->sa_family)
  523. return false;
  524. switch (addr1->sa_family) {
  525. case AF_INET:
  526. a = (struct sockaddr_in *)addr1;
  527. b = (struct sockaddr_in *)addr2;
  528. if (a->sin_addr.s_addr == b->sin_addr.s_addr &&
  529. a->sin_port == b->sin_port)
  530. return true;
  531. break;
  532. case AF_INET6:
  533. a6 = (struct sockaddr_in6 *)addr1;
  534. b6 = (struct sockaddr_in6 *)addr2;
  535. /* LINKLOCAL addresses must have matching scope_id */
  536. if (ipv6_addr_src_scope(&a6->sin6_addr) ==
  537. IPV6_ADDR_SCOPE_LINKLOCAL &&
  538. a6->sin6_scope_id != b6->sin6_scope_id)
  539. return false;
  540. if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) &&
  541. a6->sin6_port == b6->sin6_port)
  542. return true;
  543. break;
  544. default:
  545. dprintk("%s: unhandled address family: %u\n",
  546. __func__, addr1->sa_family);
  547. return false;
  548. }
  549. return false;
  550. }
  551. /*
  552. * Checks if 'dsaddrs1' contains a subset of 'dsaddrs2'. If it does,
  553. * declare a match.
  554. */
  555. static bool
  556. _same_data_server_addrs_locked(const struct list_head *dsaddrs1,
  557. const struct list_head *dsaddrs2)
  558. {
  559. struct nfs4_pnfs_ds_addr *da1, *da2;
  560. struct sockaddr *sa1, *sa2;
  561. bool match = false;
  562. list_for_each_entry(da1, dsaddrs1, da_node) {
  563. sa1 = (struct sockaddr *)&da1->da_addr;
  564. match = false;
  565. list_for_each_entry(da2, dsaddrs2, da_node) {
  566. sa2 = (struct sockaddr *)&da2->da_addr;
  567. match = same_sockaddr(sa1, sa2);
  568. if (match)
  569. break;
  570. }
  571. if (!match)
  572. break;
  573. }
  574. return match;
  575. }
  576. /*
  577. * Lookup DS by addresses. nfs4_ds_cache_lock is held
  578. */
  579. static struct nfs4_pnfs_ds *
  580. _data_server_lookup_locked(const struct list_head *dsaddrs)
  581. {
  582. struct nfs4_pnfs_ds *ds;
  583. list_for_each_entry(ds, &nfs4_data_server_cache, ds_node)
  584. if (_same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs))
  585. return ds;
  586. return NULL;
  587. }
  588. static struct nfs4_pnfs_ds_addr *nfs4_pnfs_ds_addr_alloc(gfp_t gfp_flags)
  589. {
  590. struct nfs4_pnfs_ds_addr *da = kzalloc(sizeof(*da), gfp_flags);
  591. if (da)
  592. INIT_LIST_HEAD(&da->da_node);
  593. return da;
  594. }
  595. static void nfs4_pnfs_ds_addr_free(struct nfs4_pnfs_ds_addr *da)
  596. {
  597. kfree(da->da_remotestr);
  598. kfree(da->da_netid);
  599. kfree(da);
  600. }
  601. static void destroy_ds(struct nfs4_pnfs_ds *ds)
  602. {
  603. struct nfs4_pnfs_ds_addr *da;
  604. dprintk("--> %s\n", __func__);
  605. ifdebug(FACILITY)
  606. print_ds(ds);
  607. nfs_put_client(ds->ds_clp);
  608. while (!list_empty(&ds->ds_addrs)) {
  609. da = list_first_entry(&ds->ds_addrs,
  610. struct nfs4_pnfs_ds_addr,
  611. da_node);
  612. list_del_init(&da->da_node);
  613. nfs4_pnfs_ds_addr_free(da);
  614. }
  615. kfree(ds->ds_remotestr);
  616. kfree(ds);
  617. }
  618. void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds)
  619. {
  620. if (refcount_dec_and_lock(&ds->ds_count,
  621. &nfs4_ds_cache_lock)) {
  622. list_del_init(&ds->ds_node);
  623. spin_unlock(&nfs4_ds_cache_lock);
  624. destroy_ds(ds);
  625. }
  626. }
  627. EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_put);
  628. /*
  629. * Create a string with a human readable address and port to avoid
  630. * complicated setup around many dprinks.
  631. */
  632. static char *
  633. nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags)
  634. {
  635. struct nfs4_pnfs_ds_addr *da;
  636. char *remotestr;
  637. size_t len;
  638. char *p;
  639. len = 3; /* '{', '}' and eol */
  640. list_for_each_entry(da, dsaddrs, da_node) {
  641. len += strlen(da->da_remotestr) + 1; /* string plus comma */
  642. }
  643. remotestr = kzalloc(len, gfp_flags);
  644. if (!remotestr)
  645. return NULL;
  646. p = remotestr;
  647. *(p++) = '{';
  648. len--;
  649. list_for_each_entry(da, dsaddrs, da_node) {
  650. size_t ll = strlen(da->da_remotestr);
  651. if (ll > len)
  652. goto out_err;
  653. memcpy(p, da->da_remotestr, ll);
  654. p += ll;
  655. len -= ll;
  656. if (len < 1)
  657. goto out_err;
  658. (*p++) = ',';
  659. len--;
  660. }
  661. if (len < 2)
  662. goto out_err;
  663. *(p++) = '}';
  664. *p = '\0';
  665. return remotestr;
  666. out_err:
  667. kfree(remotestr);
  668. return NULL;
  669. }
  670. /*
  671. * Given a list of multipath struct nfs4_pnfs_ds_addr, add it to ds cache if
  672. * uncached and return cached struct nfs4_pnfs_ds.
  673. */
  674. struct nfs4_pnfs_ds *
  675. nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
  676. {
  677. struct nfs4_pnfs_ds *tmp_ds, *ds = NULL;
  678. char *remotestr;
  679. if (list_empty(dsaddrs)) {
  680. dprintk("%s: no addresses defined\n", __func__);
  681. goto out;
  682. }
  683. ds = kzalloc(sizeof(*ds), gfp_flags);
  684. if (!ds)
  685. goto out;
  686. /* this is only used for debugging, so it's ok if its NULL */
  687. remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags);
  688. spin_lock(&nfs4_ds_cache_lock);
  689. tmp_ds = _data_server_lookup_locked(dsaddrs);
  690. if (tmp_ds == NULL) {
  691. INIT_LIST_HEAD(&ds->ds_addrs);
  692. list_splice_init(dsaddrs, &ds->ds_addrs);
  693. ds->ds_remotestr = remotestr;
  694. refcount_set(&ds->ds_count, 1);
  695. INIT_LIST_HEAD(&ds->ds_node);
  696. ds->ds_clp = NULL;
  697. list_add(&ds->ds_node, &nfs4_data_server_cache);
  698. dprintk("%s add new data server %s\n", __func__,
  699. ds->ds_remotestr);
  700. } else {
  701. kfree(remotestr);
  702. kfree(ds);
  703. refcount_inc(&tmp_ds->ds_count);
  704. dprintk("%s data server %s found, inc'ed ds_count to %d\n",
  705. __func__, tmp_ds->ds_remotestr,
  706. refcount_read(&tmp_ds->ds_count));
  707. ds = tmp_ds;
  708. }
  709. spin_unlock(&nfs4_ds_cache_lock);
  710. out:
  711. return ds;
  712. }
  713. EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_add);
  714. static int nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds)
  715. {
  716. might_sleep();
  717. return wait_on_bit(&ds->ds_state, NFS4DS_CONNECTING, TASK_KILLABLE);
  718. }
  719. static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds)
  720. {
  721. smp_mb__before_atomic();
  722. clear_and_wake_up_bit(NFS4DS_CONNECTING, &ds->ds_state);
  723. }
  724. static struct nfs_client *(*get_v3_ds_connect)(
  725. struct nfs_server *mds_srv,
  726. const struct sockaddr_storage *ds_addr,
  727. int ds_addrlen,
  728. int ds_proto,
  729. unsigned int ds_timeo,
  730. unsigned int ds_retrans);
  731. static bool load_v3_ds_connect(void)
  732. {
  733. if (!get_v3_ds_connect) {
  734. get_v3_ds_connect = symbol_request(nfs3_set_ds_client);
  735. WARN_ON_ONCE(!get_v3_ds_connect);
  736. }
  737. return(get_v3_ds_connect != NULL);
  738. }
  739. void nfs4_pnfs_v3_ds_connect_unload(void)
  740. {
  741. if (get_v3_ds_connect) {
  742. symbol_put(nfs3_set_ds_client);
  743. get_v3_ds_connect = NULL;
  744. }
  745. }
  746. static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv,
  747. struct nfs4_pnfs_ds *ds,
  748. unsigned int timeo,
  749. unsigned int retrans)
  750. {
  751. struct nfs_client *clp = ERR_PTR(-EIO);
  752. struct nfs4_pnfs_ds_addr *da;
  753. int status = 0;
  754. dprintk("--> %s DS %s\n", __func__, ds->ds_remotestr);
  755. if (!load_v3_ds_connect())
  756. return -EPROTONOSUPPORT;
  757. list_for_each_entry(da, &ds->ds_addrs, da_node) {
  758. dprintk("%s: DS %s: trying address %s\n",
  759. __func__, ds->ds_remotestr, da->da_remotestr);
  760. if (!IS_ERR(clp)) {
  761. struct xprt_create xprt_args = {
  762. .ident = da->da_transport,
  763. .net = clp->cl_net,
  764. .dstaddr = (struct sockaddr *)&da->da_addr,
  765. .addrlen = da->da_addrlen,
  766. .servername = clp->cl_hostname,
  767. };
  768. if (da->da_transport != clp->cl_proto)
  769. continue;
  770. if (da->da_addr.ss_family != clp->cl_addr.ss_family)
  771. continue;
  772. /* Add this address as an alias */
  773. rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args,
  774. rpc_clnt_test_and_add_xprt, NULL);
  775. continue;
  776. }
  777. clp = get_v3_ds_connect(mds_srv,
  778. &da->da_addr,
  779. da->da_addrlen, da->da_transport,
  780. timeo, retrans);
  781. if (IS_ERR(clp))
  782. continue;
  783. clp->cl_rpcclient->cl_softerr = 0;
  784. clp->cl_rpcclient->cl_softrtry = 0;
  785. }
  786. if (IS_ERR(clp)) {
  787. status = PTR_ERR(clp);
  788. goto out;
  789. }
  790. smp_wmb();
  791. WRITE_ONCE(ds->ds_clp, clp);
  792. dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr);
  793. out:
  794. return status;
  795. }
  796. static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv,
  797. struct nfs4_pnfs_ds *ds,
  798. unsigned int timeo,
  799. unsigned int retrans,
  800. u32 minor_version)
  801. {
  802. struct nfs_client *clp = ERR_PTR(-EIO);
  803. struct nfs4_pnfs_ds_addr *da;
  804. int status = 0;
  805. dprintk("--> %s DS %s\n", __func__, ds->ds_remotestr);
  806. list_for_each_entry(da, &ds->ds_addrs, da_node) {
  807. dprintk("%s: DS %s: trying address %s\n",
  808. __func__, ds->ds_remotestr, da->da_remotestr);
  809. if (!IS_ERR(clp) && clp->cl_mvops->session_trunk) {
  810. struct xprt_create xprt_args = {
  811. .ident = da->da_transport,
  812. .net = clp->cl_net,
  813. .dstaddr = (struct sockaddr *)&da->da_addr,
  814. .addrlen = da->da_addrlen,
  815. .servername = clp->cl_hostname,
  816. };
  817. struct nfs4_add_xprt_data xprtdata = {
  818. .clp = clp,
  819. };
  820. struct rpc_add_xprt_test rpcdata = {
  821. .add_xprt_test = clp->cl_mvops->session_trunk,
  822. .data = &xprtdata,
  823. };
  824. if (da->da_transport != clp->cl_proto)
  825. continue;
  826. if (da->da_addr.ss_family != clp->cl_addr.ss_family)
  827. continue;
  828. /**
  829. * Test this address for session trunking and
  830. * add as an alias
  831. */
  832. xprtdata.cred = nfs4_get_clid_cred(clp);
  833. rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args,
  834. rpc_clnt_setup_test_and_add_xprt,
  835. &rpcdata);
  836. if (xprtdata.cred)
  837. put_cred(xprtdata.cred);
  838. } else {
  839. clp = nfs4_set_ds_client(mds_srv,
  840. &da->da_addr,
  841. da->da_addrlen,
  842. da->da_transport, timeo,
  843. retrans, minor_version);
  844. if (IS_ERR(clp))
  845. continue;
  846. status = nfs4_init_ds_session(clp,
  847. mds_srv->nfs_client->cl_lease_time);
  848. if (status) {
  849. nfs_put_client(clp);
  850. clp = ERR_PTR(-EIO);
  851. continue;
  852. }
  853. }
  854. }
  855. if (IS_ERR(clp)) {
  856. status = PTR_ERR(clp);
  857. goto out;
  858. }
  859. smp_wmb();
  860. WRITE_ONCE(ds->ds_clp, clp);
  861. dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr);
  862. out:
  863. return status;
  864. }
  865. /*
  866. * Create an rpc connection to the nfs4_pnfs_ds data server.
  867. * Currently only supports IPv4 and IPv6 addresses.
  868. * If connection fails, make devid unavailable and return a -errno.
  869. */
  870. int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
  871. struct nfs4_deviceid_node *devid, unsigned int timeo,
  872. unsigned int retrans, u32 version, u32 minor_version)
  873. {
  874. int err;
  875. do {
  876. err = nfs4_wait_ds_connect(ds);
  877. if (err || ds->ds_clp)
  878. goto out;
  879. if (nfs4_test_deviceid_unavailable(devid))
  880. return -ENODEV;
  881. } while (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) != 0);
  882. if (ds->ds_clp)
  883. goto connect_done;
  884. switch (version) {
  885. case 3:
  886. err = _nfs4_pnfs_v3_ds_connect(mds_srv, ds, timeo, retrans);
  887. break;
  888. case 4:
  889. err = _nfs4_pnfs_v4_ds_connect(mds_srv, ds, timeo, retrans,
  890. minor_version);
  891. break;
  892. default:
  893. dprintk("%s: unsupported DS version %d\n", __func__, version);
  894. err = -EPROTONOSUPPORT;
  895. }
  896. connect_done:
  897. nfs4_clear_ds_conn_bit(ds);
  898. out:
  899. /*
  900. * At this point the ds->ds_clp should be ready, but it might have
  901. * hit an error.
  902. */
  903. if (!err) {
  904. if (!ds->ds_clp || !nfs_client_init_is_complete(ds->ds_clp)) {
  905. WARN_ON_ONCE(ds->ds_clp ||
  906. !nfs4_test_deviceid_unavailable(devid));
  907. return -EINVAL;
  908. }
  909. err = nfs_client_init_status(ds->ds_clp);
  910. }
  911. return err;
  912. }
  913. EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_connect);
  914. /*
  915. * Currently only supports ipv4, ipv6 and one multi-path address.
  916. */
  917. struct nfs4_pnfs_ds_addr *
  918. nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags)
  919. {
  920. struct nfs4_pnfs_ds_addr *da = NULL;
  921. char *buf, *portstr;
  922. __be16 port;
  923. ssize_t nlen, rlen;
  924. int tmp[2];
  925. char *netid;
  926. size_t len;
  927. char *startsep = "";
  928. char *endsep = "";
  929. /* r_netid */
  930. nlen = xdr_stream_decode_string_dup(xdr, &netid, XDR_MAX_NETOBJ,
  931. gfp_flags);
  932. if (unlikely(nlen < 0))
  933. goto out_err;
  934. /* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */
  935. /* port is ".ABC.DEF", 8 chars max */
  936. rlen = xdr_stream_decode_string_dup(xdr, &buf, INET6_ADDRSTRLEN +
  937. IPV6_SCOPE_ID_LEN + 8, gfp_flags);
  938. if (unlikely(rlen < 0))
  939. goto out_free_netid;
  940. /* replace port '.' with '-' */
  941. portstr = strrchr(buf, '.');
  942. if (!portstr) {
  943. dprintk("%s: Failed finding expected dot in port\n",
  944. __func__);
  945. goto out_free_buf;
  946. }
  947. *portstr = '-';
  948. /* find '.' between address and port */
  949. portstr = strrchr(buf, '.');
  950. if (!portstr) {
  951. dprintk("%s: Failed finding expected dot between address and "
  952. "port\n", __func__);
  953. goto out_free_buf;
  954. }
  955. *portstr = '\0';
  956. da = nfs4_pnfs_ds_addr_alloc(gfp_flags);
  957. if (unlikely(!da))
  958. goto out_free_buf;
  959. if (!rpc_pton(net, buf, portstr-buf, (struct sockaddr *)&da->da_addr,
  960. sizeof(da->da_addr))) {
  961. dprintk("%s: error parsing address %s\n", __func__, buf);
  962. goto out_free_da;
  963. }
  964. portstr++;
  965. sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]);
  966. port = htons((tmp[0] << 8) | (tmp[1]));
  967. switch (da->da_addr.ss_family) {
  968. case AF_INET:
  969. ((struct sockaddr_in *)&da->da_addr)->sin_port = port;
  970. da->da_addrlen = sizeof(struct sockaddr_in);
  971. break;
  972. case AF_INET6:
  973. ((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port;
  974. da->da_addrlen = sizeof(struct sockaddr_in6);
  975. startsep = "[";
  976. endsep = "]";
  977. break;
  978. default:
  979. dprintk("%s: unsupported address family: %u\n",
  980. __func__, da->da_addr.ss_family);
  981. goto out_free_da;
  982. }
  983. da->da_transport = xprt_find_transport_ident(netid);
  984. if (da->da_transport < 0) {
  985. dprintk("%s: ERROR: unknown r_netid \"%s\"\n",
  986. __func__, netid);
  987. goto out_free_da;
  988. }
  989. da->da_netid = netid;
  990. /* save human readable address */
  991. len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7;
  992. da->da_remotestr = kzalloc(len, gfp_flags);
  993. /* NULL is ok, only used for dprintk */
  994. if (da->da_remotestr)
  995. snprintf(da->da_remotestr, len, "%s%s%s:%u", startsep,
  996. buf, endsep, ntohs(port));
  997. dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr);
  998. kfree(buf);
  999. return da;
  1000. out_free_da:
  1001. kfree(da);
  1002. out_free_buf:
  1003. dprintk("%s: Error parsing DS addr: %s\n", __func__, buf);
  1004. kfree(buf);
  1005. out_free_netid:
  1006. kfree(netid);
  1007. out_err:
  1008. return NULL;
  1009. }
  1010. EXPORT_SYMBOL_GPL(nfs4_decode_mp_ds_addr);
  1011. void
  1012. pnfs_layout_mark_request_commit(struct nfs_page *req,
  1013. struct pnfs_layout_segment *lseg,
  1014. struct nfs_commit_info *cinfo,
  1015. u32 ds_commit_idx)
  1016. {
  1017. struct list_head *list;
  1018. struct pnfs_commit_array *array;
  1019. struct pnfs_commit_bucket *bucket;
  1020. mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
  1021. array = pnfs_lookup_commit_array(cinfo->ds, lseg);
  1022. if (!array || !pnfs_is_valid_lseg(lseg))
  1023. goto out_resched;
  1024. bucket = &array->buckets[ds_commit_idx];
  1025. list = &bucket->written;
  1026. /* Non-empty buckets hold a reference on the lseg. That ref
  1027. * is normally transferred to the COMMIT call and released
  1028. * there. It could also be released if the last req is pulled
  1029. * off due to a rewrite, in which case it will be done in
  1030. * pnfs_common_clear_request_commit
  1031. */
  1032. if (!bucket->lseg)
  1033. bucket->lseg = pnfs_get_lseg(lseg);
  1034. set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
  1035. cinfo->ds->nwritten++;
  1036. nfs_request_add_commit_list_locked(req, list, cinfo);
  1037. mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
  1038. nfs_mark_page_unstable(req->wb_page, cinfo);
  1039. return;
  1040. out_resched:
  1041. mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
  1042. cinfo->completion_ops->resched_write(cinfo, req);
  1043. }
  1044. EXPORT_SYMBOL_GPL(pnfs_layout_mark_request_commit);
  1045. int
  1046. pnfs_nfs_generic_sync(struct inode *inode, bool datasync)
  1047. {
  1048. int ret;
  1049. if (!pnfs_layoutcommit_outstanding(inode))
  1050. return 0;
  1051. ret = nfs_commit_inode(inode, FLUSH_SYNC);
  1052. if (ret < 0)
  1053. return ret;
  1054. if (datasync)
  1055. return 0;
  1056. return pnfs_layoutcommit_inode(inode, true);
  1057. }
  1058. EXPORT_SYMBOL_GPL(pnfs_nfs_generic_sync);