rxe_mr.c

// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include "rxe.h"
#include "rxe_loc.h"

/* Return a random 8 bit key value that is
 * different from the last_key. Set last_key to -1
 * if this is the first key for an MR or MW.
 */
u8 rxe_get_next_key(u32 last_key)
{
	u8 key;

	do {
		get_random_bytes(&key, 1);
	} while (key == last_key);

	return key;
}
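
/* Check that the byte range [iova, iova + length - 1] lies entirely
 * inside the registered region. DMA MRs cover the caller's whole
 * address space, so any range is accepted for them.
 */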
int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
{
	switch (mr->type) {
	case IB_MR_TYPE_DMA:
		return 0;

	case IB_MR_TYPE_USER:
	case IB_MR_TYPE_MEM_REG:
		if (iova < mr->ibmr.iova || length > mr->ibmr.length ||
		    iova > mr->ibmr.iova + mr->ibmr.length - length)
			return -EFAULT;
		return 0;

	default:
		pr_warn("%s: mr type (%d) not supported\n",
			__func__, mr->type);
		return -EFAULT;
	}
}

#define IB_ACCESS_REMOTE	(IB_ACCESS_REMOTE_READ		\
				 | IB_ACCESS_REMOTE_WRITE	\
				 | IB_ACCESS_REMOTE_ATOMIC)
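
/* Initialize the generic part of an MR. The 32 bit lkey/rkey is built
 * from the pool element index in bits 31:8 and a random 8 bit key in
 * bits 7:0; the rkey is only set when remote access was requested.
 */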
static void rxe_mr_init(int access, struct rxe_mr *mr)
{
	u32 lkey = mr->elem.index << 8 | rxe_get_next_key(-1);
	u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;

	/* Set ibmr->lkey/rkey and also copy into the private lkey/rkey.
	 * For user MRs these will always be the same; for cases where the
	 * caller 'owns' the key portion they may differ until the REG_MR
	 * WQE is executed.
	 */
	mr->lkey = mr->ibmr.lkey = lkey;
	mr->rkey = mr->ibmr.rkey = rkey;

	mr->state = RXE_MR_STATE_INVALID;
	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
}
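
/* Allocate the two-level page map for num_buf buffers: an array of
 * pointers to rxe_map structs, each holding RXE_BUF_PER_MAP physical
 * buffer descriptors. On failure everything allocated so far is freed.
 */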
static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf)
{
	int i;
	int num_map;
	struct rxe_map **map = mr->map;

	num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;

	mr->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
	if (!mr->map)
		goto err1;

	for (i = 0; i < num_map; i++) {
		mr->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
		if (!mr->map[i])
			goto err2;
	}

	BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP));

	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
	mr->map_mask = RXE_BUF_PER_MAP - 1;

	mr->num_buf = num_buf;
	mr->num_map = num_map;
	mr->max_buf = num_map * RXE_BUF_PER_MAP;

	return 0;

err2:
	for (i--; i >= 0; i--)
		kfree(mr->map[i]);

	kfree(mr->map);
	mr->map = NULL;
err1:
	return -ENOMEM;
}
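
/* A DMA MR has no page map; it simply grants access to the caller's
 * kernel virtual address space, so it is marked valid immediately.
 */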
void rxe_mr_init_dma(int access, struct rxe_mr *mr)
{
	rxe_mr_init(access, mr);

	mr->access = access;
	mr->state = RXE_MR_STATE_VALID;
	mr->type = IB_MR_TYPE_DMA;
}
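
/* Register a user MR: pin the user pages with ib_umem_get(), allocate
 * the page map, then record the kernel virtual address and size of
 * each pinned page.
 */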
int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
		     int access, struct rxe_mr *mr)
{
	struct rxe_map **map;
	struct rxe_phys_buf *buf = NULL;
	struct ib_umem *umem;
	struct sg_page_iter sg_iter;
	int num_buf;
	void *vaddr;
	int err;

	umem = ib_umem_get(&rxe->ib_dev, start, length, access);
	if (IS_ERR(umem)) {
		pr_warn("%s: Unable to pin memory region err = %d\n",
			__func__, (int)PTR_ERR(umem));
		err = PTR_ERR(umem);
		goto err_out;
	}

	num_buf = ib_umem_num_pages(umem);

	rxe_mr_init(access, mr);

	err = rxe_mr_alloc(mr, num_buf);
	if (err) {
		pr_warn("%s: Unable to allocate memory for map\n",
			__func__);
		goto err_release_umem;
	}

	mr->page_shift = PAGE_SHIFT;
	mr->page_mask = PAGE_SIZE - 1;

	num_buf = 0;
	map = mr->map;
	if (length > 0) {
		buf = map[0]->buf;

		for_each_sgtable_page (&umem->sgt_append.sgt, &sg_iter, 0) {
			if (num_buf >= RXE_BUF_PER_MAP) {
				map++;
				buf = map[0]->buf;
				num_buf = 0;
			}

			vaddr = page_address(sg_page_iter_page(&sg_iter));
			if (!vaddr) {
				pr_warn("%s: Unable to get virtual address\n",
					__func__);
				err = -ENOMEM;
				goto err_release_umem;
			}
			buf->addr = (uintptr_t)vaddr;
			buf->size = PAGE_SIZE;
			num_buf++;
			buf++;
		}
	}

	mr->umem = umem;
	mr->access = access;
	mr->offset = ib_umem_offset(umem);
	mr->state = RXE_MR_STATE_VALID;
	mr->type = IB_MR_TYPE_USER;

	return 0;

err_release_umem:
	ib_umem_release(umem);
err_out:
	return err;
}
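
/* Set up a fast-register MR (used by the REG_MR work request). The
 * page map is sized for max_pages, but the MR stays in the FREE state
 * until a REG_MR WQE populates and validates it.
 */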
int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr)
{
	int err;

	/* always allow remote access for FMRs */
	rxe_mr_init(IB_ACCESS_REMOTE, mr);

	err = rxe_mr_alloc(mr, max_pages);
	if (err)
		goto err1;

	mr->max_buf = max_pages;
	mr->state = RXE_MR_STATE_FREE;
	mr->type = IB_MR_TYPE_MEM_REG;

	return 0;

err1:
	return err;
}
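
/* Translate an iova within the MR into a (map index, buffer index,
 * offset) triple. When page_shift is set all buffers have the same
 * power-of-two size and the lookup is a shift and mask; otherwise the
 * buffer sizes are walked linearly.
 */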
static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
			size_t *offset_out)
{
	size_t offset = iova - mr->ibmr.iova + mr->offset;
	int map_index;
	int buf_index;
	u64 length;

	if (likely(mr->page_shift)) {
		*offset_out = offset & mr->page_mask;
		offset >>= mr->page_shift;
		*n_out = offset & mr->map_mask;
		*m_out = offset >> mr->map_shift;
	} else {
		map_index = 0;
		buf_index = 0;

		length = mr->map[map_index]->buf[buf_index].size;

		while (offset >= length) {
			offset -= length;
			buf_index++;

			if (buf_index == RXE_BUF_PER_MAP) {
				map_index++;
				buf_index = 0;
			}
			length = mr->map[map_index]->buf[buf_index].size;
		}

		*m_out = map_index;
		*n_out = buf_index;
		*offset_out = offset;
	}
}
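
/* Return the kernel virtual address that backs iova, or NULL if the MR
 * is not valid, the range check fails, or the requested length would
 * cross a buffer (page) boundary. DMA MRs (no map) return iova as-is.
 */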
void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
{
	size_t offset;
	int m, n;
	void *addr;

	if (mr->state != RXE_MR_STATE_VALID) {
		pr_warn("mr not in valid state\n");
		addr = NULL;
		goto out;
	}

	if (!mr->map) {
		addr = (void *)(uintptr_t)iova;
		goto out;
	}

	if (mr_check_range(mr, iova, length)) {
		pr_warn("range violation\n");
		addr = NULL;
		goto out;
	}

	lookup_iova(mr, iova, &m, &n, &offset);

	if (offset + length > mr->map[m]->buf[n].size) {
		pr_warn("crosses page boundary\n");
		addr = NULL;
		goto out;
	}

	addr = (void *)(uintptr_t)mr->map[m]->buf[n].addr + offset;

out:
	return addr;
}

/* copy data from a range (addr, addr+length-1) to or from
 * an MR object starting at iova.
 */
int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
		enum rxe_mr_copy_dir dir)
{
	int err;
	int bytes;
	u8 *va;
	struct rxe_map **map;
	struct rxe_phys_buf *buf;
	int m;
	int i;
	size_t offset;

	if (length == 0)
		return 0;

	if (mr->type == IB_MR_TYPE_DMA) {
		u8 *src, *dest;

		src = (dir == RXE_TO_MR_OBJ) ? addr : ((void *)(uintptr_t)iova);
		dest = (dir == RXE_TO_MR_OBJ) ? ((void *)(uintptr_t)iova) : addr;

		memcpy(dest, src, length);

		return 0;
	}

	WARN_ON_ONCE(!mr->map);

	err = mr_check_range(mr, iova, length);
	if (err) {
		err = -EFAULT;
		goto err1;
	}

	lookup_iova(mr, iova, &m, &i, &offset);

	map = mr->map + m;
	buf = map[0]->buf + i;

	while (length > 0) {
		u8 *src, *dest;

		va = (u8 *)(uintptr_t)buf->addr + offset;
		src = (dir == RXE_TO_MR_OBJ) ? addr : va;
		dest = (dir == RXE_TO_MR_OBJ) ? va : addr;

		bytes = buf->size - offset;
		if (bytes > length)
			bytes = length;

		memcpy(dest, src, bytes);

		length -= bytes;
		addr += bytes;

		offset = 0;
		buf++;
		i++;

		if (i == RXE_BUF_PER_MAP) {
			i = 0;
			map++;
			buf = map[0]->buf;
		}
	}

	return 0;

err1:
	return err;
}

/* copy data in or out of a wqe, i.e. an sg list,
 * under the control of a dma descriptor
 */
int copy_data(
	struct rxe_pd *pd,
	int access,
	struct rxe_dma_info *dma,
	void *addr,
	int length,
	enum rxe_mr_copy_dir dir)
{
	int bytes;
	struct rxe_sge *sge = &dma->sge[dma->cur_sge];
	int offset = dma->sge_offset;
	int resid = dma->resid;
	struct rxe_mr *mr = NULL;
	u64 iova;
	int err;

	if (length == 0)
		return 0;

	if (length > resid) {
		err = -EINVAL;
		goto err2;
	}

	if (sge->length && (offset < sge->length)) {
		mr = lookup_mr(pd, access, sge->lkey, RXE_LOOKUP_LOCAL);
		if (!mr) {
			err = -EINVAL;
			goto err1;
		}
	}

	while (length > 0) {
		bytes = length;

		if (offset >= sge->length) {
			if (mr) {
				rxe_put(mr);
				mr = NULL;
			}
			sge++;
			dma->cur_sge++;
			offset = 0;

			if (dma->cur_sge >= dma->num_sge) {
				err = -ENOSPC;
				goto err2;
			}

			if (sge->length) {
				mr = lookup_mr(pd, access, sge->lkey,
					       RXE_LOOKUP_LOCAL);
				if (!mr) {
					err = -EINVAL;
					goto err1;
				}
			} else {
				continue;
			}
		}

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		if (bytes > 0) {
			iova = sge->addr + offset;

			err = rxe_mr_copy(mr, iova, addr, bytes, dir);
			if (err)
				goto err2;

			offset += bytes;
			resid -= bytes;
			length -= bytes;
			addr += bytes;
		}
	}

	dma->sge_offset = offset;
	dma->resid = resid;

	if (mr)
		rxe_put(mr);

	return 0;

err2:
	if (mr)
		rxe_put(mr);
err1:
	return err;
}
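
/* Skip length bytes of the dma descriptor's sg list without copying,
 * advancing cur_sge/sge_offset and reducing resid by the bytes
 * skipped, just as copy_data() would.
 */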
int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
{
	struct rxe_sge *sge = &dma->sge[dma->cur_sge];
	int offset = dma->sge_offset;
	int resid = dma->resid;

	while (length) {
		unsigned int bytes;

		if (offset >= sge->length) {
			sge++;
			dma->cur_sge++;
			offset = 0;
			if (dma->cur_sge >= dma->num_sge)
				return -ENOSPC;
		}

		bytes = length;

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		offset += bytes;
		resid -= bytes;
		length -= bytes;
	}

	dma->sge_offset = offset;
	dma->resid = resid;

	return 0;
}

/* (1) find the mr corresponding to lkey/rkey
 *     depending on lookup_type
 * (2) verify that the (qp) pd matches the mr pd
 * (3) verify that the mr can support the requested access
 * (4) verify that mr state is valid
 */
struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
			 enum rxe_mr_lookup_type type)
{
	struct rxe_mr *mr;
	struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
	int index = key >> 8;

	mr = rxe_pool_get_index(&rxe->mr_pool, index);
	if (!mr)
		return NULL;

	if (unlikely((type == RXE_LOOKUP_LOCAL && mr->lkey != key) ||
		     (type == RXE_LOOKUP_REMOTE && mr->rkey != key) ||
		     mr_pd(mr) != pd || (access && !(access & mr->access)) ||
		     mr->state != RXE_MR_STATE_VALID)) {
		rxe_put(mr);
		mr = NULL;
	}

	return mr;
}
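
/* Invalidate a fast-register MR in response to a local or remote
 * invalidate operation: check the key, refuse if memory windows are
 * still bound to the MR, then return it to the FREE state.
 */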
int rxe_invalidate_mr(struct rxe_qp *qp, u32 key)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct rxe_mr *mr;
	int ret;

	mr = rxe_pool_get_index(&rxe->mr_pool, key >> 8);
	if (!mr) {
		pr_err("%s: No MR for key %#x\n", __func__, key);
		ret = -EINVAL;
		goto err;
	}

	if (mr->rkey ? (key != mr->rkey) : (key != mr->lkey)) {
		pr_err("%s: wr key (%#x) doesn't match mr key (%#x)\n",
			__func__, key, (mr->rkey ? mr->rkey : mr->lkey));
		ret = -EINVAL;
		goto err_drop_ref;
	}

	if (atomic_read(&mr->num_mw) > 0) {
		pr_warn("%s: Attempt to invalidate an MR while bound to MWs\n",
			__func__);
		ret = -EINVAL;
		goto err_drop_ref;
	}

	if (unlikely(mr->type != IB_MR_TYPE_MEM_REG)) {
		pr_warn("%s: mr->type (%d) is wrong type\n", __func__, mr->type);
		ret = -EINVAL;
		goto err_drop_ref;
	}

	mr->state = RXE_MR_STATE_FREE;
	ret = 0;

err_drop_ref:
	rxe_put(mr);
err:
	return ret;
}

/* user can (re)register a fast MR by executing a REG_MR WQE.
 * user is expected to hold a reference on the ib mr until the
 * WQE completes.
 * Once a fast MR is created this is the only way to change the
 * private keys. It is the responsibility of the user to maintain
 * the ib mr keys in sync with rxe mr keys.
 */
int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
{
	struct rxe_mr *mr = to_rmr(wqe->wr.wr.reg.mr);
	u32 key = wqe->wr.wr.reg.key;
	u32 access = wqe->wr.wr.reg.access;

	/* user can only register MR in free state */
	if (unlikely(mr->state != RXE_MR_STATE_FREE)) {
		pr_warn("%s: mr->lkey = 0x%x not free\n",
			__func__, mr->lkey);
		return -EINVAL;
	}

	/* user can only register mr with qp in same protection domain */
	if (unlikely(qp->ibqp.pd != mr->ibmr.pd)) {
		pr_warn("%s: qp->pd and mr->pd don't match\n",
			__func__);
		return -EINVAL;
	}

	/* user is only allowed to change key portion of l/rkey */
	if (unlikely((mr->lkey & ~0xff) != (key & ~0xff))) {
		pr_warn("%s: key = 0x%x has wrong index mr->lkey = 0x%x\n",
			__func__, key, mr->lkey);
		return -EINVAL;
	}

	mr->access = access;
	mr->lkey = key;
	mr->rkey = (access & IB_ACCESS_REMOTE) ? key : 0;
	mr->ibmr.iova = wqe->wr.wr.reg.mr->iova;
	mr->state = RXE_MR_STATE_VALID;

	return 0;
}
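
/* Deregister an MR. Per IBA 10.6.7.2.6 this must fail while memory
 * windows are still bound to the MR.
 */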
int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
	struct rxe_mr *mr = to_rmr(ibmr);

	/* See IBA 10.6.7.2.6 */
	if (atomic_read(&mr->num_mw) > 0)
		return -EINVAL;

	rxe_cleanup(mr);

	return 0;
}
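
/* Pool cleanup callback: drop the PD reference, release the pinned
 * umem (if any) and free the two-level page map.
 */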
void rxe_mr_cleanup(struct rxe_pool_elem *elem)
{
	struct rxe_mr *mr = container_of(elem, typeof(*mr), elem);
	int i;

	rxe_put(mr_pd(mr));
	ib_umem_release(mr->umem);

	if (mr->map) {
		for (i = 0; i < mr->num_map; i++)
			kfree(mr->map[i]);

		kfree(mr->map);
	}
}