rxe_verbs.c

// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include <linux/dma-mapping.h>
#include <net/addrconf.h>
#include <rdma/uverbs_ioctl.h>

#include "rxe.h"
#include "rxe_queue.h"
#include "rxe_hw_counters.h"

static int rxe_query_device(struct ib_device *dev,
                            struct ib_device_attr *attr,
                            struct ib_udata *uhw)
{
        struct rxe_dev *rxe = to_rdev(dev);

        if (uhw->inlen || uhw->outlen)
                return -EINVAL;

        *attr = rxe->attr;
        return 0;
}

static int rxe_query_port(struct ib_device *dev,
                          u32 port_num, struct ib_port_attr *attr)
{
        struct rxe_dev *rxe = to_rdev(dev);
        int rc;

        /* *attr being zeroed by the caller, avoid zeroing it here */
        *attr = rxe->port.attr;

        mutex_lock(&rxe->usdev_lock);
        rc = ib_get_eth_speed(dev, port_num, &attr->active_speed,
                              &attr->active_width);

        if (attr->state == IB_PORT_ACTIVE)
                attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
        else if (dev_get_flags(rxe->ndev) & IFF_UP)
                attr->phys_state = IB_PORT_PHYS_STATE_POLLING;
        else
                attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;

        mutex_unlock(&rxe->usdev_lock);

        return rc;
}

static int rxe_query_pkey(struct ib_device *device,
                          u32 port_num, u16 index, u16 *pkey)
{
        if (index > 0)
                return -EINVAL;

        *pkey = IB_DEFAULT_PKEY_FULL;
        return 0;
}

static int rxe_modify_device(struct ib_device *dev,
                             int mask, struct ib_device_modify *attr)
{
        struct rxe_dev *rxe = to_rdev(dev);

        if (mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
                     IB_DEVICE_MODIFY_NODE_DESC))
                return -EOPNOTSUPP;

        if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
                rxe->attr.sys_image_guid = cpu_to_be64(attr->sys_image_guid);

        if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
                memcpy(rxe->ib_dev.node_desc,
                       attr->node_desc, sizeof(rxe->ib_dev.node_desc));
        }

        return 0;
}

static int rxe_modify_port(struct ib_device *dev,
                           u32 port_num, int mask, struct ib_port_modify *attr)
{
        struct rxe_dev *rxe = to_rdev(dev);
        struct rxe_port *port;

        port = &rxe->port;

        port->attr.port_cap_flags |= attr->set_port_cap_mask;
        port->attr.port_cap_flags &= ~attr->clr_port_cap_mask;

        if (mask & IB_PORT_RESET_QKEY_CNTR)
                port->attr.qkey_viol_cntr = 0;

        return 0;
}

static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev,
                                               u32 port_num)
{
        return IB_LINK_LAYER_ETHERNET;
}

static int rxe_alloc_ucontext(struct ib_ucontext *ibuc, struct ib_udata *udata)
{
        struct rxe_dev *rxe = to_rdev(ibuc->device);
        struct rxe_ucontext *uc = to_ruc(ibuc);

        return rxe_add_to_pool(&rxe->uc_pool, uc);
}

static void rxe_dealloc_ucontext(struct ib_ucontext *ibuc)
{
        struct rxe_ucontext *uc = to_ruc(ibuc);

        rxe_cleanup(uc);
}

static int rxe_port_immutable(struct ib_device *dev, u32 port_num,
                              struct ib_port_immutable *immutable)
{
        int err;
        struct ib_port_attr attr;

        immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;

        err = ib_query_port(dev, port_num, &attr);
        if (err)
                return err;

        immutable->pkey_tbl_len = attr.pkey_tbl_len;
        immutable->gid_tbl_len = attr.gid_tbl_len;
        immutable->max_mad_size = IB_MGMT_MAD_SIZE;

        return 0;
}

static int rxe_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
        struct rxe_dev *rxe = to_rdev(ibpd->device);
        struct rxe_pd *pd = to_rpd(ibpd);

        return rxe_add_to_pool(&rxe->pd_pool, pd);
}

static int rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
        struct rxe_pd *pd = to_rpd(ibpd);

        rxe_cleanup(pd);
        return 0;
}

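/* Address handle creation. A user provider that passes a response
 * buffer at least as large as rxe_create_ah_resp is treated as a
 * "new" provider and receives the AH index back; older providers are
 * signalled by ah_num == 0.
 */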
static int rxe_create_ah(struct ib_ah *ibah,
                         struct rdma_ah_init_attr *init_attr,
                         struct ib_udata *udata)
{
        struct rxe_dev *rxe = to_rdev(ibah->device);
        struct rxe_ah *ah = to_rah(ibah);
        struct rxe_create_ah_resp __user *uresp = NULL;
        int err;

        if (udata) {
                /* test if new user provider */
                if (udata->outlen >= sizeof(*uresp))
                        uresp = udata->outbuf;
                ah->is_user = true;
        } else {
                ah->is_user = false;
        }

        err = rxe_av_chk_attr(rxe, init_attr->ah_attr);
        if (err)
                return err;

        err = rxe_add_to_pool_ah(&rxe->ah_pool, ah,
                                 init_attr->flags & RDMA_CREATE_AH_SLEEPABLE);
        if (err)
                return err;

        /* create index > 0 */
        ah->ah_num = ah->elem.index;

        if (uresp) {
                /* only if new user provider */
                err = copy_to_user(&uresp->ah_num, &ah->ah_num,
                                   sizeof(uresp->ah_num));
                if (err) {
                        rxe_cleanup(ah);
                        return -EFAULT;
                }
        } else if (ah->is_user) {
                /* only if old user provider */
                ah->ah_num = 0;
        }

        rxe_init_av(init_attr->ah_attr, &ah->av);
        rxe_finalize(ah);

        return 0;
}

static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
{
        int err;
        struct rxe_dev *rxe = to_rdev(ibah->device);
        struct rxe_ah *ah = to_rah(ibah);

        err = rxe_av_chk_attr(rxe, attr);
        if (err)
                return err;

        rxe_init_av(attr, &ah->av);
        return 0;
}

static int rxe_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
{
        struct rxe_ah *ah = to_rah(ibah);

        memset(attr, 0, sizeof(*attr));
        attr->type = ibah->type;
        rxe_av_to_attr(&ah->av, attr);

        return 0;
}

static int rxe_destroy_ah(struct ib_ah *ibah, u32 flags)
{
        struct rxe_ah *ah = to_rah(ibah);

        rxe_cleanup_ah(ah, flags & RDMA_DESTROY_AH_SLEEPABLE);

        return 0;
}

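/* Copy one ib_recv_wr into the next free slot of a receive queue.
 * Fails with -ENOMEM when the queue is full and -EINVAL when the WR
 * carries more SGEs than the queue supports. Callers hold the
 * producer_lock.
 */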
static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
{
        int i;
        u32 length;
        struct rxe_recv_wqe *recv_wqe;
        int num_sge = ibwr->num_sge;
        int full;

        full = queue_full(rq->queue, QUEUE_TYPE_FROM_ULP);
        if (unlikely(full))
                return -ENOMEM;

        if (unlikely(num_sge > rq->max_sge))
                return -EINVAL;

        length = 0;
        for (i = 0; i < num_sge; i++)
                length += ibwr->sg_list[i].length;

        recv_wqe = queue_producer_addr(rq->queue, QUEUE_TYPE_FROM_ULP);

        recv_wqe->wr_id = ibwr->wr_id;
        memcpy(recv_wqe->dma.sge, ibwr->sg_list,
               num_sge * sizeof(struct ib_sge));

        recv_wqe->dma.length = length;
        recv_wqe->dma.resid = length;
        recv_wqe->dma.num_sge = num_sge;
        recv_wqe->dma.cur_sge = 0;
        recv_wqe->dma.sge_offset = 0;

        queue_advance_producer(rq->queue, QUEUE_TYPE_FROM_ULP);

        return 0;
}

static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init,
                          struct ib_udata *udata)
{
        int err;
        struct rxe_dev *rxe = to_rdev(ibsrq->device);
        struct rxe_pd *pd = to_rpd(ibsrq->pd);
        struct rxe_srq *srq = to_rsrq(ibsrq);
        struct rxe_create_srq_resp __user *uresp = NULL;

        if (udata) {
                if (udata->outlen < sizeof(*uresp))
                        return -EINVAL;
                uresp = udata->outbuf;
        }

        if (init->srq_type != IB_SRQT_BASIC)
                return -EOPNOTSUPP;

        err = rxe_srq_chk_init(rxe, init);
        if (err)
                return err;

        err = rxe_add_to_pool(&rxe->srq_pool, srq);
        if (err)
                return err;

        rxe_get(pd);
        srq->pd = pd;

        err = rxe_srq_from_init(rxe, srq, init, udata, uresp);
        if (err)
                goto err_cleanup;

        return 0;

err_cleanup:
        rxe_cleanup(srq);

        return err;
}

static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
                          enum ib_srq_attr_mask mask,
                          struct ib_udata *udata)
{
        int err;
        struct rxe_srq *srq = to_rsrq(ibsrq);
        struct rxe_dev *rxe = to_rdev(ibsrq->device);
        struct rxe_modify_srq_cmd ucmd = {};

        if (udata) {
                if (udata->inlen < sizeof(ucmd))
                        return -EINVAL;

                err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
                if (err)
                        return err;
        }

        err = rxe_srq_chk_attr(rxe, srq, attr, mask);
        if (err)
                return err;

        return rxe_srq_from_attr(rxe, srq, attr, mask, &ucmd, udata);
}

static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
{
        struct rxe_srq *srq = to_rsrq(ibsrq);

        if (srq->error)
                return -EINVAL;

        attr->max_wr = srq->rq.queue->buf->index_mask;
        attr->max_sge = srq->rq.max_sge;
        attr->srq_limit = srq->limit;
        return 0;
}

static int rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
{
        struct rxe_srq *srq = to_rsrq(ibsrq);

        rxe_cleanup(srq);
        return 0;
}

static int rxe_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
                             const struct ib_recv_wr **bad_wr)
{
        int err = 0;
        struct rxe_srq *srq = to_rsrq(ibsrq);
        unsigned long flags;

        spin_lock_irqsave(&srq->rq.producer_lock, flags);

        while (wr) {
                err = post_one_recv(&srq->rq, wr);
                if (unlikely(err))
                        break;
                wr = wr->next;
        }

        spin_unlock_irqrestore(&srq->rq.producer_lock, flags);

        if (err)
                *bad_wr = wr;

        return err;
}

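/* Queue pair creation: check the init attributes, reserve a QP from
 * the pool and build the QP state; the response structure is passed
 * back to user providers through udata.
 */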
static int rxe_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init,
                         struct ib_udata *udata)
{
        int err;
        struct rxe_dev *rxe = to_rdev(ibqp->device);
        struct rxe_pd *pd = to_rpd(ibqp->pd);
        struct rxe_qp *qp = to_rqp(ibqp);
        struct rxe_create_qp_resp __user *uresp = NULL;

        if (udata) {
                if (udata->outlen < sizeof(*uresp))
                        return -EINVAL;
                uresp = udata->outbuf;
        }

        if (init->create_flags)
                return -EOPNOTSUPP;

        err = rxe_qp_chk_init(rxe, init);
        if (err)
                return err;

        if (udata) {
                if (udata->inlen)
                        return -EINVAL;

                qp->is_user = true;
        } else {
                qp->is_user = false;
        }

        err = rxe_add_to_pool(&rxe->qp_pool, qp);
        if (err)
                return err;

        err = rxe_qp_from_init(rxe, qp, pd, init, uresp, ibqp->pd, udata);
        if (err)
                goto qp_init;

        rxe_finalize(qp);
        return 0;

qp_init:
        rxe_cleanup(qp);
        return err;
}

static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                         int mask, struct ib_udata *udata)
{
        int err;
        struct rxe_dev *rxe = to_rdev(ibqp->device);
        struct rxe_qp *qp = to_rqp(ibqp);

        if (mask & ~IB_QP_ATTR_STANDARD_BITS)
                return -EOPNOTSUPP;

        err = rxe_qp_chk_attr(rxe, qp, attr, mask);
        if (err)
                return err;

        err = rxe_qp_from_attr(qp, attr, mask, udata);
        if (err)
                return err;

        if ((mask & IB_QP_AV) && (attr->ah_attr.ah_flags & IB_AH_GRH))
                qp->src_port = rdma_get_udp_sport(attr->ah_attr.grh.flow_label,
                                                  qp->ibqp.qp_num,
                                                  qp->attr.dest_qp_num);

        return 0;
}

static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                        int mask, struct ib_qp_init_attr *init)
{
        struct rxe_qp *qp = to_rqp(ibqp);

        rxe_qp_to_init(qp, init);
        rxe_qp_to_attr(qp, attr, mask);

        return 0;
}

static int rxe_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
{
        struct rxe_qp *qp = to_rqp(ibqp);
        int ret;

        ret = rxe_qp_chk_destroy(qp);
        if (ret)
                return ret;

        rxe_cleanup(qp);

        return 0;
}

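/* Per-WR sanity checks: SGE count against the SQ limit, 8-byte size
 * and alignment for atomic operations, and the inline data cap.
 */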
static int validate_send_wr(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
                            unsigned int mask, unsigned int length)
{
        int num_sge = ibwr->num_sge;
        struct rxe_sq *sq = &qp->sq;

        if (unlikely(num_sge > sq->max_sge))
                return -EINVAL;

        if (unlikely(mask & WR_ATOMIC_MASK)) {
                if (length < 8)
                        return -EINVAL;

                if (atomic_wr(ibwr)->remote_addr & 0x7)
                        return -EINVAL;
        }

        if (unlikely((ibwr->send_flags & IB_SEND_INLINE) &&
                     (length > sq->max_inline)))
                return -EINVAL;

        return 0;
}

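/* Translate an ib_send_wr into the driver's rxe_send_wr layout,
 * copying the opcode-specific fields (UD/GSI, RDMA, atomic, local
 * invalidate and MR registration).
 */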
static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
                         const struct ib_send_wr *ibwr)
{
        wr->wr_id = ibwr->wr_id;
        wr->opcode = ibwr->opcode;
        wr->send_flags = ibwr->send_flags;

        if (qp_type(qp) == IB_QPT_UD ||
            qp_type(qp) == IB_QPT_GSI) {
                struct ib_ah *ibah = ud_wr(ibwr)->ah;

                wr->wr.ud.remote_qpn = ud_wr(ibwr)->remote_qpn;
                wr->wr.ud.remote_qkey = ud_wr(ibwr)->remote_qkey;
                wr->wr.ud.ah_num = to_rah(ibah)->ah_num;

                if (qp_type(qp) == IB_QPT_GSI)
                        wr->wr.ud.pkey_index = ud_wr(ibwr)->pkey_index;
                if (wr->opcode == IB_WR_SEND_WITH_IMM)
                        wr->ex.imm_data = ibwr->ex.imm_data;
        } else {
                switch (wr->opcode) {
                case IB_WR_RDMA_WRITE_WITH_IMM:
                        wr->ex.imm_data = ibwr->ex.imm_data;
                        fallthrough;
                case IB_WR_RDMA_READ:
                case IB_WR_RDMA_WRITE:
                        wr->wr.rdma.remote_addr = rdma_wr(ibwr)->remote_addr;
                        wr->wr.rdma.rkey = rdma_wr(ibwr)->rkey;
                        break;
                case IB_WR_SEND_WITH_IMM:
                        wr->ex.imm_data = ibwr->ex.imm_data;
                        break;
                case IB_WR_SEND_WITH_INV:
                        wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
                        break;
                case IB_WR_ATOMIC_CMP_AND_SWP:
                case IB_WR_ATOMIC_FETCH_AND_ADD:
                        wr->wr.atomic.remote_addr =
                                atomic_wr(ibwr)->remote_addr;
                        wr->wr.atomic.compare_add =
                                atomic_wr(ibwr)->compare_add;
                        wr->wr.atomic.swap = atomic_wr(ibwr)->swap;
                        wr->wr.atomic.rkey = atomic_wr(ibwr)->rkey;
                        break;
                case IB_WR_LOCAL_INV:
                        wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
                        break;
                case IB_WR_REG_MR:
                        wr->wr.reg.mr = reg_wr(ibwr)->mr;
                        wr->wr.reg.key = reg_wr(ibwr)->key;
                        wr->wr.reg.access = reg_wr(ibwr)->access;
                        break;
                default:
                        break;
                }
        }
}

static void copy_inline_data_to_wqe(struct rxe_send_wqe *wqe,
                                    const struct ib_send_wr *ibwr)
{
        struct ib_sge *sge = ibwr->sg_list;
        u8 *p = wqe->dma.inline_data;
        int i;

        for (i = 0; i < ibwr->num_sge; i++, sge++) {
                memcpy(p, (void *)(uintptr_t)sge->addr, sge->length);
                p += sge->length;
        }
}

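/* Build a send WQE from the WR. Local operations carry no DMA state
 * and return early; otherwise inline data or the SGE list is copied
 * in and the DMA state (iova, length, resid) is initialised.
 */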
static void init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
                          unsigned int mask, unsigned int length,
                          struct rxe_send_wqe *wqe)
{
        int num_sge = ibwr->num_sge;

        init_send_wr(qp, &wqe->wr, ibwr);

        /* local operation */
        if (unlikely(mask & WR_LOCAL_OP_MASK)) {
                wqe->mask = mask;
                wqe->state = wqe_state_posted;
                return;
        }

        if (unlikely(ibwr->send_flags & IB_SEND_INLINE))
                copy_inline_data_to_wqe(wqe, ibwr);
        else
                memcpy(wqe->dma.sge, ibwr->sg_list,
                       num_sge * sizeof(struct ib_sge));

        wqe->iova = mask & WR_ATOMIC_MASK ? atomic_wr(ibwr)->remote_addr :
                mask & WR_READ_OR_WRITE_MASK ? rdma_wr(ibwr)->remote_addr : 0;
        wqe->mask = mask;
        wqe->dma.length = length;
        wqe->dma.resid = length;
        wqe->dma.num_sge = num_sge;
        wqe->dma.cur_sge = 0;
        wqe->dma.sge_offset = 0;
        wqe->state = wqe_state_posted;
        wqe->ssn = atomic_add_return(1, &qp->ssn);
}

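/* Validate the WR, then reserve the next producer slot of the send
 * queue under sq_lock and build the WQE in place. Returns -ENOMEM
 * when the send queue is full.
 */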
static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
                         unsigned int mask, u32 length)
{
        int err;
        struct rxe_sq *sq = &qp->sq;
        struct rxe_send_wqe *send_wqe;
        unsigned long flags;
        int full;

        err = validate_send_wr(qp, ibwr, mask, length);
        if (err)
                return err;

        spin_lock_irqsave(&qp->sq.sq_lock, flags);

        full = queue_full(sq->queue, QUEUE_TYPE_FROM_ULP);

        if (unlikely(full)) {
                spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
                return -ENOMEM;
        }

        send_wqe = queue_producer_addr(sq->queue, QUEUE_TYPE_FROM_ULP);
        init_send_wqe(qp, ibwr, mask, length, send_wqe);

        queue_advance_producer(sq->queue, QUEUE_TYPE_FROM_ULP);

        spin_unlock_irqrestore(&qp->sq.sq_lock, flags);

        return 0;
}

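/* Kernel post-send path: walk the WR chain, map each opcode to its
 * mask, post the WQEs and kick the requester task. The offending WR
 * is returned through *bad_wr on error.
 */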
static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr,
                                const struct ib_send_wr **bad_wr)
{
        int err = 0;
        unsigned int mask;
        unsigned int length = 0;
        int i;
        struct ib_send_wr *next;

        while (wr) {
                mask = wr_opcode_mask(wr->opcode, qp);
                if (unlikely(!mask)) {
                        err = -EINVAL;
                        *bad_wr = wr;
                        break;
                }

                if (unlikely((wr->send_flags & IB_SEND_INLINE) &&
                             !(mask & WR_INLINE_MASK))) {
                        err = -EINVAL;
                        *bad_wr = wr;
                        break;
                }

                next = wr->next;

                length = 0;
                for (i = 0; i < wr->num_sge; i++)
                        length += wr->sg_list[i].length;

                err = post_one_send(qp, wr, mask, length);
                if (err) {
                        *bad_wr = wr;
                        break;
                }
                wr = next;
        }

        rxe_sched_task(&qp->req.task);
        if (unlikely(qp->req.state == QP_STATE_ERROR))
                rxe_sched_task(&qp->comp.task);

        return err;
}

static int rxe_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
                         const struct ib_send_wr **bad_wr)
{
        struct rxe_qp *qp = to_rqp(ibqp);

        if (unlikely(!qp->valid)) {
                *bad_wr = wr;
                return -EINVAL;
        }

        if (unlikely(qp->req.state < QP_STATE_READY)) {
                *bad_wr = wr;
                return -EINVAL;
        }

        if (qp->is_user) {
                /* Utilize process context to do protocol processing */
                rxe_run_task(&qp->req.task);
                return 0;
        } else
                return rxe_post_send_kernel(qp, wr, bad_wr);
}

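/* Post receive WRs to the QP's own receive queue. QPs attached to an
 * SRQ must post through rxe_post_srq_recv() instead, so qp->srq is
 * rejected here.
 */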
static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
                         const struct ib_recv_wr **bad_wr)
{
        int err = 0;
        struct rxe_qp *qp = to_rqp(ibqp);
        struct rxe_rq *rq = &qp->rq;
        unsigned long flags;

        if (unlikely((qp_state(qp) < IB_QPS_INIT) || !qp->valid)) {
                *bad_wr = wr;
                return -EINVAL;
        }

        if (unlikely(qp->srq)) {
                *bad_wr = wr;
                return -EINVAL;
        }

        spin_lock_irqsave(&rq->producer_lock, flags);

        while (wr) {
                err = post_one_recv(rq, wr);
                if (unlikely(err)) {
                        *bad_wr = wr;
                        break;
                }
                wr = wr->next;
        }

        spin_unlock_irqrestore(&rq->producer_lock, flags);

        if (qp->resp.state == QP_STATE_ERROR)
                rxe_sched_task(&qp->resp.task);

        return err;
}

static int rxe_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
                         struct ib_udata *udata)
{
        int err;
        struct ib_device *dev = ibcq->device;
        struct rxe_dev *rxe = to_rdev(dev);
        struct rxe_cq *cq = to_rcq(ibcq);
        struct rxe_create_cq_resp __user *uresp = NULL;

        if (udata) {
                if (udata->outlen < sizeof(*uresp))
                        return -EINVAL;
                uresp = udata->outbuf;
        }

        if (attr->flags)
                return -EOPNOTSUPP;

        err = rxe_cq_chk_attr(rxe, NULL, attr->cqe, attr->comp_vector);
        if (err)
                return err;

        err = rxe_cq_from_init(rxe, cq, attr->cqe, attr->comp_vector, udata,
                               uresp);
        if (err)
                return err;

        return rxe_add_to_pool(&rxe->cq_pool, cq);
}

static int rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
        struct rxe_cq *cq = to_rcq(ibcq);

        /* See IBA C11-17: The CI shall return an error if this Verb is
         * invoked while a Work Queue is still associated with the CQ.
         */
        if (atomic_read(&cq->num_wq))
                return -EINVAL;

        rxe_cq_disable(cq);

        rxe_cleanup(cq);
        return 0;
}

static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
        int err;
        struct rxe_cq *cq = to_rcq(ibcq);
        struct rxe_dev *rxe = to_rdev(ibcq->device);
        struct rxe_resize_cq_resp __user *uresp = NULL;

        if (udata) {
                if (udata->outlen < sizeof(*uresp))
                        return -EINVAL;
                uresp = udata->outbuf;
        }

        err = rxe_cq_chk_attr(rxe, cq, cqe, 0);
        if (err)
                return err;

        return rxe_cq_resize_queue(cq, cqe, uresp, udata);
}

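/* Drain up to num_entries completions from the CQ ring into the
 * caller's ib_wc array and return the number of entries copied.
 */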
static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
        int i;
        struct rxe_cq *cq = to_rcq(ibcq);
        struct rxe_cqe *cqe;
        unsigned long flags;

        spin_lock_irqsave(&cq->cq_lock, flags);
        for (i = 0; i < num_entries; i++) {
                cqe = queue_head(cq->queue, QUEUE_TYPE_TO_ULP);
                if (!cqe)
                        break;

                memcpy(wc++, &cqe->ibwc, sizeof(*wc));
                queue_advance_consumer(cq->queue, QUEUE_TYPE_TO_ULP);
        }
        spin_unlock_irqrestore(&cq->cq_lock, flags);

        return i;
}

static int rxe_peek_cq(struct ib_cq *ibcq, int wc_cnt)
{
        struct rxe_cq *cq = to_rcq(ibcq);
        int count;

        count = queue_count(cq->queue, QUEUE_TYPE_TO_ULP);

        return (count > wc_cnt) ? wc_cnt : count;
}

static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
        struct rxe_cq *cq = to_rcq(ibcq);
        int ret = 0;
        int empty;
        unsigned long irq_flags;

        spin_lock_irqsave(&cq->cq_lock, irq_flags);
        if (cq->notify != IB_CQ_NEXT_COMP)
                cq->notify = flags & IB_CQ_SOLICITED_MASK;

        empty = queue_empty(cq->queue, QUEUE_TYPE_TO_ULP);

        if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !empty)
                ret = 1;

        spin_unlock_irqrestore(&cq->cq_lock, irq_flags);

        return ret;
}

static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)
{
        struct rxe_dev *rxe = to_rdev(ibpd->device);
        struct rxe_pd *pd = to_rpd(ibpd);
        struct rxe_mr *mr;

        mr = rxe_alloc(&rxe->mr_pool);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        rxe_get(pd);
        mr->ibmr.pd = ibpd;

        rxe_mr_init_dma(access, mr);
        rxe_finalize(mr);

        return &mr->ibmr;
}

static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
                                     u64 start,
                                     u64 length,
                                     u64 iova,
                                     int access, struct ib_udata *udata)
{
        int err;
        struct rxe_dev *rxe = to_rdev(ibpd->device);
        struct rxe_pd *pd = to_rpd(ibpd);
        struct rxe_mr *mr;

        mr = rxe_alloc(&rxe->mr_pool);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        rxe_get(pd);
        mr->ibmr.pd = ibpd;

        err = rxe_mr_init_user(rxe, start, length, iova, access, mr);
        if (err)
                goto err1;

        rxe_finalize(mr);

        return &mr->ibmr;

err1:
        rxe_cleanup(mr);
        return ERR_PTR(err);
}

static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
                                  u32 max_num_sg)
{
        struct rxe_dev *rxe = to_rdev(ibpd->device);
        struct rxe_pd *pd = to_rpd(ibpd);
        struct rxe_mr *mr;
        int err;

        if (mr_type != IB_MR_TYPE_MEM_REG)
                return ERR_PTR(-EINVAL);

        mr = rxe_alloc(&rxe->mr_pool);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        rxe_get(pd);
        mr->ibmr.pd = ibpd;

        err = rxe_mr_init_fast(max_num_sg, mr);
        if (err)
                goto err1;

        rxe_finalize(mr);

        return &mr->ibmr;

err1:
        rxe_cleanup(mr);
        return ERR_PTR(err);
}

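/* Page-list callback used by ib_sg_to_pages() from rxe_map_mr_sg();
 * each call records one page address in the MR's map table.
 */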
static int rxe_set_page(struct ib_mr *ibmr, u64 addr)
{
        struct rxe_mr *mr = to_rmr(ibmr);
        struct rxe_map *map;
        struct rxe_phys_buf *buf;

        if (unlikely(mr->nbuf == mr->num_buf))
                return -ENOMEM;

        map = mr->map[mr->nbuf / RXE_BUF_PER_MAP];
        buf = &map->buf[mr->nbuf % RXE_BUF_PER_MAP];

        buf->addr = addr;
        buf->size = ibmr->page_size;
        mr->nbuf++;

        return 0;
}

static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
                         int sg_nents, unsigned int *sg_offset)
{
        struct rxe_mr *mr = to_rmr(ibmr);
        int n;

        mr->nbuf = 0;

        n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rxe_set_page);

        mr->page_shift = ilog2(ibmr->page_size);
        mr->page_mask = ibmr->page_size - 1;
        mr->offset = ibmr->iova & mr->page_mask;

        return n;
}

static ssize_t parent_show(struct device *device,
                           struct device_attribute *attr, char *buf)
{
        struct rxe_dev *rxe =
                rdma_device_to_drv_device(device, struct rxe_dev, ib_dev);

        return sysfs_emit(buf, "%s\n", rxe_parent_name(rxe, 1));
}

static DEVICE_ATTR_RO(parent);

static struct attribute *rxe_dev_attributes[] = {
        &dev_attr_parent.attr,
        NULL
};

static const struct attribute_group rxe_attr_group = {
        .attrs = rxe_dev_attributes,
};

static int rxe_enable_driver(struct ib_device *ib_dev)
{
        struct rxe_dev *rxe = container_of(ib_dev, struct rxe_dev, ib_dev);

        rxe_set_port_state(rxe);
        dev_info(&rxe->ib_dev.dev, "added %s\n", netdev_name(rxe->ndev));
        return 0;
}

static const struct ib_device_ops rxe_dev_ops = {
        .owner = THIS_MODULE,
        .driver_id = RDMA_DRIVER_RXE,
        .uverbs_abi_ver = RXE_UVERBS_ABI_VERSION,

        .alloc_hw_port_stats = rxe_ib_alloc_hw_port_stats,
        .alloc_mr = rxe_alloc_mr,
        .alloc_mw = rxe_alloc_mw,
        .alloc_pd = rxe_alloc_pd,
        .alloc_ucontext = rxe_alloc_ucontext,
        .attach_mcast = rxe_attach_mcast,
        .create_ah = rxe_create_ah,
        .create_cq = rxe_create_cq,
        .create_qp = rxe_create_qp,
        .create_srq = rxe_create_srq,
        .create_user_ah = rxe_create_ah,
        .dealloc_driver = rxe_dealloc,
        .dealloc_mw = rxe_dealloc_mw,
        .dealloc_pd = rxe_dealloc_pd,
        .dealloc_ucontext = rxe_dealloc_ucontext,
        .dereg_mr = rxe_dereg_mr,
        .destroy_ah = rxe_destroy_ah,
        .destroy_cq = rxe_destroy_cq,
        .destroy_qp = rxe_destroy_qp,
        .destroy_srq = rxe_destroy_srq,
        .detach_mcast = rxe_detach_mcast,
        .device_group = &rxe_attr_group,
        .enable_driver = rxe_enable_driver,
        .get_dma_mr = rxe_get_dma_mr,
        .get_hw_stats = rxe_ib_get_hw_stats,
        .get_link_layer = rxe_get_link_layer,
        .get_port_immutable = rxe_port_immutable,
        .map_mr_sg = rxe_map_mr_sg,
        .mmap = rxe_mmap,
        .modify_ah = rxe_modify_ah,
        .modify_device = rxe_modify_device,
        .modify_port = rxe_modify_port,
        .modify_qp = rxe_modify_qp,
        .modify_srq = rxe_modify_srq,
        .peek_cq = rxe_peek_cq,
        .poll_cq = rxe_poll_cq,
        .post_recv = rxe_post_recv,
        .post_send = rxe_post_send,
        .post_srq_recv = rxe_post_srq_recv,
        .query_ah = rxe_query_ah,
        .query_device = rxe_query_device,
        .query_pkey = rxe_query_pkey,
        .query_port = rxe_query_port,
        .query_qp = rxe_query_qp,
        .query_srq = rxe_query_srq,
        .reg_user_mr = rxe_reg_user_mr,
        .req_notify_cq = rxe_req_notify_cq,
        .resize_cq = rxe_resize_cq,

        INIT_RDMA_OBJ_SIZE(ib_ah, rxe_ah, ibah),
        INIT_RDMA_OBJ_SIZE(ib_cq, rxe_cq, ibcq),
        INIT_RDMA_OBJ_SIZE(ib_pd, rxe_pd, ibpd),
        INIT_RDMA_OBJ_SIZE(ib_qp, rxe_qp, ibqp),
        INIT_RDMA_OBJ_SIZE(ib_srq, rxe_srq, ibsrq),
        INIT_RDMA_OBJ_SIZE(ib_ucontext, rxe_ucontext, ibuc),
        INIT_RDMA_OBJ_SIZE(ib_mw, rxe_mw, ibmw),
};

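/* Fill in the ib_device attributes, bind the device to its underlying
 * netdev and register it with the RDMA core under ibdev_name.
 */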
int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name)
{
        int err;
        struct ib_device *dev = &rxe->ib_dev;

        strscpy(dev->node_desc, "rxe", sizeof(dev->node_desc));

        dev->node_type = RDMA_NODE_IB_CA;
        dev->phys_port_cnt = 1;
        dev->num_comp_vectors = num_possible_cpus();
        dev->local_dma_lkey = 0;
        addrconf_addr_eui48((unsigned char *)&dev->node_guid,
                            rxe->ndev->dev_addr);

        dev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_POST_SEND) |
                                BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ);

        ib_set_device_ops(dev, &rxe_dev_ops);
        err = ib_device_set_netdev(&rxe->ib_dev, rxe->ndev, 1);
        if (err)
                return err;

        err = rxe_icrc_init(rxe);
        if (err)
                return err;

        err = ib_register_device(dev, ibdev_name, NULL);
        if (err)
                pr_warn("%s failed with error %d\n", __func__, err);

        /*
         * Note that rxe may be invalid at this point if another thread
         * unregistered it.
         */
        return err;
}