siw_verbs.c 44 KB

  1. // SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
  2. /* Authors: Bernard Metzler <[email protected]> */
  3. /* Copyright (c) 2008-2019, IBM Corporation */
  4. #include <linux/errno.h>
  5. #include <linux/types.h>
  6. #include <linux/uaccess.h>
  7. #include <linux/vmalloc.h>
  8. #include <linux/xarray.h>
  9. #include <net/addrconf.h>
  10. #include <rdma/iw_cm.h>
  11. #include <rdma/ib_verbs.h>
  12. #include <rdma/ib_user_verbs.h>
  13. #include <rdma/uverbs_ioctl.h>
  14. #include "siw.h"
  15. #include "siw_verbs.h"
  16. #include "siw_mem.h"
  17. static int ib_qp_state_to_siw_qp_state[IB_QPS_ERR + 1] = {
  18. [IB_QPS_RESET] = SIW_QP_STATE_IDLE,
  19. [IB_QPS_INIT] = SIW_QP_STATE_IDLE,
  20. [IB_QPS_RTR] = SIW_QP_STATE_RTR,
  21. [IB_QPS_RTS] = SIW_QP_STATE_RTS,
  22. [IB_QPS_SQD] = SIW_QP_STATE_CLOSING,
  23. [IB_QPS_SQE] = SIW_QP_STATE_TERMINATE,
  24. [IB_QPS_ERR] = SIW_QP_STATE_ERROR
  25. };
  26. static char ib_qp_state_to_string[IB_QPS_ERR + 1][sizeof("RESET")] = {
  27. [IB_QPS_RESET] = "RESET", [IB_QPS_INIT] = "INIT", [IB_QPS_RTR] = "RTR",
  28. [IB_QPS_RTS] = "RTS", [IB_QPS_SQD] = "SQD", [IB_QPS_SQE] = "SQE",
  29. [IB_QPS_ERR] = "ERR"
  30. };
  31. void siw_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
  32. {
  33. struct siw_user_mmap_entry *entry = to_siw_mmap_entry(rdma_entry);
  34. kfree(entry);
  35. }
  36. int siw_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma)
  37. {
  38. struct siw_ucontext *uctx = to_siw_ctx(ctx);
  39. size_t size = vma->vm_end - vma->vm_start;
  40. struct rdma_user_mmap_entry *rdma_entry;
  41. struct siw_user_mmap_entry *entry;
  42. int rv = -EINVAL;
  43. /*
  44. * Must be page aligned
  45. */
  46. if (vma->vm_start & (PAGE_SIZE - 1)) {
  47. pr_warn("siw: mmap not page aligned\n");
  48. return -EINVAL;
  49. }
  50. rdma_entry = rdma_user_mmap_entry_get(&uctx->base_ucontext, vma);
  51. if (!rdma_entry) {
  52. siw_dbg(&uctx->sdev->base_dev, "mmap lookup failed: %lu, %#zx\n",
  53. vma->vm_pgoff, size);
  54. return -EINVAL;
  55. }
  56. entry = to_siw_mmap_entry(rdma_entry);
  57. rv = remap_vmalloc_range(vma, entry->address, 0);
  58. if (rv) {
  59. pr_warn("remap_vmalloc_range failed: %lu, %zu\n", vma->vm_pgoff,
  60. size);
  61. goto out;
  62. }
  63. out:
  64. rdma_user_mmap_entry_put(rdma_entry);
  65. return rv;
  66. }
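/*
 * A minimal userspace sketch (not part of the driver) of how the mmap
 * offsets handed out below (e.g. uresp.sq_key, uresp.cq_key) are meant
 * to be consumed: the library mmap()s the open verbs device fd at the
 * returned offset, and siw_mmap() above remaps the vmalloc'ed queue.
 * 'map_siw_queue' and 'cmd_fd' are illustrative names only.
 */
#if 0	/* illustrative sketch, not driver code */
#include <stdint.h>
#include <sys/mman.h>

static void *map_siw_queue(int cmd_fd, uint64_t key, size_t length)
{
	void *q = mmap(NULL, length, PROT_READ | PROT_WRITE,
		       MAP_SHARED, cmd_fd, (off_t)key);

	return q == MAP_FAILED ? NULL : q;
}
#endif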
  67. int siw_alloc_ucontext(struct ib_ucontext *base_ctx, struct ib_udata *udata)
  68. {
  69. struct siw_device *sdev = to_siw_dev(base_ctx->device);
  70. struct siw_ucontext *ctx = to_siw_ctx(base_ctx);
  71. struct siw_uresp_alloc_ctx uresp = {};
  72. int rv;
  73. if (atomic_inc_return(&sdev->num_ctx) > SIW_MAX_CONTEXT) {
  74. rv = -ENOMEM;
  75. goto err_out;
  76. }
  77. ctx->sdev = sdev;
  78. uresp.dev_id = sdev->vendor_part_id;
  79. if (udata->outlen < sizeof(uresp)) {
  80. rv = -EINVAL;
  81. goto err_out;
  82. }
  83. rv = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
  84. if (rv)
  85. goto err_out;
  86. siw_dbg(base_ctx->device, "success. now %d context(s)\n",
  87. atomic_read(&sdev->num_ctx));
  88. return 0;
  89. err_out:
  90. atomic_dec(&sdev->num_ctx);
  91. siw_dbg(base_ctx->device, "failure %d. now %d context(s)\n", rv,
  92. atomic_read(&sdev->num_ctx));
  93. return rv;
  94. }
  95. void siw_dealloc_ucontext(struct ib_ucontext *base_ctx)
  96. {
  97. struct siw_ucontext *uctx = to_siw_ctx(base_ctx);
  98. atomic_dec(&uctx->sdev->num_ctx);
  99. }
  100. int siw_query_device(struct ib_device *base_dev, struct ib_device_attr *attr,
  101. struct ib_udata *udata)
  102. {
  103. struct siw_device *sdev = to_siw_dev(base_dev);
  104. if (udata->inlen || udata->outlen)
  105. return -EINVAL;
  106. memset(attr, 0, sizeof(*attr));
  107. /* Revisit atomic caps if RFC 7306 gets supported */
  108. attr->atomic_cap = 0;
  109. attr->device_cap_flags = IB_DEVICE_MEM_MGT_EXTENSIONS;
  110. attr->kernel_cap_flags = IBK_ALLOW_USER_UNREG;
  111. attr->max_cq = sdev->attrs.max_cq;
  112. attr->max_cqe = sdev->attrs.max_cqe;
  113. attr->max_fast_reg_page_list_len = SIW_MAX_SGE_PBL;
  114. attr->max_mr = sdev->attrs.max_mr;
  115. attr->max_mw = sdev->attrs.max_mw;
  116. attr->max_mr_size = ~0ull;
  117. attr->max_pd = sdev->attrs.max_pd;
  118. attr->max_qp = sdev->attrs.max_qp;
  119. attr->max_qp_init_rd_atom = sdev->attrs.max_ird;
  120. attr->max_qp_rd_atom = sdev->attrs.max_ord;
  121. attr->max_qp_wr = sdev->attrs.max_qp_wr;
  122. attr->max_recv_sge = sdev->attrs.max_sge;
  123. attr->max_res_rd_atom = sdev->attrs.max_qp * sdev->attrs.max_ird;
  124. attr->max_send_sge = sdev->attrs.max_sge;
  125. attr->max_sge_rd = sdev->attrs.max_sge_rd;
  126. attr->max_srq = sdev->attrs.max_srq;
  127. attr->max_srq_sge = sdev->attrs.max_srq_sge;
  128. attr->max_srq_wr = sdev->attrs.max_srq_wr;
  129. attr->page_size_cap = PAGE_SIZE;
  130. attr->vendor_id = SIW_VENDOR_ID;
  131. attr->vendor_part_id = sdev->vendor_part_id;
  132. addrconf_addr_eui48((u8 *)&attr->sys_image_guid,
  133. sdev->raw_gid);
  134. return 0;
  135. }
  136. int siw_query_port(struct ib_device *base_dev, u32 port,
  137. struct ib_port_attr *attr)
  138. {
  139. struct siw_device *sdev = to_siw_dev(base_dev);
  140. int rv;
  141. memset(attr, 0, sizeof(*attr));
  142. rv = ib_get_eth_speed(base_dev, port, &attr->active_speed,
  143. &attr->active_width);
  144. attr->gid_tbl_len = 1;
  145. attr->max_msg_sz = -1;
  146. attr->max_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
  147. attr->active_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
  148. attr->phys_state = sdev->state == IB_PORT_ACTIVE ?
  149. IB_PORT_PHYS_STATE_LINK_UP : IB_PORT_PHYS_STATE_DISABLED;
  150. attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP;
  151. attr->state = sdev->state;
  152. /*
  153. * All zero
  154. *
  155. * attr->lid = 0;
  156. * attr->bad_pkey_cntr = 0;
  157. * attr->qkey_viol_cntr = 0;
  158. * attr->sm_lid = 0;
  159. * attr->lmc = 0;
  160. * attr->max_vl_num = 0;
  161. * attr->sm_sl = 0;
  162. * attr->subnet_timeout = 0;
  163. * attr->init_type_reply = 0;
  164. */
  165. return rv;
  166. }
  167. int siw_get_port_immutable(struct ib_device *base_dev, u32 port,
  168. struct ib_port_immutable *port_immutable)
  169. {
  170. struct ib_port_attr attr;
  171. int rv = siw_query_port(base_dev, port, &attr);
  172. if (rv)
  173. return rv;
  174. port_immutable->gid_tbl_len = attr.gid_tbl_len;
  175. port_immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
  176. return 0;
  177. }
  178. int siw_query_gid(struct ib_device *base_dev, u32 port, int idx,
  179. union ib_gid *gid)
  180. {
  181. struct siw_device *sdev = to_siw_dev(base_dev);
  182. /* subnet_prefix == interface_id == 0; */
  183. memset(gid, 0, sizeof(*gid));
  184. memcpy(gid->raw, sdev->raw_gid, ETH_ALEN);
  185. return 0;
  186. }
  187. int siw_alloc_pd(struct ib_pd *pd, struct ib_udata *udata)
  188. {
  189. struct siw_device *sdev = to_siw_dev(pd->device);
  190. if (atomic_inc_return(&sdev->num_pd) > SIW_MAX_PD) {
  191. atomic_dec(&sdev->num_pd);
  192. return -ENOMEM;
  193. }
  194. siw_dbg_pd(pd, "now %d PD's(s)\n", atomic_read(&sdev->num_pd));
  195. return 0;
  196. }
  197. int siw_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
  198. {
  199. struct siw_device *sdev = to_siw_dev(pd->device);
  200. siw_dbg_pd(pd, "free PD\n");
  201. atomic_dec(&sdev->num_pd);
  202. return 0;
  203. }
  204. void siw_qp_get_ref(struct ib_qp *base_qp)
  205. {
  206. siw_qp_get(to_siw_qp(base_qp));
  207. }
  208. void siw_qp_put_ref(struct ib_qp *base_qp)
  209. {
  210. siw_qp_put(to_siw_qp(base_qp));
  211. }
  212. static struct rdma_user_mmap_entry *
  213. siw_mmap_entry_insert(struct siw_ucontext *uctx,
  214. void *address, size_t length,
  215. u64 *offset)
  216. {
  217. struct siw_user_mmap_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL);
  218. int rv;
  219. *offset = SIW_INVAL_UOBJ_KEY;
  220. if (!entry)
  221. return NULL;
  222. entry->address = address;
  223. rv = rdma_user_mmap_entry_insert(&uctx->base_ucontext,
  224. &entry->rdma_entry,
  225. length);
  226. if (rv) {
  227. kfree(entry);
  228. return NULL;
  229. }
  230. *offset = rdma_user_mmap_get_offset(&entry->rdma_entry);
  231. return &entry->rdma_entry;
  232. }
  233. /*
  234. * siw_create_qp()
  235. *
  236. * Create QP of requested size on given device.
  237. *
  238. * @ibqp: Queue pair
  239. * @attrs: Initial QP attributes.
  240. * @udata: used to provide QP ID, SQ and RQ size back to user.
  241. */
  242. int siw_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
  243. struct ib_udata *udata)
  244. {
  245. struct ib_pd *pd = ibqp->pd;
  246. struct siw_qp *qp = to_siw_qp(ibqp);
  247. struct ib_device *base_dev = pd->device;
  248. struct siw_device *sdev = to_siw_dev(base_dev);
  249. struct siw_ucontext *uctx =
  250. rdma_udata_to_drv_context(udata, struct siw_ucontext,
  251. base_ucontext);
  252. unsigned long flags;
  253. int num_sqe, num_rqe, rv = 0;
  254. size_t length;
  255. siw_dbg(base_dev, "create new QP\n");
  256. if (attrs->create_flags)
  257. return -EOPNOTSUPP;
  258. if (atomic_inc_return(&sdev->num_qp) > SIW_MAX_QP) {
  259. siw_dbg(base_dev, "too many QP's\n");
  260. rv = -ENOMEM;
  261. goto err_atomic;
  262. }
  263. if (attrs->qp_type != IB_QPT_RC) {
  264. siw_dbg(base_dev, "only RC QP's supported\n");
  265. rv = -EOPNOTSUPP;
  266. goto err_atomic;
  267. }
  268. if ((attrs->cap.max_send_wr > SIW_MAX_QP_WR) ||
  269. (attrs->cap.max_recv_wr > SIW_MAX_QP_WR) ||
  270. (attrs->cap.max_send_sge > SIW_MAX_SGE) ||
  271. (attrs->cap.max_recv_sge > SIW_MAX_SGE)) {
  272. siw_dbg(base_dev, "QP size error\n");
  273. rv = -EINVAL;
  274. goto err_atomic;
  275. }
  276. if (attrs->cap.max_inline_data > SIW_MAX_INLINE) {
  277. siw_dbg(base_dev, "max inline send: %d > %d\n",
  278. attrs->cap.max_inline_data, (int)SIW_MAX_INLINE);
  279. rv = -EINVAL;
  280. goto err_atomic;
  281. }
  282. /*
  283. * NOTE: zero-element SGLs are allowed for SQ and RQ WQEs,
  284. * but the QP must be able to hold at least one WQE (SQ + RQ)
  285. */
  286. if (attrs->cap.max_send_wr + attrs->cap.max_recv_wr == 0) {
  287. siw_dbg(base_dev, "QP must have send or receive queue\n");
  288. rv = -EINVAL;
  289. goto err_atomic;
  290. }
  291. if (!attrs->send_cq || (!attrs->recv_cq && !attrs->srq)) {
  292. siw_dbg(base_dev, "send CQ or receive CQ invalid\n");
  293. rv = -EINVAL;
  294. goto err_atomic;
  295. }
  296. init_rwsem(&qp->state_lock);
  297. spin_lock_init(&qp->sq_lock);
  298. spin_lock_init(&qp->rq_lock);
  299. spin_lock_init(&qp->orq_lock);
  300. rv = siw_qp_add(sdev, qp);
  301. if (rv)
  302. goto err_atomic;
  303. num_sqe = attrs->cap.max_send_wr;
  304. num_rqe = attrs->cap.max_recv_wr;
  305. /* All queue indices are derived from modulo operations
  306. * on free-running 'get' (consumer) and 'put' (producer)
  307. * unsigned counters. Keeping queue sizes at a power of two avoids
  308. * handling counter wrap-around; see the sketch after this function.
  309. */
  310. if (num_sqe)
  311. num_sqe = roundup_pow_of_two(num_sqe);
  312. else {
  313. /* Zero sized SQ is not supported */
  314. rv = -EINVAL;
  315. goto err_out_xa;
  316. }
  317. if (num_rqe)
  318. num_rqe = roundup_pow_of_two(num_rqe);
  319. if (udata)
  320. qp->sendq = vmalloc_user(num_sqe * sizeof(struct siw_sqe));
  321. else
  322. qp->sendq = vzalloc(num_sqe * sizeof(struct siw_sqe));
  323. if (qp->sendq == NULL) {
  324. rv = -ENOMEM;
  325. goto err_out_xa;
  326. }
  327. if (attrs->sq_sig_type != IB_SIGNAL_REQ_WR) {
  328. if (attrs->sq_sig_type == IB_SIGNAL_ALL_WR)
  329. qp->attrs.flags |= SIW_SIGNAL_ALL_WR;
  330. else {
  331. rv = -EINVAL;
  332. goto err_out_xa;
  333. }
  334. }
  335. qp->pd = pd;
  336. qp->scq = to_siw_cq(attrs->send_cq);
  337. qp->rcq = to_siw_cq(attrs->recv_cq);
  338. if (attrs->srq) {
  339. /*
  340. * SRQ support.
  341. * Verbs 6.3.7: ignore RQ size, if SRQ present
  342. * Verbs 6.3.5: do not check PD of SRQ against PD of QP
  343. */
  344. qp->srq = to_siw_srq(attrs->srq);
  345. qp->attrs.rq_size = 0;
  346. siw_dbg(base_dev, "QP [%u]: SRQ attached\n",
  347. qp->base_qp.qp_num);
  348. } else if (num_rqe) {
  349. if (udata)
  350. qp->recvq =
  351. vmalloc_user(num_rqe * sizeof(struct siw_rqe));
  352. else
  353. qp->recvq = vzalloc(num_rqe * sizeof(struct siw_rqe));
  354. if (qp->recvq == NULL) {
  355. rv = -ENOMEM;
  356. goto err_out_xa;
  357. }
  358. qp->attrs.rq_size = num_rqe;
  359. }
  360. qp->attrs.sq_size = num_sqe;
  361. qp->attrs.sq_max_sges = attrs->cap.max_send_sge;
  362. qp->attrs.rq_max_sges = attrs->cap.max_recv_sge;
  363. /* Make those two tunables fixed for now. */
  364. qp->tx_ctx.gso_seg_limit = 1;
  365. qp->tx_ctx.zcopy_tx = zcopy_tx;
  366. qp->attrs.state = SIW_QP_STATE_IDLE;
  367. if (udata) {
  368. struct siw_uresp_create_qp uresp = {};
  369. uresp.num_sqe = num_sqe;
  370. uresp.num_rqe = num_rqe;
  371. uresp.qp_id = qp_id(qp);
  372. if (qp->sendq) {
  373. length = num_sqe * sizeof(struct siw_sqe);
  374. qp->sq_entry =
  375. siw_mmap_entry_insert(uctx, qp->sendq,
  376. length, &uresp.sq_key);
  377. if (!qp->sq_entry) {
  378. rv = -ENOMEM;
  379. goto err_out_xa;
  380. }
  381. }
  382. if (qp->recvq) {
  383. length = num_rqe * sizeof(struct siw_rqe);
  384. qp->rq_entry =
  385. siw_mmap_entry_insert(uctx, qp->recvq,
  386. length, &uresp.rq_key);
  387. if (!qp->rq_entry) {
  388. uresp.sq_key = SIW_INVAL_UOBJ_KEY;
  389. rv = -ENOMEM;
  390. goto err_out_xa;
  391. }
  392. }
  393. if (udata->outlen < sizeof(uresp)) {
  394. rv = -EINVAL;
  395. goto err_out_xa;
  396. }
  397. rv = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
  398. if (rv)
  399. goto err_out_xa;
  400. }
  401. qp->tx_cpu = siw_get_tx_cpu(sdev);
  402. if (qp->tx_cpu < 0) {
  403. rv = -EINVAL;
  404. goto err_out_xa;
  405. }
  406. INIT_LIST_HEAD(&qp->devq);
  407. spin_lock_irqsave(&sdev->lock, flags);
  408. list_add_tail(&qp->devq, &sdev->qp_list);
  409. spin_unlock_irqrestore(&sdev->lock, flags);
  410. init_completion(&qp->qp_free);
  411. return 0;
  412. err_out_xa:
  413. xa_erase(&sdev->qp_xa, qp_id(qp));
  414. if (uctx) {
  415. rdma_user_mmap_entry_remove(qp->sq_entry);
  416. rdma_user_mmap_entry_remove(qp->rq_entry);
  417. }
  418. vfree(qp->sendq);
  419. vfree(qp->recvq);
  420. err_atomic:
  421. atomic_dec(&sdev->num_qp);
  422. return rv;
  423. }
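/*
 * A short sketch of the ring-index arithmetic the comment above refers
 * to; 'ring_idx' and 'ring_entries' are illustrative helpers, not siw
 * functions.
 */
#if 0	/* illustrative sketch, not driver code */
/* With size == 2^n, 'cnt % size' equals 'cnt & (size - 1)'. */
static u32 ring_idx(u32 cnt, u32 size)
{
	return cnt & (size - 1);
}

/* Outstanding entries; correct even after the 32-bit 'put' wrapped. */
static u32 ring_entries(u32 put, u32 get)
{
	return put - get;
}
#endif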
  424. /*
  425. * Minimum siw_query_qp() verb interface.
  426. *
  427. * @qp_attr_mask is not used but all available information is provided
  428. */
  429. int siw_query_qp(struct ib_qp *base_qp, struct ib_qp_attr *qp_attr,
  430. int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
  431. {
  432. struct siw_qp *qp;
  433. struct siw_device *sdev;
  434. if (base_qp && qp_attr && qp_init_attr) {
  435. qp = to_siw_qp(base_qp);
  436. sdev = to_siw_dev(base_qp->device);
  437. } else {
  438. return -EINVAL;
  439. }
  440. qp_attr->cap.max_inline_data = SIW_MAX_INLINE;
  441. qp_attr->cap.max_send_wr = qp->attrs.sq_size;
  442. qp_attr->cap.max_send_sge = qp->attrs.sq_max_sges;
  443. qp_attr->cap.max_recv_wr = qp->attrs.rq_size;
  444. qp_attr->cap.max_recv_sge = qp->attrs.rq_max_sges;
  445. qp_attr->path_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
  446. qp_attr->max_rd_atomic = qp->attrs.irq_size;
  447. qp_attr->max_dest_rd_atomic = qp->attrs.orq_size;
  448. qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE |
  449. IB_ACCESS_REMOTE_WRITE |
  450. IB_ACCESS_REMOTE_READ;
  451. qp_init_attr->qp_type = base_qp->qp_type;
  452. qp_init_attr->send_cq = base_qp->send_cq;
  453. qp_init_attr->recv_cq = base_qp->recv_cq;
  454. qp_init_attr->srq = base_qp->srq;
  455. qp_init_attr->cap = qp_attr->cap;
  456. return 0;
  457. }
  458. int siw_verbs_modify_qp(struct ib_qp *base_qp, struct ib_qp_attr *attr,
  459. int attr_mask, struct ib_udata *udata)
  460. {
  461. struct siw_qp_attrs new_attrs;
  462. enum siw_qp_attr_mask siw_attr_mask = 0;
  463. struct siw_qp *qp = to_siw_qp(base_qp);
  464. int rv = 0;
  465. if (!attr_mask)
  466. return 0;
  467. if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
  468. return -EOPNOTSUPP;
  469. memset(&new_attrs, 0, sizeof(new_attrs));
  470. if (attr_mask & IB_QP_ACCESS_FLAGS) {
  471. siw_attr_mask = SIW_QP_ATTR_ACCESS_FLAGS;
  472. if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ)
  473. new_attrs.flags |= SIW_RDMA_READ_ENABLED;
  474. if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE)
  475. new_attrs.flags |= SIW_RDMA_WRITE_ENABLED;
  476. if (attr->qp_access_flags & IB_ACCESS_MW_BIND)
  477. new_attrs.flags |= SIW_RDMA_BIND_ENABLED;
  478. }
  479. if (attr_mask & IB_QP_STATE) {
  480. siw_dbg_qp(qp, "desired IB QP state: %s\n",
  481. ib_qp_state_to_string[attr->qp_state]);
  482. new_attrs.state = ib_qp_state_to_siw_qp_state[attr->qp_state];
  483. if (new_attrs.state > SIW_QP_STATE_RTS)
  484. qp->tx_ctx.tx_suspend = 1;
  485. siw_attr_mask |= SIW_QP_ATTR_STATE;
  486. }
  487. if (!siw_attr_mask)
  488. goto out;
  489. down_write(&qp->state_lock);
  490. rv = siw_qp_modify(qp, &new_attrs, siw_attr_mask);
  491. up_write(&qp->state_lock);
  492. out:
  493. return rv;
  494. }
  495. int siw_destroy_qp(struct ib_qp *base_qp, struct ib_udata *udata)
  496. {
  497. struct siw_qp *qp = to_siw_qp(base_qp);
  498. struct siw_ucontext *uctx =
  499. rdma_udata_to_drv_context(udata, struct siw_ucontext,
  500. base_ucontext);
  501. struct siw_qp_attrs qp_attrs;
  502. siw_dbg_qp(qp, "state %d\n", qp->attrs.state);
  503. /*
  504. * Mark QP as in process of destruction to prevent from
  505. * any async callbacks to RDMA core
  506. */
  507. qp->attrs.flags |= SIW_QP_IN_DESTROY;
  508. qp->rx_stream.rx_suspend = 1;
  509. if (uctx) {
  510. rdma_user_mmap_entry_remove(qp->sq_entry);
  511. rdma_user_mmap_entry_remove(qp->rq_entry);
  512. }
  513. down_write(&qp->state_lock);
  514. qp_attrs.state = SIW_QP_STATE_ERROR;
  515. siw_qp_modify(qp, &qp_attrs, SIW_QP_ATTR_STATE);
  516. if (qp->cep) {
  517. siw_cep_put(qp->cep);
  518. qp->cep = NULL;
  519. }
  520. up_write(&qp->state_lock);
  521. kfree(qp->tx_ctx.mpa_crc_hd);
  522. kfree(qp->rx_stream.mpa_crc_hd);
  523. qp->scq = qp->rcq = NULL;
  524. siw_qp_put(qp);
  525. wait_for_completion(&qp->qp_free);
  526. return 0;
  527. }
  528. /*
  529. * siw_copy_inline_sgl()
  530. *
  531. * Prepare the SGL of inlined data for sending. For userland callers,
  532. * the function checks whether the given buffer addresses and lengths
  533. * are within process context bounds.
  534. * Data from all provided SGEs is copied together into the WQE and
  535. * referenced by a single SGE.
  536. */
  537. static int siw_copy_inline_sgl(const struct ib_send_wr *core_wr,
  538. struct siw_sqe *sqe)
  539. {
  540. struct ib_sge *core_sge = core_wr->sg_list;
  541. void *kbuf = &sqe->sge[1];
  542. int num_sge = core_wr->num_sge, bytes = 0;
  543. sqe->sge[0].laddr = (uintptr_t)kbuf;
  544. sqe->sge[0].lkey = 0;
  545. while (num_sge--) {
  546. if (!core_sge->length) {
  547. core_sge++;
  548. continue;
  549. }
  550. bytes += core_sge->length;
  551. if (bytes > SIW_MAX_INLINE) {
  552. bytes = -EINVAL;
  553. break;
  554. }
  555. memcpy(kbuf, (void *)(uintptr_t)core_sge->addr,
  556. core_sge->length);
  557. kbuf += core_sge->length;
  558. core_sge++;
  559. }
  560. sqe->sge[0].length = max(bytes, 0);
  561. sqe->num_sge = bytes > 0 ? 1 : 0;
  562. return bytes;
  563. }
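/*
 * A minimal kernel-client sketch of posting inline data, assuming an
 * established RC QP. With IB_SEND_INLINE the payload is copied into the
 * SQE by siw_copy_inline_sgl(), so 'buf' may be reused as soon as
 * ib_post_send() returns; the total length must not exceed
 * SIW_MAX_INLINE. 'post_inline_send' is an illustrative helper, not
 * part of siw.
 */
#if 0	/* illustrative sketch, not driver code */
static int post_inline_send(struct ib_qp *qp, void *buf, u32 len, u64 wr_id)
{
	struct ib_sge sge = {
		.addr = (uintptr_t)buf,
		.length = len,
		/* .lkey is ignored for inline data */
	};
	struct ib_send_wr wr = {
		.wr_id = wr_id,
		.sg_list = &sge,
		.num_sge = 1,
		.opcode = IB_WR_SEND,
		.send_flags = IB_SEND_INLINE | IB_SEND_SIGNALED,
	};
	const struct ib_send_wr *bad_wr;

	return ib_post_send(qp, &wr, &bad_wr);
}
#endif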
  564. /* Complete SQ WR's without processing */
  565. static int siw_sq_flush_wr(struct siw_qp *qp, const struct ib_send_wr *wr,
  566. const struct ib_send_wr **bad_wr)
  567. {
  568. int rv = 0;
  569. while (wr) {
  570. struct siw_sqe sqe = {};
  571. switch (wr->opcode) {
  572. case IB_WR_RDMA_WRITE:
  573. sqe.opcode = SIW_OP_WRITE;
  574. break;
  575. case IB_WR_RDMA_READ:
  576. sqe.opcode = SIW_OP_READ;
  577. break;
  578. case IB_WR_RDMA_READ_WITH_INV:
  579. sqe.opcode = SIW_OP_READ_LOCAL_INV;
  580. break;
  581. case IB_WR_SEND:
  582. sqe.opcode = SIW_OP_SEND;
  583. break;
  584. case IB_WR_SEND_WITH_IMM:
  585. sqe.opcode = SIW_OP_SEND_WITH_IMM;
  586. break;
  587. case IB_WR_SEND_WITH_INV:
  588. sqe.opcode = SIW_OP_SEND_REMOTE_INV;
  589. break;
  590. case IB_WR_LOCAL_INV:
  591. sqe.opcode = SIW_OP_INVAL_STAG;
  592. break;
  593. case IB_WR_REG_MR:
  594. sqe.opcode = SIW_OP_REG_MR;
  595. break;
  596. default:
  597. rv = -EINVAL;
  598. break;
  599. }
  600. if (!rv) {
  601. sqe.id = wr->wr_id;
  602. rv = siw_sqe_complete(qp, &sqe, 0,
  603. SIW_WC_WR_FLUSH_ERR);
  604. }
  605. if (rv) {
  606. if (bad_wr)
  607. *bad_wr = wr;
  608. break;
  609. }
  610. wr = wr->next;
  611. }
  612. return rv;
  613. }
  614. /* Complete RQ WR's without processing */
  615. static int siw_rq_flush_wr(struct siw_qp *qp, const struct ib_recv_wr *wr,
  616. const struct ib_recv_wr **bad_wr)
  617. {
  618. struct siw_rqe rqe = {};
  619. int rv = 0;
  620. while (wr) {
  621. rqe.id = wr->wr_id;
  622. rv = siw_rqe_complete(qp, &rqe, 0, 0, SIW_WC_WR_FLUSH_ERR);
  623. if (rv) {
  624. if (bad_wr)
  625. *bad_wr = wr;
  626. break;
  627. }
  628. wr = wr->next;
  629. }
  630. return rv;
  631. }
  632. /*
  633. * siw_post_send()
  634. *
  635. * Post a list of S-WR's to a SQ.
  636. *
  637. * @base_qp: Base QP contained in siw QP
  638. * @wr: Null terminated list of user WR's
  639. * @bad_wr: Points to failing WR in case of synchronous failure.
  640. */
  641. int siw_post_send(struct ib_qp *base_qp, const struct ib_send_wr *wr,
  642. const struct ib_send_wr **bad_wr)
  643. {
  644. struct siw_qp *qp = to_siw_qp(base_qp);
  645. struct siw_wqe *wqe = tx_wqe(qp);
  646. unsigned long flags;
  647. int rv = 0;
  648. if (wr && !rdma_is_kernel_res(&qp->base_qp.res)) {
  649. siw_dbg_qp(qp, "wr must be empty for user mapped sq\n");
  650. *bad_wr = wr;
  651. return -EINVAL;
  652. }
  653. /*
  654. * Try to acquire QP state lock. Must be non-blocking
  655. * to accommodate kernel clients' needs.
  656. */
  657. if (!down_read_trylock(&qp->state_lock)) {
  658. if (qp->attrs.state == SIW_QP_STATE_ERROR) {
  659. /*
  660. * ERROR state is final, so we can be sure
  661. * this state will not change as long as the QP
  662. * exists.
  663. *
  664. * This handles an ib_drain_sq() call with
  665. * a concurrent request to set the QP state
  666. * to ERROR.
  667. */
  668. rv = siw_sq_flush_wr(qp, wr, bad_wr);
  669. } else {
  670. siw_dbg_qp(qp, "QP locked, state %d\n",
  671. qp->attrs.state);
  672. *bad_wr = wr;
  673. rv = -ENOTCONN;
  674. }
  675. return rv;
  676. }
  677. if (unlikely(qp->attrs.state != SIW_QP_STATE_RTS)) {
  678. if (qp->attrs.state == SIW_QP_STATE_ERROR) {
  679. /*
  680. * Immediately flush this WR to CQ, if QP
  681. * is in ERROR state. SQ is guaranteed to
  682. * be empty, so the WR completes in order.
  683. *
  684. * Typically triggered by ib_drain_sq().
  685. */
  686. rv = siw_sq_flush_wr(qp, wr, bad_wr);
  687. } else {
  688. siw_dbg_qp(qp, "QP out of state %d\n",
  689. qp->attrs.state);
  690. *bad_wr = wr;
  691. rv = -ENOTCONN;
  692. }
  693. up_read(&qp->state_lock);
  694. return rv;
  695. }
  696. spin_lock_irqsave(&qp->sq_lock, flags);
  697. while (wr) {
  698. u32 idx = qp->sq_put % qp->attrs.sq_size;
  699. struct siw_sqe *sqe = &qp->sendq[idx];
  700. if (sqe->flags) {
  701. siw_dbg_qp(qp, "sq full\n");
  702. rv = -ENOMEM;
  703. break;
  704. }
  705. if (wr->num_sge > qp->attrs.sq_max_sges) {
  706. siw_dbg_qp(qp, "too many sge's: %d\n", wr->num_sge);
  707. rv = -EINVAL;
  708. break;
  709. }
  710. sqe->id = wr->wr_id;
  711. if ((wr->send_flags & IB_SEND_SIGNALED) ||
  712. (qp->attrs.flags & SIW_SIGNAL_ALL_WR))
  713. sqe->flags |= SIW_WQE_SIGNALLED;
  714. if (wr->send_flags & IB_SEND_FENCE)
  715. sqe->flags |= SIW_WQE_READ_FENCE;
  716. switch (wr->opcode) {
  717. case IB_WR_SEND:
  718. case IB_WR_SEND_WITH_INV:
  719. if (wr->send_flags & IB_SEND_SOLICITED)
  720. sqe->flags |= SIW_WQE_SOLICITED;
  721. if (!(wr->send_flags & IB_SEND_INLINE)) {
  722. siw_copy_sgl(wr->sg_list, sqe->sge,
  723. wr->num_sge);
  724. sqe->num_sge = wr->num_sge;
  725. } else {
  726. rv = siw_copy_inline_sgl(wr, sqe);
  727. if (rv <= 0) {
  728. rv = -EINVAL;
  729. break;
  730. }
  731. sqe->flags |= SIW_WQE_INLINE;
  732. sqe->num_sge = 1;
  733. }
  734. if (wr->opcode == IB_WR_SEND)
  735. sqe->opcode = SIW_OP_SEND;
  736. else {
  737. sqe->opcode = SIW_OP_SEND_REMOTE_INV;
  738. sqe->rkey = wr->ex.invalidate_rkey;
  739. }
  740. break;
  741. case IB_WR_RDMA_READ_WITH_INV:
  742. case IB_WR_RDMA_READ:
  743. /*
  744. * iWarp restricts RREAD sink to SGL containing
  745. * 1 SGE only. We could relax to an SGL with multiple
  746. * elements referring to the SAME ltag, or even sending
  747. * a private per-rreq tag referring to a checked
  748. * local sgl with MULTIPLE ltag's.
  749. */
  750. if (unlikely(wr->num_sge != 1)) {
  751. rv = -EINVAL;
  752. break;
  753. }
  754. siw_copy_sgl(wr->sg_list, &sqe->sge[0], 1);
  755. /*
  756. * NOTE: zero length RREAD is allowed!
  757. */
  758. sqe->raddr = rdma_wr(wr)->remote_addr;
  759. sqe->rkey = rdma_wr(wr)->rkey;
  760. sqe->num_sge = 1;
  761. if (wr->opcode == IB_WR_RDMA_READ)
  762. sqe->opcode = SIW_OP_READ;
  763. else
  764. sqe->opcode = SIW_OP_READ_LOCAL_INV;
  765. break;
  766. case IB_WR_RDMA_WRITE:
  767. if (!(wr->send_flags & IB_SEND_INLINE)) {
  768. siw_copy_sgl(wr->sg_list, &sqe->sge[0],
  769. wr->num_sge);
  770. sqe->num_sge = wr->num_sge;
  771. } else {
  772. rv = siw_copy_inline_sgl(wr, sqe);
  773. if (unlikely(rv < 0)) {
  774. rv = -EINVAL;
  775. break;
  776. }
  777. sqe->flags |= SIW_WQE_INLINE;
  778. sqe->num_sge = 1;
  779. }
  780. sqe->raddr = rdma_wr(wr)->remote_addr;
  781. sqe->rkey = rdma_wr(wr)->rkey;
  782. sqe->opcode = SIW_OP_WRITE;
  783. break;
  784. case IB_WR_REG_MR:
  785. sqe->base_mr = (uintptr_t)reg_wr(wr)->mr;
  786. sqe->rkey = reg_wr(wr)->key;
  787. sqe->access = reg_wr(wr)->access & IWARP_ACCESS_MASK;
  788. sqe->opcode = SIW_OP_REG_MR;
  789. break;
  790. case IB_WR_LOCAL_INV:
  791. sqe->rkey = wr->ex.invalidate_rkey;
  792. sqe->opcode = SIW_OP_INVAL_STAG;
  793. break;
  794. default:
  795. siw_dbg_qp(qp, "ib wr type %d unsupported\n",
  796. wr->opcode);
  797. rv = -EINVAL;
  798. break;
  799. }
  800. siw_dbg_qp(qp, "opcode %d, flags 0x%x, wr_id 0x%pK\n",
  801. sqe->opcode, sqe->flags,
  802. (void *)(uintptr_t)sqe->id);
  803. if (unlikely(rv < 0))
  804. break;
  805. /* make SQE only valid after completely written */
  806. smp_wmb();
  807. sqe->flags |= SIW_WQE_VALID;
  808. qp->sq_put++;
  809. wr = wr->next;
  810. }
  811. /*
  812. * Send directly if SQ processing is not in progress.
  813. * Eventual immediate errors (rv < 0) do not affect the involved
  814. * RI resources (Verbs, 8.3.1) and thus do not prevent from SQ
  815. * processing, if new work is already pending. But rv must be passed
  816. * to caller.
  817. */
  818. if (wqe->wr_status != SIW_WR_IDLE) {
  819. spin_unlock_irqrestore(&qp->sq_lock, flags);
  820. goto skip_direct_sending;
  821. }
  822. rv = siw_activate_tx(qp);
  823. spin_unlock_irqrestore(&qp->sq_lock, flags);
  824. if (rv <= 0)
  825. goto skip_direct_sending;
  826. if (rdma_is_kernel_res(&qp->base_qp.res)) {
  827. rv = siw_sq_start(qp);
  828. } else {
  829. qp->tx_ctx.in_syscall = 1;
  830. if (siw_qp_sq_process(qp) != 0 && !(qp->tx_ctx.tx_suspend))
  831. siw_qp_cm_drop(qp, 0);
  832. qp->tx_ctx.in_syscall = 0;
  833. }
  834. skip_direct_sending:
  835. up_read(&qp->state_lock);
  836. if (rv >= 0)
  837. return 0;
  838. /*
  839. * Immediate error
  840. */
  841. siw_dbg_qp(qp, "error %d\n", rv);
  842. *bad_wr = wr;
  843. return rv;
  844. }
  845. /*
  846. * siw_post_receive()
  847. *
  848. * Post a list of R-WR's to a RQ.
  849. *
  850. * @base_qp: Base QP contained in siw QP
  851. * @wr: Null terminated list of user WR's
  852. * @bad_wr: Points to failing WR in case of synchronous failure.
  853. */
  854. int siw_post_receive(struct ib_qp *base_qp, const struct ib_recv_wr *wr,
  855. const struct ib_recv_wr **bad_wr)
  856. {
  857. struct siw_qp *qp = to_siw_qp(base_qp);
  858. unsigned long flags;
  859. int rv = 0;
  860. if (qp->srq || qp->attrs.rq_size == 0) {
  861. *bad_wr = wr;
  862. return -EINVAL;
  863. }
  864. if (!rdma_is_kernel_res(&qp->base_qp.res)) {
  865. siw_dbg_qp(qp, "no kernel post_recv for user mapped rq\n");
  866. *bad_wr = wr;
  867. return -EINVAL;
  868. }
  869. /*
  870. * Try to acquire QP state lock. Must be non-blocking
  871. * to accommodate kernel clients' needs.
  872. */
  873. if (!down_read_trylock(&qp->state_lock)) {
  874. if (qp->attrs.state == SIW_QP_STATE_ERROR) {
  875. /*
  876. * ERROR state is final, so we can be sure
  877. * this state will not change as long as the QP
  878. * exists.
  879. *
  880. * This handles an ib_drain_rq() call with
  881. * a concurrent request to set the QP state
  882. * to ERROR.
  883. */
  884. rv = siw_rq_flush_wr(qp, wr, bad_wr);
  885. } else {
  886. siw_dbg_qp(qp, "QP locked, state %d\n",
  887. qp->attrs.state);
  888. *bad_wr = wr;
  889. rv = -ENOTCONN;
  890. }
  891. return rv;
  892. }
  893. if (qp->attrs.state > SIW_QP_STATE_RTS) {
  894. if (qp->attrs.state == SIW_QP_STATE_ERROR) {
  895. /*
  896. * Immediately flush this WR to CQ, if QP
  897. * is in ERROR state. RQ is guaranteed to
  898. * be empty, so the WR completes in order.
  899. *
  900. * Typically triggered by ib_drain_rq().
  901. */
  902. rv = siw_rq_flush_wr(qp, wr, bad_wr);
  903. } else {
  904. siw_dbg_qp(qp, "QP out of state %d\n",
  905. qp->attrs.state);
  906. *bad_wr = wr;
  907. rv = -ENOTCONN;
  908. }
  909. up_read(&qp->state_lock);
  910. return rv;
  911. }
  912. /*
  913. * Serialize potentially multiple producers.
  914. * Not needed for single threaded consumer side.
  915. */
  916. spin_lock_irqsave(&qp->rq_lock, flags);
  917. while (wr) {
  918. u32 idx = qp->rq_put % qp->attrs.rq_size;
  919. struct siw_rqe *rqe = &qp->recvq[idx];
  920. if (rqe->flags) {
  921. siw_dbg_qp(qp, "RQ full\n");
  922. rv = -ENOMEM;
  923. break;
  924. }
  925. if (wr->num_sge > qp->attrs.rq_max_sges) {
  926. siw_dbg_qp(qp, "too many sge's: %d\n", wr->num_sge);
  927. rv = -EINVAL;
  928. break;
  929. }
  930. rqe->id = wr->wr_id;
  931. rqe->num_sge = wr->num_sge;
  932. siw_copy_sgl(wr->sg_list, rqe->sge, wr->num_sge);
  933. /* make sure RQE is completely written before valid */
  934. smp_wmb();
  935. rqe->flags = SIW_WQE_VALID;
  936. qp->rq_put++;
  937. wr = wr->next;
  938. }
  939. spin_unlock_irqrestore(&qp->rq_lock, flags);
  940. up_read(&qp->state_lock);
  941. if (rv < 0) {
  942. siw_dbg_qp(qp, "error %d\n", rv);
  943. *bad_wr = wr;
  944. }
  945. return rv > 0 ? 0 : rv;
  946. }
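/*
 * A minimal kernel-client sketch of posting a receive buffer, assuming
 * 'dma_addr' is an already DMA-mapped buffer and 'lkey' belongs to a
 * valid local memory registration. 'post_recv_buf' is an illustrative
 * helper, not part of siw.
 */
#if 0	/* illustrative sketch, not driver code */
static int post_recv_buf(struct ib_qp *qp, u64 dma_addr, u32 len,
			 u32 lkey, u64 wr_id)
{
	struct ib_sge sge = {
		.addr = dma_addr,
		.length = len,
		.lkey = lkey,
	};
	struct ib_recv_wr wr = {
		.wr_id = wr_id,
		.sg_list = &sge,
		.num_sge = 1,
	};
	const struct ib_recv_wr *bad_wr;

	return ib_post_recv(qp, &wr, &bad_wr);
}
#endif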
  947. int siw_destroy_cq(struct ib_cq *base_cq, struct ib_udata *udata)
  948. {
  949. struct siw_cq *cq = to_siw_cq(base_cq);
  950. struct siw_device *sdev = to_siw_dev(base_cq->device);
  951. struct siw_ucontext *ctx =
  952. rdma_udata_to_drv_context(udata, struct siw_ucontext,
  953. base_ucontext);
  954. siw_dbg_cq(cq, "free CQ resources\n");
  955. siw_cq_flush(cq);
  956. if (ctx)
  957. rdma_user_mmap_entry_remove(cq->cq_entry);
  958. atomic_dec(&sdev->num_cq);
  959. vfree(cq->queue);
  960. return 0;
  961. }
  962. /*
  963. * siw_create_cq()
  964. *
  965. * Populate CQ of requested size
  966. *
  967. * @base_cq: CQ as allocated by RDMA midlayer
  968. * @attr: Initial CQ attributes
  969. * @udata: relates to user context
  970. */
  971. int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,
  972. struct ib_udata *udata)
  973. {
  974. struct siw_device *sdev = to_siw_dev(base_cq->device);
  975. struct siw_cq *cq = to_siw_cq(base_cq);
  976. int rv, size = attr->cqe;
  977. if (attr->flags)
  978. return -EOPNOTSUPP;
  979. if (atomic_inc_return(&sdev->num_cq) > SIW_MAX_CQ) {
  980. siw_dbg(base_cq->device, "too many CQ's\n");
  981. rv = -ENOMEM;
  982. goto err_out;
  983. }
  984. if (size < 1 || size > sdev->attrs.max_cqe) {
  985. siw_dbg(base_cq->device, "CQ size error: %d\n", size);
  986. rv = -EINVAL;
  987. goto err_out;
  988. }
  989. size = roundup_pow_of_two(size);
  990. cq->base_cq.cqe = size;
  991. cq->num_cqe = size;
  992. if (udata)
  993. cq->queue = vmalloc_user(size * sizeof(struct siw_cqe) +
  994. sizeof(struct siw_cq_ctrl));
  995. else
  996. cq->queue = vzalloc(size * sizeof(struct siw_cqe) +
  997. sizeof(struct siw_cq_ctrl));
  998. if (cq->queue == NULL) {
  999. rv = -ENOMEM;
  1000. goto err_out;
  1001. }
  1002. get_random_bytes(&cq->id, 4);
  1003. siw_dbg(base_cq->device, "new CQ [%u]\n", cq->id);
  1004. spin_lock_init(&cq->lock);
  1005. cq->notify = (struct siw_cq_ctrl *)&cq->queue[size];
  1006. if (udata) {
  1007. struct siw_uresp_create_cq uresp = {};
  1008. struct siw_ucontext *ctx =
  1009. rdma_udata_to_drv_context(udata, struct siw_ucontext,
  1010. base_ucontext);
  1011. size_t length = size * sizeof(struct siw_cqe) +
  1012. sizeof(struct siw_cq_ctrl);
  1013. cq->cq_entry =
  1014. siw_mmap_entry_insert(ctx, cq->queue,
  1015. length, &uresp.cq_key);
  1016. if (!cq->cq_entry) {
  1017. rv = -ENOMEM;
  1018. goto err_out;
  1019. }
  1020. uresp.cq_id = cq->id;
  1021. uresp.num_cqe = size;
  1022. if (udata->outlen < sizeof(uresp)) {
  1023. rv = -EINVAL;
  1024. goto err_out;
  1025. }
  1026. rv = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
  1027. if (rv)
  1028. goto err_out;
  1029. }
  1030. return 0;
  1031. err_out:
  1032. siw_dbg(base_cq->device, "CQ creation failed: %d", rv);
  1033. if (cq->queue) {
  1034. struct siw_ucontext *ctx =
  1035. rdma_udata_to_drv_context(udata, struct siw_ucontext,
  1036. base_ucontext);
  1037. if (ctx)
  1038. rdma_user_mmap_entry_remove(cq->cq_entry);
  1039. vfree(cq->queue);
  1040. }
  1041. atomic_dec(&sdev->num_cq);
  1042. return rv;
  1043. }
  1044. /*
  1045. * siw_poll_cq()
  1046. *
  1047. * Reap CQ entries if available and copy work completion status into
  1048. * array of WC's provided by caller. Returns number of reaped CQE's.
  1049. *
  1050. * @base_cq: Base CQ contained in siw CQ.
  1051. * @num_cqe: Maximum number of CQE's to reap.
  1052. * @wc: Array of work completions to be filled by siw.
  1053. */
  1054. int siw_poll_cq(struct ib_cq *base_cq, int num_cqe, struct ib_wc *wc)
  1055. {
  1056. struct siw_cq *cq = to_siw_cq(base_cq);
  1057. int i;
  1058. for (i = 0; i < num_cqe; i++) {
  1059. if (!siw_reap_cqe(cq, wc))
  1060. break;
  1061. wc++;
  1062. }
  1063. return i;
  1064. }
  1065. /*
  1066. * siw_req_notify_cq()
  1067. *
  1068. * Request notification for new CQE's added to that CQ.
  1069. * Defined flags:
  1070. * o SIW_CQ_NOTIFY_SOLICITED lets siw trigger a notification
  1071. * event if a WQE with notification flag set enters the CQ
  1072. * o SIW_CQ_NOTIFY_NEXT_COMP lets siw trigger a notification
  1073. * event if a WQE enters the CQ.
  1074. * o IB_CQ_REPORT_MISSED_EVENTS: return value will provide the
  1075. * number of not reaped CQE's regardless of its notification
  1076. * type and current or new CQ notification settings.
  1077. *
  1078. * @base_cq: Base CQ contained in siw CQ.
  1079. * @flags: Requested notification flags.
  1080. */
  1081. int siw_req_notify_cq(struct ib_cq *base_cq, enum ib_cq_notify_flags flags)
  1082. {
  1083. struct siw_cq *cq = to_siw_cq(base_cq);
  1084. siw_dbg_cq(cq, "flags: 0x%02x\n", flags);
  1085. if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)
  1086. /*
  1087. * Enable CQ event for next solicited completion.
  1088. * and make it visible to all associated producers.
  1089. */
  1090. smp_store_mb(cq->notify->flags, SIW_NOTIFY_SOLICITED);
  1091. else
  1092. /*
  1093. * Enable CQ event for any signalled completion.
  1094. * and make it visible to all associated producers.
  1095. */
  1096. smp_store_mb(cq->notify->flags, SIW_NOTIFY_ALL);
  1097. if (flags & IB_CQ_REPORT_MISSED_EVENTS)
  1098. return cq->cq_put - cq->cq_get;
  1099. return 0;
  1100. }
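/*
 * A hedged sketch of the usual arm-then-repoll pattern built on the
 * notification semantics described above: re-arm the CQ, then poll
 * again so completions racing with the arming are not missed.
 * 'drain_cq' and 'handle_wc' are illustrative names only.
 */
#if 0	/* illustrative sketch, not driver code */
static void drain_cq(struct ib_cq *cq)
{
	struct ib_wc wc;

	do {
		while (ib_poll_cq(cq, 1, &wc) > 0)
			handle_wc(&wc);	/* consumer-provided handler */
	} while (ib_req_notify_cq(cq, IB_CQ_NEXT_COMP |
				      IB_CQ_REPORT_MISSED_EVENTS) > 0);
}
#endif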
  1101. /*
  1102. * siw_dereg_mr()
  1103. *
  1104. * Release Memory Region.
  1105. *
  1106. * @base_mr: Base MR contained in siw MR.
  1107. * @udata: points to user context, unused.
  1108. */
  1109. int siw_dereg_mr(struct ib_mr *base_mr, struct ib_udata *udata)
  1110. {
  1111. struct siw_mr *mr = to_siw_mr(base_mr);
  1112. struct siw_device *sdev = to_siw_dev(base_mr->device);
  1113. siw_dbg_mem(mr->mem, "deregister MR\n");
  1114. atomic_dec(&sdev->num_mr);
  1115. siw_mr_drop_mem(mr);
  1116. kfree_rcu(mr, rcu);
  1117. return 0;
  1118. }
  1119. /*
  1120. * siw_reg_user_mr()
  1121. *
  1122. * Register Memory Region.
  1123. *
  1124. * @pd: Protection Domain
  1125. * @start: starting address of MR (virtual address)
  1126. * @len: len of MR
  1127. * @rnic_va: not used by siw
  1128. * @rights: MR access rights
  1129. * @udata: user buffer to communicate STag and Key.
  1130. */
  1131. struct ib_mr *siw_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
  1132. u64 rnic_va, int rights, struct ib_udata *udata)
  1133. {
  1134. struct siw_mr *mr = NULL;
  1135. struct siw_umem *umem = NULL;
  1136. struct siw_ureq_reg_mr ureq;
  1137. struct siw_device *sdev = to_siw_dev(pd->device);
  1138. unsigned long mem_limit = rlimit(RLIMIT_MEMLOCK);
  1139. int rv;
  1140. siw_dbg_pd(pd, "start: 0x%pK, va: 0x%pK, len: %llu\n",
  1141. (void *)(uintptr_t)start, (void *)(uintptr_t)rnic_va,
  1142. (unsigned long long)len);
  1143. if (atomic_inc_return(&sdev->num_mr) > SIW_MAX_MR) {
  1144. siw_dbg_pd(pd, "too many mr's\n");
  1145. rv = -ENOMEM;
  1146. goto err_out;
  1147. }
  1148. if (!len) {
  1149. rv = -EINVAL;
  1150. goto err_out;
  1151. }
  1152. if (mem_limit != RLIM_INFINITY) {
  1153. unsigned long num_pages =
  1154. (PAGE_ALIGN(len + (start & ~PAGE_MASK))) >> PAGE_SHIFT;
  1155. mem_limit >>= PAGE_SHIFT;
  1156. if (num_pages > mem_limit - current->mm->locked_vm) {
  1157. siw_dbg_pd(pd, "pages req %lu, max %lu, lock %lu\n",
  1158. num_pages, mem_limit,
  1159. current->mm->locked_vm);
  1160. rv = -ENOMEM;
  1161. goto err_out;
  1162. }
  1163. }
  1164. umem = siw_umem_get(start, len, ib_access_writable(rights));
  1165. if (IS_ERR(umem)) {
  1166. rv = PTR_ERR(umem);
  1167. siw_dbg_pd(pd, "getting user memory failed: %d\n", rv);
  1168. umem = NULL;
  1169. goto err_out;
  1170. }
  1171. mr = kzalloc(sizeof(*mr), GFP_KERNEL);
  1172. if (!mr) {
  1173. rv = -ENOMEM;
  1174. goto err_out;
  1175. }
  1176. rv = siw_mr_add_mem(mr, pd, umem, start, len, rights);
  1177. if (rv)
  1178. goto err_out;
  1179. if (udata) {
  1180. struct siw_uresp_reg_mr uresp = {};
  1181. struct siw_mem *mem = mr->mem;
  1182. if (udata->inlen < sizeof(ureq)) {
  1183. rv = -EINVAL;
  1184. goto err_out;
  1185. }
  1186. rv = ib_copy_from_udata(&ureq, udata, sizeof(ureq));
  1187. if (rv)
  1188. goto err_out;
  1189. mr->base_mr.lkey |= ureq.stag_key;
  1190. mr->base_mr.rkey |= ureq.stag_key;
  1191. mem->stag |= ureq.stag_key;
  1192. uresp.stag = mem->stag;
  1193. if (udata->outlen < sizeof(uresp)) {
  1194. rv = -EINVAL;
  1195. goto err_out;
  1196. }
  1197. rv = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
  1198. if (rv)
  1199. goto err_out;
  1200. }
  1201. mr->mem->stag_valid = 1;
  1202. return &mr->base_mr;
  1203. err_out:
  1204. atomic_dec(&sdev->num_mr);
  1205. if (mr) {
  1206. if (mr->mem)
  1207. siw_mr_drop_mem(mr);
  1208. kfree_rcu(mr, rcu);
  1209. } else {
  1210. if (umem)
  1211. siw_umem_release(umem, false);
  1212. }
  1213. return ERR_PTR(rv);
  1214. }
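/*
 * The RLIMIT_MEMLOCK check above counts whole pages touched by the
 * registration. A small worked example, as a sketch: a 10000 byte MR
 * starting 100 bytes into a 4 KiB page spans
 * PAGE_ALIGN(10000 + 100) >> PAGE_SHIFT = 3 pages.
 */
#if 0	/* illustrative sketch, not driver code */
static unsigned long mr_num_pages(u64 start, u64 len)
{
	return PAGE_ALIGN(len + (start & ~PAGE_MASK)) >> PAGE_SHIFT;
}
#endif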
  1215. struct ib_mr *siw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
  1216. u32 max_sge)
  1217. {
  1218. struct siw_device *sdev = to_siw_dev(pd->device);
  1219. struct siw_mr *mr = NULL;
  1220. struct siw_pbl *pbl = NULL;
  1221. int rv;
  1222. if (atomic_inc_return(&sdev->num_mr) > SIW_MAX_MR) {
  1223. siw_dbg_pd(pd, "too many mr's\n");
  1224. rv = -ENOMEM;
  1225. goto err_out;
  1226. }
  1227. if (mr_type != IB_MR_TYPE_MEM_REG) {
  1228. siw_dbg_pd(pd, "mr type %d unsupported\n", mr_type);
  1229. rv = -EOPNOTSUPP;
  1230. goto err_out;
  1231. }
  1232. if (max_sge > SIW_MAX_SGE_PBL) {
  1233. siw_dbg_pd(pd, "too many sge's: %d\n", max_sge);
  1234. rv = -ENOMEM;
  1235. goto err_out;
  1236. }
  1237. pbl = siw_pbl_alloc(max_sge);
  1238. if (IS_ERR(pbl)) {
  1239. rv = PTR_ERR(pbl);
  1240. siw_dbg_pd(pd, "pbl allocation failed: %d\n", rv);
  1241. pbl = NULL;
  1242. goto err_out;
  1243. }
  1244. mr = kzalloc(sizeof(*mr), GFP_KERNEL);
  1245. if (!mr) {
  1246. rv = -ENOMEM;
  1247. goto err_out;
  1248. }
  1249. rv = siw_mr_add_mem(mr, pd, pbl, 0, max_sge * PAGE_SIZE, 0);
  1250. if (rv)
  1251. goto err_out;
  1252. mr->mem->is_pbl = 1;
  1253. siw_dbg_pd(pd, "[MEM %u]: success\n", mr->mem->stag);
  1254. return &mr->base_mr;
  1255. err_out:
  1256. atomic_dec(&sdev->num_mr);
  1257. if (!mr) {
  1258. kfree(pbl);
  1259. } else {
  1260. if (mr->mem)
  1261. siw_mr_drop_mem(mr);
  1262. kfree_rcu(mr, rcu);
  1263. }
  1264. siw_dbg_pd(pd, "failed: %d\n", rv);
  1265. return ERR_PTR(rv);
  1266. }
  1267. /* Just used to count number of pages being mapped */
  1268. static int siw_set_pbl_page(struct ib_mr *base_mr, u64 buf_addr)
  1269. {
  1270. return 0;
  1271. }
  1272. int siw_map_mr_sg(struct ib_mr *base_mr, struct scatterlist *sl, int num_sle,
  1273. unsigned int *sg_off)
  1274. {
  1275. struct scatterlist *slp;
  1276. struct siw_mr *mr = to_siw_mr(base_mr);
  1277. struct siw_mem *mem = mr->mem;
  1278. struct siw_pbl *pbl = mem->pbl;
  1279. struct siw_pble *pble;
  1280. unsigned long pbl_size;
  1281. int i, rv;
  1282. if (!pbl) {
  1283. siw_dbg_mem(mem, "no PBL allocated\n");
  1284. return -EINVAL;
  1285. }
  1286. pble = pbl->pbe;
  1287. if (pbl->max_buf < num_sle) {
  1288. siw_dbg_mem(mem, "too many SGE's: %d > %d\n",
  1289. num_sle, pbl->max_buf);
  1290. return -ENOMEM;
  1291. }
  1292. for_each_sg(sl, slp, num_sle, i) {
  1293. if (sg_dma_len(slp) == 0) {
  1294. siw_dbg_mem(mem, "empty SGE\n");
  1295. return -EINVAL;
  1296. }
  1297. if (i == 0) {
  1298. pble->addr = sg_dma_address(slp);
  1299. pble->size = sg_dma_len(slp);
  1300. pble->pbl_off = 0;
  1301. pbl_size = pble->size;
  1302. pbl->num_buf = 1;
  1303. } else {
  1304. /* Merge PBL entries if adjacent */
  1305. if (pble->addr + pble->size == sg_dma_address(slp)) {
  1306. pble->size += sg_dma_len(slp);
  1307. } else {
  1308. pble++;
  1309. pbl->num_buf++;
  1310. pble->addr = sg_dma_address(slp);
  1311. pble->size = sg_dma_len(slp);
  1312. pble->pbl_off = pbl_size;
  1313. }
  1314. pbl_size += sg_dma_len(slp);
  1315. }
  1316. siw_dbg_mem(mem,
  1317. "sge[%d], size %u, addr 0x%p, total %lu\n",
  1318. i, pble->size, (void *)(uintptr_t)pble->addr,
  1319. pbl_size);
  1320. }
  1321. rv = ib_sg_to_pages(base_mr, sl, num_sle, sg_off, siw_set_pbl_page);
  1322. if (rv > 0) {
  1323. mem->len = base_mr->length;
  1324. mem->va = base_mr->iova;
  1325. siw_dbg_mem(mem,
  1326. "%llu bytes, start 0x%pK, %u SLE to %u entries\n",
  1327. mem->len, (void *)(uintptr_t)mem->va, num_sle,
  1328. pbl->num_buf);
  1329. }
  1330. return rv;
  1331. }
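/*
 * A rough kernel-client sketch of the fast-registration flow served by
 * siw_alloc_mr()/siw_map_mr_sg() above and consumed as IB_WR_REG_MR in
 * siw_post_send(). Error unwinding and rkey rotation are trimmed;
 * 'fast_reg_example' is an illustrative name, and 'sgl'/'nents'
 * describe an already DMA-mapped scatterlist.
 */
#if 0	/* illustrative sketch, not driver code */
static int fast_reg_example(struct ib_pd *pd, struct ib_qp *qp,
			    struct scatterlist *sgl, int nents)
{
	struct ib_mr *mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, nents);
	struct ib_reg_wr reg_wr = {};
	const struct ib_send_wr *bad_wr;
	int n;

	if (IS_ERR(mr))
		return PTR_ERR(mr);

	n = ib_map_mr_sg(mr, sgl, nents, NULL, PAGE_SIZE);
	if (n <= 0)
		return n ? n : -EINVAL;

	reg_wr.wr.opcode = IB_WR_REG_MR;
	reg_wr.wr.send_flags = IB_SEND_SIGNALED;
	reg_wr.mr = mr;
	reg_wr.key = mr->rkey;
	reg_wr.access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ;

	return ib_post_send(qp, &reg_wr.wr, &bad_wr);
}
#endif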
  1332. /*
  1333. * siw_get_dma_mr()
  1334. *
  1335. * Create an (empty) DMA memory region to which no umem is attached.
  1336. */
  1337. struct ib_mr *siw_get_dma_mr(struct ib_pd *pd, int rights)
  1338. {
  1339. struct siw_device *sdev = to_siw_dev(pd->device);
  1340. struct siw_mr *mr = NULL;
  1341. int rv;
  1342. if (atomic_inc_return(&sdev->num_mr) > SIW_MAX_MR) {
  1343. siw_dbg_pd(pd, "too many mr's\n");
  1344. rv = -ENOMEM;
  1345. goto err_out;
  1346. }
  1347. mr = kzalloc(sizeof(*mr), GFP_KERNEL);
  1348. if (!mr) {
  1349. rv = -ENOMEM;
  1350. goto err_out;
  1351. }
  1352. rv = siw_mr_add_mem(mr, pd, NULL, 0, ULONG_MAX, rights);
  1353. if (rv)
  1354. goto err_out;
  1355. mr->mem->stag_valid = 1;
  1356. siw_dbg_pd(pd, "[MEM %u]: success\n", mr->mem->stag);
  1357. return &mr->base_mr;
  1358. err_out:
  1359. if (rv)
  1360. kfree(mr);
  1361. atomic_dec(&sdev->num_mr);
  1362. return ERR_PTR(rv);
  1363. }
  1364. /*
  1365. * siw_create_srq()
  1366. *
  1367. * Create Shared Receive Queue of attributes @init_attrs
  1368. * within protection domain given by @pd.
  1369. *
  1370. * @base_srq: Base SRQ contained in siw SRQ.
  1371. * @init_attrs: SRQ init attributes.
  1372. * @udata: points to user context
  1373. */
  1374. int siw_create_srq(struct ib_srq *base_srq,
  1375. struct ib_srq_init_attr *init_attrs, struct ib_udata *udata)
  1376. {
  1377. struct siw_srq *srq = to_siw_srq(base_srq);
  1378. struct ib_srq_attr *attrs = &init_attrs->attr;
  1379. struct siw_device *sdev = to_siw_dev(base_srq->device);
  1380. struct siw_ucontext *ctx =
  1381. rdma_udata_to_drv_context(udata, struct siw_ucontext,
  1382. base_ucontext);
  1383. int rv;
  1384. if (init_attrs->srq_type != IB_SRQT_BASIC)
  1385. return -EOPNOTSUPP;
  1386. if (atomic_inc_return(&sdev->num_srq) > SIW_MAX_SRQ) {
  1387. siw_dbg_pd(base_srq->pd, "too many SRQ's\n");
  1388. rv = -ENOMEM;
  1389. goto err_out;
  1390. }
  1391. if (attrs->max_wr == 0 || attrs->max_wr > SIW_MAX_SRQ_WR ||
  1392. attrs->max_sge > SIW_MAX_SGE || attrs->srq_limit > attrs->max_wr) {
  1393. rv = -EINVAL;
  1394. goto err_out;
  1395. }
  1396. srq->max_sge = attrs->max_sge;
  1397. srq->num_rqe = roundup_pow_of_two(attrs->max_wr);
  1398. srq->limit = attrs->srq_limit;
  1399. if (srq->limit)
  1400. srq->armed = true;
  1401. srq->is_kernel_res = !udata;
  1402. if (udata)
  1403. srq->recvq =
  1404. vmalloc_user(srq->num_rqe * sizeof(struct siw_rqe));
  1405. else
  1406. srq->recvq = vzalloc(srq->num_rqe * sizeof(struct siw_rqe));
  1407. if (srq->recvq == NULL) {
  1408. rv = -ENOMEM;
  1409. goto err_out;
  1410. }
  1411. if (udata) {
  1412. struct siw_uresp_create_srq uresp = {};
  1413. size_t length = srq->num_rqe * sizeof(struct siw_rqe);
  1414. srq->srq_entry =
  1415. siw_mmap_entry_insert(ctx, srq->recvq,
  1416. length, &uresp.srq_key);
  1417. if (!srq->srq_entry) {
  1418. rv = -ENOMEM;
  1419. goto err_out;
  1420. }
  1421. uresp.num_rqe = srq->num_rqe;
  1422. if (udata->outlen < sizeof(uresp)) {
  1423. rv = -EINVAL;
  1424. goto err_out;
  1425. }
  1426. rv = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
  1427. if (rv)
  1428. goto err_out;
  1429. }
  1430. spin_lock_init(&srq->lock);
  1431. siw_dbg_pd(base_srq->pd, "[SRQ]: success\n");
  1432. return 0;
  1433. err_out:
  1434. if (srq->recvq) {
  1435. if (ctx)
  1436. rdma_user_mmap_entry_remove(srq->srq_entry);
  1437. vfree(srq->recvq);
  1438. }
  1439. atomic_dec(&sdev->num_srq);
  1440. return rv;
  1441. }
  1442. /*
  1443. * siw_modify_srq()
  1444. *
  1445. * Modify SRQ. The caller may resize SRQ and/or set/reset notification
  1446. * limit and (re)arm IB_EVENT_SRQ_LIMIT_REACHED notification.
  1447. *
  1448. * NOTE: it is unclear if RDMA core allows for changing the MAX_SGE
  1449. * parameter. siw_modify_srq() does not check the attrs->max_sge param.
  1450. */
  1451. int siw_modify_srq(struct ib_srq *base_srq, struct ib_srq_attr *attrs,
  1452. enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
  1453. {
  1454. struct siw_srq *srq = to_siw_srq(base_srq);
  1455. unsigned long flags;
  1456. int rv = 0;
  1457. spin_lock_irqsave(&srq->lock, flags);
  1458. if (attr_mask & IB_SRQ_MAX_WR) {
  1459. /* resize request not yet supported */
  1460. rv = -EOPNOTSUPP;
  1461. goto out;
  1462. }
  1463. if (attr_mask & IB_SRQ_LIMIT) {
  1464. if (attrs->srq_limit) {
  1465. if (unlikely(attrs->srq_limit > srq->num_rqe)) {
  1466. rv = -EINVAL;
  1467. goto out;
  1468. }
  1469. srq->armed = true;
  1470. } else {
  1471. srq->armed = false;
  1472. }
  1473. srq->limit = attrs->srq_limit;
  1474. }
  1475. out:
  1476. spin_unlock_irqrestore(&srq->lock, flags);
  1477. return rv;
  1478. }
  1479. /*
  1480. * siw_query_srq()
  1481. *
  1482. * Query SRQ attributes.
  1483. */
  1484. int siw_query_srq(struct ib_srq *base_srq, struct ib_srq_attr *attrs)
  1485. {
  1486. struct siw_srq *srq = to_siw_srq(base_srq);
  1487. unsigned long flags;
  1488. spin_lock_irqsave(&srq->lock, flags);
  1489. attrs->max_wr = srq->num_rqe;
  1490. attrs->max_sge = srq->max_sge;
  1491. attrs->srq_limit = srq->limit;
  1492. spin_unlock_irqrestore(&srq->lock, flags);
  1493. return 0;
  1494. }
  1495. /*
  1496. * siw_destroy_srq()
  1497. *
  1498. * Destroy SRQ.
  1499. * It is assumed that the SRQ is not referenced by any
  1500. * QP anymore - the code trusts the RDMA core environment to keep track
  1501. * of QP references.
  1502. */
  1503. int siw_destroy_srq(struct ib_srq *base_srq, struct ib_udata *udata)
  1504. {
  1505. struct siw_srq *srq = to_siw_srq(base_srq);
  1506. struct siw_device *sdev = to_siw_dev(base_srq->device);
  1507. struct siw_ucontext *ctx =
  1508. rdma_udata_to_drv_context(udata, struct siw_ucontext,
  1509. base_ucontext);
  1510. if (ctx)
  1511. rdma_user_mmap_entry_remove(srq->srq_entry);
  1512. vfree(srq->recvq);
  1513. atomic_dec(&sdev->num_srq);
  1514. return 0;
  1515. }
  1516. /*
  1517. * siw_post_srq_recv()
  1518. *
  1519. * Post a list of receive queue elements to SRQ.
  1520. * NOTE: The function does not check or lock a certain SRQ state
  1521. * during the post operation. The code simply trusts the
  1522. * RDMA core environment.
  1523. *
  1524. * @base_srq: Base SRQ contained in siw SRQ
  1525. * @wr: List of R-WR's
  1526. * @bad_wr: Updated to failing WR if posting fails.
  1527. */
  1528. int siw_post_srq_recv(struct ib_srq *base_srq, const struct ib_recv_wr *wr,
  1529. const struct ib_recv_wr **bad_wr)
  1530. {
  1531. struct siw_srq *srq = to_siw_srq(base_srq);
  1532. unsigned long flags;
  1533. int rv = 0;
  1534. if (unlikely(!srq->is_kernel_res)) {
  1535. siw_dbg_pd(base_srq->pd,
  1536. "[SRQ]: no kernel post_recv for mapped srq\n");
  1537. rv = -EINVAL;
  1538. goto out;
  1539. }
  1540. /*
  1541. * Serialize potentially multiple producers.
  1542. * Also needed to serialize potentially multiple
  1543. * consumers.
  1544. */
  1545. spin_lock_irqsave(&srq->lock, flags);
  1546. while (wr) {
  1547. u32 idx = srq->rq_put % srq->num_rqe;
  1548. struct siw_rqe *rqe = &srq->recvq[idx];
  1549. if (rqe->flags) {
  1550. siw_dbg_pd(base_srq->pd, "SRQ full\n");
  1551. rv = -ENOMEM;
  1552. break;
  1553. }
  1554. if (unlikely(wr->num_sge > srq->max_sge)) {
  1555. siw_dbg_pd(base_srq->pd,
  1556. "[SRQ]: too many sge's: %d\n", wr->num_sge);
  1557. rv = -EINVAL;
  1558. break;
  1559. }
  1560. rqe->id = wr->wr_id;
  1561. rqe->num_sge = wr->num_sge;
  1562. siw_copy_sgl(wr->sg_list, rqe->sge, wr->num_sge);
  1563. /* Make sure S-RQE is completely written before valid */
  1564. smp_wmb();
  1565. rqe->flags = SIW_WQE_VALID;
  1566. srq->rq_put++;
  1567. wr = wr->next;
  1568. }
  1569. spin_unlock_irqrestore(&srq->lock, flags);
  1570. out:
  1571. if (unlikely(rv < 0)) {
  1572. siw_dbg_pd(base_srq->pd, "[SRQ]: error %d\n", rv);
  1573. *bad_wr = wr;
  1574. }
  1575. return rv;
  1576. }
  1577. void siw_qp_event(struct siw_qp *qp, enum ib_event_type etype)
  1578. {
  1579. struct ib_event event;
  1580. struct ib_qp *base_qp = &qp->base_qp;
  1581. /*
  1582. * Do not report asynchronous errors on QP which gets
  1583. * destroyed via verbs interface (siw_destroy_qp())
  1584. */
  1585. if (qp->attrs.flags & SIW_QP_IN_DESTROY)
  1586. return;
  1587. event.event = etype;
  1588. event.device = base_qp->device;
  1589. event.element.qp = base_qp;
  1590. if (base_qp->event_handler) {
  1591. siw_dbg_qp(qp, "reporting event %d\n", etype);
  1592. base_qp->event_handler(&event, base_qp->qp_context);
  1593. }
  1594. }
  1595. void siw_cq_event(struct siw_cq *cq, enum ib_event_type etype)
  1596. {
  1597. struct ib_event event;
  1598. struct ib_cq *base_cq = &cq->base_cq;
  1599. event.event = etype;
  1600. event.device = base_cq->device;
  1601. event.element.cq = base_cq;
  1602. if (base_cq->event_handler) {
  1603. siw_dbg_cq(cq, "reporting CQ event %d\n", etype);
  1604. base_cq->event_handler(&event, base_cq->cq_context);
  1605. }
  1606. }
  1607. void siw_srq_event(struct siw_srq *srq, enum ib_event_type etype)
  1608. {
  1609. struct ib_event event;
  1610. struct ib_srq *base_srq = &srq->base_srq;
  1611. event.event = etype;
  1612. event.device = base_srq->device;
  1613. event.element.srq = base_srq;
  1614. if (base_srq->event_handler) {
  1615. siw_dbg_pd(srq->base_srq.pd,
  1616. "reporting SRQ event %d\n", etype);
  1617. base_srq->event_handler(&event, base_srq->srq_context);
  1618. }
  1619. }
  1620. void siw_port_event(struct siw_device *sdev, u32 port, enum ib_event_type etype)
  1621. {
  1622. struct ib_event event;
  1623. event.event = etype;
  1624. event.device = &sdev->base_dev;
  1625. event.element.port_num = port;
  1626. siw_dbg(&sdev->base_dev, "reporting port event %d\n", etype);
  1627. ib_dispatch_event(&event);
  1628. }