/*
 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials
 *   provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/kref.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_cache.h>
#include "mlx5_ib.h"
#include "srq.h"
#include "qp.h"

static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq, struct mlx5_eqe *eqe)
{
	struct ib_cq *ibcq = &to_mibcq(cq)->ibcq;

	ibcq->comp_handler(ibcq, ibcq->cq_context);
}

static void mlx5_ib_cq_event(struct mlx5_core_cq *mcq, enum mlx5_event type)
{
	struct mlx5_ib_cq *cq = container_of(mcq, struct mlx5_ib_cq, mcq);
	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
	struct ib_cq *ibcq = &cq->ibcq;
	struct ib_event event;

	if (type != MLX5_EVENT_TYPE_CQ_ERROR) {
		mlx5_ib_warn(dev, "Unexpected event type %d on CQ %06x\n",
			     type, mcq->cqn);
		return;
	}

	if (ibcq->event_handler) {
		event.device = &dev->ib_dev;
		event.event = IB_EVENT_CQ_ERR;
		event.element.cq = ibcq;
		ibcq->event_handler(&event, ibcq->cq_context);
	}
}

static void *get_cqe(struct mlx5_ib_cq *cq, int n)
{
	return mlx5_frag_buf_get_wqe(&cq->buf.fbc, n);
}

static u8 sw_ownership_bit(int n, int nent)
{
	return (n & nent) ? 1 : 0;
}
static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n)
{
	void *cqe = get_cqe(cq, n & cq->ibcq.cqe);
	struct mlx5_cqe64 *cqe64;

	cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;

	if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ibcq.cqe + 1)))) {
		return cqe;
	} else {
		return NULL;
	}
}

static void *next_cqe_sw(struct mlx5_ib_cq *cq)
{
	return get_sw_cqe(cq, cq->mcq.cons_index);
}

static enum ib_wc_opcode get_umr_comp(struct mlx5_ib_wq *wq, int idx)
{
	switch (wq->wr_data[idx]) {
	case MLX5_IB_WR_UMR:
		return 0;

	case IB_WR_LOCAL_INV:
		return IB_WC_LOCAL_INV;

	case IB_WR_REG_MR:
		return IB_WC_REG_MR;

	default:
		pr_warn("unknown completion status\n");
		return 0;
	}
}

static void handle_good_req(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
			    struct mlx5_ib_wq *wq, int idx)
{
	wc->wc_flags = 0;
	switch (be32_to_cpu(cqe->sop_drop_qpn) >> 24) {
	case MLX5_OPCODE_RDMA_WRITE_IMM:
		wc->wc_flags |= IB_WC_WITH_IMM;
		fallthrough;
	case MLX5_OPCODE_RDMA_WRITE:
		wc->opcode = IB_WC_RDMA_WRITE;
		break;
	case MLX5_OPCODE_SEND_IMM:
		wc->wc_flags |= IB_WC_WITH_IMM;
		fallthrough;
	case MLX5_OPCODE_SEND:
	case MLX5_OPCODE_SEND_INVAL:
		wc->opcode = IB_WC_SEND;
		break;
	case MLX5_OPCODE_RDMA_READ:
		wc->opcode = IB_WC_RDMA_READ;
		wc->byte_len = be32_to_cpu(cqe->byte_cnt);
		break;
	case MLX5_OPCODE_ATOMIC_CS:
		wc->opcode = IB_WC_COMP_SWAP;
		wc->byte_len = 8;
		break;
	case MLX5_OPCODE_ATOMIC_FA:
		wc->opcode = IB_WC_FETCH_ADD;
		wc->byte_len = 8;
		break;
	case MLX5_OPCODE_ATOMIC_MASKED_CS:
		wc->opcode = IB_WC_MASKED_COMP_SWAP;
		wc->byte_len = 8;
		break;
	case MLX5_OPCODE_ATOMIC_MASKED_FA:
		wc->opcode = IB_WC_MASKED_FETCH_ADD;
		wc->byte_len = 8;
		break;
	case MLX5_OPCODE_UMR:
		wc->opcode = get_umr_comp(wq, idx);
		break;
	}
}

enum {
	MLX5_GRH_IN_BUFFER = 1,
	MLX5_GRH_IN_CQE = 2,
};

static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
			     struct mlx5_ib_qp *qp)
{
	enum rdma_link_layer ll = rdma_port_get_link_layer(qp->ibqp.device, 1);
	struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
	struct mlx5_ib_srq *srq = NULL;
	struct mlx5_ib_wq *wq;
	u16 wqe_ctr;
	u8 roce_packet_type;
	bool vlan_present;
	u8 g;

	if (qp->ibqp.srq || qp->ibqp.xrcd) {
		struct mlx5_core_srq *msrq = NULL;

		if (qp->ibqp.xrcd) {
			msrq = mlx5_cmd_get_srq(dev, be32_to_cpu(cqe->srqn));
			if (msrq)
				srq = to_mibsrq(msrq);
		} else {
			srq = to_msrq(qp->ibqp.srq);
		}
		if (srq) {
			wqe_ctr = be16_to_cpu(cqe->wqe_counter);
			wc->wr_id = srq->wrid[wqe_ctr];
			mlx5_ib_free_srq_wqe(srq, wqe_ctr);
			if (msrq)
				mlx5_core_res_put(&msrq->common);
		}
	} else {
		wq = &qp->rq;
		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
		++wq->tail;
	}
	wc->byte_len = be32_to_cpu(cqe->byte_cnt);

	switch (get_cqe_opcode(cqe)) {
	case MLX5_CQE_RESP_WR_IMM:
		wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
		wc->wc_flags = IB_WC_WITH_IMM;
		wc->ex.imm_data = cqe->immediate;
		break;
	case MLX5_CQE_RESP_SEND:
		wc->opcode = IB_WC_RECV;
		wc->wc_flags = IB_WC_IP_CSUM_OK;
		if (unlikely(!((cqe->hds_ip_ext & CQE_L3_OK) &&
			       (cqe->hds_ip_ext & CQE_L4_OK))))
			wc->wc_flags = 0;
		break;
	case MLX5_CQE_RESP_SEND_IMM:
		wc->opcode = IB_WC_RECV;
		wc->wc_flags = IB_WC_WITH_IMM;
		wc->ex.imm_data = cqe->immediate;
		break;
	case MLX5_CQE_RESP_SEND_INV:
		wc->opcode = IB_WC_RECV;
		wc->wc_flags = IB_WC_WITH_INVALIDATE;
		wc->ex.invalidate_rkey = be32_to_cpu(cqe->inval_rkey);
		break;
	}
	wc->src_qp = be32_to_cpu(cqe->flags_rqpn) & 0xffffff;
	wc->dlid_path_bits = cqe->ml_path;
	g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3;
	wc->wc_flags |= g ? IB_WC_GRH : 0;
	if (is_qp1(qp->type)) {
		u16 pkey = be32_to_cpu(cqe->pkey) & 0xffff;

		ib_find_cached_pkey(&dev->ib_dev, qp->port, pkey,
				    &wc->pkey_index);
	} else {
		wc->pkey_index = 0;
	}

	if (ll != IB_LINK_LAYER_ETHERNET) {
		wc->slid = be16_to_cpu(cqe->slid);
		wc->sl = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0xf;
		return;
	}

	wc->slid = 0;
	vlan_present = cqe->l4_l3_hdr_type & 0x1;
	roce_packet_type = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0x3;
	if (vlan_present) {
		wc->vlan_id = (be16_to_cpu(cqe->vlan_info)) & 0xfff;
		wc->sl = (be16_to_cpu(cqe->vlan_info) >> 13) & 0x7;
		wc->wc_flags |= IB_WC_WITH_VLAN;
	} else {
		wc->sl = 0;
	}

	switch (roce_packet_type) {
	case MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH:
		wc->network_hdr_type = RDMA_NETWORK_ROCE_V1;
		break;
	case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV6:
		wc->network_hdr_type = RDMA_NETWORK_IPV6;
		break;
	case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV4:
		wc->network_hdr_type = RDMA_NETWORK_IPV4;
		break;
	}
	wc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE;
}

static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe)
{
	mlx5_ib_warn(dev, "dump error cqe\n");
	mlx5_dump_err_cqe(dev->mdev, cqe);
}

static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
				  struct mlx5_err_cqe *cqe,
				  struct ib_wc *wc)
{
	int dump = 1;

	switch (cqe->syndrome) {
	case MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR:
		wc->status = IB_WC_LOC_LEN_ERR;
		break;
	case MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR:
		wc->status = IB_WC_LOC_QP_OP_ERR;
		break;
	case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR:
		wc->status = IB_WC_LOC_PROT_ERR;
		break;
	case MLX5_CQE_SYNDROME_WR_FLUSH_ERR:
		dump = 0;
		wc->status = IB_WC_WR_FLUSH_ERR;
		break;
	case MLX5_CQE_SYNDROME_MW_BIND_ERR:
		wc->status = IB_WC_MW_BIND_ERR;
		break;
	case MLX5_CQE_SYNDROME_BAD_RESP_ERR:
		wc->status = IB_WC_BAD_RESP_ERR;
		break;
	case MLX5_CQE_SYNDROME_LOCAL_ACCESS_ERR:
		wc->status = IB_WC_LOC_ACCESS_ERR;
		break;
	case MLX5_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR:
		wc->status = IB_WC_REM_INV_REQ_ERR;
		break;
	case MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR:
		wc->status = IB_WC_REM_ACCESS_ERR;
		break;
	case MLX5_CQE_SYNDROME_REMOTE_OP_ERR:
		wc->status = IB_WC_REM_OP_ERR;
		break;
	case MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
		wc->status = IB_WC_RETRY_EXC_ERR;
		dump = 0;
		break;
	case MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
		wc->status = IB_WC_RNR_RETRY_EXC_ERR;
		dump = 0;
		break;
	case MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR:
		wc->status = IB_WC_REM_ABORT_ERR;
		break;
	default:
		wc->status = IB_WC_GENERAL_ERR;
		break;
	}

	wc->vendor_err = cqe->vendor_err_synd;
	if (dump) {
		mlx5_ib_warn(dev, "WC error: %d, Message: %s\n", wc->status,
			     ib_wc_status_msg(wc->status));
		dump_cqe(dev, cqe);
	}
}
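
/* Walk the send work-request list from the last polled WQE up to the one
 * completed by this CQE and record the following entry as the new
 * sq.last_poll.
 */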
static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
			   u16 tail, u16 head)
{
	u16 idx;

	do {
		idx = tail & (qp->sq.wqe_cnt - 1);
		if (idx == head)
			break;

		tail = qp->sq.w_list[idx].next;
	} while (1);
	tail = qp->sq.w_list[idx].next;
	qp->sq.last_poll = tail;
}

static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
{
	mlx5_frag_buf_free(dev->mdev, &buf->frag_buf);
}

static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe,
			     struct ib_sig_err *item)
{
	u16 syndrome = be16_to_cpu(cqe->syndrome);

#define GUARD_ERR   (1 << 13)
#define APPTAG_ERR  (1 << 12)
#define REFTAG_ERR  (1 << 11)

	if (syndrome & GUARD_ERR) {
		item->err_type = IB_SIG_BAD_GUARD;
		item->expected = be32_to_cpu(cqe->expected_trans_sig) >> 16;
		item->actual = be32_to_cpu(cqe->actual_trans_sig) >> 16;
	} else
	if (syndrome & REFTAG_ERR) {
		item->err_type = IB_SIG_BAD_REFTAG;
		item->expected = be32_to_cpu(cqe->expected_reftag);
		item->actual = be32_to_cpu(cqe->actual_reftag);
	} else
	if (syndrome & APPTAG_ERR) {
		item->err_type = IB_SIG_BAD_APPTAG;
		item->expected = be32_to_cpu(cqe->expected_trans_sig) & 0xffff;
		item->actual = be32_to_cpu(cqe->actual_trans_sig) & 0xffff;
	} else {
		pr_err("Got signature completion error with bad syndrome %04x\n",
		       syndrome);
	}

	item->sig_err_offset = be64_to_cpu(cqe->err_offset);
	item->key = be32_to_cpu(cqe->mkey);
}
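
/* When the device is in internal error state, completions are generated in
 * software: every outstanding WQE on the send or receive queue is flushed
 * with IB_WC_WR_FLUSH_ERR.
 */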
static void sw_comp(struct mlx5_ib_qp *qp, int num_entries, struct ib_wc *wc,
		    int *npolled, bool is_send)
{
	struct mlx5_ib_wq *wq;
	unsigned int cur;
	int np;
	int i;

	wq = (is_send) ? &qp->sq : &qp->rq;
	cur = wq->head - wq->tail;
	np = *npolled;

	if (cur == 0)
		return;

	for (i = 0; i < cur && np < num_entries; i++) {
		unsigned int idx;

		idx = (is_send) ? wq->last_poll : wq->tail;
		idx &= (wq->wqe_cnt - 1);

		wc->wr_id = wq->wrid[idx];
		wc->status = IB_WC_WR_FLUSH_ERR;
		wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
		wq->tail++;
		if (is_send)
			wq->last_poll = wq->w_list[idx].next;
		np++;
		wc->qp = &qp->ibqp;
		wc++;
	}
	*npolled = np;
}

static void mlx5_ib_poll_sw_comp(struct mlx5_ib_cq *cq, int num_entries,
				 struct ib_wc *wc, int *npolled)
{
	struct mlx5_ib_qp *qp;

	*npolled = 0;
	/* Find uncompleted WQEs belonging to that cq and return
	 * flush-error completions for them.
	 */
	list_for_each_entry(qp, &cq->list_send_qp, cq_send_list) {
		sw_comp(qp, num_entries, wc + *npolled, npolled, true);
		if (*npolled >= num_entries)
			return;
	}

	list_for_each_entry(qp, &cq->list_recv_qp, cq_recv_list) {
		sw_comp(qp, num_entries, wc + *npolled, npolled, false);
		if (*npolled >= num_entries)
			return;
	}
}
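
/* Poll a single hardware CQE. Returns 0 if a completion was written to @wc,
 * or -EAGAIN if no software-owned CQE is available.
 */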
static int mlx5_poll_one(struct mlx5_ib_cq *cq,
			 struct mlx5_ib_qp **cur_qp,
			 struct ib_wc *wc)
{
	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
	struct mlx5_err_cqe *err_cqe;
	struct mlx5_cqe64 *cqe64;
	struct mlx5_core_qp *mqp;
	struct mlx5_ib_wq *wq;
	uint8_t opcode;
	uint32_t qpn;
	u16 wqe_ctr;
	void *cqe;
	int idx;

repoll:
	cqe = next_cqe_sw(cq);
	if (!cqe)
		return -EAGAIN;

	cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;

	++cq->mcq.cons_index;

	/* Make sure we read CQ entry contents after we've checked the
	 * ownership bit.
	 */
	rmb();

	opcode = get_cqe_opcode(cqe64);
	if (unlikely(opcode == MLX5_CQE_RESIZE_CQ)) {
		if (likely(cq->resize_buf)) {
			free_cq_buf(dev, &cq->buf);
			cq->buf = *cq->resize_buf;
			kfree(cq->resize_buf);
			cq->resize_buf = NULL;
			goto repoll;
		} else {
			mlx5_ib_warn(dev, "unexpected resize cqe\n");
		}
	}

	qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff;
	if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) {
		/* We do not have to take the QP table lock here,
		 * because CQs will be locked while QPs are removed
		 * from the table.
		 */
		mqp = radix_tree_lookup(&dev->qp_table.tree, qpn);
		*cur_qp = to_mibqp(mqp);
	}

	wc->qp = &(*cur_qp)->ibqp;
	switch (opcode) {
	case MLX5_CQE_REQ:
		wq = &(*cur_qp)->sq;
		wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
		idx = wqe_ctr & (wq->wqe_cnt - 1);
		handle_good_req(wc, cqe64, wq, idx);
		handle_atomics(*cur_qp, cqe64, wq->last_poll, idx);
		wc->wr_id = wq->wrid[idx];
		wq->tail = wq->wqe_head[idx] + 1;
		wc->status = IB_WC_SUCCESS;
		break;
	case MLX5_CQE_RESP_WR_IMM:
	case MLX5_CQE_RESP_SEND:
	case MLX5_CQE_RESP_SEND_IMM:
	case MLX5_CQE_RESP_SEND_INV:
		handle_responder(wc, cqe64, *cur_qp);
		wc->status = IB_WC_SUCCESS;
		break;
	case MLX5_CQE_RESIZE_CQ:
		break;
	case MLX5_CQE_REQ_ERR:
	case MLX5_CQE_RESP_ERR:
		err_cqe = (struct mlx5_err_cqe *)cqe64;
		mlx5_handle_error_cqe(dev, err_cqe, wc);
		mlx5_ib_dbg(dev, "%s error cqe on cqn 0x%x:\n",
			    opcode == MLX5_CQE_REQ_ERR ?
			    "Requestor" : "Responder", cq->mcq.cqn);
		mlx5_ib_dbg(dev, "syndrome 0x%x, vendor syndrome 0x%x\n",
			    err_cqe->syndrome, err_cqe->vendor_err_synd);
		if (wc->status != IB_WC_WR_FLUSH_ERR &&
		    (*cur_qp)->type == MLX5_IB_QPT_REG_UMR)
			dev->umrc.state = MLX5_UMR_STATE_RECOVER;

		if (opcode == MLX5_CQE_REQ_ERR) {
			wq = &(*cur_qp)->sq;
			wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
			idx = wqe_ctr & (wq->wqe_cnt - 1);
			wc->wr_id = wq->wrid[idx];
			wq->tail = wq->wqe_head[idx] + 1;
		} else {
			struct mlx5_ib_srq *srq;

			if ((*cur_qp)->ibqp.srq) {
				srq = to_msrq((*cur_qp)->ibqp.srq);
				wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
				wc->wr_id = srq->wrid[wqe_ctr];
				mlx5_ib_free_srq_wqe(srq, wqe_ctr);
			} else {
				wq = &(*cur_qp)->rq;
				wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
				++wq->tail;
			}
		}
		break;
	case MLX5_CQE_SIG_ERR: {
		struct mlx5_sig_err_cqe *sig_err_cqe =
			(struct mlx5_sig_err_cqe *)cqe64;
		struct mlx5_core_sig_ctx *sig;

		xa_lock(&dev->sig_mrs);
		sig = xa_load(&dev->sig_mrs,
			      mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
		get_sig_err_item(sig_err_cqe, &sig->err_item);
		sig->sig_err_exists = true;
		sig->sigerr_count++;

		mlx5_ib_warn(dev, "CQN: 0x%x Got SIGERR on key: 0x%x err_type %x err_offset %llx expected %x actual %x\n",
			     cq->mcq.cqn, sig->err_item.key,
			     sig->err_item.err_type,
			     sig->err_item.sig_err_offset,
			     sig->err_item.expected,
			     sig->err_item.actual);

		xa_unlock(&dev->sig_mrs);
		goto repoll;
	}
	}

	return 0;
}

static int poll_soft_wc(struct mlx5_ib_cq *cq, int num_entries,
			struct ib_wc *wc, bool is_fatal_err)
{
	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
	struct mlx5_ib_wc *soft_wc, *next;
	int npolled = 0;

	list_for_each_entry_safe(soft_wc, next, &cq->wc_list, list) {
		if (npolled >= num_entries)
			break;

		mlx5_ib_dbg(dev, "polled software generated completion on CQ 0x%x\n",
			    cq->mcq.cqn);

		if (unlikely(is_fatal_err)) {
			soft_wc->wc.status = IB_WC_WR_FLUSH_ERR;
			soft_wc->wc.vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
		}
		wc[npolled++] = soft_wc->wc;
		list_del(&soft_wc->list);
		kfree(soft_wc);
	}

	return npolled;
}
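
/* Soft (driver-generated) completions on wc_list are drained first; the
 * remaining slots are filled from hardware CQEs. If the device is in
 * internal error state, flush-error completions are generated in software
 * instead.
 */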
int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
	struct mlx5_ib_cq *cq = to_mcq(ibcq);
	struct mlx5_ib_qp *cur_qp = NULL;
	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
	struct mlx5_core_dev *mdev = dev->mdev;
	unsigned long flags;
	int soft_polled = 0;
	int npolled;

	spin_lock_irqsave(&cq->lock, flags);
	if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
		/* make sure no soft wqe's are waiting */
		if (unlikely(!list_empty(&cq->wc_list)))
			soft_polled = poll_soft_wc(cq, num_entries, wc, true);

		mlx5_ib_poll_sw_comp(cq, num_entries - soft_polled,
				     wc + soft_polled, &npolled);
		goto out;
	}

	if (unlikely(!list_empty(&cq->wc_list)))
		soft_polled = poll_soft_wc(cq, num_entries, wc, false);

	for (npolled = 0; npolled < num_entries - soft_polled; npolled++) {
		if (mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled))
			break;
	}

	if (npolled)
		mlx5_cq_set_ci(&cq->mcq);
out:
	spin_unlock_irqrestore(&cq->lock, flags);

	return soft_polled + npolled;
}

int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
	struct mlx5_core_dev *mdev = to_mdev(ibcq->device)->mdev;
	struct mlx5_ib_cq *cq = to_mcq(ibcq);
	void __iomem *uar_page = mdev->priv.uar->map;
	unsigned long irq_flags;
	int ret = 0;

	spin_lock_irqsave(&cq->lock, irq_flags);
	if (cq->notify_flags != IB_CQ_NEXT_COMP)
		cq->notify_flags = flags & IB_CQ_SOLICITED_MASK;

	if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !list_empty(&cq->wc_list))
		ret = 1;
	spin_unlock_irqrestore(&cq->lock, irq_flags);

	mlx5_cq_arm(&cq->mcq,
		    (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
		    MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT,
		    uar_page, to_mcq(ibcq)->mcq.cons_index);

	return ret;
}

static int alloc_cq_frag_buf(struct mlx5_ib_dev *dev,
			     struct mlx5_ib_cq_buf *buf,
			     int nent,
			     int cqe_size)
{
	struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
	u8 log_wq_stride = 6 + (cqe_size == 128 ? 1 : 0);
	u8 log_wq_sz = ilog2(cqe_size);
	int err;

	err = mlx5_frag_buf_alloc_node(dev->mdev,
				       nent * cqe_size,
				       frag_buf,
				       dev->mdev->priv.numa_node);
	if (err)
		return err;

	mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);

	buf->cqe_size = cqe_size;
	buf->nent = nent;

	return 0;
}

enum {
	MLX5_CQE_RES_FORMAT_HASH = 0,
	MLX5_CQE_RES_FORMAT_CSUM = 1,
	MLX5_CQE_RES_FORMAT_CSUM_STRIDX = 3,
};

static int mini_cqe_res_format_to_hw(struct mlx5_ib_dev *dev, u8 format)
{
	switch (format) {
	case MLX5_IB_CQE_RES_FORMAT_HASH:
		return MLX5_CQE_RES_FORMAT_HASH;
	case MLX5_IB_CQE_RES_FORMAT_CSUM:
		return MLX5_CQE_RES_FORMAT_CSUM;
	case MLX5_IB_CQE_RES_FORMAT_CSUM_STRIDX:
		if (MLX5_CAP_GEN(dev->mdev, mini_cqe_resp_stride_index))
			return MLX5_CQE_RES_FORMAT_CSUM_STRIDX;
		return -EOPNOTSUPP;
	default:
		return -EINVAL;
	}
}

static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
			  struct mlx5_ib_cq *cq, int entries, u32 **cqb,
			  int *cqe_size, int *index, int *inlen)
{
	struct mlx5_ib_create_cq ucmd = {};
	unsigned long page_size;
	unsigned int page_offset_quantized;
	size_t ucmdlen;
	__be64 *pas;
	int ncont;
	void *cqc;
	int err;
	struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context(
		udata, struct mlx5_ib_ucontext, ibucontext);

	ucmdlen = min(udata->inlen, sizeof(ucmd));
	if (ucmdlen < offsetof(struct mlx5_ib_create_cq, flags))
		return -EINVAL;

	if (ib_copy_from_udata(&ucmd, udata, ucmdlen))
		return -EFAULT;

	if ((ucmd.flags & ~(MLX5_IB_CREATE_CQ_FLAGS_CQE_128B_PAD |
			    MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX |
			    MLX5_IB_CREATE_CQ_FLAGS_REAL_TIME_TS)))
		return -EINVAL;

	if ((ucmd.cqe_size != 64 && ucmd.cqe_size != 128) ||
	    ucmd.reserved0 || ucmd.reserved1)
		return -EINVAL;

	*cqe_size = ucmd.cqe_size;

	cq->buf.umem =
		ib_umem_get(&dev->ib_dev, ucmd.buf_addr,
			    entries * ucmd.cqe_size, IB_ACCESS_LOCAL_WRITE);
	if (IS_ERR(cq->buf.umem)) {
		err = PTR_ERR(cq->buf.umem);
		return err;
	}

	page_size = mlx5_umem_find_best_cq_quantized_pgoff(
		cq->buf.umem, cqc, log_page_size, MLX5_ADAPTER_PAGE_SHIFT,
		page_offset, 64, &page_offset_quantized);
	if (!page_size) {
		err = -EINVAL;
		goto err_umem;
	}

	err = mlx5_ib_db_map_user(context, ucmd.db_addr, &cq->db);
	if (err)
		goto err_umem;

	ncont = ib_umem_num_dma_blocks(cq->buf.umem, page_size);
	mlx5_ib_dbg(
		dev,
		"addr 0x%llx, size %u, npages %zu, page_size %lu, ncont %d\n",
		ucmd.buf_addr, entries * ucmd.cqe_size,
		ib_umem_num_pages(cq->buf.umem), page_size, ncont);

	*inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
		 MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * ncont;
	*cqb = kvzalloc(*inlen, GFP_KERNEL);
	if (!*cqb) {
		err = -ENOMEM;
		goto err_db;
	}

	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas);
	mlx5_ib_populate_pas(cq->buf.umem, page_size, pas, 0);

	cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context);
	MLX5_SET(cqc, cqc, log_page_size,
		 order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET(cqc, cqc, page_offset, page_offset_quantized);

	if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX) {
		*index = ucmd.uar_page_index;
	} else if (context->bfregi.lib_uar_dyn) {
		err = -EINVAL;
		goto err_cqb;
	} else {
		*index = context->bfregi.sys_pages[0];
	}

	if (ucmd.cqe_comp_en == 1) {
		int mini_cqe_format;

		if (!((*cqe_size == 128 &&
		       MLX5_CAP_GEN(dev->mdev, cqe_compression_128)) ||
		      (*cqe_size == 64 &&
		       MLX5_CAP_GEN(dev->mdev, cqe_compression)))) {
			err = -EOPNOTSUPP;
			mlx5_ib_warn(dev, "CQE compression is not supported for size %d!\n",
				     *cqe_size);
			goto err_cqb;
		}

		mini_cqe_format =
			mini_cqe_res_format_to_hw(dev,
						  ucmd.cqe_comp_res_format);
		if (mini_cqe_format < 0) {
			err = mini_cqe_format;
			mlx5_ib_dbg(dev, "CQE compression res format %d error: %d\n",
				    ucmd.cqe_comp_res_format, err);
			goto err_cqb;
		}

		MLX5_SET(cqc, cqc, cqe_comp_en, 1);
		MLX5_SET(cqc, cqc, mini_cqe_res_format, mini_cqe_format);
	}

	if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_CQE_128B_PAD) {
		if (*cqe_size != 128 ||
		    !MLX5_CAP_GEN(dev->mdev, cqe_128_always)) {
			err = -EOPNOTSUPP;
			mlx5_ib_warn(dev,
				     "CQE padding is not supported for CQE size of %dB!\n",
				     *cqe_size);
			goto err_cqb;
		}

		cq->private_flags |= MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD;
	}

	if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_REAL_TIME_TS)
		cq->private_flags |= MLX5_IB_CQ_PR_FLAGS_REAL_TIME_TS;

	MLX5_SET(create_cq_in, *cqb, uid, context->devx_uid);
	return 0;

err_cqb:
	kvfree(*cqb);

err_db:
	mlx5_ib_db_unmap_user(context, &cq->db);

err_umem:
	ib_umem_release(cq->buf.umem);
	return err;
}

static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_udata *udata)
{
	struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context(
		udata, struct mlx5_ib_ucontext, ibucontext);

	mlx5_ib_db_unmap_user(context, &cq->db);
	ib_umem_release(cq->buf.umem);
}

static void init_cq_frag_buf(struct mlx5_ib_cq_buf *buf)
{
	int i;
	void *cqe;
	struct mlx5_cqe64 *cqe64;

	for (i = 0; i < buf->nent; i++) {
		cqe = mlx5_frag_buf_get_wqe(&buf->fbc, i);
		cqe64 = buf->cqe_size == 64 ? cqe : cqe + 64;
		cqe64->op_own = MLX5_CQE_INVALID << 4;
	}
}

static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
			    int entries, int cqe_size,
			    u32 **cqb, int *index, int *inlen)
{
	__be64 *pas;
	void *cqc;
	int err;

	err = mlx5_db_alloc(dev->mdev, &cq->db);
	if (err)
		return err;

	cq->mcq.set_ci_db = cq->db.db;
	cq->mcq.arm_db = cq->db.db + 1;
	cq->mcq.cqe_sz = cqe_size;

	err = alloc_cq_frag_buf(dev, &cq->buf, entries, cqe_size);
	if (err)
		goto err_db;

	init_cq_frag_buf(&cq->buf);

	*inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
		 MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) *
		 cq->buf.frag_buf.npages;
	*cqb = kvzalloc(*inlen, GFP_KERNEL);
	if (!*cqb) {
		err = -ENOMEM;
		goto err_buf;
	}

	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas);
	mlx5_fill_page_frag_array(&cq->buf.frag_buf, pas);

	cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context);
	MLX5_SET(cqc, cqc, log_page_size,
		 cq->buf.frag_buf.page_shift -
		 MLX5_ADAPTER_PAGE_SHIFT);

	*index = dev->mdev->priv.uar->index;

	return 0;

err_buf:
	free_cq_buf(dev, &cq->buf);

err_db:
	mlx5_db_free(dev->mdev, &cq->db);
	return err;
}

static void destroy_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq)
{
	free_cq_buf(dev, &cq->buf);
	mlx5_db_free(dev->mdev, &cq->db);
}

static void notify_soft_wc_handler(struct work_struct *work)
{
	struct mlx5_ib_cq *cq = container_of(work, struct mlx5_ib_cq,
					     notify_work);

	cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
}

int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
		      struct ib_udata *udata)
{
	struct ib_device *ibdev = ibcq->device;
	int entries = attr->cqe;
	int vector = attr->comp_vector;
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_ib_cq *cq = to_mcq(ibcq);
	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
	int index;
	int inlen;
	u32 *cqb = NULL;
	void *cqc;
	int cqe_size;
	int eqn;
	int err;

	if (entries < 0 ||
	    (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz))))
		return -EINVAL;

	if (check_cq_create_flags(attr->flags))
		return -EOPNOTSUPP;

	entries = roundup_pow_of_two(entries + 1);
	if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)))
		return -EINVAL;

	cq->ibcq.cqe = entries - 1;
	mutex_init(&cq->resize_mutex);
	spin_lock_init(&cq->lock);
	cq->resize_buf = NULL;
	cq->resize_umem = NULL;
	cq->create_flags = attr->flags;
	INIT_LIST_HEAD(&cq->list_send_qp);
	INIT_LIST_HEAD(&cq->list_recv_qp);

	if (udata) {
		err = create_cq_user(dev, udata, cq, entries, &cqb, &cqe_size,
				     &index, &inlen);
		if (err)
			return err;
	} else {
		cqe_size = cache_line_size() == 128 ? 128 : 64;
		err = create_cq_kernel(dev, cq, entries, cqe_size, &cqb,
				       &index, &inlen);
		if (err)
			return err;

		INIT_WORK(&cq->notify_work, notify_soft_wc_handler);
	}

	err = mlx5_vector2eqn(dev->mdev, vector, &eqn);
	if (err)
		goto err_cqb;

	cq->cqe_size = cqe_size;
	cqc = MLX5_ADDR_OF(create_cq_in, cqb, cq_context);
	MLX5_SET(cqc, cqc, cqe_sz,
		 cqe_sz_to_mlx_sz(cqe_size,
				  cq->private_flags &
				  MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD));
	MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries));
	MLX5_SET(cqc, cqc, uar_page, index);
	MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
	MLX5_SET64(cqc, cqc, dbr_addr, cq->db.dma);
	if (cq->create_flags & IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN)
		MLX5_SET(cqc, cqc, oi, 1);

	err = mlx5_core_create_cq(dev->mdev, &cq->mcq, cqb, inlen, out, sizeof(out));
	if (err)
		goto err_cqb;

	mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn);
	if (udata)
		cq->mcq.tasklet_ctx.comp = mlx5_ib_cq_comp;
	else
		cq->mcq.comp = mlx5_ib_cq_comp;
	cq->mcq.event = mlx5_ib_cq_event;

	INIT_LIST_HEAD(&cq->wc_list);

	if (udata)
		if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof(__u32))) {
			err = -EFAULT;
			goto err_cmd;
		}

	kvfree(cqb);
	return 0;

err_cmd:
	mlx5_core_destroy_cq(dev->mdev, &cq->mcq);

err_cqb:
	kvfree(cqb);
	if (udata)
		destroy_cq_user(cq, udata);
	else
		destroy_cq_kernel(dev, cq);
	return err;
}

int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(cq->device);
	struct mlx5_ib_cq *mcq = to_mcq(cq);
	int ret;

	ret = mlx5_core_destroy_cq(dev->mdev, &mcq->mcq);
	if (ret)
		return ret;

	if (udata)
		destroy_cq_user(mcq, udata);
	else
		destroy_cq_kernel(dev, mcq);

	return 0;
}

static int is_equal_rsn(struct mlx5_cqe64 *cqe64, u32 rsn)
{
	return rsn == (ntohl(cqe64->sop_drop_qpn) & 0xffffff);
}
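
/* Remove CQEs that belong to the given QP/SRQ resource number from the CQ,
 * copying older entries on top of them. Caller must hold cq->lock;
 * mlx5_ib_cq_clean() below is the locked wrapper.
 */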
void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 rsn, struct mlx5_ib_srq *srq)
{
	struct mlx5_cqe64 *cqe64, *dest64;
	void *cqe, *dest;
	u32 prod_index;
	int nfreed = 0;
	u8 owner_bit;

	if (!cq)
		return;

	/* First we need to find the current producer index, so we
	 * know where to start cleaning from. It doesn't matter if HW
	 * adds new entries after this loop -- the QP we're worried
	 * about is already in RESET, so the new entries won't come
	 * from our QP and therefore don't need to be checked.
	 */
	for (prod_index = cq->mcq.cons_index; get_sw_cqe(cq, prod_index); prod_index++)
		if (prod_index == cq->mcq.cons_index + cq->ibcq.cqe)
			break;

	/* Now sweep backwards through the CQ, removing CQ entries
	 * that match our QP by copying older entries on top of them.
	 */
	while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
		cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
		cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
		if (is_equal_rsn(cqe64, rsn)) {
			if (srq && (ntohl(cqe64->srqn) & 0xffffff))
				mlx5_ib_free_srq_wqe(srq, be16_to_cpu(cqe64->wqe_counter));
			++nfreed;
		} else if (nfreed) {
			dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe);
			dest64 = (cq->mcq.cqe_sz == 64) ? dest : dest + 64;
			owner_bit = dest64->op_own & MLX5_CQE_OWNER_MASK;
			memcpy(dest, cqe, cq->mcq.cqe_sz);
			dest64->op_own = owner_bit |
				(dest64->op_own & ~MLX5_CQE_OWNER_MASK);
		}
	}

	if (nfreed) {
		cq->mcq.cons_index += nfreed;
		/* Make sure update of buffer contents is done before
		 * updating consumer index.
		 */
		wmb();
		mlx5_cq_set_ci(&cq->mcq);
	}
}

void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq)
{
	if (!cq)
		return;

	spin_lock_irq(&cq->lock);
	__mlx5_ib_cq_clean(cq, qpn, srq);
	spin_unlock_irq(&cq->lock);
}

int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
{
	struct mlx5_ib_dev *dev = to_mdev(cq->device);
	struct mlx5_ib_cq *mcq = to_mcq(cq);
	int err;

	if (!MLX5_CAP_GEN(dev->mdev, cq_moderation))
		return -EOPNOTSUPP;

	if (cq_period > MLX5_MAX_CQ_PERIOD)
		return -EINVAL;

	err = mlx5_core_modify_cq_moderation(dev->mdev, &mcq->mcq,
					     cq_period, cq_count);
	if (err)
		mlx5_ib_warn(dev, "modify cq 0x%x failed\n", mcq->mcq.cqn);

	return err;
}

static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
		       int entries, struct ib_udata *udata,
		       int *cqe_size)
{
	struct mlx5_ib_resize_cq ucmd;
	struct ib_umem *umem;
	int err;

	err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
	if (err)
		return err;

	if (ucmd.reserved0 || ucmd.reserved1)
		return -EINVAL;

	/* check multiplication overflow */
	if (ucmd.cqe_size && SIZE_MAX / ucmd.cqe_size <= entries - 1)
		return -EINVAL;

	umem = ib_umem_get(&dev->ib_dev, ucmd.buf_addr,
			   (size_t)ucmd.cqe_size * entries,
			   IB_ACCESS_LOCAL_WRITE);
	if (IS_ERR(umem)) {
		err = PTR_ERR(umem);
		return err;
	}

	cq->resize_umem = umem;
	*cqe_size = ucmd.cqe_size;

	return 0;
}

static int resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
			 int entries, int cqe_size)
{
	int err;

	cq->resize_buf = kzalloc(sizeof(*cq->resize_buf), GFP_KERNEL);
	if (!cq->resize_buf)
		return -ENOMEM;

	err = alloc_cq_frag_buf(dev, cq->resize_buf, entries, cqe_size);
	if (err)
		goto ex;

	init_cq_frag_buf(cq->resize_buf);

	return 0;

ex:
	kfree(cq->resize_buf);
	return err;
}
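
/* Copy all software-owned CQEs from the old buffer into the resize buffer,
 * fixing up the ownership bit of each copied entry, until the
 * MLX5_CQE_RESIZE_CQ entry is reached.
 */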
static int copy_resize_cqes(struct mlx5_ib_cq *cq)
{
	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
	struct mlx5_cqe64 *scqe64;
	struct mlx5_cqe64 *dcqe64;
	void *start_cqe;
	void *scqe;
	void *dcqe;
	int ssize;
	int dsize;
	int i;
	u8 sw_own;

	ssize = cq->buf.cqe_size;
	dsize = cq->resize_buf->cqe_size;
	if (ssize != dsize) {
		mlx5_ib_warn(dev, "resize from different cqe size is not supported\n");
		return -EINVAL;
	}

	i = cq->mcq.cons_index;
	scqe = get_sw_cqe(cq, i);
	scqe64 = ssize == 64 ? scqe : scqe + 64;
	start_cqe = scqe;
	if (!scqe) {
		mlx5_ib_warn(dev, "expected cqe in sw ownership\n");
		return -EINVAL;
	}

	while (get_cqe_opcode(scqe64) != MLX5_CQE_RESIZE_CQ) {
		dcqe = mlx5_frag_buf_get_wqe(&cq->resize_buf->fbc,
					     (i + 1) & cq->resize_buf->nent);
		dcqe64 = dsize == 64 ? dcqe : dcqe + 64;
		sw_own = sw_ownership_bit(i + 1, cq->resize_buf->nent);
		memcpy(dcqe, scqe, dsize);
		dcqe64->op_own = (dcqe64->op_own & ~MLX5_CQE_OWNER_MASK) | sw_own;

		++i;
		scqe = get_sw_cqe(cq, i);
		scqe64 = ssize == 64 ? scqe : scqe + 64;
		if (!scqe) {
			mlx5_ib_warn(dev, "expected cqe in sw ownership\n");
			return -EINVAL;
		}

		if (scqe == start_cqe) {
			pr_warn("resize CQ failed to get resize CQE, CQN 0x%x\n",
				cq->mcq.cqn);
			return -ENOMEM;
		}
	}
	++cq->mcq.cons_index;
	return 0;
}

int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(ibcq->device);
	struct mlx5_ib_cq *cq = to_mcq(ibcq);
	void *cqc;
	u32 *in;
	int err;
	int npas;
	__be64 *pas;
	unsigned int page_offset_quantized = 0;
	unsigned int page_shift;
	int inlen;
	int cqe_size;
	unsigned long flags;

	if (!MLX5_CAP_GEN(dev->mdev, cq_resize)) {
		pr_info("Firmware does not support resize CQ\n");
		return -ENOSYS;
	}

	if (entries < 1 ||
	    entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz))) {
		mlx5_ib_warn(dev, "wrong entries number %d, max %d\n",
			     entries,
			     1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz));
		return -EINVAL;
	}

	entries = roundup_pow_of_two(entries + 1);
	if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)) + 1)
		return -EINVAL;

	if (entries == ibcq->cqe + 1)
		return 0;

	mutex_lock(&cq->resize_mutex);
	if (udata) {
		unsigned long page_size;

		err = resize_user(dev, cq, entries, udata, &cqe_size);
		if (err)
			goto ex;

		page_size = mlx5_umem_find_best_cq_quantized_pgoff(
			cq->resize_umem, cqc, log_page_size,
			MLX5_ADAPTER_PAGE_SHIFT, page_offset, 64,
			&page_offset_quantized);
		if (!page_size) {
			err = -EINVAL;
			goto ex_resize;
		}
		npas = ib_umem_num_dma_blocks(cq->resize_umem, page_size);
		page_shift = order_base_2(page_size);
	} else {
		struct mlx5_frag_buf *frag_buf;

		cqe_size = 64;
		err = resize_kernel(dev, cq, entries, cqe_size);
		if (err)
			goto ex;
		frag_buf = &cq->resize_buf->frag_buf;
		npas = frag_buf->npages;
		page_shift = frag_buf->page_shift;
	}

	inlen = MLX5_ST_SZ_BYTES(modify_cq_in) +
		MLX5_FLD_SZ_BYTES(modify_cq_in, pas[0]) * npas;

	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto ex_resize;
	}

	pas = (__be64 *)MLX5_ADDR_OF(modify_cq_in, in, pas);
	if (udata)
		mlx5_ib_populate_pas(cq->resize_umem, 1UL << page_shift, pas,
				     0);
	else
		mlx5_fill_page_frag_array(&cq->resize_buf->frag_buf, pas);

	MLX5_SET(modify_cq_in, in,
		 modify_field_select_resize_field_select.resize_field_select.resize_field_select,
		 MLX5_MODIFY_CQ_MASK_LOG_SIZE |
		 MLX5_MODIFY_CQ_MASK_PG_OFFSET |
		 MLX5_MODIFY_CQ_MASK_PG_SIZE);

	cqc = MLX5_ADDR_OF(modify_cq_in, in, cq_context);

	MLX5_SET(cqc, cqc, log_page_size,
		 page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET(cqc, cqc, page_offset, page_offset_quantized);
	MLX5_SET(cqc, cqc, cqe_sz,
		 cqe_sz_to_mlx_sz(cqe_size,
				  cq->private_flags &
				  MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD));
	MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries));

	MLX5_SET(modify_cq_in, in, op_mod, MLX5_CQ_OPMOD_RESIZE);
	MLX5_SET(modify_cq_in, in, cqn, cq->mcq.cqn);

	err = mlx5_core_modify_cq(dev->mdev, &cq->mcq, in, inlen);
	if (err)
		goto ex_alloc;

	if (udata) {
		cq->ibcq.cqe = entries - 1;
		ib_umem_release(cq->buf.umem);
		cq->buf.umem = cq->resize_umem;
		cq->resize_umem = NULL;
	} else {
		struct mlx5_ib_cq_buf tbuf;
		int resized = 0;

		spin_lock_irqsave(&cq->lock, flags);
		if (cq->resize_buf) {
			err = copy_resize_cqes(cq);
			if (!err) {
				tbuf = cq->buf;
				cq->buf = *cq->resize_buf;
				kfree(cq->resize_buf);
				cq->resize_buf = NULL;
				resized = 1;
			}
		}
		cq->ibcq.cqe = entries - 1;
		spin_unlock_irqrestore(&cq->lock, flags);
		if (resized)
			free_cq_buf(dev, &tbuf);
	}

	mutex_unlock(&cq->resize_mutex);

	kvfree(in);
	return 0;

ex_alloc:
	kvfree(in);

ex_resize:
	ib_umem_release(cq->resize_umem);
	if (!udata) {
		free_cq_buf(dev, cq->resize_buf);
		cq->resize_buf = NULL;
	}
ex:
	mutex_unlock(&cq->resize_mutex);
	return err;
}

int mlx5_ib_get_cqe_size(struct ib_cq *ibcq)
{
	struct mlx5_ib_cq *cq;

	if (!ibcq)
		return 128;

	cq = to_mcq(ibcq);
	return cq->cqe_size;
}

/* Called from atomic context */
int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc)
{
	struct mlx5_ib_wc *soft_wc;
	struct mlx5_ib_cq *cq = to_mcq(ibcq);
	unsigned long flags;

	soft_wc = kmalloc(sizeof(*soft_wc), GFP_ATOMIC);
	if (!soft_wc)
		return -ENOMEM;

	soft_wc->wc = *wc;
	spin_lock_irqsave(&cq->lock, flags);
	list_add_tail(&soft_wc->list, &cq->wc_list);
	if (cq->notify_flags == IB_CQ_NEXT_COMP ||
	    wc->status != IB_WC_SUCCESS) {
		cq->notify_flags = 0;
		schedule_work(&cq->notify_work);
	}
	spin_unlock_irqrestore(&cq->lock, flags);

	return 0;
}