// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
/*
 * Copyright(c) 2015 - 2019 Intel Corporation.
 */

#include <linux/net.h>
#include <rdma/ib_smi.h>

#include "hfi.h"
#include "mad.h"
#include "verbs_txreq.h"
#include "trace_ibhdrs.h"
#include "qp.h"

/* We support only two types - 9B and 16B for now */
static const hfi1_make_req hfi1_make_ud_req_tbl[2] = {
	[HFI1_PKT_TYPE_9B] = &hfi1_make_ud_req_9B,
	[HFI1_PKT_TYPE_16B] = &hfi1_make_ud_req_16B
};

/**
 * ud_loopback - handle send on loopback QPs
 * @sqp: the sending QP
 * @swqe: the send work request
 *
 * This is called from hfi1_make_ud_req() to forward a WQE addressed
 * to the same HFI.
 * Note that the receive interrupt handler may be calling hfi1_ud_rcv()
 * while this is being called.
 */
static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
{
	struct hfi1_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
	struct hfi1_pportdata *ppd;
	struct hfi1_qp_priv *priv = sqp->priv;
	struct rvt_qp *qp;
	struct rdma_ah_attr *ah_attr;
	unsigned long flags;
	struct rvt_sge_state ssge;
	struct rvt_sge *sge;
	struct ib_wc wc;
	u32 length;
	enum ib_qp_type sqptype, dqptype;

	rcu_read_lock();
	qp = rvt_lookup_qpn(ib_to_rvt(sqp->ibqp.device), &ibp->rvp,
			    rvt_get_swqe_remote_qpn(swqe));
	if (!qp) {
		ibp->rvp.n_pkt_drops++;
		rcu_read_unlock();
		return;
	}

	sqptype = sqp->ibqp.qp_type == IB_QPT_GSI ?
			IB_QPT_UD : sqp->ibqp.qp_type;
	dqptype = qp->ibqp.qp_type == IB_QPT_GSI ?
			IB_QPT_UD : qp->ibqp.qp_type;

	if (dqptype != sqptype ||
	    !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
		ibp->rvp.n_pkt_drops++;
		goto drop;
	}

	ah_attr = rvt_get_swqe_ah_attr(swqe);
	ppd = ppd_from_ibp(ibp);
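	/*
	 * The ingress pkey check below applies only to non-management
	 * QPs (QPN > 1); QP0/QP1 loopback traffic skips it.
	 */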
	if (qp->ibqp.qp_num > 1) {
		u16 pkey;
		u32 slid;
		u8 sc5 = ibp->sl_to_sc[rdma_ah_get_sl(ah_attr)];

		pkey = hfi1_get_pkey(ibp, sqp->s_pkey_index);
		slid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) &
				   ((1 << ppd->lmc) - 1));
		if (unlikely(ingress_pkey_check(ppd, pkey, sc5,
						qp->s_pkey_index,
						slid, false))) {
			hfi1_bad_pkey(ibp, pkey,
				      rdma_ah_get_sl(ah_attr),
				      sqp->ibqp.qp_num, qp->ibqp.qp_num,
				      slid, rdma_ah_get_dlid(ah_attr));
			goto drop;
		}
	}

	/*
	 * Check that the qkey matches (except for QP0, see 9.6.1.4.1).
	 * Qkeys with the high order bit set mean use the
	 * qkey from the QP context instead of the WR (see 10.2.5).
	 */
	if (qp->ibqp.qp_num) {
		u32 qkey;

		qkey = (int)rvt_get_swqe_remote_qkey(swqe) < 0 ?
			sqp->qkey : rvt_get_swqe_remote_qkey(swqe);
		if (unlikely(qkey != qp->qkey))
			goto drop; /* silently drop per IBTA spec */
	}

	/*
	 * A GRH is expected to precede the data even if not
	 * present on the wire.
	 */
	length = swqe->length;
	memset(&wc, 0, sizeof(wc));
	wc.byte_len = length + sizeof(struct ib_grh);

	if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
		wc.wc_flags = IB_WC_WITH_IMM;
		wc.ex.imm_data = swqe->wr.ex.imm_data;
	}

	spin_lock_irqsave(&qp->r_lock, flags);

	/*
	 * Get the next work request entry to find where to put the data.
	 */
	if (qp->r_flags & RVT_R_REUSE_SGE) {
		qp->r_flags &= ~RVT_R_REUSE_SGE;
	} else {
		int ret;

		ret = rvt_get_rwqe(qp, false);
		if (ret < 0) {
			rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
			goto bail_unlock;
		}
		if (!ret) {
			if (qp->ibqp.qp_num == 0)
				ibp->rvp.n_vl15_dropped++;
			goto bail_unlock;
		}
	}
	/* Silently drop packets which are too big. */
	if (unlikely(wc.byte_len > qp->r_len)) {
		qp->r_flags |= RVT_R_REUSE_SGE;
		ibp->rvp.n_pkt_drops++;
		goto bail_unlock;
	}

	if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
		struct ib_grh grh;
		struct ib_global_route grd = *(rdma_ah_read_grh(ah_attr));

		/*
		 * For loopback packets with extended LIDs, the
		 * sgid_index in the GRH is 0 and the dgid is
		 * OPA GID of the sender. While creating a response
		 * to the loopback packet, IB core creates the new
		 * sgid_index from the DGID and that will be the
		 * OPA_GID_INDEX. The new dgid is from the sgid
		 * index and that will be in the IB GID format.
		 *
		 * We now have a case where the sent packet had a
		 * different sgid_index and dgid compared to the
		 * one that was received in response.
		 *
		 * Fix this inconsistency.
		 */
		if (priv->hdr_type == HFI1_PKT_TYPE_16B) {
			if (grd.sgid_index == 0)
				grd.sgid_index = OPA_GID_INDEX;

			if (ib_is_opa_gid(&grd.dgid))
				grd.dgid.global.interface_id =
					cpu_to_be64(ppd->guids[HFI1_PORT_GUID_INDEX]);
		}

		hfi1_make_grh(ibp, &grh, &grd, 0, 0);
		rvt_copy_sge(qp, &qp->r_sge, &grh,
			     sizeof(grh), true, false);
		wc.wc_flags |= IB_WC_GRH;
	} else {
		rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true);
	}

	ssge.sg_list = swqe->sg_list + 1;
	ssge.sge = *swqe->sg_list;
	ssge.num_sge = swqe->wr.num_sge;
	sge = &ssge.sge;
	while (length) {
		u32 len = rvt_get_sge_length(sge, length);

		WARN_ON_ONCE(len == 0);
		rvt_copy_sge(qp, &qp->r_sge, sge->vaddr, len, true, false);
		rvt_update_sge(&ssge, len, false);
		length -= len;
	}
	rvt_put_ss(&qp->r_sge);
	if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
		goto bail_unlock;
	wc.wr_id = qp->r_wr_id;
	wc.status = IB_WC_SUCCESS;
	wc.opcode = IB_WC_RECV;
	wc.qp = &qp->ibqp;
	wc.src_qp = sqp->ibqp.qp_num;
	if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_SMI) {
		if (sqp->ibqp.qp_type == IB_QPT_GSI ||
		    sqp->ibqp.qp_type == IB_QPT_SMI)
			wc.pkey_index = rvt_get_swqe_pkey_index(swqe);
		else
			wc.pkey_index = sqp->s_pkey_index;
	} else {
		wc.pkey_index = 0;
	}
	wc.slid = (ppd->lid | (rdma_ah_get_path_bits(ah_attr) &
			       ((1 << ppd->lmc) - 1))) & U16_MAX;
	/* Check for loopback when the port lid is not set */
	if (wc.slid == 0 && sqp->ibqp.qp_type == IB_QPT_GSI)
		wc.slid = be16_to_cpu(IB_LID_PERMISSIVE);
	wc.sl = rdma_ah_get_sl(ah_attr);
	wc.dlid_path_bits = rdma_ah_get_dlid(ah_attr) & ((1 << ppd->lmc) - 1);
	wc.port_num = qp->port_num;
	/* Signal completion event if the solicited bit is set. */
	rvt_recv_cq(qp, &wc, swqe->wr.send_flags & IB_SEND_SOLICITED);
	ibp->rvp.n_loop_pkts++;
bail_unlock:
	spin_unlock_irqrestore(&qp->r_lock, flags);
drop:
	rcu_read_unlock();
}
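/*
 * hfi1_make_bth_deth - build the BTH and DETH for a UD send
 * @qp: the sending QP
 * @wqe: the send work request
 * @ohdr: the "other headers" area to fill in
 * @pkey: returns the pkey looked up for this send
 * @extra_bytes: pad count to encode in BTH byte 0
 * @bypass: true for 16B (bypass) packets; the pkey is then left out of
 *	    bth0 since the 16B header carries it
 */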
static void hfi1_make_bth_deth(struct rvt_qp *qp, struct rvt_swqe *wqe,
			       struct ib_other_headers *ohdr,
			       u16 *pkey, u32 extra_bytes, bool bypass)
{
	u32 bth0;
	struct hfi1_ibport *ibp;

	ibp = to_iport(qp->ibqp.device, qp->port_num);
	if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
		ohdr->u.ud.imm_data = wqe->wr.ex.imm_data;
		bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24;
	} else {
		bth0 = IB_OPCODE_UD_SEND_ONLY << 24;
	}

	if (wqe->wr.send_flags & IB_SEND_SOLICITED)
		bth0 |= IB_BTH_SOLICITED;
	bth0 |= extra_bytes << 20;
	if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_SMI)
		*pkey = hfi1_get_pkey(ibp, rvt_get_swqe_pkey_index(wqe));
	else
		*pkey = hfi1_get_pkey(ibp, qp->s_pkey_index);
	if (!bypass)
		bth0 |= *pkey;
	ohdr->bth[0] = cpu_to_be32(bth0);
	ohdr->bth[1] = cpu_to_be32(rvt_get_swqe_remote_qpn(wqe));
	ohdr->bth[2] = cpu_to_be32(mask_psn(wqe->psn));
	/*
	 * Qkeys with the high order bit set mean use the
	 * qkey from the QP context instead of the WR (see 10.2.5).
	 */
	ohdr->u.ud.deth[0] =
		cpu_to_be32((int)rvt_get_swqe_remote_qkey(wqe) < 0 ? qp->qkey :
			    rvt_get_swqe_remote_qkey(wqe));
	ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
}
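/**
 * hfi1_make_ud_req_9B - build a 9B UD request header
 * @qp: the QP
 * @ps: the current packet state
 * @wqe: the send work request
 *
 * Builds the LRH, BTH and DETH (plus a GRH when the AH has one) for a
 * 9B UD packet into ps->s_txreq.
 */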
void hfi1_make_ud_req_9B(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
			 struct rvt_swqe *wqe)
{
	u32 nwords, extra_bytes;
	u16 len, slid, dlid, pkey;
	u16 lrh0 = 0;
	u8 sc5;
	struct hfi1_qp_priv *priv = qp->priv;
	struct ib_other_headers *ohdr;
	struct rdma_ah_attr *ah_attr;
	struct hfi1_pportdata *ppd;
	struct hfi1_ibport *ibp;
	struct ib_grh *grh;

	ibp = to_iport(qp->ibqp.device, qp->port_num);
	ppd = ppd_from_ibp(ibp);
	ah_attr = rvt_get_swqe_ah_attr(wqe);

	extra_bytes = -wqe->length & 3;
	nwords = ((wqe->length + extra_bytes) >> 2) + SIZE_OF_CRC;
	/* header size in dwords LRH+BTH+DETH = (8+12+8)/4. */
	ps->s_txreq->hdr_dwords = 7;
	if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM)
		ps->s_txreq->hdr_dwords++;

	if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
		grh = &ps->s_txreq->phdr.hdr.ibh.u.l.grh;
		ps->s_txreq->hdr_dwords +=
			hfi1_make_grh(ibp, grh, rdma_ah_read_grh(ah_attr),
				      ps->s_txreq->hdr_dwords - LRH_9B_DWORDS,
				      nwords);
		lrh0 = HFI1_LRH_GRH;
		ohdr = &ps->s_txreq->phdr.hdr.ibh.u.l.oth;
	} else {
		lrh0 = HFI1_LRH_BTH;
		ohdr = &ps->s_txreq->phdr.hdr.ibh.u.oth;
	}

	sc5 = ibp->sl_to_sc[rdma_ah_get_sl(ah_attr)];
	lrh0 |= (rdma_ah_get_sl(ah_attr) & 0xf) << 4;
	if (qp->ibqp.qp_type == IB_QPT_SMI) {
		lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */
		priv->s_sc = 0xf;
	} else {
		lrh0 |= (sc5 & 0xf) << 12;
		priv->s_sc = sc5;
	}

	dlid = opa_get_lid(rdma_ah_get_dlid(ah_attr), 9B);
	if (dlid == be16_to_cpu(IB_LID_PERMISSIVE)) {
		slid = be16_to_cpu(IB_LID_PERMISSIVE);
	} else {
		u16 lid = (u16)ppd->lid;

		if (lid) {
			lid |= rdma_ah_get_path_bits(ah_attr) &
				((1 << ppd->lmc) - 1);
			slid = lid;
		} else {
			slid = be16_to_cpu(IB_LID_PERMISSIVE);
		}
	}
	hfi1_make_bth_deth(qp, wqe, ohdr, &pkey, extra_bytes, false);
	len = ps->s_txreq->hdr_dwords + nwords;

	/* Setup the packet */
	ps->s_txreq->phdr.hdr.hdr_type = HFI1_PKT_TYPE_9B;
	hfi1_make_ib_hdr(&ps->s_txreq->phdr.hdr.ibh,
			 lrh0, len, dlid, slid);
}
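/**
 * hfi1_make_ud_req_16B - build a 16B UD request header
 * @qp: the QP
 * @ps: the current packet state
 * @wqe: the send work request
 *
 * Builds the 16B LRH plus either BTH/DETH or, for management packets
 * (source or destination QPN of 0 or 1), the L4_FM header into
 * ps->s_txreq.
 */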
void hfi1_make_ud_req_16B(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
			  struct rvt_swqe *wqe)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct ib_other_headers *ohdr;
	struct rdma_ah_attr *ah_attr;
	struct hfi1_pportdata *ppd;
	struct hfi1_ibport *ibp;
	u32 dlid, slid, nwords, extra_bytes;
	u32 dest_qp = rvt_get_swqe_remote_qpn(wqe);
	u32 src_qp = qp->ibqp.qp_num;
	u16 len, pkey;
	u8 l4, sc5;
	bool is_mgmt = false;

	ibp = to_iport(qp->ibqp.device, qp->port_num);
	ppd = ppd_from_ibp(ibp);
	ah_attr = rvt_get_swqe_ah_attr(wqe);

	/*
	 * Build 16B Management Packet if either the destination
	 * or source queue pair number is 0 or 1.
	 */
	if (dest_qp == 0 || src_qp == 0 || dest_qp == 1 || src_qp == 1) {
		/* header size in dwords 16B LRH+L4_FM = (16+8)/4. */
		ps->s_txreq->hdr_dwords = 6;
		is_mgmt = true;
	} else {
		/* header size in dwords 16B LRH+BTH+DETH = (16+12+8)/4. */
		ps->s_txreq->hdr_dwords = 9;
		if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM)
			ps->s_txreq->hdr_dwords++;
	}

	/* SW provides space for CRC and LT for bypass packets. */
	extra_bytes = hfi1_get_16b_padding((ps->s_txreq->hdr_dwords << 2),
					   wqe->length);
	nwords = ((wqe->length + extra_bytes + SIZE_OF_LT) >> 2) + SIZE_OF_CRC;

	if ((rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) &&
	    hfi1_check_mcast(rdma_ah_get_dlid(ah_attr))) {
		struct ib_grh *grh;
		struct ib_global_route *grd = rdma_ah_retrieve_grh(ah_attr);

		/*
		 * Ensure OPA GIDs are transformed to IB gids
		 * before creating the GRH.
		 */
		if (grd->sgid_index == OPA_GID_INDEX) {
			dd_dev_warn(ppd->dd, "Bad sgid_index. sgid_index: %d\n",
				    grd->sgid_index);
			grd->sgid_index = 0;
		}
		grh = &ps->s_txreq->phdr.hdr.opah.u.l.grh;
		ps->s_txreq->hdr_dwords += hfi1_make_grh(
			ibp, grh, grd,
			ps->s_txreq->hdr_dwords - LRH_16B_DWORDS,
			nwords);
		ohdr = &ps->s_txreq->phdr.hdr.opah.u.l.oth;
		l4 = OPA_16B_L4_IB_GLOBAL;
	} else {
		ohdr = &ps->s_txreq->phdr.hdr.opah.u.oth;
		l4 = OPA_16B_L4_IB_LOCAL;
	}

	sc5 = ibp->sl_to_sc[rdma_ah_get_sl(ah_attr)];
	if (qp->ibqp.qp_type == IB_QPT_SMI)
		priv->s_sc = 0xf;
	else
		priv->s_sc = sc5;

	dlid = opa_get_lid(rdma_ah_get_dlid(ah_attr), 16B);
	if (!ppd->lid)
		slid = be32_to_cpu(OPA_LID_PERMISSIVE);
	else
		slid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) &
				   ((1 << ppd->lmc) - 1));

	if (is_mgmt) {
		l4 = OPA_16B_L4_FM;
		pkey = hfi1_get_pkey(ibp, rvt_get_swqe_pkey_index(wqe));
		hfi1_16B_set_qpn(&ps->s_txreq->phdr.hdr.opah.u.mgmt,
				 dest_qp, src_qp);
	} else {
		hfi1_make_bth_deth(qp, wqe, ohdr, &pkey, extra_bytes, true);
	}

	/* Convert dwords to flits */
	len = (ps->s_txreq->hdr_dwords + nwords) >> 1;

	/* Setup the packet */
	ps->s_txreq->phdr.hdr.hdr_type = HFI1_PKT_TYPE_16B;
	hfi1_make_16b_hdr(&ps->s_txreq->phdr.hdr.opah,
			  slid, dlid, len, pkey, 0, 0, l4, priv->s_sc);
}

/**
 * hfi1_make_ud_req - construct a UD request packet
 * @qp: the QP
 * @ps: the current packet state
 *
 * Assume s_lock is held.
 *
 * Return 1 if constructed; otherwise, return 0.
 */
int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct rdma_ah_attr *ah_attr;
	struct hfi1_pportdata *ppd;
	struct hfi1_ibport *ibp;
	struct rvt_swqe *wqe;
	int next_cur;
	u32 lid;

	ps->s_txreq = get_txreq(ps->dev, qp);
	if (!ps->s_txreq)
		goto bail_no_tx;

	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
		if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
			goto bail;
		/* We are in the error state, flush the work request. */
		if (qp->s_last == READ_ONCE(qp->s_head))
			goto bail;
		/* If DMAs are in progress, we can't flush immediately. */
		if (iowait_sdma_pending(&priv->s_iowait)) {
			qp->s_flags |= RVT_S_WAIT_DMA;
			goto bail;
		}
		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
		rvt_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
		goto done_free_tx;
	}

	/* see post_one_send() */
	if (qp->s_cur == READ_ONCE(qp->s_head))
		goto bail;

	wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
	next_cur = qp->s_cur + 1;
	if (next_cur >= qp->s_size)
		next_cur = 0;

	/* Construct the header. */
	ibp = to_iport(qp->ibqp.device, qp->port_num);
	ppd = ppd_from_ibp(ibp);
	ah_attr = rvt_get_swqe_ah_attr(wqe);
	priv->hdr_type = hfi1_get_hdr_type(ppd->lid, ah_attr);
	if ((!hfi1_check_mcast(rdma_ah_get_dlid(ah_attr))) ||
	    (rdma_ah_get_dlid(ah_attr) == be32_to_cpu(OPA_LID_PERMISSIVE))) {
		lid = rdma_ah_get_dlid(ah_attr) & ~((1 << ppd->lmc) - 1);
		if (unlikely(!loopback &&
			     ((lid == ppd->lid) ||
			      ((lid == be32_to_cpu(OPA_LID_PERMISSIVE)) &&
			       (qp->ibqp.qp_type == IB_QPT_GSI))))) {
			unsigned long tflags = ps->flags;

			/*
			 * If DMAs are in progress, we can't generate
			 * a completion for the loopback packet since
			 * it would be out of order.
			 * Instead of waiting, we could queue a
			 * zero length descriptor so we get a callback.
			 */
			if (iowait_sdma_pending(&priv->s_iowait)) {
				qp->s_flags |= RVT_S_WAIT_DMA;
				goto bail;
			}
			qp->s_cur = next_cur;
			spin_unlock_irqrestore(&qp->s_lock, tflags);
			ud_loopback(qp, wqe);
			spin_lock_irqsave(&qp->s_lock, tflags);
			ps->flags = tflags;
			rvt_send_complete(qp, wqe, IB_WC_SUCCESS);
			goto done_free_tx;
		}
	}
	qp->s_cur = next_cur;
	ps->s_txreq->s_cur_size = wqe->length;
	ps->s_txreq->ss = &qp->s_sge;
	qp->s_srate = rdma_ah_get_static_rate(ah_attr);
	qp->srate_mbps = ib_rate_to_mbps(qp->s_srate);
	qp->s_wqe = wqe;
	qp->s_sge.sge = wqe->sg_list[0];
	qp->s_sge.sg_list = wqe->sg_list + 1;
	qp->s_sge.num_sge = wqe->wr.num_sge;
	qp->s_sge.total_len = wqe->length;

	/* Make the appropriate header */
	hfi1_make_ud_req_tbl[priv->hdr_type](qp, ps, qp->s_wqe);
	priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
	ps->s_txreq->sde = priv->s_sde;
	priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
	ps->s_txreq->psc = priv->s_sendcontext;
	/* disarm any ahg */
	priv->s_ahg->ahgcount = 0;
	priv->s_ahg->ahgidx = 0;
	priv->s_ahg->tx_flags = 0;

	return 1;

done_free_tx:
	hfi1_put_txreq(ps->s_txreq);
	ps->s_txreq = NULL;
	return 1;

bail:
	hfi1_put_txreq(ps->s_txreq);

bail_no_tx:
	ps->s_txreq = NULL;
	qp->s_flags &= ~RVT_S_BUSY;
	return 0;
}

/*
 * Hardware can't check this so we do it here.
 *
 * This is a slightly different algorithm than the standard pkey check.  It
 * special cases the management keys and allows for 0x7fff and 0xffff to be in
 * the table at the same time.
 *
 * @returns the index found or -1 if not found
 */
int hfi1_lookup_pkey_idx(struct hfi1_ibport *ibp, u16 pkey)
{
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
	unsigned i;

	if (pkey == FULL_MGMT_P_KEY || pkey == LIM_MGMT_P_KEY) {
		unsigned lim_idx = -1;

		for (i = 0; i < ARRAY_SIZE(ppd->pkeys); ++i) {
			/* here we look for an exact match */
			if (ppd->pkeys[i] == pkey)
				return i;
			if (ppd->pkeys[i] == LIM_MGMT_P_KEY)
				lim_idx = i;
		}

		/* did not find 0xffff return 0x7fff idx if found */
		if (pkey == FULL_MGMT_P_KEY)
			return lim_idx;

		/* no match...  */
		return -1;
	}

	pkey &= 0x7fff; /* remove limited/full membership bit */

	for (i = 0; i < ARRAY_SIZE(ppd->pkeys); ++i)
		if ((ppd->pkeys[i] & 0x7fff) == pkey)
			return i;

	/*
	 * Should not get here, this means hardware failed to validate pkeys.
	 */
	return -1;
}
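/**
 * return_cnp_16B - build and send a 16B CNP
 * @ibp: the port to send from
 * @qp: the QP associated with this CNP
 * @remote_qpn: the QPN to address the CNP to
 * @pkey: the pkey to place in the CNP header
 * @slid: the SLID to use in the CNP header
 * @dlid: the DLID to use in the CNP header
 * @sc5: the SC to send the CNP on
 * @old_grh: the received GRH, or NULL if the packet had none
 *
 * The header is built on the stack and sent inline over PIO.
 */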
void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp,
		    u32 remote_qpn, u16 pkey, u32 slid, u32 dlid,
		    u8 sc5, const struct ib_grh *old_grh)
{
	u64 pbc, pbc_flags = 0;
	u32 bth0, plen, vl, hwords = 7;
	u16 len;
	u8 l4;
	struct hfi1_opa_header hdr;
	struct ib_other_headers *ohdr;
	struct pio_buf *pbuf;
	struct send_context *ctxt = qp_to_send_context(qp, sc5);
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
	u32 nwords;

	hdr.hdr_type = HFI1_PKT_TYPE_16B;
	/* Populate length */
	nwords = ((hfi1_get_16b_padding(hwords << 2, 0) +
		   SIZE_OF_LT) >> 2) + SIZE_OF_CRC;
	if (old_grh) {
		struct ib_grh *grh = &hdr.opah.u.l.grh;

		grh->version_tclass_flow = old_grh->version_tclass_flow;
		grh->paylen = cpu_to_be16(
			(hwords - LRH_16B_DWORDS + nwords) << 2);
		grh->hop_limit = 0xff;
		grh->sgid = old_grh->dgid;
		grh->dgid = old_grh->sgid;
		ohdr = &hdr.opah.u.l.oth;
		l4 = OPA_16B_L4_IB_GLOBAL;
		hwords += sizeof(struct ib_grh) / sizeof(u32);
	} else {
		ohdr = &hdr.opah.u.oth;
		l4 = OPA_16B_L4_IB_LOCAL;
	}

	/* BIT 16 to 19 is TVER. Bit 20 to 22 is pad cnt */
	bth0 = (IB_OPCODE_CNP << 24) | (1 << 16) |
	       (hfi1_get_16b_padding(hwords << 2, 0) << 20);
	ohdr->bth[0] = cpu_to_be32(bth0);
	ohdr->bth[1] = cpu_to_be32(remote_qpn);
	ohdr->bth[2] = 0; /* PSN 0 */

	/* Convert dwords to flits */
	len = (hwords + nwords) >> 1;
	hfi1_make_16b_hdr(&hdr.opah, slid, dlid, len, pkey, 1, 0, l4, sc5);

	plen = 2 /* PBC */ + hwords + nwords;
	pbc_flags |= PBC_PACKET_BYPASS | PBC_INSERT_BYPASS_ICRC;
	vl = sc_to_vlt(ppd->dd, sc5);
	pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
	if (ctxt) {
		pbuf = sc_buffer_alloc(ctxt, plen, NULL, NULL);
		if (!IS_ERR_OR_NULL(pbuf)) {
			trace_pio_output_ibhdr(ppd->dd, &hdr, sc5);
			ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc,
						 &hdr, hwords);
		}
	}
}
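/**
 * return_cnp - build and send a 9B CNP
 * @ibp: the port to send from
 * @qp: the QP associated with this CNP
 * @remote_qpn: the QPN to address the CNP to
 * @pkey: the pkey to place in bth0
 * @slid: the SLID to use in the LRH
 * @dlid: the DLID to use in the LRH
 * @sc5: the SC to send the CNP on
 * @old_grh: the received GRH, or NULL if the packet had none
 *
 * As with the 16B variant, the header is built on the stack and sent
 * inline over PIO.
 */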
void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
		u16 pkey, u32 slid, u32 dlid, u8 sc5,
		const struct ib_grh *old_grh)
{
	u64 pbc, pbc_flags = 0;
	u32 bth0, plen, vl, hwords = 5;
	u16 lrh0;
	u8 sl = ibp->sc_to_sl[sc5];
	struct hfi1_opa_header hdr;
	struct ib_other_headers *ohdr;
	struct pio_buf *pbuf;
	struct send_context *ctxt = qp_to_send_context(qp, sc5);
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);

	hdr.hdr_type = HFI1_PKT_TYPE_9B;
	if (old_grh) {
		struct ib_grh *grh = &hdr.ibh.u.l.grh;

		grh->version_tclass_flow = old_grh->version_tclass_flow;
		grh->paylen = cpu_to_be16(
			(hwords - LRH_9B_DWORDS + SIZE_OF_CRC) << 2);
		grh->hop_limit = 0xff;
		grh->sgid = old_grh->dgid;
		grh->dgid = old_grh->sgid;
		ohdr = &hdr.ibh.u.l.oth;
		lrh0 = HFI1_LRH_GRH;
		hwords += sizeof(struct ib_grh) / sizeof(u32);
	} else {
		ohdr = &hdr.ibh.u.oth;
		lrh0 = HFI1_LRH_BTH;
	}

	lrh0 |= (sc5 & 0xf) << 12 | sl << 4;

	bth0 = pkey | (IB_OPCODE_CNP << 24);
	ohdr->bth[0] = cpu_to_be32(bth0);

	ohdr->bth[1] = cpu_to_be32(remote_qpn | (1 << IB_BECN_SHIFT));
	ohdr->bth[2] = 0; /* PSN 0 */

	hfi1_make_ib_hdr(&hdr.ibh, lrh0, hwords + SIZE_OF_CRC, dlid, slid);
	plen = 2 /* PBC */ + hwords;
	pbc_flags |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);
	vl = sc_to_vlt(ppd->dd, sc5);
	pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
	if (ctxt) {
		pbuf = sc_buffer_alloc(ctxt, plen, NULL, NULL);
		if (!IS_ERR_OR_NULL(pbuf)) {
			trace_pio_output_ibhdr(ppd->dd, &hdr, sc5);
			ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc,
						 &hdr, hwords);
		}
	}
}

/*
 * opa_smp_check() - Do the regular pkey checking, and the additional
 * checks for SMPs specified in OPAv1 rev 1.0, 9/19/2016 update, section
 * 9.10.25 ("SMA Packet Checks").
 *
 * Note that:
 *   - Checks are done using the pkey directly from the packet's BTH,
 *     and specifically _not_ the pkey that we attach to the completion,
 *     which may be different.
 *   - These checks are specifically for "non-local" SMPs (i.e., SMPs
 *     which originated on another node). SMPs which are sent from, and
 *     destined to this node are checked in opa_local_smp_check().
 *
 * At the point where opa_smp_check() is called, we know:
 *   - destination QP is QP0
 *
 * opa_smp_check() returns 0 if all checks succeed, 1 otherwise.
 */
static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5,
			 struct rvt_qp *qp, u16 slid, struct opa_smp *smp)
{
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);

	/*
	 * I don't think it's possible for us to get here with sc != 0xf,
	 * but check it to be certain.
	 */
	if (sc5 != 0xf)
		return 1;

	if (rcv_pkey_check(ppd, pkey, sc5, slid))
		return 1;

	/*
	 * At this point we know (and so don't need to check again) that
	 * the pkey is either LIM_MGMT_P_KEY, or FULL_MGMT_P_KEY
	 * (see ingress_pkey_check).
	 */
	if (smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE &&
	    smp->mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED) {
		ingress_pkey_table_fail(ppd, pkey, slid);
		return 1;
	}

	/*
	 * SMPs fall into one of four (disjoint) categories:
	 * SMA request, SMA response, SMA trap, or SMA trap repress.
	 * Our response depends, in part, on which type of SMP we're
	 * processing.
	 *
	 * If this is an SMA response, skip the check here.
	 *
	 * If this is an SMA request or SMA trap repress:
	 *   - pkey != FULL_MGMT_P_KEY =>
	 *       increment port recv constraint errors, drop MAD
	 *
	 * Otherwise:
	 *    - accept if the port is running an SM
	 *    - drop MAD if it's an SMA trap
	 *    - pkey == FULL_MGMT_P_KEY =>
	 *        reply with unsupported method
	 *    - pkey != FULL_MGMT_P_KEY =>
	 *        increment port recv constraint errors, drop MAD
	 */
	switch (smp->method) {
	case IB_MGMT_METHOD_GET_RESP:
	case IB_MGMT_METHOD_REPORT_RESP:
		break;
	case IB_MGMT_METHOD_GET:
	case IB_MGMT_METHOD_SET:
	case IB_MGMT_METHOD_REPORT:
	case IB_MGMT_METHOD_TRAP_REPRESS:
		if (pkey != FULL_MGMT_P_KEY) {
			ingress_pkey_table_fail(ppd, pkey, slid);
			return 1;
		}
		break;
	default:
		if (ibp->rvp.port_cap_flags & IB_PORT_SM)
			return 0;
		if (smp->method == IB_MGMT_METHOD_TRAP)
			return 1;
		if (pkey == FULL_MGMT_P_KEY) {
			smp->status |= IB_SMP_UNSUP_METHOD;
			return 0;
		}
		ingress_pkey_table_fail(ppd, pkey, slid);
		return 1;
	}
	return 0;
}

/**
 * hfi1_ud_rcv - receive an incoming UD packet
 * @packet: the packet structure
 *
 * This is called from qp_rcv() to process an incoming UD packet
 * for the given QP.
 * Called at interrupt level.
 */
void hfi1_ud_rcv(struct hfi1_packet *packet)
{
	u32 hdrsize = packet->hlen;
	struct ib_wc wc;
	u32 src_qp;
	u16 pkey;
	int mgmt_pkey_idx = -1;
	struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
	void *data = packet->payload;
	u32 tlen = packet->tlen;
	struct rvt_qp *qp = packet->qp;
	u8 sc5 = packet->sc;
	u8 sl_from_sc;
	u8 opcode = packet->opcode;
	u8 sl = packet->sl;
	u32 dlid = packet->dlid;
	u32 slid = packet->slid;
	u8 extra_bytes;
	u8 l4 = 0;
	bool dlid_is_permissive;
	bool slid_is_permissive;
	bool solicited = false;

	extra_bytes = packet->pad + packet->extra_byte + (SIZE_OF_CRC << 2);

	if (packet->etype == RHF_RCV_TYPE_BYPASS) {
		u32 permissive_lid =
			opa_get_lid(be32_to_cpu(OPA_LID_PERMISSIVE), 16B);

		l4 = hfi1_16B_get_l4(packet->hdr);
		pkey = hfi1_16B_get_pkey(packet->hdr);
		dlid_is_permissive = (dlid == permissive_lid);
		slid_is_permissive = (slid == permissive_lid);
	} else {
		pkey = ib_bth_get_pkey(packet->ohdr);
		dlid_is_permissive = (dlid == be16_to_cpu(IB_LID_PERMISSIVE));
		slid_is_permissive = (slid == be16_to_cpu(IB_LID_PERMISSIVE));
	}
	sl_from_sc = ibp->sc_to_sl[sc5];

	if (likely(l4 != OPA_16B_L4_FM)) {
		src_qp = ib_get_sqpn(packet->ohdr);
		solicited = ib_bth_is_solicited(packet->ohdr);
	} else {
		src_qp = hfi1_16B_get_src_qpn(packet->mgmt);
	}

	process_ecn(qp, packet);
	/*
	 * Get the number of bytes the message was padded by
	 * and drop incomplete packets.
	 */
	if (unlikely(tlen < (hdrsize + extra_bytes)))
		goto drop;

	tlen -= hdrsize + extra_bytes;

	/*
	 * Check that the permissive LID is only used on QP0
	 * and the QKEY matches (see 9.6.1.4.1 and 9.6.1.5.1).
	 */
	if (qp->ibqp.qp_num) {
		if (unlikely(dlid_is_permissive || slid_is_permissive))
			goto drop;
		if (qp->ibqp.qp_num > 1) {
			if (unlikely(rcv_pkey_check(ppd, pkey, sc5, slid))) {
				/*
				 * Traps will not be sent for packets dropped
				 * by the HW. This is fine, as sending trap
				 * for invalid pkeys is optional according to
				 * IB spec (release 1.3, section 10.9.4)
				 */
				hfi1_bad_pkey(ibp,
					      pkey, sl,
					      src_qp, qp->ibqp.qp_num,
					      slid, dlid);
				return;
			}
		} else {
			/* GSI packet */
			mgmt_pkey_idx = hfi1_lookup_pkey_idx(ibp, pkey);
			if (mgmt_pkey_idx < 0)
				goto drop;
		}
		if (unlikely(l4 != OPA_16B_L4_FM &&
			     ib_get_qkey(packet->ohdr) != qp->qkey))
			return; /* Silent drop */

		/* Drop invalid MAD packets (see 13.5.3.1). */
		if (unlikely(qp->ibqp.qp_num == 1 &&
			     (tlen > 2048 || (sc5 == 0xF))))
			goto drop;
	} else {
		/* Received on QP0, and so by definition, this is an SMP */
		struct opa_smp *smp = (struct opa_smp *)data;

		if (opa_smp_check(ibp, pkey, sc5, qp, slid, smp))
			goto drop;

		if (tlen > 2048)
			goto drop;
		if ((dlid_is_permissive || slid_is_permissive) &&
		    smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
			goto drop;

		/* look up SMI pkey */
		mgmt_pkey_idx = hfi1_lookup_pkey_idx(ibp, pkey);
		if (mgmt_pkey_idx < 0)
			goto drop;
	}

	if (qp->ibqp.qp_num > 1 &&
	    opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) {
		wc.ex.imm_data = packet->ohdr->u.ud.imm_data;
		wc.wc_flags = IB_WC_WITH_IMM;
	} else if (opcode == IB_OPCODE_UD_SEND_ONLY) {
		wc.ex.imm_data = 0;
		wc.wc_flags = 0;
	} else {
		goto drop;
	}

	/*
	 * A GRH is expected to precede the data even if not
	 * present on the wire.
	 */
	wc.byte_len = tlen + sizeof(struct ib_grh);

	/*
	 * Get the next work request entry to find where to put the data.
	 */
	if (qp->r_flags & RVT_R_REUSE_SGE) {
		qp->r_flags &= ~RVT_R_REUSE_SGE;
	} else {
		int ret;

		ret = rvt_get_rwqe(qp, false);
		if (ret < 0) {
			rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
			return;
		}
		if (!ret) {
			if (qp->ibqp.qp_num == 0)
				ibp->rvp.n_vl15_dropped++;
			return;
		}
	}
	/* Silently drop packets which are too big. */
	if (unlikely(wc.byte_len > qp->r_len)) {
		qp->r_flags |= RVT_R_REUSE_SGE;
		goto drop;
	}
	if (packet->grh) {
		rvt_copy_sge(qp, &qp->r_sge, packet->grh,
			     sizeof(struct ib_grh), true, false);
		wc.wc_flags |= IB_WC_GRH;
	} else if (packet->etype == RHF_RCV_TYPE_BYPASS) {
		struct ib_grh grh;

		/*
		 * Assuming we only created 16B on the send side
		 * if we want to use large LIDs, since GRH was stripped
		 * out when creating 16B, add back the GRH here.
		 */
		hfi1_make_ext_grh(packet, &grh, slid, dlid);
		rvt_copy_sge(qp, &qp->r_sge, &grh,
			     sizeof(struct ib_grh), true, false);
		wc.wc_flags |= IB_WC_GRH;
	} else {
		rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true);
	}
	rvt_copy_sge(qp, &qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh),
		     true, false);
	rvt_put_ss(&qp->r_sge);
	if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
		return;
	wc.wr_id = qp->r_wr_id;
	wc.status = IB_WC_SUCCESS;
	wc.opcode = IB_WC_RECV;
	wc.vendor_err = 0;
	wc.qp = &qp->ibqp;
	wc.src_qp = src_qp;

	if (qp->ibqp.qp_type == IB_QPT_GSI ||
	    qp->ibqp.qp_type == IB_QPT_SMI) {
		if (mgmt_pkey_idx < 0) {
			if (net_ratelimit()) {
				struct hfi1_devdata *dd = ppd->dd;

				dd_dev_err(dd, "QP type %d mgmt_pkey_idx < 0 and packet not dropped???\n",
					   qp->ibqp.qp_type);
				mgmt_pkey_idx = 0;
			}
		}
		wc.pkey_index = (unsigned)mgmt_pkey_idx;
	} else {
		wc.pkey_index = 0;
	}
	if (slid_is_permissive)
		slid = be32_to_cpu(OPA_LID_PERMISSIVE);
	wc.slid = slid & U16_MAX;
	wc.sl = sl_from_sc;

	/*
	 * Save the LMC lower bits if the destination LID is a unicast LID.
	 */
	wc.dlid_path_bits = hfi1_check_mcast(dlid) ? 0 :
		dlid & ((1 << ppd_from_ibp(ibp)->lmc) - 1);
	wc.port_num = qp->port_num;
	/* Signal completion event if the solicited bit is set. */
	rvt_recv_cq(qp, &wc, solicited);
	return;

drop:
	ibp->rvp.n_pkt_drops++;
}