qp.c

// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
/*
 * Copyright(c) 2015 - 2020 Intel Corporation.
 */

#include <linux/err.h>
#include <linux/vmalloc.h>
#include <linux/hash.h>
#include <linux/module.h>
#include <linux/seq_file.h>
#include <rdma/rdma_vt.h>
#include <rdma/rdmavt_qp.h>
#include <rdma/ib_verbs.h>

#include "hfi.h"
#include "qp.h"
#include "trace.h"
#include "verbs_txreq.h"

unsigned int hfi1_qp_table_size = 256;
module_param_named(qp_table_size, hfi1_qp_table_size, uint, S_IRUGO);
MODULE_PARM_DESC(qp_table_size, "QP table size");

static void flush_tx_list(struct rvt_qp *qp);
static int iowait_sleep(
	struct sdma_engine *sde,
	struct iowait_work *wait,
	struct sdma_txreq *stx,
	unsigned int seq,
	bool pkts_sent);
static void iowait_wakeup(struct iowait *wait, int reason);
static void iowait_sdma_drained(struct iowait *wait);
static void qp_pio_drain(struct rvt_qp *qp);

const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = {
[IB_WR_RDMA_WRITE] = {
	.length = sizeof(struct ib_rdma_wr),
	.qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
},

[IB_WR_RDMA_READ] = {
	.length = sizeof(struct ib_rdma_wr),
	.qpt_support = BIT(IB_QPT_RC),
	.flags = RVT_OPERATION_ATOMIC,
},

[IB_WR_ATOMIC_CMP_AND_SWP] = {
	.length = sizeof(struct ib_atomic_wr),
	.qpt_support = BIT(IB_QPT_RC),
	.flags = RVT_OPERATION_ATOMIC | RVT_OPERATION_ATOMIC_SGE,
},

[IB_WR_ATOMIC_FETCH_AND_ADD] = {
	.length = sizeof(struct ib_atomic_wr),
	.qpt_support = BIT(IB_QPT_RC),
	.flags = RVT_OPERATION_ATOMIC | RVT_OPERATION_ATOMIC_SGE,
},

[IB_WR_RDMA_WRITE_WITH_IMM] = {
	.length = sizeof(struct ib_rdma_wr),
	.qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
},

[IB_WR_SEND] = {
	.length = sizeof(struct ib_send_wr),
	.qpt_support = BIT(IB_QPT_UD) | BIT(IB_QPT_SMI) | BIT(IB_QPT_GSI) |
		       BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
},

[IB_WR_SEND_WITH_IMM] = {
	.length = sizeof(struct ib_send_wr),
	.qpt_support = BIT(IB_QPT_UD) | BIT(IB_QPT_SMI) | BIT(IB_QPT_GSI) |
		       BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
},

[IB_WR_REG_MR] = {
	.length = sizeof(struct ib_reg_wr),
	.qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
	.flags = RVT_OPERATION_LOCAL,
},

[IB_WR_LOCAL_INV] = {
	.length = sizeof(struct ib_send_wr),
	.qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
	.flags = RVT_OPERATION_LOCAL,
},

[IB_WR_SEND_WITH_INV] = {
	.length = sizeof(struct ib_send_wr),
	.qpt_support = BIT(IB_QPT_RC),
},

[IB_WR_OPFN] = {
	.length = sizeof(struct ib_atomic_wr),
	.qpt_support = BIT(IB_QPT_RC),
	.flags = RVT_OPERATION_USE_RESERVE,
},

[IB_WR_TID_RDMA_WRITE] = {
	.length = sizeof(struct ib_rdma_wr),
	.qpt_support = BIT(IB_QPT_RC),
	.flags = RVT_OPERATION_IGN_RNR_CNT,
},

};
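
/*
 * Free every sdma_txreq queued on the given list, returning each
 * enclosing verbs_txreq to its allocator.
 */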
static void flush_list_head(struct list_head *l)
{
	while (!list_empty(l)) {
		struct sdma_txreq *tx;

		tx = list_first_entry(
			l,
			struct sdma_txreq,
			list);
		list_del_init(&tx->list);
		hfi1_put_txreq(
			container_of(tx, struct verbs_txreq, txreq));
	}
}

static void flush_tx_list(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;

	flush_list_head(&iowait_get_ib_work(&priv->s_iowait)->tx_head);
	flush_list_head(&iowait_get_tid_work(&priv->s_iowait)->tx_head);
}
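
/*
 * If the QP is queued on an iowait list, remove it and drop the
 * reference that was taken when it was queued.
 */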
static void flush_iowait(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;
	unsigned long flags;
	seqlock_t *lock = priv->s_iowait.lock;

	if (!lock)
		return;
	write_seqlock_irqsave(lock, flags);
	if (!list_empty(&priv->s_iowait.list)) {
		list_del_init(&priv->s_iowait.list);
		priv->s_iowait.lock = NULL;
		rvt_put_qp(qp);
	}
	write_sequnlock_irqrestore(lock, flags);
}

/*
 * This function is what we would push to the core layer if we wanted to be a
 * "first class citizen". Instead we hide this here and rely on Verbs ULPs
 * to blindly pass the MTU enum value from the PathRecord to us.
 */
static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu)
{
	/* Constraining 10KB packets to 8KB packets */
	if (mtu == (enum ib_mtu)OPA_MTU_10240)
		mtu = (enum ib_mtu)OPA_MTU_8192;
	return opa_mtu_enum_to_int((enum opa_mtu)mtu);
}
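
/*
 * Validate the primary and alternate path attributes of a modify QP
 * request: reject an SC of 0xf or an SC that cannot be mapped to an
 * SDMA engine (when send DMA is present) or to a send context.
 */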
int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
			 int attr_mask, struct ib_udata *udata)
{
	struct ib_qp *ibqp = &qp->ibqp;
	struct hfi1_ibdev *dev = to_idev(ibqp->device);
	struct hfi1_devdata *dd = dd_from_dev(dev);
	u8 sc;

	if (attr_mask & IB_QP_AV) {
		sc = ah_to_sc(ibqp->device, &attr->ah_attr);
		if (sc == 0xf)
			return -EINVAL;

		if (!qp_to_sdma_engine(qp, sc) &&
		    dd->flags & HFI1_HAS_SEND_DMA)
			return -EINVAL;

		if (!qp_to_send_context(qp, sc))
			return -EINVAL;
	}

	if (attr_mask & IB_QP_ALT_PATH) {
		sc = ah_to_sc(ibqp->device, &attr->alt_ah_attr);
		if (sc == 0xf)
			return -EINVAL;

		if (!qp_to_sdma_engine(qp, sc) &&
		    dd->flags & HFI1_HAS_SEND_DMA)
			return -EINVAL;

		if (!qp_to_send_context(qp, sc))
			return -EINVAL;
	}

	return 0;
}

/*
 * qp_set_16b - Set the hdr_type based on whether the slid or the
 * dlid in the connection is extended. Only applicable for RC and UC
 * QPs. UD QPs determine this on the fly from the ah in the wqe
 */
static inline void qp_set_16b(struct rvt_qp *qp)
{
	struct hfi1_pportdata *ppd;
	struct hfi1_ibport *ibp;
	struct hfi1_qp_priv *priv = qp->priv;

	/* Update ah_attr to account for extended LIDs */
	hfi1_update_ah_attr(qp->ibqp.device, &qp->remote_ah_attr);

	/* Create 32 bit LIDs */
	hfi1_make_opa_lid(&qp->remote_ah_attr);

	if (!(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH))
		return;

	ibp = to_iport(qp->ibqp.device, qp->port_num);
	ppd = ppd_from_ibp(ibp);
	priv->hdr_type = hfi1_get_hdr_type(ppd->lid, &qp->remote_ah_attr);
}
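
/*
 * Update driver-private QP state after a modify: refresh the cached SC,
 * SDMA engine, and send context when the address vector changes or a
 * path migration completes, and hand the new attributes to OPFN.
 */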
void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
		    int attr_mask, struct ib_udata *udata)
{
	struct ib_qp *ibqp = &qp->ibqp;
	struct hfi1_qp_priv *priv = qp->priv;

	if (attr_mask & IB_QP_AV) {
		priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr);
		priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
		priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
		qp_set_16b(qp);
	}

	if (attr_mask & IB_QP_PATH_MIG_STATE &&
	    attr->path_mig_state == IB_MIG_MIGRATED &&
	    qp->s_mig_state == IB_MIG_ARMED) {
		qp->s_flags |= HFI1_S_AHG_CLEAR;
		priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr);
		priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
		priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
		qp_set_16b(qp);
	}

	opfn_qp_init(qp, attr, attr_mask);
}

/**
 * hfi1_setup_wqe - set up the wqe
 * @qp: The qp
 * @wqe: The built wqe
 * @call_send: Determine if the send should be posted or scheduled.
 *
 * Perform setup of the wqe. This is called
 * prior to inserting the wqe into the ring but after
 * the wqe has been setup by RDMAVT. This function
 * allows the driver the opportunity to perform
 * validation and additional setup of the wqe.
 *
 * Returns 0 on success, -EINVAL on failure
 *
 */
int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, bool *call_send)
{
	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
	struct rvt_ah *ah;
	struct hfi1_pportdata *ppd;
	struct hfi1_devdata *dd;

	switch (qp->ibqp.qp_type) {
	case IB_QPT_RC:
		hfi1_setup_tid_rdma_wqe(qp, wqe);
		fallthrough;
	case IB_QPT_UC:
		if (wqe->length > 0x80000000U)
			return -EINVAL;
		if (wqe->length > qp->pmtu)
			*call_send = false;
		break;
	case IB_QPT_SMI:
		/*
		 * SM packets should exclusively use VL15 and their SL is
		 * ignored (IBTA v1.3, Section 3.5.8.2). Therefore, when ah
		 * is created, SL is 0 in most cases and as a result some
		 * fields (vl and pmtu) in ah may not be set correctly,
		 * depending on the SL2SC and SC2VL tables at the time.
		 */
		ppd = ppd_from_ibp(ibp);
		dd = dd_from_ppd(ppd);
		if (wqe->length > dd->vld[15].mtu)
			return -EINVAL;
		break;
	case IB_QPT_GSI:
	case IB_QPT_UD:
		ah = rvt_get_swqe_ah(wqe);
		if (wqe->length > (1 << ah->log_pmtu))
			return -EINVAL;
		if (ibp->sl_to_sc[rdma_ah_get_sl(&ah->attr)] == 0xf)
			return -EINVAL;
		break;
	default:
		break;
	}

	/*
	 * System latency between send and schedule is large enough that
	 * forcing call_send to true for piothreshold packets is necessary.
	 */
	if (wqe->length <= piothreshold)
		*call_send = true;
	return 0;
}

/**
 * _hfi1_schedule_send - schedule progress
 * @qp: the QP
 *
 * This schedules qp progress w/o regard to the s_flags.
 *
 * It is only used in the post send, which doesn't hold
 * the s_lock.
 */
bool _hfi1_schedule_send(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct hfi1_ibport *ibp =
		to_iport(qp->ibqp.device, qp->port_num);
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
	struct hfi1_devdata *dd = ppd->dd;

	if (dd->flags & HFI1_SHUTDOWN)
		return true;

	return iowait_schedule(&priv->s_iowait, ppd->hfi1_wq,
			       priv->s_sde ?
			       priv->s_sde->cpu :
			       cpumask_first(cpumask_of_node(dd->node)));
}
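
/*
 * Wait for all outstanding PIO sends on this QP's send context to
 * complete, using the buffer-available interrupt to make progress.
 */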
static void qp_pio_drain(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;

	if (!priv->s_sendcontext)
		return;
	while (iowait_pio_pending(&priv->s_iowait)) {
		write_seqlock_irq(&priv->s_sendcontext->waitlock);
		hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 1);
		write_sequnlock_irq(&priv->s_sendcontext->waitlock);
		iowait_pio_drain(&priv->s_iowait);
		write_seqlock_irq(&priv->s_sendcontext->waitlock);
		hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 0);
		write_sequnlock_irq(&priv->s_sendcontext->waitlock);
	}
}

/**
 * hfi1_schedule_send - schedule progress
 * @qp: the QP
 *
 * This schedules qp progress and caller should hold
 * the s_lock.
 * @return true if the first leg is scheduled;
 * false if the first leg is not scheduled.
 */
bool hfi1_schedule_send(struct rvt_qp *qp)
{
	lockdep_assert_held(&qp->s_lock);
	if (hfi1_send_ok(qp)) {
		_hfi1_schedule_send(qp);
		return true;
	}
	if (qp->s_flags & HFI1_S_ANY_WAIT_IO)
		iowait_set_flag(&((struct hfi1_qp_priv *)qp->priv)->s_iowait,
				IOWAIT_PENDING_IB);
	return false;
}
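
/*
 * Schedule whichever send engine legs (IB and/or TID RDMA) have work
 * pending, clearing the corresponding pending flag once a leg has been
 * queued.
 */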
static void hfi1_qp_schedule(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;
	bool ret;

	if (iowait_flag_set(&priv->s_iowait, IOWAIT_PENDING_IB)) {
		ret = hfi1_schedule_send(qp);
		if (ret)
			iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
	}
	if (iowait_flag_set(&priv->s_iowait, IOWAIT_PENDING_TID)) {
		ret = hfi1_schedule_tid_send(qp);
		if (ret)
			iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
	}
}
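
/*
 * Clear the given wait flag and reschedule the QP if it was waiting on
 * that flag, then drop the reference taken when the QP was queued.
 */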
void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
{
	unsigned long flags;

	spin_lock_irqsave(&qp->s_lock, flags);
	if (qp->s_flags & flag) {
		qp->s_flags &= ~flag;
		trace_hfi1_qpwakeup(qp, flag);
		hfi1_qp_schedule(qp);
	}
	spin_unlock_irqrestore(&qp->s_lock, flags);
	/* Notify hfi1_destroy_qp() if it is waiting. */
	rvt_put_qp(qp);
}
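
/*
 * Clear the busy state at the end of a send engine pass over the given
 * iowait_work so the QP can be scheduled again.
 */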
void hfi1_qp_unbusy(struct rvt_qp *qp, struct iowait_work *wait)
{
	struct hfi1_qp_priv *priv = qp->priv;

	if (iowait_set_work_flag(wait) == IOWAIT_IB_SE) {
		qp->s_flags &= ~RVT_S_BUSY;
		/*
		 * If we are sending a first-leg packet from the second leg,
		 * we need to clear the busy flag from priv->s_flags to
		 * avoid a race condition when the qp wakes up before
		 * the call to hfi1_verbs_send() returns to the second
		 * leg. In that case, the second leg will terminate without
		 * being re-scheduled, resulting in failure to send TID RDMA
		 * WRITE DATA and TID RDMA ACK packets.
		 */
		if (priv->s_flags & HFI1_S_TID_BUSY_SET) {
			priv->s_flags &= ~(HFI1_S_TID_BUSY_SET |
					   RVT_S_BUSY);
			iowait_set_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
		}
	} else {
		priv->s_flags &= ~RVT_S_BUSY;
	}
}
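
/*
 * Called by the SDMA engine when a descriptor cannot be queued: park
 * the tx request on the iowait work list and put the QP to sleep on the
 * engine's dmawait list (-EBUSY), ask for an immediate retry if the
 * engine has since made progress (-EAGAIN), or drop the request if the
 * QP is no longer in a state that allows processing.
 */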
static int iowait_sleep(
	struct sdma_engine *sde,
	struct iowait_work *wait,
	struct sdma_txreq *stx,
	uint seq,
	bool pkts_sent)
{
	struct verbs_txreq *tx = container_of(stx, struct verbs_txreq, txreq);
	struct rvt_qp *qp;
	struct hfi1_qp_priv *priv;
	unsigned long flags;
	int ret = 0;

	qp = tx->qp;
	priv = qp->priv;

	spin_lock_irqsave(&qp->s_lock, flags);
	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
		/*
		 * If we couldn't queue the DMA request, save the info
		 * and try again later rather than destroying the
		 * buffer and undoing the side effects of the copy.
		 */
		/* Make a common routine? */
		list_add_tail(&stx->list, &wait->tx_head);
		write_seqlock(&sde->waitlock);
		if (sdma_progress(sde, seq, stx))
			goto eagain;
		if (list_empty(&priv->s_iowait.list)) {
			struct hfi1_ibport *ibp =
				to_iport(qp->ibqp.device, qp->port_num);

			ibp->rvp.n_dmawait++;
			qp->s_flags |= RVT_S_WAIT_DMA_DESC;
			iowait_get_priority(&priv->s_iowait);
			iowait_queue(pkts_sent, &priv->s_iowait,
				     &sde->dmawait);
			priv->s_iowait.lock = &sde->waitlock;
			trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC);
			rvt_get_qp(qp);
		}
		write_sequnlock(&sde->waitlock);
		hfi1_qp_unbusy(qp, wait);
		spin_unlock_irqrestore(&qp->s_lock, flags);
		ret = -EBUSY;
	} else {
		spin_unlock_irqrestore(&qp->s_lock, flags);
		hfi1_put_txreq(tx);
	}
	return ret;
eagain:
	write_sequnlock(&sde->waitlock);
	spin_unlock_irqrestore(&qp->s_lock, flags);
	list_del_init(&stx->list);
	return -EAGAIN;
}

static void iowait_wakeup(struct iowait *wait, int reason)
{
	struct rvt_qp *qp = iowait_to_qp(wait);

	WARN_ON(reason != SDMA_AVAIL_REASON);
	hfi1_qp_wakeup(qp, RVT_S_WAIT_DMA_DESC);
}

static void iowait_sdma_drained(struct iowait *wait)
{
	struct rvt_qp *qp = iowait_to_qp(wait);
	unsigned long flags;

	/*
	 * This happens when the send engine notes
	 * a QP in the error state and cannot
	 * do the flush work until that QP's
	 * sdma work has finished.
	 */
	spin_lock_irqsave(&qp->s_lock, flags);
	if (qp->s_flags & RVT_S_WAIT_DMA) {
		qp->s_flags &= ~RVT_S_WAIT_DMA;
		hfi1_schedule_send(qp);
	}
	spin_unlock_irqrestore(&qp->s_lock, flags);
}
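
/*
 * iowait priority callback: raise the priority of a QP that has an ACK
 * pending in either qp->s_flags or the driver-private s_flags.
 */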
static void hfi1_init_priority(struct iowait *w)
{
	struct rvt_qp *qp = iowait_to_qp(w);
	struct hfi1_qp_priv *priv = qp->priv;

	if (qp->s_flags & RVT_S_ACK_PENDING)
		w->priority++;
	if (priv->s_flags & RVT_S_ACK_PENDING)
		w->priority++;
}

/**
 * qp_to_sdma_engine - map a qp to a send engine
 * @qp: the QP
 * @sc5: the 5 bit sc
 *
 * Return:
 * A send engine for the qp or NULL for SMI type qp.
 */
struct sdma_engine *qp_to_sdma_engine(struct rvt_qp *qp, u8 sc5)
{
	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
	struct sdma_engine *sde;

	if (!(dd->flags & HFI1_HAS_SEND_DMA))
		return NULL;
	switch (qp->ibqp.qp_type) {
	case IB_QPT_SMI:
		return NULL;
	default:
		break;
	}
	sde = sdma_select_engine_sc(dd, qp->ibqp.qp_num >> dd->qos_shift, sc5);
	return sde;
}

/**
 * qp_to_send_context - map a qp to a send context
 * @qp: the QP
 * @sc5: the 5 bit sc
 *
 * Return:
 * A send context for the qp
 */
struct send_context *qp_to_send_context(struct rvt_qp *qp, u8 sc5)
{
	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);

	switch (qp->ibqp.qp_type) {
	case IB_QPT_SMI:
		/* SMA packets to VL15 */
		return dd->vld[15].sc;
	default:
		break;
	}
	return pio_select_send_context_sc(dd, qp->ibqp.qp_num >> dd->qos_shift,
					  sc5);
}

static const char * const qp_type_str[] = {
	"SMI", "GSI", "RC", "UC", "UD",
};

static int qp_idle(struct rvt_qp *qp)
{
	return
		qp->s_last == qp->s_acked &&
		qp->s_acked == qp->s_cur &&
		qp->s_cur == qp->s_tail &&
		qp->s_tail == qp->s_head;
}

/**
 * qp_iter_print - print the qp information to seq_file
 * @s: the seq_file to emit the qp information on
 * @iter: the iterator for the qp hash list
 */
void qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter)
{
	struct rvt_swqe *wqe;
	struct rvt_qp *qp = iter->qp;
	struct hfi1_qp_priv *priv = qp->priv;
	struct sdma_engine *sde;
	struct send_context *send_context;
	struct rvt_ack_entry *e = NULL;
	struct rvt_srq *srq = qp->ibqp.srq ?
		ibsrq_to_rvtsrq(qp->ibqp.srq) : NULL;

	sde = qp_to_sdma_engine(qp, priv->s_sc);
	wqe = rvt_get_swqe_ptr(qp, qp->s_last);
	send_context = qp_to_send_context(qp, priv->s_sc);
	if (qp->s_ack_queue)
		e = &qp->s_ack_queue[qp->s_tail_ack_queue];
	seq_printf(s,
		   "N %d %s QP %x R %u %s %u %u f=%x %u %u %u %u %u %u SPSN %x %x %x %x %x RPSN %x S(%u %u %u %u %u %u %u) R(%u %u %u) RQP %x LID %x SL %u MTU %u %u %u %u %u SDE %p,%u SC %p,%u SCQ %u %u PID %d OS %x %x E %x %x %x RNR %d %s %d\n",
		   iter->n,
		   qp_idle(qp) ? "I" : "B",
		   qp->ibqp.qp_num,
		   atomic_read(&qp->refcount),
		   qp_type_str[qp->ibqp.qp_type],
		   qp->state,
		   wqe ? wqe->wr.opcode : 0,
		   qp->s_flags,
		   iowait_sdma_pending(&priv->s_iowait),
		   iowait_pio_pending(&priv->s_iowait),
		   !list_empty(&priv->s_iowait.list),
		   qp->timeout,
		   wqe ? wqe->ssn : 0,
		   qp->s_lsn,
		   qp->s_last_psn,
		   qp->s_psn, qp->s_next_psn,
		   qp->s_sending_psn, qp->s_sending_hpsn,
		   qp->r_psn,
		   qp->s_last, qp->s_acked, qp->s_cur,
		   qp->s_tail, qp->s_head, qp->s_size,
		   qp->s_avail,
		   /* ack_queue ring pointers, size */
		   qp->s_tail_ack_queue, qp->r_head_ack_queue,
		   rvt_max_atomic(&to_idev(qp->ibqp.device)->rdi),
		   /* remote QP info */
		   qp->remote_qpn,
		   rdma_ah_get_dlid(&qp->remote_ah_attr),
		   rdma_ah_get_sl(&qp->remote_ah_attr),
		   qp->pmtu,
		   qp->s_retry,
		   qp->s_retry_cnt,
		   qp->s_rnr_retry_cnt,
		   qp->s_rnr_retry,
		   sde,
		   sde ? sde->this_idx : 0,
		   send_context,
		   send_context ? send_context->sw_index : 0,
		   ib_cq_head(qp->ibqp.send_cq),
		   ib_cq_tail(qp->ibqp.send_cq),
		   qp->pid,
		   qp->s_state,
		   qp->s_ack_state,
		   /* ack queue information */
		   e ? e->opcode : 0,
		   e ? e->psn : 0,
		   e ? e->lpsn : 0,
		   qp->r_min_rnr_timer,
		   srq ? "SRQ" : "RQ",
		   srq ? srq->rq.size : qp->r_rq.size
		);
}
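
/*
 * Allocate the hfi1-private portion of a QP on the device's NUMA node
 * and initialize the iowait used by the send engine.
 */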
void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv;

	priv = kzalloc_node(sizeof(*priv), GFP_KERNEL, rdi->dparms.node);
	if (!priv)
		return ERR_PTR(-ENOMEM);
	priv->owner = qp;

	priv->s_ahg = kzalloc_node(sizeof(*priv->s_ahg), GFP_KERNEL,
				   rdi->dparms.node);
	if (!priv->s_ahg) {
		kfree(priv);
		return ERR_PTR(-ENOMEM);
	}
	iowait_init(
		&priv->s_iowait,
		1,
		_hfi1_do_send,
		_hfi1_do_tid_send,
		iowait_sleep,
		iowait_wakeup,
		iowait_sdma_drained,
		hfi1_init_priority);
	/* Init to a value to start the running average correctly */
	priv->s_running_pkt_size = piothreshold / 2;
	return priv;
}

void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;

	hfi1_qp_priv_tid_free(rdi, qp);
	kfree(priv->s_ahg);
	kfree(priv);
}

unsigned free_all_qps(struct rvt_dev_info *rdi)
{
	struct hfi1_ibdev *verbs_dev = container_of(rdi,
						    struct hfi1_ibdev,
						    rdi);
	struct hfi1_devdata *dd = container_of(verbs_dev,
					       struct hfi1_devdata,
					       verbs_dev);
	int n;
	unsigned qp_inuse = 0;

	for (n = 0; n < dd->num_pports; n++) {
		struct hfi1_ibport *ibp = &dd->pport[n].ibport_data;

		rcu_read_lock();
		if (rcu_dereference(ibp->rvp.qp[0]))
			qp_inuse++;
		if (rcu_dereference(ibp->rvp.qp[1]))
			qp_inuse++;
		rcu_read_unlock();
	}

	return qp_inuse;
}

void flush_qp_waiters(struct rvt_qp *qp)
{
	lockdep_assert_held(&qp->s_lock);
	flush_iowait(qp);
	hfi1_tid_rdma_flush_wait(qp);
}

void stop_send_queue(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;

	iowait_cancel_work(&priv->s_iowait);
	if (cancel_work_sync(&priv->tid_rdma.trigger_work))
		rvt_put_qp(qp);
}
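
/*
 * Stop the TID RDMA timers, wait for all in-flight SDMA and PIO sends
 * to complete, then flush any queued tx requests.
 */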
void quiesce_qp(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;

	hfi1_del_tid_reap_timer(qp);
	hfi1_del_tid_retry_timer(qp);
	iowait_sdma_drain(&priv->s_iowait);
	qp_pio_drain(qp);
	flush_tx_list(qp);
}

void notify_qp_reset(struct rvt_qp *qp)
{
	hfi1_qp_kern_exp_rcv_clear_all(qp);
	qp->r_adefered = 0;
	clear_ahg(qp);

	/* Clear any OPFN state */
	if (qp->ibqp.qp_type == IB_QPT_RC)
		opfn_conn_error(qp);
}

/*
 * Switch to alternate path.
 * The QP s_lock should be held and interrupts disabled.
 */
void hfi1_migrate_qp(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct ib_event ev;

	qp->s_mig_state = IB_MIG_MIGRATED;
	qp->remote_ah_attr = qp->alt_ah_attr;
	qp->port_num = rdma_ah_get_port_num(&qp->alt_ah_attr);
	qp->s_pkey_index = qp->s_alt_pkey_index;
	qp->s_flags |= HFI1_S_AHG_CLEAR;
	priv->s_sc = ah_to_sc(qp->ibqp.device, &qp->remote_ah_attr);
	priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
	qp_set_16b(qp);

	ev.device = qp->ibqp.device;
	ev.element.qp = &qp->ibqp;
	ev.event = IB_EVENT_PATH_MIG;
	qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
}

int mtu_to_path_mtu(u32 mtu)
{
	return mtu_to_enum(mtu, OPA_MTU_8192);
}

u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu)
{
	u32 mtu;
	struct hfi1_ibdev *verbs_dev = container_of(rdi,
						    struct hfi1_ibdev,
						    rdi);
	struct hfi1_devdata *dd = container_of(verbs_dev,
					       struct hfi1_devdata,
					       verbs_dev);
	struct hfi1_ibport *ibp;
	u8 sc, vl;

	ibp = &dd->pport[qp->port_num - 1].ibport_data;
	sc = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)];
	vl = sc_to_vlt(dd, sc);

	mtu = verbs_mtu_enum_to_int(qp->ibqp.device, pmtu);
	if (vl < PER_VL_SEND_CONTEXTS)
		mtu = min_t(u32, mtu, dd->vld[vl].mtu);
	return mtu;
}

int get_pmtu_from_attr(struct rvt_dev_info *rdi, struct rvt_qp *qp,
		       struct ib_qp_attr *attr)
{
	int mtu, pidx = qp->port_num - 1;
	struct hfi1_ibdev *verbs_dev = container_of(rdi,
						    struct hfi1_ibdev,
						    rdi);
	struct hfi1_devdata *dd = container_of(verbs_dev,
					       struct hfi1_devdata,
					       verbs_dev);

	mtu = verbs_mtu_enum_to_int(qp->ibqp.device, attr->path_mtu);
	if (mtu == -1)
		return -1; /* values less than 0 are error */

	if (mtu > dd->pport[pidx].ibmtu)
		return mtu_to_enum(dd->pport[pidx].ibmtu, IB_MTU_2048);
	else
		return attr->path_mtu;
}
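
/*
 * Called when a QP transitions to the error state: pull it off any
 * iowait list, drop its s_rdma_mr reference, and flush queued tx
 * requests, provided the send engine is not actively running the QP.
 */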
void notify_error_qp(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;
	seqlock_t *lock = priv->s_iowait.lock;

	if (lock) {
		write_seqlock(lock);
		if (!list_empty(&priv->s_iowait.list) &&
		    !(qp->s_flags & RVT_S_BUSY) &&
		    !(priv->s_flags & RVT_S_BUSY)) {
			qp->s_flags &= ~HFI1_S_ANY_WAIT_IO;
			iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
			iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
			list_del_init(&priv->s_iowait.list);
			priv->s_iowait.lock = NULL;
			rvt_put_qp(qp);
		}
		write_sequnlock(lock);
	}

	if (!(qp->s_flags & RVT_S_BUSY) && !(priv->s_flags & RVT_S_BUSY)) {
		qp->s_hdrwords = 0;
		if (qp->s_rdma_mr) {
			rvt_put_mr(qp->s_rdma_mr);
			qp->s_rdma_mr = NULL;
		}
		flush_tx_list(qp);
	}
}

/**
 * hfi1_qp_iter_cb - callback for iterator
 * @qp: the qp
 * @v: the sl in low bits of v
 *
 * This is called from the iterator callback to work
 * on an individual qp.
 */
static void hfi1_qp_iter_cb(struct rvt_qp *qp, u64 v)
{
	int lastwqe;
	struct ib_event ev;
	struct hfi1_ibport *ibp =
		to_iport(qp->ibqp.device, qp->port_num);
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
	u8 sl = (u8)v;

	if (qp->port_num != ppd->port ||
	    (qp->ibqp.qp_type != IB_QPT_UC &&
	     qp->ibqp.qp_type != IB_QPT_RC) ||
	    rdma_ah_get_sl(&qp->remote_ah_attr) != sl ||
	    !(ib_rvt_state_ops[qp->state] & RVT_POST_SEND_OK))
		return;

	spin_lock_irq(&qp->r_lock);
	spin_lock(&qp->s_hlock);
	spin_lock(&qp->s_lock);
	lastwqe = rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
	spin_unlock(&qp->s_lock);
	spin_unlock(&qp->s_hlock);
	spin_unlock_irq(&qp->r_lock);

	if (lastwqe) {
		ev.device = qp->ibqp.device;
		ev.element.qp = &qp->ibqp;
		ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
	}
}

/**
 * hfi1_error_port_qps - put a port's RC/UC qps into error state
 * @ibp: the ibport.
 * @sl: the service level.
 *
 * This function places all RC/UC qps with a given service level into error
 * state. It is generally called to force upper layer apps to abandon stale
 * qps after an sl->sc mapping change.
 */
void hfi1_error_port_qps(struct hfi1_ibport *ibp, u8 sl)
{
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
	struct hfi1_ibdev *dev = &ppd->dd->verbs_dev;

	rvt_qp_iter(&dev->rdi, sl, hfi1_qp_iter_cb);
}