opfn.c

// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
/*
 * Copyright(c) 2018 Intel Corporation.
 *
 */
#include "hfi.h"
#include "trace.h"
#include "qp.h"
#include "opfn.h"

#define IB_BTHE_E	BIT(IB_BTHE_E_SHIFT)

#define OPFN_CODE(code) BIT((code) - 1)
#define OPFN_MASK(code) OPFN_CODE(STL_VERBS_EXTD_##code)
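
/*
 * Per-capability OPFN handlers. The capability code travels in the low
 * 4 bits of the 64-bit OPFN atomic payload; OPFN_CODE()/OPFN_MASK() map
 * a code to its bit in the opfn.requested/opfn.completed masks.
 * ->request builds the request payload, ->response handles an incoming
 * request and builds the reply, ->reply consumes the remote reply, and
 * ->error invalidates the capability.
 */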
struct hfi1_opfn_type {
	bool (*request)(struct rvt_qp *qp, u64 *data);
	bool (*response)(struct rvt_qp *qp, u64 *data);
	bool (*reply)(struct rvt_qp *qp, u64 data);
	void (*error)(struct rvt_qp *qp);
};

static struct hfi1_opfn_type hfi1_opfn_handlers[STL_VERBS_EXTD_MAX] = {
	[STL_VERBS_EXTD_TID_RDMA] = {
		.request = tid_rdma_conn_req,
		.response = tid_rdma_conn_resp,
		.reply = tid_rdma_conn_reply,
		.error = tid_rdma_conn_error,
	},
};

static struct workqueue_struct *opfn_wq;

static void opfn_schedule_conn_request(struct rvt_qp *qp);

static bool hfi1_opfn_extended(u32 bth1)
{
	return !!(bth1 & IB_BTHE_E);
}
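
/*
 * Pick the lowest capability that is requested but not yet completed,
 * ask its ->request handler for the payload, and post it as an
 * IB_WR_OPFN atomic work request with the capability code in the low
 * 4 bits of compare_add. opfn.curr tracks the single request in flight;
 * if the post fails, the request is rescheduled to the OPFN workqueue.
 */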
static void opfn_conn_request(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct ib_atomic_wr wr;
	u16 mask, capcode;
	struct hfi1_opfn_type *extd;
	u64 data;
	unsigned long flags;
	int ret = 0;

	trace_hfi1_opfn_state_conn_request(qp);
	spin_lock_irqsave(&priv->opfn.lock, flags);
	/*
	 * Exit if the extended bit is not set, or if nothing is requested, or
	 * if we have completed all requests, or if a previous request is in
	 * progress
	 */
	if (!priv->opfn.extended || !priv->opfn.requested ||
	    priv->opfn.requested == priv->opfn.completed || priv->opfn.curr)
		goto done;

	mask = priv->opfn.requested & ~priv->opfn.completed;
	capcode = ilog2(mask & ~(mask - 1)) + 1;
	if (capcode >= STL_VERBS_EXTD_MAX) {
		priv->opfn.completed |= OPFN_CODE(capcode);
		goto done;
	}

	extd = &hfi1_opfn_handlers[capcode];
	if (!extd || !extd->request || !extd->request(qp, &data)) {
		/*
		 * Either there is no handler for this capability or the request
		 * packet could not be generated. Either way, mark it as done so
		 * we don't keep attempting to complete it.
		 */
		priv->opfn.completed |= OPFN_CODE(capcode);
		goto done;
	}

	trace_hfi1_opfn_data_conn_request(qp, capcode, data);
	data = (data & ~0xf) | capcode;

	memset(&wr, 0, sizeof(wr));
	wr.wr.opcode = IB_WR_OPFN;
	wr.remote_addr = HFI1_VERBS_E_ATOMIC_VADDR;
	wr.compare_add = data;

	priv->opfn.curr = capcode;	/* A new request is now in progress */
	/* Drop opfn.lock before calling ib_post_send() */
	spin_unlock_irqrestore(&priv->opfn.lock, flags);

	ret = ib_post_send(&qp->ibqp, &wr.wr, NULL);
	if (ret)
		goto err;
	trace_hfi1_opfn_state_conn_request(qp);
	return;
err:
	trace_hfi1_msg_opfn_conn_request(qp, "ib_post_send failed: ret = ",
					 (u64)ret);
	spin_lock_irqsave(&priv->opfn.lock, flags);
	/*
	 * In case of an unexpected error return from ib_post_send,
	 * clear opfn.curr and reschedule to try again.
	 */
	priv->opfn.curr = STL_VERBS_EXTD_NONE;
	opfn_schedule_conn_request(qp);
done:
	spin_unlock_irqrestore(&priv->opfn.lock, flags);
}
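
/*
 * Workqueue handler: recover the owning QP from the embedded
 * hfi1_opfn_data and issue the connection request from process context,
 * where the QP s_lock is not held.
 */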
void opfn_send_conn_request(struct work_struct *work)
{
	struct hfi1_opfn_data *od;
	struct hfi1_qp_priv *qpriv;

	od = container_of(work, struct hfi1_opfn_data, opfn_work);
	qpriv = container_of(od, struct hfi1_qp_priv, opfn);

	opfn_conn_request(qpriv->owner);
}

/*
 * When the caller holds the QP s_lock, the OPFN request must be scheduled
 * to a workqueue to avoid double-locking the QP s_lock in the call to
 * ib_post_send() from opfn_conn_request().
 */
static void opfn_schedule_conn_request(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;

	trace_hfi1_opfn_state_sched_conn_request(qp);
	queue_work(opfn_wq, &priv->opfn.opfn_work);
}
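
/*
 * Handle an incoming OPFN request carried in an atomic ETH. The
 * capability's ->response handler builds the reply payload, which is
 * stored in e->atomic_data for the atomic ACK. A request for a
 * capability that was already negotiated invalidates it first, since
 * the remote side may have reset.
 */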
void opfn_conn_response(struct rvt_qp *qp, struct rvt_ack_entry *e,
			struct ib_atomic_eth *ateth)
{
	struct hfi1_qp_priv *priv = qp->priv;
	u64 data = be64_to_cpu(ateth->compare_data);
	struct hfi1_opfn_type *extd;
	u8 capcode;
	unsigned long flags;

	trace_hfi1_opfn_state_conn_response(qp);
	capcode = data & 0xf;
	trace_hfi1_opfn_data_conn_response(qp, capcode, data);
	if (!capcode || capcode >= STL_VERBS_EXTD_MAX)
		return;

	extd = &hfi1_opfn_handlers[capcode];
	if (!extd || !extd->response) {
		e->atomic_data = capcode;
		return;
	}

	spin_lock_irqsave(&priv->opfn.lock, flags);
	if (priv->opfn.completed & OPFN_CODE(capcode)) {
		/*
		 * We are receiving a request for a feature that has already
		 * been negotiated. This may mean that the other side has reset.
		 */
		priv->opfn.completed &= ~OPFN_CODE(capcode);
		if (extd->error)
			extd->error(qp);
	}

	if (extd->response(qp, &data))
		priv->opfn.completed |= OPFN_CODE(capcode);
	e->atomic_data = (data & ~0xf) | capcode;
	trace_hfi1_opfn_state_conn_response(qp);
	spin_unlock_irqrestore(&priv->opfn.lock, flags);
}
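
/*
 * Handle the remote reply to our OPFN request. If the reply matches the
 * request in flight, hand it to the capability's ->reply handler and
 * mark the capability completed on success; opfn.curr is cleared so a
 * new request can be issued.
 */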
void opfn_conn_reply(struct rvt_qp *qp, u64 data)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct hfi1_opfn_type *extd;
	u8 capcode;
	unsigned long flags;

	trace_hfi1_opfn_state_conn_reply(qp);
	capcode = data & 0xf;
	trace_hfi1_opfn_data_conn_reply(qp, capcode, data);
	if (!capcode || capcode >= STL_VERBS_EXTD_MAX)
		return;

	spin_lock_irqsave(&priv->opfn.lock, flags);
	/*
	 * Either there is no previous request or the reply is not for the
	 * current request
	 */
	if (!priv->opfn.curr || capcode != priv->opfn.curr)
		goto done;

	extd = &hfi1_opfn_handlers[capcode];
	if (!extd || !extd->reply)
		goto clear;

	if (extd->reply(qp, data))
		priv->opfn.completed |= OPFN_CODE(capcode);
clear:
	/*
	 * Clear opfn.curr to indicate that the previous request is no longer in
	 * progress
	 */
	priv->opfn.curr = STL_VERBS_EXTD_NONE;
	trace_hfi1_opfn_state_conn_reply(qp);
done:
	spin_unlock_irqrestore(&priv->opfn.lock, flags);
}

void opfn_conn_error(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct hfi1_opfn_type *extd = NULL;
	unsigned long flags;
	u16 capcode;

	trace_hfi1_opfn_state_conn_error(qp);
	trace_hfi1_msg_opfn_conn_error(qp, "error. qp state ", (u64)qp->state);
	/*
	 * The QP has gone into the Error state. We have to invalidate all
	 * negotiated features, including the one in progress (if any). The RC
	 * QP handling will clean the WQE for the connection request.
	 */
	spin_lock_irqsave(&priv->opfn.lock, flags);
	while (priv->opfn.completed) {
		capcode = priv->opfn.completed & ~(priv->opfn.completed - 1);
		extd = &hfi1_opfn_handlers[ilog2(capcode) + 1];
		if (extd->error)
			extd->error(qp);
		priv->opfn.completed &= ~OPFN_CODE(capcode);
	}
	priv->opfn.extended = 0;
	priv->opfn.requested = 0;
	priv->opfn.curr = STL_VERBS_EXTD_NONE;
	spin_unlock_irqrestore(&priv->opfn.lock, flags);
}
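
/*
 * Called on QP modify: cache the retry count and TID retry timeout and,
 * for an RC QP with TID RDMA enabled and a 4096- or 8192-byte MTU, set
 * up the local TID RDMA parameters. The TID_RDMA request bit is only
 * set on the transition to RTS; moving back into RTS with the
 * capability already completed triggers renegotiation.
 */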
void opfn_qp_init(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask)
{
	struct ib_qp *ibqp = &qp->ibqp;
	struct hfi1_qp_priv *priv = qp->priv;
	unsigned long flags;

	if (attr_mask & IB_QP_RETRY_CNT)
		priv->s_retry = attr->retry_cnt;

	spin_lock_irqsave(&priv->opfn.lock, flags);
	if (ibqp->qp_type == IB_QPT_RC && HFI1_CAP_IS_KSET(TID_RDMA)) {
		struct tid_rdma_params *local = &priv->tid_rdma.local;

		if (attr_mask & IB_QP_TIMEOUT)
			priv->tid_retry_timeout_jiffies = qp->timeout_jiffies;
		if (qp->pmtu == enum_to_mtu(OPA_MTU_4096) ||
		    qp->pmtu == enum_to_mtu(OPA_MTU_8192)) {
			tid_rdma_opfn_init(qp, local);
			/*
			 * We only want to set the OPFN requested bit when the
			 * QP transitions to RTS.
			 */
			if (attr_mask & IB_QP_STATE &&
			    attr->qp_state == IB_QPS_RTS) {
				priv->opfn.requested |= OPFN_MASK(TID_RDMA);
				/*
				 * If the QP is transitioning to RTS and the
				 * opfn.completed for TID RDMA has already been
				 * set, the QP is being moved *back* into RTS.
				 * We can now renegotiate the TID RDMA
				 * parameters.
				 */
				if (priv->opfn.completed &
				    OPFN_MASK(TID_RDMA)) {
					priv->opfn.completed &=
						~OPFN_MASK(TID_RDMA);
					/*
					 * Since the opfn.completed bit was
					 * already set, it is safe to assume
					 * that the opfn.extended is also set.
					 */
					opfn_schedule_conn_request(qp);
				}
			}
		} else {
			memset(local, 0, sizeof(*local));
		}
	}
	spin_unlock_irqrestore(&priv->opfn.lock, flags);
}
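
/*
 * Called when a packet arrives with the BTH extended bit set: if OPFN is
 * enabled locally, note that the remote side supports it and, when the
 * QP is already in RTS, kick off the capability negotiation.
 */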
void opfn_trigger_conn_request(struct rvt_qp *qp, u32 bth1)
{
	struct hfi1_qp_priv *priv = qp->priv;

	if (!priv->opfn.extended && hfi1_opfn_extended(bth1) &&
	    HFI1_CAP_IS_KSET(OPFN)) {
		priv->opfn.extended = 1;
		if (qp->state == IB_QPS_RTS)
			opfn_conn_request(qp);
	}
}
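
/* Allocate the single workqueue shared by all QPs for OPFN request work. */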
int opfn_init(void)
{
	opfn_wq = alloc_workqueue("hfi_opfn",
				  WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE |
				  WQ_MEM_RECLAIM,
				  HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES);
	if (!opfn_wq)
		return -ENOMEM;

	return 0;
}

void opfn_exit(void)
{
	if (opfn_wq) {
		destroy_workqueue(opfn_wq);
		opfn_wq = NULL;
	}
}