ipoib_tx.c

// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
/*
 * Copyright(c) 2020 Intel Corporation.
 *
 */

/*
 * This file contains HFI1 support for IPOIB SDMA functionality
 */

#include <linux/log2.h>
#include <linux/circ_buf.h>

#include "sdma.h"
#include "verbs.h"
#include "trace_ibhdrs.h"
#include "ipoib.h"
#include "trace_tx.h"

/* Add a convenience helper */
#define CIRC_ADD(val, add, size) (((val) + (add)) & ((size) - 1))
#define CIRC_NEXT(val, size) CIRC_ADD(val, 1, size)
#define CIRC_PREV(val, size) CIRC_ADD(val, -1, size)

struct ipoib_txparms {
        struct hfi1_devdata *dd;
        struct rdma_ah_attr *ah_attr;
        struct hfi1_ibport *ibp;
        struct hfi1_ipoib_txq *txq;
        union hfi1_ipoib_flow flow;
        u32 dqpn;
        u8 hdr_dwords;
        u8 entropy;
};

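/*
 * The tx ring is a flat allocation of max_items entries, each padded to a
 * power-of-two size; index to address translation is (idx << shift).
 */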
static struct ipoib_txreq *
hfi1_txreq_from_idx(struct hfi1_ipoib_circ_buf *r, u32 idx)
{
        return (struct ipoib_txreq *)(r->items + (idx << r->shift));
}

static u32 hfi1_ipoib_txreqs(const u64 sent, const u64 completed)
{
        return sent - completed;
}

static u64 hfi1_ipoib_used(struct hfi1_ipoib_txq *txq)
{
        return hfi1_ipoib_txreqs(txq->tx_ring.sent_txreqs,
                                 txq->tx_ring.complete_txreqs);
}

static void hfi1_ipoib_stop_txq(struct hfi1_ipoib_txq *txq)
{
        trace_hfi1_txq_stop(txq);
        if (atomic_inc_return(&txq->tx_ring.stops) == 1)
                netif_stop_subqueue(txq->priv->netdev, txq->q_idx);
}

static void hfi1_ipoib_wake_txq(struct hfi1_ipoib_txq *txq)
{
        trace_hfi1_txq_wake(txq);
        if (atomic_dec_and_test(&txq->tx_ring.stops))
                netif_wake_subqueue(txq->priv->netdev, txq->q_idx);
}

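/*
 * High/low watermarks for outstanding txreqs, bounded by the smaller of
 * the netdev tx_queue_len and the fixed ring size. The queue is stopped
 * at the high watermark and restarted once it drains below the low one.
 */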
static uint hfi1_ipoib_ring_hwat(struct hfi1_ipoib_txq *txq)
{
        return min_t(uint, txq->priv->netdev->tx_queue_len,
                     txq->tx_ring.max_items - 1);
}

static uint hfi1_ipoib_ring_lwat(struct hfi1_ipoib_txq *txq)
{
        return min_t(uint, txq->priv->netdev->tx_queue_len,
                     txq->tx_ring.max_items) >> 1;
}

static void hfi1_ipoib_check_queue_depth(struct hfi1_ipoib_txq *txq)
{
        ++txq->tx_ring.sent_txreqs;
        if (hfi1_ipoib_used(txq) >= hfi1_ipoib_ring_hwat(txq) &&
            !atomic_xchg(&txq->tx_ring.ring_full, 1)) {
                trace_hfi1_txq_full(txq);
                hfi1_ipoib_stop_txq(txq);
        }
}

static void hfi1_ipoib_check_queue_stopped(struct hfi1_ipoib_txq *txq)
{
        struct net_device *dev = txq->priv->netdev;

        /* If shutting down just return as queue state is irrelevant */
        if (unlikely(dev->reg_state != NETREG_REGISTERED))
                return;

        /*
         * When the queue has been drained to less than half full it will be
         * restarted.
         * The size of the txreq ring is fixed at initialization.
         * The tx queue len can be adjusted upward while the interface is
         * running.
         * The tx queue len can be large enough to overflow the txreq_ring.
         * Use the minimum of the current tx_queue_len or the rings max txreqs
         * to protect against ring overflow.
         */
        if (hfi1_ipoib_used(txq) < hfi1_ipoib_ring_lwat(txq) &&
            atomic_xchg(&txq->tx_ring.ring_full, 0)) {
                trace_hfi1_txq_xmit_unstopped(txq);
                hfi1_ipoib_wake_txq(txq);
        }
}

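/* Reclaim a completed txreq: account stats, free the skb and unmap sdma descriptors. */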
static void hfi1_ipoib_free_tx(struct ipoib_txreq *tx, int budget)
{
        struct hfi1_ipoib_dev_priv *priv = tx->txq->priv;

        if (likely(!tx->sdma_status)) {
                dev_sw_netstats_tx_add(priv->netdev, 1, tx->skb->len);
        } else {
                ++priv->netdev->stats.tx_errors;
                dd_dev_warn(priv->dd,
                            "%s: Status = 0x%x pbc 0x%llx txq = %d sde = %d\n",
                            __func__, tx->sdma_status,
                            le64_to_cpu(tx->sdma_hdr->pbc), tx->txq->q_idx,
                            tx->txq->sde->this_idx);
        }

        napi_consume_skb(tx->skb, budget);
        tx->skb = NULL;
        sdma_txclean(priv->dd, &tx->txreq);
}

static void hfi1_ipoib_drain_tx_ring(struct hfi1_ipoib_txq *txq)
{
        struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring;
        int i;
        struct ipoib_txreq *tx;

        for (i = 0; i < tx_ring->max_items; i++) {
                tx = hfi1_txreq_from_idx(tx_ring, i);
                tx->complete = 0;
                dev_kfree_skb_any(tx->skb);
                tx->skb = NULL;
                sdma_txclean(txq->priv->dd, &tx->txreq);
        }
        tx_ring->head = 0;
        tx_ring->tail = 0;
        tx_ring->complete_txreqs = 0;
        tx_ring->sent_txreqs = 0;
        tx_ring->avail = hfi1_ipoib_ring_hwat(txq);
}

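/*
 * NAPI poll routine for the tx ring consumer side: frees txreqs marked
 * complete by hfi1_ipoib_sdma_complete(), advances the ring head, and
 * wakes the netdev queue if enough space has been reclaimed.
 */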
static int hfi1_ipoib_poll_tx_ring(struct napi_struct *napi, int budget)
{
        struct hfi1_ipoib_txq *txq =
                container_of(napi, struct hfi1_ipoib_txq, napi);
        struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring;
        u32 head = tx_ring->head;
        u32 max_tx = tx_ring->max_items;
        int work_done;
        struct ipoib_txreq *tx = hfi1_txreq_from_idx(tx_ring, head);

        trace_hfi1_txq_poll(txq);
        for (work_done = 0; work_done < budget; work_done++) {
                /* See hfi1_ipoib_sdma_complete() */
                if (!smp_load_acquire(&tx->complete))
                        break;
                tx->complete = 0;
                trace_hfi1_tx_produce(tx, head);
                hfi1_ipoib_free_tx(tx, budget);
                head = CIRC_NEXT(head, max_tx);
                tx = hfi1_txreq_from_idx(tx_ring, head);
        }
        tx_ring->complete_txreqs += work_done;

        /* Finished freeing tx items so store the head value. */
        smp_store_release(&tx_ring->head, head);

        hfi1_ipoib_check_queue_stopped(txq);

        if (work_done < budget)
                napi_complete_done(napi, work_done);

        return work_done;
}

static void hfi1_ipoib_sdma_complete(struct sdma_txreq *txreq, int status)
{
        struct ipoib_txreq *tx = container_of(txreq, struct ipoib_txreq, txreq);

        trace_hfi1_txq_complete(tx->txq);
        tx->sdma_status = status;
        /* see hfi1_ipoib_poll_tx_ring */
        smp_store_release(&tx->complete, 1);
        napi_schedule_irqoff(&tx->txq->napi);
}

static int hfi1_ipoib_build_ulp_payload(struct ipoib_txreq *tx,
                                        struct ipoib_txparms *txp)
{
        struct hfi1_devdata *dd = txp->dd;
        struct sdma_txreq *txreq = &tx->txreq;
        struct sk_buff *skb = tx->skb;
        int ret = 0;
        int i;

        if (skb_headlen(skb)) {
                ret = sdma_txadd_kvaddr(dd, txreq, skb->data, skb_headlen(skb));
                if (unlikely(ret))
                        return ret;
        }

        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
                const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

                ret = sdma_txadd_page(dd,
                                      txreq,
                                      skb_frag_page(frag),
                                      frag->bv_offset,
                                      skb_frag_size(frag),
                                      NULL, NULL, NULL);
                if (unlikely(ret))
                        break;
        }

        return ret;
}

static int hfi1_ipoib_build_tx_desc(struct ipoib_txreq *tx,
                                    struct ipoib_txparms *txp)
{
        struct hfi1_devdata *dd = txp->dd;
        struct sdma_txreq *txreq = &tx->txreq;
        struct hfi1_sdma_header *sdma_hdr = tx->sdma_hdr;
        u16 pkt_bytes =
                sizeof(sdma_hdr->pbc) + (txp->hdr_dwords << 2) + tx->skb->len;
        int ret;

        ret = sdma_txinit(txreq, 0, pkt_bytes, hfi1_ipoib_sdma_complete);
        if (unlikely(ret))
                return ret;

        /* add pbc + headers */
        ret = sdma_txadd_kvaddr(dd,
                                txreq,
                                sdma_hdr,
                                sizeof(sdma_hdr->pbc) + (txp->hdr_dwords << 2));
        if (unlikely(ret))
                return ret;

        /* add the ulp payload */
        return hfi1_ipoib_build_ulp_payload(tx, txp);
}

static void hfi1_ipoib_build_ib_tx_headers(struct ipoib_txreq *tx,
                                           struct ipoib_txparms *txp)
{
        struct hfi1_ipoib_dev_priv *priv = tx->txq->priv;
        struct hfi1_sdma_header *sdma_hdr = tx->sdma_hdr;
        struct sk_buff *skb = tx->skb;
        struct hfi1_pportdata *ppd = ppd_from_ibp(txp->ibp);
        struct rdma_ah_attr *ah_attr = txp->ah_attr;
        struct ib_other_headers *ohdr;
        struct ib_grh *grh;
        u16 dwords;
        u16 slid;
        u16 dlid;
        u16 lrh0;
        u32 bth0;
        u32 sqpn = (u32)(priv->netdev->dev_addr[1] << 16 |
                         priv->netdev->dev_addr[2] << 8 |
                         priv->netdev->dev_addr[3]);
        u16 payload_dwords;
        u8 pad_cnt;

        pad_cnt = -skb->len & 3;

        /* Includes ICRC */
        payload_dwords = ((skb->len + pad_cnt) >> 2) + SIZE_OF_CRC;

        /* header size in dwords LRH+BTH+DETH = (8+12+8)/4. */
        txp->hdr_dwords = 7;

        if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
                grh = &sdma_hdr->hdr.ibh.u.l.grh;
                txp->hdr_dwords +=
                        hfi1_make_grh(txp->ibp,
                                      grh,
                                      rdma_ah_read_grh(ah_attr),
                                      txp->hdr_dwords - LRH_9B_DWORDS,
                                      payload_dwords);
                lrh0 = HFI1_LRH_GRH;
                ohdr = &sdma_hdr->hdr.ibh.u.l.oth;
        } else {
                lrh0 = HFI1_LRH_BTH;
                ohdr = &sdma_hdr->hdr.ibh.u.oth;
        }

        lrh0 |= (rdma_ah_get_sl(ah_attr) & 0xf) << 4;
        lrh0 |= (txp->flow.sc5 & 0xf) << 12;

        dlid = opa_get_lid(rdma_ah_get_dlid(ah_attr), 9B);
        if (dlid == be16_to_cpu(IB_LID_PERMISSIVE)) {
                slid = be16_to_cpu(IB_LID_PERMISSIVE);
        } else {
                u16 lid = (u16)ppd->lid;

                if (lid) {
                        lid |= rdma_ah_get_path_bits(ah_attr) &
                                ((1 << ppd->lmc) - 1);
                        slid = lid;
                } else {
                        slid = be16_to_cpu(IB_LID_PERMISSIVE);
                }
        }

        /* Includes ICRC */
        dwords = txp->hdr_dwords + payload_dwords;

        /* Build the lrh */
        sdma_hdr->hdr.hdr_type = HFI1_PKT_TYPE_9B;
        hfi1_make_ib_hdr(&sdma_hdr->hdr.ibh, lrh0, dwords, dlid, slid);

        /* Build the bth */
        bth0 = (IB_OPCODE_UD_SEND_ONLY << 24) | (pad_cnt << 20) | priv->pkey;

        ohdr->bth[0] = cpu_to_be32(bth0);
        ohdr->bth[1] = cpu_to_be32(txp->dqpn);
        ohdr->bth[2] = cpu_to_be32(mask_psn((u32)txp->txq->tx_ring.sent_txreqs));

        /* Build the deth */
        ohdr->u.ud.deth[0] = cpu_to_be32(priv->qkey);
        ohdr->u.ud.deth[1] = cpu_to_be32((txp->entropy <<
                                          HFI1_IPOIB_ENTROPY_SHIFT) | sqpn);

        /* Construct the pbc. */
        sdma_hdr->pbc =
                cpu_to_le64(create_pbc(ppd,
                                       ib_is_sc5(txp->flow.sc5) <<
                                       PBC_DC_INFO_SHIFT,
                                       0,
                                       sc_to_vlt(priv->dd, txp->flow.sc5),
                                       dwords - SIZE_OF_CRC +
                                       (sizeof(sdma_hdr->pbc) >> 2)));
}

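/*
 * Producer side: claim the txreq at the ring tail, refreshing the cached
 * 'avail' count from the consumer's head pointer only when it hits zero,
 * then build the 9B headers and sdma descriptors for the skb.
 */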
static struct ipoib_txreq *hfi1_ipoib_send_dma_common(struct net_device *dev,
                                                      struct sk_buff *skb,
                                                      struct ipoib_txparms *txp)
{
        struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
        struct hfi1_ipoib_txq *txq = txp->txq;
        struct ipoib_txreq *tx;
        struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring;
        u32 tail = tx_ring->tail;
        int ret;

        if (unlikely(!tx_ring->avail)) {
                u32 head;

                if (hfi1_ipoib_used(txq) >= hfi1_ipoib_ring_hwat(txq))
                        /* This shouldn't happen with a stopped queue */
                        return ERR_PTR(-ENOMEM);
                /* See hfi1_ipoib_poll_tx_ring() */
                head = smp_load_acquire(&tx_ring->head);
                tx_ring->avail =
                        min_t(u32, hfi1_ipoib_ring_hwat(txq),
                              CIRC_CNT(head, tail, tx_ring->max_items));
        } else {
                tx_ring->avail--;
        }
        tx = hfi1_txreq_from_idx(tx_ring, tail);
        trace_hfi1_txq_alloc_tx(txq);

        /* so that we can test if the sdma descriptors are there */
        tx->txreq.num_desc = 0;
        tx->txq = txq;
        tx->skb = skb;
        INIT_LIST_HEAD(&tx->txreq.list);

        hfi1_ipoib_build_ib_tx_headers(tx, txp);

        ret = hfi1_ipoib_build_tx_desc(tx, txp);
        if (likely(!ret)) {
                if (txq->flow.as_int != txp->flow.as_int) {
                        txq->flow.tx_queue = txp->flow.tx_queue;
                        txq->flow.sc5 = txp->flow.sc5;
                        txq->sde =
                                sdma_select_engine_sc(priv->dd,
                                                      txp->flow.tx_queue,
                                                      txp->flow.sc5);
                        trace_hfi1_flow_switch(txq);
                }

                return tx;
        }

        sdma_txclean(priv->dd, &tx->txreq);

        return ERR_PTR(ret);
}

static int hfi1_ipoib_submit_tx_list(struct net_device *dev,
                                     struct hfi1_ipoib_txq *txq)
{
        int ret;
        u16 count_out;

        ret = sdma_send_txlist(txq->sde,
                               iowait_get_ib_work(&txq->wait),
                               &txq->tx_list,
                               &count_out);
        if (likely(!ret) || ret == -EBUSY || ret == -ECOMM)
                return ret;

        dd_dev_warn(txq->priv->dd, "cannot send skb tx list, err %d.\n", ret);

        return ret;
}

static int hfi1_ipoib_flush_tx_list(struct net_device *dev,
                                    struct hfi1_ipoib_txq *txq)
{
        int ret = 0;

        if (!list_empty(&txq->tx_list)) {
                /* Flush the current list */
                ret = hfi1_ipoib_submit_tx_list(dev, txq);

                if (unlikely(ret))
                        if (ret != -EBUSY)
                                ++dev->stats.tx_carrier_errors;
        }

        return ret;
}

static int hfi1_ipoib_submit_tx(struct hfi1_ipoib_txq *txq,
                                struct ipoib_txreq *tx)
{
        int ret;

        ret = sdma_send_txreq(txq->sde,
                              iowait_get_ib_work(&txq->wait),
                              &tx->txreq,
                              txq->pkts_sent);
        if (likely(!ret)) {
                txq->pkts_sent = true;
                iowait_starve_clear(txq->pkts_sent, &txq->wait);
        }

        return ret;
}

static int hfi1_ipoib_send_dma_single(struct net_device *dev,
                                      struct sk_buff *skb,
                                      struct ipoib_txparms *txp)
{
        struct hfi1_ipoib_txq *txq = txp->txq;
        struct hfi1_ipoib_circ_buf *tx_ring;
        struct ipoib_txreq *tx;
        int ret;

        tx = hfi1_ipoib_send_dma_common(dev, skb, txp);
        if (IS_ERR(tx)) {
                int ret = PTR_ERR(tx);

                dev_kfree_skb_any(skb);

                if (ret == -ENOMEM)
                        ++dev->stats.tx_errors;
                else
                        ++dev->stats.tx_carrier_errors;

                return NETDEV_TX_OK;
        }

        tx_ring = &txq->tx_ring;
        trace_hfi1_tx_consume(tx, tx_ring->tail);
        /* consume tx */
        smp_store_release(&tx_ring->tail, CIRC_NEXT(tx_ring->tail, tx_ring->max_items));

        ret = hfi1_ipoib_submit_tx(txq, tx);
        if (likely(!ret)) {
tx_ok:
                trace_sdma_output_ibhdr(txq->priv->dd,
                                        &tx->sdma_hdr->hdr,
                                        ib_is_sc5(txp->flow.sc5));
                hfi1_ipoib_check_queue_depth(txq);
                return NETDEV_TX_OK;
        }

        txq->pkts_sent = false;

        if (ret == -EBUSY || ret == -ECOMM)
                goto tx_ok;

        /* mark complete and kick napi tx */
        smp_store_release(&tx->complete, 1);
        napi_schedule(&tx->txq->napi);

        ++dev->stats.tx_carrier_errors;

        return NETDEV_TX_OK;
}

static int hfi1_ipoib_send_dma_list(struct net_device *dev,
                                    struct sk_buff *skb,
                                    struct ipoib_txparms *txp)
{
        struct hfi1_ipoib_txq *txq = txp->txq;
        struct hfi1_ipoib_circ_buf *tx_ring;
        struct ipoib_txreq *tx;

        /* Has the flow changed? */
        if (txq->flow.as_int != txp->flow.as_int) {
                int ret;

                trace_hfi1_flow_flush(txq);
                ret = hfi1_ipoib_flush_tx_list(dev, txq);
                if (unlikely(ret)) {
                        if (ret == -EBUSY)
                                ++dev->stats.tx_dropped;
                        dev_kfree_skb_any(skb);
                        return NETDEV_TX_OK;
                }
        }
        tx = hfi1_ipoib_send_dma_common(dev, skb, txp);
        if (IS_ERR(tx)) {
                int ret = PTR_ERR(tx);

                dev_kfree_skb_any(skb);

                if (ret == -ENOMEM)
                        ++dev->stats.tx_errors;
                else
                        ++dev->stats.tx_carrier_errors;

                return NETDEV_TX_OK;
        }

        tx_ring = &txq->tx_ring;
        trace_hfi1_tx_consume(tx, tx_ring->tail);
        /* consume tx */
        smp_store_release(&tx_ring->tail, CIRC_NEXT(tx_ring->tail, tx_ring->max_items));
        list_add_tail(&tx->txreq.list, &txq->tx_list);

        hfi1_ipoib_check_queue_depth(txq);

        trace_sdma_output_ibhdr(txq->priv->dd,
                                &tx->sdma_hdr->hdr,
                                ib_is_sc5(txp->flow.sc5));

        if (!netdev_xmit_more())
                (void)hfi1_ipoib_flush_tx_list(dev, txq);

        return NETDEV_TX_OK;
}

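/* Derive a DETH entropy byte from the transport header, else the tx queue. */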
static u8 hfi1_ipoib_calc_entropy(struct sk_buff *skb)
{
        if (skb_transport_header_was_set(skb)) {
                u8 *hdr = (u8 *)skb_transport_header(skb);

                return (hdr[0] ^ hdr[1] ^ hdr[2] ^ hdr[3]);
        }

        return (u8)skb_get_queue_mapping(skb);
}

int hfi1_ipoib_send(struct net_device *dev,
                    struct sk_buff *skb,
                    struct ib_ah *address,
                    u32 dqpn)
{
        struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
        struct ipoib_txparms txp;
        struct rdma_netdev *rn = netdev_priv(dev);

        if (unlikely(skb->len > rn->mtu + HFI1_IPOIB_ENCAP_LEN)) {
                dd_dev_warn(priv->dd, "packet len %d (> %d) too long to send, dropping\n",
                            skb->len,
                            rn->mtu + HFI1_IPOIB_ENCAP_LEN);
                ++dev->stats.tx_dropped;
                ++dev->stats.tx_errors;
                dev_kfree_skb_any(skb);
                return NETDEV_TX_OK;
        }

        txp.dd = priv->dd;
        txp.ah_attr = &ibah_to_rvtah(address)->attr;
        txp.ibp = to_iport(priv->device, priv->port_num);
        txp.txq = &priv->txqs[skb_get_queue_mapping(skb)];
        txp.dqpn = dqpn;
        txp.flow.sc5 = txp.ibp->sl_to_sc[rdma_ah_get_sl(txp.ah_attr)];
        txp.flow.tx_queue = (u8)skb_get_queue_mapping(skb);
        txp.entropy = hfi1_ipoib_calc_entropy(skb);

        if (netdev_xmit_more() || !list_empty(&txp.txq->tx_list))
                return hfi1_ipoib_send_dma_list(dev, skb, &txp);

        return hfi1_ipoib_send_dma_single(dev, skb, &txp);
}

/*
 * hfi1_ipoib_sdma_sleep - ipoib sdma sleep function
 *
 * This function gets called from sdma_send_txreq() when there are not enough
 * sdma descriptors available to send the packet. It adds Tx queue's wait
 * structure to sdma engine's dmawait list to be woken up when descriptors
 * become available.
 */
static int hfi1_ipoib_sdma_sleep(struct sdma_engine *sde,
                                 struct iowait_work *wait,
                                 struct sdma_txreq *txreq,
                                 uint seq,
                                 bool pkts_sent)
{
        struct hfi1_ipoib_txq *txq =
                container_of(wait->iow, struct hfi1_ipoib_txq, wait);

        write_seqlock(&sde->waitlock);

        if (likely(txq->priv->netdev->reg_state == NETREG_REGISTERED)) {
                if (sdma_progress(sde, seq, txreq)) {
                        write_sequnlock(&sde->waitlock);
                        return -EAGAIN;
                }

                if (list_empty(&txreq->list))
                        /* came from non-list submit */
                        list_add_tail(&txreq->list, &txq->tx_list);
                if (list_empty(&txq->wait.list)) {
                        struct hfi1_ibport *ibp = &sde->ppd->ibport_data;

                        if (!atomic_xchg(&txq->tx_ring.no_desc, 1)) {
                                trace_hfi1_txq_queued(txq);
                                hfi1_ipoib_stop_txq(txq);
                        }
                        ibp->rvp.n_dmawait++;
                        iowait_queue(pkts_sent, wait->iow, &sde->dmawait);
                }

                write_sequnlock(&sde->waitlock);
                return -EBUSY;
        }

        write_sequnlock(&sde->waitlock);
        return -EINVAL;
}

/*
 * hfi1_ipoib_sdma_wakeup - ipoib sdma wakeup function
 *
 * This function gets called when SDMA descriptors become available and the Tx
 * queue's wait structure was previously added to the sdma engine's dmawait list.
 */
static void hfi1_ipoib_sdma_wakeup(struct iowait *wait, int reason)
{
        struct hfi1_ipoib_txq *txq =
                container_of(wait, struct hfi1_ipoib_txq, wait);

        trace_hfi1_txq_wakeup(txq);
        if (likely(txq->priv->netdev->reg_state == NETREG_REGISTERED))
                iowait_schedule(wait, system_highpri_wq, WORK_CPU_UNBOUND);
}

static void hfi1_ipoib_flush_txq(struct work_struct *work)
{
        struct iowait_work *ioww =
                container_of(work, struct iowait_work, iowork);
        struct iowait *wait = iowait_ioww_to_iow(ioww);
        struct hfi1_ipoib_txq *txq =
                container_of(wait, struct hfi1_ipoib_txq, wait);
        struct net_device *dev = txq->priv->netdev;

        if (likely(dev->reg_state == NETREG_REGISTERED) &&
            likely(!hfi1_ipoib_flush_tx_list(dev, txq)))
                if (atomic_xchg(&txq->tx_ring.no_desc, 0))
                        hfi1_ipoib_wake_txq(txq);
}

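/*
 * Allocate one txq per netdev tx queue. Each ring is sized to the next
 * power of two above tx_queue_len so indexing can use masking, and ring
 * memory is allocated on the device's NUMA node.
 */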
int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv)
{
        struct net_device *dev = priv->netdev;
        u32 tx_ring_size, tx_item_size;
        struct hfi1_ipoib_circ_buf *tx_ring;
        int i, j;

        /*
         * Ring holds 1 less than tx_ring_size
         * Round up to next power of 2 in order to hold at least tx_queue_len
         */
        tx_ring_size = roundup_pow_of_two(dev->tx_queue_len + 1);
        tx_item_size = roundup_pow_of_two(sizeof(struct ipoib_txreq));

        priv->txqs = kcalloc_node(dev->num_tx_queues,
                                  sizeof(struct hfi1_ipoib_txq),
                                  GFP_KERNEL,
                                  priv->dd->node);
        if (!priv->txqs)
                return -ENOMEM;

        for (i = 0; i < dev->num_tx_queues; i++) {
                struct hfi1_ipoib_txq *txq = &priv->txqs[i];
                struct ipoib_txreq *tx;

                tx_ring = &txq->tx_ring;
                iowait_init(&txq->wait,
                            0,
                            hfi1_ipoib_flush_txq,
                            NULL,
                            hfi1_ipoib_sdma_sleep,
                            hfi1_ipoib_sdma_wakeup,
                            NULL,
                            NULL);
                txq->priv = priv;
                txq->sde = NULL;
                INIT_LIST_HEAD(&txq->tx_list);
                atomic_set(&txq->tx_ring.stops, 0);
                atomic_set(&txq->tx_ring.ring_full, 0);
                atomic_set(&txq->tx_ring.no_desc, 0);
                txq->q_idx = i;
                txq->flow.tx_queue = 0xff;
                txq->flow.sc5 = 0xff;
                txq->pkts_sent = false;

                netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
                                             priv->dd->node);

                txq->tx_ring.items =
                        kvzalloc_node(array_size(tx_ring_size, tx_item_size),
                                      GFP_KERNEL, priv->dd->node);
                if (!txq->tx_ring.items)
                        goto free_txqs;

                txq->tx_ring.max_items = tx_ring_size;
                txq->tx_ring.shift = ilog2(tx_item_size);
                txq->tx_ring.avail = hfi1_ipoib_ring_hwat(txq);
                tx_ring = &txq->tx_ring;
                for (j = 0; j < tx_ring_size; j++) {
                        hfi1_txreq_from_idx(tx_ring, j)->sdma_hdr =
                                kzalloc_node(sizeof(*tx->sdma_hdr),
                                             GFP_KERNEL, priv->dd->node);
                        if (!hfi1_txreq_from_idx(tx_ring, j)->sdma_hdr)
                                goto free_txqs;
                }

                netif_napi_add_tx(dev, &txq->napi, hfi1_ipoib_poll_tx_ring);
        }

        return 0;

free_txqs:
        for (i--; i >= 0; i--) {
                struct hfi1_ipoib_txq *txq = &priv->txqs[i];

                netif_napi_del(&txq->napi);
                tx_ring = &txq->tx_ring;
                for (j = 0; j < tx_ring_size; j++)
                        kfree(hfi1_txreq_from_idx(tx_ring, j)->sdma_hdr);

                kvfree(tx_ring->items);
        }

        kfree(priv->txqs);
        priv->txqs = NULL;
        return -ENOMEM;
}

static void hfi1_ipoib_drain_tx_list(struct hfi1_ipoib_txq *txq)
{
        struct sdma_txreq *txreq;
        struct sdma_txreq *txreq_tmp;

        list_for_each_entry_safe(txreq, txreq_tmp, &txq->tx_list, list) {
                struct ipoib_txreq *tx =
                        container_of(txreq, struct ipoib_txreq, txreq);

                list_del(&txreq->list);
                sdma_txclean(txq->priv->dd, &tx->txreq);
                dev_kfree_skb_any(tx->skb);
                tx->skb = NULL;
                txq->tx_ring.complete_txreqs++;
        }

        if (hfi1_ipoib_used(txq))
                dd_dev_warn(txq->priv->dd,
                            "txq %d not empty found %u requests\n",
                            txq->q_idx,
                            hfi1_ipoib_txreqs(txq->tx_ring.sent_txreqs,
                                              txq->tx_ring.complete_txreqs));
}

void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv)
{
        int i, j;

        for (i = 0; i < priv->netdev->num_tx_queues; i++) {
                struct hfi1_ipoib_txq *txq = &priv->txqs[i];
                struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring;

                iowait_cancel_work(&txq->wait);
                iowait_sdma_drain(&txq->wait);
                hfi1_ipoib_drain_tx_list(txq);
                netif_napi_del(&txq->napi);
                hfi1_ipoib_drain_tx_ring(txq);
                for (j = 0; j < tx_ring->max_items; j++)
                        kfree(hfi1_txreq_from_idx(tx_ring, j)->sdma_hdr);
                kvfree(tx_ring->items);
        }

        kfree(priv->txqs);
        priv->txqs = NULL;
}

void hfi1_ipoib_napi_tx_enable(struct net_device *dev)
{
        struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
        int i;

        for (i = 0; i < dev->num_tx_queues; i++) {
                struct hfi1_ipoib_txq *txq = &priv->txqs[i];

                napi_enable(&txq->napi);
        }
}

void hfi1_ipoib_napi_tx_disable(struct net_device *dev)
{
        struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
        int i;

        for (i = 0; i < dev->num_tx_queues; i++) {
                struct hfi1_ipoib_txq *txq = &priv->txqs[i];

                napi_disable(&txq->napi);
                hfi1_ipoib_drain_tx_ring(txq);
        }
}

void hfi1_ipoib_tx_timeout(struct net_device *dev, unsigned int q)
{
        struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
        struct hfi1_ipoib_txq *txq = &priv->txqs[q];

        dd_dev_info(priv->dd, "timeout txq %p q %u stopped %u stops %d no_desc %d ring_full %d\n",
                    txq, q,
                    __netif_subqueue_stopped(dev, txq->q_idx),
                    atomic_read(&txq->tx_ring.stops),
                    atomic_read(&txq->tx_ring.no_desc),
                    atomic_read(&txq->tx_ring.ring_full));
        dd_dev_info(priv->dd, "sde %p engine %u\n",
                    txq->sde,
                    txq->sde ? txq->sde->this_idx : 0);
        dd_dev_info(priv->dd, "flow %x\n", txq->flow.as_int);
        dd_dev_info(priv->dd, "sent %llu completed %llu used %llu\n",
                    txq->tx_ring.sent_txreqs, txq->tx_ring.complete_txreqs,
                    hfi1_ipoib_used(txq));
        dd_dev_info(priv->dd, "tx_queue_len %u max_items %u\n",
                    dev->tx_queue_len, txq->tx_ring.max_items);
        dd_dev_info(priv->dd, "head %u tail %u\n",
                    txq->tx_ring.head, txq->tx_ring.tail);
        dd_dev_info(priv->dd, "wait queued %u\n",
                    !list_empty(&txq->wait.list));
        dd_dev_info(priv->dd, "tx_list empty %u\n",
                    list_empty(&txq->tx_list));
}