ef100_tx.c

// SPDX-License-Identifier: GPL-2.0-only
/****************************************************************************
 * Driver for Solarflare network controllers and boards
 * Copyright 2018 Solarflare Communications Inc.
 * Copyright 2019-2020 Xilinx Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */

#include <net/ip6_checksum.h>

#include "net_driver.h"
#include "tx_common.h"
#include "nic_common.h"
#include "mcdi_functions.h"
#include "ef100_regs.h"
#include "io.h"
#include "ef100_tx.h"
#include "ef100_nic.h"

int ef100_tx_probe(struct efx_tx_queue *tx_queue)
{
        /* Allocate an extra descriptor for the QMDA status completion entry */
        return efx_nic_alloc_buffer(tx_queue->efx, &tx_queue->txd.buf,
                                    (tx_queue->ptr_mask + 2) *
                                    sizeof(efx_oword_t),
                                    GFP_KERNEL);
}
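
/* Set up a freshly-probed TX queue: map it back to the core netdev TX queue
 * it serves and ask the MC, via MCDI, to initialise the hardware queue.
 */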
void ef100_tx_init(struct efx_tx_queue *tx_queue)
{
        /* must be the inverse of lookup in efx_get_tx_channel */
        tx_queue->core_txq =
                netdev_get_tx_queue(tx_queue->efx->net_dev,
                                    tx_queue->channel->channel -
                                    tx_queue->efx->tx_channel_offset);

        /* This value is purely documentational; as EF100 never passes through
         * the switch statement in tx.c:__efx_enqueue_skb(), that switch does
         * not handle case 3.  EF100's TSOv3 descriptors are generated by
         * ef100_make_tso_desc().
         * Meanwhile, all efx_mcdi_tx_init() cares about is that it's not 2.
         */
        tx_queue->tso_version = 3;
        if (efx_mcdi_tx_init(tx_queue))
                netdev_WARN(tx_queue->efx->net_dev,
                            "failed to initialise TXQ %d\n", tx_queue->queue);
}
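
/* Decide whether an skb can be sent with hardware TSO (TSOv3).  Checks the
 * skb against the limits advertised by the NIC (maximum header length,
 * payload length, segment and frame counts) and, if TSO is usable, reserves
 * an empty buffer slot for the TSO descriptor with the header length
 * recorded in it.
 */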
static bool ef100_tx_can_tso(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
{
        struct efx_nic *efx = tx_queue->efx;
        struct ef100_nic_data *nic_data;
        struct efx_tx_buffer *buffer;
        size_t header_len;
        u32 mss;

        nic_data = efx->nic_data;

        if (!skb_is_gso_tcp(skb))
                return false;
        if (!(efx->net_dev->features & NETIF_F_TSO))
                return false;

        mss = skb_shinfo(skb)->gso_size;
        if (unlikely(mss < 4)) {
                WARN_ONCE(1, "MSS of %u is too small for TSO\n", mss);
                return false;
        }

        header_len = efx_tx_tso_header_length(skb);
        if (header_len > nic_data->tso_max_hdr_len)
                return false;

        if (skb_shinfo(skb)->gso_segs > nic_data->tso_max_payload_num_segs) {
                /* net_dev->gso_max_segs should've caught this */
                WARN_ON_ONCE(1);
                return false;
        }

        if (skb->data_len / mss > nic_data->tso_max_frames)
                return false;

        /* net_dev->gso_max_size should've caught this */
        if (WARN_ON_ONCE(skb->data_len > nic_data->tso_max_payload_len))
                return false;

        /* Reserve an empty buffer for the TSO V3 descriptor.
         * Convey the length of the header since we already know it.
         */
        buffer = efx_tx_queue_get_insert_buffer(tx_queue);

        buffer->flags = EFX_TX_BUF_TSO_V3 | EFX_TX_BUF_CONT;
        buffer->len = header_len;
        buffer->unmap_len = 0;
        buffer->skb = skb;

        ++tx_queue->insert_count;
        return true;
}

static efx_oword_t *ef100_tx_desc(struct efx_tx_queue *tx_queue, unsigned int index)
{
        if (likely(tx_queue->txd.buf.addr))
                return ((efx_oword_t *)tx_queue->txd.buf.addr) + index;
        else
                return NULL;
}
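
/* Ring the TX doorbell, telling the NIC about any descriptors written since
 * the last notification.
 */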
static void ef100_notify_tx_desc(struct efx_tx_queue *tx_queue)
{
        unsigned int write_ptr;
        efx_dword_t reg;

        tx_queue->xmit_pending = false;

        if (unlikely(tx_queue->notify_count == tx_queue->write_count))
                return;

        write_ptr = tx_queue->write_count & tx_queue->ptr_mask;
        /* The write pointer goes into the high word */
        EFX_POPULATE_DWORD_1(reg, ERF_GZ_TX_RING_PIDX, write_ptr);
        efx_writed_page(tx_queue->efx, &reg,
                        ER_GZ_TX_RING_DOORBELL, tx_queue->queue);
        tx_queue->notify_count = tx_queue->write_count;
}

static void ef100_tx_push_buffers(struct efx_tx_queue *tx_queue)
{
        ef100_notify_tx_desc(tx_queue);
        ++tx_queue->pushes;
}
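
/* Fill in the checksum-offload fields of a SEND descriptor for a
 * CHECKSUM_PARTIAL skb.  The hardware takes the offsets in 2-byte units,
 * hence the ">> 1" shifts.
 */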
static void ef100_set_tx_csum_partial(const struct sk_buff *skb,
                                      struct efx_tx_buffer *buffer, efx_oword_t *txd)
{
        efx_oword_t csum;
        int csum_start;

        if (!skb || skb->ip_summed != CHECKSUM_PARTIAL)
                return;

        /* skb->csum_start has the offset from head, but we need the offset
         * from data.
         */
        csum_start = skb_checksum_start_offset(skb);
        EFX_POPULATE_OWORD_3(csum,
                             ESF_GZ_TX_SEND_CSO_PARTIAL_EN, 1,
                             ESF_GZ_TX_SEND_CSO_PARTIAL_START_W,
                             csum_start >> 1,
                             ESF_GZ_TX_SEND_CSO_PARTIAL_CSUM_W,
                             skb->csum_offset >> 1);
        EFX_OR_OWORD(*txd, *txd, csum);
}

static void ef100_set_tx_hw_vlan(const struct sk_buff *skb, efx_oword_t *txd)
{
        u16 vlan_tci = skb_vlan_tag_get(skb);
        efx_oword_t vlan;

        EFX_POPULATE_OWORD_2(vlan,
                             ESF_GZ_TX_SEND_VLAN_INSERT_EN, 1,
                             ESF_GZ_TX_SEND_VLAN_INSERT_TCI, vlan_tci);
        EFX_OR_OWORD(*txd, *txd, vlan);
}
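
/* Build a SEND descriptor for a single buffer, adding checksum-offload and
 * VLAN-insertion fields when the corresponding netdev features are enabled.
 */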
static void ef100_make_send_desc(struct efx_nic *efx,
                                 const struct sk_buff *skb,
                                 struct efx_tx_buffer *buffer, efx_oword_t *txd,
                                 unsigned int segment_count)
{
        /* TX send descriptor */
        EFX_POPULATE_OWORD_3(*txd,
                             ESF_GZ_TX_SEND_NUM_SEGS, segment_count,
                             ESF_GZ_TX_SEND_LEN, buffer->len,
                             ESF_GZ_TX_SEND_ADDR, buffer->dma_addr);

        if (likely(efx->net_dev->features & NETIF_F_HW_CSUM))
                ef100_set_tx_csum_partial(skb, buffer, txd);
        if (efx->net_dev->features & NETIF_F_HW_VLAN_CTAG_TX &&
            skb && skb_vlan_tag_present(skb))
                ef100_set_tx_hw_vlan(skb, txd);
}
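
/* Build a TSOv3 descriptor from the metadata of a GSO skb: MSS, header and
 * payload lengths, inner/outer header offsets, and the per-segment IP length,
 * IP ID and UDP length edit controls.  Also pre-adjusts the inner TCP
 * checksum to exclude the payload length, as the hardware expects.
 */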
static void ef100_make_tso_desc(struct efx_nic *efx,
                                const struct sk_buff *skb,
                                struct efx_tx_buffer *buffer, efx_oword_t *txd,
                                unsigned int segment_count)
{
        bool gso_partial = skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL;
        unsigned int len, ip_offset, tcp_offset, payload_segs;
        u32 mangleid = ESE_GZ_TX_DESC_IP4_ID_INC_MOD16;
        unsigned int outer_ip_offset, outer_l4_offset;
        u16 vlan_tci = skb_vlan_tag_get(skb);
        u32 mss = skb_shinfo(skb)->gso_size;
        bool encap = skb->encapsulation;
        bool udp_encap = false;
        u16 vlan_enable = 0;
        struct tcphdr *tcp;
        bool outer_csum;
        u32 paylen;

        if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID)
                mangleid = ESE_GZ_TX_DESC_IP4_ID_NO_OP;
        if (efx->net_dev->features & NETIF_F_HW_VLAN_CTAG_TX)
                vlan_enable = skb_vlan_tag_present(skb);

        len = skb->len - buffer->len;
        /* We use 1 for the TSO descriptor and 1 for the header */
        payload_segs = segment_count - 2;
        if (encap) {
                outer_ip_offset = skb_network_offset(skb);
                outer_l4_offset = skb_transport_offset(skb);
                ip_offset = skb_inner_network_offset(skb);
                tcp_offset = skb_inner_transport_offset(skb);
                if (skb_shinfo(skb)->gso_type &
                    (SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM))
                        udp_encap = true;
        } else {
                ip_offset = skb_network_offset(skb);
                tcp_offset = skb_transport_offset(skb);
                outer_ip_offset = outer_l4_offset = 0;
        }
        outer_csum = skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM;

        /* subtract TCP payload length from inner checksum */
        tcp = (void *)skb->data + tcp_offset;
        paylen = skb->len - tcp_offset;
        csum_replace_by_diff(&tcp->check, (__force __wsum)htonl(paylen));

        EFX_POPULATE_OWORD_19(*txd,
                              ESF_GZ_TX_DESC_TYPE, ESE_GZ_TX_DESC_TYPE_TSO,
                              ESF_GZ_TX_TSO_MSS, mss,
                              ESF_GZ_TX_TSO_HDR_NUM_SEGS, 1,
                              ESF_GZ_TX_TSO_PAYLOAD_NUM_SEGS, payload_segs,
                              ESF_GZ_TX_TSO_HDR_LEN_W, buffer->len >> 1,
                              ESF_GZ_TX_TSO_PAYLOAD_LEN, len,
                              ESF_GZ_TX_TSO_CSO_OUTER_L4, outer_csum,
                              ESF_GZ_TX_TSO_CSO_INNER_L4, 1,
                              ESF_GZ_TX_TSO_INNER_L3_OFF_W, ip_offset >> 1,
                              ESF_GZ_TX_TSO_INNER_L4_OFF_W, tcp_offset >> 1,
                              ESF_GZ_TX_TSO_ED_INNER_IP4_ID, mangleid,
                              ESF_GZ_TX_TSO_ED_INNER_IP_LEN, 1,
                              ESF_GZ_TX_TSO_OUTER_L3_OFF_W, outer_ip_offset >> 1,
                              ESF_GZ_TX_TSO_OUTER_L4_OFF_W, outer_l4_offset >> 1,
                              ESF_GZ_TX_TSO_ED_OUTER_UDP_LEN, udp_encap && !gso_partial,
                              ESF_GZ_TX_TSO_ED_OUTER_IP_LEN, encap && !gso_partial,
                              ESF_GZ_TX_TSO_ED_OUTER_IP4_ID, encap ? mangleid :
                                                             ESE_GZ_TX_DESC_IP4_ID_NO_OP,
                              ESF_GZ_TX_TSO_VLAN_INSERT_EN, vlan_enable,
                              ESF_GZ_TX_TSO_VLAN_INSERT_TCI, vlan_tci
                              );
}
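
/* Turn all buffers queued between write_count and insert_count into hardware
 * descriptors: an optional PREFIX (override) descriptor for representor
 * traffic, then a SEND or TSO descriptor followed by SEG descriptors for any
 * remaining fragments.
 */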
static void ef100_tx_make_descriptors(struct efx_tx_queue *tx_queue,
                                      const struct sk_buff *skb,
                                      unsigned int segment_count,
                                      struct efx_rep *efv)
{
        unsigned int old_write_count = tx_queue->write_count;
        unsigned int new_write_count = old_write_count;
        struct efx_tx_buffer *buffer;
        unsigned int next_desc_type;
        unsigned int write_ptr;
        efx_oword_t *txd;
        unsigned int nr_descs = tx_queue->insert_count - old_write_count;

        if (unlikely(nr_descs == 0))
                return;

        if (segment_count)
                next_desc_type = ESE_GZ_TX_DESC_TYPE_TSO;
        else
                next_desc_type = ESE_GZ_TX_DESC_TYPE_SEND;

        if (unlikely(efv)) {
                /* Create TX override descriptor */
                write_ptr = new_write_count & tx_queue->ptr_mask;
                txd = ef100_tx_desc(tx_queue, write_ptr);
                ++new_write_count;

                tx_queue->packet_write_count = new_write_count;
                EFX_POPULATE_OWORD_3(*txd,
                                     ESF_GZ_TX_DESC_TYPE, ESE_GZ_TX_DESC_TYPE_PREFIX,
                                     ESF_GZ_TX_PREFIX_EGRESS_MPORT, efv->mport,
                                     ESF_GZ_TX_PREFIX_EGRESS_MPORT_EN, 1);
                nr_descs--;
        }

        /* if it's a raw write (such as XDP) then always SEND single frames */
        if (!skb)
                nr_descs = 1;

        do {
                write_ptr = new_write_count & tx_queue->ptr_mask;
                buffer = &tx_queue->buffer[write_ptr];
                txd = ef100_tx_desc(tx_queue, write_ptr);
                ++new_write_count;

                /* Create TX descriptor ring entry */
                tx_queue->packet_write_count = new_write_count;

                switch (next_desc_type) {
                case ESE_GZ_TX_DESC_TYPE_SEND:
                        ef100_make_send_desc(tx_queue->efx, skb,
                                             buffer, txd, nr_descs);
                        break;
                case ESE_GZ_TX_DESC_TYPE_TSO:
                        /* TX TSO descriptor */
                        WARN_ON_ONCE(!(buffer->flags & EFX_TX_BUF_TSO_V3));
                        ef100_make_tso_desc(tx_queue->efx, skb,
                                            buffer, txd, nr_descs);
                        break;
                default:
                        /* TX segment descriptor */
                        EFX_POPULATE_OWORD_3(*txd,
                                             ESF_GZ_TX_DESC_TYPE, ESE_GZ_TX_DESC_TYPE_SEG,
                                             ESF_GZ_TX_SEG_LEN, buffer->len,
                                             ESF_GZ_TX_SEG_ADDR, buffer->dma_addr);
                }
                /* if it's a raw write (such as XDP) then always SEND */
                next_desc_type = skb ? ESE_GZ_TX_DESC_TYPE_SEG :
                                       ESE_GZ_TX_DESC_TYPE_SEND;
                /* mark as an EFV buffer if applicable */
                if (unlikely(efv))
                        buffer->flags |= EFX_TX_BUF_EFV;

        } while (new_write_count != tx_queue->insert_count);

        wmb(); /* Ensure descriptors are written before they are fetched */

        tx_queue->write_count = new_write_count;

        /* The write_count above must be updated before reading
         * channel->holdoff_doorbell to avoid a race with the
         * completion path, so ensure these operations are not
         * re-ordered.  This also flushes the update of write_count
         * back into the cache.
         */
        smp_mb();
}
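
/* Create descriptors for any buffers queued by a raw write (such as XDP,
 * i.e. with no skb) and ring the doorbell.
 */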
void ef100_tx_write(struct efx_tx_queue *tx_queue)
{
        ef100_tx_make_descriptors(tx_queue, NULL, 0, NULL);
        ef100_tx_push_buffers(tx_queue);
}
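
/* Handle a TX completion event: work out which descriptor index the event
 * covers and complete transmission up to that point.
 */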
int ef100_ev_tx(struct efx_channel *channel, const efx_qword_t *p_event)
{
        unsigned int tx_done =
                EFX_QWORD_FIELD(*p_event, ESF_GZ_EV_TXCMPL_NUM_DESC);
        unsigned int qlabel =
                EFX_QWORD_FIELD(*p_event, ESF_GZ_EV_TXCMPL_Q_LABEL);
        struct efx_tx_queue *tx_queue =
                efx_channel_get_tx_queue(channel, qlabel);
        unsigned int tx_index = (tx_queue->read_count + tx_done - 1) &
                                tx_queue->ptr_mask;

        return efx_xmit_done(tx_queue, tx_index);
}

/* Add a socket buffer to a TX queue
 *
 * You must hold netif_tx_lock() to call this function.
 *
 * Returns 0 on success, error code otherwise.  In case of an error this
 * function will free the SKB.
 */
netdev_tx_t ef100_enqueue_skb(struct efx_tx_queue *tx_queue,
                              struct sk_buff *skb)
{
        return __ef100_enqueue_skb(tx_queue, skb, NULL);
}
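
/* Common enqueue path for the netdev and (when efv is non-NULL) representor
 * TX.  Representor traffic gets an extra egress-mport prefix descriptor and
 * is dropped rather than being allowed to stop the shared queue.
 */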
int __ef100_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
                        struct efx_rep *efv)
{
        unsigned int old_insert_count = tx_queue->insert_count;
        struct efx_nic *efx = tx_queue->efx;
        bool xmit_more = netdev_xmit_more();
        unsigned int fill_level;
        unsigned int segments;
        int rc;

        if (!tx_queue->buffer || !tx_queue->ptr_mask) {
                netif_stop_queue(efx->net_dev);
                dev_kfree_skb_any(skb);
                return -ENODEV;
        }

        segments = skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 0;
        if (segments == 1)
                segments = 0;   /* Don't use TSO/GSO for a single segment. */
        if (segments && !ef100_tx_can_tso(tx_queue, skb)) {
                rc = efx_tx_tso_fallback(tx_queue, skb);
                tx_queue->tso_fallbacks++;
                if (rc)
                        goto err;
                else
                        return 0;
        }

        if (unlikely(efv)) {
                struct efx_tx_buffer *buffer = __efx_tx_queue_get_insert_buffer(tx_queue);

                /* Drop representor packets if the queue is stopped.
                 * We currently don't assert backoff to representors so this is
                 * to make sure representor traffic can't starve the main
                 * net device.
                 * And, of course, if there are no TX descriptors left.
                 */
                if (netif_tx_queue_stopped(tx_queue->core_txq) ||
                    unlikely(efx_tx_buffer_in_use(buffer))) {
                        atomic64_inc(&efv->stats.tx_errors);
                        rc = -ENOSPC;
                        goto err;
                }

                /* Also drop representor traffic if it could cause us to
                 * stop the queue.  If we assert backoff and we haven't
                 * received traffic on the main net device recently then the
                 * TX watchdog can go off erroneously.
                 */
                fill_level = efx_channel_tx_old_fill_level(tx_queue->channel);
                fill_level += efx_tx_max_skb_descs(efx);
                if (fill_level > efx->txq_stop_thresh) {
                        struct efx_tx_queue *txq2;

                        /* Refresh cached fill level and re-check */
                        efx_for_each_channel_tx_queue(txq2, tx_queue->channel)
                                txq2->old_read_count = READ_ONCE(txq2->read_count);

                        fill_level = efx_channel_tx_old_fill_level(tx_queue->channel);
                        fill_level += efx_tx_max_skb_descs(efx);
                        if (fill_level > efx->txq_stop_thresh) {
                                atomic64_inc(&efv->stats.tx_errors);
                                rc = -ENOSPC;
                                goto err;
                        }
                }

                buffer->flags = EFX_TX_BUF_OPTION | EFX_TX_BUF_EFV;
                tx_queue->insert_count++;
        }

        /* Map for DMA and create descriptors */
        rc = efx_tx_map_data(tx_queue, skb, segments);
        if (rc)
                goto err;
        ef100_tx_make_descriptors(tx_queue, skb, segments, efv);

        fill_level = efx_channel_tx_old_fill_level(tx_queue->channel);
        if (fill_level > efx->txq_stop_thresh) {
                struct efx_tx_queue *txq2;

                /* Because of checks above, representor traffic should
                 * not be able to stop the queue.
                 */
                WARN_ON(efv);

                netif_tx_stop_queue(tx_queue->core_txq);
                /* Re-read after a memory barrier in case we've raced with
                 * the completion path.  Otherwise there's a danger we'll never
                 * restart the queue if all completions have just happened.
                 */
                smp_mb();
                efx_for_each_channel_tx_queue(txq2, tx_queue->channel)
                        txq2->old_read_count = READ_ONCE(txq2->read_count);
                fill_level = efx_channel_tx_old_fill_level(tx_queue->channel);
                if (fill_level < efx->txq_stop_thresh)
                        netif_tx_start_queue(tx_queue->core_txq);
        }

        tx_queue->xmit_pending = true;

        /* If xmit_more then we don't need to push the doorbell, unless there
         * are 256 descriptors already queued in which case we have to push to
         * ensure we never push more than 256 at once.
         *
         * Always push for representor traffic, and don't account it to parent
         * PF netdevice's BQL.
         */
        if (unlikely(efv) ||
            __netdev_tx_sent_queue(tx_queue->core_txq, skb->len, xmit_more) ||
            tx_queue->write_count - tx_queue->notify_count > 255)
                ef100_tx_push_buffers(tx_queue);

        if (segments) {
                tx_queue->tso_bursts++;
                tx_queue->tso_packets += segments;
                tx_queue->tx_packets += segments;
        } else {
                tx_queue->tx_packets++;
        }

        return 0;

err:
        efx_enqueue_unwind(tx_queue, old_insert_count);
        if (!IS_ERR_OR_NULL(skb))
                dev_kfree_skb_any(skb);

        /* If we're not expecting another transmit and we had something to push
         * on this queue then we need to push here to get the previous packets
         * out.  We only enter this branch from before the xmit_more handling
         * above, so xmit_pending still refers to the old state.
         */
        if (tx_queue->xmit_pending && !xmit_more)
                ef100_tx_push_buffers(tx_queue);
        return rc;
}