Эх сурвалжийг харах

qcacmn: Optimize tx completion processing in fast path

Optimize tx completion processing in the fast path by appending the
whole list of reaped tx descs to the free list in one operation, instead
of accessing each descriptor individually and adding it to the free
list. This avoids taking the descriptor pool lock once per descriptor;
the lock is taken only once, when the whole list of descs is appended
to the free list.

Also remove the unused member shinfo_addr from the tx_desc structure.
Removing this member allows the flags type to grow from u16 to u32
without increasing the size of the tx_desc structure. These changes
improved peak KPI by 2% in SFE mode.

Change-Id: I995de9cc08c866ad6e1cd1efc76ac2a35d40fb7c
CRs-Fixed: 3584578
Nandha Kishore Easwaran 1 жил өмнө
parent
commit
15812b8137

+ 2 - 1
dp/wifi3.0/be/dp_be_tx.c

@@ -1828,7 +1828,6 @@ qdf_nbuf_t dp_tx_fast_send_be(struct cdp_soc_t *soc_hdl, uint8_t vdev_id,
 
 	/* Initialize the SW tx descriptor */
 	tx_desc->nbuf = nbuf;
-	tx_desc->shinfo_addr = skb_end_pointer(nbuf);
 	tx_desc->frm_type = dp_tx_frm_std;
 	tx_desc->tx_encap_type = vdev->tx_encap_type;
 	tx_desc->vdev_id = vdev_id;
@@ -1836,6 +1835,8 @@ qdf_nbuf_t dp_tx_fast_send_be(struct cdp_soc_t *soc_hdl, uint8_t vdev_id,
 	tx_desc->pkt_offset = 0;
 	tx_desc->length = pkt_len;
 	tx_desc->flags |= DP_TX_DESC_FLAG_SIMPLE;
+	if (soc->hw_txrx_stats_en)
+		tx_desc->flags |= DP_TX_DESC_FLAG_FASTPATH_SIMPLE;
 	tx_desc->nbuf->fast_recycled = 1;
 
 	if (nbuf->is_from_recycler && nbuf->fast_xmit)

+ 23 - 0
dp/wifi3.0/dp_rings_main.c

@@ -4297,6 +4297,27 @@ static inline void dp_soc_get_ap_mld_mode(struct dp_soc *soc)
 }
 #endif
 
+#ifdef QCA_VDEV_STATS_HW_OFFLOAD_SUPPORT
+/**
+ * dp_soc_hw_txrx_stats_init() - Cache vdev stats HW offload cfg in dp_soc
+ * @soc: Datapath soc handle; soc->wlan_cfg_ctx is read
+ *
+ * Return: none
+ */
+static inline
+void dp_soc_hw_txrx_stats_init(struct dp_soc *soc)
+{
+	soc->hw_txrx_stats_en =
+		wlan_cfg_get_vdev_stats_hw_offload_config(soc->wlan_cfg_ctx);
+}
+#else /* !QCA_VDEV_STATS_HW_OFFLOAD_SUPPORT */
+static inline
+void dp_soc_hw_txrx_stats_init(struct dp_soc *soc)
+{
+	soc->hw_txrx_stats_en = 0; /* offload support compiled out */
+}
+#endif /* QCA_VDEV_STATS_HW_OFFLOAD_SUPPORT */
+
 /**
  * dp_soc_init() - Initialize txrx SOC
  * @soc: Opaque DP SOC handle
@@ -4485,6 +4506,8 @@ void *dp_soc_init(struct dp_soc *soc, HTC_HANDLE htc_handle,
 
 	soc->vdev_stats_id_map = 0;
 
+	dp_soc_hw_txrx_stats_init(soc);
+
 	dp_soc_get_ap_mld_mode(soc);
 
 	return soc;

+ 49 - 8
dp/wifi3.0/dp_tx.c

@@ -1197,7 +1197,6 @@ struct dp_tx_desc_s *dp_tx_prepare_desc_single(struct dp_vdev *vdev,
 	tx_desc->msdu_ext_desc = NULL;
 	tx_desc->pkt_offset = 0;
 	tx_desc->length = qdf_nbuf_headlen(nbuf);
-	tx_desc->shinfo_addr = skb_end_pointer(nbuf);
 
 	dp_tx_trace_pkt(soc, nbuf, tx_desc->id, vdev->vdev_id,
 			vdev->qdf_opmode);
@@ -5647,6 +5646,19 @@ dp_tx_update_ppeds_tx_comp_stats(struct dp_soc *soc,
 }
 #endif
 
+void
+dp_tx_comp_process_desc_list_fast(struct dp_soc *soc,
+				  struct dp_tx_desc_s *head_desc,
+				  struct dp_tx_desc_s *tail_desc,
+				  uint8_t ring_id, /* not referenced in the body */
+				  uint32_t fast_desc_count)
+{
+	struct dp_tx_desc_pool_s *pool = &soc->tx_desc[head_desc->pool_id];
+	/* NOTE(review): pool/pdev taken from head_desc for the whole list */
+	dp_tx_outstanding_sub(head_desc->pdev, fast_desc_count);
+	dp_tx_desc_free_list(pool, head_desc, tail_desc, fast_desc_count);
+}
+
 void
 dp_tx_comp_process_desc_list(struct dp_soc *soc,
 			     struct dp_tx_desc_s *comp_head, uint8_t ring_id)
@@ -5828,13 +5840,17 @@ uint32_t dp_tx_comp_handler(struct dp_intr *int_ctx, struct dp_soc *soc,
 	struct dp_tx_desc_s *tx_desc = NULL;
 	struct dp_tx_desc_s *head_desc = NULL;
 	struct dp_tx_desc_s *tail_desc = NULL;
+	struct dp_tx_desc_s *fast_head_desc = NULL;
+	struct dp_tx_desc_s *fast_tail_desc = NULL;
 	uint32_t num_processed = 0;
+	uint32_t fast_desc_count = 0;
 	uint32_t count;
 	uint32_t num_avail_for_reap = 0;
 	bool force_break = false;
 	struct dp_srng *tx_comp_ring = &soc->tx_comp_ring[ring_id];
 	int max_reap_limit, ring_near_full;
 	uint32_t num_entries;
+	qdf_nbuf_queue_head_t h;
 
 	DP_HIST_INIT();
 
@@ -5869,6 +5885,8 @@ more_data:
 							    hal_ring_hdl,
 							    num_avail_for_reap);
 
+	dp_tx_nbuf_queue_head_init(&h);
+
 	/* Find head descriptor from completion ring */
 	while (qdf_likely(num_avail_for_reap--)) {
 
@@ -5935,7 +5953,8 @@ more_data:
 		}
 		tx_desc->buffer_src = buffer_src;
 
-		if (tx_desc->flags & DP_TX_DESC_FLAG_PPEDS)
+		if (tx_desc->flags & DP_TX_DESC_FLAG_FASTPATH_SIMPLE ||
+		    tx_desc->flags & DP_TX_DESC_FLAG_PPEDS)
 			goto add_to_pool2;
 
 		/*
@@ -6006,14 +6025,28 @@ add_to_pool:
 
 add_to_pool2:
 			/* First ring descriptor on the cycle */
-			if (!head_desc) {
-				head_desc = tx_desc;
+
+			if (tx_desc->flags & DP_TX_DESC_FLAG_FASTPATH_SIMPLE ||
+			    tx_desc->flags & DP_TX_DESC_FLAG_PPEDS) {
+				dp_tx_nbuf_dev_queue_free(&h, tx_desc);
+				fast_desc_count++;
+				if (!fast_head_desc) {
+					fast_head_desc = tx_desc;
+					fast_tail_desc = tx_desc;
+				}
+				fast_tail_desc->next = tx_desc;
+				fast_tail_desc = tx_desc;
+				dp_tx_desc_clear(tx_desc);
+			} else {
+				if (!head_desc) {
+					head_desc = tx_desc;
+					tail_desc = tx_desc;
+				}
+
+				tail_desc->next = tx_desc;
+				tx_desc->next = NULL;
 				tail_desc = tx_desc;
 			}
-
-			tail_desc->next = tx_desc;
-			tx_desc->next = NULL;
-			tail_desc = tx_desc;
 		}
 next_desc:
 		num_processed += !(count & DP_TX_NAPI_BUDGET_DIV_MASK);
@@ -6037,6 +6070,14 @@ next_desc:
 
 	dp_srng_access_end(int_ctx, soc, hal_ring_hdl);
 
+	/* Process the reaped descriptors that were sent via fast path */
+	if (fast_head_desc) {
+		dp_tx_comp_process_desc_list_fast(soc, fast_head_desc,
+						  fast_tail_desc, ring_id,
+						  fast_desc_count);
+		dp_tx_nbuf_dev_kfree_list(&h);
+	}
+
 	/* Process the reaped descriptors */
 	if (head_desc)
 		dp_tx_comp_process_desc_list(soc, head_desc, ring_id);

+ 106 - 7
dp/wifi3.0/dp_tx.h

@@ -69,13 +69,9 @@ int dp_tx_proxy_arp(struct dp_vdev *vdev, qdf_nbuf_t nbuf);
 #define DP_TX_DESC_FLAG_FLUSH		0x2000
 #define DP_TX_DESC_FLAG_TRAFFIC_END_IND	0x4000
 #define DP_TX_DESC_FLAG_RMNET		0x8000
-/*
- * Since the Tx descriptor flag is of only 16-bit and no more bit is free for
- * any new flag, therefore for time being overloading PPEDS flag with that of
- * FLUSH flag and FLAG_FAST with TDLS which is not enabled for WIN.
- */
-#define DP_TX_DESC_FLAG_PPEDS		0x2000
-#define DP_TX_DESC_FLAG_FAST		0x100
+#define DP_TX_DESC_FLAG_FASTPATH_SIMPLE 0x10000
+#define DP_TX_DESC_FLAG_PPEDS		0x20000
+#define DP_TX_DESC_FLAG_FAST		0x40000
 
 #define DP_TX_EXT_DESC_FLAG_METADATA_VALID 0x1
 
@@ -274,6 +270,26 @@ void
 dp_tx_comp_process_desc_list(struct dp_soc *soc,
 			     struct dp_tx_desc_s *comp_head, uint8_t ring_id);
 
+/**
+ * dp_tx_comp_process_desc_list_fast() - Tx complete fast sw descriptor handler
+ * @soc: core txrx main context
+ * @head_desc: first descriptor of the list; its pool_id and pdev are used
+ * @tail_desc: last descriptor of the list
+ * @ring_id: ring number
+ * @fast_desc_count: number of descriptors in the list
+ *
+ * Subtracts @fast_desc_count from the outstanding tx descriptor counters and
+ * appends the whole batch reaped by dp_tx_comp_handler to the pool freelist.
+ *
+ * Return: none
+ */
+void
+dp_tx_comp_process_desc_list_fast(struct dp_soc *soc,
+				  struct dp_tx_desc_s *head_desc,
+				  struct dp_tx_desc_s *tail_desc,
+				  uint8_t ring_id,
+				  uint32_t fast_desc_count);
+
 /**
  * dp_tx_comp_free_buf() - Free nbuf associated with the Tx Descriptor
  * @soc: Soc handle
@@ -1976,6 +1992,40 @@ dp_tx_outstanding_dec(struct dp_pdev *pdev)
 	dp_update_tx_desc_stats(pdev);
 }
 
+/**
+ * __dp_tx_outstanding_sub - Subtract from the global in-use tx desc count
+ * @soc: DP soc handle (not referenced in this variant)
+ * @count: count of descs to subtract from global_descriptor_in_use
+ *
+ * Return: void
+ */
+static inline void
+__dp_tx_outstanding_sub(struct dp_soc *soc, uint32_t count)
+{
+	struct dp_global_context *dp_global;
+
+	dp_global = wlan_objmgr_get_global_ctx();
+
+	qdf_atomic_sub(count, &dp_global->global_descriptor_in_use);
+}
+
+/**
+ * dp_tx_outstanding_sub - Subtract a batch from outstanding tx desc counts
+ * @pdev: DP pdev handle; pdev->soc is passed to __dp_tx_outstanding_sub()
+ * @count: count of descs to subtract from outstanding
+ *
+ * Return: void
+ */
+static inline void
+dp_tx_outstanding_sub(struct dp_pdev *pdev, uint32_t count)
+{
+	struct dp_soc *soc = pdev->soc;
+
+	__dp_tx_outstanding_sub(soc, count);
+	qdf_atomic_sub(count, &pdev->num_tx_outstanding); /* per-pdev count */
+	dp_update_tx_desc_stats(pdev);
+}
+
 #else
 
 static inline void
@@ -2020,6 +2070,36 @@ dp_tx_outstanding_dec(struct dp_pdev *pdev)
 	qdf_atomic_dec(&pdev->num_tx_outstanding);
 	dp_update_tx_desc_stats(pdev);
 }
+
+/**
+ * __dp_tx_outstanding_sub - Subtract from soc->num_tx_outstanding
+ * @soc: DP soc handle
+ * @count: count of descs to subtract from outstanding
+ *
+ * Return: void
+ */
+static inline void
+__dp_tx_outstanding_sub(struct dp_soc *soc, uint32_t count)
+{
+	qdf_atomic_sub(count, &soc->num_tx_outstanding);
+}
+
+/**
+ * dp_tx_outstanding_sub - Subtract a batch from outstanding tx desc counts
+ * @pdev: DP pdev handle; pdev->soc is passed to __dp_tx_outstanding_sub()
+ * @count: count of descs to subtract from outstanding
+ *
+ * Return: void
+ */
+static inline void
+dp_tx_outstanding_sub(struct dp_pdev *pdev, uint32_t count)
+{
+	struct dp_soc *soc = pdev->soc;
+
+	__dp_tx_outstanding_sub(soc, count);
+	qdf_atomic_sub(count, &pdev->num_tx_outstanding); /* per-pdev count */
+	dp_update_tx_desc_stats(pdev);
+}
 #endif /* QCA_SUPPORT_DP_GLOBAL_CTX */
 
 #else //QCA_TX_LIMIT_CHECK
@@ -2064,6 +2144,25 @@ dp_tx_outstanding_dec(struct dp_pdev *pdev)
 	qdf_atomic_dec(&pdev->num_tx_outstanding);
 	dp_update_tx_desc_stats(pdev);
 }
+
+static inline void
+__dp_tx_outstanding_sub(struct dp_soc *soc, uint32_t count)
+{	/* no-op when QCA_TX_LIMIT_CHECK is not defined */
+}
+
+/**
+ * dp_tx_outstanding_sub - Subtract a batch from pdev->num_tx_outstanding
+ * @pdev: DP pdev handle; only the per-pdev counter is updated here
+ * @count: count of descs to subtract from outstanding
+ *
+ * Return: void
+ */
+static inline void
+dp_tx_outstanding_sub(struct dp_pdev *pdev, uint32_t count)
+{
+	qdf_atomic_sub(count, &pdev->num_tx_outstanding);
+	dp_update_tx_desc_stats(pdev);
+}
 #endif //QCA_TX_LIMIT_CHECK
 
 /**

+ 38 - 3
dp/wifi3.0/dp_tx_desc.h

@@ -447,6 +447,21 @@ void dp_tx_tso_num_seg_pool_deinit(struct dp_soc *soc, uint8_t num_pool);
 void dp_tx_desc_pool_cleanup(struct dp_soc *soc, qdf_nbuf_t *nbuf_list);
 #endif
 
+/**
+ * dp_tx_desc_clear() - Reset vdev_id, nbuf, flags and next of a tx desc
+ * @tx_desc: descriptor to reset; it is not returned to any pool here
+ *
+ * Return: none
+ */
+static inline void
+dp_tx_desc_clear(struct dp_tx_desc_s *tx_desc)
+{
+	tx_desc->vdev_id = DP_INVALID_VDEV_ID;
+	tx_desc->nbuf = NULL; /* pointer dropped only; nbuf is not freed here */
+	tx_desc->flags = 0;
+	tx_desc->next = NULL;
+}
+
 #ifdef QCA_LL_TX_FLOW_CONTROL_V2
 void dp_tx_flow_control_init(struct dp_soc *);
 void dp_tx_flow_control_deinit(struct dp_soc *);
@@ -502,6 +517,14 @@ void dp_tx_put_desc_flow_pool(struct dp_tx_desc_pool_s *pool,
 	pool->avail_desc++;
 }
 
+static inline void
+dp_tx_desc_free_list(struct dp_tx_desc_pool_s *pool,
+		     struct dp_tx_desc_s *head_desc,
+		     struct dp_tx_desc_s *tail_desc,
+		     uint32_t fast_desc_count)
+{	/* NOTE(review): no-op stub - descs are not returned to the pool */
+}
+
 #ifdef QCA_AC_BASED_FLOW_CONTROL
 
 /**
@@ -1051,10 +1074,8 @@ dp_tx_desc_free(struct dp_soc *soc, struct dp_tx_desc_s *tx_desc,
 		uint8_t desc_pool_id)
 {
 	struct dp_tx_desc_pool_s *pool = NULL;
-	tx_desc->vdev_id = DP_INVALID_VDEV_ID;
-	tx_desc->nbuf = NULL;
-	tx_desc->flags = 0;
 
+	dp_tx_desc_clear(tx_desc);
 	pool = &soc->tx_desc[desc_pool_id];
 	TX_DESC_LOCK_LOCK(&pool->lock);
 	tx_desc->next = pool->freelist;
@@ -1064,6 +1085,20 @@ dp_tx_desc_free(struct dp_soc *soc, struct dp_tx_desc_s *tx_desc,
 	TX_DESC_LOCK_UNLOCK(&pool->lock);
 }
 
+static inline void
+dp_tx_desc_free_list(struct dp_tx_desc_pool_s *pool,
+		     struct dp_tx_desc_s *head_desc,
+		     struct dp_tx_desc_s *tail_desc,
+		     uint32_t fast_desc_count)
+{
+	TX_DESC_LOCK_LOCK(&pool->lock); /* one lock hold for the whole batch */
+	pool->num_allocated -= fast_desc_count;
+	pool->num_free += fast_desc_count;
+	tail_desc->next = pool->freelist; /* splice batch ahead of freelist */
+	pool->freelist = head_desc;
+	TX_DESC_LOCK_UNLOCK(&pool->lock);
+}
+
 #endif /* QCA_LL_TX_FLOW_CONTROL_V2 */
 
 #ifdef QCA_DP_TX_DESC_ID_CHECK

+ 3 - 3
dp/wifi3.0/dp_types.h

@@ -655,7 +655,6 @@ struct dp_tx_ext_desc_pool_s {
  * @frm_type: Frame Type - ToDo check if this is redundant
  * @pkt_offset: Offset from which the actual packet data starts
  * @pool_id: Pool ID - used when releasing the descriptor
- * @shinfo_addr:
  * @msdu_ext_desc: MSDU extension descriptor
  * @timestamp:
  * @comp:
@@ -670,7 +669,7 @@ struct dp_tx_desc_s {
 	uint32_t magic;
 	uint64_t timestamp_tick;
 #endif
-	uint16_t flags;
+	uint32_t flags;
 	uint32_t id;
 	qdf_dma_addr_t dma_addr;
 	uint8_t vdev_id;
@@ -683,7 +682,6 @@ struct dp_tx_desc_s {
 	uint8_t frm_type;
 	uint8_t pkt_offset;
 	uint8_t  pool_id;
-	unsigned char *shinfo_addr;
 	struct dp_tx_ext_desc_elem_s *msdu_ext_desc;
 	qdf_ktime_t timestamp;
 	struct hal_tx_desc_comp_s comp;
@@ -2776,6 +2774,8 @@ struct dp_soc {
 	/* VDEVs on this SOC */
 	struct dp_vdev *vdev_id_map[MAX_VDEV_CNT];
 
+	uint8_t hw_txrx_stats_en:1;
+
 	/* Tx H/W queues lock */
 	qdf_spinlock_t tx_queue_lock[MAX_TX_HW_QUEUES];