فهرست منبع

qcacmn: Add support to enable tx hw latency stats at run time

This feature can be enabled at runtime using the sysfs interface.
Support is added to dump and clear the histogram stats.

The lower delay regions have to be more granular to indicate any
medium-related issues for time-sensitive XR applications.

Change-Id: I0a44a54d12d92ce016de349810cb2bedebaf9a58
CRs-Fixed: 2981006
Yeshwanth Sriram Guntuka 3 سال پیش
والد
کامیت
df666b7116

+ 1 - 0
dp/inc/cdp_txrx_cmn_struct.h

@@ -92,6 +92,7 @@
 #define CDP_DISCONNECT_STATS       25
 #define CDP_DP_RX_FISA_STATS	   26
 #define CDP_DP_SWLM_STATS	   27
+#define CDP_DP_TX_HW_LATENCY_STATS 28
 
 #define WME_AC_TO_TID(_ac) (       \
 		((_ac) == WME_AC_VO) ? 6 : \

+ 51 - 0
dp/inc/cdp_txrx_host_stats.h

@@ -904,4 +904,55 @@ cdp_get_pdev_tx_capture_stats(ol_txrx_soc_handle soc, uint8_t pdev_id,
 								   stats);
 }
 #endif /* WLAN_TX_PKT_CAPTURE_ENH */
+
+#ifdef HW_TX_DELAY_STATS_ENABLE
+/**
+ * cdp_enable_disable_vdev_tx_delay_stats() - Start/Stop tx delay stats capture
+ * @soc: soc handle; the call is dropped with a debug log if it or ops is NULL
+ * @vdev_id: vdev id
+ * @value: value passed through unchanged to the registered host stats op
+ * Silently returns when host_stats_ops or the op itself is not registered.
+ * Return: None
+ */
+static inline void
+cdp_enable_disable_vdev_tx_delay_stats(ol_txrx_soc_handle soc, uint8_t vdev_id,
+				       uint8_t value)
+{
+	if (!soc || !soc->ops) {
+		dp_cdp_debug("Invalid Instance");
+		return;
+	}
+
+	if (!soc->ops->host_stats_ops ||
+	    !soc->ops->host_stats_ops->enable_disable_vdev_tx_delay_stats)
+		return;
+
+	soc->ops->host_stats_ops->enable_disable_vdev_tx_delay_stats(soc,
+								     vdev_id,
+								     value);
+}
+
+/**
+ * cdp_vdev_is_tx_delay_stats_enabled() - Check if the Tx delay stats
+ *  is enabled or not for the given vdev_id
+ * @soc: soc handle
+ * @vdev_id: vdev_id
+ *
+ * Return: the registered op's result; 0 if soc/ops are invalid or op is unset
+ */
+static inline uint8_t
+cdp_vdev_is_tx_delay_stats_enabled(ol_txrx_soc_handle soc, uint8_t vdev_id)
+{
+	if (!soc || !soc->ops || !soc->ops->host_stats_ops) {
+		dp_cdp_debug("Invalid Instance:");
+		return 0;
+	}
+
+	if (soc->ops->host_stats_ops->is_tx_delay_stats_enabled)
+		return soc->ops->host_stats_ops->is_tx_delay_stats_enabled(soc,
+								     vdev_id);
+
+	return 0;
+}
+#endif
 #endif /* _CDP_TXRX_HOST_STATS_H_ */

+ 8 - 0
dp/inc/cdp_txrx_ops.h

@@ -1089,6 +1089,14 @@ struct cdp_host_stats_ops {
 	(*get_pdev_tx_capture_stats)(struct cdp_soc_t *soc, uint8_t pdev_id,
 				     struct cdp_pdev_tx_capture_stats *stats);
 #endif /* WLAN_TX_PKT_CAPTURE_ENH */
+#ifdef HW_TX_DELAY_STATS_ENABLE
+	void
+	(*enable_disable_vdev_tx_delay_stats)(struct cdp_soc_t *soc,
+					      uint8_t vdev_id,
+					      uint8_t value);
+	uint8_t (*is_tx_delay_stats_enabled)(struct cdp_soc_t *soc_hdl,
+					     uint8_t vdev_id);
+#endif
 };
 
 struct cdp_wds_ops {

+ 5 - 0
dp/inc/cdp_txrx_stats_struct.h

@@ -1781,6 +1781,7 @@ struct cdp_rx_ingress_stats {
  * @tx: cdp tx stats
  * @rx: cdp rx stats
  * @tso_stats: tso stats
+ * @tid_tx_stats: tid tx stats
  */
 struct cdp_vdev_stats {
 	struct cdp_tx_ingress_stats tx_i;
@@ -1788,6 +1789,10 @@ struct cdp_vdev_stats {
 	struct cdp_tx_stats tx;
 	struct cdp_rx_stats rx;
 	struct cdp_tso_stats tso_stats;
+#ifdef HW_TX_DELAY_STATS_ENABLE
+	struct cdp_tid_tx_stats tid_tx_stats[CDP_MAX_TX_COMP_RINGS]
+					    [CDP_MAX_DATA_TIDS];
+#endif
 };
 
 /* struct cdp_calibr_stats - Calibrated stats

+ 3 - 8
dp/wifi3.0/be/dp_be_tx.c

@@ -272,7 +272,8 @@ void dp_tx_process_htt_completion_be(struct dp_soc *soc,
 
 		tid_stats = &pdev->stats.tid_stats.tid_tx_stats[ring_id][tid];
 
-		if (qdf_unlikely(pdev->delay_stats_flag))
+		if (qdf_unlikely(pdev->delay_stats_flag) ||
+		    qdf_unlikely(dp_is_vdev_tx_delay_stats_enabled(vdev)))
 			dp_tx_compute_delay(vdev, tx_desc, tid, ring_id);
 		if (tx_status < CDP_MAX_TX_HTT_STATUS)
 			tid_stats->htt_status_cnt[tx_status]++;
@@ -616,13 +617,7 @@ dp_tx_hw_enqueue_be(struct dp_soc *soc, struct dp_vdev *vdev,
 
 	dp_tx_set_min_rates_for_critical_frames(soc, hal_tx_desc_cached,
 						tx_desc->nbuf);
-
-	if (qdf_unlikely(vdev->pdev->delay_stats_flag) ||
-	    qdf_unlikely(wlan_cfg_is_peer_ext_stats_enabled(soc->wlan_cfg_ctx)) ||
-	    qdf_unlikely(soc->rdkstats_enabled) ||
-	    dp_tx_pkt_tracepoints_enabled())
-		tx_desc->timestamp = qdf_ktime_to_ms(qdf_ktime_real_get());
-
+	dp_tx_desc_set_ktimestamp(vdev, tx_desc);
 	dp_verbose_debug("length:%d , type = %d, dma_addr %llx, offset %d desc id %u",
 			 tx_desc->length,
 			 (tx_desc->flags & DP_TX_DESC_FLAG_FRAG),

+ 49 - 4
dp/wifi3.0/dp_internal.h

@@ -2068,16 +2068,17 @@ bool dp_check_pdev_exists(struct dp_soc *soc, struct dp_pdev *data);
 
 /**
  * dp_update_delay_stats() - Update delay statistics in structure
- *                              and fill min, max and avg delay
- * @pdev: pdev handle
+ *				and fill min, max and avg delay
+ * @tstats: tid tx stats
+ * @rstats: tid rx stats
  * @delay: delay in ms
  * @tid: tid value
  * @mode: type of tx delay mode
  * @ring id: ring number
- *
  * Return: none
  */
-void dp_update_delay_stats(struct dp_pdev *pdev, uint32_t delay,
+void dp_update_delay_stats(struct cdp_tid_tx_stats *tstats,
+			   struct cdp_tid_rx_stats *rstats, uint32_t delay,
 			   uint8_t tid, uint8_t mode, uint8_t ring_id);
 
 /**
@@ -3471,4 +3472,48 @@ dp_peer_get_tx_capture_stats(struct cdp_soc_t *soc_hdl,
 QDF_STATUS
 dp_pdev_get_tx_capture_stats(struct cdp_soc_t *soc_hdl, uint8_t pdev_id,
 			     struct cdp_pdev_tx_capture_stats *stats);
+
+#ifdef HW_TX_DELAY_STATS_ENABLE
+/**
+ * dp_is_vdev_tx_delay_stats_enabled() - Check if tx delay stats
+ *  is enabled for vdev
+ * @vdev: dp vdev; dereferenced without a NULL check
+ *
+ * Return: vdev->hw_tx_delay_stats_enabled as stored (uint8_t flag)
+ */
+static inline uint8_t dp_is_vdev_tx_delay_stats_enabled(struct dp_vdev *vdev)
+{
+	return vdev->hw_tx_delay_stats_enabled;
+}
+
+/*
+ * dp_pdev_print_tx_delay_stats(): Print vdev tx delay stats
+ *  for pdev
+ * @soc: dp soc
+ *
+ * Return: None
+ */
+void dp_pdev_print_tx_delay_stats(struct dp_soc *soc);
+
+/**
+ * dp_pdev_clear_tx_delay_stats() - clear tx delay stats
+ * @soc: soc handle
+ *
+ * Return: None
+ */
+void dp_pdev_clear_tx_delay_stats(struct dp_soc *soc);
+#else
+static inline uint8_t dp_is_vdev_tx_delay_stats_enabled(struct dp_vdev *vdev)
+{
+	return 0;
+}
+
+static inline void dp_pdev_print_tx_delay_stats(struct dp_soc *soc)
+{
+}
+
+static inline void dp_pdev_clear_tx_delay_stats(struct dp_soc *soc)
+{
+}
+#endif
 #endif /* #ifndef _DP_INTERNAL_H_ */

+ 123 - 47
dp/wifi3.0/dp_main.c

@@ -10979,6 +10979,7 @@ static QDF_STATUS dp_txrx_dump_stats(struct cdp_soc_t *psoc, uint16_t value,
 		dp_txrx_path_stats(soc);
 		dp_print_soc_interrupt_stats(soc);
 		hal_dump_reg_write_stats(soc->hal_soc);
+		dp_pdev_print_tx_delay_stats(soc);
 		break;
 
 	case CDP_RX_RING_STATS:
@@ -11012,6 +11013,10 @@ static QDF_STATUS dp_txrx_dump_stats(struct cdp_soc_t *psoc, uint16_t value,
 		dp_print_swlm_stats(soc);
 		break;
 
+	case CDP_DP_TX_HW_LATENCY_STATS:
+		dp_pdev_print_tx_delay_stats(soc);
+		break;
+
 	default:
 		status = QDF_STATUS_E_INVAL;
 		break;
@@ -11250,6 +11255,10 @@ QDF_STATUS dp_txrx_clear_dump_stats(struct cdp_soc_t *soc_hdl, uint8_t pdev_id,
 		dp_txrx_clear_tso_stats(soc);
 		break;
 
+	case CDP_DP_TX_HW_LATENCY_STATS:
+		dp_pdev_clear_tx_delay_stats(soc);
+		break;
+
 	default:
 		status = QDF_STATUS_E_INVAL;
 		break;
@@ -12315,6 +12324,58 @@ dp_set_pkt_capture_mode(struct cdp_soc_t *soc_handle, bool val)
 }
 #endif
 
+#ifdef HW_TX_DELAY_STATS_ENABLE
+/**
+ * dp_enable_disable_vdev_tx_delay_stats() - Start/Stop tx delay stats capture
+ * @soc_hdl: CDP soc handle, converted to the DP soc internally
+ * @vdev_id: id of the vdev to update; no-op if no vdev reference is obtained
+ * @value: stored as-is in vdev->hw_tx_delay_stats_enabled
+ *
+ * Return: None
+ */
+static void
+dp_enable_disable_vdev_tx_delay_stats(struct cdp_soc_t *soc_hdl,
+				      uint8_t vdev_id,
+				      uint8_t value)
+{
+	struct dp_soc *soc = cdp_soc_t_to_dp_soc(soc_hdl);
+	struct dp_vdev *vdev = NULL;
+
+	vdev = dp_vdev_get_ref_by_id(soc, vdev_id, DP_MOD_ID_CDP);
+	if (!vdev)
+		return;
+
+	vdev->hw_tx_delay_stats_enabled = value;
+
+	dp_vdev_unref_delete(soc, vdev, DP_MOD_ID_CDP);
+}
+
+/**
+ * dp_check_vdev_tx_delay_stats_enabled() - check the feature is enabled or not
+ * @soc_hdl: CDP soc handle, converted to the DP soc internally
+ * @vdev_id: vdev id
+ *
+ * Return: vdev's hw_tx_delay_stats_enabled value; 0 if no vdev ref is obtained
+ */
+static uint8_t
+dp_check_vdev_tx_delay_stats_enabled(struct cdp_soc_t *soc_hdl,
+				     uint8_t vdev_id)
+{
+	struct dp_soc *soc = cdp_soc_t_to_dp_soc(soc_hdl);
+	struct dp_vdev *vdev;
+	uint8_t ret_val = 0;
+
+	vdev = dp_vdev_get_ref_by_id(soc, vdev_id, DP_MOD_ID_CDP);
+	if (!vdev)
+		return ret_val;
+
+	ret_val = vdev->hw_tx_delay_stats_enabled;
+	dp_vdev_unref_delete(soc, vdev, DP_MOD_ID_CDP);
+
+	return ret_val;
+}
+#endif
+
 static struct cdp_cmn_ops dp_ops_cmn = {
 	.txrx_soc_attach_target = dp_soc_attach_target_wifi3,
 	.txrx_vdev_attach = dp_vdev_attach_wifi3,
@@ -12526,6 +12587,11 @@ static struct cdp_host_stats_ops dp_ops_host_stats = {
 	.get_peer_tx_capture_stats = dp_peer_get_tx_capture_stats,
 	.get_pdev_tx_capture_stats = dp_pdev_get_tx_capture_stats,
 #endif /* WLAN_TX_PKT_CAPTURE_ENH */
+#ifdef HW_TX_DELAY_STATS_ENABLE
+	.enable_disable_vdev_tx_delay_stats =
+				dp_enable_disable_vdev_tx_delay_stats,
+	.is_tx_delay_stats_enabled = dp_check_vdev_tx_delay_stats_enabled,
+#endif
 	/* TODO */
 };
 
@@ -14062,11 +14128,37 @@ static uint8_t dp_bucket_index(uint32_t delay, uint16_t *array)
 	return (CDP_DELAY_BUCKET_MAX - 1);
 }
 
+#ifdef QCA_ENH_V3_STATS_SUPPORT
+/*
+ * cdp_fw_to_hw_delay_range: Fw to hw delay bucket bounds in milliseconds.
+ * Guard must match the fw_to_hw_delay_bucket[] label table in dp_stats.c.
+ */
+static uint16_t cdp_fw_to_hw_delay[CDP_DELAY_BUCKET_MAX] = {
+	0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 250, 500};
+#else
+static uint16_t cdp_fw_to_hw_delay[CDP_DELAY_BUCKET_MAX] = {
+	0, 2, 4, 6, 8, 10, 20, 30, 40, 50, 100, 250, 500};
+#endif
+
+/*
+ * cdp_sw_enq_delay_range
+ * Software enqueue delay ranges in milliseconds
+ */
+static uint16_t cdp_sw_enq_delay[CDP_DELAY_BUCKET_MAX] = {
+	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+
+/*
+ * cdp_intfrm_delay_range
+ * Interframe delay ranges in milliseconds
+ */
+static uint16_t cdp_intfrm_delay[CDP_DELAY_BUCKET_MAX] = {
+	0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60};
+
 /**
  * dp_fill_delay_buckets() - Fill delay statistics bucket for each
  *				type of delay
- *
- * @pdev: pdev handle
+ * @tstats: tid tx stats
+ * @rstats: tid rx stats
  * @delay: delay in ms
  * @tid: tid value
  * @mode: type of tx delay mode
@@ -14074,34 +14166,12 @@ static uint8_t dp_bucket_index(uint32_t delay, uint16_t *array)
  * Return: pointer to cdp_delay_stats structure
  */
 static struct cdp_delay_stats *
-dp_fill_delay_buckets(struct dp_pdev *pdev, uint32_t delay,
+dp_fill_delay_buckets(struct cdp_tid_tx_stats *tstats,
+		      struct cdp_tid_rx_stats *rstats, uint32_t delay,
 		      uint8_t tid, uint8_t mode, uint8_t ring_id)
 {
 	uint8_t delay_index = 0;
-	struct cdp_tid_tx_stats *tstats =
-		&pdev->stats.tid_stats.tid_tx_stats[ring_id][tid];
-	struct cdp_tid_rx_stats *rstats =
-		&pdev->stats.tid_stats.tid_rx_stats[ring_id][tid];
-	/*
-	 * cdp_fw_to_hw_delay_range
-	 * Fw to hw delay ranges in milliseconds
-	 */
-	uint16_t cdp_fw_to_hw_delay[CDP_DELAY_BUCKET_MAX] = {
-		0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 250, 500};
-
-	/*
-	 * cdp_sw_enq_delay_range
-	 * Software enqueue delay ranges in milliseconds
-	 */
-	uint16_t cdp_sw_enq_delay[CDP_DELAY_BUCKET_MAX] = {
-		0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
-
-	/*
-	 * cdp_intfrm_delay_range
-	 * Interframe delay ranges in milliseconds
-	 */
-	uint16_t cdp_intfrm_delay[CDP_DELAY_BUCKET_MAX] = {
-		0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60};
+	struct cdp_delay_stats *stats = NULL;
 
 	/*
 	 * Update delay stats in proper bucket
@@ -14109,57 +14179,62 @@ dp_fill_delay_buckets(struct dp_pdev *pdev, uint32_t delay,
 	switch (mode) {
 	/* Software Enqueue delay ranges */
 	case CDP_DELAY_STATS_SW_ENQ:
+		if (!tstats)
+			break;
 
 		delay_index = dp_bucket_index(delay, cdp_sw_enq_delay);
 		tstats->swq_delay.delay_bucket[delay_index]++;
-		return &tstats->swq_delay;
+		stats = &tstats->swq_delay;
+		break;
 
 	/* Tx Completion delay ranges */
 	case CDP_DELAY_STATS_FW_HW_TRANSMIT:
+		if (!tstats)
+			break;
 
 		delay_index = dp_bucket_index(delay, cdp_fw_to_hw_delay);
 		tstats->hwtx_delay.delay_bucket[delay_index]++;
-		return &tstats->hwtx_delay;
+		stats = &tstats->hwtx_delay;
+		break;
 
 	/* Interframe tx delay ranges */
 	case CDP_DELAY_STATS_TX_INTERFRAME:
+		if (!tstats)
+			break;
 
 		delay_index = dp_bucket_index(delay, cdp_intfrm_delay);
 		tstats->intfrm_delay.delay_bucket[delay_index]++;
-		return &tstats->intfrm_delay;
+		stats = &tstats->intfrm_delay;
+		break;
 
 	/* Interframe rx delay ranges */
 	case CDP_DELAY_STATS_RX_INTERFRAME:
+		if (!rstats)
+			break;
 
 		delay_index = dp_bucket_index(delay, cdp_intfrm_delay);
 		rstats->intfrm_delay.delay_bucket[delay_index]++;
-		return &rstats->intfrm_delay;
+		stats = &rstats->intfrm_delay;
+		break;
 
 	/* Ring reap to indication to network stack */
 	case CDP_DELAY_STATS_REAP_STACK:
+		if (!rstats)
+			break;
 
 		delay_index = dp_bucket_index(delay, cdp_intfrm_delay);
 		rstats->to_stack_delay.delay_bucket[delay_index]++;
-		return &rstats->to_stack_delay;
+		stats = &rstats->to_stack_delay;
+		break;
 	default:
 		dp_debug("Incorrect delay mode: %d", mode);
 	}
 
-	return NULL;
+	return stats;
 }
 
-/**
- * dp_update_delay_stats() - Update delay statistics in structure
- *				and fill min, max and avg delay
- *
- * @pdev: pdev handle
- * @delay: delay in ms
- * @tid: tid value
- * @mode: type of tx delay mode
- * @ring id: ring number
- * Return: none
- */
-void dp_update_delay_stats(struct dp_pdev *pdev, uint32_t delay,
+void dp_update_delay_stats(struct cdp_tid_tx_stats *tstats,
+			   struct cdp_tid_rx_stats *rstats, uint32_t delay,
 			   uint8_t tid, uint8_t mode, uint8_t ring_id)
 {
 	struct cdp_delay_stats *dstats = NULL;
@@ -14168,7 +14243,8 @@ void dp_update_delay_stats(struct dp_pdev *pdev, uint32_t delay,
 	 * Delay ranges are different for different delay modes
 	 * Get the correct index to update delay bucket
 	 */
-	dstats = dp_fill_delay_buckets(pdev, delay, tid, mode, ring_id);
+	dstats = dp_fill_delay_buckets(tstats, rstats, delay, tid, mode,
+				       ring_id);
 	if (qdf_unlikely(!dstats))
 		return;
 
@@ -14189,7 +14265,7 @@ void dp_update_delay_stats(struct dp_pdev *pdev, uint32_t delay,
 		if (!dstats->avg_delay)
 			dstats->avg_delay = delay;
 		else
-			dstats->avg_delay = ((delay + dstats->avg_delay) / 2);
+			dstats->avg_delay = ((delay + dstats->avg_delay) >> 1);
 	}
 }
 

+ 4 - 2
dp/wifi3.0/dp_rx.c

@@ -1618,8 +1618,10 @@ void dp_rx_compute_delay(struct dp_vdev *vdev, qdf_nbuf_t nbuf)
 	uint8_t tid = qdf_nbuf_get_tid_val(nbuf);
 	uint32_t interframe_delay =
 		(uint32_t)(current_ts - vdev->prev_rx_deliver_tstamp);
+	struct cdp_tid_rx_stats *rstats =
+		&vdev->pdev->stats.tid_stats.tid_rx_stats[ring_id][tid];
 
-	dp_update_delay_stats(vdev->pdev, to_stack, tid,
+	dp_update_delay_stats(NULL, rstats, to_stack, tid,
 			      CDP_DELAY_STATS_REAP_STACK, ring_id);
 	/*
 	 * Update interframe delay stats calculated at deliver_data_ol point.
@@ -1628,7 +1630,7 @@ void dp_rx_compute_delay(struct dp_vdev *vdev, qdf_nbuf_t nbuf)
 	 * On the other side, this will help in avoiding extra per packet check
 	 * of vdev->prev_rx_deliver_tstamp.
 	 */
-	dp_update_delay_stats(vdev->pdev, interframe_delay, tid,
+	dp_update_delay_stats(NULL, rstats, interframe_delay, tid,
 			      CDP_DELAY_STATS_RX_INTERFRAME, ring_id);
 	vdev->prev_rx_deliver_tstamp = current_ts;
 }

+ 181 - 21
dp/wifi3.0/dp_stats.c

@@ -280,7 +280,18 @@ const char *fw_to_hw_delay_bucket[CDP_DELAY_BUCKET_MAX + 1] = {
 	"81 to 90 ms", "91 to 100 ms",
 	"101 to 250 ms", "251 to 500 ms", "500+ ms"
 };
+#elif defined(HW_TX_DELAY_STATS_ENABLE)
+const char *fw_to_hw_delay_bucket[CDP_DELAY_BUCKET_MAX + 1] = {
+	"0 to 2 ms", "2 to 4 ms",
+	"4 to 6 ms", "6 to 8 ms",
+	"8 to 10 ms", "10 to 20 ms",
+	"20 to 30 ms", "30 to 40 ms",
+	"40 to 50 ms", "50 to 100 ms",
+	"100 to 250 ms", "250 to 500 ms", "500+ ms"
+};
+#endif
 
+#ifdef QCA_ENH_V3_STATS_SUPPORT
 const char *sw_enq_delay_bucket[CDP_DELAY_BUCKET_MAX + 1] = {
 	"0 to 1 ms", "1 to 2 ms",
 	"2 to 3 ms", "3 to 4 ms",
@@ -4547,7 +4558,7 @@ void dp_peer_stats_update_protocol_cnt(struct cdp_soc_t *soc_hdl,
 }
 #endif
 
-#ifdef QCA_ENH_V3_STATS_SUPPORT
+#if defined(QCA_ENH_V3_STATS_SUPPORT) || defined(HW_TX_DELAY_STATS_ENABLE)
 /**
  * dp_vow_str_fw_to_hw_delay() - Return string for a delay
  * @index: Index of delay
@@ -4562,6 +4573,28 @@ static inline const char *dp_vow_str_fw_to_hw_delay(uint8_t index)
 	return fw_to_hw_delay_bucket[index];
 }
 
+/**
+ * dp_accumulate_delay_stats() - Fold one ring's delay stats into a total
+ * @total: running total; buckets are summed, min/max/avg merged in place
+ * @per_ring: per ring delay stats to be accumulated into @total
+ * NOTE(review): a zeroed @total likely pins min_delay at 0 - confirm intent
+ * Return: void
+ */
+static void
+dp_accumulate_delay_stats(struct cdp_delay_stats *total,
+			  struct cdp_delay_stats *per_ring)
+{
+	uint8_t index;
+
+	for (index = 0; index < CDP_DELAY_BUCKET_MAX; index++)
+		total->delay_bucket[index] += per_ring->delay_bucket[index];
+	total->min_delay = QDF_MIN(total->min_delay, per_ring->min_delay);
+	total->max_delay = QDF_MAX(total->max_delay, per_ring->max_delay);
+	total->avg_delay = ((total->avg_delay + per_ring->avg_delay) >> 1);
+}
+#endif
+
+#ifdef QCA_ENH_V3_STATS_SUPPORT
 /**
  * dp_vow_str_sw_enq_delay() - Return string for a delay
  * @index: Index of delay
@@ -4590,26 +4623,6 @@ static inline const char *dp_vow_str_intfrm_delay(uint8_t index)
 	return intfrm_delay_bucket[index];
 }
 
-/**
- * dp_accumulate_delay_stats() - Update delay stats members
- * @total: Update stats total structure
- * @per_ring: per ring structures from where stats need to be accumulated
- *
- * Return: void
- */
-static void
-dp_accumulate_delay_stats(struct cdp_delay_stats *total,
-			  struct cdp_delay_stats *per_ring)
-{
-	uint8_t index;
-
-	for (index = 0; index < CDP_DELAY_BUCKET_MAX; index++)
-		total->delay_bucket[index] += per_ring->delay_bucket[index];
-	total->min_delay = QDF_MIN(total->min_delay, per_ring->min_delay);
-	total->max_delay = QDF_MAX(total->max_delay, per_ring->max_delay);
-	total->avg_delay = (total->avg_delay + per_ring->avg_delay) / 2;
-}
-
 /**
  * dp_accumulate_tid_stats() - Accumulate TID stats from each ring
  * @pdev: pdev handle
@@ -4918,6 +4931,153 @@ void dp_pdev_print_rx_error_stats(struct dp_pdev *pdev)
 }
 #endif
 
+#ifdef HW_TX_DELAY_STATS_ENABLE
+static void dp_vdev_print_tx_delay_stats(struct dp_vdev *vdev)
+{
+	struct cdp_delay_stats delay_stats;
+	struct cdp_tid_tx_stats *per_ring;
+	uint8_t tid, index;
+	uint64_t count = 0;
+	uint8_t ring_id;
+
+	if (!vdev)
+		return;
+
+	DP_PRINT_STATS("vdev_id: %d Per TID Delay Non-Zero Stats:\n",
+		       vdev->vdev_id);
+	for (tid = 0; tid < CDP_MAX_DATA_TIDS; tid++) {
+		qdf_mem_zero(&delay_stats, sizeof(delay_stats));
+		for (ring_id = 0; ring_id < CDP_MAX_TX_COMP_RINGS; ring_id++) {
+			per_ring = &vdev->stats.tid_tx_stats[ring_id][tid];
+			dp_accumulate_delay_stats(&delay_stats,
+						  &per_ring->hwtx_delay);
+		}
+
+		DP_PRINT_STATS("Hardware Tx completion latency stats TID: %d",
+			       tid);
+		for (index = 0; index < CDP_DELAY_BUCKET_MAX; index++) {
+			count = delay_stats.delay_bucket[index];
+			if (count) {
+				DP_PRINT_STATS("%s:  Packets = %llu",
+					       dp_vow_str_fw_to_hw_delay(index),
+					       count);
+			}
+		}
+
+		DP_PRINT_STATS("Min = %u", delay_stats.min_delay);
+		DP_PRINT_STATS("Max = %u", delay_stats.max_delay);
+		DP_PRINT_STATS("Avg = %u\n", delay_stats.avg_delay);
+	}
+}
+
+void dp_pdev_print_tx_delay_stats(struct dp_soc *soc)
+{
+	struct dp_pdev *pdev = dp_get_pdev_from_soc_pdev_id_wifi3(soc, 0);
+	struct dp_vdev *vdev;
+	struct dp_vdev **vdev_array = NULL;
+	int index = 0, num_vdev = 0;
+
+	if (!pdev) {
+		dp_err("pdev is NULL");
+		return;
+	}
+
+	vdev_array =
+		qdf_mem_malloc(sizeof(struct dp_vdev *) * WLAN_PDEV_MAX_VDEVS);
+	if (!vdev_array)
+		return;
+
+	qdf_spin_lock_bh(&pdev->vdev_list_lock);
+	DP_PDEV_ITERATE_VDEV_LIST(pdev, vdev) {
+		if (dp_vdev_get_ref(soc, vdev, DP_MOD_ID_GENERIC_STATS))
+			continue;
+		vdev_array[index] = vdev;
+		index = index + 1;
+	}
+	qdf_spin_unlock_bh(&pdev->vdev_list_lock);
+
+	num_vdev = index;
+
+	for (index = 0; index < num_vdev; index++) {
+		vdev = vdev_array[index];
+		dp_vdev_print_tx_delay_stats(vdev);
+		dp_vdev_unref_delete(soc, vdev, DP_MOD_ID_GENERIC_STATS);
+	}
+	qdf_mem_free(vdev_array);
+}
+
+/**
+ * dp_reset_delay_stats() - reset delay stats
+ * @per_ring: per ring delay stats structure to be zeroed in full
+ *
+ * Return: void
+ */
+static void dp_reset_delay_stats(struct cdp_delay_stats *per_ring)
+{
+	qdf_mem_zero(per_ring, sizeof(struct cdp_delay_stats));
+}
+
+/**
+ * dp_vdev_init_tx_delay_stats() - Clear hwtx_delay stats for every ring/TID
+ * @vdev: vdev handle; a NULL vdev is tolerated and makes this a no-op
+ *
+ * Return: None
+ */
+static void dp_vdev_init_tx_delay_stats(struct dp_vdev *vdev)
+{
+	struct cdp_tid_tx_stats *per_ring;
+	uint8_t tid;
+	uint8_t ring_id;
+
+	if (!vdev)
+		return;
+
+	for (tid = 0; tid < CDP_MAX_DATA_TIDS; tid++) {
+		for (ring_id = 0; ring_id < CDP_MAX_TX_COMP_RINGS; ring_id++) {
+			per_ring = &vdev->stats.tid_tx_stats[ring_id][tid];
+			dp_reset_delay_stats(&per_ring->hwtx_delay);
+		}
+	}
+}
+
+void dp_pdev_clear_tx_delay_stats(struct dp_soc *soc)
+{
+	struct dp_pdev *pdev = dp_get_pdev_from_soc_pdev_id_wifi3(soc, 0);
+	struct dp_vdev *vdev;
+	struct dp_vdev **vdev_array = NULL;
+	int index = 0, num_vdev = 0;
+
+	if (!pdev) {
+		dp_err("pdev is NULL");
+		return;
+	}
+
+	vdev_array =
+		qdf_mem_malloc(sizeof(struct dp_vdev *) * WLAN_PDEV_MAX_VDEVS);
+	if (!vdev_array)
+		return;
+
+	qdf_spin_lock_bh(&pdev->vdev_list_lock);
+	DP_PDEV_ITERATE_VDEV_LIST(pdev, vdev) {
+		if (dp_vdev_get_ref(soc, vdev, DP_MOD_ID_GENERIC_STATS) !=
+		    QDF_STATUS_SUCCESS)
+			continue;
+		vdev_array[index] = vdev;
+		index = index + 1;
+	}
+	qdf_spin_unlock_bh(&pdev->vdev_list_lock);
+
+	num_vdev = index;
+
+	for (index = 0; index < num_vdev; index++) {
+		vdev = vdev_array[index];
+		dp_vdev_init_tx_delay_stats(vdev);
+		dp_vdev_unref_delete(soc, vdev, DP_MOD_ID_GENERIC_STATS);
+	}
+	qdf_mem_free(vdev_array);
+}
+#endif
+
 void dp_print_soc_cfg_params(struct dp_soc *soc)
 {
 	struct wlan_cfg_dp_soc_ctxt *soc_cfg_ctx;

+ 46 - 13
dp/wifi3.0/dp_tx.c

@@ -3854,6 +3854,28 @@ static inline void dp_tx_update_peer_delay_stats(struct dp_txrx_peer *txrx_peer,
 }
 #endif
 
+#ifdef HW_TX_DELAY_STATS_ENABLE
+static inline
+void dp_update_tx_delay_stats(struct dp_vdev *vdev, uint32_t delay, uint8_t tid,
+			      uint8_t mode, uint8_t ring_id)
+{
+	struct cdp_tid_tx_stats *tstats =
+		&vdev->stats.tid_tx_stats[ring_id][tid];
+
+	dp_update_delay_stats(tstats, NULL, delay, tid, mode, ring_id);
+}
+#else
+static inline
+void dp_update_tx_delay_stats(struct dp_vdev *vdev, uint32_t delay, uint8_t tid,
+			      uint8_t mode, uint8_t ring_id)
+{
+	struct cdp_tid_tx_stats *tstats =
+		&vdev->pdev->stats.tid_stats.tid_tx_stats[ring_id][tid];
+
+	dp_update_delay_stats(tstats, NULL, delay, tid, mode, ring_id);
+}
+#endif
+
 /**
  * dp_tx_compute_delay() - Compute and fill in all timestamps
  *				to pass in correct fields
@@ -3870,28 +3892,38 @@ void dp_tx_compute_delay(struct dp_vdev *vdev, struct dp_tx_desc_s *tx_desc,
 	int64_t current_timestamp, timestamp_ingress, timestamp_hw_enqueue;
 	uint32_t sw_enqueue_delay, fwhw_transmit_delay, interframe_delay;
 
-	if (qdf_likely(!vdev->pdev->delay_stats_flag))
+	if (qdf_likely(!vdev->pdev->delay_stats_flag) &&
+	    qdf_likely(!dp_is_vdev_tx_delay_stats_enabled(vdev)))
 		return;
 
 	current_timestamp = qdf_ktime_to_ms(qdf_ktime_real_get());
-	timestamp_ingress = qdf_nbuf_get_timestamp(tx_desc->nbuf);
 	timestamp_hw_enqueue = tx_desc->timestamp;
-	sw_enqueue_delay = (uint32_t)(timestamp_hw_enqueue - timestamp_ingress);
 	fwhw_transmit_delay = (uint32_t)(current_timestamp -
 					 timestamp_hw_enqueue);
+
+	/*
+	 * Delay between packet enqueued to HW and Tx completion
+	 */
+	dp_update_tx_delay_stats(vdev, fwhw_transmit_delay, tid,
+				 CDP_DELAY_STATS_FW_HW_TRANSMIT, ring_id);
+
+	/*
+	 * For MCL, only enqueue to completion delay is required
+	 * so return if the vdev flag is enabled.
+	 */
+	if (dp_is_vdev_tx_delay_stats_enabled(vdev))
+		return;
+
+	timestamp_ingress = qdf_nbuf_get_timestamp(tx_desc->nbuf);
+	sw_enqueue_delay = (uint32_t)(timestamp_hw_enqueue - timestamp_ingress);
 	interframe_delay = (uint32_t)(timestamp_ingress -
 				      vdev->prev_tx_enq_tstamp);
 
 	/*
 	 * Delay in software enqueue
 	 */
-	dp_update_delay_stats(vdev->pdev, sw_enqueue_delay, tid,
-			      CDP_DELAY_STATS_SW_ENQ, ring_id);
-	/*
-	 * Delay between packet enqueued to HW and Tx completion
-	 */
-	dp_update_delay_stats(vdev->pdev, fwhw_transmit_delay, tid,
-			      CDP_DELAY_STATS_FW_HW_TRANSMIT, ring_id);
+	dp_update_tx_delay_stats(vdev, sw_enqueue_delay, tid,
+				 CDP_DELAY_STATS_SW_ENQ, ring_id);
 
 	/*
 	 * Update interframe delay stats calculated at hardstart receive point.
@@ -3900,8 +3932,8 @@ void dp_tx_compute_delay(struct dp_vdev *vdev, struct dp_tx_desc_s *tx_desc,
 	 * On the other side, this will help in avoiding extra per packet check
 	 * of !vdev->prev_tx_enq_tstamp.
 	 */
-	dp_update_delay_stats(vdev->pdev, interframe_delay, tid,
-			      CDP_DELAY_STATS_TX_INTERFRAME, ring_id);
+	dp_update_tx_delay_stats(vdev, interframe_delay, tid,
+				 CDP_DELAY_STATS_TX_INTERFRAME, ring_id);
 	vdev->prev_tx_enq_tstamp = timestamp_ingress;
 }
 
@@ -4034,7 +4066,8 @@ dp_tx_update_peer_stats(struct dp_tx_desc_s *tx_desc,
 	length = qdf_nbuf_len(tx_desc->nbuf);
 	DP_PEER_STATS_FLAT_INC_PKT(txrx_peer, comp_pkt, 1, length);
 
-	if (qdf_unlikely(pdev->delay_stats_flag))
+	if (qdf_unlikely(pdev->delay_stats_flag) ||
+	    qdf_unlikely(dp_is_vdev_tx_delay_stats_enabled(txrx_peer->vdev)))
 		dp_tx_compute_delay(txrx_peer->vdev, tx_desc, tid, ring_id);
 
 	if (ts->status < CDP_MAX_TX_TQM_STATUS) {

+ 37 - 0
dp/wifi3.0/dp_tx.h

@@ -998,4 +998,41 @@ static inline bool dp_sawf_tag_valid_get(qdf_nbuf_t nbuf)
 }
 #endif
 
+#ifdef HW_TX_DELAY_STATS_ENABLE
+/**
+ * dp_tx_desc_set_ktimestamp() - set kernel timestamp in tx descriptor
+ * @vdev: DP vdev handle
+ * @tx_desc: tx descriptor
+ *
+ * Return: true when descriptor is timestamped, false otherwise
+ */
+static inline
+bool dp_tx_desc_set_ktimestamp(struct dp_vdev *vdev,
+			       struct dp_tx_desc_s *tx_desc)
+{
+	if (qdf_unlikely(vdev->pdev->delay_stats_flag) ||
+	    qdf_unlikely(vdev->pdev->soc->wlan_cfg_ctx->pext_stats_enabled) ||
+	    qdf_unlikely(dp_tx_pkt_tracepoints_enabled()) ||
+	    qdf_unlikely(vdev->pdev->soc->rdkstats_enabled) ||
+	    qdf_unlikely(dp_is_vdev_tx_delay_stats_enabled(vdev))) {
+		tx_desc->timestamp = qdf_ktime_to_ms(qdf_ktime_real_get());
+		return true;
+	}
+	return false;
+}
+#else
+static inline
+bool dp_tx_desc_set_ktimestamp(struct dp_vdev *vdev,
+			       struct dp_tx_desc_s *tx_desc)
+{
+	if (qdf_unlikely(vdev->pdev->delay_stats_flag) ||
+	    qdf_unlikely(vdev->pdev->soc->wlan_cfg_ctx->pext_stats_enabled) ||
+	    qdf_unlikely(dp_tx_pkt_tracepoints_enabled()) ||
+	    qdf_unlikely(vdev->pdev->soc->rdkstats_enabled)) {
+		tx_desc->timestamp = qdf_ktime_to_ms(qdf_ktime_real_get());
+		return true;
+	}
+	return false;
+}
+#endif
 #endif

+ 4 - 0
dp/wifi3.0/dp_types.h

@@ -3166,6 +3166,10 @@ struct dp_vdev {
 
 	/* vdev_stats_id - ID used for stats collection by FW from HW*/
 	uint8_t vdev_stats_id;
+#ifdef HW_TX_DELAY_STATS_ENABLE
+	/* hw tx delay stats enable */
+	uint8_t hw_tx_delay_stats_enabled;
+#endif
 };
 
 enum {

+ 3 - 8
dp/wifi3.0/li/dp_li_tx.c

@@ -184,7 +184,8 @@ void dp_tx_process_htt_completion_li(struct dp_soc *soc,
 
 		tid_stats = &pdev->stats.tid_stats.tid_tx_stats[ring_id][tid];
 
-		if (qdf_unlikely(pdev->delay_stats_flag))
+		if (qdf_unlikely(pdev->delay_stats_flag) ||
+		    qdf_unlikely(dp_is_vdev_tx_delay_stats_enabled(vdev)))
 			dp_tx_compute_delay(vdev, tx_desc, tid, ring_id);
 		if (tx_status < CDP_MAX_TX_HTT_STATUS)
 			tid_stats->htt_status_cnt[tx_status]++;
@@ -460,13 +461,7 @@ dp_tx_hw_enqueue_li(struct dp_soc *soc, struct dp_vdev *vdev,
 	if (tx_desc->flags & DP_TX_DESC_FLAG_MESH)
 		hal_tx_desc_set_mesh_en(soc->hal_soc, hal_tx_desc_cached, 1);
 
-	if (qdf_unlikely(vdev->pdev->delay_stats_flag) ||
-	    qdf_unlikely(
-		       wlan_cfg_is_peer_ext_stats_enabled(soc->wlan_cfg_ctx)) ||
-		dp_tx_pkt_tracepoints_enabled() ||
-		qdf_unlikely(soc->rdkstats_enabled))
-		tx_desc->timestamp = qdf_ktime_to_ms(qdf_ktime_real_get());
-	else
+	if (!dp_tx_desc_set_ktimestamp(vdev, tx_desc))
 		dp_tx_desc_set_timestamp(tx_desc);
 
 	dp_verbose_debug("length:%d , type = %d, dma_addr %llx, offset %d desc id %u",