Explorar o código

qcacmn: Microsecond time stamping for hw tx latency stats

Use microsecond timestamping for the tx descriptors
for capturing the latencies more accurately. It will help
to determine the appropriate bucket.

Change-Id: I216e083a7c89b01b6f2f384c1c0a85ca323d3a3e
CRs-Fixed: 3165153
Subrat Dash hai 3 anos
pai
achega
46d50239e8

+ 4 - 1
dp/wifi3.0/dp_internal.h

@@ -2119,11 +2119,14 @@ bool dp_check_pdev_exists(struct dp_soc *soc, struct dp_pdev *data);
  * @tid: tid value
  * @mode: type of tx delay mode
  * @ring id: ring number
+ * @delay_in_us: flag to indicate whether the delay is in ms or us
+ *
  * Return: none
  */
 void dp_update_delay_stats(struct cdp_tid_tx_stats *tstats,
 			   struct cdp_tid_rx_stats *rstats, uint32_t delay,
-			   uint8_t tid, uint8_t mode, uint8_t ring_id);
+			   uint8_t tid, uint8_t mode, uint8_t ring_id,
+			   bool delay_in_us);
 
 /**
  * dp_print_ring_stats(): Print tail and head pointer

+ 38 - 19
dp/wifi3.0/dp_main.c

@@ -12753,23 +12753,25 @@ void dp_flush_ring_hptp(struct dp_soc *soc, hal_ring_handle_t hal_srng)
 #define DP_TX_COMP_MAX_LATENCY_MS 30000
 /**
  * dp_tx_comp_delay_check() - calculate time latency for tx completion per pkt
- * @timestamp - tx descriptor timestamp
+ * @tx_desc: tx descriptor
  *
  * Calculate time latency for tx completion per pkt and trigger self recovery
  * when the delay is more than threshold value.
  *
  * Return: True if delay is more than threshold
  */
-static bool dp_tx_comp_delay_check(uint64_t timestamp)
+static bool dp_tx_comp_delay_check(struct dp_tx_desc_s *tx_desc)
 {
-	uint64_t time_latency, current_time;
+	uint64_t time_latency, timestamp_tick = tx_desc->timestamp_tick;
+	qdf_ktime_t current_time = qdf_ktime_real_get();
+	qdf_ktime_t timestamp = tx_desc->timestamp;
 
 	if (!timestamp)
 		return false;
 
 	if (dp_tx_pkt_tracepoints_enabled()) {
-		current_time = qdf_ktime_to_ms(qdf_ktime_real_get());
-		time_latency = current_time - timestamp;
+		time_latency = qdf_ktime_to_ms(current_time) -
+				qdf_ktime_to_ms(timestamp);
 		if (time_latency >= DP_TX_COMP_MAX_LATENCY_MS) {
 			dp_err_rl("enqueued: %llu ms, current : %llu ms",
 				  timestamp, current_time);
@@ -12778,7 +12780,7 @@ static bool dp_tx_comp_delay_check(uint64_t timestamp)
 	} else {
 		current_time = qdf_system_ticks();
 		time_latency = qdf_system_ticks_to_msecs(current_time -
-							 timestamp);
+							 timestamp_tick);
 		if (time_latency >= DP_TX_COMP_MAX_LATENCY_MS) {
 			dp_err_rl("enqueued: %u ms, current : %u ms",
 				  qdf_system_ticks_to_msecs(timestamp),
@@ -12833,8 +12835,7 @@ static void dp_find_missing_tx_comp(struct dp_soc *soc)
 				continue;
 			} else if (tx_desc->magic ==
 				   DP_TX_MAGIC_PATTERN_INUSE) {
-				if (dp_tx_comp_delay_check(
-							tx_desc->timestamp)) {
+				if (dp_tx_comp_delay_check(tx_desc)) {
 					dp_err_rl("Tx completion not rcvd for id: %u",
 						  tx_desc->id);
 
@@ -14195,18 +14196,27 @@ static void dp_clear_cfr_dbg_stats(struct cdp_soc_t *soc_hdl,
  *
  * @delay: delay measured
  * @array: array used to index corresponding delay
+ * @delay_in_us: flag to indicate whether the delay is in ms or us
  *
  * Return: index
  */
-static uint8_t dp_bucket_index(uint32_t delay, uint16_t *array)
+static uint8_t
+dp_bucket_index(uint32_t delay, uint16_t *array, bool delay_in_us)
 {
 	uint8_t i = CDP_DELAY_BUCKET_0;
+	uint32_t thr_low, thr_high;
 
 	for (; i < CDP_DELAY_BUCKET_MAX - 1; i++) {
-		if (delay >= array[i] && delay < array[i + 1])
+		thr_low = array[i];
+		thr_high = array[i + 1];
+
+		if (delay_in_us) {
+			thr_low = thr_low * USEC_PER_MSEC;
+			thr_high = thr_high * USEC_PER_MSEC;
+		}
+		if (delay >= thr_low && delay <= thr_high)
 			return i;
 	}
-
 	return (CDP_DELAY_BUCKET_MAX - 1);
 }
 
@@ -14245,12 +14255,15 @@ static uint16_t cdp_intfrm_delay[CDP_DELAY_BUCKET_MAX] = {
  * @tid: tid value
  * @mode: type of tx delay mode
  * @ring_id: ring number
+ * @delay_in_us: flag to indicate whether the delay is in ms or us
+ *
  * Return: pointer to cdp_delay_stats structure
  */
 static struct cdp_delay_stats *
 dp_fill_delay_buckets(struct cdp_tid_tx_stats *tstats,
 		      struct cdp_tid_rx_stats *rstats, uint32_t delay,
-		      uint8_t tid, uint8_t mode, uint8_t ring_id)
+		      uint8_t tid, uint8_t mode, uint8_t ring_id,
+		      bool delay_in_us)
 {
 	uint8_t delay_index = 0;
 	struct cdp_delay_stats *stats = NULL;
@@ -14264,7 +14277,8 @@ dp_fill_delay_buckets(struct cdp_tid_tx_stats *tstats,
 		if (!tstats)
 			break;
 
-		delay_index = dp_bucket_index(delay, cdp_sw_enq_delay);
+		delay_index = dp_bucket_index(delay, cdp_sw_enq_delay,
+					      delay_in_us);
 		tstats->swq_delay.delay_bucket[delay_index]++;
 		stats = &tstats->swq_delay;
 		break;
@@ -14274,7 +14288,8 @@ dp_fill_delay_buckets(struct cdp_tid_tx_stats *tstats,
 		if (!tstats)
 			break;
 
-		delay_index = dp_bucket_index(delay, cdp_fw_to_hw_delay);
+		delay_index = dp_bucket_index(delay, cdp_fw_to_hw_delay,
+					      delay_in_us);
 		tstats->hwtx_delay.delay_bucket[delay_index]++;
 		stats = &tstats->hwtx_delay;
 		break;
@@ -14284,7 +14299,8 @@ dp_fill_delay_buckets(struct cdp_tid_tx_stats *tstats,
 		if (!tstats)
 			break;
 
-		delay_index = dp_bucket_index(delay, cdp_intfrm_delay);
+		delay_index = dp_bucket_index(delay, cdp_intfrm_delay,
+					      delay_in_us);
 		tstats->intfrm_delay.delay_bucket[delay_index]++;
 		stats = &tstats->intfrm_delay;
 		break;
@@ -14294,7 +14310,8 @@ dp_fill_delay_buckets(struct cdp_tid_tx_stats *tstats,
 		if (!rstats)
 			break;
 
-		delay_index = dp_bucket_index(delay, cdp_intfrm_delay);
+		delay_index = dp_bucket_index(delay, cdp_intfrm_delay,
+					      delay_in_us);
 		rstats->intfrm_delay.delay_bucket[delay_index]++;
 		stats = &rstats->intfrm_delay;
 		break;
@@ -14304,7 +14321,8 @@ dp_fill_delay_buckets(struct cdp_tid_tx_stats *tstats,
 		if (!rstats)
 			break;
 
-		delay_index = dp_bucket_index(delay, cdp_intfrm_delay);
+		delay_index = dp_bucket_index(delay, cdp_intfrm_delay,
+					      delay_in_us);
 		rstats->to_stack_delay.delay_bucket[delay_index]++;
 		stats = &rstats->to_stack_delay;
 		break;
@@ -14317,7 +14335,8 @@ dp_fill_delay_buckets(struct cdp_tid_tx_stats *tstats,
 
 void dp_update_delay_stats(struct cdp_tid_tx_stats *tstats,
 			   struct cdp_tid_rx_stats *rstats, uint32_t delay,
-			   uint8_t tid, uint8_t mode, uint8_t ring_id)
+			   uint8_t tid, uint8_t mode, uint8_t ring_id,
+			   bool delay_in_us)
 {
 	struct cdp_delay_stats *dstats = NULL;
 
@@ -14326,7 +14345,7 @@ void dp_update_delay_stats(struct cdp_tid_tx_stats *tstats,
 	 * Get the correct index to update delay bucket
 	 */
 	dstats = dp_fill_delay_buckets(tstats, rstats, delay, tid, mode,
-				       ring_id);
+				       ring_id, delay_in_us);
 	if (qdf_unlikely(!dstats))
 		return;
 

+ 2 - 2
dp/wifi3.0/dp_rx.c

@@ -1624,7 +1624,7 @@ void dp_rx_compute_delay(struct dp_vdev *vdev, qdf_nbuf_t nbuf)
 		&vdev->pdev->stats.tid_stats.tid_rx_stats[ring_id][tid];
 
 	dp_update_delay_stats(NULL, rstats, to_stack, tid,
-			      CDP_DELAY_STATS_REAP_STACK, ring_id);
+			      CDP_DELAY_STATS_REAP_STACK, ring_id, false);
 	/*
 	 * Update interframe delay stats calculated at deliver_data_ol point.
 	 * Value of vdev->prev_rx_deliver_tstamp will be 0 for 1st frame, so
@@ -1633,7 +1633,7 @@ void dp_rx_compute_delay(struct dp_vdev *vdev, qdf_nbuf_t nbuf)
 	 * of vdev->prev_rx_deliver_tstamp.
 	 */
 	dp_update_delay_stats(NULL, rstats, interframe_delay, tid,
-			      CDP_DELAY_STATS_RX_INTERFRAME, ring_id);
+			      CDP_DELAY_STATS_RX_INTERFRAME, ring_id, false);
 	vdev->prev_rx_deliver_tstamp = current_ts;
 }
 

+ 48 - 20
dp/wifi3.0/dp_tx.c

@@ -3858,7 +3858,7 @@ static void dp_tx_compute_tid_delay(struct cdp_delay_tid_stats *stats,
 
 	current_timestamp = qdf_ktime_to_ms(qdf_ktime_real_get());
 	timestamp_ingress = qdf_nbuf_get_timestamp(tx_desc->nbuf);
-	timestamp_hw_enqueue = tx_desc->timestamp;
+	timestamp_hw_enqueue = qdf_ktime_to_ms(tx_desc->timestamp);
 	sw_enqueue_delay = (uint32_t)(timestamp_hw_enqueue - timestamp_ingress);
 	fwhw_transmit_delay = (uint32_t)(current_timestamp -
 					 timestamp_hw_enqueue);
@@ -3917,24 +3917,37 @@ static inline void dp_tx_update_peer_delay_stats(struct dp_txrx_peer *txrx_peer,
 #endif
 
 #ifdef HW_TX_DELAY_STATS_ENABLE
+/**
+ * dp_update_tx_delay_stats() - update the delay stats
+ * @vdev: vdev handle
+ * @delay: delay in ms or us based on the flag delay_in_us
+ * @tid: tid value
+ * @mode: type of tx delay mode
+ * @ring_id: ring number
+ * @delay_in_us: flag to indicate whether the delay is in ms or us
+ *
+ * Return: none
+ */
 static inline
 void dp_update_tx_delay_stats(struct dp_vdev *vdev, uint32_t delay, uint8_t tid,
-			      uint8_t mode, uint8_t ring_id)
+			      uint8_t mode, uint8_t ring_id, bool delay_in_us)
 {
 	struct cdp_tid_tx_stats *tstats =
 		&vdev->stats.tid_tx_stats[ring_id][tid];
 
-	dp_update_delay_stats(tstats, NULL, delay, tid, mode, ring_id);
+	dp_update_delay_stats(tstats, NULL, delay, tid, mode, ring_id,
+			      delay_in_us);
 }
 #else
 static inline
 void dp_update_tx_delay_stats(struct dp_vdev *vdev, uint32_t delay, uint8_t tid,
-			      uint8_t mode, uint8_t ring_id)
+			      uint8_t mode, uint8_t ring_id, bool delay_in_us)
 {
 	struct cdp_tid_tx_stats *tstats =
 		&vdev->pdev->stats.tid_stats.tid_tx_stats[ring_id][tid];
 
-	dp_update_delay_stats(tstats, NULL, delay, tid, mode, ring_id);
+	dp_update_delay_stats(tstats, NULL, delay, tid, mode, ring_id,
+			      delay_in_us);
 }
 #endif
 
@@ -3953,28 +3966,41 @@ void dp_tx_compute_delay(struct dp_vdev *vdev, struct dp_tx_desc_s *tx_desc,
 {
 	int64_t current_timestamp, timestamp_ingress, timestamp_hw_enqueue;
 	uint32_t sw_enqueue_delay, fwhw_transmit_delay, interframe_delay;
+	uint32_t fwhw_transmit_delay_us;
 
 	if (qdf_likely(!vdev->pdev->delay_stats_flag) &&
 	    qdf_likely(!dp_is_vdev_tx_delay_stats_enabled(vdev)))
 		return;
 
+	if (dp_is_vdev_tx_delay_stats_enabled(vdev)) {
+		fwhw_transmit_delay_us =
+			qdf_ktime_to_us(qdf_ktime_real_get()) -
+			qdf_ktime_to_us(tx_desc->timestamp);
+
+		/*
+		 * Delay between packet enqueued to HW and Tx completion in us
+		 */
+		dp_update_tx_delay_stats(vdev, fwhw_transmit_delay_us, tid,
+					 CDP_DELAY_STATS_FW_HW_TRANSMIT,
+					 ring_id, true);
+		/*
+		 * For MCL, only enqueue to completion delay is required
+		 * so return if the vdev flag is enabled.
+		 */
+		return;
+	}
+
 	current_timestamp = qdf_ktime_to_ms(qdf_ktime_real_get());
-	timestamp_hw_enqueue = tx_desc->timestamp;
+	timestamp_hw_enqueue = qdf_ktime_to_ms(tx_desc->timestamp);
 	fwhw_transmit_delay = (uint32_t)(current_timestamp -
 					 timestamp_hw_enqueue);
 
 	/*
-	 * Delay between packet enqueued to HW and Tx completion
+	 * Delay between packet enqueued to HW and Tx completion in ms
 	 */
 	dp_update_tx_delay_stats(vdev, fwhw_transmit_delay, tid,
-				 CDP_DELAY_STATS_FW_HW_TRANSMIT, ring_id);
-
-	/*
-	 * For MCL, only enqueue to completion delay is required
-	 * so return if the vdev flag is enabled.
-	 */
-	if (dp_is_vdev_tx_delay_stats_enabled(vdev))
-		return;
+				 CDP_DELAY_STATS_FW_HW_TRANSMIT, ring_id,
+				 false);
 
 	timestamp_ingress = qdf_nbuf_get_timestamp(tx_desc->nbuf);
 	sw_enqueue_delay = (uint32_t)(timestamp_hw_enqueue - timestamp_ingress);
@@ -3985,7 +4011,8 @@ void dp_tx_compute_delay(struct dp_vdev *vdev, struct dp_tx_desc_s *tx_desc,
 	 * Delay in software enqueue
 	 */
 	dp_update_tx_delay_stats(vdev, sw_enqueue_delay, tid,
-				 CDP_DELAY_STATS_SW_ENQ, ring_id);
+				 CDP_DELAY_STATS_SW_ENQ, ring_id,
+				 false);
 
 	/*
 	 * Update interframe delay stats calculated at hardstart receive point.
@@ -3995,7 +4022,8 @@ void dp_tx_compute_delay(struct dp_vdev *vdev, struct dp_tx_desc_s *tx_desc,
 	 * of !vdev->prev_tx_enq_tstamp.
 	 */
 	dp_update_tx_delay_stats(vdev, interframe_delay, tid,
-				 CDP_DELAY_STATS_TX_INTERFRAME, ring_id);
+				 CDP_DELAY_STATS_TX_INTERFRAME, ring_id,
+				 false);
 	vdev->prev_tx_enq_tstamp = timestamp_ingress;
 }
 
@@ -4413,7 +4441,7 @@ dp_tx_comp_process_desc(struct dp_soc *soc,
 	 */
 	if (qdf_unlikely(!!desc->pdev->latency_capture_enable)) {
 		time_latency = (qdf_ktime_to_ms(qdf_ktime_real_get()) -
-				desc->timestamp);
+				qdf_ktime_to_ms(desc->timestamp));
 	}
 
 	dp_send_completion_to_pkt_capture(soc, desc, ts);
@@ -4422,7 +4450,7 @@ dp_tx_comp_process_desc(struct dp_soc *soc,
 		qdf_trace_dp_packet(desc->nbuf, QDF_TX,
 				    desc->msdu_ext_desc ?
 				    desc->msdu_ext_desc->tso_desc : NULL,
-				    desc->timestamp);
+				    qdf_ktime_to_ms(desc->timestamp));
 
 	if (!(desc->msdu_ext_desc)) {
 		dp_tx_enh_unmap(soc, desc);
@@ -4758,7 +4786,7 @@ void dp_tx_comp_process_tx_status(struct dp_soc *soc,
 #ifdef QCA_SUPPORT_RDK_STATS
 	if (soc->peerstats_enabled)
 		dp_tx_sojourn_stats_process(vdev->pdev, txrx_peer, ts->tid,
-					    tx_desc->timestamp,
+					    qdf_ktime_to_ms(tx_desc->timestamp),
 					    ts->ppdu_id);
 #endif
 

+ 3 - 3
dp/wifi3.0/dp_tx.h

@@ -975,7 +975,7 @@ bool dp_tx_pkt_tracepoints_enabled(void)
 static inline
 void dp_tx_desc_set_timestamp(struct dp_tx_desc_s *tx_desc)
 {
-	tx_desc->timestamp = qdf_system_ticks();
+	tx_desc->timestamp_tick = qdf_system_ticks();
 }
 
 /**
@@ -1024,7 +1024,7 @@ bool dp_tx_desc_set_ktimestamp(struct dp_vdev *vdev,
 	    qdf_unlikely(dp_tx_pkt_tracepoints_enabled()) ||
 	    qdf_unlikely(vdev->pdev->soc->peerstats_enabled) ||
 	    qdf_unlikely(dp_is_vdev_tx_delay_stats_enabled(vdev))) {
-		tx_desc->timestamp = qdf_ktime_to_ms(qdf_ktime_real_get());
+		tx_desc->timestamp = qdf_ktime_real_get();
 		return true;
 	}
 	return false;
@@ -1038,7 +1038,7 @@ bool dp_tx_desc_set_ktimestamp(struct dp_vdev *vdev,
 	    qdf_unlikely(vdev->pdev->soc->wlan_cfg_ctx->pext_stats_enabled) ||
 	    qdf_unlikely(dp_tx_pkt_tracepoints_enabled()) ||
 	    qdf_unlikely(vdev->pdev->soc->peerstats_enabled)) {
-		tx_desc->timestamp = qdf_ktime_to_ms(qdf_ktime_real_get());
+		tx_desc->timestamp = qdf_ktime_real_get();
 		return true;
 	}
 	return false;

+ 0 - 2
dp/wifi3.0/dp_tx_desc.h

@@ -406,7 +406,6 @@ dp_tx_desc_free(struct dp_soc *soc, struct dp_tx_desc_s *tx_desc,
 	tx_desc->nbuf = NULL;
 	tx_desc->flags = 0;
 	dp_tx_desc_set_magic(tx_desc, DP_TX_MAGIC_PATTERN_FREE);
-	tx_desc->timestamp = 0;
 	dp_tx_put_desc_flow_pool(pool, tx_desc);
 	switch (pool->status) {
 	case FLOW_POOL_ACTIVE_PAUSED:
@@ -574,7 +573,6 @@ dp_tx_desc_free(struct dp_soc *soc, struct dp_tx_desc_s *tx_desc,
 	tx_desc->nbuf = NULL;
 	tx_desc->flags = 0;
 	dp_tx_desc_set_magic(tx_desc, DP_TX_MAGIC_PATTERN_FREE);
-	tx_desc->timestamp = 0;
 	dp_tx_put_desc_flow_pool(pool, tx_desc);
 	switch (pool->status) {
 	case FLOW_POOL_ACTIVE_PAUSED:

+ 2 - 1
dp/wifi3.0/dp_types.h

@@ -563,6 +563,7 @@ struct dp_tx_desc_s {
 	uint16_t length;
 #ifdef DP_TX_TRACKING
 	uint32_t magic;
+	uint64_t timestamp_tick;
 #endif
 	uint16_t flags;
 	uint32_t id;
@@ -578,7 +579,7 @@ struct dp_tx_desc_s {
 	uint8_t pkt_offset;
 	uint8_t  pool_id;
 	struct dp_tx_ext_desc_elem_s *msdu_ext_desc;
-	uint64_t timestamp;
+	qdf_ktime_t timestamp;
 	struct hal_tx_desc_comp_s comp;
 };
 

+ 8 - 0
qdf/inc/qdf_time.h

@@ -83,6 +83,14 @@ qdf_ktime_t qdf_ktime_add_ns(qdf_ktime_t ktime, int64_t ns);
  */
 int64_t qdf_ktime_to_ms(qdf_ktime_t ktime);
 
+/**
+ * qdf_ktime_to_us - Convert the qdf_ktime_t object into microseconds
+ * @ktime: time as qdf_ktime_t object
+ *
+ * Return: qdf_ktime_t in microseconds
+ */
+int64_t qdf_ktime_to_us(qdf_ktime_t ktime);
+
 /**
  * qdf_ktime_to_ns - Convert the qdf_ktime_t object into nanoseconds
  * @ktime: time as qdf_ktime_t object

+ 8 - 0
qdf/linux/src/qdf_time.c

@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2021 The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
  *
  * Permission to use, copy, modify, and/or distribute this software for
  * any purpose with or without fee is hereby granted, provided that the
@@ -67,6 +68,13 @@ int64_t qdf_ktime_to_ms(qdf_ktime_t ktime)
 
 qdf_export_symbol(qdf_ktime_to_ms);
 
+int64_t qdf_ktime_to_us(qdf_ktime_t ktime)
+{
+	return __qdf_time_ktime_to_us(ktime);
+}
+
+qdf_export_symbol(qdf_ktime_to_us);
+
 int64_t qdf_ktime_to_ns(qdf_ktime_t ktime)
 {
 	return __qdf_ktime_to_ns(ktime);