
qcacmn: Check for tx desc leak or corruption

A Tx descriptor is sometimes not freed and put back into
the tx desc pool for a long time. As a result, the system
cannot enter the suspended state.
To find such a descriptor, parse the descriptor pools during
dp_bus_suspend and dp_runtime_suspend, check for any
descriptor that has not been freed for a long time, and
trigger self recovery when one is found.

Change-Id: Id97c5c8537c9bec922f4e254b5bf094505ee61ff
CRs-Fixed: 3109868
Ananya Gupta, 3 years ago
parent commit
940984c6a6
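
Below, before the per-file diffs, is a minimal, self-contained sketch of the idea behind this change; the names used here (struct tx_desc, desc_alloc, desc_free, find_missing_tx_comp, now_ms) are illustrative only and are not the driver's API. Each descriptor is stamped with a magic value and a timestamp when allocated, marked free when returned to the pool, and a suspend-time scan flags any descriptor that has stayed in use beyond the completion-latency threshold or whose magic matches neither pattern. The actual change implements this with DP_TX_MAGIC_PATTERN_INUSE/FREE, dp_tx_desc_set_magic(), dp_tx_comp_delay() and dp_find_missing_tx_comp(), as shown in the diffs that follow.

/* Simplified illustration only; not the qcacmn driver code. */
#include <stdint.h>
#include <stdio.h>
#include <time.h>

#define TX_MAGIC_INUSE          0xABCD1234u
#define TX_MAGIC_FREE           0xDEADBEEFu
#define TX_COMP_MAX_LATENCY_MS  120000u   /* same threshold the change uses */
#define POOL_SIZE               4

struct tx_desc {
	uint32_t id;
	uint32_t magic;
	uint64_t timestamp_ms;
};

static uint64_t now_ms(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_REALTIME, &ts);
	return (uint64_t)ts.tv_sec * 1000u + (uint64_t)ts.tv_nsec / 1000000u;
}

/* On allocation: mark the descriptor in use and record when it was handed off. */
static void desc_alloc(struct tx_desc *d)
{
	d->magic = TX_MAGIC_INUSE;
	d->timestamp_ms = now_ms();
}

/* On completion/free: mark the descriptor free and clear the timestamp. */
static void desc_free(struct tx_desc *d)
{
	d->magic = TX_MAGIC_FREE;
	d->timestamp_ms = 0;
}

/* Suspend-time scan: report leaked (stale in-use) or corrupted descriptors. */
static void find_missing_tx_comp(const struct tx_desc *pool, int n)
{
	for (int i = 0; i < n; i++) {
		const struct tx_desc *d = &pool[i];

		if (d->magic == TX_MAGIC_FREE) {
			continue;                 /* healthy, nothing to do */
		} else if (d->magic == TX_MAGIC_INUSE) {
			uint64_t delay = now_ms() - d->timestamp_ms;

			if (delay >= TX_COMP_MAX_LATENCY_MS)
				printf("desc %u: no tx completion for %llu ms\n",
				       (unsigned)d->id,
				       (unsigned long long)delay);
		} else {
			printf("desc %u: corrupted magic 0x%x\n",
			       (unsigned)d->id, (unsigned)d->magic);
		}
	}
}

int main(void)
{
	struct tx_desc pool[POOL_SIZE] = { {0}, {1}, {2}, {3} };

	for (int i = 0; i < POOL_SIZE; i++)
		desc_free(&pool[i]);                    /* pool starts fully free      */

	desc_alloc(&pool[2]);                           /* simulate an in-flight frame */
	pool[2].timestamp_ms -= TX_COMP_MAX_LATENCY_MS; /* backdate: pretend it leaked */
	pool[3].magic = 0;                              /* simulate a corrupted entry  */

	find_missing_tx_comp(pool, POOL_SIZE);          /* flags desc 2 and desc 3     */
	return 0;
}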

+ 1 - 1
dp/wifi3.0/be/dp_be_tx.c

@@ -800,7 +800,7 @@ QDF_STATUS dp_tx_desc_pool_init_be(struct dp_soc *soc,
 			dp_cc_desc_id_generate(page_desc->ppt_index,
 					       avail_entry_index);
 		tx_desc->pool_id = pool_id;
-
+		dp_tx_desc_set_magic(tx_desc, DP_TX_MAGIC_PATTERN_FREE);
 		tx_desc = tx_desc->next;
 		avail_entry_index = (avail_entry_index + 1) &
 					DP_CC_SPT_PAGE_MAX_ENTRIES_MASK;

+ 89 - 3
dp/wifi3.0/dp_main.c

@@ -12039,6 +12039,91 @@ void dp_flush_ring_hptp(struct dp_soc *soc, hal_ring_handle_t hal_srng)
 }
 #endif
 
+#ifdef DP_TX_TRACKING
+
+#define DP_TX_COMP_MAX_LATENCY_MS 120000
+/**
+ * dp_tx_comp_delay() - calculate time latency for tx completion per pkt
+ * @timestamp: tx descriptor timestamp
+ *
+ * Calculate the time latency for tx completion per pkt and trigger self
+ * recovery when the delay is more than the threshold value.
+ *
+ * Return: None.
+ */
+static void dp_tx_comp_delay(uint64_t timestamp)
+{
+	uint64_t time_latency;
+
+	if (dp_tx_pkt_tracepoints_enabled())
+		time_latency = qdf_ktime_to_ms(qdf_ktime_real_get()) -
+								timestamp;
+	else
+		time_latency = qdf_system_ticks_to_msecs(qdf_system_ticks() -
+								timestamp);
+
+	if (time_latency >= DP_TX_COMP_MAX_LATENCY_MS) {
+		dp_err_rl("tx completion not rcvd for %llu ms.", time_latency);
+		qdf_trigger_self_recovery(NULL, QDF_TX_DESC_LEAK);
+	}
+}
+
+/**
+ * dp_find_missing_tx_comp() - check for leaked descriptor in tx path
+ * @soc: DP SOC context
+ *
+ * Parse through descriptors in all pools and validate magic number and
+ * completion time. Trigger self recovery if magic value is corrupted.
+ *
+ * Return: None.
+ */
+static void dp_find_missing_tx_comp(struct dp_soc *soc)
+{
+	uint8_t i;
+	uint32_t j;
+	uint32_t num_desc, page_id, offset;
+	uint16_t num_desc_per_page;
+	struct dp_tx_desc_s *tx_desc = NULL;
+	struct dp_tx_desc_pool_s *tx_desc_pool = NULL;
+
+	for (i = 0; i < MAX_TXDESC_POOLS; i++) {
+		tx_desc_pool = &soc->tx_desc[i];
+		if (!(tx_desc_pool->pool_size) ||
+		    IS_TX_DESC_POOL_STATUS_INACTIVE(tx_desc_pool) ||
+		    !(tx_desc_pool->desc_pages.cacheable_pages))
+			continue;
+
+		num_desc = tx_desc_pool->pool_size;
+		num_desc_per_page =
+			tx_desc_pool->desc_pages.num_element_per_page;
+		for (j = 0; j < num_desc; j++) {
+			page_id = j / num_desc_per_page;
+			offset = j % num_desc_per_page;
+
+			if (qdf_unlikely(!(tx_desc_pool->
+					 desc_pages.cacheable_pages)))
+				break;
+
+			tx_desc = dp_tx_desc_find(soc, i, page_id, offset);
+			if (tx_desc->magic == DP_TX_MAGIC_PATTERN_FREE) {
+				continue;
+			} else if (tx_desc->magic ==
+						DP_TX_MAGIC_PATTERN_INUSE) {
+				dp_tx_comp_delay(tx_desc->timestamp);
+			} else {
+				dp_err("tx desc %u corrupted", tx_desc->id);
+				qdf_trigger_self_recovery(NULL,
+							  QDF_TX_DESC_LEAK);
+			}
+		}
+	}
+}
+#else
+static inline void dp_find_missing_tx_comp(struct dp_soc *soc)
+{
+}
+#endif
+
 #ifdef FEATURE_RUNTIME_PM
 /**
  * dp_runtime_suspend() - ensure DP is ready to runtime suspend
@@ -12065,9 +12150,9 @@ static QDF_STATUS dp_runtime_suspend(struct cdp_soc_t *soc_hdl, uint8_t pdev_id)
 	/* Abort if there are any pending TX packets */
 	tx_pending = dp_get_tx_pending(dp_pdev_to_cdp_pdev(pdev));
 	if (tx_pending) {
-		dp_init_info("%pK: Abort suspend due to pending TX packets %d",
-			     soc, tx_pending);
-
+		dp_info_rl("%pK: Abort suspend due to pending TX packets %d",
+			   soc, tx_pending);
+		dp_find_missing_tx_comp(soc);
 		/* perform a force flush if tx is pending */
 		for (i = 0; i < soc->num_tcl_data_rings; i++) {
 			hal_srng_set_event(soc->tcl_data_ring[i].hal_srng,
@@ -12559,6 +12644,7 @@ static QDF_STATUS dp_bus_suspend(struct cdp_soc_t *soc_hdl, uint8_t pdev_id)
 		if (timeout <= 0) {
 			dp_info("TX frames are pending %d, abort suspend",
 				tx_pending);
+			dp_find_missing_tx_comp(soc);
 			return QDF_STATUS_E_TIMEOUT;
 		}
 		timeout = timeout - drain_wait_delay;

+ 12 - 0
dp/wifi3.0/dp_tx.c

@@ -4540,6 +4540,17 @@ dp_srng_test_and_update_nf_params(struct dp_soc *soc, struct dp_srng *dp_srng,
 }
 #endif
 
+#ifdef DP_TX_TRACKING
+void dp_tx_desc_check_corruption(struct dp_tx_desc_s *tx_desc)
+{
+	if ((tx_desc->magic != DP_TX_MAGIC_PATTERN_INUSE) &&
+	    (tx_desc->magic != DP_TX_MAGIC_PATTERN_FREE)) {
+		dp_err_rl("tx_desc %u is corrupted", tx_desc->id);
+		qdf_trigger_self_recovery(NULL, QDF_TX_DESC_LEAK);
+	}
+}
+#endif
+
 uint32_t dp_tx_comp_handler(struct dp_intr *int_ctx, struct dp_soc *soc,
 			    hal_ring_handle_t hal_ring_hdl, uint8_t ring_id,
 			    uint32_t quota)
@@ -4685,6 +4696,7 @@ more_data:
 				dp_tx_comp_info_rl("Descriptor freed in vdev_detach %d",
 						   tx_desc->id);
 				DP_STATS_INC(soc, tx.tx_comp_exception, 1);
+				dp_tx_desc_check_corruption(tx_desc);
 				continue;
 			}
 

+ 36 - 0
dp/wifi3.0/dp_tx.h

@@ -942,4 +942,40 @@ bool dp_tx_pkt_tracepoints_enabled(void)
 		qdf_trace_dp_tx_comp_udp_pkt_enabled() ||
 		qdf_trace_dp_tx_comp_pkt_enabled());
 }
+
+#ifdef DP_TX_TRACKING
+/**
+ * dp_tx_desc_set_timestamp() - set timestamp in tx descriptor
+ * @tx_desc: tx descriptor
+ *
+ * Return: None
+ */
+static inline
+void dp_tx_desc_set_timestamp(struct dp_tx_desc_s *tx_desc)
+{
+	tx_desc->timestamp = qdf_system_ticks();
+}
+
+/**
+ * dp_tx_desc_check_corruption() - Verify magic pattern in tx descriptor
+ * @tx_desc: tx descriptor
+ *
+ * Check for corruption in the tx descriptor; if the magic pattern does not
+ * match, trigger self recovery.
+ *
+ * Return: none
+ */
+void dp_tx_desc_check_corruption(struct dp_tx_desc_s *tx_desc);
+#else
+static inline
+void dp_tx_desc_set_timestamp(struct dp_tx_desc_s *tx_desc)
+{
+}
+
+static inline
+void dp_tx_desc_check_corruption(struct dp_tx_desc_s *tx_desc)
+{
+}
+#endif
+
 #endif

+ 21 - 2
dp/wifi3.0/dp_tx_desc.h

@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2016-2021 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2022 Qualcomm Innovation Center, Inc. All rights reserved.
  *
  * Permission to use, copy, modify, and/or distribute this software for
  * any purpose with or without fee is hereby granted, provided that the
@@ -93,6 +93,19 @@ do {                                                   \
 #endif /* !QCA_LL_TX_FLOW_CONTROL_V2 */
 #define MAX_POOL_BUFF_COUNT 10000
 
+#ifdef DP_TX_TRACKING
+static inline void dp_tx_desc_set_magic(struct dp_tx_desc_s *tx_desc,
+					uint32_t magic_pattern)
+{
+	tx_desc->magic = magic_pattern;
+}
+#else
+static inline void dp_tx_desc_set_magic(struct dp_tx_desc_s *tx_desc,
+					uint32_t magic_pattern)
+{
+}
+#endif
+
 QDF_STATUS dp_tx_desc_pool_alloc(struct dp_soc *soc, uint8_t pool_id,
 				 uint16_t num_elem);
 QDF_STATUS dp_tx_desc_pool_init(struct dp_soc *soc, uint8_t pool_id,
@@ -301,7 +314,8 @@ dp_tx_desc_alloc(struct dp_soc *soc, uint8_t desc_pool_id)
 			tx_desc = dp_tx_get_desc_flow_pool(pool);
 			tx_desc->pool_id = desc_pool_id;
 			tx_desc->flags = DP_TX_DESC_FLAG_ALLOCATED;
-
+			dp_tx_desc_set_magic(tx_desc,
+					     DP_TX_MAGIC_PATTERN_INUSE);
 			is_pause = dp_tx_is_threshold_reached(pool,
 							      pool->avail_desc);
 
@@ -391,6 +405,8 @@ dp_tx_desc_free(struct dp_soc *soc, struct dp_tx_desc_s *tx_desc,
 	tx_desc->vdev_id = DP_INVALID_VDEV_ID;
 	tx_desc->nbuf = NULL;
 	tx_desc->flags = 0;
+	dp_tx_desc_set_magic(tx_desc, DP_TX_MAGIC_PATTERN_FREE);
+	tx_desc->timestamp = 0;
 	dp_tx_put_desc_flow_pool(pool, tx_desc);
 	switch (pool->status) {
 	case FLOW_POOL_ACTIVE_PAUSED:
@@ -503,6 +519,8 @@ dp_tx_desc_alloc(struct dp_soc *soc, uint8_t desc_pool_id)
 			tx_desc = dp_tx_get_desc_flow_pool(pool);
 			tx_desc->pool_id = desc_pool_id;
 			tx_desc->flags = DP_TX_DESC_FLAG_ALLOCATED;
+			dp_tx_desc_set_magic(tx_desc,
+					     DP_TX_MAGIC_PATTERN_INUSE);
 			if (qdf_unlikely(pool->avail_desc < pool->stop_th)) {
 				pool->status = FLOW_POOL_ACTIVE_PAUSED;
 				qdf_spin_unlock_bh(&pool->flow_pool_lock);
@@ -555,6 +573,7 @@ dp_tx_desc_free(struct dp_soc *soc, struct dp_tx_desc_s *tx_desc,
 	tx_desc->vdev_id = DP_INVALID_VDEV_ID;
 	tx_desc->nbuf = NULL;
 	tx_desc->flags = 0;
+	dp_tx_desc_set_magic(tx_desc, DP_TX_MAGIC_PATTERN_FREE);
 	dp_tx_put_desc_flow_pool(pool, tx_desc);
 	switch (pool->status) {
 	case FLOW_POOL_ACTIVE_PAUSED:

+ 6 - 0
dp/wifi3.0/dp_types.h

@@ -161,6 +161,9 @@
 #define DP_SKIP_BAR_UPDATE_TIMEOUT 5000
 #endif
 
+#define DP_TX_MAGIC_PATTERN_INUSE	0xABCD1234
+#define DP_TX_MAGIC_PATTERN_FREE	0xDEADBEEF
+
 enum rx_pktlog_mode {
 	DP_RX_PKTLOG_DISABLED = 0,
 	DP_RX_PKTLOG_FULL,
@@ -552,6 +555,9 @@ struct dp_tx_desc_s {
 	struct dp_tx_desc_s *next;
 	qdf_nbuf_t nbuf;
 	uint16_t length;
+#ifdef DP_TX_TRACKING
+	uint32_t magic;
+#endif
 	uint16_t flags;
 	uint32_t id;
 	qdf_dma_addr_t dma_addr;

+ 3 - 0
dp/wifi3.0/li/dp_li_tx.c

@@ -406,6 +406,8 @@ dp_tx_hw_enqueue_li(struct dp_soc *soc, struct dp_vdev *vdev,
 		dp_tx_pkt_tracepoints_enabled() ||
 		qdf_unlikely(soc->rdkstats_enabled))
 		tx_desc->timestamp = qdf_ktime_to_ms(qdf_ktime_real_get());
+	else
+		dp_tx_desc_set_timestamp(tx_desc);
 
 	dp_verbose_debug("length:%d , type = %d, dma_addr %llx, offset %d desc id %u",
 			 tx_desc->length,
@@ -476,6 +478,7 @@ QDF_STATUS dp_tx_desc_pool_init_li(struct dp_soc *soc,
 
 		tx_desc->id = id;
 		tx_desc->pool_id = pool_id;
+		dp_tx_desc_set_magic(tx_desc, DP_TX_MAGIC_PATTERN_FREE);
 		tx_desc = tx_desc->next;
 		count++;
 	}

+ 1 - 0
qdf/inc/qdf_types.h

@@ -1442,6 +1442,7 @@ enum qdf_hang_reason {
 	QDF_RX_REG_PKT_ROUTE_ERR,
 	QDF_VDEV_SM_OUT_OF_SYNC,
 	QDF_STATS_REQ_TIMEDOUT,
+	QDF_TX_DESC_LEAK,
 };
 
 /**