Browse Source

qcacmn: add dedicated workqueue for Tx ring delayed reg write

Add delayed SRNG register write support for the Tx ring, and add a
dedicated workqueue to perform the delayed Tx SRNG register writes.

Change-Id: I8dd157d341f3035e988804eab50d1ca681ab789b
CRs-Fixed: 2868989
Vevek Venkatesan 4 years ago
parent
commit
38af510319
4 changed files with 565 additions and 112 deletions
  1. 6 31
      hal/wifi3.0/hal_api.h
  2. 75 7
      hal/wifi3.0/hal_internal.h
  3. 473 74
      hal/wifi3.0/hal_srng.c
  4. 11 0
      hif/src/ipcie/if_ipci.c

+ 6 - 31
hal/wifi3.0/hal_api.h

@@ -451,7 +451,8 @@ static inline void hal_srng_write_address_32_mb(struct hal_soc *hal_soc,
 {
 {
 	qdf_iowrite32(addr, value);
 	qdf_iowrite32(addr, value);
 }
 }
-#elif defined(FEATURE_HAL_DELAYED_REG_WRITE)
+#elif defined(FEATURE_HAL_DELAYED_REG_WRITE) || \
+	defined(FEATURE_HAL_DELAYED_REG_WRITE_V2)
 static inline void hal_srng_write_address_32_mb(struct hal_soc *hal_soc,
 static inline void hal_srng_write_address_32_mb(struct hal_soc *hal_soc,
 						struct hal_srng *srng,
 						struct hal_srng *srng,
 						void __iomem *addr,
 						void __iomem *addr,
@@ -773,7 +774,8 @@ static inline void hal_write32_mb_confirm_retry(struct hal_soc *hal_soc,
 }
 }
 #endif /* GENERIC_SHADOW_REGISTER_ACCESS_ENABLE */
 #endif /* GENERIC_SHADOW_REGISTER_ACCESS_ENABLE */
 
 
-#ifdef FEATURE_HAL_DELAYED_REG_WRITE
+#if defined(FEATURE_HAL_DELAYED_REG_WRITE) || \
+	defined(FEATURE_HAL_DELAYED_REG_WRITE_V2)
 /**
 /**
  * hal_dump_reg_write_srng_stats() - dump SRNG reg write stats
  * hal_dump_reg_write_srng_stats() - dump SRNG reg write stats
  * @hal_soc: HAL soc handle
  * @hal_soc: HAL soc handle
@@ -858,35 +860,6 @@ void *hal_attach(struct hif_opaque_softc *hif_handle, qdf_device_t qdf_dev);
  */
  */
 extern void hal_detach(void *hal_soc);
 extern void hal_detach(void *hal_soc);
 
 
-/* SRNG type to be passed in APIs hal_srng_get_entrysize and hal_srng_setup */
-enum hal_ring_type {
-	REO_DST = 0,
-	REO_EXCEPTION = 1,
-	REO_REINJECT = 2,
-	REO_CMD = 3,
-	REO_STATUS = 4,
-	TCL_DATA = 5,
-	TCL_CMD_CREDIT = 6,
-	TCL_STATUS = 7,
-	CE_SRC = 8,
-	CE_DST = 9,
-	CE_DST_STATUS = 10,
-	WBM_IDLE_LINK = 11,
-	SW2WBM_RELEASE = 12,
-	WBM2SW_RELEASE = 13,
-	RXDMA_BUF = 14,
-	RXDMA_DST = 15,
-	RXDMA_MONITOR_BUF = 16,
-	RXDMA_MONITOR_STATUS = 17,
-	RXDMA_MONITOR_DST = 18,
-	RXDMA_MONITOR_DESC = 19,
-	DIR_BUF_RX_DMA_SRC = 20,
-#ifdef WLAN_FEATURE_CIF_CFR
-	WIFI_POS_SRC,
-#endif
-	MAX_RING_TYPES
-};
-
 #define HAL_SRNG_LMAC_RING 0x80000000
 #define HAL_SRNG_LMAC_RING 0x80000000
 /* SRNG flags passed in hal_srng_params.flags */
 /* SRNG flags passed in hal_srng_params.flags */
 #define HAL_SRNG_MSI_SWAP				0x00000008
 #define HAL_SRNG_MSI_SWAP				0x00000008
@@ -2751,7 +2724,9 @@ static inline QDF_STATUS hal_construct_shadow_regs(void *hal_soc)
  * Return: None
  * Return: None
  */
  */
 void hal_flush_reg_write_work(hal_soc_handle_t hal_handle);
 void hal_flush_reg_write_work(hal_soc_handle_t hal_handle);
+
 #else
 #else
 static inline void hal_flush_reg_write_work(hal_soc_handle_t hal_handle) { }
 static inline void hal_flush_reg_write_work(hal_soc_handle_t hal_handle) { }
 #endif
 #endif
+
 #endif /* _HAL_APIH_ */
 #endif /* _HAL_APIH_ */

+ 75 - 7
hal/wifi3.0/hal_internal.h

@@ -25,8 +25,10 @@
 #include "qdf_mem.h"
 #include "qdf_mem.h"
 #include "qdf_nbuf.h"
 #include "qdf_nbuf.h"
 #include "pld_common.h"
 #include "pld_common.h"
-#ifdef FEATURE_HAL_DELAYED_REG_WRITE
+#if defined(FEATURE_HAL_DELAYED_REG_WRITE) || \
+	defined(FEATURE_HAL_DELAYED_REG_WRITE_V2)
 #include "qdf_defer.h"
 #include "qdf_defer.h"
+#include "qdf_timer.h"
 #endif
 #endif
 
 
 #define hal_alert(params...) QDF_TRACE_FATAL(QDF_MODULE_ID_HAL, params)
 #define hal_alert(params...) QDF_TRACE_FATAL(QDF_MODULE_ID_HAL, params)
@@ -193,6 +195,35 @@ enum hal_srng_ring_id {
 	HAL_SRNG_LMAC1_ID_END = 143
 	HAL_SRNG_LMAC1_ID_END = 143
 };
 };
 
 
+/* SRNG type to be passed in APIs hal_srng_get_entrysize and hal_srng_setup */
+enum hal_ring_type {
+	REO_DST = 0,
+	REO_EXCEPTION = 1,
+	REO_REINJECT = 2,
+	REO_CMD = 3,
+	REO_STATUS = 4,
+	TCL_DATA = 5,
+	TCL_CMD_CREDIT = 6,
+	TCL_STATUS = 7,
+	CE_SRC = 8,
+	CE_DST = 9,
+	CE_DST_STATUS = 10,
+	WBM_IDLE_LINK = 11,
+	SW2WBM_RELEASE = 12,
+	WBM2SW_RELEASE = 13,
+	RXDMA_BUF = 14,
+	RXDMA_DST = 15,
+	RXDMA_MONITOR_BUF = 16,
+	RXDMA_MONITOR_STATUS = 17,
+	RXDMA_MONITOR_DST = 18,
+	RXDMA_MONITOR_DESC = 19,
+	DIR_BUF_RX_DMA_SRC = 20,
+#ifdef WLAN_FEATURE_CIF_CFR
+	WIFI_POS_SRC,
+#endif
+	MAX_RING_TYPES
+};
+
 #define HAL_RXDMA_MAX_RING_SIZE 0xFFFF
 #define HAL_RXDMA_MAX_RING_SIZE 0xFFFF
 #define HAL_MAX_LMACS 3
 #define HAL_MAX_LMACS 3
 #define HAL_MAX_RINGS_PER_LMAC (HAL_SRNG_LMAC1_ID_END - HAL_SRNG_LMAC1_ID_START)
 #define HAL_MAX_RINGS_PER_LMAC (HAL_SRNG_LMAC1_ID_END - HAL_SRNG_LMAC1_ID_START)
@@ -228,7 +259,8 @@ typedef struct hal_ring_handle *hal_ring_handle_t;
  */
  */
 #define HAL_SRNG_FLUSH_EVENT BIT(0)
 #define HAL_SRNG_FLUSH_EVENT BIT(0)
 
 
-#ifdef FEATURE_HAL_DELAYED_REG_WRITE
+#if defined(FEATURE_HAL_DELAYED_REG_WRITE) || \
+	defined(FEATURE_HAL_DELAYED_REG_WRITE_V2)
 
 
 /**
 /**
  * struct hal_reg_write_q_elem - delayed register write queue element
  * struct hal_reg_write_q_elem - delayed register write queue element
@@ -303,6 +335,21 @@ struct hal_reg_write_soc_stats {
 	uint32_t max_q_depth;
 	uint32_t max_q_depth;
 	uint32_t sched_delay[REG_WRITE_SCHED_DELAY_HIST_MAX];
 	uint32_t sched_delay[REG_WRITE_SCHED_DELAY_HIST_MAX];
 };
 };
+
+#ifdef FEATURE_HAL_DELAYED_REG_WRITE_V2
+struct hal_reg_write_tcl_stats {
+	uint32_t wq_delayed;
+	uint32_t wq_direct;
+	uint32_t timer_enq;
+	uint32_t timer_direct;
+	uint32_t enq_timer_set;
+	uint32_t direct_timer_set;
+	uint32_t timer_reset;
+	qdf_time_t enq_time;
+	qdf_time_t deq_time;
+	uint32_t sched_delay[REG_WRITE_SCHED_DELAY_HIST_MAX];
+};
+#endif
 #endif
 #endif
 
 
 /* Common SRNG ring structure for source and destination rings */
 /* Common SRNG ring structure for source and destination rings */
@@ -361,6 +408,9 @@ struct hal_srng {
 	 */
 	 */
 	void *hwreg_base[MAX_SRNG_REG_GROUPS];
 	void *hwreg_base[MAX_SRNG_REG_GROUPS];
 
 
+	/* Ring type/name */
+	enum hal_ring_type ring_type;
+
 	/* Source or Destination ring */
 	/* Source or Destination ring */
 	enum hal_srng_dir ring_dir;
 	enum hal_srng_dir ring_dir;
 
 
@@ -418,10 +468,12 @@ struct hal_srng {
 	unsigned long srng_event;
 	unsigned long srng_event;
 	/* last flushed time stamp */
 	/* last flushed time stamp */
 	uint64_t last_flush_ts;
 	uint64_t last_flush_ts;
-#ifdef FEATURE_HAL_DELAYED_REG_WRITE
+#if defined(FEATURE_HAL_DELAYED_REG_WRITE) || \
+	defined(FEATURE_HAL_DELAYED_REG_WRITE_V2)
+	/* Previous hp/tp (based on ring dir) value written to the reg */
+	uint32_t last_reg_wr_val;
 	/* flag to indicate whether srng is already queued for delayed write */
 	/* flag to indicate whether srng is already queued for delayed write */
 	uint8_t reg_write_in_progress;
 	uint8_t reg_write_in_progress;
-
 	/* srng specific delayed write stats */
 	/* srng specific delayed write stats */
 	struct hal_reg_write_srng_stats wstats;
 	struct hal_reg_write_srng_stats wstats;
 #endif
 #endif
@@ -658,7 +710,8 @@ struct hal_hw_txrx_ops {
  */
  */
 struct hal_soc_stats {
 struct hal_soc_stats {
 	uint32_t reg_write_fail;
 	uint32_t reg_write_fail;
-#ifdef FEATURE_HAL_DELAYED_REG_WRITE
+#if defined(FEATURE_HAL_DELAYED_REG_WRITE) || \
+	defined(FEATURE_HAL_DELAYED_REG_WRITE_V2)
 	struct hal_reg_write_soc_stats wstats;
 	struct hal_reg_write_soc_stats wstats;
 #endif
 #endif
 #ifdef GENERIC_SHADOW_REGISTER_ACCESS_ENABLE
 #ifdef GENERIC_SHADOW_REGISTER_ACCESS_ENABLE
@@ -773,7 +826,21 @@ struct hal_soc {
 	qdf_atomic_t write_idx;
 	qdf_atomic_t write_idx;
 	/* read index used by worker thread to dequeue/write registers */
 	/* read index used by worker thread to dequeue/write registers */
 	uint32_t read_idx;
 	uint32_t read_idx;
-#endif
+#endif /* FEATURE_HAL_DELAYED_REG_WRITE */
+#ifdef FEATURE_HAL_DELAYED_REG_WRITE_V2
+	/* delayed work for TCL reg write to be queued into workqueue */
+	qdf_work_t tcl_reg_write_work;
+	/* workqueue for TCL delayed register writes */
+	qdf_workqueue_t *tcl_reg_write_wq;
+	/* flag denotes whether TCL delayed write work is active */
+	qdf_atomic_t tcl_work_active;
+	/* flag indicates TCL write is happening from direct context */
+	bool tcl_direct;
+	/* timer to handle the pending TCL reg writes */
+	qdf_timer_t tcl_reg_write_timer;
+	/* stats related to TCL reg write */
+	struct hal_reg_write_tcl_stats tcl_stats;
+#endif /* FEATURE_HAL_DELAYED_REG_WRITE_V2 */
 	qdf_atomic_t active_work_cnt;
 	qdf_atomic_t active_work_cnt;
 #ifdef GENERIC_SHADOW_REGISTER_ACCESS_ENABLE
 #ifdef GENERIC_SHADOW_REGISTER_ACCESS_ENABLE
 	struct shadow_reg_config
 	struct shadow_reg_config
@@ -782,7 +849,8 @@ struct hal_soc {
 #endif
 #endif
 };
 };
 
 
-#ifdef FEATURE_HAL_DELAYED_REG_WRITE
+#if defined(FEATURE_HAL_DELAYED_REG_WRITE) || \
+	defined(FEATURE_HAL_DELAYED_REG_WRITE_V2)
 /**
 /**
  *  hal_delayed_reg_write() - delayed register write
  *  hal_delayed_reg_write() - delayed register write
  * @hal_soc: HAL soc handle
  * @hal_soc: HAL soc handle

+ 473 - 74
hal/wifi3.0/hal_srng.c

@@ -446,17 +446,8 @@ uint32_t hal_get_target_type(hal_soc_handle_t hal_soc_hdl)
 
 
 qdf_export_symbol(hal_get_target_type);
 qdf_export_symbol(hal_get_target_type);
 
 
-#ifdef FEATURE_HAL_DELAYED_REG_WRITE
-#ifdef MEMORY_DEBUG
-/*
- * Length of the queue(array) used to hold delayed register writes.
- * Must be a multiple of 2.
- */
-#define HAL_REG_WRITE_QUEUE_LEN 128
-#else
-#define HAL_REG_WRITE_QUEUE_LEN 32
-#endif
-
+#if defined(FEATURE_HAL_DELAYED_REG_WRITE) || \
+	defined(FEATURE_HAL_DELAYED_REG_WRITE_V2)
 /**
 /**
  * hal_is_reg_write_tput_level_high() - throughput level for delayed reg writes
  * hal_is_reg_write_tput_level_high() - throughput level for delayed reg writes
  * @hal: hal_soc pointer
  * @hal: hal_soc pointer
@@ -470,6 +461,122 @@ static inline bool hal_is_reg_write_tput_level_high(struct hal_soc *hal)
 	return (bw_level >= PLD_BUS_WIDTH_MEDIUM) ? true : false;
 	return (bw_level >= PLD_BUS_WIDTH_MEDIUM) ? true : false;
 }
 }
 
 
+static inline
+char *hal_fill_reg_write_srng_stats(struct hal_srng *srng,
+				    char *buf, qdf_size_t size)
+{
+	qdf_scnprintf(buf, size, "enq %u deq %u coal %u direct %u",
+		      srng->wstats.enqueues, srng->wstats.dequeues,
+		      srng->wstats.coalesces, srng->wstats.direct);
+	return buf;
+}
+
+/* bytes for local buffer */
+#define HAL_REG_WRITE_SRNG_STATS_LEN 100
+
+void hal_dump_reg_write_srng_stats(hal_soc_handle_t hal_soc_hdl)
+{
+	struct hal_srng *srng;
+	char buf[HAL_REG_WRITE_SRNG_STATS_LEN];
+	struct hal_soc *hal = (struct hal_soc *)hal_soc_hdl;
+
+	srng = hal_get_srng(hal, HAL_SRNG_SW2TCL1);
+	hal_debug("SW2TCL1: %s",
+		  hal_fill_reg_write_srng_stats(srng, buf, sizeof(buf)));
+
+	srng = hal_get_srng(hal, HAL_SRNG_WBM2SW0_RELEASE);
+	hal_debug("WBM2SW0: %s",
+		  hal_fill_reg_write_srng_stats(srng, buf, sizeof(buf)));
+
+	srng = hal_get_srng(hal, HAL_SRNG_REO2SW1);
+	hal_debug("REO2SW1: %s",
+		  hal_fill_reg_write_srng_stats(srng, buf, sizeof(buf)));
+
+	srng = hal_get_srng(hal, HAL_SRNG_REO2SW2);
+	hal_debug("REO2SW2: %s",
+		  hal_fill_reg_write_srng_stats(srng, buf, sizeof(buf)));
+
+	srng = hal_get_srng(hal, HAL_SRNG_REO2SW3);
+	hal_debug("REO2SW3: %s",
+		  hal_fill_reg_write_srng_stats(srng, buf, sizeof(buf)));
+}
+
+#ifdef FEATURE_HAL_DELAYED_REG_WRITE_V2
+/**
+ * hal_dump_tcl_stats() - dump the TCL reg write stats
+ * @hal: hal_soc pointer
+ *
+ * Return: None
+ */
+static inline void hal_dump_tcl_stats(struct hal_soc *hal)
+{
+	struct hal_srng *srng = hal_get_srng(hal, HAL_SRNG_SW2TCL1);
+	uint32_t *hist = hal->tcl_stats.sched_delay;
+	char buf[HAL_REG_WRITE_SRNG_STATS_LEN];
+
+	hal_debug("TCL: %s sched-delay hist %u %u %u %u",
+		  hal_fill_reg_write_srng_stats(srng, buf, sizeof(buf)),
+		  hist[REG_WRITE_SCHED_DELAY_SUB_100us],
+		  hist[REG_WRITE_SCHED_DELAY_SUB_1000us],
+		  hist[REG_WRITE_SCHED_DELAY_SUB_5000us],
+		  hist[REG_WRITE_SCHED_DELAY_GT_5000us]);
+	hal_debug("wq_dly %u wq_dir %u tim_enq %u tim_dir %u enq_tim_cnt %u dir_tim_cnt %u rst_tim_cnt %u",
+		  hal->tcl_stats.wq_delayed,
+		  hal->tcl_stats.wq_direct,
+		  hal->tcl_stats.timer_enq,
+		  hal->tcl_stats.timer_direct,
+		  hal->tcl_stats.enq_timer_set,
+		  hal->tcl_stats.direct_timer_set,
+		  hal->tcl_stats.timer_reset);
+}
+
+#else
+static inline void hal_dump_tcl_stats(struct hal_soc *hal)
+{
+}
+#endif
+
+void hal_dump_reg_write_stats(hal_soc_handle_t hal_soc_hdl)
+{
+	uint32_t *hist;
+	struct hal_soc *hal = (struct hal_soc *)hal_soc_hdl;
+
+	hist = hal->stats.wstats.sched_delay;
+	hal_debug("wstats: enq %u deq %u coal %u direct %u q_depth %u max_q %u sched-delay hist %u %u %u %u",
+		  qdf_atomic_read(&hal->stats.wstats.enqueues),
+		  hal->stats.wstats.dequeues,
+		  qdf_atomic_read(&hal->stats.wstats.coalesces),
+		  qdf_atomic_read(&hal->stats.wstats.direct),
+		  qdf_atomic_read(&hal->stats.wstats.q_depth),
+		  hal->stats.wstats.max_q_depth,
+		  hist[REG_WRITE_SCHED_DELAY_SUB_100us],
+		  hist[REG_WRITE_SCHED_DELAY_SUB_1000us],
+		  hist[REG_WRITE_SCHED_DELAY_SUB_5000us],
+		  hist[REG_WRITE_SCHED_DELAY_GT_5000us]);
+
+	hal_dump_tcl_stats(hal);
+}
+
+int hal_get_reg_write_pending_work(void *hal_soc)
+{
+	struct hal_soc *hal = (struct hal_soc *)hal_soc;
+
+	return qdf_atomic_read(&hal->active_work_cnt);
+}
+
+#endif
+
+#ifdef FEATURE_HAL_DELAYED_REG_WRITE
+#ifdef MEMORY_DEBUG
+/*
+ * Length of the queue(array) used to hold delayed register writes.
+ * Must be a multiple of 2.
+ */
+#define HAL_REG_WRITE_QUEUE_LEN 128
+#else
+#define HAL_REG_WRITE_QUEUE_LEN 32
+#endif
+
 /**
 /**
  * hal_process_reg_write_q_elem() - process a register write queue element
  * hal_process_reg_write_q_elem() - process a register write queue element
  * @hal: hal_soc pointer
  * @hal: hal_soc pointer
@@ -694,21 +801,6 @@ static void hal_reg_write_enqueue(struct hal_soc *hal_soc,
 		       &hal_soc->reg_write_work);
 		       &hal_soc->reg_write_work);
 }
 }
 
 
-void hal_delayed_reg_write(struct hal_soc *hal_soc,
-			   struct hal_srng *srng,
-			   void __iomem *addr,
-			   uint32_t value)
-{
-	if (pld_is_device_awake(hal_soc->qdf_dev->dev) ||
-	    hal_is_reg_write_tput_level_high(hal_soc)) {
-		qdf_atomic_inc(&hal_soc->stats.wstats.direct);
-		srng->wstats.direct++;
-		hal_write_address_32_mb(hal_soc, addr, value, false);
-	} else {
-		hal_reg_write_enqueue(hal_soc, srng, addr, value);
-	}
-}
-
 /**
 /**
  * hal_delayed_reg_write_init() - Initialization function for delayed reg writes
  * hal_delayed_reg_write_init() - Initialization function for delayed reg writes
  * @hal_soc: hal_soc pointer
  * @hal_soc: hal_soc pointer
@@ -755,84 +847,388 @@ static void hal_delayed_reg_write_deinit(struct hal_soc *hal)
 	qdf_mem_free(hal->reg_write_queue);
 	qdf_mem_free(hal->reg_write_queue);
 }
 }
 
 
-static inline
-char *hal_fill_reg_write_srng_stats(struct hal_srng *srng,
-				    char *buf, qdf_size_t size)
+#else
+static inline QDF_STATUS hal_delayed_reg_write_init(struct hal_soc *hal)
 {
 {
-	qdf_scnprintf(buf, size, "enq %u deq %u coal %u direct %u",
-		      srng->wstats.enqueues, srng->wstats.dequeues,
-		      srng->wstats.coalesces, srng->wstats.direct);
-	return buf;
+	return QDF_STATUS_SUCCESS;
 }
 }
 
 
-/* bytes for local buffer */
-#define HAL_REG_WRITE_SRNG_STATS_LEN 100
+static inline void hal_delayed_reg_write_deinit(struct hal_soc *hal)
+{
+}
+#endif
 
 
-void hal_dump_reg_write_srng_stats(hal_soc_handle_t hal_soc_hdl)
+#ifdef FEATURE_HAL_DELAYED_REG_WRITE_V2
+#ifdef MEMORY_DEBUG
+/**
+ * hal_del_reg_write_get_ts() - Function to get the timestamp
+ *
+ * Return: the current log timestamp
+ */
+static inline qdf_time_t hal_del_reg_write_get_ts(void)
 {
 {
-	struct hal_srng *srng;
-	char buf[HAL_REG_WRITE_SRNG_STATS_LEN];
-	struct hal_soc *hal = (struct hal_soc *)hal_soc_hdl;
+	return qdf_get_log_timestamp();
+}
 
 
-	srng = hal_get_srng(hal, HAL_SRNG_SW2TCL1);
-	hal_debug("SW2TCL1: %s",
-		  hal_fill_reg_write_srng_stats(srng, buf, sizeof(buf)));
+/**
+ * hal_del_reg_write_ts_usecs() - Convert the timestamp to micro secs
+ * @ts: timestamp value to be converted
+ *
+ * Return: return the timestamp in micro secs
+ */
+static inline qdf_time_t hal_del_reg_write_ts_usecs(qdf_time_t ts)
+{
+	return qdf_log_timestamp_to_usecs(ts);
+}
 
 
-	srng = hal_get_srng(hal, HAL_SRNG_WBM2SW0_RELEASE);
-	hal_debug("WBM2SW0: %s",
-		  hal_fill_reg_write_srng_stats(srng, buf, sizeof(buf)));
+/**
+ * hal_tcl_write_fill_sched_delay_hist() - fill TCL reg write delay histogram
+ * @hal: hal_soc pointer
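+ *
+ * Records the dequeue timestamp and buckets the delay (in us) between the
+ * last enqueue and this dequeue into the TCL sched_delay histogram.
+ *
+ * Return: None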
+ */
+static inline void hal_tcl_write_fill_sched_delay_hist(struct hal_soc *hal)
+{
+	uint32_t *hist;
+	uint32_t delay_us;
 
 
-	srng = hal_get_srng(hal, HAL_SRNG_REO2SW1);
-	hal_debug("REO2SW1: %s",
-		  hal_fill_reg_write_srng_stats(srng, buf, sizeof(buf)));
+	hal->tcl_stats.deq_time = hal_del_reg_write_get_ts();
+	delay_us = hal_del_reg_write_ts_usecs(hal->tcl_stats.deq_time -
+					      hal->tcl_stats.enq_time);
 
 
-	srng = hal_get_srng(hal, HAL_SRNG_REO2SW2);
-	hal_debug("REO2SW2: %s",
-		  hal_fill_reg_write_srng_stats(srng, buf, sizeof(buf)));
+	hist = hal->tcl_stats.sched_delay;
+	if (delay_us < 100)
+		hist[REG_WRITE_SCHED_DELAY_SUB_100us]++;
+	else if (delay_us < 1000)
+		hist[REG_WRITE_SCHED_DELAY_SUB_1000us]++;
+	else if (delay_us < 5000)
+		hist[REG_WRITE_SCHED_DELAY_SUB_5000us]++;
+	else
+		hist[REG_WRITE_SCHED_DELAY_GT_5000us]++;
+}
 
 
-	srng = hal_get_srng(hal, HAL_SRNG_REO2SW3);
-	hal_debug("REO2SW3: %s",
-		  hal_fill_reg_write_srng_stats(srng, buf, sizeof(buf)));
+#else
+static inline qdf_time_t hal_del_reg_write_get_ts(void)
+{
+	return 0;
 }
 }
 
 
-void hal_dump_reg_write_stats(hal_soc_handle_t hal_soc_hdl)
+static inline qdf_time_t hal_del_reg_write_ts_usecs(qdf_time_t ts)
 {
 {
-	uint32_t *hist;
-	struct hal_soc *hal = (struct hal_soc *)hal_soc_hdl;
+	return 0;
+}
 
 
-	hist = hal->stats.wstats.sched_delay;
+static inline void hal_tcl_write_fill_sched_delay_hist(struct hal_soc *hal)
+{
+}
+#endif
 
 
-	hal_debug("enq %u deq %u coal %u direct %u q_depth %u max_q %u sched-delay hist %u %u %u %u",
-		  qdf_atomic_read(&hal->stats.wstats.enqueues),
-		  hal->stats.wstats.dequeues,
-		  qdf_atomic_read(&hal->stats.wstats.coalesces),
-		  qdf_atomic_read(&hal->stats.wstats.direct),
-		  qdf_atomic_read(&hal->stats.wstats.q_depth),
-		  hal->stats.wstats.max_q_depth,
-		  hist[REG_WRITE_SCHED_DELAY_SUB_100us],
-		  hist[REG_WRITE_SCHED_DELAY_SUB_1000us],
-		  hist[REG_WRITE_SCHED_DELAY_SUB_5000us],
-		  hist[REG_WRITE_SCHED_DELAY_GT_5000us]);
+/**
+ * hal_tcl_reg_write_work() - Worker to process delayed SW2TCL1 writes
+ * @arg: hal_soc pointer
+ *
+ * Return: None
+ */
+static void hal_tcl_reg_write_work(void *arg)
+{
+	struct hal_soc *hal = arg;
+	struct hal_srng *srng = hal_get_srng(hal, HAL_SRNG_SW2TCL1);
+
+	SRNG_LOCK(&srng->lock);
+	srng->wstats.dequeues++;
+	hal_tcl_write_fill_sched_delay_hist(hal);
+
+	/*
+	 * During the transition from a low to a high tput scenario, reg write
+	 * moves from delayed to direct write context. There is a small chance
+	 * that the worker thread gets scheduled later than a direct context
+	 * write which already wrote the latest HP value. This check catches
+	 * that case and avoids writing the same HP value again.
+	 */
+	if (srng->last_reg_wr_val != srng->u.src_ring.hp) {
+		srng->last_reg_wr_val = srng->u.src_ring.hp;
+		if (hal->tcl_direct) {
+			/*
+			 * TCL reg writes have been moved to direct context and
+			 * the assumption is that PCIe bus stays in Active state
+			 * during high tput, hence it's fine to write the HP
+			 * while the SRNG_LOCK is being held.
+			 */
+			hal->tcl_stats.wq_direct++;
+			hal_write_address_32_mb(hal, srng->u.src_ring.hp_addr,
+						srng->last_reg_wr_val, false);
+			srng->reg_write_in_progress = false;
+			SRNG_UNLOCK(&srng->lock);
+		} else {
+			/*
+			 * TCL reg write happens in delayed context. The write
+			 * may take time because the PCIe bus can be in a low
+			 * power state during low tput, hence release the
+			 * SRNG_LOCK before writing.
+			 */
+			hal->tcl_stats.wq_delayed++;
+			srng->reg_write_in_progress = false;
+			SRNG_UNLOCK(&srng->lock);
+			hal_write_address_32_mb(hal, srng->u.src_ring.hp_addr,
+						srng->last_reg_wr_val, false);
+		}
+	} else {
+		srng->reg_write_in_progress = false;
+		SRNG_UNLOCK(&srng->lock);
+	}
+
+	/*
+	 * Decrement active_work_cnt to make sure that hif_try_complete_tasks
+	 * will wait. This will avoid race condition between delayed register
+	 * worker and bus suspend (system suspend or runtime suspend).
+	 *
+	 * The following decrement should be done at the end!
+	 */
+	qdf_atomic_dec(&hal->active_work_cnt);
+	qdf_atomic_set(&hal->tcl_work_active, false);
 }
 }
 
 
-int hal_get_reg_write_pending_work(void *hal_soc)
+static void __hal_flush_tcl_reg_write_work(struct hal_soc *hal)
 {
 {
-	struct hal_soc *hal = (struct hal_soc *)hal_soc;
+	qdf_cancel_work(&hal->tcl_reg_write_work);
+}
 
 
-	return qdf_atomic_read(&hal->active_work_cnt);
+/**
+ * hal_tcl_reg_write_enqueue() - enqueue TCL register writes into kworker
+ * @hal_soc: hal_soc pointer
+ * @srng: srng pointer
+ * @addr: iomem address of register
+ * @value: value to be written to iomem address
+ *
+ * This function executes from within the SRNG LOCK
+ *
+ * Return: None
+ */
+static void hal_tcl_reg_write_enqueue(struct hal_soc *hal_soc,
+				      struct hal_srng *srng,
+				      void __iomem *addr,
+				      uint32_t value)
+{
+	hal_soc->tcl_stats.enq_time = hal_del_reg_write_get_ts();
+
+	if (qdf_queue_work(hal_soc->qdf_dev, hal_soc->tcl_reg_write_wq,
+			   &hal_soc->tcl_reg_write_work)) {
+		srng->reg_write_in_progress  = true;
+		qdf_atomic_inc(&hal_soc->active_work_cnt);
+		qdf_atomic_set(&hal_soc->tcl_work_active, true);
+		srng->wstats.enqueues++;
+	} else {
+		hal_soc->tcl_stats.enq_timer_set++;
+		qdf_timer_mod(&hal_soc->tcl_reg_write_timer, 1);
+	}
+}
+
+/**
+ * hal_tcl_reg_write_timer() - timer handler to take care of pending TCL writes
+ * @arg: srng handle
+ *
+ * This function handles the pending TCL reg writes missed due to the previous
+ * scheduled worker running.
+ *
+ * Return: None
+ */
+static void hal_tcl_reg_write_timer(void *arg)
+{
+	hal_ring_handle_t srng_hdl = arg;
+	struct hal_srng *srng;
+	struct hal_soc *hal;
+
+	srng = (struct hal_srng *)srng_hdl;
+	hal = srng->hal_soc;
+
+	if (hif_pm_runtime_get(hal->hif_handle, RTPM_ID_DW_TX_HW_ENQUEUE,
+			       true)) {
+		hal_srng_set_event(srng_hdl, HAL_SRNG_FLUSH_EVENT);
+		hal_srng_inc_flush_cnt(srng_hdl);
+		goto fail;
+	}
+
+	SRNG_LOCK(&srng->lock);
+	if (hal->tcl_direct) {
+		/*
+		 * Due to the previous scheduled worker still running,
+		 * direct reg write cannot be performed, so posted the
+		 * pending writes to timer context.
+		 */
+		if (srng->last_reg_wr_val != srng->u.src_ring.hp) {
+			srng->last_reg_wr_val = srng->u.src_ring.hp;
+			srng->wstats.direct++;
+			hal->tcl_stats.timer_direct++;
+			hal_write_address_32_mb(hal, srng->u.src_ring.hp_addr,
+						srng->last_reg_wr_val, false);
+		}
+	} else {
+		/*
+		 * Due to the previous scheduled worker still running,
+		 * queue_work from delayed context would fail,
+		 * so retry from timer context.
+		 */
+		if (qdf_queue_work(hal->qdf_dev, hal->tcl_reg_write_wq,
+				   &hal->tcl_reg_write_work)) {
+			srng->reg_write_in_progress  = true;
+			qdf_atomic_inc(&hal->active_work_cnt);
+			qdf_atomic_set(&hal->tcl_work_active, true);
+			srng->wstats.enqueues++;
+			hal->tcl_stats.timer_enq++;
+		} else {
+			if (srng->last_reg_wr_val != srng->u.src_ring.hp) {
+				hal->tcl_stats.timer_reset++;
+				qdf_timer_mod(&hal->tcl_reg_write_timer, 1);
+			}
+		}
+	}
+	SRNG_UNLOCK(&srng->lock);
+	hif_pm_runtime_put(hal->hif_handle, RTPM_ID_DW_TX_HW_ENQUEUE);
+
+fail:
+	return;
+}
+
+/**
+ * hal_delayed_tcl_reg_write_init() - Initialization for delayed TCL reg writes
+ * @hal: hal_soc pointer
+ *
+ * Initialize main data structures to process TCL register writes in a delayed
+ * workqueue.
+ *
+ * Return: QDF_STATUS_SUCCESS on success else a QDF error.
+ */
+static QDF_STATUS hal_delayed_tcl_reg_write_init(struct hal_soc *hal)
+{
+	struct hal_srng *srng = hal_get_srng(hal, HAL_SRNG_SW2TCL1);
+	QDF_STATUS status;
+
+	hal->tcl_reg_write_wq =
+		qdf_alloc_high_prior_ordered_workqueue("hal_tcl_reg_write_wq");
+	if (!hal->tcl_reg_write_wq) {
+		hal_err("hal_tcl_reg_write_wq alloc failed");
+		return QDF_STATUS_E_NOMEM;
+	}
+
+	status = qdf_create_work(0, &hal->tcl_reg_write_work,
+				 hal_tcl_reg_write_work, hal);
+	if (status != QDF_STATUS_SUCCESS) {
+		hal_err("tcl_reg_write_work create failed");
+		goto fail;
+	}
+
+	status = qdf_timer_init(hal->qdf_dev, &hal->tcl_reg_write_timer,
+				hal_tcl_reg_write_timer, (void *)srng,
+				QDF_TIMER_TYPE_WAKE_APPS);
+	if (status != QDF_STATUS_SUCCESS) {
+		hal_err("tcl_reg_write_timer init failed");
+		goto fail;
+	}
+
+	qdf_atomic_init(&hal->tcl_work_active);
+
+	return QDF_STATUS_SUCCESS;
+
+fail:
+	qdf_destroy_workqueue(0, hal->tcl_reg_write_wq);
+	return status;
+}
+
+/**
+ * hal_delayed_tcl_reg_write_deinit() - De-Initialize delayed TCL reg writes
+ * @hal: hal_soc pointer
+ *
+ * De-initialize main data structures to process TCL register writes in a
+ * delayed workqueue.
+ *
+ * Return: None
+ */
+static void hal_delayed_tcl_reg_write_deinit(struct hal_soc *hal)
+{
+	qdf_timer_stop(&hal->tcl_reg_write_timer);
+	qdf_timer_free(&hal->tcl_reg_write_timer);
+
+	__hal_flush_tcl_reg_write_work(hal);
+	qdf_flush_workqueue(0, hal->tcl_reg_write_wq);
+	qdf_destroy_workqueue(0, hal->tcl_reg_write_wq);
 }
 }
 
 
 #else
 #else
-static inline QDF_STATUS hal_delayed_reg_write_init(struct hal_soc *hal)
+static inline QDF_STATUS hal_delayed_tcl_reg_write_init(struct hal_soc *hal)
 {
 {
 	return QDF_STATUS_SUCCESS;
 	return QDF_STATUS_SUCCESS;
 }
 }
 
 
-static inline void hal_delayed_reg_write_deinit(struct hal_soc *hal)
+static inline void hal_delayed_tcl_reg_write_deinit(struct hal_soc *hal)
 {
 {
 }
 }
 #endif
 #endif
 
 
+#ifdef FEATURE_HAL_DELAYED_REG_WRITE_V2
+void hal_delayed_reg_write(struct hal_soc *hal_soc,
+			   struct hal_srng *srng,
+			   void __iomem *addr,
+			   uint32_t value)
+{
+	switch (srng->ring_type) {
+	case TCL_DATA:
+		if (hal_is_reg_write_tput_level_high(hal_soc)) {
+			hal_soc->tcl_direct = true;
+			if (srng->reg_write_in_progress ||
+			    !qdf_atomic_read(&hal_soc->tcl_work_active)) {
+				/*
+				 * At this point the delayed work has either
+				 * completed its write or has not yet run and
+				 * would be blocked by SRNG_LOCK, hence it is
+				 * fine to do a direct write here.
+				 */
+				srng->last_reg_wr_val = srng->u.src_ring.hp;
+				srng->wstats.direct++;
+				hal_write_address_32_mb(hal_soc, addr,
+							srng->last_reg_wr_val,
+							false);
+			} else {
+				hal_soc->tcl_stats.direct_timer_set++;
+				qdf_timer_mod(&hal_soc->tcl_reg_write_timer, 1);
+			}
+		} else {
+			hal_soc->tcl_direct = false;
+			if (srng->reg_write_in_progress) {
+				srng->wstats.coalesces++;
+			} else {
+				hal_tcl_reg_write_enqueue(hal_soc, srng,
+							  addr, value);
+			}
+		}
+		break;
+	default:
+		qdf_atomic_inc(&hal_soc->stats.wstats.direct);
+		srng->wstats.direct++;
+		hal_write_address_32_mb(hal_soc, addr, value, false);
+		break;
+	}
+}
+
+#else
+#ifdef FEATURE_HAL_DELAYED_REG_WRITE
+void hal_delayed_reg_write(struct hal_soc *hal_soc,
+			   struct hal_srng *srng,
+			   void __iomem *addr,
+			   uint32_t value)
+{
+	if (pld_is_device_awake(hal_soc->qdf_dev->dev) ||
+	    hal_is_reg_write_tput_level_high(hal_soc)) {
+		qdf_atomic_inc(&hal_soc->stats.wstats.direct);
+		srng->wstats.direct++;
+		hal_write_address_32_mb(hal_soc, addr, value, false);
+	} else {
+		hal_reg_write_enqueue(hal_soc, srng, addr, value);
+	}
+}
+#endif
+#endif
+
 /**
 /**
  * hal_attach - Initialize HAL layer
  * hal_attach - Initialize HAL layer
  * @hif_handle: Opaque HIF handle
  * @hif_handle: Opaque HIF handle
@@ -903,6 +1299,7 @@ void *hal_attach(struct hif_opaque_softc *hif_handle, qdf_device_t qdf_dev)
 
 
 	qdf_atomic_init(&hal->active_work_cnt);
 	qdf_atomic_init(&hal->active_work_cnt);
 	hal_delayed_reg_write_init(hal);
 	hal_delayed_reg_write_init(hal);
+	hal_delayed_tcl_reg_write_init(hal);
 
 
 	return (void *)hal;
 	return (void *)hal;
 
 
@@ -953,6 +1350,7 @@ extern void hal_detach(void *hal_soc)
 	struct hal_soc *hal = (struct hal_soc *)hal_soc;
 	struct hal_soc *hal = (struct hal_soc *)hal_soc;
 
 
 	hal_delayed_reg_write_deinit(hal);
 	hal_delayed_reg_write_deinit(hal);
+	hal_delayed_tcl_reg_write_deinit(hal);
 
 
 	qdf_mem_free_consistent(hal->qdf_dev, hal->qdf_dev->dev,
 	qdf_mem_free_consistent(hal->qdf_dev, hal->qdf_dev->dev,
 		sizeof(*(hal->shadow_rdptr_mem_vaddr)) * HAL_SRNG_ID_MAX,
 		sizeof(*(hal->shadow_rdptr_mem_vaddr)) * HAL_SRNG_ID_MAX,
@@ -1197,6 +1595,7 @@ void *hal_srng_setup(void *hal_soc, int ring_type, int ring_num,
 
 
 	dev_base_addr = hal->dev_base_addr;
 	dev_base_addr = hal->dev_base_addr;
 	srng->ring_id = ring_id;
 	srng->ring_id = ring_id;
+	srng->ring_type = ring_type;
 	srng->ring_dir = ring_config->ring_dir;
 	srng->ring_dir = ring_config->ring_dir;
 	srng->ring_base_paddr = ring_params->ring_base_paddr;
 	srng->ring_base_paddr = ring_params->ring_base_paddr;
 	srng->ring_base_vaddr = ring_params->ring_base_vaddr;
 	srng->ring_base_vaddr = ring_params->ring_base_vaddr;

+ 11 - 0
hif/src/ipcie/if_ipci.c

@@ -770,3 +770,14 @@ void hif_print_ipci_stats(struct hif_ipci_softc *ipci_handle)
 		  ipci_handle->stats.soc_force_wake_release_success);
 		  ipci_handle->stats.soc_force_wake_release_success);
 }
 }
 #endif /* FORCE_WAKE */
 #endif /* FORCE_WAKE */
+
+#ifdef FEATURE_HAL_DELAYED_REG_WRITE
+int hif_prevent_link_low_power_states(struct hif_opaque_softc *hif)
+{
+	return 0;
+}
+
+void hif_allow_link_low_power_states(struct hif_opaque_softc *hif)
+{
+}
+#endif