Explorar o código

qcacmn: Add delayed register write support in HAL

In case the bus is in low power mode, the register writes (followed by a
memory barrier) may take a long time (~4ms). This can cause the caller
to block till the PCIe write is completed. Thus, even though PCI
writes are posted, it can still block the caller.

Hence, in case the bus is in low power mode (not in M0), or not in high
throughput scenarios, queue the register write in a workqueue. The
register write will happen in the delayed work context. In other cases,
i.e., when the bus is not in low power mode or in high throughput
scenarios, do the register writes in caller context.

Change-Id: Idf218e4581545bc6ac67b91d0f70d495387ca90e
CRs-Fixed: 2602029
Mohit Khanna hai 5 anos
pai
achega
b4429e8278

+ 2 - 0
dp/inc/cdp_txrx_cmn_struct.h

@@ -269,6 +269,7 @@ enum htt_cmn_dbg_stats_type {
  * @TXRX_PDEV_CFG_PARAMS: Print pdev cfg params info
  * @TXRX_NAPI_STATS: Print NAPI scheduling statistics
  * @TXRX_SOC_INTERRUPT_STATS: Print soc interrupt stats
+ * @TXRX_HAL_REG_WRITE_STATS: Hal Reg Write stats
  */
 enum cdp_host_txrx_stats {
 	TXRX_HOST_STATS_INVALID  = -1,
@@ -286,6 +287,7 @@ enum cdp_host_txrx_stats {
 	TXRX_NAPI_STATS       = 11,
 	TXRX_SOC_INTERRUPT_STATS = 12,
 	TXRX_SOC_FSE_STATS = 13,
+	TXRX_HAL_REG_WRITE_STATS = 14,
 	TXRX_HOST_STATS_MAX,
 };
 

+ 9 - 0
dp/wifi3.0/dp_main.c

@@ -334,6 +334,7 @@ const int dp_stats_mapping_table[][STATS_TYPE_MAX] = {
 	{TXRX_FW_STATS_INVALID, TXRX_PDEV_CFG_PARAMS},
 	{TXRX_FW_STATS_INVALID, TXRX_SOC_INTERRUPT_STATS},
 	{TXRX_FW_STATS_INVALID, TXRX_SOC_FSE_STATS},
+	{TXRX_FW_STATS_INVALID, TXRX_HAL_REG_WRITE_STATS},
 };
 
 /* MCL specific functions */
@@ -7604,6 +7605,7 @@ static void dp_txrx_stats_help(void)
 	dp_info(" 29 -- Host Soc cfg param Statistics");
 	dp_info(" 30 -- Host pdev cfg param Statistics");
 	dp_info(" 31 -- Host FISA stats");
+	dp_info(" 32 -- Host Register Work stats");
 }
 
 /**
@@ -7663,11 +7665,17 @@ dp_print_host_stats(struct dp_vdev *vdev,
 		break;
 	case TXRX_NAPI_STATS:
 		dp_print_napi_stats(pdev->soc);
+		break;
 	case TXRX_SOC_INTERRUPT_STATS:
 		dp_print_soc_interrupt_stats(pdev->soc);
 		break;
 	case TXRX_SOC_FSE_STATS:
 		dp_rx_dump_fisa_table(pdev->soc);
+		break;
+	case TXRX_HAL_REG_WRITE_STATS:
+		hal_dump_reg_write_stats(pdev->soc->hal_soc);
+		hal_dump_reg_write_srng_stats(pdev->soc->hal_soc);
+		break;
 	default:
 		dp_info("Wrong Input For TxRx Host Stats");
 		dp_txrx_stats_help();
@@ -9098,6 +9106,7 @@ static QDF_STATUS dp_txrx_dump_stats(struct cdp_soc_t *psoc, uint16_t value,
 	case CDP_TXRX_PATH_STATS:
 		dp_txrx_path_stats(soc);
 		dp_print_soc_interrupt_stats(soc);
+		hal_dump_reg_write_stats(soc->hal_soc);
 		break;
 
 	case CDP_RX_RING_STATS:

+ 51 - 5
hal/wifi3.0/hal_api.h

@@ -438,10 +438,29 @@ void hal_write_address_32_mb(struct hal_soc *hal_soc,
 }
 
 #ifdef DP_HAL_MULTIWINDOW_DIRECT_ACCESS
-#define hal_srng_write_address_32_mb(_a, _b, _c) qdf_iowrite32(_b, _c)
+static inline void hal_srng_write_address_32_mb(struct hal_soc *hal_soc,
+						struct hal_srng *srng,
+						void __iomem *addr,
+						uint32_t value)
+{
+	qdf_iowrite32(addr, value);
+}
+#elif defined(FEATURE_HAL_DELAYED_REG_WRITE)
+/* SRNG pointer updates go through the delayed register write path */
+static inline void hal_srng_write_address_32_mb(struct hal_soc *hal_soc,
+						struct hal_srng *srng,
+						void __iomem *addr,
+						uint32_t value)
+{
+	hal_delayed_reg_write(hal_soc, srng, addr, value);
+}
 #else
-#define hal_srng_write_address_32_mb(_a, _b, _c) \
-		hal_write_address_32_mb(_a, _b, _c)
+static inline void hal_srng_write_address_32_mb(struct hal_soc *hal_soc,
+						struct hal_srng *srng,
+						void __iomem *addr,
+						uint32_t value)
+{
+	hal_write_address_32_mb(hal_soc, addr, value);
+}
 #endif
 
 #if !defined(QCA_WIFI_QCA6390) && !defined(QCA_WIFI_QCA6490) && \
@@ -467,8 +486,7 @@ void hal_write_address_32_mb(struct hal_soc *hal_soc,
  *
  * Return: < 0 for failure/>= 0 for success
  */
-static inline
-uint32_t hal_read32_mb(struct hal_soc *hal_soc, uint32_t offset)
+static inline uint32_t hal_read32_mb(struct hal_soc *hal_soc, uint32_t offset)
 {
 	uint32_t ret;
 	unsigned long flags;
@@ -536,6 +554,32 @@ uint32_t hal_read32_mb(struct hal_soc *hal_soc, uint32_t offset)
 }
 #endif
 
+#ifdef FEATURE_HAL_DELAYED_REG_WRITE
+/**
+ * hal_dump_reg_write_srng_stats() - dump SRNG reg write stats
+ * @hal_soc_hdl: HAL soc handle
+ *
+ * Return: none
+ */
+void hal_dump_reg_write_srng_stats(hal_soc_handle_t hal_soc_hdl);
+
+/**
+ * hal_dump_reg_write_stats() - dump reg write stats
+ * @hal_soc_hdl: HAL soc handle
+ *
+ * Return: none
+ */
+void hal_dump_reg_write_stats(hal_soc_handle_t hal_soc_hdl);
+#else
+static inline void hal_dump_reg_write_srng_stats(hal_soc_handle_t hal_soc_hdl)
+{
+}
+
+static inline void hal_dump_reg_write_stats(hal_soc_handle_t hal_soc_hdl)
+{
+}
+#endif
+
 /**
  * hal_read_address_32_mb() - Read 32-bit value from the register
  * @soc: soc handle
@@ -1425,10 +1469,12 @@ hal_srng_access_end_unlocked(void *hal_soc, hal_ring_handle_t hal_ring_hdl)
 	} else {
 		if (srng->ring_dir == HAL_SRNG_SRC_RING)
 			hal_srng_write_address_32_mb(hal_soc,
+						     srng,
 						     srng->u.src_ring.hp_addr,
 						     srng->u.src_ring.hp);
 		else
 			hal_srng_write_address_32_mb(hal_soc,
+						     srng,
 						     srng->u.dst_ring.tp_addr,
 						     srng->u.dst_ring.tp);
 	}

+ 9 - 2
hal/wifi3.0/hal_hw_headers.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 The Linux Foundation. All rights reserved.
+ * Copyright (c) 2016-2020 The Linux Foundation. All rights reserved.
  *
  * Permission to use, copy, modify, and/or distribute this software for
  * any purpose with or without fee is hereby granted, provided that the
@@ -228,9 +228,16 @@
 #define SRNG_SRC_ADDR(_srng, _reg) \
 	SRNG_REG_ADDR(_srng, _reg, _reg ## _GROUP, SRC)
 
+/* Route SRNG register writes through the delayed-write path when enabled */
+#ifdef FEATURE_HAL_DELAYED_REG_WRITE
 #define SRNG_REG_WRITE(_srng, _reg, _value, _dir) \
-	hal_write_address_32_mb(_srng->hal_soc, \
+	hal_delayed_reg_write(_srng->hal_soc, _srng,\
 		SRNG_ ## _dir ## _ADDR(_srng, _reg), (_value))
+#else
+#define SRNG_REG_WRITE(_srng, _reg, _value, _dir) \
+	hal_write_address_32_mb(_srng->hal_soc,\
+		SRNG_ ## _dir ## _ADDR(_srng, _reg), (_value))
+#endif
+
 
 #define SRNG_REG_READ(_srng, _reg, _dir) \
 	hal_read_address_32_mb(_srng->hal_soc, \

+ 115 - 0
hal/wifi3.0/hal_internal.h

@@ -25,6 +25,9 @@
 #include "qdf_mem.h"
 #include "qdf_nbuf.h"
 #include "pld_common.h"
+#ifdef FEATURE_HAL_DELAYED_REG_WRITE
+#include "qdf_defer.h"
+#endif
 
 #define hal_alert(params...) QDF_TRACE_FATAL(QDF_MODULE_ID_TXRX, params)
 #define hal_err(params...) QDF_TRACE_ERROR(QDF_MODULE_ID_TXRX, params)
@@ -194,6 +197,79 @@ typedef struct hal_ring_handle *hal_ring_handle_t;
  */
 #define HAL_SRNG_FLUSH_EVENT BIT(0)
 
+#ifdef FEATURE_HAL_DELAYED_REG_WRITE
+
+/**
+ * struct hal_reg_write_q_elem - delayed register write queue element
+ * @srng: hal_srng queued for a delayed write
+ * @addr: iomem address of the register
+ * @val: register value at the time of delayed write enqueue (logged by the
+ *	 worker; the worker writes the srng's current hp/tp, not this value)
+ * @valid: whether this entry is valid or not
+ * @enqueue_time: enqueue time (qdf_log_timestamp)
+ * @dequeue_time: dequeue time (qdf_log_timestamp)
+ */
+struct hal_reg_write_q_elem {
+	struct hal_srng *srng;
+	void __iomem *addr;
+	uint32_t val;
+	uint8_t valid;
+	qdf_time_t enqueue_time;
+	qdf_time_t dequeue_time;
+};
+
+/**
+ * struct hal_reg_write_srng_stats - srng stats to keep track of register writes
+ * @enqueues: writes enqueued to delayed work
+ * @dequeues: writes dequeued from delayed work (not written yet)
+ * @coalesces: writes not enqueued since srng is already queued up
+ * @direct: writes not enqueued and written to register directly
+ */
+struct hal_reg_write_srng_stats {
+	uint32_t enqueues;
+	uint32_t dequeues;
+	uint32_t coalesces;
+	uint32_t direct;
+};
+
+/**
+ * enum hal_reg_sched_delay - ENUM for write sched delay histogram
+ * @REG_WRITE_SCHED_DELAY_SUB_100us: index for delay < 100us
+ * @REG_WRITE_SCHED_DELAY_SUB_1000us: index for delay < 1000us
+ * @REG_WRITE_SCHED_DELAY_SUB_5000us: index for delay < 5000us
+ * @REG_WRITE_SCHED_DELAY_GT_5000us: index for delay >= 5000us
+ * @REG_WRITE_SCHED_DELAY_HIST_MAX: Max value (size of histogram array)
+ */
+enum hal_reg_sched_delay {
+	REG_WRITE_SCHED_DELAY_SUB_100us,
+	REG_WRITE_SCHED_DELAY_SUB_1000us,
+	REG_WRITE_SCHED_DELAY_SUB_5000us,
+	REG_WRITE_SCHED_DELAY_GT_5000us,
+	REG_WRITE_SCHED_DELAY_HIST_MAX,
+};
+
+/**
+ * struct hal_reg_write_soc_stats - soc stats to keep track of register writes
+ * @enqueues: writes enqueued to delayed work
+ * @dequeues: writes dequeued from delayed work (not written yet)
+ * @coalesces: writes not enqueued since srng is already queued up
+ * @direct: writes not enqueued and written to register directly
+ * @prevent_l1_fails: prevent l1 API failed
+ * @q_depth: current queue depth in delayed register write queue
+ * @max_q_depth: maximum queue depth seen for delayed register write queue
+ * @sched_delay: histogram of kernel work sched delay + bus wakeup delay
+ */
+struct hal_reg_write_soc_stats {
+	qdf_atomic_t enqueues;
+	uint32_t dequeues;
+	qdf_atomic_t coalesces;
+	qdf_atomic_t direct;
+	uint32_t prevent_l1_fails;
+	qdf_atomic_t q_depth;
+	uint32_t max_q_depth;
+	uint32_t sched_delay[REG_WRITE_SCHED_DELAY_HIST_MAX];
+};
+#endif
+
 /* Common SRNG ring structure for source and destination rings */
 struct hal_srng {
 	/* Unique SRNG ring ID */
@@ -304,6 +380,13 @@ struct hal_srng {
 	unsigned long srng_event;
 	/* last flushed time stamp */
 	uint64_t last_flush_ts;
+#ifdef FEATURE_HAL_DELAYED_REG_WRITE
+	/* flag to indicate whether srng is already queued for delayed write */
+	uint8_t reg_write_in_progress;
+
+	/* srng specific delayed write stats */
+	struct hal_reg_write_srng_stats wstats;
+#endif
 };
 
 /* HW SRNG configuration table */
@@ -480,11 +563,15 @@ struct hal_hw_txrx_ops {
 /**
  * struct hal_soc_stats - Hal layer stats
  * @reg_write_fail: number of failed register writes
+ * @wstats: delayed register write stats
  *
  * This structure holds all the statistics at HAL layer.
  */
 struct hal_soc_stats {
 	uint32_t reg_write_fail;
+#ifdef FEATURE_HAL_DELAYED_REG_WRITE
+	struct hal_reg_write_soc_stats wstats;
+#endif
 };
 
 #ifdef ENABLE_HAL_REG_WR_HISTORY
@@ -575,8 +662,36 @@ struct hal_soc {
 #ifdef ENABLE_HAL_REG_WR_HISTORY
 	struct hal_reg_write_fail_history *reg_wr_fail_hist;
 #endif
+#ifdef FEATURE_HAL_DELAYED_REG_WRITE
+	/* queue(array) to hold register writes */
+	struct hal_reg_write_q_elem *reg_write_queue;
+	/* delayed work to be queued into workqueue */
+	qdf_work_t reg_write_work;
+	/* workqueue for delayed register writes */
+	qdf_workqueue_t *reg_write_wq;
+	/* write index used by caller to enqueue delayed work */
+	qdf_atomic_t write_idx;
+	/* read index used by worker thread to dequeue/write registers */
+	uint32_t read_idx;
+#endif
 };
 
+#ifdef FEATURE_HAL_DELAYED_REG_WRITE
+/**
+ * hal_delayed_reg_write() - delayed register write
+ * @hal_soc: HAL soc handle
+ * @srng: hal srng
+ * @addr: iomem address
+ * @value: value to be written
+ *
+ * Return: none
+ */
+void hal_delayed_reg_write(struct hal_soc *hal_soc,
+			   struct hal_srng *srng,
+			   void __iomem *addr,
+			   uint32_t value);
+#endif
+
 void hal_qca6750_attach(struct hal_soc *hal_soc);
 void hal_qca6490_attach(struct hal_soc *hal_soc);
 void hal_qca6390_attach(struct hal_soc *hal_soc);

+ 339 - 0
hal/wifi3.0/hal_srng.c

@@ -21,6 +21,7 @@
 #include "target_type.h"
 #include "wcss_version.h"
 #include "qdf_module.h"
+
 #ifdef QCA_WIFI_QCA8074
 void hal_qca6290_attach(struct hal_soc *hal);
 #endif
@@ -340,6 +341,338 @@ uint32_t hal_get_target_type(hal_soc_handle_t hal_soc_hdl)
 
 qdf_export_symbol(hal_get_target_type);
 
+#ifdef FEATURE_HAL_DELAYED_REG_WRITE
+#ifdef MEMORY_DEBUG
+/*
+ * Length of the queue(array) used to hold delayed register writes.
+ * Must be a power of 2, since indices wrap with a (LEN - 1) mask.
+ */
+#define HAL_REG_WRITE_QUEUE_LEN 128
+#else
+#define HAL_REG_WRITE_QUEUE_LEN 32
+#endif
+
+/**
+ * hal_is_reg_write_tput_level_high() - throughput level for delayed reg writes
+ * @hal: hal_soc pointer
+ *
+ * Return: true if throughput is high, else false.
+ */
+static inline bool hal_is_reg_write_tput_level_high(struct hal_soc *hal)
+{
+	int bw_level = hif_get_bandwidth_level(hal->hif_handle);
+
+	return (bw_level >= PLD_BUS_WIDTH_MEDIUM) ? true : false;
+}
+
+/**
+ * hal_process_reg_write_q_elem() - process a register write queue element
+ * @hal: hal_soc pointer
+ * @q_elem: pointer to hal register write queue element
+ *
+ * Under the SRNG lock, clears the srng's reg_write_in_progress flag and writes
+ * the srng's current head pointer (source ring) or tail pointer (destination
+ * ring) to its register address. The value stored in @q_elem is not used for
+ * the write.
+ *
+ * Return: None
+ */
+static void hal_process_reg_write_q_elem(struct hal_soc *hal,
+					 struct hal_reg_write_q_elem *q_elem)
+{
+	struct hal_srng *srng = q_elem->srng;
+
+	SRNG_LOCK(&srng->lock);
+
+	srng->reg_write_in_progress = false;
+	srng->wstats.dequeues++;
+
+	if (srng->ring_dir == HAL_SRNG_SRC_RING)
+		hal_write_address_32_mb(hal,
+					srng->u.src_ring.hp_addr,
+					srng->u.src_ring.hp);
+	else
+		hal_write_address_32_mb(hal,
+					srng->u.dst_ring.tp_addr,
+					srng->u.dst_ring.tp);
+
+	SRNG_UNLOCK(&srng->lock);
+}
+
+/**
+ * hal_reg_write_fill_sched_delay_hist() - fill reg write delay histogram in hal
+ * @hal: hal_soc pointer
+ * @delay_us: delay in us
+ *
+ * Increments the histogram bucket matching @delay_us: < 100us, < 1000us,
+ * < 5000us, or >= 5000us.
+ *
+ * Return: None
+ */
+static inline void hal_reg_write_fill_sched_delay_hist(struct hal_soc *hal,
+						       uint64_t delay_us)
+{
+	uint32_t *hist;
+
+	hist = hal->stats.wstats.sched_delay;
+
+	if (delay_us < 100)
+		hist[REG_WRITE_SCHED_DELAY_SUB_100us]++;
+	else if (delay_us < 1000)
+		hist[REG_WRITE_SCHED_DELAY_SUB_1000us]++;
+	else if (delay_us < 5000)
+		hist[REG_WRITE_SCHED_DELAY_SUB_5000us]++;
+	else
+		hist[REG_WRITE_SCHED_DELAY_GT_5000us]++;
+}
+
+/**
+ * hal_reg_write_work() - Worker to process delayed writes
+ * @arg: hal_soc pointer
+ *
+ * Drains the delayed register write queue starting at read_idx until an
+ * invalid (empty) element is found. Link low power states are prevented
+ * while the queue is drained and allowed again afterwards.
+ *
+ * NOTE(review): if preventing link low power states fails, the worker returns
+ * with the valid entries still queued; they are only retried when the work is
+ * queued again.
+ *
+ * Return: None
+ */
+static void hal_reg_write_work(void *arg)
+{
+	int32_t q_depth;
+	struct hal_soc *hal = arg;
+	struct hal_reg_write_q_elem *q_elem;
+	/* 64-bit to match hal_reg_write_fill_sched_delay_hist() and %llu below */
+	uint64_t delta_us;
+
+	q_elem = &hal->reg_write_queue[(hal->read_idx)];
+
+	if (!q_elem->valid)
+		return;
+
+	q_depth = qdf_atomic_read(&hal->stats.wstats.q_depth);
+	if (q_depth > hal->stats.wstats.max_q_depth)
+		hal->stats.wstats.max_q_depth = q_depth;
+
+	if (hif_prevent_link_low_power_states(hal->hif_handle)) {
+		hal->stats.wstats.prevent_l1_fails++;
+		return;
+	}
+
+	while (q_elem->valid) {
+		q_elem->dequeue_time = qdf_get_log_timestamp();
+		delta_us = qdf_log_timestamp_to_usecs(q_elem->dequeue_time -
+						      q_elem->enqueue_time);
+		hal_reg_write_fill_sched_delay_hist(hal, delta_us);
+		/* addr is a pointer: print with %pK rather than %x */
+		hal_verbose_debug("read_idx %u srng 0x%x, addr 0x%pK val %u sched delay %llu us",
+				  hal->read_idx,
+				  q_elem->srng->ring_id,
+				  q_elem->addr,
+				  q_elem->val,
+				  delta_us);
+
+		hal->stats.wstats.dequeues++;
+		qdf_atomic_dec(&hal->stats.wstats.q_depth);
+
+		hal_process_reg_write_q_elem(hal, q_elem);
+
+		q_elem->valid = 0;
+		/* queue length is a power of 2, so the mask wraps the index */
+		hal->read_idx = (hal->read_idx + 1) &
+					(HAL_REG_WRITE_QUEUE_LEN - 1);
+		q_elem = &hal->reg_write_queue[(hal->read_idx)];
+	}
+
+	hif_allow_link_low_power_states(hal->hif_handle);
+}
+
+/**
+ * hal_flush_reg_write_work() - flush all writes from register write queue
+ * @hal: hal_soc pointer
+ *
+ * Return: None
+ */
+static inline void hal_flush_reg_write_work(struct hal_soc *hal)
+{
+	qdf_cancel_work(&hal->reg_write_work);
+	qdf_flush_work(&hal->reg_write_work);
+	qdf_flush_workqueue(0, hal->reg_write_wq);
+}
+
+/**
+ * hal_reg_write_enqueue() - enqueue register writes into kworker
+ * @hal_soc: hal_soc pointer
+ * @srng: srng pointer
+ * @addr: iomem address of register
+ * @value: value to be written to iomem address
+ *
+ * If the srng already has a write queued, the request is coalesced (counted
+ * and dropped). Otherwise the next queue slot is claimed, filled in and the
+ * register write work is queued.
+ *
+ * This function executes from within the SRNG LOCK
+ *
+ * Return: None
+ */
+static void hal_reg_write_enqueue(struct hal_soc *hal_soc,
+				  struct hal_srng *srng,
+				  void __iomem *addr,
+				  uint32_t value)
+{
+	struct hal_reg_write_q_elem *q_elem;
+	uint32_t write_idx;
+
+	if (srng->reg_write_in_progress) {
+		/* addr is a pointer: print with %pK rather than %x */
+		hal_verbose_debug("Already in progress srng ring id 0x%x addr 0x%pK val %u",
+				  srng->ring_id, addr, value);
+		qdf_atomic_inc(&hal_soc->stats.wstats.coalesces);
+		srng->wstats.coalesces++;
+		return;
+	}
+
+	write_idx = qdf_atomic_inc_return(&hal_soc->write_idx);
+
+	/* queue length is a power of 2, so the mask wraps the index */
+	write_idx = write_idx & (HAL_REG_WRITE_QUEUE_LEN - 1);
+
+	q_elem = &hal_soc->reg_write_queue[write_idx];
+
+	if (q_elem->valid) {
+		hal_err("queue full");
+		QDF_BUG(0);
+		return;
+	}
+
+	qdf_atomic_inc(&hal_soc->stats.wstats.enqueues);
+	srng->wstats.enqueues++;
+
+	qdf_atomic_inc(&hal_soc->stats.wstats.q_depth);
+
+	q_elem->srng = srng;
+	q_elem->addr = addr;
+	q_elem->val = value;
+	q_elem->enqueue_time = qdf_get_log_timestamp();
+
+	/* mark valid only after the element is fully populated */
+	q_elem->valid = true;
+
+	srng->reg_write_in_progress = true;
+
+	hal_verbose_debug("write_idx %u srng ring id 0x%x addr 0x%pK val %u",
+			  write_idx, srng->ring_id, addr, value);
+
+	qdf_queue_work(hal_soc->qdf_dev, hal_soc->reg_write_wq,
+		       &hal_soc->reg_write_work);
+}
+
+void hal_delayed_reg_write(struct hal_soc *hal_soc,
+			   struct hal_srng *srng,
+			   void __iomem *addr,
+			   uint32_t value)
+{
+	if (pld_is_device_awake(hal_soc->qdf_dev->dev) ||
+	    hal_is_reg_write_tput_level_high(hal_soc)) {
+		qdf_atomic_inc(&hal_soc->stats.wstats.direct);
+		srng->wstats.direct++;
+		hal_write_address_32_mb(hal_soc, addr, value);
+	} else {
+		hal_reg_write_enqueue(hal_soc, srng, addr, value);
+	}
+}
+
+/**
+ * hal_delayed_reg_write_init() - Initialization function for delayed reg writes
+ * @hal: hal_soc pointer
+ *
+ * Initialize main data structures to process register writes in a delayed
+ * workqueue.
+ *
+ * NOTE(review): the workqueue allocation result is not checked here, and the
+ * workqueue is not released when the queue allocation fails.
+ *
+ * Return: QDF_STATUS_SUCCESS on success else a QDF error.
+ */
+static QDF_STATUS hal_delayed_reg_write_init(struct hal_soc *hal)
+{
+	hal->reg_write_wq =
+		qdf_alloc_high_prior_ordered_workqueue("hal_register_write_wq");
+	qdf_create_work(0, &hal->reg_write_work, hal_reg_write_work, hal);
+	hal->reg_write_queue = qdf_mem_malloc(HAL_REG_WRITE_QUEUE_LEN *
+					      sizeof(*hal->reg_write_queue));
+	if (!hal->reg_write_queue) {
+		hal_err("unable to allocate memory");
+		QDF_BUG(0);
+		return QDF_STATUS_E_NOMEM;
+	}
+
+	/* Initial value of indices */
+	hal->read_idx = 0;
+	qdf_atomic_set(&hal->write_idx, -1);
+	return QDF_STATUS_SUCCESS;
+}
+
+/**
+ * hal_delayed_reg_write_deinit() - De-Initialize delayed reg write processing
+ * @hal: hal_soc pointer
+ *
+ * De-initialize main data structures to process register writes in a delayed
+ * workqueue.
+ *
+ * Return: None
+ */
+static void hal_delayed_reg_write_deinit(struct hal_soc *hal)
+{
+	hal_flush_reg_write_work(hal);
+	qdf_destroy_workqueue(0, hal->reg_write_wq);
+	qdf_mem_free(hal->reg_write_queue);
+}
+
+static inline
+char *hal_fill_reg_write_srng_stats(struct hal_srng *srng,
+				    char *buf, qdf_size_t size)
+{
+	qdf_scnprintf(buf, size, "enq %u deq %u coal %u direct %u",
+		      srng->wstats.enqueues, srng->wstats.dequeues,
+		      srng->wstats.coalesces, srng->wstats.direct);
+	return buf;
+}
+
+/* bytes for local buffer */
+#define HAL_REG_WRITE_SRNG_STATS_LEN 100
+
+void hal_dump_reg_write_srng_stats(hal_soc_handle_t hal_soc_hdl)
+{
+	struct hal_srng *srng;
+	char buf[HAL_REG_WRITE_SRNG_STATS_LEN];
+	struct hal_soc *hal = (struct hal_soc *)hal_soc_hdl;
+
+	srng = hal_get_srng(hal, HAL_SRNG_SW2TCL1);
+	hal_debug("SW2TCL1: %s",
+		  hal_fill_reg_write_srng_stats(srng, buf, sizeof(buf)));
+
+	srng = hal_get_srng(hal, HAL_SRNG_WBM2SW0_RELEASE);
+	hal_debug("WBM2SW0: %s",
+		  hal_fill_reg_write_srng_stats(srng, buf, sizeof(buf)));
+
+	srng = hal_get_srng(hal, HAL_SRNG_REO2SW1);
+	hal_debug("REO2SW1: %s",
+		  hal_fill_reg_write_srng_stats(srng, buf, sizeof(buf)));
+
+	srng = hal_get_srng(hal, HAL_SRNG_REO2SW2);
+	hal_debug("REO2SW2: %s",
+		  hal_fill_reg_write_srng_stats(srng, buf, sizeof(buf)));
+
+	srng = hal_get_srng(hal, HAL_SRNG_REO2SW3);
+	hal_debug("REO2SW3: %s",
+		  hal_fill_reg_write_srng_stats(srng, buf, sizeof(buf)));
+}
+
+void hal_dump_reg_write_stats(hal_soc_handle_t hal_soc_hdl)
+{
+	uint32_t *hist;
+	struct hal_soc *hal = (struct hal_soc *)hal_soc_hdl;
+
+	hist = hal->stats.wstats.sched_delay;
+
+	hal_debug("enq %u deq %u coal %u direct %u q_depth %u max_q %u sched-delay hist %u %u %u %u",
+		  qdf_atomic_read(&hal->stats.wstats.enqueues),
+		  hal->stats.wstats.dequeues,
+		  qdf_atomic_read(&hal->stats.wstats.coalesces),
+		  qdf_atomic_read(&hal->stats.wstats.direct),
+		  qdf_atomic_read(&hal->stats.wstats.q_depth),
+		  hal->stats.wstats.max_q_depth,
+		  hist[REG_WRITE_SCHED_DELAY_SUB_100us],
+		  hist[REG_WRITE_SCHED_DELAY_SUB_1000us],
+		  hist[REG_WRITE_SCHED_DELAY_SUB_5000us],
+		  hist[REG_WRITE_SCHED_DELAY_GT_5000us]);
+}
+#else
+static inline QDF_STATUS hal_delayed_reg_write_init(struct hal_soc *hal)
+{
+	return QDF_STATUS_SUCCESS;
+}
+
+static inline void hal_delayed_reg_write_deinit(struct hal_soc *hal)
+{
+}
+#endif
+
 /**
  * hal_attach - Initialize HAL layer
  * @hif_handle: Opaque HIF handle
@@ -402,7 +735,9 @@ void *hal_attach(struct hif_opaque_softc *hif_handle, qdf_device_t qdf_dev)
 	hal->target_type = hal_get_target_type(hal_soc_to_hal_soc_handle(hal));
 
 	hal_target_based_configure(hal);
+
 	hal_reg_write_fail_history_init(hal);
+
 	/**
 	 * Indicate Initialization of srngs to avoid force wake
 	 * as umac power collapse is not enabled yet
@@ -411,6 +746,8 @@ void *hal_attach(struct hif_opaque_softc *hif_handle, qdf_device_t qdf_dev)
 
 	qdf_minidump_log(hal, sizeof(*hal), "hal_soc");
 
+	hal_delayed_reg_write_init(hal);
+
 	return (void *)hal;
 
 fail2:
@@ -459,6 +796,8 @@ extern void hal_detach(void *hal_soc)
 {
 	struct hal_soc *hal = (struct hal_soc *)hal_soc;
 
+	hal_delayed_reg_write_deinit(hal);
+
 	qdf_mem_free_consistent(hal->qdf_dev, hal->qdf_dev->dev,
 		sizeof(*(hal->shadow_rdptr_mem_vaddr)) * HAL_SRNG_ID_MAX,
 		hal->shadow_rdptr_mem_vaddr, hal->shadow_rdptr_mem_paddr, 0);