qcacmn: Add delayed reg write support for wcn6450

The current HAL delayed reg write support is tied to SRNG notions.
Since WCN6450 does not use the SRNG interface, implement the delayed
reg write logic in HIF instead.

A new feature flag, FEATURE_HIF_DELAYED_REG_WRITE, is introduced to
enable/disable this support.

Change-Id: Id7087ad53cd5879cf49ee0e84dd727de61137541
CRs-Fixed: 3519702
Venkateswara Naralasetty, 2 years ago
parent commit a3a5a72bbd
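
In essence, this change replicates HAL's delayed register write scheme at
the HIF layer: CE write-index updates are enqueued into a fixed-size,
power-of-two queue and drained to hardware by a high-priority worker, with
a per-element valid flag plus memory barriers handing entries between CPUs.
Below is a simplified, self-contained C model of that producer/consumer
protocol (the names and the standalone C11 atomics are illustrative only;
the actual implementation uses qdf_atomic_t/qdf_wmb() and lives in
hif/src/hif_main.c further down):

    /* Simplified model of the delayed reg write queue protocol. */
    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    #define QUEUE_LEN 32 /* must be a power of two for the index mask */

    struct q_elem {
            uint32_t offset;   /* register offset to write */
            uint32_t value;    /* value to write */
            atomic_bool valid; /* set last, with release semantics */
    };

    static struct q_elem queue[QUEUE_LEN];
    static atomic_int write_idx = -1; /* pre-increment yields slot 0 first */
    static uint32_t read_idx;         /* worker-private, no atomics needed */

    /* Producer: called from the CE write-index update path. */
    static bool enqueue(uint32_t offset, uint32_t value)
    {
            int idx = atomic_fetch_add(&write_idx, 1) + 1;
            struct q_elem *e = &queue[idx & (QUEUE_LEN - 1)];

            if (atomic_load(&e->valid))
                    return false; /* queue full; the driver QDF_BUGs here */

            e->offset = offset;
            e->value = value;
            /* Publish fields before the valid flag (qdf_wmb() in the driver) */
            atomic_store_explicit(&e->valid, true, memory_order_release);
            return true;
    }

    /* Consumer: the worker drains entries until it hits an invalid
     * element, writing each entry's value to its register offset. */
    static bool dequeue(uint32_t *offset, uint32_t *value)
    {
            struct q_elem *e = &queue[read_idx & (QUEUE_LEN - 1)];

            if (!atomic_load_explicit(&e->valid, memory_order_acquire))
                    return false; /* nothing pending */

            *offset = e->offset;
            *value = e->value;
            atomic_store(&e->valid, false); /* hand slot back to producers */
            read_idx++;
            return true;
    }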

+ 15 - 3
dp/wifi3.0/dp_main.c

@@ -6812,6 +6812,19 @@ static bool dp_umac_rst_skel_enable_get(struct dp_soc *soc)
 }
 #endif
 
+#ifndef WLAN_SOFTUMAC_SUPPORT
+static void dp_print_reg_write_stats(struct dp_soc *soc)
+{
+	hal_dump_reg_write_stats(soc->hal_soc);
+	hal_dump_reg_write_srng_stats(soc->hal_soc);
+}
+#else
+static void dp_print_reg_write_stats(struct dp_soc *soc)
+{
+	hif_print_reg_write_stats(soc->hif_handle);
+}
+#endif
+
 /**
  * dp_print_host_stats()- Function to print the stats aggregated at host
  * @vdev: DP_VDEV handle
@@ -6883,8 +6896,7 @@ dp_print_host_stats(struct dp_vdev *vdev,
 						CDP_FISA_STATS_ID_DUMP_HW_FST);
 		break;
 	case TXRX_HAL_REG_WRITE_STATS:
-		hal_dump_reg_write_stats(pdev->soc->hal_soc);
-		hal_dump_reg_write_srng_stats(pdev->soc->hal_soc);
+		dp_print_reg_write_stats(pdev->soc);
 		break;
 	case TXRX_SOC_REO_HW_DESC_DUMP:
 		dp_get_rx_reo_queue_info((struct cdp_soc_t *)pdev->soc,
@@ -9061,7 +9073,7 @@ static QDF_STATUS dp_txrx_dump_stats(struct cdp_soc_t *psoc, uint16_t value,
 	case CDP_TXRX_PATH_STATS:
 		dp_txrx_path_stats(soc);
 		dp_print_soc_interrupt_stats(soc);
-		hal_dump_reg_write_stats(soc->hal_soc);
+		dp_print_reg_write_stats(soc);
 		dp_pdev_print_tx_delay_stats(soc);
 		/* Dump usage watermark stats for core TX/RX SRNGs */
 		dp_dump_srng_high_wm_stats(soc, (1 << REO_DST));
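
Note: with WLAN_SOFTUMAC_SUPPORT (the soft-UMAC configuration that covers
WCN6450), dp_print_reg_write_stats() dumps the HIF-level counters via
hif_print_reg_write_stats(); SRNG-based targets keep the existing HAL dump
paths.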

+ 16 - 1
hif/inc/hif.h

@@ -2163,7 +2163,8 @@ int hif_force_wake_release(struct hif_opaque_softc *handle)
 }
 #endif /* FORCE_WAKE */
 
-#ifdef FEATURE_HAL_DELAYED_REG_WRITE
+#if defined(FEATURE_HAL_DELAYED_REG_WRITE) || \
+	defined(FEATURE_HIF_DELAYED_REG_WRITE)
 /**
  * hif_prevent_link_low_power_states() - Prevent from going to low power states
  * @hif: HIF opaque context
@@ -2934,4 +2935,18 @@ void hif_affinity_mgr_affine_irq(struct hif_softc *scn)
  * Return: None
  */
 void hif_affinity_mgr_set_affinity(struct hif_opaque_softc *scn);
+
+#ifdef FEATURE_HIF_DELAYED_REG_WRITE
+/**
+ * hif_print_reg_write_stats() - Print hif delayed reg write stats
+ * @hif_ctx: hif opaque handle
+ *
+ * Return: None
+ */
+void hif_print_reg_write_stats(struct hif_opaque_softc *hif_ctx);
+#else
+static inline void hif_print_reg_write_stats(struct hif_opaque_softc *hif_ctx)
+{
+}
+#endif
 #endif /* _HIF_H_ */

+ 24 - 0
hif/src/ce/ce_internal.h

@@ -136,6 +136,24 @@ struct CE_ring_state {
 	uint64_t last_flush_ts;
 };
 
+#ifdef FEATURE_HIF_DELAYED_REG_WRITE
+/**
+ * struct ce_reg_write_stats - stats to keep track of register writes
+ * @enqueues: writes enqueued to delayed work
+ * @dequeues: writes dequeued from delayed work (not written yet)
+ * @coalesces: writes not enqueued since the CE is already queued up
+ * @direct: writes not enqueued and written to register directly
+ * @dequeue_delay: number of times the dequeue operation was delayed
+ */
+struct ce_reg_write_stats {
+	uint32_t enqueues;
+	uint32_t dequeues;
+	uint32_t coalesces;
+	uint32_t direct;
+	uint32_t dequeue_delay;
+};
+#endif
+
 /* Copy Engine internal state */
 struct CE_state {
 	struct hif_softc *scn;
@@ -206,6 +224,12 @@ struct CE_state {
 #endif
 	bool msi_supported;
 	bool batch_intr_supported;
+#ifdef FEATURE_HIF_DELAYED_REG_WRITE
+	struct ce_reg_write_stats wstats;
+	uint8_t reg_write_in_progress;
+	qdf_time_t last_dequeue_time;
+#endif
+	uint32_t ce_wrt_idx_offset;
 };
 
 /* Descriptor rings must be aligned to this boundary */

+ 23 - 0
hif/src/ce/ce_main.c

@@ -1591,6 +1591,12 @@ static bool ce_mark_datapath(struct CE_state *ce_state)
 static void ce_update_msi_batch_intr_flags(struct CE_state *ce_state)
 {
 }
+
+static inline void ce_update_wrt_idx_offset(struct hif_softc *scn,
+					    struct CE_state *ce_state,
+					    uint8_t ring_type)
+{
+}
 #else
 static bool ce_mark_datapath(struct CE_state *ce_state)
 {
@@ -1625,6 +1631,20 @@ static void ce_update_msi_batch_intr_flags(struct CE_state *ce_state)
 	ce_state->msi_supported = true;
 	ce_state->batch_intr_supported = true;
 }
+
+static inline void ce_update_wrt_idx_offset(struct hif_softc *scn,
+					    struct CE_state *ce_state,
+					    uint8_t ring_type)
+{
+	if (ring_type == CE_RING_SRC)
+		ce_state->ce_wrt_idx_offset =
+			CE_SRC_WR_IDX_OFFSET_GET(scn, ce_state->ctrl_addr);
+	else if (ring_type == CE_RING_DEST)
+		ce_state->ce_wrt_idx_offset =
+			CE_DST_WR_IDX_OFFSET_GET(scn, ce_state->ctrl_addr);
+	else
+		QDF_BUG(0);
+}
 #endif
 
 /**
@@ -2643,6 +2663,9 @@ struct CE_handle *ce_init(struct hif_softc *scn,
 		goto error_target_access;
 
 	ce_update_msi_batch_intr_flags(CE_state);
+	ce_update_wrt_idx_offset(scn, CE_state,
+				 attr->src_nentries ?
+				 CE_RING_SRC : CE_RING_DEST);
 
 	return (struct CE_handle *)CE_state;
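
Note: ce_update_wrt_idx_offset() caches the write-index register offset per
CE at init time (the ring type is derived from attr->src_nentries, so each
CE records either its source or destination offset). The delayed-write
worker can then issue the final hal_write32_mb() against ce_wrt_idx_offset
directly, without re-deriving the shadow/CTRL-address mapping on every
dequeue.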
 

+ 16 - 4
hif/src/ce/ce_reg.h

@@ -624,17 +624,29 @@ unsigned int hif_get_dst_ring_read_index(struct hif_softc *scn,
 #define NUM_SHADOW_REGISTERS 24
 u32 shadow_sr_wr_ind_addr(struct hif_softc *scn, u32 ctrl_addr);
 u32 shadow_dst_wr_ind_addr(struct hif_softc *scn, u32 ctrl_addr);
-#endif
 
+#define CE_SRC_WR_IDX_OFFSET_GET(scn, CE_ctrl_addr) \
+	shadow_sr_wr_ind_addr(scn, CE_ctrl_addr)
+#define CE_DST_WR_IDX_OFFSET_GET(scn, CE_ctrl_addr) \
+	shadow_dst_wr_ind_addr(scn, CE_ctrl_addr)
+#else
+#define CE_SRC_WR_IDX_OFFSET_GET(scn, CE_ctrl_addr) \
+	CE_ctrl_addr + SR_WR_INDEX_ADDRESS
+#define CE_DST_WR_IDX_OFFSET_GET(scn, CE_ctrl_addr) \
+	CE_ctrl_addr + DST_WR_INDEX_ADDRESS
+#endif
 
-#ifdef ADRASTEA_SHADOW_REGISTERS
+#if defined(FEATURE_HIF_DELAYED_REG_WRITE)
+#define CE_SRC_RING_WRITE_IDX_SET(scn, CE_ctrl_addr, n) \
+	A_TARGET_DELAYED_REG_WRITE(scn, CE_ctrl_addr, n)
+#define CE_DEST_RING_WRITE_IDX_SET(scn, CE_ctrl_addr, n) \
+	A_TARGET_DELAYED_REG_WRITE(scn, CE_ctrl_addr, n)
+#elif defined(ADRASTEA_SHADOW_REGISTERS)
 #define CE_SRC_RING_WRITE_IDX_SET(scn, CE_ctrl_addr, n) \
 	A_TARGET_WRITE(scn, shadow_sr_wr_ind_addr(scn, CE_ctrl_addr), n)
 #define CE_DEST_RING_WRITE_IDX_SET(scn, CE_ctrl_addr, n) \
 	A_TARGET_WRITE(scn, shadow_dst_wr_ind_addr(scn, CE_ctrl_addr), n)
-
 #else
-
 #define CE_SRC_RING_WRITE_IDX_SET(scn, CE_ctrl_addr, n) \
 	A_TARGET_WRITE(scn, (CE_ctrl_addr) + SR_WR_INDEX_ADDRESS, (n))
 #define CE_DEST_RING_WRITE_IDX_SET(scn, CE_ctrl_addr, n) \

+ 5 - 0
hif/src/hif_io32.h

@@ -324,6 +324,11 @@ void hif_target_write_checked(struct hif_softc *scn, uint32_t offset,
 	hif_write32_mb(scn, (scn->mem) + (offset), value)
 #endif
 
+#ifdef FEATURE_HIF_DELAYED_REG_WRITE
+#define A_TARGET_DELAYED_REG_WRITE(scn, ctrl_addr, val) \
+	hif_delayed_reg_write(scn, ctrl_addr, val)
+#endif
+
 void hif_irq_enable(struct hif_softc *scn, int irq_id);
 void hif_irq_disable(struct hif_softc *scn, int irq_id);
 

+ 361 - 0
hif/src/hif_main.c

@@ -55,6 +55,8 @@
 #include <linux/cpumask.h>
 
 #include <pld_common.h>
+#include "ce_internal.h"
+#include <qdf_tracepoint.h>
 
 void hif_dump(struct hif_opaque_softc *hif_ctx, uint8_t cmd_id, bool start)
 {
@@ -1392,6 +1394,11 @@ static inline int hif_get_num_pending_work(struct hif_softc *scn)
 {
 	return hal_get_reg_write_pending_work(scn->hal_soc);
 }
+#elif defined(FEATURE_HIF_DELAYED_REG_WRITE)
+static inline int hif_get_num_pending_work(struct hif_softc *scn)
+{
+	return qdf_atomic_read(&scn->active_work_cnt);
+}
 #else
 
 static inline int hif_get_num_pending_work(struct hif_softc *scn)
@@ -1527,6 +1534,354 @@ uint8_t hif_get_ep_vote_access(struct hif_opaque_softc *hif_ctx,
 }
 #endif
 
+#ifdef FEATURE_HIF_DELAYED_REG_WRITE
+#ifdef MEMORY_DEBUG
+#define HIF_REG_WRITE_QUEUE_LEN 128
+#else
+#define HIF_REG_WRITE_QUEUE_LEN 32
+#endif
+
+/**
+ * hif_print_reg_write_stats() - Print hif delayed reg write stats
+ * @hif_ctx: hif opaque handle
+ *
+ * Return: None
+ */
+void hif_print_reg_write_stats(struct hif_opaque_softc *hif_ctx)
+{
+	struct hif_softc *scn = HIF_GET_SOFTC(hif_ctx);
+	struct CE_state *ce_state;
+	uint32_t *hist;
+	int i;
+
+	hist = scn->wstats.sched_delay;
+	hif_debug("wstats: enq %u deq %u coal %u direct %u q_depth %u max_q %u sched-delay hist %u %u %u %u",
+		  qdf_atomic_read(&scn->wstats.enqueues),
+		  scn->wstats.dequeues,
+		  qdf_atomic_read(&scn->wstats.coalesces),
+		  qdf_atomic_read(&scn->wstats.direct),
+		  qdf_atomic_read(&scn->wstats.q_depth),
+		  scn->wstats.max_q_depth,
+		  hist[HIF_REG_WRITE_SCHED_DELAY_SUB_100us],
+		  hist[HIF_REG_WRITE_SCHED_DELAY_SUB_1000us],
+		  hist[HIF_REG_WRITE_SCHED_DELAY_SUB_5000us],
+		  hist[HIF_REG_WRITE_SCHED_DELAY_GT_5000us]);
+
+	for (i = 0; i < scn->ce_count; i++) {
+		ce_state = scn->ce_id_to_state[i];
+		if (!ce_state)
+			continue;
+
+		hif_debug("ce%d: enq %u deq %u coal %u direct %u",
+			  i, ce_state->wstats.enqueues,
+			  ce_state->wstats.dequeues,
+			  ce_state->wstats.coalesces,
+			  ce_state->wstats.direct);
+	}
+}
+
+/**
+ * hif_is_reg_write_tput_level_high() - throughput level for delayed reg writes
+ * @scn: hif_softc pointer
+ *
+ * Return: true if throughput is high, else false.
+ */
+static inline bool hif_is_reg_write_tput_level_high(struct hif_softc *scn)
+{
+	int bw_level = hif_get_bandwidth_level(GET_HIF_OPAQUE_HDL(scn));
+
+	return (bw_level >= PLD_BUS_WIDTH_MEDIUM) ? true : false;
+}
+
+/**
+ * hif_reg_write_fill_sched_delay_hist() - fill reg write delay histogram
+ * @scn: hif_softc pointer
+ * @delay_us: delay in us
+ *
+ * Return: None
+ */
+static inline void hif_reg_write_fill_sched_delay_hist(struct hif_softc *scn,
+						       uint64_t delay_us)
+{
+	uint32_t *hist;
+
+	hist = scn->wstats.sched_delay;
+
+	if (delay_us < 100)
+		hist[HIF_REG_WRITE_SCHED_DELAY_SUB_100us]++;
+	else if (delay_us < 1000)
+		hist[HIF_REG_WRITE_SCHED_DELAY_SUB_1000us]++;
+	else if (delay_us < 5000)
+		hist[HIF_REG_WRITE_SCHED_DELAY_SUB_5000us]++;
+	else
+		hist[HIF_REG_WRITE_SCHED_DELAY_GT_5000us]++;
+}
+
+/**
+ * hif_process_reg_write_q_elem() - process a register write queue element
+ * @scn: hif_softc pointer
+ * @q_elem: pointer to hal register write queue element
+ *
+ * Return: The value which was written to the address
+ */
+static int32_t
+hif_process_reg_write_q_elem(struct hif_softc *scn,
+			     struct hif_reg_write_q_elem *q_elem)
+{
+	struct CE_state *ce_state = q_elem->ce_state;
+	uint32_t write_val = -1;
+
+	qdf_spin_lock_bh(&ce_state->ce_index_lock);
+
+	ce_state->reg_write_in_progress = false;
+	ce_state->wstats.dequeues++;
+
+	if (ce_state->src_ring) {
+		q_elem->dequeue_val = ce_state->src_ring->write_index;
+		hal_write32_mb(scn->hal_soc, ce_state->ce_wrt_idx_offset,
+			       ce_state->src_ring->write_index);
+		write_val = ce_state->src_ring->write_index;
+	} else if (ce_state->dest_ring) {
+		q_elem->dequeue_val = ce_state->dest_ring->write_index;
+		hal_write32_mb(scn->hal_soc, ce_state->ce_wrt_idx_offset,
+			       ce_state->dest_ring->write_index);
+		write_val = ce_state->dest_ring->write_index;
+	} else {
+		hif_debug("invalid reg write received");
+		qdf_assert(0);
+	}
+
+	q_elem->valid = 0;
+	ce_state->last_dequeue_time = q_elem->dequeue_time;
+
+	qdf_spin_unlock_bh(&ce_state->ce_index_lock);
+
+	return write_val;
+}
+
+/**
+ * hif_reg_write_work() - Worker to process delayed writes
+ * @arg: hif_softc pointer
+ *
+ * Return: None
+ */
+static void hif_reg_write_work(void *arg)
+{
+	struct hif_softc *scn = arg;
+	struct hif_reg_write_q_elem *q_elem;
+	uint32_t offset;
+	uint64_t delta_us;
+	int32_t q_depth, write_val;
+	uint32_t num_processed = 0;
+	int32_t ring_id;
+
+	q_elem = &scn->reg_write_queue[scn->read_idx];
+	q_elem->work_scheduled_time = qdf_get_log_timestamp();
+	q_elem->cpu_id = qdf_get_cpu();
+
+	/* Ensure q_elem is consistent in memory across CPUs */
+	qdf_rmb();
+	if (!q_elem->valid)
+		return;
+
+	q_depth = qdf_atomic_read(&scn->wstats.q_depth);
+	if (q_depth > scn->wstats.max_q_depth)
+		scn->wstats.max_q_depth = q_depth;
+
+	if (hif_prevent_link_low_power_states(GET_HIF_OPAQUE_HDL(scn))) {
+		scn->wstats.prevent_l1_fails++;
+		return;
+	}
+
+	while (true) {
+		qdf_rmb();
+		if (!q_elem->valid)
+			break;
+
+		q_elem->dequeue_time = qdf_get_log_timestamp();
+		ring_id = q_elem->ce_state->id;
+		offset = q_elem->offset;
+		delta_us = qdf_log_timestamp_to_usecs(q_elem->dequeue_time -
+						      q_elem->enqueue_time);
+		hif_reg_write_fill_sched_delay_hist(scn, delta_us);
+
+		scn->wstats.dequeues++;
+		qdf_atomic_dec(&scn->wstats.q_depth);
+
+		write_val = hif_process_reg_write_q_elem(scn, q_elem);
+		hif_debug("read_idx %u ce_id %d offset 0x%x dequeue_val %d",
+			  scn->read_idx, ring_id, offset, write_val);
+
+		qdf_trace_dp_del_reg_write(ring_id, q_elem->enqueue_val,
+					   q_elem->dequeue_val,
+					   q_elem->enqueue_time,
+					   q_elem->dequeue_time);
+		num_processed++;
+		scn->read_idx = (scn->read_idx + 1) &
+					(HIF_REG_WRITE_QUEUE_LEN - 1);
+		q_elem = &scn->reg_write_queue[scn->read_idx];
+	}
+
+	hif_allow_link_low_power_states(GET_HIF_OPAQUE_HDL(scn));
+
+	/*
+	 * Decrement active_work_cnt by the number of elements dequeued after
+	 * hif_allow_link_low_power_states.
+	 * This makes sure that hif_try_complete_tasks will wait till we make
+	 * the bus access in hif_allow_link_low_power_states. This will avoid
+	 * race condition between delayed register worker and bus suspend
+	 * (system suspend or runtime suspend).
+	 *
+	 * The following decrement should be done at the end!
+	 */
+	qdf_atomic_sub(num_processed, &scn->active_work_cnt);
+}
+
+/**
+ * hif_delayed_reg_write_deinit() - De-initialize delayed reg write processing
+ * @scn: hif_softc pointer
+ *
+ * De-initialize main data structures to process register writes in a delayed
+ * workqueue.
+ *
+ * Return: None
+ */
+static void hif_delayed_reg_write_deinit(struct hif_softc *scn)
+{
+	qdf_flush_work(&scn->reg_write_work);
+	qdf_disable_work(&scn->reg_write_work);
+	qdf_flush_workqueue(0, scn->reg_write_wq);
+	qdf_destroy_workqueue(0, scn->reg_write_wq);
+	qdf_mem_free(scn->reg_write_queue);
+}
+
+/**
+ * hif_delayed_reg_write_init() - Initialization function for delayed reg writes
+ * @scn: hif_softc pointer
+ *
+ * Initialize main data structures to process register writes in a delayed
+ * workqueue.
+ *
+ * Return: QDF_STATUS
+ */
+static QDF_STATUS hif_delayed_reg_write_init(struct hif_softc *scn)
+{
+	qdf_atomic_init(&scn->active_work_cnt);
+	scn->reg_write_wq =
+		qdf_alloc_high_prior_ordered_workqueue("hif_register_write_wq");
+	qdf_create_work(0, &scn->reg_write_work, hif_reg_write_work, scn);
+	scn->reg_write_queue = qdf_mem_malloc(HIF_REG_WRITE_QUEUE_LEN *
+					      sizeof(*scn->reg_write_queue));
+	if (!scn->reg_write_queue) {
+		hif_err("unable to allocate memory for delayed reg write");
+		QDF_BUG(0);
+		return QDF_STATUS_E_NOMEM;
+	}
+
+	/* Initial value of indices */
+	scn->read_idx = 0;
+	qdf_atomic_set(&scn->write_idx, -1);
+
+	return QDF_STATUS_SUCCESS;
+}
+
+static void hif_reg_write_enqueue(struct hif_softc *scn,
+				  struct CE_state *ce_state,
+				  uint32_t value)
+{
+	struct hif_reg_write_q_elem *q_elem;
+	uint32_t write_idx;
+
+	if (ce_state->reg_write_in_progress) {
+		hif_debug("Already in progress ce_id %d offset 0x%x value %u",
+			  ce_state->id, ce_state->ce_wrt_idx_offset, value);
+		qdf_atomic_inc(&scn->wstats.coalesces);
+		ce_state->wstats.coalesces++;
+		return;
+	}
+
+	write_idx = qdf_atomic_inc_return(&scn->write_idx);
+	write_idx = write_idx & (HIF_REG_WRITE_QUEUE_LEN - 1);
+
+	q_elem = &scn->reg_write_queue[write_idx];
+	if (q_elem->valid) {
+		hif_err("queue full");
+		QDF_BUG(0);
+		return;
+	}
+
+	qdf_atomic_inc(&scn->wstats.enqueues);
+	ce_state->wstats.enqueues++;
+
+	qdf_atomic_inc(&scn->wstats.q_depth);
+
+	q_elem->ce_state = ce_state;
+	q_elem->offset = ce_state->ce_wrt_idx_offset;
+	q_elem->enqueue_val = value;
+	q_elem->enqueue_time = qdf_get_log_timestamp();
+
+	/*
+	 * Before the valid flag is set to true, all the other
+	 * fields in the q_elem need to be updated in memory.
+	 * Else there is a chance that the dequeuing worker thread
+	 * might read stale entries and process an incorrect CE.
+	 */
+	qdf_wmb();
+	q_elem->valid = true;
+
+	/*
+	 * After all other fields in the q_elem have been updated
+	 * in memory successfully, the valid flag needs to reach
+	 * memory in time as well.
+	 * Else there is a chance that the dequeuing worker thread
+	 * might read a stale valid flag and the work will be bypassed
+	 * for this round. And if no other work is scheduled later,
+	 * this register write would never be issued.
+	 */
+	qdf_wmb();
+
+	ce_state->reg_write_in_progress = true;
+	qdf_atomic_inc(&scn->active_work_cnt);
+
+	hif_debug("write_idx %u ce_id %d offset 0x%x value %u",
+		  write_idx, ce_state->id, ce_state->ce_wrt_idx_offset, value);
+
+	qdf_queue_work(scn->qdf_dev, scn->reg_write_wq,
+		       &scn->reg_write_work);
+}
+
+void hif_delayed_reg_write(struct hif_softc *scn, uint32_t ctrl_addr,
+			   uint32_t val)
+{
+	struct CE_state *ce_state;
+	int ce_id = COPY_ENGINE_ID(ctrl_addr);
+
+	ce_state = scn->ce_id_to_state[ce_id];
+
+	if (!ce_state->htt_tx_data && !ce_state->htt_rx_data) {
+		hif_reg_write_enqueue(scn, ce_state, val);
+		return;
+	}
+
+	if (hif_is_reg_write_tput_level_high(scn) ||
+	    (PLD_MHI_STATE_L0 == pld_get_mhi_state(scn->qdf_dev->dev))) {
+		hal_write32_mb(scn->hal_soc, ce_state->ce_wrt_idx_offset, val);
+		qdf_atomic_inc(&scn->wstats.direct);
+		ce_state->wstats.direct++;
+	} else {
+		hif_reg_write_enqueue(scn, ce_state, val);
+	}
+}
+#else
+static inline QDF_STATUS hif_delayed_reg_write_init(struct hif_softc *scn)
+{
+	return QDF_STATUS_SUCCESS;
+}
+
+static inline void hif_delayed_reg_write_deinit(struct hif_softc *scn)
+{
+}
+#endif
+
 #if defined(QCA_WIFI_WCN6450)
 static QDF_STATUS hif_hal_attach(struct hif_softc *scn)
 {
@@ -1644,6 +1999,11 @@ QDF_STATUS hif_enable(struct hif_opaque_softc *hif_ctx, struct device *dev,
 		goto disable_bus;
 	}
 
+	if (hif_delayed_reg_write_init(scn) != QDF_STATUS_SUCCESS) {
+		hif_err("unable to initialize delayed reg write");
+		goto hal_detach;
+	}
+
 	if (hif_bus_configure(scn)) {
 		hif_err("Target probe failed");
 		status = QDF_STATUS_E_FAILURE;
@@ -1682,6 +2042,7 @@ void hif_disable(struct hif_opaque_softc *hif_ctx, enum hif_disable_type type)
 	if (!scn)
 		return;
 
+	hif_delayed_reg_write_deinit(scn);
 	hif_set_enable_detection(hif_ctx, false);
 	hif_latency_detect_timer_stop(hif_ctx);
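
Once the feature is enabled, the new counters surface through
TXRX_HAL_REG_WRITE_STATS and CDP_TXRX_PATH_STATS. A hypothetical dump,
following the format strings above (all values invented for illustration):

    wstats: enq 5120 deq 5120 coal 16 direct 4 q_depth 0 max_q 6 sched-delay hist 4930 150 30 10
    ce3: enq 5120 deq 5120 coal 16 direct 4

A large 'coal' count means writes were absorbed because that CE already had
a write queued; growth in the last sched-delay bucket (>= 5000us) points at
worker scheduling or bus wakeup latency.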
 

+ 102 - 0
hif/src/hif_main.h

@@ -290,6 +290,72 @@ struct hif_umac_reset_ctx {
 
 #define MAX_SHADOW_REGS 40
 
+#ifdef FEATURE_HIF_DELAYED_REG_WRITE
+/**
+ * enum hif_reg_sched_delay - ENUM for write sched delay histogram
+ * @HIF_REG_WRITE_SCHED_DELAY_SUB_100us: index for delay < 100us
+ * @HIF_REG_WRITE_SCHED_DELAY_SUB_1000us: index for delay < 1000us
+ * @HIF_REG_WRITE_SCHED_DELAY_SUB_5000us: index for delay < 5000us
+ * @HIF_REG_WRITE_SCHED_DELAY_GT_5000us: index for delay >= 5000us
+ * @HIF_REG_WRITE_SCHED_DELAY_HIST_MAX: Max value (size of histogram array)
+ */
+enum hif_reg_sched_delay {
+	HIF_REG_WRITE_SCHED_DELAY_SUB_100us,
+	HIF_REG_WRITE_SCHED_DELAY_SUB_1000us,
+	HIF_REG_WRITE_SCHED_DELAY_SUB_5000us,
+	HIF_REG_WRITE_SCHED_DELAY_GT_5000us,
+	HIF_REG_WRITE_SCHED_DELAY_HIST_MAX,
+};
+
+/**
+ * struct hif_reg_write_soc_stats - soc stats to keep track of register writes
+ * @enqueues: writes enqueued to delayed work
+ * @dequeues: writes dequeued from delayed work (not written yet)
+ * @coalesces: writes not enqueued since the CE is already queued up
+ * @direct: writes not enqueued and written to register directly
+ * @prevent_l1_fails: number of times the prevent-L1 API failed
+ * @q_depth: current depth of the delayed register write queue
+ * @max_q_depth: maximum depth of the delayed register write queue
+ * @sched_delay: histogram of kernel work sched delay + bus wakeup delay
+ * @dequeue_delay: number of times the dequeue operation was delayed
+ */
+struct hif_reg_write_soc_stats {
+	qdf_atomic_t enqueues;
+	uint32_t dequeues;
+	qdf_atomic_t coalesces;
+	qdf_atomic_t direct;
+	uint32_t prevent_l1_fails;
+	qdf_atomic_t q_depth;
+	uint32_t max_q_depth;
+	uint32_t sched_delay[HIF_REG_WRITE_SCHED_DELAY_HIST_MAX];
+	uint32_t dequeue_delay;
+};
+
+/**
+ * struct hif_reg_write_q_elem - delayed register write queue element
+ * @ce_state: CE state queued for a delayed write
+ * @offset: offset of the CE register
+ * @enqueue_val: register value at the time of delayed write enqueue
+ * @dequeue_val: register value at the time of delayed write dequeue
+ * @valid: whether this entry is valid or not
+ * @enqueue_time: enqueue time (qdf_log_timestamp)
+ * @work_scheduled_time: work scheduled time (qdf_log_timestamp)
+ * @dequeue_time: dequeue time (qdf_log_timestamp)
+ * @cpu_id: record cpuid when schedule work
+ */
+struct hif_reg_write_q_elem {
+	struct CE_state *ce_state;
+	uint32_t offset;
+	uint32_t enqueue_val;
+	uint32_t dequeue_val;
+	uint8_t valid;
+	qdf_time_t enqueue_time;
+	qdf_time_t work_scheduled_time;
+	qdf_time_t dequeue_time;
+	int cpu_id;
+};
+#endif
+
 struct hif_softc {
 	struct hif_opaque_softc osc;
 	struct hif_config_info hif_config;
@@ -433,6 +499,20 @@ struct hif_softc {
 	struct qdf_mem_multi_page_t dl_recv_pages;
 	int dl_recv_pipe_num;
 #endif
+#ifdef FEATURE_HIF_DELAYED_REG_WRITE
+	/* queue(array) to hold register writes */
+	struct hif_reg_write_q_elem *reg_write_queue;
+	/* delayed work to be queued into workqueue */
+	qdf_work_t reg_write_work;
+	/* workqueue for delayed register writes */
+	qdf_workqueue_t *reg_write_wq;
+	/* write index used by caller to enqueue delayed work */
+	qdf_atomic_t write_idx;
+	/* read index used by worker thread to dequeue/write registers */
+	uint32_t read_idx;
+	struct hif_reg_write_soc_stats wstats;
+	qdf_atomic_t active_work_cnt;
+#endif /* FEATURE_HIF_DELAYED_REG_WRITE */
 };
 
 #if defined(NUM_SOC_PERF_CLUSTER) && (NUM_SOC_PERF_CLUSTER > 1)
@@ -761,4 +841,26 @@ void hif_reg_window_write(struct hif_softc *scn,
 uint32_t hif_reg_window_read(struct hif_softc *scn, uint32_t offset);
 #endif
 
+#ifdef FEATURE_HIF_DELAYED_REG_WRITE
+void hif_delayed_reg_write(struct hif_softc *scn, uint32_t ctrl_addr,
+			   uint32_t val);
+#endif
+
+#if defined(HIF_IPCI) && defined(FEATURE_HAL_DELAYED_REG_WRITE)
+static inline bool hif_is_ep_vote_access_disabled(struct hif_softc *scn)
+{
+	if ((qdf_atomic_read(&scn->dp_ep_vote_access) ==
+	     HIF_EP_VOTE_ACCESS_DISABLE) &&
+	    (qdf_atomic_read(&scn->ep_vote_access) ==
+	     HIF_EP_VOTE_ACCESS_DISABLE))
+		return true;
+
+	return false;
+}
+#else
+static inline bool hif_is_ep_vote_access_disabled(struct hif_softc *scn)
+{
+	return false;
+}
+#endif
 #endif /* __HIF_MAIN_H__ */

+ 3 - 5
hif/src/ipcie/if_ipci.c

@@ -964,7 +964,8 @@ void hif_print_ipci_stats(struct hif_ipci_softc *ipci_handle)
 }
 #endif /* FORCE_WAKE */
 
-#ifdef FEATURE_HAL_DELAYED_REG_WRITE
+#if defined(FEATURE_HAL_DELAYED_REG_WRITE) || \
+	defined(FEATURE_HIF_DELAYED_REG_WRITE)
 int hif_prevent_link_low_power_states(struct hif_opaque_softc *hif)
 {
 	struct hif_softc *scn = HIF_GET_SOFTC(hif);
@@ -975,10 +976,7 @@ int hif_prevent_link_low_power_states(struct hif_opaque_softc *hif)
 	if (pld_is_pci_ep_awake(scn->qdf_dev->dev) == -ENOTSUPP)
 		return 0;
 
-	if ((qdf_atomic_read(&scn->dp_ep_vote_access) ==
-	     HIF_EP_VOTE_ACCESS_DISABLE) &&
-	    (qdf_atomic_read(&scn->ep_vote_access) ==
-	    HIF_EP_VOTE_ACCESS_DISABLE)) {
+	if (hif_is_ep_vote_access_disabled(scn)) {
 		hif_info_high("EP access disabled in flight skip vote");
 		return 0;
 	}
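
The previously open-coded EP vote check is factored into
hif_is_ep_vote_access_disabled() (see hif_main.h above) so that
hif_prevent_link_low_power_states() builds for both delayed-write flavors,
while the vote check itself remains active only for HIF_IPCI builds with
FEATURE_HAL_DELAYED_REG_WRITE.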

+ 4 - 2
hif/src/ipcie/if_ipci.h

@@ -66,7 +66,8 @@ struct hif_ipci_stats {
 #define FORCE_WAKE_DELAY_MS 5
 #endif /* FORCE_WAKE */
 
-#ifdef FEATURE_HAL_DELAYED_REG_WRITE
+#if defined(FEATURE_HAL_DELAYED_REG_WRITE) || \
+	defined(FEATURE_HIF_DELAYED_REG_WRITE)
 #define EP_VOTE_POLL_TIME_US  50
 #define EP_VOTE_POLL_TIME_CNT 3
 #ifdef HAL_CONFIG_SLUB_DEBUG_ON
@@ -102,7 +103,8 @@ struct hif_ipci_softc {
 
 	void (*hif_ipci_get_soc_info)(struct hif_ipci_softc *sc,
 				      struct device *dev);
-#ifdef FEATURE_HAL_DELAYED_REG_WRITE
+#if defined(FEATURE_HAL_DELAYED_REG_WRITE) || \
+	defined(FEATURE_HIF_DELAYED_REG_WRITE)
 	uint32_t ep_awake_reset_fail;
 	uint32_t prevent_l1_fail;
 	uint32_t ep_awake_set_fail;