Browse Source

qcacld-3.0: Handle TX register write coalesces

Currently for every packet transmission, the head
pointer register is updated. Each write corresponds
to one PCIe write.

Add the logic to coalesce the head pointer
register writes to reduce the PCIe writes. This
also helps in PCIe bus staying in low power states
for some more time and thereby reducing power
usage.

Change-Id: I10fe0499222fbcf94305c681008d12449cda5359
CRs-Fixed: 2769013
Author: Rakesh Pillai — committed 4 years ago (parent commit d250c88d4c)
2 changed files with 311 additions and 28 deletions:
  1. core/dp/txrx3.0/dp_swlm.c (+166, -28)
  2. core/dp/txrx3.0/dp_swlm.h (+145, -0)

+ 166 - 28
core/dp/txrx3.0/dp_swlm.c

@@ -19,10 +19,45 @@
 #include "dp_swlm.h"
 
 /**
- * dp_tcl_should_coalesc() - To know if the current TCL reg write is to be
- *			     processed or coalesced.
+ * dp_swlm_is_tput_thresh_reached() - Calculate the current tx and rx TPUT
+ *				      and check if it passes the pre-set
+ *				      threshold.
  * @soc: Datapath global soc handle
- * @tx_desc: descriptor for the current packet to be transmitted
+ *
+ * This function calculates the current TX and RX throughput and checks
+ * if it is above the pre-set thresholds by SWLM.
+ *
+ * Returns: true, if the TX/RX throughput is passing the threshold
+ *	    false, otherwise
+ */
+static bool dp_swlm_is_tput_thresh_reached(struct dp_soc *soc)
+{
+	struct dp_swlm *swlm = &soc->swlm;
+	static int prev_rx_bytes, prev_tx_bytes;
+	int rx_delta, tx_delta;
+
+	tx_delta = soc->stats.tx.egress.bytes - prev_tx_bytes;
+	prev_tx_bytes = soc->stats.tx.egress.bytes;
+	if (tx_delta > swlm->params.tcl.tx_traffic_thresh) {
+		swlm->params.tcl.sampling_session_tx_bytes = tx_delta;
+		return true;
+	}
+
+	rx_delta = soc->stats.rx.ingress.bytes - prev_rx_bytes;
+	prev_rx_bytes = soc->stats.rx.ingress.bytes;
+	if (rx_delta > swlm->params.tcl.rx_traffic_thresh) {
+		swlm->params.tcl.sampling_session_tx_bytes = tx_delta;
+		return true;
+	}
+
+	return false;
+}
+
+/**
+ * dp_swlm_can_tcl_wr_coalesce() - To check if current TCL reg write can be
+ *				   coalesced or not.
+ * @soc: Datapath global soc handle
+ * @tcl_data: priv data for tcl coalescing
  *
  * This function takes into account the current tx and rx throughput and
  * decides whether the TCL register write corresponding to the current packet,
@@ -33,53 +68,156 @@
  * Returns: 1 if the current TCL write is to be coalesced
  *	    0, if the current TCL write is to be processed.
  */
-static int dp_tcl_should_coalesc(struct dp_soc *soc,
-				 struct dp_tx_desc_s *tx_desc)
+static int
+dp_swlm_can_tcl_wr_coalesce(struct dp_soc *soc,
+			    struct dp_swlm_tcl_data *tcl_data)
 {
-	return 0;
+	u64 curr_time = qdf_get_log_timestamp_usecs();
+	int tput_level_pass, coalesce = 0;
+	struct dp_swlm *swlm = &soc->swlm;
+	static int tput_pass_cnt;
+	static u64 expire_time;
+
+	if (curr_time >= expire_time) {
+		expire_time = qdf_get_log_timestamp_usecs() +
+			      swlm->params.tcl.sampling_time;
+		tput_level_pass = dp_swlm_is_tput_thresh_reached(soc);
+		if (tput_level_pass) {
+			tput_pass_cnt++;
+		} else {
+			tput_pass_cnt = 0;
+			DP_STATS_INC(swlm, tcl.tput_criteria_fail, 1);
+			goto coalescing_fail;
+		}
+	}
+
+	swlm->params.tcl.bytes_coalesced += qdf_nbuf_len(tcl_data->nbuf);
+
+	if (tput_pass_cnt > DP_SWLM_TCL_TPUT_PASS_THRESH) {
+		coalesce = 1;
+		if (swlm->params.tcl.bytes_coalesced >
+		    swlm->params.tcl.bytes_flush_thresh) {
+			coalesce = 0;
+			DP_STATS_INC(swlm, tcl.bytes_thresh_reached, 1);
+		} else if (curr_time > swlm->params.tcl.coalesce_end_time) {
+			coalesce = 0;
+			DP_STATS_INC(swlm, tcl.time_thresh_reached, 1);
+		}
+	}
+
+coalescing_fail:
+	if (!coalesce) {
+		dp_swlm_tcl_reset_session_data(soc);
+		return 0;
+	}
+
+	qdf_timer_mod(&swlm->params.tcl.flush_timer, 1);
+
+	return 1;
 }
 
+static struct dp_swlm_ops dp_latency_mgr_ops = {
+	.tcl_wr_coalesce_check = dp_swlm_can_tcl_wr_coalesce,
+};
+
 /**
- * dp_swlm_query_policy() - apply software latency policy based on ring type.
- * @soc: Datapath global soc handle
- * @ring_type: SRNG type
- * @query_data: private data for the query corresponding to the ring type
+ * dp_swlm_tcl_flush_timer() - Timer handler for tcl register write coalescing
+ * @arg: private data of the timer
  *
- * Returns: 0 always
+ * Returns: none
  */
-int dp_swlm_query_policy(struct dp_soc *soc, int ring_type,
-			 union swlm_data query_data)
+static void dp_swlm_tcl_flush_timer(void *arg)
 {
-	if (qdf_unlikely(!soc->swlm.is_enabled))
-		return 0;
+	struct dp_soc *soc = arg;
+	struct dp_swlm *swlm = &soc->swlm;
+	hal_ring_handle_t hal_ring_hdl =
+				soc->tcl_data_ring[0].hal_srng;
 
-	switch (ring_type) {
-	case TCL_DATA:
-		return soc->swlm.ops->tcl_should_coalesc(soc,
-							 query_data.tx_desc);
-	default:
-		break;
+	if (hal_srng_try_access_start(soc->hal_soc, hal_ring_hdl) < 0) {
+		DP_STATS_INC(swlm, tcl.timer_flush_fail, 1);
+		return;
 	}
 
-	return 0;
+	DP_STATS_INC(swlm, tcl.timer_flush_success, 1);
+	hal_srng_access_end(soc->hal_soc, hal_ring_hdl);
 }
 
-struct dp_swlm_ops dp_latency_mgr_ops = {
-	.tcl_should_coalesc = dp_tcl_should_coalesc,
-};
+/**
+ * dp_soc_swlm_tcl_attach() - attach the TCL resources for the software
+ *			      latency manager.
+ * @soc: Datapath global soc handle
+ *
+ * Returns: QDF_STATUS
+ */
+static inline QDF_STATUS dp_soc_swlm_tcl_attach(struct dp_soc *soc)
+{
+	struct dp_swlm *swlm = &soc->swlm;
+
+	swlm->params.tcl.rx_traffic_thresh = DP_SWLM_TCL_RX_TRAFFIC_THRESH;
+	swlm->params.tcl.tx_traffic_thresh = DP_SWLM_TCL_TX_TRAFFIC_THRESH;
+	swlm->params.tcl.sampling_time = DP_SWLM_TCL_TRAFFIC_SAMPLING_TIME;
+	swlm->params.tcl.bytes_flush_thresh = 0;
+	swlm->params.tcl.time_flush_thresh = DP_SWLM_TCL_TIME_FLUSH_THRESH;
+	swlm->params.tcl.tx_thresh_multiplier =
+					DP_SWLM_TCL_TX_THRESH_MULTIPLIER;
+
+	qdf_timer_init(soc->osdev, &swlm->params.tcl.flush_timer,
+		       dp_swlm_tcl_flush_timer, (void *)soc,
+		       QDF_TIMER_TYPE_WAKE_APPS);
+
+	return QDF_STATUS_SUCCESS;
+}
 
 /**
- * dp_soc_swlm_attach() - attach the software latency manager resources
+ * dp_soc_swlm_tcl_detach() - detach the TCL resources for the software
+ *			      latency manager.
  * @soc: Datapath global soc handle
  *
  * Returns: QDF_STATUS
  */
+static inline QDF_STATUS dp_soc_swlm_tcl_detach(struct dp_soc *soc)
+{
+	struct dp_swlm *swlm = &soc->swlm;
+
+	qdf_timer_stop(&swlm->params.tcl.flush_timer);
+	qdf_timer_free(&swlm->params.tcl.flush_timer);
+
+	return QDF_STATUS_SUCCESS;
+}
+
 QDF_STATUS dp_soc_swlm_attach(struct dp_soc *soc)
 {
-	soc->swlm.ops = &dp_latency_mgr_ops;
-	soc->swlm.is_enabled = true;
+	struct dp_swlm *swlm = &soc->swlm;
+	QDF_STATUS ret;
+
+	swlm->ops = &dp_latency_mgr_ops;
+
+	ret = dp_soc_swlm_tcl_attach(soc);
+	if (QDF_IS_STATUS_ERROR(ret))
+		goto swlm_tcl_setup_fail;
+
+	swlm->is_enabled = true;
 
 	return QDF_STATUS_SUCCESS;
+
+swlm_tcl_setup_fail:
+	swlm->is_enabled = false;
+	return ret;
 }
 
+QDF_STATUS dp_soc_swlm_detach(struct dp_soc *soc)
+{
+	struct dp_swlm *swlm = &soc->swlm;
+	QDF_STATUS ret;
+
+	swlm->is_enabled = false;
+
+	ret = dp_soc_swlm_tcl_detach(soc);
+	if (QDF_IS_STATUS_ERROR(ret))
+		return ret;
+
+	swlm->ops = NULL;
+
+	return QDF_STATUS_SUCCESS;
+}
 #endif /* WLAN_DP_FEATURE_SW_LATENCY_MGR */

+ 145 - 0
core/dp/txrx3.0/dp_swlm.h

@@ -17,7 +17,152 @@
 #ifndef _DP_SWLM_H_
 #define _DP_SWLM_H_
 
+#ifdef WLAN_DP_FEATURE_SW_LATENCY_MGR
+
 #include <dp_types.h>
 #include <dp_internal.h>
 
+#define DP_SWLM_TCL_TPUT_PASS_THRESH 3
+
+#define DP_SWLM_TCL_RX_TRAFFIC_THRESH 50
+#define DP_SWLM_TCL_TX_TRAFFIC_THRESH 50
+/* Traffic test time is in us */
+#define DP_SWLM_TCL_TRAFFIC_SAMPLING_TIME 250
+#define DP_SWLM_TCL_TIME_FLUSH_THRESH 1000
+#define DP_SWLM_TCL_TX_THRESH_MULTIPLIER 2
+
+/* Inline Functions */
+
+/**
+ * dp_tx_is_special_frame() - check if this TX frame is a special frame.
+ * @nbuf: TX skb pointer
+ * @frame_mask: the mask for required special frames
+ *
+ * Check if TX frame is a required special frame.
+ *
+ * Returns: true, if this frame is a needed special frame,
+ *	    false, otherwise
+ */
+static inline
+bool dp_tx_is_special_frame(qdf_nbuf_t nbuf, uint32_t frame_mask)
+{
+	if (((frame_mask & FRAME_MASK_IPV4_ARP) &&
+	     qdf_nbuf_is_ipv4_arp_pkt(nbuf)) ||
+	    ((frame_mask & FRAME_MASK_IPV4_DHCP) &&
+	     qdf_nbuf_is_ipv4_dhcp_pkt(nbuf)) ||
+	    ((frame_mask & FRAME_MASK_IPV4_EAPOL) &&
+	     qdf_nbuf_is_ipv4_eapol_pkt(nbuf)) ||
+	    ((frame_mask & FRAME_MASK_IPV6_DHCP) &&
+	     qdf_nbuf_is_ipv6_dhcp_pkt(nbuf)))
+		return true;
+
+	return false;
+}
+
+/**
+ * dp_swlm_tcl_reset_session_data() -  Reset the TCL coalescing session data
+ * @soc: DP soc handle
+ *
+ * Returns QDF_STATUS
+ */
+static inline QDF_STATUS
+dp_swlm_tcl_reset_session_data(struct dp_soc *soc)
+{
+	struct dp_swlm *swlm = &soc->swlm;
+
+	swlm->params.tcl.coalesce_end_time = qdf_get_log_timestamp_usecs() +
+		     swlm->params.tcl.time_flush_thresh;
+	swlm->params.tcl.bytes_coalesced = 0;
+	swlm->params.tcl.bytes_flush_thresh =
+				swlm->params.tcl.sampling_session_tx_bytes *
+				swlm->params.tcl.tx_thresh_multiplier;
+	qdf_timer_sync_cancel(&swlm->params.tcl.flush_timer);
+
+	return QDF_STATUS_SUCCESS;
+}
+
+/**
+ * dp_swlm_tcl_pre_check() - Pre checks for current packet to be transmitted
+ * @soc: Datapath soc handle
+ * @tcl_data: tcl swlm data
+ *
+ * Returns: QDF_STATUS_SUCCESS, if all pre-check conditions pass
+ *	    QDF_STATUS_E_FAILURE, otherwise
+ */
+static inline QDF_STATUS
+dp_swlm_tcl_pre_check(struct dp_soc *soc,
+		      struct dp_swlm_tcl_data *tcl_data)
+{
+	struct dp_swlm *swlm = &soc->swlm;
+	uint32_t frame_mask = FRAME_MASK_IPV4_ARP | FRAME_MASK_IPV4_DHCP |
+				FRAME_MASK_IPV4_EAPOL | FRAME_MASK_IPV6_DHCP;
+
+	if (tcl_data->tid > DP_VO_TID) {
+		DP_STATS_INC(swlm, tcl.tid_fail, 1);
+		goto fail;
+	}
+
+	if (dp_tx_is_special_frame(tcl_data->nbuf, frame_mask)) {
+		DP_STATS_INC(swlm, tcl.sp_frames, 1);
+		goto fail;
+	}
+
+	if (tcl_data->num_ll_connections) {
+		DP_STATS_INC(swlm, tcl.ll_connection, 1);
+		goto fail;
+	}
+
+	return QDF_STATUS_SUCCESS;
+
+fail:
+	return QDF_STATUS_E_FAILURE;
+}
+
+/**
+ * dp_swlm_query_policy() - apply software latency policy based on ring type.
+ * @soc: Datapath global soc handle
+ * @ring_type: SRNG type
+ * @query_data: private data for the query corresponding to the ring type
+ *
+ * Returns: 1, if policy is to be applied
+ *	    0, if policy is not to be applied
+ */
+static inline int dp_swlm_query_policy(struct dp_soc *soc, int ring_type,
+				       union swlm_data query_data)
+{
+	struct dp_swlm *swlm = &soc->swlm;
+
+	switch (ring_type) {
+	case TCL_DATA:
+		return swlm->ops->tcl_wr_coalesce_check(soc,
+							query_data.tcl_data);
+	default:
+		dp_err("Ring type %d not supported by SW latency manager",
+		       ring_type);
+		break;
+	}
+
+	return 0;
+}
+
+/* Function Declarations */
+
+/**
+ * dp_soc_swlm_attach() - attach the software latency manager resources
+ * @soc: Datapath global soc handle
+ *
+ * Returns: QDF_STATUS
+ */
+QDF_STATUS dp_soc_swlm_attach(struct dp_soc *soc);
+
+/**
+ * dp_soc_swlm_detach() - detach the software latency manager resources
+ * @soc: Datapath global soc handle
+ *
+ * Returns: QDF_STATUS
+ */
+QDF_STATUS dp_soc_swlm_detach(struct dp_soc *soc);
+
+#endif /* WLAN_DP_FEATURE_SW_LATENCY_MGR */
+
 #endif