Przeglądaj źródła

qcacmn: Add HW, SW and nbuf prefetch support in Beryllium

Add HW, SW and nbuf prefetch support in Beryllium. This ensures that
the HW desc, SW desc and nbuf have been prefetched by the time we are
in the 3rd iteration of the first loop of dp_rx_process_be.

CRs-Fixed: 3218647
Change-Id: I27d371c5d1c9a37d61e4fc00d5eb03609fad589c
Tallapragada Kalyan 3 lat temu
rodzic
commit
f7a1c7e0c7

+ 47 - 23
dp/wifi3.0/be/dp_be_rx.c

@@ -163,8 +163,10 @@ uint32_t dp_rx_process_be(struct dp_intr *int_ctx,
 			  uint32_t quota)
 {
 	hal_ring_desc_t ring_desc;
+	hal_ring_desc_t last_prefetched_hw_desc;
 	hal_soc_handle_t hal_soc;
 	struct dp_rx_desc *rx_desc = NULL;
+	struct dp_rx_desc *last_prefetched_sw_desc = NULL;
 	qdf_nbuf_t nbuf, next;
 	bool near_full;
 	union dp_rx_desc_list_elem_t *head[WLAN_MAX_MLO_CHIPS][MAX_PDEV_CNT];
@@ -190,7 +192,6 @@ uint32_t dp_rx_process_be(struct dp_intr *int_ctx,
 	struct dp_srng *dp_rxdma_srng;
 	struct rx_desc_pool *rx_desc_pool;
 	struct dp_soc *soc = int_ctx->soc;
-	uint8_t core_id = 0;
 	struct cdp_tid_rx_stats *tid_stats;
 	qdf_nbuf_t nbuf_head;
 	qdf_nbuf_t nbuf_tail;
@@ -264,14 +265,27 @@ more_data:
 
 	hal_srng_update_ring_usage_wm_no_lock(soc->hal_soc, hal_ring_hdl);
 
+	if (!num_pending)
+		num_pending = hal_srng_dst_num_valid(hal_soc, hal_ring_hdl, 0);
+
+	if (num_pending > quota)
+		num_pending = quota;
+
+	dp_srng_dst_inv_cached_descs(soc, hal_ring_hdl, num_pending);
+	last_prefetched_hw_desc = dp_srng_dst_prefetch_32_byte_desc(hal_soc,
+							    hal_ring_hdl,
+							    num_pending);
 	/*
 	 * start reaping the buffers from reo ring and queue
 	 * them in per vdev queue.
 	 * Process the received pkts in a different per vdev loop.
 	 */
-	while (qdf_likely(quota &&
-			  (ring_desc = hal_srng_dst_peek(hal_soc,
-							 hal_ring_hdl)))) {
+	while (qdf_likely(num_pending)) {
+		ring_desc = dp_srng_dst_get_next(soc, hal_ring_hdl);
+
+		if (qdf_unlikely(!ring_desc))
+			break;
+
 		error = HAL_RX_ERROR_STATUS_GET(ring_desc);
 
 		if (qdf_unlikely(error == HAL_REO_ERROR_DETECTED)) {
@@ -375,19 +389,25 @@ more_data:
 				if ((msdu_desc_info.msdu_len /
 				     (RX_DATA_BUFFER_SIZE -
 				      soc->rx_pkt_tlv_size) + 1) >
-				    num_entries_avail) {
+				    num_pending) {
 					DP_STATS_INC(soc,
 						     rx.msdu_scatter_wait_break,
 						     1);
 					dp_rx_cookie_reset_invalid_bit(
 								     ring_desc);
+					/* As we are going to break out of the
+					 * loop because of unavailability of
+					 * descs to form complete SG, we need to
+					 * reset the TP in the REO destination
+					 * ring.
+					 */
+					hal_srng_dst_dec_tp(hal_soc,
+							    hal_ring_hdl);
 					break;
 				}
 				is_prev_msdu_last = false;
 			}
 		}
-		core_id = smp_processor_id();
-		DP_STATS_INC(soc, rx.ring_packets[core_id][reo_ring_num], 1);
 
 		if (mpdu_desc_info.mpdu_flags & HAL_MPDU_F_RETRY_BIT)
 			qdf_nbuf_set_rx_retry_flag(rx_desc->nbuf, 1);
@@ -400,10 +420,8 @@ more_data:
 		    msdu_desc_info.msdu_flags & HAL_MSDU_F_LAST_MSDU_IN_MPDU)
 			is_prev_msdu_last = true;
 
-		/* Pop out the descriptor*/
-		hal_srng_dst_get_next(hal_soc, hal_ring_hdl);
-
 		rx_bufs_reaped[rx_desc->chip_id][rx_desc->pool_id]++;
+
 		peer_mdata = mpdu_desc_info.peer_meta_data;
 		QDF_NBUF_CB_RX_PEER_ID(rx_desc->nbuf) =
 			dp_rx_peer_metadata_peer_id_get_be(soc, peer_mdata);
@@ -461,23 +479,25 @@ more_data:
 		 * move unmap after scattered msdu waiting break logic
 		 * in case double skb unmap happened.
 		 */
-		rx_desc_pool = &soc->rx_desc_buf[rx_desc->pool_id];
 		dp_rx_nbuf_unmap(soc, rx_desc, reo_ring_num);
 		rx_desc->unmapped = 1;
 		DP_RX_PROCESS_NBUF(soc, nbuf_head, nbuf_tail, ebuf_head,
 				   ebuf_tail, rx_desc);
-		/*
-		 * if continuation bit is set then we have MSDU spread
-		 * across multiple buffers, let us not decrement quota
-		 * till we reap all buffers of that MSDU.
-		 */
-		if (qdf_likely(!qdf_nbuf_is_rx_chfrag_cont(rx_desc->nbuf)))
-			quota -= 1;
+
+		quota -= 1;
+		num_pending -= 1;
 
 		dp_rx_add_to_free_desc_list
 			(&head[rx_desc->chip_id][rx_desc->pool_id],
 			 &tail[rx_desc->chip_id][rx_desc->pool_id], rx_desc);
 		num_rx_bufs_reaped++;
+
+		dp_rx_prefetch_hw_sw_nbuf_32_byte_desc(soc, hal_soc,
+					       num_pending,
+					       hal_ring_hdl,
+					       &last_prefetched_hw_desc,
+					       &last_prefetched_sw_desc);
+
 		/*
 		 * only if complete msdu is received for scatter case,
 		 * then allow break.
@@ -489,6 +509,9 @@ more_data:
 	}
 done:
 	dp_rx_srng_access_end(int_ctx, soc, hal_ring_hdl);
+	qdf_dsb();
+
+	dp_rx_per_core_stats_update(soc, reo_ring_num, num_rx_bufs_reaped);
 
 	for (chip_id = 0; chip_id < WLAN_MAX_MLO_CHIPS; chip_id++) {
 		for (mac_id = 0; mac_id < MAX_PDEV_CNT; mac_id++) {
@@ -506,11 +529,12 @@ done:
 
 			rx_desc_pool = &replenish_soc->rx_desc_buf[mac_id];
 
-			dp_rx_buffers_replenish(replenish_soc, mac_id,
-						dp_rxdma_srng, rx_desc_pool,
-						rx_bufs_reaped[chip_id][mac_id],
-						&head[chip_id][mac_id],
-						&tail[chip_id][mac_id]);
+			dp_rx_buffers_replenish_simple(replenish_soc, mac_id,
+					       dp_rxdma_srng,
+					       rx_desc_pool,
+					       rx_bufs_reaped[chip_id][mac_id],
+					       &head[chip_id][mac_id],
+					       &tail[chip_id][mac_id]);
 		}
 	}
 

+ 71 - 0
dp/wifi3.0/be/dp_be_rx.h

@@ -23,6 +23,8 @@
 #include <dp_types.h>
 #include "dp_be.h"
 #include "dp_peer.h"
+#include <dp_rx.h>
+#include "hal_be_rx.h"
 
 /*
  * dp_be_intrabss_params
@@ -399,4 +401,73 @@ void dp_rx_prefetch_nbuf_data_be(qdf_nbuf_t nbuf, qdf_nbuf_t next)
 {
 }
 #endif
+
+#ifdef QCA_DP_RX_HW_SW_NBUF_DESC_PREFETCH
+/**
+ * dp_rx_va_prefetch() - function to prefetch the SW desc
+ * @last_prefetched_hw_desc: HW descriptor from which the SW descriptor
+ *			     virtual address is obtained
+ *
+ * Obtains the descriptor virtual address for the given HW descriptor via
+ * hal_rx_get_reo_desc_va() and issues a qdf_prefetch() on that address.
+ *
+ * Return: prefetched Rx descriptor virtual address
+ */
+static inline
+void *dp_rx_va_prefetch(void *last_prefetched_hw_desc)
+{
+	void *prefetch_desc;
+
+	prefetch_desc = (void *)hal_rx_get_reo_desc_va(last_prefetched_hw_desc);
+	qdf_prefetch(prefetch_desc);
+	return prefetch_desc;
+}
+
+/**
+ * dp_rx_prefetch_hw_sw_nbuf_32_byte_desc() - function to prefetch HW desc,
+ *					      SW desc and nbuf
+ * @soc: Handle to DP Soc structure (not referenced in this function)
+ * @hal_soc: Handle to HAL Soc structure
+ * @num_entries: valid number of HW descriptors
+ * @hal_ring_hdl: Destination ring pointer
+ * @last_prefetched_hw_desc: input & output param of last prefetched HW desc
+ * @last_prefetched_sw_desc: input & output param of last prefetch SW desc
+ *
+ * If *@last_prefetched_sw_desc is non-NULL, its nbuf is prefetched at
+ * offsets 0 and 64. Then, only when @num_entries is non-zero:
+ *  - the SW descriptor for *@last_prefetched_hw_desc is prefetched through
+ *    dp_rx_va_prefetch() and stored in *@last_prefetched_sw_desc;
+ *  - *@last_prefetched_hw_desc is advanced, using
+ *    hal_srng_dst_prefetch_next_cached_desc() when its current address is
+ *    not 64-byte aligned and hal_srng_dst_get_next_32_byte_desc() when
+ *    it is.
+ *
+ * NOTE(review): *@last_prefetched_hw_desc is handed to dp_rx_va_prefetch()
+ * without a NULL check; presumably callers guarantee it is non-NULL
+ * whenever @num_entries is non-zero - confirm.
+ *
+ * Return: None
+ */
+static inline void
+dp_rx_prefetch_hw_sw_nbuf_32_byte_desc(struct dp_soc *soc,
+			       hal_soc_handle_t hal_soc,
+			       uint32_t num_entries,
+			       hal_ring_handle_t hal_ring_hdl,
+			       hal_ring_desc_t *last_prefetched_hw_desc,
+			       struct dp_rx_desc **last_prefetched_sw_desc)
+{
+	if (*last_prefetched_sw_desc) {
+		qdf_prefetch((uint8_t *)(*last_prefetched_sw_desc)->nbuf);
+		qdf_prefetch((uint8_t *)(*last_prefetched_sw_desc)->nbuf + 64);
+	}
+
+	if (num_entries) {
+		*last_prefetched_sw_desc =
+			dp_rx_va_prefetch(*last_prefetched_hw_desc);
+
+		if ((uintptr_t)*last_prefetched_hw_desc & 0x3f)
+			*last_prefetched_hw_desc =
+				hal_srng_dst_prefetch_next_cached_desc(hal_soc,
+					  hal_ring_hdl,
+					  (uint8_t *)*last_prefetched_hw_desc);
+		else
+			*last_prefetched_hw_desc =
+				hal_srng_dst_get_next_32_byte_desc(hal_soc,
+				   hal_ring_hdl,
+				   (uint8_t *)*last_prefetched_hw_desc);
+	}
+}
+#else
+static inline void
+dp_rx_prefetch_hw_sw_nbuf_32_byte_desc(struct dp_soc *soc,
+			       hal_soc_handle_t hal_soc,
+			       uint32_t num_entries,
+			       hal_ring_handle_t hal_ring_hdl,
+			       hal_ring_desc_t *last_prefetched_hw_desc,
+			       struct dp_rx_desc **last_prefetched_sw_desc)
+{	/* QCA_DP_RX_HW_SW_NBUF_DESC_PREFETCH not defined: intentional no-op */
+}
+#endif
 #endif

+ 27 - 0
dp/wifi3.0/dp_internal.h

@@ -2893,6 +2893,25 @@ static inline void *dp_srng_dst_prefetch(hal_soc_handle_t hal_soc,
 {
 	return hal_srng_dst_prefetch(hal_soc, hal_ring_hdl, num_entries);
 }
+
+/**
+ * dp_srng_dst_prefetch_32_byte_desc() - Wrapper function to prefetch
+ *					 32 byte descriptor starting at
+ *					 64 byte offset
+ * @hal_soc: HAL SOC handle
+ * @hal_ring_hdl: opaque pointer to the HAL Rx Destination ring
+ * @num_entries: Entry count
+ *
+ * Return: value returned by hal_srng_dst_prefetch_32_byte_desc()
+ */
+static inline
+void *dp_srng_dst_prefetch_32_byte_desc(hal_soc_handle_t hal_soc,
+					hal_ring_handle_t hal_ring_hdl,
+					uint32_t num_entries)
+{
+	return hal_srng_dst_prefetch_32_byte_desc(hal_soc, hal_ring_hdl,
+						  num_entries);
+}
 #else
 static inline void *dp_srng_dst_prefetch(hal_soc_handle_t hal_soc,
 					 hal_ring_handle_t hal_ring_hdl,
@@ -2900,6 +2919,14 @@ static inline void *dp_srng_dst_prefetch(hal_soc_handle_t hal_soc,
 {
 	return NULL;
 }
+
+static inline
+void *dp_srng_dst_prefetch_32_byte_desc(hal_soc_handle_t hal_soc,
+					hal_ring_handle_t hal_ring_hdl,
+					uint32_t num_entries)
+{	/* #else variant: no prefetch is issued, always returns NULL */
+	return NULL;
+}
 #endif
 
 #ifdef QCA_ENH_V3_STATS_SUPPORT

+ 1 - 1
dp/wifi3.0/dp_rx.h

@@ -2395,7 +2395,7 @@ void dp_rx_nbuf_unmap(struct dp_soc *soc,
 	rx_desc_pool = &soc->rx_desc_buf[rx_desc->pool_id];
 	nbuf = rx_desc->nbuf;
 
-	qdf_nbuf_dma_inv_range((void *)nbuf->data,
+	qdf_nbuf_dma_inv_range_no_dsb((void *)nbuf->data,
 			       (void *)(nbuf->data + rx_desc_pool->buf_size));
 }
 

+ 60 - 0
hal/wifi3.0/hal_api.h

@@ -3227,4 +3227,64 @@ void *hal_srng_dst_prefetch_next_cached_desc(hal_soc_handle_t hal_soc_hdl,
 	qdf_prefetch(last_prefetched_hw_desc);
 	return (void *)last_prefetched_hw_desc;
 }
+
+/**
+ * hal_srng_dst_prefetch_32_byte_desc() - function to prefetch a desc at
+ *					  64 byte offset
+ * @hal_soc_hdl: HAL SOC handle (not referenced in this function)
+ * @hal_ring_hdl: Destination ring pointer
+ * @num_valid: valid entries in the ring (not referenced in this function)
+ *
+ * Starting from the descriptor at the tail pointer, steps ahead by one
+ * entry (entry_size * sizeof(uint32_t) bytes) when that address is not
+ * 64-byte aligned, or by two entries when it is. If the result equals the
+ * ring end address it wraps to the ring base. The resulting descriptor is
+ * then prefetched with qdf_prefetch().
+ *
+ * return: NULL if the tail pointer equals the cached head pointer,
+ *	   otherwise the address one entry past the prefetched descriptor
+ */
+static inline
+void *hal_srng_dst_prefetch_32_byte_desc(hal_soc_handle_t hal_soc_hdl,
+					 hal_ring_handle_t hal_ring_hdl,
+					 uint16_t num_valid)
+{
+	struct hal_srng *srng = (struct hal_srng *)hal_ring_hdl;
+	uint8_t *desc;
+
+	if (srng->u.dst_ring.tp == srng->u.dst_ring.cached_hp)
+		return NULL;
+
+	desc = (uint8_t *)&srng->ring_base_vaddr[srng->u.dst_ring.tp];
+
+	if ((uintptr_t)desc & 0x3f)
+		desc += srng->entry_size * sizeof(uint32_t);
+	else
+		desc += (srng->entry_size * sizeof(uint32_t)) * 2;
+
+	if (desc  == ((uint8_t *)srng->ring_vaddr_end))
+		desc = (uint8_t *)&srng->ring_base_vaddr[0];
+
+	qdf_prefetch(desc);
+
+	return (void *)(desc + srng->entry_size * sizeof(uint32_t));
+}
+
+/**
+ * hal_srng_dst_get_next_32_byte_desc() - function to get the desc that
+ *					  follows the last prefetched desc
+ * @hal_soc_hdl: HAL SOC handle (not referenced in this function)
+ * @hal_ring_hdl: Destination ring pointer
+ * @last_prefetched_hw_desc: last prefetched HW descriptor
+ *
+ * Advances @last_prefetched_hw_desc by one entry
+ * (entry_size * sizeof(uint32_t) bytes), wrapping to the ring base when the
+ * result equals the ring end address. No prefetch is issued here.
+ *
+ * return: NULL if the tail pointer equals the cached head pointer,
+ *	   otherwise the next destination descriptor
+ */
+static inline
+void *hal_srng_dst_get_next_32_byte_desc(hal_soc_handle_t hal_soc_hdl,
+					 hal_ring_handle_t hal_ring_hdl,
+					 uint8_t *last_prefetched_hw_desc)
+{
+	struct hal_srng *srng = (struct hal_srng *)hal_ring_hdl;
+
+	if (srng->u.dst_ring.tp == srng->u.dst_ring.cached_hp)
+		return NULL;
+
+	last_prefetched_hw_desc += srng->entry_size * sizeof(uint32_t);
+	if (last_prefetched_hw_desc == ((uint8_t *)srng->ring_vaddr_end))
+		last_prefetched_hw_desc = (uint8_t *)&srng->ring_base_vaddr[0];
+
+	return (void *)last_prefetched_hw_desc;
+}
 #endif /* _HAL_APIH_ */