qcacmn: Prefetch RX HW desc, SW desc and SKB in pipeline fashion

Prefetch RX HW desc, SW desc and SKB in pipeline
fashion in the first loop of RX processing.

This has improved TPUT by 200Mbps and provided a
10% gain in CPU (single core)

PINE with other optimizations: 3960Mbps @ 100% core-3
PINE + pipeline prefetch: 4130Mbps @ 90%  core-3

Change-Id: I47f351601b264eb3a2b50e4154229d55da738724
This commit is contained in:
Tallapragada Kalyan
2021-10-31 00:29:20 +05:30
committed by Madan Koyyalamudi
parent e3c327a0ba
commit 4e7ceff561
4 changed files with 175 additions and 0 deletions

View File

@@ -2435,6 +2435,30 @@ static inline void dp_srng_dst_inv_cached_descs(struct dp_soc *dp_soc,
}
#endif /* QCA_CACHED_RING_DESC */
#if defined(QCA_CACHED_RING_DESC) && defined(QCA_DP_RX_HW_SW_NBUF_DESC_PREFETCH)
/**
 * dp_srng_dst_prefetch() - DP wrapper around hal_srng_dst_prefetch()
 * @hal_soc: HAL SOC handle
 * @hal_ring_hdl: opaque handle of the HAL Rx destination ring
 * @num_entries: entry count, forwarded unchanged to the HAL call
 *
 * Compiled only when both QCA_CACHED_RING_DESC and
 * QCA_DP_RX_HW_SW_NBUF_DESC_PREFETCH are defined.
 *
 * Return: the pointer returned by hal_srng_dst_prefetch()
 *	   (NOTE(review): presumably the last prefetched ring descriptor,
 *	   or NULL when nothing was prefetched - confirm against the HAL)
 */
static inline void *dp_srng_dst_prefetch(hal_soc_handle_t hal_soc,
					 hal_ring_handle_t hal_ring_hdl,
					 uint32_t num_entries)
{
	void *prefetch_ret;

	/* Pure pass-through: all three arguments go to the HAL as-is. */
	prefetch_ret = hal_srng_dst_prefetch(hal_soc, hal_ring_hdl,
					     num_entries);

	return prefetch_ret;
}
#else
/**
 * dp_srng_dst_prefetch() - Stub used when descriptor prefetch is compiled out
 * @hal_soc: HAL SOC handle (unused)
 * @hal_ring_hdl: opaque handle of the HAL Rx destination ring (unused)
 * @num_entries: entry count (unused)
 *
 * Selected when QCA_CACHED_RING_DESC and QCA_DP_RX_HW_SW_NBUF_DESC_PREFETCH
 * are not both defined; it performs no work so callers need no #ifdef.
 *
 * Return: always NULL
 */
static inline void *dp_srng_dst_prefetch(hal_soc_handle_t hal_soc,
					 hal_ring_handle_t hal_ring_hdl,
					 uint32_t num_entries)
{
	return NULL;
}
#endif
#ifdef QCA_ENH_V3_STATS_SUPPORT
/**
* dp_pdev_print_delay_stats(): Print pdev level delay stats