qcacmn: Prefetch RX HW desc, SW desc and SKB in pipeline fashion

Prefetch RX HW desc, SW desc and SKB in a pipelined
fashion in the first loop of RX processing.

This has improved TPUT by 200Mbps and provided a
10% gain in CPU (single core)

PINE with other optimizations: 3960Mbps @ 100% core-3
PINE + pipeline prefetch: 4130Mbps @ 90%  core-3

Change-Id: I47f351601b264eb3a2b50e4154229d55da738724
This commit is contained in:
Tallapragada Kalyan
2021-10-31 00:29:20 +05:30
committed by Madan Koyyalamudi
parent e3c327a0ba
commit 4e7ceff561
4 changed files with 175 additions and 0 deletions

View File

@@ -2969,4 +2969,73 @@ hal_dmac_cmn_src_rxbuf_ring_get(hal_soc_handle_t hal_soc_hdl)
return hal_soc->dmac_cmn_src_rxbuf_ring;
}
/**
 * hal_srng_dst_prefetch() - prefetch up to 4 destination ring descriptors
 * @hal_soc_hdl: HAL SOC handle (not referenced by this function)
 * @hal_ring_hdl: Destination ring pointer
 * @num_valid: valid entries in the ring
 *
 * Starting at the descriptor located at the ring's tail pointer, step
 * forward one entry at a time and issue a prefetch for each descriptor
 * stepped onto. At most min(4, @num_valid) descriptors are prefetched, and
 * the walk wraps back to the ring base when it lands on the ring end
 * address.
 *
 * Return: the last descriptor stepped onto. NULL when the tail pointer
 * equals the cached head pointer. When @num_valid is 0 no prefetch is
 * issued and the descriptor at the tail pointer is returned as-is.
 */
static inline
void *hal_srng_dst_prefetch(hal_soc_handle_t hal_soc_hdl,
			    hal_ring_handle_t hal_ring_hdl,
			    uint16_t num_valid)
{
	struct hal_srng *ring = (struct hal_srng *)hal_ring_hdl;
	/*
	 * The prefetch depth is capped at 4 HW descriptors. The intent is
	 * that by the time the 5th HW descriptor is processed, that
	 * descriptor, its SW descriptor, its nbuf and the nbuf's data have
	 * all had a chance to be pulled into cache (one pipeline stage each
	 * for HW desc, SW desc, nbuf and nbuf->data).
	 */
	uint32_t remaining = (num_valid < 4) ? num_valid : 4;
	uint8_t *hw_desc;

	if (ring->u.dst_ring.cached_hp == ring->u.dst_ring.tp)
		return NULL;

	hw_desc = (uint8_t *)&ring->ring_base_vaddr[ring->u.dst_ring.tp];

	while (remaining != 0) {
		/* entry_size is counted in 32-bit words; advance in bytes */
		hw_desc += ring->entry_size * sizeof(uint32_t);

		/* landed exactly on the ring end: wrap to the first entry */
		if (hw_desc == (uint8_t *)ring->ring_vaddr_end)
			hw_desc = (uint8_t *)ring->ring_base_vaddr;

		qdf_prefetch(hw_desc);
		remaining--;
	}

	return (void *)hw_desc;
}
/**
 * hal_srng_dst_prefetch_next_cached_desc() - prefetch the descriptor that
 *	follows the last prefetched one
 * @hal_soc_hdl: HAL SOC handle (not referenced by this function)
 * @hal_ring_hdl: Destination ring pointer
 * @last_prefetched_hw_desc: last prefetched HW descriptor
 *
 * Advance one ring entry past @last_prefetched_hw_desc, wrapping to the
 * ring base when the result lands on the ring end address, and issue a
 * prefetch for that descriptor.
 *
 * NOTE(review): @last_prefetched_hw_desc is not validated here; it is
 * presumably a pointer previously returned by one of the prefetch helpers
 * for the same ring - confirm against callers.
 *
 * Return: the descriptor that was just prefetched, or NULL when the tail
 * pointer equals the cached head pointer.
 */
static inline
void *hal_srng_dst_prefetch_next_cached_desc(hal_soc_handle_t hal_soc_hdl,
					     hal_ring_handle_t hal_ring_hdl,
					     uint8_t *last_prefetched_hw_desc)
{
	struct hal_srng *ring = (struct hal_srng *)hal_ring_hdl;
	uint8_t *next_desc;

	if (ring->u.dst_ring.cached_hp == ring->u.dst_ring.tp)
		return NULL;

	/* entry_size is counted in 32-bit words; advance in bytes */
	next_desc = last_prefetched_hw_desc +
		    ring->entry_size * sizeof(uint32_t);

	/* landed exactly on the ring end: wrap to the first entry */
	if (next_desc == (uint8_t *)ring->ring_vaddr_end)
		next_desc = (uint8_t *)ring->ring_base_vaddr;

	qdf_prefetch(next_desc);

	return (void *)next_desc;
}
#endif /* _HAL_APIH_ */