qcacmn: Prefetch RX HW desc, SW desc and SKB in pipeline fashion
Prefetch RX HW desc, SW desc and SKB in pipeline fasion in the first loop of RX processing. This has improved TPUT by 200Mbps and provided a 10% gain in CPU (single core) PINE with other optimizations: 3960Mbps @ 100% core-3 PINE + pipeline prefetch: 4130Mbps @ 90% core-3 Change-Id: I47f351601b264eb3a2b50e4154229d55da738724
This commit is contained in:

committed by
Madan Koyyalamudi

parent
e3c327a0ba
commit
4e7ceff561
@@ -2969,4 +2969,73 @@ hal_dmac_cmn_src_rxbuf_ring_get(hal_soc_handle_t hal_soc_hdl)
|
||||
|
||||
return hal_soc->dmac_cmn_src_rxbuf_ring;
|
||||
}
|
||||
|
||||
/**
|
||||
* hal_srng_dst_prefetch() - function to prefetch 4 destination ring descs
|
||||
* @hal_soc_hdl: HAL SOC handle
|
||||
* @hal_ring_hdl: Destination ring pointer
|
||||
* @num_valid: valid entries in the ring
|
||||
*
|
||||
* return: last prefetched destination ring descriptor
|
||||
*/
|
||||
static inline
|
||||
void *hal_srng_dst_prefetch(hal_soc_handle_t hal_soc_hdl,
|
||||
hal_ring_handle_t hal_ring_hdl,
|
||||
uint16_t num_valid)
|
||||
{
|
||||
struct hal_srng *srng = (struct hal_srng *)hal_ring_hdl;
|
||||
uint8_t *desc;
|
||||
uint32_t cnt;
|
||||
/*
|
||||
* prefetching 4 HW descriptors will ensure atleast by the time
|
||||
* 5th HW descriptor is being processed it is guranteed that the
|
||||
* 5th HW descriptor, its SW Desc, its nbuf and its nbuf's data
|
||||
* are in cache line. basically ensuring all the 4 (HW, SW, nbuf
|
||||
* & nbuf->data) are prefetched.
|
||||
*/
|
||||
uint32_t max_prefetch = 4;
|
||||
|
||||
if (srng->u.dst_ring.tp == srng->u.dst_ring.cached_hp)
|
||||
return NULL;
|
||||
|
||||
desc = (uint8_t *)&srng->ring_base_vaddr[srng->u.dst_ring.tp];
|
||||
|
||||
if (num_valid < max_prefetch)
|
||||
max_prefetch = num_valid;
|
||||
|
||||
for (cnt = 0; cnt < max_prefetch; cnt++) {
|
||||
desc += srng->entry_size * sizeof(uint32_t);
|
||||
if (desc == ((uint8_t *)srng->ring_vaddr_end))
|
||||
desc = (uint8_t *)&srng->ring_base_vaddr[0];
|
||||
|
||||
qdf_prefetch(desc);
|
||||
}
|
||||
return (void *)desc;
|
||||
}
|
||||
|
||||
/**
|
||||
* hal_srng_dst_prefetch_next_cached_desc() - function to prefetch next desc
|
||||
* @hal_soc_hdl: HAL SOC handle
|
||||
* @hal_ring_hdl: Destination ring pointer
|
||||
* @last_prefetched_hw_desc: last prefetched HW descriptor
|
||||
*
|
||||
* return: next prefetched destination descriptor
|
||||
*/
|
||||
static inline
|
||||
void *hal_srng_dst_prefetch_next_cached_desc(hal_soc_handle_t hal_soc_hdl,
|
||||
hal_ring_handle_t hal_ring_hdl,
|
||||
uint8_t *last_prefetched_hw_desc)
|
||||
{
|
||||
struct hal_srng *srng = (struct hal_srng *)hal_ring_hdl;
|
||||
|
||||
if (srng->u.dst_ring.tp == srng->u.dst_ring.cached_hp)
|
||||
return NULL;
|
||||
|
||||
last_prefetched_hw_desc += srng->entry_size * sizeof(uint32_t);
|
||||
if (last_prefetched_hw_desc == ((uint8_t *)srng->ring_vaddr_end))
|
||||
last_prefetched_hw_desc = (uint8_t *)&srng->ring_base_vaddr[0];
|
||||
|
||||
qdf_prefetch(last_prefetched_hw_desc);
|
||||
return (void *)last_prefetched_hw_desc;
|
||||
}
|
||||
#endif /* _HAL_APIH_ */
|
||||
|
Reference in New Issue
Block a user