qcacmn: Do batched invalidate of tx completion descriptor
Do a batched invalidate of tx completion descriptors to avoid an
unnecessary D-cache miss per 32-byte descriptor.

Change-Id: Ia580fe78dcef5b36f117aaad171a2df6d0e34966
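In cached-ring mode the completion ring lives in cacheable memory that the hardware DMAs into, so software must invalidate stale D-cache lines before reading descriptors. Reaping entry by entry costs one cache-maintenance operation (and the resulting miss) per 32-byte descriptor; invalidating the whole batch up front pays that cost once. A rough before/after sketch of the shape of the change, ignoring ring wrap-around for brevity; dcache_inv_range() here is a hypothetical stand-in for the driver's actual maintenance calls (qdf_mem_dma_cache_sync / qdf_nbuf_dma_inv_range in the hunks below):

    #include <stdint.h>

    #define DESC_WORDS 8    /* 32-byte descriptor = 8 x uint32_t words */

    /* Hypothetical stand-in for the platform invalidate operation */
    void dcache_inv_range(void *start, void *end);

    /* Before: one invalidate per descriptor as the ring is walked */
    static void reap_per_entry(uint32_t *ring, uint32_t tp, uint32_t n)
    {
            uint32_t i;

            for (i = 0; i < n; i++)
                    dcache_inv_range(&ring[tp + i * DESC_WORDS],
                                     &ring[tp + (i + 1) * DESC_WORDS]);
    }

    /* After: a single range invalidate covers the whole batch */
    static void reap_batched(uint32_t *ring, uint32_t tp, uint32_t n)
    {
            dcache_inv_range(&ring[tp], &ring[tp + n * DESC_WORDS]);
    }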
@@ -1761,6 +1761,55 @@ static inline void dp_srng_access_end(struct dp_intr *int_ctx,
 }
 #endif /* WLAN_FEATURE_DP_EVENT_HISTORY */
 
+#ifdef QCA_CACHED_RING_DESC
+/**
+ * dp_srng_dst_get_next() - Wrapper function to get next ring desc
+ * @dp_soc: DP Soc handle
+ * @hal_ring_hdl: opaque pointer to the HAL Destination Ring
+ *
+ * Return: HAL ring descriptor
+ */
+static inline void *dp_srng_dst_get_next(struct dp_soc *dp_soc,
+                                         hal_ring_handle_t hal_ring_hdl)
+{
+        hal_soc_handle_t hal_soc = dp_soc->hal_soc;
+
+        return hal_srng_dst_get_next_cached(hal_soc, hal_ring_hdl);
+}
+
+/**
+ * dp_srng_dst_inv_cached_descs() - Wrapper function to invalidate cached
+ *                                  descriptors
+ * @dp_soc: DP Soc handle
+ * @hal_ring_hdl: opaque pointer to the HAL Rx Destination ring
+ * @num_entries: Entry count
+ *
+ * Return: None
+ */
+static inline void dp_srng_dst_inv_cached_descs(struct dp_soc *dp_soc,
+                                                hal_ring_handle_t hal_ring_hdl,
+                                                uint32_t num_entries)
+{
+        hal_soc_handle_t hal_soc = dp_soc->hal_soc;
+
+        hal_srng_dst_inv_cached_descs(hal_soc, hal_ring_hdl, num_entries);
+}
+#else
+static inline void *dp_srng_dst_get_next(struct dp_soc *dp_soc,
+                                         hal_ring_handle_t hal_ring_hdl)
+{
+        hal_soc_handle_t hal_soc = dp_soc->hal_soc;
+
+        return hal_srng_dst_get_next(hal_soc, hal_ring_hdl);
+}
+
+static inline void dp_srng_dst_inv_cached_descs(struct dp_soc *dp_soc,
+                                                hal_ring_handle_t hal_ring_hdl,
+                                                uint32_t num_entries)
+{
+}
+#endif /* QCA_CACHED_RING_DESC */
+
 #ifdef QCA_ENH_V3_STATS_SUPPORT
 /**
  * dp_pdev_print_delay_stats(): Print pdev level delay stats
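The wrappers above compile down to the cached HAL variants when QCA_CACHED_RING_DESC is set, and to the plain getter plus an empty no-op otherwise, so callers need no #ifdefs of their own. A condensed sketch of the intended call pattern, with names taken from the dp_tx_comp_handler hunk below; the num_avail_for_reap decrement is an assumption, as it happens in handler code outside the hunks shown:

    num_avail_for_reap = hal_srng_dst_num_valid(soc->hal_soc, hal_ring_hdl, 0);
    if (num_avail_for_reap >= quota)
            num_avail_for_reap = quota;

    /* One D-cache invalidate covers every descriptor in the batch */
    dp_srng_dst_inv_cached_descs(soc, hal_ring_hdl, num_avail_for_reap);

    while (qdf_likely(num_avail_for_reap)) {
            tx_comp_hal_desc = dp_srng_dst_get_next(soc, hal_ring_hdl);
            if (qdf_unlikely(!tx_comp_hal_desc))
                    break;
            /* ... process one completion; the full handler is assumed
             * to decrement num_avail_for_reap here ... */
    }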
@@ -3599,6 +3599,7 @@ uint32_t dp_tx_comp_handler(struct dp_intr *int_ctx, struct dp_soc *soc,
         struct dp_tx_desc_s *tail_desc = NULL;
         uint32_t num_processed = 0;
         uint32_t count = 0;
+        uint32_t num_avail_for_reap = 0;
         bool force_break = false;
 
         DP_HIST_INIT();
@@ -3612,9 +3613,18 @@ more_data:
                 return 0;
         }
 
+        num_avail_for_reap = hal_srng_dst_num_valid(soc->hal_soc, hal_ring_hdl, 0);
+
+        if (num_avail_for_reap >= quota)
+                num_avail_for_reap = quota;
+
+        dp_srng_dst_inv_cached_descs(soc, hal_ring_hdl, num_avail_for_reap);
+
         /* Find head descriptor from completion ring */
-        while (qdf_likely(tx_comp_hal_desc =
-                        hal_srng_dst_get_next(soc->hal_soc, hal_ring_hdl))) {
+        while (qdf_likely(num_avail_for_reap)) {
+                tx_comp_hal_desc = dp_srng_dst_get_next(soc, hal_ring_hdl);
+                if (qdf_unlikely(!tx_comp_hal_desc))
+                        break;
 
                 buffer_src = hal_tx_comp_get_buffer_source(tx_comp_hal_desc);
 
@@ -3756,10 +3766,6 @@ more_data:
                  * Processed packet count is more than given quota
                  * stop to processing
                  */
-                if (num_processed >= quota) {
-                        force_break = true;
-                        break;
-                }
                 count++;
 
@@ -3774,6 +3780,10 @@ more_data:
         dp_tx_comp_process_desc_list(soc, head_desc, ring_id);
 
         if (dp_tx_comp_enable_eol_data_check(soc)) {
+
+                if (num_processed >= quota)
+                        force_break = true;
+
                 if (!force_break &&
                     hal_srng_dst_peek_sync_locked(soc->hal_soc,
                                                   hal_ring_hdl)) {
@@ -986,38 +986,40 @@ static inline int hal_srng_access_start(hal_soc_handle_t hal_soc_hdl,
 }
 
 /**
- * hal_srng_dst_get_next - Get next entry from a destination ring and move
- * cached tail pointer
- *
+ * hal_srng_dst_get_next - Get next entry from a destination ring
  * @hal_soc: Opaque HAL SOC handle
  * @hal_ring_hdl: Destination ring pointer
  *
- * Return: Opaque pointer for next ring entry; NULL on failire
+ * Return: Opaque pointer for next ring entry; NULL on failure
  */
 static inline
 void *hal_srng_dst_get_next(void *hal_soc,
                             hal_ring_handle_t hal_ring_hdl)
 {
         struct hal_srng *srng = (struct hal_srng *)hal_ring_hdl;
-        struct hal_soc *soc = (struct hal_soc *)hal_soc;
         uint32_t *desc;
-        uint32_t *desc_next;
-        uint32_t tp;
 
-        if (srng->u.dst_ring.tp != srng->u.dst_ring.cached_hp) {
-                desc = &(srng->ring_base_vaddr[srng->u.dst_ring.tp]);
+        if (srng->u.dst_ring.tp == srng->u.dst_ring.cached_hp)
+                return NULL;
+
+        desc = &srng->ring_base_vaddr[srng->u.dst_ring.tp];
         /* TODO: Using % is expensive, but we have to do this since
          * size of some SRNG rings is not power of 2 (due to descriptor
          * sizes). Need to create separate API for rings used
          * per-packet, with sizes power of 2 (TCL2SW, REO2SW,
          * SW2RXDMA and CE rings)
          */
-        srng->u.dst_ring.tp = (srng->u.dst_ring.tp + srng->entry_size) %
-                srng->ring_size;
+        srng->u.dst_ring.tp = (srng->u.dst_ring.tp + srng->entry_size);
+        if (srng->u.dst_ring.tp == srng->ring_size)
+                srng->u.dst_ring.tp = 0;
 
         if (srng->flags & HAL_SRNG_CACHED_DESC) {
+                struct hal_soc *soc = (struct hal_soc *)hal_soc;
+                uint32_t *desc_next;
+                uint32_t tp;
+
                 tp = srng->u.dst_ring.tp;
-                desc_next = &srng->ring_base_vaddr[tp];
+                desc_next = &srng->ring_base_vaddr[srng->u.dst_ring.tp];
                 qdf_mem_dma_cache_sync(soc->qdf_dev,
                                        qdf_mem_virt_to_phys(desc_next),
                                        QDF_DMA_FROM_DEVICE,
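Besides narrowing the cached-descriptor locals to the one branch that uses them, the rewrite replaces the modulo wrap with a compare-and-reset. Because tp advances by exactly entry_size and ring_size is a whole multiple of entry_size, an equality test is sufficient, and it avoids an integer division on rings whose sizes are not powers of two (which the compiler cannot reduce to a mask). The two forms are equivalent under those conditions:

    /* Equivalent when ring_size % entry_size == 0 and tp < ring_size */
    tp = (tp + entry_size) % ring_size;     /* costs a division per step */

    tp += entry_size;                       /* compare-and-reset: no division */
    if (tp == ring_size)
            tp = 0;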
@@ -1029,7 +1031,40 @@ void *hal_srng_dst_get_next(void *hal_soc,
         return (void *)desc;
 }
 
+/**
+ * hal_srng_dst_get_next_cached - Get cached next entry
+ * @hal_soc: Opaque HAL SOC handle
+ * @hal_ring_hdl: Destination ring pointer
+ *
+ * Get next entry from a destination ring and move cached tail pointer
+ *
+ * Return: Opaque pointer for next ring entry; NULL on failure
+ */
+static inline
+void *hal_srng_dst_get_next_cached(void *hal_soc,
+                                   hal_ring_handle_t hal_ring_hdl)
+{
+        struct hal_srng *srng = (struct hal_srng *)hal_ring_hdl;
+        uint32_t *desc;
+        uint32_t *desc_next;
+
+        if (srng->u.dst_ring.tp == srng->u.dst_ring.cached_hp)
                 return NULL;
+
+        desc = &srng->ring_base_vaddr[srng->u.dst_ring.tp];
+        /* TODO: Using % is expensive, but we have to do this since
+         * size of some SRNG rings is not power of 2 (due to descriptor
+         * sizes). Need to create separate API for rings used
+         * per-packet, with sizes power of 2 (TCL2SW, REO2SW,
+         * SW2RXDMA and CE rings)
+         */
+        srng->u.dst_ring.tp = (srng->u.dst_ring.tp + srng->entry_size);
+        if (srng->u.dst_ring.tp == srng->ring_size)
+                srng->u.dst_ring.tp = 0;
+
+        desc_next = &srng->ring_base_vaddr[srng->u.dst_ring.tp];
+        qdf_prefetch(desc_next);
+        return (void *)desc;
 }
 
 /**
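hal_srng_dst_get_next_cached() deliberately omits the per-entry qdf_mem_dma_cache_sync() that hal_srng_dst_get_next() performs: it is only coherent if a batched invalidate has already run, and the qdf_prefetch() merely warms the next descriptor's line while the current one is processed. A sketch of the implied pairing, inferred from the dp_tx usage above rather than stated by the commit itself:

    /* Make n entries coherent first ... */
    hal_srng_dst_inv_cached_descs(hal_soc, hal_ring_hdl, n);

    /* ... then the cached getter may safely skip per-entry sync */
    while (n--) {
            desc = hal_srng_dst_get_next_cached(hal_soc, hal_ring_hdl);
            if (!desc)
                    break;
            /* desc is already coherent; the next entry is prefetching */
    }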
@@ -1148,10 +1183,72 @@ uint32_t hal_srng_dst_num_valid(void *hal_soc,
 
         if (hp >= tp)
                 return (hp - tp) / srng->entry_size;
-        else
-                return (srng->ring_size - tp + hp) / srng->entry_size;
+
+        return (srng->ring_size - tp + hp) / srng->entry_size;
 }
 
+/**
+ * hal_srng_dst_inv_cached_descs - API to invalidate descriptors in batch mode
+ * @hal_soc: Opaque HAL SOC handle
+ * @hal_ring_hdl: Destination ring pointer
+ * @entry_count: Number of descriptors to be invalidated
+ *
+ * Invalidates a set of cached descriptors starting from tail to
+ * provided count worth
+ *
+ * Return - None
+ */
+static inline void hal_srng_dst_inv_cached_descs(void *hal_soc,
+                                                 hal_ring_handle_t hal_ring_hdl,
+                                                 uint32_t entry_count)
+{
+        struct hal_srng *srng = (struct hal_srng *)hal_ring_hdl;
+        uint32_t hp = srng->u.dst_ring.cached_hp;
+        uint32_t tp = srng->u.dst_ring.tp;
+        uint32_t sync_p = 0;
+
+        /*
+         * If SRNG does not have cached descriptors this
+         * API call should be a no op
+         */
+        if (!(srng->flags & HAL_SRNG_CACHED_DESC))
+                return;
+
+        if (qdf_unlikely(entry_count == 0))
+                return;
+
+        sync_p = (entry_count - 1) * srng->entry_size;
+
+        if (hp > tp) {
+                qdf_nbuf_dma_inv_range(&srng->ring_base_vaddr[tp],
+                                       &srng->ring_base_vaddr[tp + sync_p]
+                                       + (srng->entry_size * sizeof(uint32_t)));
+        } else {
+                /*
+                 * We have wrapped around
+                 */
+                uint32_t wrap_cnt = ((srng->ring_size - tp) / srng->entry_size);
+
+                if (entry_count <= wrap_cnt) {
+                        qdf_nbuf_dma_inv_range(&srng->ring_base_vaddr[tp],
+                                               &srng->ring_base_vaddr[tp + sync_p] +
+                                               (srng->entry_size * sizeof(uint32_t)));
+                        return;
+                }
+
+                entry_count -= wrap_cnt;
+                sync_p = (entry_count - 1) * srng->entry_size;
+
+                qdf_nbuf_dma_inv_range(&srng->ring_base_vaddr[tp],
+                                       &srng->ring_base_vaddr[srng->ring_size - srng->entry_size] +
+                                       (srng->entry_size * sizeof(uint32_t)));
+
+                qdf_nbuf_dma_inv_range(&srng->ring_base_vaddr[0],
+                                       &srng->ring_base_vaddr[sync_p]
+                                       + (srng->entry_size * sizeof(uint32_t)));
+        }
+}
+
 /**
  * hal_srng_dst_num_valid_locked - Returns num valid entries to be processed
  *
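A worked example of the wrap-around branch, with illustrative values: entry_size = 8 words (32-byte descriptors) and ring_size = 1024 words, i.e. 128 entries.

    /*
     * tp = 1000 (entry 125), entry_count = 10, and hp <= tp (wrapped).
     * wrap_cnt = (1024 - 1000) / 8 = 3 entries fit before the wrap,
     * and entry_count (10) > wrap_cnt (3), so two ranges are invalidated:
     *   1) entries 125..127: words 1000 .. end of ring
     *   2) entry_count -= 3 -> 7; sync_p = (7 - 1) * 8 = 48;
     *      entries 0..6: words 0 .. 48 plus one entry's worth
     * Total 3 + 7 = 10 entries, matching the requested batch.
     */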