diff --git a/dp/wifi3.0/dp_internal.h b/dp/wifi3.0/dp_internal.h
index e3f42dbdf2..98a7c0d8b5 100644
--- a/dp/wifi3.0/dp_internal.h
+++ b/dp/wifi3.0/dp_internal.h
@@ -1761,6 +1761,55 @@ static inline void dp_srng_access_end(struct dp_intr *int_ctx,
 }
 #endif /* WLAN_FEATURE_DP_EVENT_HISTORY */
 
+#ifdef QCA_CACHED_RING_DESC
+/**
+ * dp_srng_dst_get_next() - Wrapper function to get next ring desc
+ * @dp_soc: DP Soc handle
+ * @hal_ring_hdl: opaque pointer to the HAL Destination Ring
+ *
+ * Return: HAL ring descriptor
+ */
+static inline void *dp_srng_dst_get_next(struct dp_soc *dp_soc,
+					 hal_ring_handle_t hal_ring_hdl)
+{
+	hal_soc_handle_t hal_soc = dp_soc->hal_soc;
+
+	return hal_srng_dst_get_next_cached(hal_soc, hal_ring_hdl);
+}
+
+/**
+ * dp_srng_dst_inv_cached_descs() - Wrapper function to invalidate cached
+ * descriptors
+ * @dp_soc: DP Soc handle
+ * @hal_ring_hdl: opaque pointer to the HAL Rx Destination ring
+ * @num_entries: Entry count
+ *
+ * Return: None
+ */
+static inline void dp_srng_dst_inv_cached_descs(struct dp_soc *dp_soc,
+						hal_ring_handle_t hal_ring_hdl,
+						uint32_t num_entries)
+{
+	hal_soc_handle_t hal_soc = dp_soc->hal_soc;
+
+	hal_srng_dst_inv_cached_descs(hal_soc, hal_ring_hdl, num_entries);
+}
+#else
+static inline void *dp_srng_dst_get_next(struct dp_soc *dp_soc,
+					 hal_ring_handle_t hal_ring_hdl)
+{
+	hal_soc_handle_t hal_soc = dp_soc->hal_soc;
+
+	return hal_srng_dst_get_next(hal_soc, hal_ring_hdl);
+}
+
+static inline void dp_srng_dst_inv_cached_descs(struct dp_soc *dp_soc,
+						hal_ring_handle_t hal_ring_hdl,
+						uint32_t num_entries)
+{
+}
+#endif /* QCA_CACHED_RING_DESC */
+
 #ifdef QCA_ENH_V3_STATS_SUPPORT
 /**
  * dp_pdev_print_delay_stats(): Print pdev level delay stats
diff --git a/dp/wifi3.0/dp_tx.c b/dp/wifi3.0/dp_tx.c
index 1fb06de39a..c1e3a2962c 100644
--- a/dp/wifi3.0/dp_tx.c
+++ b/dp/wifi3.0/dp_tx.c
@@ -3599,6 +3599,7 @@ uint32_t dp_tx_comp_handler(struct dp_intr *int_ctx, struct dp_soc *soc,
 	struct dp_tx_desc_s *tail_desc = NULL;
 	uint32_t num_processed = 0;
 	uint32_t count = 0;
+	uint32_t num_avail_for_reap = 0;
 	bool force_break = false;
 
 	DP_HIST_INIT();
@@ -3612,9 +3613,19 @@ more_data:
 		return 0;
 	}
 
+	num_avail_for_reap = hal_srng_dst_num_valid(soc->hal_soc,
+						    hal_ring_hdl, 0);
+
+	if (num_avail_for_reap >= quota)
+		num_avail_for_reap = quota;
+
+	dp_srng_dst_inv_cached_descs(soc, hal_ring_hdl, num_avail_for_reap);
+
 	/* Find head descriptor from completion ring */
-	while (qdf_likely(tx_comp_hal_desc =
-			hal_srng_dst_get_next(soc->hal_soc, hal_ring_hdl))) {
+	while (qdf_likely(num_avail_for_reap--)) {
+		tx_comp_hal_desc = dp_srng_dst_get_next(soc, hal_ring_hdl);
+		if (qdf_unlikely(!tx_comp_hal_desc))
+			break;
 
 		buffer_src = hal_tx_comp_get_buffer_source(tx_comp_hal_desc);
 
@@ -3756,10 +3767,6 @@ more_data:
-		 * Processed packet count is more than given quota
-		 * stop to processing
+		 * Quota is applied up front by capping num_avail_for_reap,
+		 * so no per-descriptor quota check is needed in this loop
 		 */
-		if (num_processed >= quota) {
-			force_break = true;
-			break;
-		}
 
 		count++;
 
@@ -3774,6 +3781,9 @@ more_data:
 		dp_tx_comp_process_desc_list(soc, head_desc, ring_id);
 
 	if (dp_tx_comp_enable_eol_data_check(soc)) {
+		if (num_processed >= quota)
+			force_break = true;
+
 		if (!force_break &&
 		    hal_srng_dst_peek_sync_locked(soc->hal_soc,
 						  hal_ring_hdl)) {
diff --git a/hal/wifi3.0/hal_api.h b/hal/wifi3.0/hal_api.h
index dad28b6d1e..a2807df502 100644
--- a/hal/wifi3.0/hal_api.h
+++ b/hal/wifi3.0/hal_api.h
@@ -986,50 +986,85 @@ static inline int hal_srng_access_start(hal_soc_handle_t hal_soc_hdl,
 }
 
 /**
- * hal_srng_dst_get_next - Get next entry from a destination ring and move
- * cached tail pointer
- *
+ * hal_srng_dst_get_next - Get next entry from a destination ring
  * @hal_soc: Opaque HAL SOC handle
  * @hal_ring_hdl: Destination ring pointer
  *
- * Return: Opaque pointer for next ring entry; NULL on failire
+ * Return: Opaque pointer for next ring entry; NULL on failure
  */
 static inline void *hal_srng_dst_get_next(void *hal_soc,
 					  hal_ring_handle_t hal_ring_hdl)
 {
 	struct hal_srng *srng = (struct hal_srng *)hal_ring_hdl;
-	struct hal_soc *soc = (struct hal_soc *)hal_soc;
 	uint32_t *desc;
-	uint32_t *desc_next;
-	uint32_t tp;
 
-	if (srng->u.dst_ring.tp != srng->u.dst_ring.cached_hp) {
-		desc = &(srng->ring_base_vaddr[srng->u.dst_ring.tp]);
-		/* TODO: Using % is expensive, but we have to do this since
-		 * size of some SRNG rings is not power of 2 (due to descriptor
-		 * sizes). Need to create separate API for rings used
-		 * per-packet, with sizes power of 2 (TCL2SW, REO2SW,
-		 * SW2RXDMA and CE rings)
-		 */
-		srng->u.dst_ring.tp = (srng->u.dst_ring.tp + srng->entry_size) %
-			srng->ring_size;
+	if (srng->u.dst_ring.tp == srng->u.dst_ring.cached_hp)
+		return NULL;
 
-		if (srng->flags & HAL_SRNG_CACHED_DESC) {
-			tp = srng->u.dst_ring.tp;
-			desc_next = &srng->ring_base_vaddr[tp];
-			qdf_mem_dma_cache_sync(soc->qdf_dev,
-					       qdf_mem_virt_to_phys(desc_next),
-					       QDF_DMA_FROM_DEVICE,
-					       (srng->entry_size *
-						sizeof(uint32_t)));
-			qdf_prefetch(desc_next);
-		}
+	desc = &srng->ring_base_vaddr[srng->u.dst_ring.tp];
+	/* TODO: Using % is expensive, but we have to do this since
+	 * size of some SRNG rings is not power of 2 (due to descriptor
+	 * sizes). Need to create separate API for rings used
+	 * per-packet, with sizes power of 2 (TCL2SW, REO2SW,
+	 * SW2RXDMA and CE rings)
+	 */
+	srng->u.dst_ring.tp = (srng->u.dst_ring.tp + srng->entry_size);
+	if (srng->u.dst_ring.tp == srng->ring_size)
+		srng->u.dst_ring.tp = 0;
 
-		return (void *)desc;
+	if (srng->flags & HAL_SRNG_CACHED_DESC) {
+		struct hal_soc *soc = (struct hal_soc *)hal_soc;
+		uint32_t *desc_next;
+		uint32_t tp;
+
+		tp = srng->u.dst_ring.tp;
+		desc_next = &srng->ring_base_vaddr[tp];
+		qdf_mem_dma_cache_sync(soc->qdf_dev,
+				       qdf_mem_virt_to_phys(desc_next),
+				       QDF_DMA_FROM_DEVICE,
+				       (srng->entry_size *
+					sizeof(uint32_t)));
+		qdf_prefetch(desc_next);
 	}
 
-	return NULL;
+	return (void *)desc;
 }
 
+/**
+ * hal_srng_dst_get_next_cached - Get cached next entry
+ * @hal_soc: Opaque HAL SOC handle
+ * @hal_ring_hdl: Destination ring pointer
+ *
+ * Get next entry from a destination ring and move cached tail pointer
+ *
+ * Return: Opaque pointer for next ring entry; NULL on failure
+ */
+static inline
+void *hal_srng_dst_get_next_cached(void *hal_soc,
+				   hal_ring_handle_t hal_ring_hdl)
+{
+	struct hal_srng *srng = (struct hal_srng *)hal_ring_hdl;
+	uint32_t *desc;
+	uint32_t *desc_next;
+
+	if (srng->u.dst_ring.tp == srng->u.dst_ring.cached_hp)
+		return NULL;
+
+	desc = &srng->ring_base_vaddr[srng->u.dst_ring.tp];
+	/* TODO: Using % is expensive, but we have to do this since
+	 * size of some SRNG rings is not power of 2 (due to descriptor
+	 * sizes). Need to create separate API for rings used
+	 * per-packet, with sizes power of 2 (TCL2SW, REO2SW,
+	 * SW2RXDMA and CE rings)
+	 */
+	srng->u.dst_ring.tp = (srng->u.dst_ring.tp + srng->entry_size);
+	if (srng->u.dst_ring.tp == srng->ring_size)
+		srng->u.dst_ring.tp = 0;
+
+	desc_next = &srng->ring_base_vaddr[srng->u.dst_ring.tp];
+	qdf_prefetch(desc_next);
+
+	return (void *)desc;
+}
 
 /**
@@ -1148,8 +1183,69 @@ uint32_t hal_srng_dst_num_valid(void *hal_soc,
 
 	if (hp >= tp)
 		return (hp - tp) / srng->entry_size;
-	else
-		return (srng->ring_size - tp + hp) / srng->entry_size;
+
+	return (srng->ring_size - tp + hp) / srng->entry_size;
+}
+
+/**
+ * hal_srng_dst_inv_cached_descs - API to invalidate descriptors in batch mode
+ * @hal_soc: Opaque HAL SOC handle
+ * @hal_ring_hdl: Destination ring pointer
+ * @entry_count: Number of descriptors to be invalidated
+ *
+ * Invalidates a set of cached descriptors starting from the tail,
+ * covering the given entry count
+ *
+ * Return: None
+ */
+static inline void hal_srng_dst_inv_cached_descs(void *hal_soc,
+						 hal_ring_handle_t hal_ring_hdl,
+						 uint32_t entry_count)
+{
+	struct hal_srng *srng = (struct hal_srng *)hal_ring_hdl;
+	uint32_t hp = srng->u.dst_ring.cached_hp;
+	uint32_t tp = srng->u.dst_ring.tp;
+	uint32_t sync_p = 0;
+
+	/*
+	 * If SRNG does not have cached descriptors this
+	 * API call should be a no op
+	 */
+	if (!(srng->flags & HAL_SRNG_CACHED_DESC))
+		return;
+
+	if (qdf_unlikely(entry_count == 0))
+		return;
+
+	sync_p = (entry_count - 1) * srng->entry_size;
+
+	if (hp > tp) {
+		qdf_nbuf_dma_inv_range(&srng->ring_base_vaddr[tp],
+				       &srng->ring_base_vaddr[tp + sync_p +
+				       srng->entry_size]);
+	} else {
+		/*
+		 * We have wrapped around
+		 */
+		uint32_t wrap_cnt = ((srng->ring_size - tp) / srng->entry_size);
+
+		if (entry_count <= wrap_cnt) {
+			qdf_nbuf_dma_inv_range(&srng->ring_base_vaddr[tp],
+					       &srng->ring_base_vaddr[tp +
+					       sync_p + srng->entry_size]);
+			return;
+		}
+
+		entry_count -= wrap_cnt;
+		sync_p = (entry_count - 1) * srng->entry_size;
+
+		qdf_nbuf_dma_inv_range(&srng->ring_base_vaddr[tp],
+				       &srng->ring_base_vaddr[srng->ring_size]);
+
+		qdf_nbuf_dma_inv_range(&srng->ring_base_vaddr[0],
+				       &srng->ring_base_vaddr[sync_p +
+				       srng->entry_size]);
+	}
 }
 
 /**
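
Note for reviewers: the control-flow change in dp_tx_comp_handler() is easier
to see outside the diff. The standalone sketch below models the new reap
pattern against a toy ring; num_valid(), inv_cached_descs() and get_next() are
stand-ins for the hal_srng_*/dp_srng_* calls, not the real driver API.

#include <stdint.h>
#include <stdio.h>

#define NUM_DESCS 32

static uint32_t ring[NUM_DESCS];
static uint32_t tp;		/* consumer (tail) index */
static uint32_t hp = 20;	/* example: 20 valid completions */

static uint32_t num_valid(void)
{
	return hp - tp;
}

static void inv_cached_descs(uint32_t count)
{
	/* real code: one qdf_nbuf_dma_inv_range() span (or two on wrap) */
	printf("invalidate %u descriptors starting at tp=%u\n", count, tp);
}

static uint32_t *get_next(void)
{
	return (tp == hp) ? NULL : &ring[tp++];
}

static uint32_t reap(uint32_t quota)
{
	uint32_t num_avail_for_reap = num_valid();
	uint32_t count = 0;

	/* Cap at quota first so only descriptors that will actually be
	 * consumed get invalidated, then invalidate them in one batch
	 * instead of syncing one cache line per get_next() call.
	 */
	if (num_avail_for_reap >= quota)
		num_avail_for_reap = quota;

	inv_cached_descs(num_avail_for_reap);

	while (num_avail_for_reap--) {
		uint32_t *desc = get_next();

		if (!desc)
			break;
		count++;	/* real code: process one completion */
	}

	return count;
}

int main(void)
{
	printf("reaped %u of quota 16\n", reap(16));
	return 0;
}

This is also why the in-loop quota check can go away: the loop bound itself
now carries the quota, and the end-of-loop check only decides whether to
rearm or loop back for more data.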
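The two-range split in hal_srng_dst_inv_cached_descs() can be sanity-checked
with the same kind of toy model. The ring geometry below is made up for
illustration, and the invalidate is modeled as a printf of the word range
that the real code would hand to qdf_nbuf_dma_inv_range().

#include <stdint.h>
#include <stdio.h>

#define ENTRY_SIZE 8			/* words per descriptor (example) */
#define RING_SIZE  (16 * ENTRY_SIZE)	/* 16 descriptors (example) */

static uint32_t ring[RING_SIZE];

static void inv_range(uint32_t *start, uint32_t *end)
{
	printf("invalidate words [%td, %td)\n", start - ring, end - ring);
}

static void inv_cached_descs(uint32_t tp, uint32_t hp, uint32_t entry_count)
{
	uint32_t sync_p;

	if (entry_count == 0)
		return;

	/* sync_p is the word offset of the *last* descriptor to cover,
	 * so every end pointer below adds one more ENTRY_SIZE.
	 */
	sync_p = (entry_count - 1) * ENTRY_SIZE;

	if (hp > tp) {
		inv_range(&ring[tp], &ring[tp + sync_p + ENTRY_SIZE]);
	} else {
		/* wrapped: split at the physical end of the ring */
		uint32_t wrap_cnt = (RING_SIZE - tp) / ENTRY_SIZE;

		if (entry_count <= wrap_cnt) {
			inv_range(&ring[tp], &ring[tp + sync_p + ENTRY_SIZE]);
			return;
		}

		entry_count -= wrap_cnt;
		sync_p = (entry_count - 1) * ENTRY_SIZE;

		inv_range(&ring[tp], &ring[RING_SIZE]);
		inv_range(&ring[0], &ring[sync_p + ENTRY_SIZE]);
	}
}

int main(void)
{
	/* tp at descriptor 14 of 16, hp wrapped to descriptor 2:
	 * four valid entries split as words [112, 128) plus [0, 16)
	 */
	inv_cached_descs(14 * ENTRY_SIZE, 2 * ENTRY_SIZE, 4);
	return 0;
}

Note the end pointers stop exactly one descriptor past the last entry; the
earlier "+ (srng->entry_size * sizeof(uint32_t))" form did pointer arithmetic
in uint32_t units and therefore ran the range four times too far, which the
patch above corrects.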