From e3c327a0ba8715258c40c3810f431ff3953f7087 Mon Sep 17 00:00:00 2001
From: Tallapragada Kalyan
Date: Tue, 7 Sep 2021 08:25:57 +0530
Subject: [PATCH] qcacmn: do a batch invalidation of REO descriptors

Added an API to do a batch invalidation of REO descriptors;
this gives an improvement of 40 to 45 Mbps.
Note: this change is applicable only for cached descriptors.

PINE with Default driver: 3189 @ 100% core-3
PINE with skb prefetch: 3469 @ 100% core-3
PINE with skb pre + batch inv: 3506 @ 100% core-3

Change-Id: Ic2cf294972acfe5765448a18bed7e903562836c3
---
 dp/wifi3.0/li/dp_li_rx.c   | 49 ++++++++++++++------------
 hal/wifi3.0/hal_api.h      | 70 ++++++++++++++++++++------------
 hal/wifi3.0/hal_internal.h |  3 ++
 hal/wifi3.0/hal_srng.c     |  1 +
 4 files changed, 67 insertions(+), 56 deletions(-)

diff --git a/dp/wifi3.0/li/dp_li_rx.c b/dp/wifi3.0/li/dp_li_rx.c
index d81b2deb97..03e82f1238 100644
--- a/dp/wifi3.0/li/dp_li_rx.c
+++ b/dp/wifi3.0/li/dp_li_rx.c
@@ -209,7 +209,7 @@ uint32_t dp_rx_process_li(struct dp_intr *int_ctx,
 	bool near_full;
 	union dp_rx_desc_list_elem_t *head[MAX_PDEV_CNT];
 	union dp_rx_desc_list_elem_t *tail[MAX_PDEV_CNT];
-	uint32_t num_pending;
+	uint32_t num_pending = 0;
 	uint32_t rx_bufs_used = 0, rx_buf_cookie;
 	uint16_t msdu_len = 0;
 	uint16_t peer_id;
@@ -228,7 +228,6 @@ uint32_t dp_rx_process_li(struct dp_intr *int_ctx,
 	struct dp_srng *dp_rxdma_srng;
 	struct rx_desc_pool *rx_desc_pool;
 	struct dp_soc *soc = int_ctx->soc;
-	uint8_t core_id = 0;
 	struct cdp_tid_rx_stats *tid_stats;
 	qdf_nbuf_t nbuf_head;
 	qdf_nbuf_t nbuf_tail;
@@ -239,7 +238,6 @@ uint32_t dp_rx_process_li(struct dp_intr *int_ctx,
 	struct hif_opaque_softc *scn;
 	int32_t tid = 0;
 	bool is_prev_msdu_last = true;
-	uint32_t num_entries_avail = 0;
 	uint32_t rx_ol_pkt_cnt = 0;
 	uint32_t num_entries = 0;
 	struct hal_rx_msdu_metadata msdu_metadata;
@@ -290,14 +288,21 @@ more_data:
 		goto done;
 	}
 
+	if (!num_pending)
+		num_pending = hal_srng_dst_num_valid(hal_soc, hal_ring_hdl, 0);
+
+	dp_srng_dst_inv_cached_descs(soc, hal_ring_hdl, num_pending);
+
+	if (num_pending > quota)
+		num_pending = quota;
+
 	/*
 	 * start reaping the buffers from reo ring and queue
 	 * them in per vdev queue.
 	 * Process the received pkts in a different per vdev loop.
 	 */
-	while (qdf_likely(quota &&
-			  (ring_desc = hal_srng_dst_peek(hal_soc,
-							 hal_ring_hdl)))) {
+	while (qdf_likely(num_pending)) {
+		ring_desc = dp_srng_dst_get_next(soc, hal_ring_hdl);
 
 		error = HAL_RX_ERROR_STATUS_GET(ring_desc);
 		if (qdf_unlikely(error == HAL_REO_ERROR_DETECTED)) {
 			dp_rx_err("%pK: HAL RING 0x%pK:error %d",
@@ -344,7 +349,6 @@ more_data:
 						   &tail[rx_desc->pool_id],
 						   rx_desc);
 			}
-			hal_srng_dst_get_next(hal_soc, hal_ring_hdl);
 			continue;
 		}
 
@@ -363,7 +367,6 @@ more_data:
 							  ring_desc, rx_desc);
 			/* ignore duplicate RX desc and continue to process */
 			/* Pop out the descriptor */
-			hal_srng_dst_get_next(hal_soc, hal_ring_hdl);
 			continue;
 		}
 
@@ -374,7 +377,6 @@ more_data:
 			dp_rx_dump_info_and_assert(soc, hal_ring_hdl,
 						   ring_desc, rx_desc);
 			rx_desc->in_err_state = 1;
-			hal_srng_dst_get_next(hal_soc, hal_ring_hdl);
 			continue;
 		}
 
@@ -397,11 +399,6 @@ more_data:
 			 * the new MPDU
 			 */
 			if (is_prev_msdu_last) {
-				/* Get number of entries available in HW ring */
-				num_entries_avail =
-					hal_srng_dst_num_valid(hal_soc,
-							       hal_ring_hdl, 1);
-
 				/* For new MPDU check if we can read complete
 				 * MPDU by comparing the number of buffers
 				 * available and number of buffers needed to
@@ -410,21 +407,26 @@ more_data:
 				if ((msdu_desc_info.msdu_len /
 				     (RX_DATA_BUFFER_SIZE -
 				      soc->rx_pkt_tlv_size) + 1) >
-				    num_entries_avail) {
+				    num_pending) {
 					DP_STATS_INC(soc,
 						     rx.msdu_scatter_wait_break,
 						     1);
 					dp_rx_cookie_reset_invalid_bit(
 								ring_desc);
+					/* As we are going to break out of the
+					 * loop because of unavailability of
+					 * descs to form complete SG, we need to
+					 * reset the TP in the REO destination
+					 * ring.
+					 */
+					hal_srng_dst_dec_tp(hal_soc,
+							    hal_ring_hdl);
 					break;
 				}
 				is_prev_msdu_last = false;
 			}
 		}
 
-		core_id = smp_processor_id();
-		DP_STATS_INC(soc, rx.ring_packets[core_id][reo_ring_num], 1);
-
 		if (mpdu_desc_info.mpdu_flags & HAL_MPDU_F_RETRY_BIT)
 			qdf_nbuf_set_rx_retry_flag(rx_desc->nbuf, 1);
 
@@ -436,9 +438,6 @@ more_data:
 		    msdu_desc_info.msdu_flags & HAL_MSDU_F_LAST_MSDU_IN_MPDU)
 			is_prev_msdu_last = true;
 
-		/* Pop out the descriptor*/
-		hal_srng_dst_get_next(hal_soc, hal_ring_hdl);
-
 		rx_bufs_reaped[rx_desc->pool_id]++;
 		peer_mdata = mpdu_desc_info.peer_meta_data;
 		QDF_NBUF_CB_RX_PEER_ID(rx_desc->nbuf) =
@@ -509,8 +508,10 @@ more_data:
 		 * across multiple buffers, let us not decrement quota
 		 * till we reap all buffers of that MSDU.
 		 */
-		if (qdf_likely(!qdf_nbuf_is_rx_chfrag_cont(rx_desc->nbuf)))
+		if (qdf_likely(!qdf_nbuf_is_rx_chfrag_cont(rx_desc->nbuf))) {
 			quota -= 1;
+			num_pending -= 1;
+		}
 
 		dp_rx_add_to_free_desc_list(&head[rx_desc->pool_id],
 					    &tail[rx_desc->pool_id], rx_desc);
@@ -527,6 +528,10 @@ more_data:
 done:
 	dp_rx_srng_access_end(int_ctx, soc, hal_ring_hdl);
 
+	DP_STATS_INCC(soc,
+		      rx.ring_packets[qdf_get_smp_processor_id()][reo_ring_num],
+		      num_rx_bufs_reaped, num_rx_bufs_reaped);
+
 	for (mac_id = 0; mac_id < MAX_PDEV_CNT; mac_id++) {
 		/*
 		 * continue with next mac_id if no pkts were reaped
diff --git a/hal/wifi3.0/hal_api.h b/hal/wifi3.0/hal_api.h
index dd6e389a69..3abb5da85e 100644
--- a/hal/wifi3.0/hal_api.h
+++ b/hal/wifi3.0/hal_api.h
@@ -1489,6 +1489,25 @@ void *hal_srng_dst_get_next_cached(void *hal_soc,
 	return (void *)desc;
 }
 
+/**
+ * hal_srng_dst_dec_tp - decrement the TP of the Dst ring by one entry
+ * @hal_soc: Opaque HAL SOC handle
+ * @hal_ring_hdl: Destination ring pointer
+ *
+ * Reset the tail pointer in the destination ring by one entry
+ *
+ */
+static inline
+void hal_srng_dst_dec_tp(void *hal_soc, hal_ring_handle_t hal_ring_hdl)
+{
+	struct hal_srng *srng = (struct hal_srng *)hal_ring_hdl;
+
+	if (qdf_unlikely(!srng->u.dst_ring.tp))
+		srng->u.dst_ring.tp = (srng->ring_size - srng->entry_size);
+	else
+		srng->u.dst_ring.tp -= srng->entry_size;
+}
+
 static inline int hal_srng_lock(hal_ring_handle_t hal_ring_hdl)
 {
 	struct hal_srng *srng = (struct hal_srng *)hal_ring_hdl;
@@ -1642,10 +1661,9 @@ uint32_t hal_srng_dst_num_valid(void *hal_soc,
  * hal_srng_dst_inv_cached_descs - API to invalidate descriptors in batch mode
  * @hal_soc: Opaque HAL SOC handle
  * @hal_ring_hdl: Destination ring pointer
- * @entry_count: Number of descriptors to be invalidated
+ * @entry_count: number of valid entries; invalidation is skipped when zero
  *
- * Invalidates a set of cached descriptors starting from tail to
- * provided count worth
+ * Invalidates the cached descriptors from the TP up to the cached HP
  *
  * Return - None
  */
@@ -1654,9 +1672,8 @@ static inline void hal_srng_dst_inv_cached_descs(void *hal_soc,
						 uint32_t entry_count)
 {
 	struct hal_srng *srng = (struct hal_srng *)hal_ring_hdl;
-	uint32_t hp = srng->u.dst_ring.cached_hp;
-	uint32_t tp = srng->u.dst_ring.tp;
-	uint32_t sync_p = 0;
+	uint32_t *first_desc;
+	uint32_t *last_desc;
 
 	/*
 	 * If SRNG does not have cached descriptors this
@@ -1665,38 +1682,23 @@ static inline void hal_srng_dst_inv_cached_descs(void *hal_soc,
 	if (!(srng->flags & HAL_SRNG_CACHED_DESC))
 		return;
 
-	if (qdf_unlikely(entry_count == 0))
+	if (!entry_count)
 		return;
 
-	sync_p = (entry_count - 1) * srng->entry_size;
+	first_desc = &srng->ring_base_vaddr[srng->u.dst_ring.tp];
+	last_desc = &srng->ring_base_vaddr[srng->u.dst_ring.cached_hp];
 
-	if (hp > tp) {
-		qdf_nbuf_dma_inv_range(&srng->ring_base_vaddr[tp],
-				       &srng->ring_base_vaddr[tp + sync_p]
-				       + (srng->entry_size * sizeof(uint32_t)));
-	} else {
-		/*
-		 * We have wrapped around
-		 */
-		uint32_t wrap_cnt = ((srng->ring_size - tp) / srng->entry_size);
+	if (last_desc > (uint32_t *)first_desc) {
+		/* invalidate from tp to cached_hp */
+		qdf_nbuf_dma_inv_range((void *)first_desc, (void *)last_desc);
+	} else {
+		/* invalidate from tp to end of the ring */
+		qdf_nbuf_dma_inv_range((void *)first_desc,
+				       (void *)srng->ring_vaddr_end);
 
-		if (entry_count <= wrap_cnt) {
-			qdf_nbuf_dma_inv_range(&srng->ring_base_vaddr[tp],
-					       &srng->ring_base_vaddr[tp + sync_p] +
-					       (srng->entry_size * sizeof(uint32_t)));
-			return;
-		}
-
-		entry_count -= wrap_cnt;
-		sync_p = (entry_count - 1) * srng->entry_size;
-
-		qdf_nbuf_dma_inv_range(&srng->ring_base_vaddr[tp],
-				       &srng->ring_base_vaddr[srng->ring_size - srng->entry_size] +
-				       (srng->entry_size * sizeof(uint32_t)));
-
-		qdf_nbuf_dma_inv_range(&srng->ring_base_vaddr[0],
-				       &srng->ring_base_vaddr[sync_p]
-				       + (srng->entry_size * sizeof(uint32_t)));
+		/* invalidate from start of ring to cached_hp */
+		qdf_nbuf_dma_inv_range((void *)srng->ring_base_vaddr,
+				       (void *)last_desc);
 	}
 }
 
diff --git a/hal/wifi3.0/hal_internal.h b/hal/wifi3.0/hal_internal.h
index b69fc4da19..a38ec22ee5 100644
--- a/hal/wifi3.0/hal_internal.h
+++ b/hal/wifi3.0/hal_internal.h
@@ -511,6 +511,9 @@ struct hal_srng {
 	/* Virtual base address of the ring */
 	uint32_t *ring_base_vaddr;
 
+	/* Virtual end address of the ring */
+	uint32_t *ring_vaddr_end;
+
 	/* Number of entries in ring */
 	uint32_t num_entries;
 
diff --git a/hal/wifi3.0/hal_srng.c b/hal/wifi3.0/hal_srng.c
index ec0bb96faa..67502559e1 100644
--- a/hal/wifi3.0/hal_srng.c
+++ b/hal/wifi3.0/hal_srng.c
@@ -1509,6 +1509,7 @@ void *hal_srng_setup(void *hal_soc, int ring_type, int ring_num,
 	srng->num_entries = ring_params->num_entries;
 	srng->ring_size = srng->num_entries * srng->entry_size;
 	srng->ring_size_mask = srng->ring_size - 1;
+	srng->ring_vaddr_end = srng->ring_base_vaddr + srng->ring_size;
 	srng->msi_addr = ring_params->msi_addr;
 	srng->msi_data = ring_params->msi_data;
 	srng->intr_timer_thres_us = ring_params->intr_timer_thres_us;
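
Below is a minimal standalone sketch (not part of the patch) of the batch-invalidation idea introduced above: instead of invalidating one cached REO descriptor per reap, invalidate everything from the ring tail pointer (TP) up to the cached head pointer (HP) in at most two contiguous ranges, handling the wrap-around case. The demo_* names, the entry and ring sizes, and the printf stand-in for qdf_nbuf_dma_inv_range() are illustrative assumptions; only the range arithmetic mirrors the new hal_srng_dst_inv_cached_descs().

#include <stdint.h>
#include <stdio.h>

#define ENTRY_SIZE   8                      /* dwords per descriptor (demo value) */
#define NUM_ENTRIES  16                     /* ring depth used for this demo      */
#define RING_SIZE    (ENTRY_SIZE * NUM_ENTRIES)

struct demo_ring {
	uint32_t ring_base[RING_SIZE];      /* cached descriptor memory           */
	uint32_t tp;                        /* tail pointer, in dwords            */
	uint32_t cached_hp;                 /* cached head pointer, in dwords     */
};

/* Stand-in for qdf_nbuf_dma_inv_range(): just report the half-open range. */
static void demo_dma_inv_range(void *start, void *end)
{
	printf("invalidate [%p, %p)\n", start, end);
}

/* Mirrors the two-segment invalidation done by hal_srng_dst_inv_cached_descs() */
static void demo_batch_inv(struct demo_ring *ring, uint32_t entry_count)
{
	uint32_t *first_desc, *last_desc;

	if (!entry_count)
		return;

	first_desc = &ring->ring_base[ring->tp];
	last_desc = &ring->ring_base[ring->cached_hp];

	if (last_desc > first_desc) {
		/* no wrap: one range from TP to cached HP */
		demo_dma_inv_range(first_desc, last_desc);
	} else {
		/* wrap: TP..end of ring, then start of ring..cached HP */
		demo_dma_inv_range(first_desc, &ring->ring_base[RING_SIZE]);
		demo_dma_inv_range(ring->ring_base, last_desc);
	}
}

int main(void)
{
	struct demo_ring ring = { .tp = 12 * ENTRY_SIZE,
				  .cached_hp = 4 * ENTRY_SIZE };

	/* 8 valid entries that wrap over the end of the ring */
	demo_batch_inv(&ring, 8);
	return 0;
}

The design point the patch relies on is that the cost of cache-maintenance calls is amortised: one (or two, on wrap) ranged invalidations per interrupt replace a per-descriptor invalidation inside the reap loop, which is where the quoted 40 to 45 Mbps improvement comes from.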