qcacmn: do a batch invalidation of REO descriptors

Add an API to do a batch invalidation of REO descriptors;
this improves throughput by 40 to 45 Mbps.
Note: this change applies only to cached descriptors.

PINE with default driver:           3189 Mbps @ 100% core-3
PINE with skb prefetch:             3469 Mbps @ 100% core-3
PINE with skb prefetch + batch inv: 3506 Mbps @ 100% core-3
Change-Id: Ic2cf294972acfe5765448a18bed7e903562836c3
Author: Tallapragada Kalyan (2021-09-07 08:25:57 +05:30)
Committed by: Madan Koyyalamudi
Parent: 0fef3ec487
Commit: e3c327a0ba
4 changed files with 67 additions and 56 deletions
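The gain comes from replacing per-descriptor cache invalidation with one pass over everything the hardware has already produced, i.e. the span from the ring's tail pointer (TP) up to the cached head pointer (HP), split into two ranges when the ring wraps. The snippet below is a minimal standalone sketch of that idea, not the driver code: struct dst_ring, dcache_inv_range() and dst_ring_batch_inv() are hypothetical stand-ins for the hal_srng fields and the qdf_nbuf_dma_inv_range() calls used by hal_srng_dst_inv_cached_descs() in the diff that follows.

#include <stdint.h>
#include <stdio.h>

/* Stand-in for qdf_nbuf_dma_inv_range(): on a real target this would be a
 * CPU data-cache invalidate over [start, end); here it just reports the
 * range so the example can run anywhere. */
static void dcache_inv_range(void *start, void *end)
{
    printf("invalidate [%p, %p)\n", start, end);
}

/* Hypothetical, simplified destination ring mirroring the hal_srng fields
 * used by the diff (u.dst_ring.tp, u.dst_ring.cached_hp, ring_vaddr_end). */
struct dst_ring {
    uint32_t *base;      /* first word of descriptor memory */
    uint32_t *end;       /* one past the last word (base + ring_size) */
    uint32_t tp;         /* tail pointer, in 32-bit words */
    uint32_t hp_cached;  /* cached head pointer, in 32-bit words */
};

/* Batch invalidation: rather than invalidating one descriptor per reap,
 * invalidate everything the hardware produced (TP .. cached HP) up front,
 * using at most two contiguous ranges when the ring has wrapped. */
static void dst_ring_batch_inv(const struct dst_ring *r)
{
    uint32_t *first = &r->base[r->tp];
    uint32_t *last = &r->base[r->hp_cached];

    if (r->tp == r->hp_cached)
        return;                              /* nothing pending */

    if (last > first) {
        dcache_inv_range(first, last);       /* no wrap */
    } else {
        dcache_inv_range(first, r->end);     /* TP .. end of ring */
        dcache_inv_range(r->base, last);     /* start .. cached HP */
    }
}

int main(void)
{
    uint32_t ring[1024];                     /* e.g. 128 entries x 8 words */
    struct dst_ring r = { ring, ring + 1024, 0, 0 };

    r.tp = 40;   r.hp_cached = 200; dst_ring_batch_inv(&r); /* no wrap  */
    r.tp = 1000; r.hp_cached = 64;  dst_ring_batch_inv(&r); /* wrapped  */
    return 0;
}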


@@ -209,7 +209,7 @@ uint32_t dp_rx_process_li(struct dp_intr *int_ctx,
     bool near_full;
     union dp_rx_desc_list_elem_t *head[MAX_PDEV_CNT];
     union dp_rx_desc_list_elem_t *tail[MAX_PDEV_CNT];
-    uint32_t num_pending;
+    uint32_t num_pending = 0;
     uint32_t rx_bufs_used = 0, rx_buf_cookie;
     uint16_t msdu_len = 0;
     uint16_t peer_id;
@@ -228,7 +228,6 @@ uint32_t dp_rx_process_li(struct dp_intr *int_ctx,
     struct dp_srng *dp_rxdma_srng;
     struct rx_desc_pool *rx_desc_pool;
     struct dp_soc *soc = int_ctx->soc;
-    uint8_t core_id = 0;
     struct cdp_tid_rx_stats *tid_stats;
     qdf_nbuf_t nbuf_head;
     qdf_nbuf_t nbuf_tail;
@@ -239,7 +238,6 @@ uint32_t dp_rx_process_li(struct dp_intr *int_ctx,
     struct hif_opaque_softc *scn;
     int32_t tid = 0;
     bool is_prev_msdu_last = true;
-    uint32_t num_entries_avail = 0;
     uint32_t rx_ol_pkt_cnt = 0;
     uint32_t num_entries = 0;
     struct hal_rx_msdu_metadata msdu_metadata;
@@ -290,14 +288,21 @@ more_data:
         goto done;
     }

+    if (!num_pending)
+        num_pending = hal_srng_dst_num_valid(hal_soc, hal_ring_hdl, 0);
+
+    dp_srng_dst_inv_cached_descs(soc, hal_ring_hdl, num_pending);
+
+    if (num_pending > quota)
+        num_pending = quota;
+
     /*
      * start reaping the buffers from reo ring and queue
      * them in per vdev queue.
      * Process the received pkts in a different per vdev loop.
      */
-    while (qdf_likely(quota &&
-                      (ring_desc = hal_srng_dst_peek(hal_soc,
-                                                     hal_ring_hdl)))) {
+    while (qdf_likely(num_pending)) {
+        ring_desc = dp_srng_dst_get_next(soc, hal_ring_hdl);
         error = HAL_RX_ERROR_STATUS_GET(ring_desc);
         if (qdf_unlikely(error == HAL_REO_ERROR_DETECTED)) {
             dp_rx_err("%pK: HAL RING 0x%pK:error %d",
@@ -344,7 +349,6 @@ more_data:
                                             &tail[rx_desc->pool_id],
                                             rx_desc);
             }

-            hal_srng_dst_get_next(hal_soc, hal_ring_hdl);
             continue;
         }
@@ -363,7 +367,6 @@ more_data:
                                        ring_desc, rx_desc);
             /* ignore duplicate RX desc and continue to process */
             /* Pop out the descriptor */
-            hal_srng_dst_get_next(hal_soc, hal_ring_hdl);
             continue;
         }
@@ -374,7 +377,6 @@ more_data:
             dp_rx_dump_info_and_assert(soc, hal_ring_hdl,
                                        ring_desc, rx_desc);
             rx_desc->in_err_state = 1;
-            hal_srng_dst_get_next(hal_soc, hal_ring_hdl);
             continue;
         }
@@ -397,11 +399,6 @@ more_data:
              * the new MPDU
              */
             if (is_prev_msdu_last) {
-                /* Get number of entries available in HW ring */
-                num_entries_avail =
-                    hal_srng_dst_num_valid(hal_soc,
-                                           hal_ring_hdl, 1);
-
                 /* For new MPDU check if we can read complete
                  * MPDU by comparing the number of buffers
                  * available and number of buffers needed to
@@ -410,21 +407,26 @@ more_data:
                 if ((msdu_desc_info.msdu_len /
                      (RX_DATA_BUFFER_SIZE -
                       soc->rx_pkt_tlv_size) + 1) >
-                    num_entries_avail) {
+                    num_pending) {
                     DP_STATS_INC(soc,
                                  rx.msdu_scatter_wait_break,
                                  1);
                     dp_rx_cookie_reset_invalid_bit(
                                         ring_desc);
+                    /* As we are going to break out of the
+                     * loop because of unavailability of
+                     * descs to form complete SG, we need to
+                     * reset the TP in the REO destination
+                     * ring.
+                     */
+                    hal_srng_dst_dec_tp(hal_soc,
+                                        hal_ring_hdl);
                     break;
                 }
                 is_prev_msdu_last = false;
             }
         }

-        core_id = smp_processor_id();
-        DP_STATS_INC(soc, rx.ring_packets[core_id][reo_ring_num], 1);
-
         if (mpdu_desc_info.mpdu_flags & HAL_MPDU_F_RETRY_BIT)
             qdf_nbuf_set_rx_retry_flag(rx_desc->nbuf, 1);
@@ -436,9 +438,6 @@ more_data:
             msdu_desc_info.msdu_flags & HAL_MSDU_F_LAST_MSDU_IN_MPDU)
             is_prev_msdu_last = true;

-        /* Pop out the descriptor*/
-        hal_srng_dst_get_next(hal_soc, hal_ring_hdl);
-
         rx_bufs_reaped[rx_desc->pool_id]++;
         peer_mdata = mpdu_desc_info.peer_meta_data;
         QDF_NBUF_CB_RX_PEER_ID(rx_desc->nbuf) =
@@ -509,8 +508,10 @@ more_data:
          * across multiple buffers, let us not decrement quota
          * till we reap all buffers of that MSDU.
          */
-        if (qdf_likely(!qdf_nbuf_is_rx_chfrag_cont(rx_desc->nbuf)))
+        if (qdf_likely(!qdf_nbuf_is_rx_chfrag_cont(rx_desc->nbuf))) {
             quota -= 1;
+            num_pending -= 1;
+        }

         dp_rx_add_to_free_desc_list(&head[rx_desc->pool_id],
                                     &tail[rx_desc->pool_id], rx_desc);
@@ -527,6 +528,10 @@ more_data:
 done:
     dp_rx_srng_access_end(int_ctx, soc, hal_ring_hdl);

+    DP_STATS_INCC(soc,
+                  rx.ring_packets[qdf_get_smp_processor_id()][reo_ring_num],
+                  num_rx_bufs_reaped, num_rx_bufs_reaped);
+
     for (mac_id = 0; mac_id < MAX_PDEV_CNT; mac_id++) {
         /*
          * continue with next mac_id if no pkts were reaped


@@ -1489,6 +1489,25 @@ void *hal_srng_dst_get_next_cached(void *hal_soc,
     return (void *)desc;
 }

+/**
+ * hal_srng_dst_dec_tp - decrement the TP of the Dst ring by one entry
+ * @hal_soc: Opaque HAL SOC handle
+ * @hal_ring_hdl: Destination ring pointer
+ *
+ * Decrement the tail pointer in the destination ring by one entry
+ *
+ */
+static inline
+void hal_srng_dst_dec_tp(void *hal_soc, hal_ring_handle_t hal_ring_hdl)
+{
+    struct hal_srng *srng = (struct hal_srng *)hal_ring_hdl;
+
+    if (qdf_unlikely(!srng->u.dst_ring.tp))
+        srng->u.dst_ring.tp = (srng->ring_size - srng->entry_size);
+    else
+        srng->u.dst_ring.tp -= srng->entry_size;
+}
+
 static inline int hal_srng_lock(hal_ring_handle_t hal_ring_hdl)
 {
     struct hal_srng *srng = (struct hal_srng *)hal_ring_hdl;
@@ -1642,10 +1661,9 @@ uint32_t hal_srng_dst_num_valid(void *hal_soc,
  * hal_srng_dst_inv_cached_descs - API to invalidate descriptors in batch mode
  * @hal_soc: Opaque HAL SOC handle
  * @hal_ring_hdl: Destination ring pointer
- * @entry_count: Number of descriptors to be invalidated
+ * @entry_count: call invalidate API if valid entries available
  *
- * Invalidates a set of cached descriptors starting from tail to
- * provided count worth
+ * Invalidates a set of cached descriptors starting from TP to cached_HP
  *
  * Return - None
  */
@@ -1654,9 +1672,8 @@ static inline void hal_srng_dst_inv_cached_descs(void *hal_soc,
                                                  uint32_t entry_count)
 {
     struct hal_srng *srng = (struct hal_srng *)hal_ring_hdl;
-    uint32_t hp = srng->u.dst_ring.cached_hp;
-    uint32_t tp = srng->u.dst_ring.tp;
-    uint32_t sync_p = 0;
+    uint32_t *first_desc;
+    uint32_t *last_desc;

     /*
      * If SRNG does not have cached descriptors this
@@ -1665,38 +1682,23 @@ static inline void hal_srng_dst_inv_cached_descs(void *hal_soc,
     if (!(srng->flags & HAL_SRNG_CACHED_DESC))
         return;

-    if (qdf_unlikely(entry_count == 0))
+    if (!entry_count)
         return;

-    sync_p = (entry_count - 1) * srng->entry_size;
+    first_desc = &srng->ring_base_vaddr[srng->u.dst_ring.tp];
+    last_desc = &srng->ring_base_vaddr[srng->u.dst_ring.cached_hp];

-    if (hp > tp) {
-        qdf_nbuf_dma_inv_range(&srng->ring_base_vaddr[tp],
-                               &srng->ring_base_vaddr[tp + sync_p]
-                               + (srng->entry_size * sizeof(uint32_t)));
-    } else {
-        /*
-         * We have wrapped around
-         */
-        uint32_t wrap_cnt = ((srng->ring_size - tp) / srng->entry_size);
-
-        if (entry_count <= wrap_cnt) {
-            qdf_nbuf_dma_inv_range(&srng->ring_base_vaddr[tp],
-                                   &srng->ring_base_vaddr[tp + sync_p] +
-                                   (srng->entry_size * sizeof(uint32_t)));
-            return;
-        }
-
-        entry_count -= wrap_cnt;
-        sync_p = (entry_count - 1) * srng->entry_size;
-
-        qdf_nbuf_dma_inv_range(&srng->ring_base_vaddr[tp],
-                               &srng->ring_base_vaddr[srng->ring_size - srng->entry_size] +
-                               (srng->entry_size * sizeof(uint32_t)));
-
-        qdf_nbuf_dma_inv_range(&srng->ring_base_vaddr[0],
-                               &srng->ring_base_vaddr[sync_p]
-                               + (srng->entry_size * sizeof(uint32_t)));
+    if (last_desc > (uint32_t *)first_desc)
+        /* invalidate from tp to cached_hp */
+        qdf_nbuf_dma_inv_range((void *)first_desc, (void *)(last_desc));
+    else {
+        /* invalidate from tp to end of the ring */
+        qdf_nbuf_dma_inv_range((void *)first_desc,
+                               (void *)srng->ring_vaddr_end);
+
+        /* invalidate from start of ring to cached_hp */
+        qdf_nbuf_dma_inv_range((void *)srng->ring_base_vaddr,
+                               (void *)last_desc);
     }
 }
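
One detail in hal_srng_dst_dec_tp() above: the tail pointer counts 32-bit words, so stepping back one entry from offset 0 must wrap to the last entry at (ring_size - entry_size) rather than going negative. Below is a small standalone illustration of that wrap; RING_SIZE, ENTRY_SIZE and dec_tp() are made-up names that only mirror the helper's logic, not part of the commit.

#include <stdint.h>
#include <stdio.h>

/* Illustration only: sizes are in 32-bit words, as in struct hal_srng;
 * the values below are invented for the demo. */
#define RING_SIZE  64   /* 8 entries x 8 words each */
#define ENTRY_SIZE 8

/* Mirrors hal_srng_dst_dec_tp(): step the tail pointer back one entry,
 * wrapping from the first entry (offset 0) to the last one. */
static uint32_t dec_tp(uint32_t tp)
{
    if (tp == 0)
        return RING_SIZE - ENTRY_SIZE;  /* wrap to the last entry */
    return tp - ENTRY_SIZE;
}

int main(void)
{
    printf("%u -> %u\n", 8u, dec_tp(8));  /* 8 -> 0  */
    printf("%u -> %u\n", 0u, dec_tp(0));  /* 0 -> 56 */
    return 0;
}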


@@ -511,6 +511,9 @@ struct hal_srng {
     /* Virtual base address of the ring */
     uint32_t *ring_base_vaddr;

+    /* virtual address end */
+    uint32_t *ring_vaddr_end;
+
     /* Number of entries in ring */
     uint32_t num_entries;


@@ -1509,6 +1509,7 @@ void *hal_srng_setup(void *hal_soc, int ring_type, int ring_num,
     srng->num_entries = ring_params->num_entries;
     srng->ring_size = srng->num_entries * srng->entry_size;
     srng->ring_size_mask = srng->ring_size - 1;
+    srng->ring_vaddr_end = srng->ring_base_vaddr + srng->ring_size;
     srng->msi_addr = ring_params->msi_addr;
     srng->msi_data = ring_params->msi_data;
     srng->intr_timer_thres_us = ring_params->intr_timer_thres_us;