qcacmn: Optimize tx completion processing in fast path

Optimize tx completion processing in the fast path by appending the
whole list of tx descriptors to the free list at once, instead of
accessing each descriptor individually and adding it to the free list.
The advantage is that the descriptor pool lock is no longer taken per
descriptor; it is taken only once, when the entire list of descriptors
is appended to the free list.
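
For illustration only (not part of this commit): a minimal,
self-contained sketch of the single-lock splice described above. The
names fake_desc/fake_pool and the pthread mutex are stand-ins for the
real descriptor pool and its lock; in the actual change this path goes
through dp_tx_desc_free_list().

/*
 * Illustration only -- not qcacmn code. A chain of completed
 * descriptors [head..tail] is spliced onto the pool free list with a
 * single lock/unlock, instead of one lock round trip per descriptor.
 */
#include <stdint.h>
#include <pthread.h>

struct fake_desc {
	struct fake_desc *next;
};

struct fake_pool {
	pthread_mutex_t lock;
	struct fake_desc *freelist;
	uint32_t num_free;
};

/* Old pattern: lock and unlock once for every completed descriptor. */
void fake_desc_free_one(struct fake_pool *pool, struct fake_desc *desc)
{
	pthread_mutex_lock(&pool->lock);
	desc->next = pool->freelist;
	pool->freelist = desc;
	pool->num_free++;
	pthread_mutex_unlock(&pool->lock);
}

/*
 * New pattern: the caller links descriptors into a chain while reaping
 * the completion ring, then appends the whole chain under one lock.
 */
void fake_desc_free_list(struct fake_pool *pool, struct fake_desc *head,
			 struct fake_desc *tail, uint32_t count)
{
	pthread_mutex_lock(&pool->lock);
	tail->next = pool->freelist;
	pool->freelist = head;
	pool->num_free += count;
	pthread_mutex_unlock(&pool->lock);
}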

Also remove the unused member shinfo_addr from the tx_desc structure.
Removing this member makes room to widen the flags field from u16 to
u32 without increasing the size of the tx_desc structure. Together,
these changes improved peak KPI by 2% in SFE mode.
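
Again for illustration only (these toy structs are not the real
dp_tx_desc_s layout): on a typical LP64 target, dropping an 8-byte
pointer member lets a u16 flags field grow to u32 without changing
sizeof(), whereas widening flags while keeping the pointer would grow
the structure; exact sizes depend on the compiler and ABI.

/* Toy layouts, not dp_tx_desc_s; sizes assume a typical LP64 ABI. */
#include <stdint.h>
#include <stdio.h>

struct desc_before {	/* pointer member present, u16 flags */
	void *nbuf;
	uint8_t *shinfo_addr;
	uint16_t flags;
	uint16_t length;
	uint32_t id;
};

struct desc_naive {	/* u32 flags with the pointer kept: larger */
	void *nbuf;
	uint8_t *shinfo_addr;
	uint32_t flags;
	uint16_t length;
	uint32_t id;
};

struct desc_after {	/* pointer removed, u32 flags widened */
	void *nbuf;
	uint32_t flags;
	uint16_t length;
	uint32_t id;
};

int main(void)
{
	/* On LP64 this typically prints: before 24, naive 32, after 24 */
	printf("before %zu, naive %zu, after %zu\n",
	       sizeof(struct desc_before), sizeof(struct desc_naive),
	       sizeof(struct desc_after));
	return 0;
}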

Change-Id: I995de9cc08c866ad6e1cd1efc76ac2a35d40fb7c
CRs-Fixed: 3584578
Author: Nandha Kishore Easwaran
Date: 2023-08-08 09:53:23 +05:30
Committed by: Rahul Choudhary
parent fb7d334b7c
commit 15812b8137
6 changed files with 221 additions and 22 deletions


@@ -1197,7 +1197,6 @@ struct dp_tx_desc_s *dp_tx_prepare_desc_single(struct dp_vdev *vdev,
 	tx_desc->msdu_ext_desc = NULL;
 	tx_desc->pkt_offset = 0;
 	tx_desc->length = qdf_nbuf_headlen(nbuf);
-	tx_desc->shinfo_addr = skb_end_pointer(nbuf);
 	dp_tx_trace_pkt(soc, nbuf, tx_desc->id, vdev->vdev_id,
 			vdev->qdf_opmode);
@@ -5647,6 +5646,19 @@ dp_tx_update_ppeds_tx_comp_stats(struct dp_soc *soc,
 }
 #endif
 
+void
+dp_tx_comp_process_desc_list_fast(struct dp_soc *soc,
+				  struct dp_tx_desc_s *head_desc,
+				  struct dp_tx_desc_s *tail_desc,
+				  uint8_t ring_id,
+				  uint32_t fast_desc_count)
+{
+	struct dp_tx_desc_pool_s *pool = &soc->tx_desc[head_desc->pool_id];
+
+	dp_tx_outstanding_sub(head_desc->pdev, fast_desc_count);
+	dp_tx_desc_free_list(pool, head_desc, tail_desc, fast_desc_count);
+}
+
 void
 dp_tx_comp_process_desc_list(struct dp_soc *soc,
 			     struct dp_tx_desc_s *comp_head, uint8_t ring_id)
@@ -5828,13 +5840,17 @@ uint32_t dp_tx_comp_handler(struct dp_intr *int_ctx, struct dp_soc *soc,
 	struct dp_tx_desc_s *tx_desc = NULL;
 	struct dp_tx_desc_s *head_desc = NULL;
 	struct dp_tx_desc_s *tail_desc = NULL;
+	struct dp_tx_desc_s *fast_head_desc = NULL;
+	struct dp_tx_desc_s *fast_tail_desc = NULL;
 	uint32_t num_processed = 0;
+	uint32_t fast_desc_count = 0;
 	uint32_t count;
 	uint32_t num_avail_for_reap = 0;
 	bool force_break = false;
 	struct dp_srng *tx_comp_ring = &soc->tx_comp_ring[ring_id];
 	int max_reap_limit, ring_near_full;
 	uint32_t num_entries;
+	qdf_nbuf_queue_head_t h;
 
 	DP_HIST_INIT();
@@ -5869,6 +5885,8 @@ more_data:
 						   hal_ring_hdl,
 						   num_avail_for_reap);
 
+	dp_tx_nbuf_queue_head_init(&h);
+
 	/* Find head descriptor from completion ring */
 	while (qdf_likely(num_avail_for_reap--)) {
@@ -5935,7 +5953,8 @@ more_data:
 		}
 		tx_desc->buffer_src = buffer_src;
 
-		if (tx_desc->flags & DP_TX_DESC_FLAG_PPEDS)
+		if (tx_desc->flags & DP_TX_DESC_FLAG_FASTPATH_SIMPLE ||
+		    tx_desc->flags & DP_TX_DESC_FLAG_PPEDS)
 			goto add_to_pool2;
 
 		/*
@@ -6006,14 +6025,28 @@ add_to_pool:
 add_to_pool2:
 		/* First ring descriptor on the cycle */
-		if (!head_desc) {
-			head_desc = tx_desc;
+		if (tx_desc->flags & DP_TX_DESC_FLAG_FASTPATH_SIMPLE ||
+		    tx_desc->flags & DP_TX_DESC_FLAG_PPEDS) {
+			dp_tx_nbuf_dev_queue_free(&h, tx_desc);
+			fast_desc_count++;
+			if (!fast_head_desc) {
+				fast_head_desc = tx_desc;
+				fast_tail_desc = tx_desc;
+			}
+
+			fast_tail_desc->next = tx_desc;
+			fast_tail_desc = tx_desc;
+			dp_tx_desc_clear(tx_desc);
+		} else {
+			if (!head_desc) {
+				head_desc = tx_desc;
+				tail_desc = tx_desc;
+			}
+
+			tail_desc->next = tx_desc;
+			tx_desc->next = NULL;
 			tail_desc = tx_desc;
 		}
-
-		tail_desc->next = tx_desc;
-		tx_desc->next = NULL;
-		tail_desc = tx_desc;
 
 next_desc:
 		num_processed += !(count & DP_TX_NAPI_BUDGET_DIV_MASK);
@@ -6037,6 +6070,14 @@ next_desc:
 	dp_srng_access_end(int_ctx, soc, hal_ring_hdl);
 
+	/* Process the reaped descriptors that were sent via fast path */
+	if (fast_head_desc) {
+		dp_tx_comp_process_desc_list_fast(soc, fast_head_desc,
+						  fast_tail_desc, ring_id,
+						  fast_desc_count);
+		dp_tx_nbuf_dev_kfree_list(&h);
+	}
+
 	/* Process the reaped descriptors */
 	if (head_desc)
 		dp_tx_comp_process_desc_list(soc, head_desc, ring_id);