ath10k: implement NAPI support
Add NAPI support for rx and tx completion. NAPI poll is scheduled from interrupt handler. The design is as below - on interrupt - schedule napi and mask interrupts - on poll - process all pipes (no actual Tx/Rx) - process Rx within budget - if quota exceeds budget reschedule napi poll by returning budget - process Tx completions and update budget if necessary - process Tx fetch indications (pull-push) - push any other pending Tx (if possible) - before resched or napi completion replenish htt rx ring buffer - if work done < budget, complete napi poll and unmask interrupts This change also get rid of two tasklets (intr_tq and txrx_compl_task). Measured peak throughput with NAPI on IPQ4019 platform in controlled environment. No noticeable reduction in throughput is seen and also observed improvements in CPU usage. Approx. 15% CPU usage got reduced in UDP uplink case. DL: AP DUT Tx UL: AP DUT Rx IPQ4019 (avg. cpu usage %) ======== TOT +NAPI =========== ============= TCP DL 644 Mbps (42%) 645 Mbps (36%) TCP UL 673 Mbps (30%) 675 Mbps (26%) UDP DL 682 Mbps (49%) 680 Mbps (49%) UDP UL 720 Mbps (28%) 717 Mbps (11%) Signed-off-by: Rajkumar Manoharan <rmanohar@qti.qualcomm.com> Signed-off-by: Kalle Valo <kvalo@qca.qualcomm.com>
This commit is contained in:

committed by
Kalle Valo

parent
c39265f72a
commit
3c97f5de1f
@@ -1506,12 +1506,10 @@ void ath10k_pci_hif_send_complete_check(struct ath10k *ar, u8 pipe,
|
||||
ath10k_ce_per_engine_service(ar, pipe);
|
||||
}
|
||||
|
||||
void ath10k_pci_kill_tasklet(struct ath10k *ar)
|
||||
static void ath10k_pci_rx_retry_sync(struct ath10k *ar)
|
||||
{
|
||||
struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
|
||||
|
||||
tasklet_kill(&ar_pci->intr_tq);
|
||||
|
||||
del_timer_sync(&ar_pci->rx_post_retry);
|
||||
}
|
||||
|
||||
@@ -1570,7 +1568,7 @@ void ath10k_pci_hif_get_default_pipe(struct ath10k *ar,
|
||||
ul_pipe, dl_pipe);
|
||||
}
|
||||
|
||||
static void ath10k_pci_irq_msi_fw_mask(struct ath10k *ar)
|
||||
void ath10k_pci_irq_msi_fw_mask(struct ath10k *ar)
|
||||
{
|
||||
u32 val;
|
||||
|
||||
@@ -1753,7 +1751,7 @@ void ath10k_pci_ce_deinit(struct ath10k *ar)
|
||||
|
||||
void ath10k_pci_flush(struct ath10k *ar)
|
||||
{
|
||||
ath10k_pci_kill_tasklet(ar);
|
||||
ath10k_pci_rx_retry_sync(ar);
|
||||
ath10k_pci_buffer_cleanup(ar);
|
||||
}
|
||||
|
||||
@@ -1780,6 +1778,8 @@ static void ath10k_pci_hif_stop(struct ath10k *ar)
|
||||
ath10k_pci_irq_disable(ar);
|
||||
ath10k_pci_irq_sync(ar);
|
||||
ath10k_pci_flush(ar);
|
||||
napi_synchronize(&ar->napi);
|
||||
napi_disable(&ar->napi);
|
||||
|
||||
spin_lock_irqsave(&ar_pci->ps_lock, flags);
|
||||
WARN_ON(ar_pci->ps_wake_refcount > 0);
|
||||
@@ -2533,6 +2533,7 @@ static int ath10k_pci_hif_power_up(struct ath10k *ar)
|
||||
ath10k_err(ar, "could not wake up target CPU: %d\n", ret);
|
||||
goto err_ce;
|
||||
}
|
||||
napi_enable(&ar->napi);
|
||||
|
||||
return 0;
|
||||
|
||||
@@ -2772,35 +2773,53 @@ static irqreturn_t ath10k_pci_interrupt_handler(int irq, void *arg)
|
||||
return IRQ_NONE;
|
||||
}
|
||||
|
||||
if (ar_pci->oper_irq_mode == ATH10K_PCI_IRQ_LEGACY) {
|
||||
if (!ath10k_pci_irq_pending(ar))
|
||||
return IRQ_NONE;
|
||||
if ((ar_pci->oper_irq_mode == ATH10K_PCI_IRQ_LEGACY) &&
|
||||
!ath10k_pci_irq_pending(ar))
|
||||
return IRQ_NONE;
|
||||
|
||||
ath10k_pci_disable_and_clear_legacy_irq(ar);
|
||||
}
|
||||
|
||||
tasklet_schedule(&ar_pci->intr_tq);
|
||||
ath10k_pci_disable_and_clear_legacy_irq(ar);
|
||||
ath10k_pci_irq_msi_fw_mask(ar);
|
||||
napi_schedule(&ar->napi);
|
||||
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
static void ath10k_pci_tasklet(unsigned long data)
|
||||
static int ath10k_pci_napi_poll(struct napi_struct *ctx, int budget)
|
||||
{
|
||||
struct ath10k *ar = (struct ath10k *)data;
|
||||
struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
|
||||
struct ath10k *ar = container_of(ctx, struct ath10k, napi);
|
||||
int done = 0;
|
||||
|
||||
if (ath10k_pci_has_fw_crashed(ar)) {
|
||||
ath10k_pci_irq_disable(ar);
|
||||
ath10k_pci_fw_crashed_clear(ar);
|
||||
ath10k_pci_fw_crashed_dump(ar);
|
||||
return;
|
||||
napi_complete(ctx);
|
||||
return done;
|
||||
}
|
||||
|
||||
ath10k_ce_per_engine_service_any(ar);
|
||||
|
||||
/* Re-enable legacy irq that was disabled in the irq handler */
|
||||
if (ar_pci->oper_irq_mode == ATH10K_PCI_IRQ_LEGACY)
|
||||
done = ath10k_htt_txrx_compl_task(ar, budget);
|
||||
|
||||
if (done < budget) {
|
||||
napi_complete(ctx);
|
||||
/* In case of MSI, it is possible that interrupts are received
|
||||
* while NAPI poll is inprogress. So pending interrupts that are
|
||||
* received after processing all copy engine pipes by NAPI poll
|
||||
* will not be handled again. This is causing failure to
|
||||
* complete boot sequence in x86 platform. So before enabling
|
||||
* interrupts safer to check for pending interrupts for
|
||||
* immediate servicing.
|
||||
*/
|
||||
if (CE_INTERRUPT_SUMMARY(ar)) {
|
||||
napi_reschedule(ctx);
|
||||
goto out;
|
||||
}
|
||||
ath10k_pci_enable_legacy_irq(ar);
|
||||
ath10k_pci_irq_msi_fw_unmask(ar);
|
||||
}
|
||||
|
||||
out:
|
||||
return done;
|
||||
}
|
||||
|
||||
static int ath10k_pci_request_irq_msi(struct ath10k *ar)
|
||||
@@ -2858,11 +2877,10 @@ static void ath10k_pci_free_irq(struct ath10k *ar)
|
||||
free_irq(ar_pci->pdev->irq, ar);
|
||||
}
|
||||
|
||||
void ath10k_pci_init_irq_tasklets(struct ath10k *ar)
|
||||
void ath10k_pci_init_napi(struct ath10k *ar)
|
||||
{
|
||||
struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
|
||||
|
||||
tasklet_init(&ar_pci->intr_tq, ath10k_pci_tasklet, (unsigned long)ar);
|
||||
netif_napi_add(&ar->napi_dev, &ar->napi, ath10k_pci_napi_poll,
|
||||
ATH10K_NAPI_BUDGET);
|
||||
}
|
||||
|
||||
static int ath10k_pci_init_irq(struct ath10k *ar)
|
||||
@@ -2870,7 +2888,7 @@ static int ath10k_pci_init_irq(struct ath10k *ar)
|
||||
struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
|
||||
int ret;
|
||||
|
||||
ath10k_pci_init_irq_tasklets(ar);
|
||||
ath10k_pci_init_napi(ar);
|
||||
|
||||
if (ath10k_pci_irq_mode != ATH10K_PCI_IRQ_AUTO)
|
||||
ath10k_info(ar, "limiting irq mode to: %d\n",
|
||||
@@ -3131,7 +3149,8 @@ int ath10k_pci_setup_resource(struct ath10k *ar)
|
||||
|
||||
void ath10k_pci_release_resource(struct ath10k *ar)
|
||||
{
|
||||
ath10k_pci_kill_tasklet(ar);
|
||||
ath10k_pci_rx_retry_sync(ar);
|
||||
netif_napi_del(&ar->napi);
|
||||
ath10k_pci_ce_deinit(ar);
|
||||
ath10k_pci_free_pipes(ar);
|
||||
}
|
||||
@@ -3298,7 +3317,7 @@ static int ath10k_pci_probe(struct pci_dev *pdev,
|
||||
|
||||
err_free_irq:
|
||||
ath10k_pci_free_irq(ar);
|
||||
ath10k_pci_kill_tasklet(ar);
|
||||
ath10k_pci_rx_retry_sync(ar);
|
||||
|
||||
err_deinit_irq:
|
||||
ath10k_pci_deinit_irq(ar);
|
||||
|
Reference in New Issue
Block a user