msm: add: changed tx completion to be done in napi context

In an effort to improve the data path, the TX data path was changed
to use NAPI for TX completions.
Added a dedicated NAPI polling function, and a napi_struct
to each sys pipe.

Change-Id: I168a03a112109a2a9a7a747bf08147107a8d5fc7
Acked-by: Tal Gelbard <tgelbard@qti.qualcomm.com>
Signed-off-by: Amir Levy <alevy@codeaurora.org>
This commit is contained in:
Amir Levy
2020-04-22 15:32:19 +03:00
committed by Gerrit - the friendly Code Review server
parent e42bd3284e
commit c45018dd9f
3 changed files with 106 additions and 10 deletions

View File

@@ -6568,8 +6568,8 @@ static int ipa3_lan_poll(struct napi_struct *napi, int budget)
static inline void ipa3_enable_napi_netdev(void) static inline void ipa3_enable_napi_netdev(void)
{ {
if (ipa3_ctx->lan_rx_napi_enable) { if (ipa3_ctx->lan_rx_napi_enable) {
init_dummy_netdev(&ipa3_ctx->lan_ndev); init_dummy_netdev(&ipa3_ctx->generic_ndev);
netif_napi_add(&ipa3_ctx->lan_ndev, &ipa3_ctx->napi_lan_rx, netif_napi_add(&ipa3_ctx->generic_ndev, &ipa3_ctx->napi_lan_rx,
ipa3_lan_poll, NAPI_WEIGHT); ipa3_lan_poll, NAPI_WEIGHT);
} }
} }
@@ -6701,6 +6701,7 @@ static int ipa3_pre_init(const struct ipa3_plat_drv_res *resource_p,
ipa3_ctx->do_ram_collection_on_crash = ipa3_ctx->do_ram_collection_on_crash =
resource_p->do_ram_collection_on_crash; resource_p->do_ram_collection_on_crash;
ipa3_ctx->lan_rx_napi_enable = resource_p->lan_rx_napi_enable; ipa3_ctx->lan_rx_napi_enable = resource_p->lan_rx_napi_enable;
ipa3_ctx->tx_napi_enable = resource_p->tx_napi_enable;
ipa3_ctx->rmnet_ctl_enable = resource_p->rmnet_ctl_enable; ipa3_ctx->rmnet_ctl_enable = resource_p->rmnet_ctl_enable;
ipa3_ctx->tx_wrapper_cache_max_size = get_tx_wrapper_cache_size( ipa3_ctx->tx_wrapper_cache_max_size = get_tx_wrapper_cache_size(
resource_p->tx_wrapper_cache_max_size); resource_p->tx_wrapper_cache_max_size);
@@ -7697,6 +7698,13 @@ static int get_ipa_dts_configuration(struct platform_device *pdev,
ipa_drv_res->lan_rx_napi_enable ipa_drv_res->lan_rx_napi_enable
? "True" : "False"); ? "True" : "False");
ipa_drv_res->tx_napi_enable =
of_property_read_bool(pdev->dev.of_node,
"qcom,tx-napi");
IPADBG(": Enable tx NAPI = %s\n",
ipa_drv_res->tx_napi_enable
? "True" : "False");
ipa_drv_res->rmnet_ctl_enable = ipa_drv_res->rmnet_ctl_enable =
of_property_read_bool(pdev->dev.of_node, of_property_read_bool(pdev->dev.of_node,
"qcom,rmnet-ctl-enable"); "qcom,rmnet-ctl-enable");

View File

@@ -136,7 +136,14 @@ static void ipa3_tasklet_rx_notify(unsigned long data);
static u32 ipa_adjust_ra_buff_base_sz(u32 aggr_byte_limit); static u32 ipa_adjust_ra_buff_base_sz(u32 aggr_byte_limit);
static void ipa3_wq_write_done_common(struct ipa3_sys_context *sys, /**
* ipa3_wq_write_done_common() - this function is responsible for freeing
* all tx_pkt_wrappers related to an skb
* @tx_pkt: the first tx_pkt_wrapper related to a certain skb
* @sys: points to the ipa3_sys_context the EOT was received on
* Returns the number of tx_pkt_wrappers that were freed
*/
static int ipa3_wq_write_done_common(struct ipa3_sys_context *sys,
struct ipa3_tx_pkt_wrapper *tx_pkt) struct ipa3_tx_pkt_wrapper *tx_pkt)
{ {
struct ipa3_tx_pkt_wrapper *next_pkt; struct ipa3_tx_pkt_wrapper *next_pkt;
@@ -147,16 +154,16 @@ static void ipa3_wq_write_done_common(struct ipa3_sys_context *sys,
if (unlikely(tx_pkt == NULL)) { if (unlikely(tx_pkt == NULL)) {
IPAERR("tx_pkt is NULL\n"); IPAERR("tx_pkt is NULL\n");
return; return 0;
} }
cnt = tx_pkt->cnt; cnt = tx_pkt->cnt;
IPADBG_LOW("cnt: %d\n", cnt);
for (i = 0; i < cnt; i++) { for (i = 0; i < cnt; i++) {
spin_lock_bh(&sys->spinlock); spin_lock_bh(&sys->spinlock);
if (unlikely(list_empty(&sys->head_desc_list))) { if (unlikely(list_empty(&sys->head_desc_list))) {
spin_unlock_bh(&sys->spinlock); spin_unlock_bh(&sys->spinlock);
return; IPAERR_RL("list is empty missing descriptors");
return i;
} }
next_pkt = list_next_entry(tx_pkt, link); next_pkt = list_next_entry(tx_pkt, link);
list_del(&tx_pkt->link); list_del(&tx_pkt->link);
@@ -192,6 +199,7 @@ static void ipa3_wq_write_done_common(struct ipa3_sys_context *sys,
(*callback)(user1, user2); (*callback)(user1, user2);
tx_pkt = next_pkt; tx_pkt = next_pkt;
} }
return i;
} }
static void ipa3_wq_write_done_status(int src_pipe, static void ipa3_wq_write_done_status(int src_pipe,
@@ -246,6 +254,54 @@ static void ipa3_tasklet_write_done(unsigned long data)
spin_unlock_bh(&sys->spinlock); spin_unlock_bh(&sys->spinlock);
} }
/*
 * ipa3_poll_tx_complete() - free completed TX descriptors, bounded by budget
 * @sys: sys pipe context whose head_desc_list is reaped
 * @budget: maximum number of tx_pkt_wrappers to free in this call
 *
 * Walks sys->head_desc_list while there are pending EOTs (xmit_eot_cnt > 0)
 * and budget remains. For each head packet, the lock is dropped around
 * ipa3_wq_write_done_common() — which takes sys->spinlock internally and may
 * free a whole chain of wrappers, so budget can be overshot by the last
 * chain. Decrements xmit_eot_cnt once per packet that had xmit_done set.
 *
 * Returns the number of tx_pkt_wrappers actually freed (may exceed the
 * budget passed in by at most one chain's worth).
 */
static int ipa3_poll_tx_complete(struct ipa3_sys_context *sys, int budget)
{
struct ipa3_tx_pkt_wrapper *this_pkt = NULL;
bool xmit_done = false;
int entry_budget = budget;
spin_lock_bh(&sys->spinlock);
while (budget > 0 && atomic_read(&sys->xmit_eot_cnt) > 0) {
if (unlikely(list_empty(&sys->head_desc_list))) {
IPADBG_LOW("list is empty");
break;
}
this_pkt = list_first_entry(&sys->head_desc_list,
struct ipa3_tx_pkt_wrapper, link);
/* sample xmit_done before dropping the lock; the wrapper may be
 * freed by ipa3_wq_write_done_common() below
 */
xmit_done = this_pkt->xmit_done;
spin_unlock_bh(&sys->spinlock);
budget -= ipa3_wq_write_done_common(sys, this_pkt);
spin_lock_bh(&sys->spinlock);
/* one EOT consumed per completed head packet; never below zero */
if (xmit_done)
atomic_add_unless(&sys->xmit_eot_cnt, -1, 0);
}
spin_unlock_bh(&sys->spinlock);
return entry_budget - budget;
}
/*
 * ipa3_aux_poll_tx_complete() - NAPI poll callback for TX completions
 * @napi_tx: the per-sys-pipe napi_struct embedded in ipa3_sys_context
 * @budget: NAPI budget for this poll round
 *
 * Reaps completed TX descriptors via ipa3_poll_tx_complete(). If fewer than
 * budget completions were processed, the pipe is considered drained:
 * napi_complete() is called and in_napi_context is cleared. A new EOT can
 * race in between the reap and napi_complete() (see
 * ipa_gsi_irq_tx_notify_cb(), which only schedules NAPI when it wins the
 * in_napi_context cmpxchg) — so after clearing the flag we re-check
 * xmit_eot_cnt, re-acquire the flag with cmpxchg, and reschedule ourselves
 * to keep polling.
 *
 * Returns the number of completions, capped at budget — tx_done may exceed
 * budget because ipa3_poll_tx_complete() frees whole descriptor chains.
 */
static int ipa3_aux_poll_tx_complete(struct napi_struct *napi_tx, int budget)
{
struct ipa3_sys_context *sys = container_of(napi_tx,
struct ipa3_sys_context, napi_tx);
int tx_done = 0;
poll_tx:
tx_done += ipa3_poll_tx_complete(sys, budget - tx_done);
if (tx_done < budget) {
napi_complete(napi_tx);
atomic_set(&sys->in_napi_context, 0);
/* an EOT may have arrived while we marked NAPI as complete;
 * if so, and we win the in_napi_context flag back, poll again
 */
if (atomic_read(&sys->xmit_eot_cnt) > 0 &&
!atomic_cmpxchg(&sys->in_napi_context, 0, 1)
&& napi_reschedule(napi_tx)) {
goto poll_tx;
}
}
IPADBG_LOW("the number of tx completions is: %d", tx_done);
return min(tx_done, budget);
}
static void ipa3_send_nop_desc(struct work_struct *work) static void ipa3_send_nop_desc(struct work_struct *work)
{ {
struct ipa3_sys_context *sys = container_of(work, struct ipa3_sys_context *sys = container_of(work,
@@ -283,6 +339,7 @@ static void ipa3_send_nop_desc(struct work_struct *work)
return; return;
} }
list_add_tail(&tx_pkt->link, &sys->head_desc_list); list_add_tail(&tx_pkt->link, &sys->head_desc_list);
sys->len++;
sys->nop_pending = false; sys->nop_pending = false;
memset(&nop_xfer, 0, sizeof(nop_xfer)); memset(&nop_xfer, 0, sizeof(nop_xfer));
@@ -452,7 +509,7 @@ int ipa3_send(struct ipa3_sys_context *sys,
tx_pkt->xmit_done = false; tx_pkt->xmit_done = false;
list_add_tail(&tx_pkt->link, &sys->head_desc_list); list_add_tail(&tx_pkt->link, &sys->head_desc_list);
sys->len++;
gsi_xfer[i].addr = tx_pkt->mem.phys_base; gsi_xfer[i].addr = tx_pkt->mem.phys_base;
/* /*
@@ -1120,6 +1177,22 @@ int ipa3_setup_sys_pipe(struct ipa_sys_connect_params *sys_in, u32 *clnt_hdl)
if (sys_in->client == IPA_CLIENT_APPS_WAN_LOW_LAT_CONS) if (sys_in->client == IPA_CLIENT_APPS_WAN_LOW_LAT_CONS)
tasklet_init(&ep->sys->tasklet, ipa3_tasklet_rx_notify, tasklet_init(&ep->sys->tasklet, ipa3_tasklet_rx_notify,
(unsigned long) ep->sys); (unsigned long) ep->sys);
if (ipa3_ctx->tx_napi_enable) {
if (sys_in->client != IPA_CLIENT_APPS_WAN_PROD) {
netif_tx_napi_add(&ipa3_ctx->generic_ndev,
&ep->sys->napi_tx, ipa3_aux_poll_tx_complete,
NAPI_TX_WEIGHT);
} else {
netif_tx_napi_add((struct net_device *)sys_in->priv,
&ep->sys->napi_tx, ipa3_aux_poll_tx_complete,
NAPI_TX_WEIGHT);
}
napi_enable(&ep->sys->napi_tx);
IPADBG("napi_enable on producer client %d completed",
sys_in->client);
}
ep->skip_ep_cfg = sys_in->skip_ep_cfg; ep->skip_ep_cfg = sys_in->skip_ep_cfg;
if (ipa3_assign_policy(sys_in, ep->sys)) { if (ipa3_assign_policy(sys_in, ep->sys)) {
IPAERR("failed to sys ctx for client %d\n", sys_in->client); IPAERR("failed to sys ctx for client %d\n", sys_in->client);
@@ -4454,6 +4527,12 @@ static void ipa_gsi_irq_tx_notify_cb(struct gsi_chan_xfer_notify *notify)
tx_pkt = notify->xfer_user_data; tx_pkt = notify->xfer_user_data;
tx_pkt->xmit_done = true; tx_pkt->xmit_done = true;
atomic_inc(&tx_pkt->sys->xmit_eot_cnt); atomic_inc(&tx_pkt->sys->xmit_eot_cnt);
if (ipa3_ctx->tx_napi_enable) {
if(!atomic_cmpxchg(&tx_pkt->sys->in_napi_context, 0, 1))
napi_schedule(&tx_pkt->sys->napi_tx);
}
else
tasklet_schedule(&tx_pkt->sys->tasklet); tasklet_schedule(&tx_pkt->sys->tasklet);
break; break;
default: default:

View File

@@ -77,6 +77,8 @@
#define NAPI_WEIGHT 60 #define NAPI_WEIGHT 60
#define NAPI_TX_WEIGHT 64
#define IPADBG(fmt, args...) \ #define IPADBG(fmt, args...) \
do { \ do { \
pr_debug(DRV_NAME " %s:%d " fmt, __func__, __LINE__, ## args);\ pr_debug(DRV_NAME " %s:%d " fmt, __func__, __LINE__, ## args);\
@@ -1028,6 +1030,9 @@ struct ipa3_repl_ctx {
* @ep: IPA EP context * @ep: IPA EP context
* @xmit_eot_cnt: count of pending eot for tasklet to process * @xmit_eot_cnt: count of pending eot for tasklet to process
* @tasklet: tasklet for eot write_done handle (tx_complete) * @tasklet: tasklet for eot write_done handle (tx_complete)
* @napi_tx: NAPI instance for EOT write-done handling (tx_complete) - to replace tasklet
* @in_napi_context: an atomic variable used for non-blocking locking,
* preventing napi_schedule from being called multiple times.
* *
* IPA context specific to the GPI pipes a.k.a LAN IN/OUT and WAN * IPA context specific to the GPI pipes a.k.a LAN IN/OUT and WAN
*/ */
@@ -1063,6 +1068,8 @@ struct ipa3_sys_context {
struct tasklet_struct tasklet; struct tasklet_struct tasklet;
bool skip_eot; bool skip_eot;
u32 eob_drop_cnt; u32 eob_drop_cnt;
struct napi_struct napi_tx;
atomic_t in_napi_context;
/* ordering is important - mutable fields go above */ /* ordering is important - mutable fields go above */
struct ipa3_ep_context *ep; struct ipa3_ep_context *ep;
@@ -1894,7 +1901,7 @@ struct ipa3_app_clock_vote {
* @app_vote: holds userspace application clock vote count * @app_vote: holds userspace application clock vote count
* IPA context - holds all relevant info about IPA driver and its state * IPA context - holds all relevant info about IPA driver and its state
* @lan_rx_napi_enable: flag if NAPI is enabled on the LAN dp * @lan_rx_napi_enable: flag if NAPI is enabled on the LAN dp
* @lan_ndev: dummy netdev for LAN rx NAPI * @generic_ndev: dummy netdev for LAN rx NAPI and tx NAPI
* @napi_lan_rx: NAPI object for LAN rx * @napi_lan_rx: NAPI object for LAN rx
* @ipa_wan_skb_page - page recycling enabled on wwan data path * @ipa_wan_skb_page - page recycling enabled on wwan data path
* @icc_num_cases - number of icc scaling level supported * @icc_num_cases - number of icc scaling level supported
@@ -2077,7 +2084,8 @@ struct ipa3_context {
struct ipacm_fnr_info fnr_info; struct ipacm_fnr_info fnr_info;
/* dummy netdev for lan RX NAPI */ /* dummy netdev for lan RX NAPI */
bool lan_rx_napi_enable; bool lan_rx_napi_enable;
struct net_device lan_ndev; bool tx_napi_enable;
struct net_device generic_ndev;
struct napi_struct napi_lan_rx; struct napi_struct napi_lan_rx;
u32 icc_num_cases; u32 icc_num_cases;
u32 icc_num_paths; u32 icc_num_paths;
@@ -2122,6 +2130,7 @@ struct ipa3_plat_drv_res {
bool gsi_ch20_wa; bool gsi_ch20_wa;
bool tethered_flow_control; bool tethered_flow_control;
bool lan_rx_napi_enable; bool lan_rx_napi_enable;
bool tx_napi_enable;
u32 mhi_evid_limits[2]; /* start and end values */ u32 mhi_evid_limits[2]; /* start and end values */
bool ipa_mhi_dynamic_config; bool ipa_mhi_dynamic_config;
u32 ipa_tz_unlock_reg_num; u32 ipa_tz_unlock_reg_num;