Browse Source

msm: add: changed tx completion to be done in napi context

In an effort to improve the data path, the TX
data path was changed to use NAPI for TX completions.
Added a specific polling function for NAPI, and a napi_struct
to each sys pipe.

Change-Id: I168a03a112109a2a9a7a747bf08147107a8d5fc7
Acked-by: Tal Gelbard <[email protected]>
Signed-off-by: Amir Levy <[email protected]>
Amir Levy 5 years ago
parent
commit
c45018dd9f

+ 10 - 2
drivers/platform/msm/ipa/ipa_v3/ipa.c

@@ -6568,8 +6568,8 @@ static int ipa3_lan_poll(struct napi_struct *napi, int budget)
 static inline void ipa3_enable_napi_netdev(void)
 {
 	if (ipa3_ctx->lan_rx_napi_enable) {
-		init_dummy_netdev(&ipa3_ctx->lan_ndev);
-		netif_napi_add(&ipa3_ctx->lan_ndev, &ipa3_ctx->napi_lan_rx,
+		init_dummy_netdev(&ipa3_ctx->generic_ndev);
+		netif_napi_add(&ipa3_ctx->generic_ndev, &ipa3_ctx->napi_lan_rx,
 				ipa3_lan_poll, NAPI_WEIGHT);
 	}
 }
@@ -6701,6 +6701,7 @@ static int ipa3_pre_init(const struct ipa3_plat_drv_res *resource_p,
 	ipa3_ctx->do_ram_collection_on_crash =
 		resource_p->do_ram_collection_on_crash;
 	ipa3_ctx->lan_rx_napi_enable = resource_p->lan_rx_napi_enable;
+	ipa3_ctx->tx_napi_enable = resource_p->tx_napi_enable;
 	ipa3_ctx->rmnet_ctl_enable = resource_p->rmnet_ctl_enable;
 	ipa3_ctx->tx_wrapper_cache_max_size = get_tx_wrapper_cache_size(
 			resource_p->tx_wrapper_cache_max_size);
@@ -7697,6 +7698,13 @@ static int get_ipa_dts_configuration(struct platform_device *pdev,
 		ipa_drv_res->lan_rx_napi_enable
 		? "True" : "False");
 
+	ipa_drv_res->tx_napi_enable =
+		of_property_read_bool(pdev->dev.of_node,
+			"qcom,tx-napi");
+	IPADBG(": Enable tx NAPI = %s\n",
+		ipa_drv_res->tx_napi_enable
+		? "True" : "False");
+
 	ipa_drv_res->rmnet_ctl_enable =
 		of_property_read_bool(pdev->dev.of_node,
 		"qcom,rmnet-ctl-enable");

+ 85 - 6
drivers/platform/msm/ipa/ipa_v3/ipa_dp.c

@@ -136,7 +136,14 @@ static void ipa3_tasklet_rx_notify(unsigned long data);
 
 static u32 ipa_adjust_ra_buff_base_sz(u32 aggr_byte_limit);
 
-static void ipa3_wq_write_done_common(struct ipa3_sys_context *sys,
+/**
+ * ipa3_wq_write_done_common() - frees all tx_pkt_wrappers related to an skb
+ * @sys: points to the ipa3_sys_context the EOT was received on
+ * @tx_pkt: the first tx_pkt_wrapper related to a certain skb
+ *
+ * Returns the number of tx_pkt_wrappers that were freed.
+ */
+static int ipa3_wq_write_done_common(struct ipa3_sys_context *sys,
 				struct ipa3_tx_pkt_wrapper *tx_pkt)
 {
 	struct ipa3_tx_pkt_wrapper *next_pkt;
@@ -147,16 +154,16 @@ static void ipa3_wq_write_done_common(struct ipa3_sys_context *sys,
 
 	if (unlikely(tx_pkt == NULL)) {
 		IPAERR("tx_pkt is NULL\n");
-		return;
+		return 0;
 	}
 
 	cnt = tx_pkt->cnt;
-	IPADBG_LOW("cnt: %d\n", cnt);
 	for (i = 0; i < cnt; i++) {
 		spin_lock_bh(&sys->spinlock);
 		if (unlikely(list_empty(&sys->head_desc_list))) {
 			spin_unlock_bh(&sys->spinlock);
-			return;
+			IPAERR_RL("list is empty missing descriptors");
+			return i;
 		}
 		next_pkt = list_next_entry(tx_pkt, link);
 		list_del(&tx_pkt->link);
@@ -192,6 +199,7 @@ static void ipa3_wq_write_done_common(struct ipa3_sys_context *sys,
 			(*callback)(user1, user2);
 		tx_pkt = next_pkt;
 	}
+	return i;
 }
 
 static void ipa3_wq_write_done_status(int src_pipe,
@@ -246,6 +254,54 @@ static void ipa3_tasklet_write_done(unsigned long data)
 	spin_unlock_bh(&sys->spinlock);
 }
 
+static int ipa3_poll_tx_complete(struct ipa3_sys_context *sys, int budget)
+{
+	struct ipa3_tx_pkt_wrapper *this_pkt = NULL;
+	bool xmit_done = false;
+	int entry_budget = budget;
+
+	spin_lock_bh(&sys->spinlock);
+	while (budget > 0 && atomic_read(&sys->xmit_eot_cnt) > 0) {
+		if (unlikely(list_empty(&sys->head_desc_list))) {
+			IPADBG_LOW("list is empty");
+			break;
+		}
+		this_pkt = list_first_entry(&sys->head_desc_list,
+			struct ipa3_tx_pkt_wrapper, link);
+		xmit_done = this_pkt->xmit_done;
+		spin_unlock_bh(&sys->spinlock);
+		budget -= ipa3_wq_write_done_common(sys, this_pkt);
+		spin_lock_bh(&sys->spinlock);
+		if (xmit_done)
+			atomic_add_unless(&sys->xmit_eot_cnt, -1, 0);
+	}
+	spin_unlock_bh(&sys->spinlock);
+	return entry_budget - budget;
+}
+
+static int ipa3_aux_poll_tx_complete(struct napi_struct *napi_tx, int budget)
+{
+	struct ipa3_sys_context *sys = container_of(napi_tx,
+		struct ipa3_sys_context, napi_tx);
+	int tx_done = 0;
+
+poll_tx:
+	tx_done += ipa3_poll_tx_complete(sys, budget - tx_done);
+	if (tx_done < budget) {
+		napi_complete(napi_tx);
+		atomic_set(&sys->in_napi_context, 0);
+
+		/* if we got an EOT while we marked NAPI as complete */
+		if (atomic_read(&sys->xmit_eot_cnt) > 0 &&
+		    !atomic_cmpxchg(&sys->in_napi_context, 0, 1)
+		    && napi_reschedule(napi_tx)) {
+		    goto poll_tx;
+		}
+	}
+	IPADBG_LOW("the number of tx completions is: %d", tx_done);
+	return min(tx_done, budget);
+}
+
 static void ipa3_send_nop_desc(struct work_struct *work)
 {
 	struct ipa3_sys_context *sys = container_of(work,
@@ -283,6 +339,7 @@ static void ipa3_send_nop_desc(struct work_struct *work)
 		return;
 	}
 	list_add_tail(&tx_pkt->link, &sys->head_desc_list);
+	sys->len++;
 	sys->nop_pending = false;
 
 	memset(&nop_xfer, 0, sizeof(nop_xfer));
@@ -452,7 +509,7 @@ int ipa3_send(struct ipa3_sys_context *sys,
 		tx_pkt->xmit_done = false;
 
 		list_add_tail(&tx_pkt->link, &sys->head_desc_list);
-
+		sys->len++;
 		gsi_xfer[i].addr = tx_pkt->mem.phys_base;
 
 		/*
@@ -1120,6 +1177,22 @@ int ipa3_setup_sys_pipe(struct ipa_sys_connect_params *sys_in, u32 *clnt_hdl)
 	if (sys_in->client == IPA_CLIENT_APPS_WAN_LOW_LAT_CONS)
 		tasklet_init(&ep->sys->tasklet, ipa3_tasklet_rx_notify,
 				(unsigned long) ep->sys);
+
+	if (ipa3_ctx->tx_napi_enable) {
+		if (sys_in->client != IPA_CLIENT_APPS_WAN_PROD) {
+			netif_tx_napi_add(&ipa3_ctx->generic_ndev,
+			&ep->sys->napi_tx, ipa3_aux_poll_tx_complete,
+			NAPI_TX_WEIGHT);
+		} else {
+			netif_tx_napi_add((struct net_device *)sys_in->priv,
+			&ep->sys->napi_tx, ipa3_aux_poll_tx_complete,
+			NAPI_TX_WEIGHT);
+		}
+		napi_enable(&ep->sys->napi_tx);
+		IPADBG("napi_enable on producer client %d completed",
+			sys_in->client);
+	}
+
 	ep->skip_ep_cfg = sys_in->skip_ep_cfg;
 	if (ipa3_assign_policy(sys_in, ep->sys)) {
 		IPAERR("failed to sys ctx for client %d\n", sys_in->client);
@@ -4454,7 +4527,13 @@ static void ipa_gsi_irq_tx_notify_cb(struct gsi_chan_xfer_notify *notify)
 		tx_pkt = notify->xfer_user_data;
 		tx_pkt->xmit_done = true;
 		atomic_inc(&tx_pkt->sys->xmit_eot_cnt);
-		tasklet_schedule(&tx_pkt->sys->tasklet);
+
+		if (ipa3_ctx->tx_napi_enable) {
+		    if(!atomic_cmpxchg(&tx_pkt->sys->in_napi_context, 0, 1))
+			napi_schedule(&tx_pkt->sys->napi_tx);
+		}
+		else
+			tasklet_schedule(&tx_pkt->sys->tasklet);
 		break;
 	default:
 		IPAERR("received unexpected event id %d\n", notify->evt_id);

+ 11 - 2
drivers/platform/msm/ipa/ipa_v3/ipa_i.h

@@ -77,6 +77,8 @@
 
 #define NAPI_WEIGHT 60
 
+#define NAPI_TX_WEIGHT 64
+
 #define IPADBG(fmt, args...) \
 	do { \
 		pr_debug(DRV_NAME " %s:%d " fmt, __func__, __LINE__, ## args);\
@@ -1028,6 +1030,9 @@ struct ipa3_repl_ctx {
  * @ep: IPA EP context
  * @xmit_eot_cnt: count of pending eot for tasklet to process
  * @tasklet: tasklet for eot write_done handle (tx_complete)
+ * @napi_tx: NAPI for EOT write-done handling (tx_complete) - replaces tasklet
+ * @in_napi_context: atomic flag used as a non-blocking lock,
+ * preventing multiple napi_schedule calls.
  *
  * IPA context specific to the GPI pipes a.k.a LAN IN/OUT and WAN
  */
@@ -1063,6 +1068,8 @@ struct ipa3_sys_context {
 	struct tasklet_struct tasklet;
 	bool skip_eot;
 	u32 eob_drop_cnt;
+	struct napi_struct napi_tx;
+	atomic_t in_napi_context;
 
 	/* ordering is important - mutable fields go above */
 	struct ipa3_ep_context *ep;
@@ -1894,7 +1901,7 @@ struct ipa3_app_clock_vote {
  * @app_vote: holds userspace application clock vote count
  * IPA context - holds all relevant info about IPA driver and its state
  * @lan_rx_napi_enable: flag if NAPI is enabled on the LAN dp
- * @lan_ndev: dummy netdev for LAN rx NAPI
+ * @generic_ndev: dummy netdev for LAN rx NAPI and tx NAPI
  * @napi_lan_rx: NAPI object for LAN rx
  * @ipa_wan_skb_page - page recycling enabled on wwan data path
  * @icc_num_cases - number of icc scaling level supported
@@ -2077,7 +2084,8 @@ struct ipa3_context {
 	struct ipacm_fnr_info fnr_info;
 	/* dummy netdev for lan RX NAPI */
 	bool lan_rx_napi_enable;
-	struct net_device lan_ndev;
+	bool tx_napi_enable;
+	struct net_device generic_ndev;
 	struct napi_struct napi_lan_rx;
 	u32 icc_num_cases;
 	u32 icc_num_paths;
@@ -2122,6 +2130,7 @@ struct ipa3_plat_drv_res {
 	bool gsi_ch20_wa;
 	bool tethered_flow_control;
 	bool lan_rx_napi_enable;
+	bool tx_napi_enable;
 	u32 mhi_evid_limits[2]; /* start and end values */
 	bool ipa_mhi_dynamic_config;
 	u32 ipa_tz_unlock_reg_num;