qcacld-3.0: changes for GRO for TCP packets

Add support for GRO functionality for TCP packets.

CRs-Fixed: 2346997
Change-Id: I9d38c926af4114e90427f5b5c632cd02b9eb3160
Mohit Khanna, 6 years ago
parent
commit 8141877926
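
This change enables GRO for TCP traffic delivered from the DP RX threads by giving each RX thread its own NAPI instance backed by a dummy netdev and pushing packets to the stack through napi_gro_receive(), with an explicit flush when the lower DP layer marks the end of a burst. Before the per-file diffs, a minimal sketch of that kernel pattern, assuming a ~4.x kernel where netif_napi_add() still takes a weight argument; the sketch_* names are illustrative and are not part of the patch:

/*
 * Minimal sketch of the dummy-netdev + NAPI pattern this change relies on.
 * Assumes a ~4.x kernel (netif_napi_add() with a weight argument); the
 * sketch_* names are illustrative and do not exist in the driver.
 */
#include <linux/netdevice.h>
#include <linux/skbuff.h>

struct sketch_rx_thread {
	struct net_device netdev;	/* dummy netdev, never registered */
	struct napi_struct napi;	/* per-thread NAPI used only for GRO */
};

/* never scheduled; needed only because netif_napi_add() requires a poll cb */
static int sketch_napi_poll(struct napi_struct *napi, int budget)
{
	return 0;
}

static void sketch_rx_thread_gro_init(struct sketch_rx_thread *t)
{
	init_dummy_netdev(&t->netdev);
	netif_napi_add(&t->netdev, &t->napi, sketch_napi_poll, 64);
	napi_enable(&t->napi);
}

/* called from the RX thread per TCP skb; flush when the DP layer asks for it */
static void sketch_rx_thread_gro_receive(struct sketch_rx_thread *t,
					 struct sk_buff *skb, bool flush)
{
	local_bh_disable();		/* napi_gro_*() expect softirqs disabled */
	napi_gro_receive(&t->napi, skb);
	if (flush)
		napi_gro_flush(&t->napi, false);
	local_bh_enable();
}
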

+ 0 - 2
core/cds/inc/cds_config.h

@@ -76,7 +76,6 @@ enum active_apf_mode {
  * @max_scan: Maximum number of parallel scans
  * @tx_flow_stop_queue_th: Threshold to stop queue in percentage
  * @tx_flow_start_queue_offset: Start queue offset in percentage
- * @num_dp_rx_threads: number of dp rx threads to be configured
  * @enable_dp_rx_threads: enable dp rx threads
  * @is_lpass_enabled: Indicate whether LPASS is enabled or not
  * @tx_chain_mask_cck: Tx chain mask enabled or not
@@ -114,7 +113,6 @@ struct cds_config_info {
 	uint32_t tx_flow_stop_queue_th;
 	uint32_t tx_flow_start_queue_offset;
 #endif
-	uint8_t num_dp_rx_threads;
 	uint8_t enable_dp_rx_threads;
 #ifdef WLAN_FEATURE_LPSS
 	bool is_lpass_enabled;

+ 0 - 1
core/cds/src/cds_api.c

@@ -746,7 +746,6 @@ QDF_STATUS cds_dp_open(struct wlan_objmgr_psoc *psoc)
 		goto intr_close;
 	}
 
-	dp_config.num_rx_threads = gp_cds_context->cds_cfg->num_dp_rx_threads;
 	dp_config.enable_rx_threads =
 		gp_cds_context->cds_cfg->enable_dp_rx_threads;
 	qdf_status = dp_txrx_init(cds_get_context(QDF_MODULE_ID_SOC),

+ 123 - 17
core/dp/txrx3.0/dp_rx_thread.c

@@ -50,6 +50,25 @@ static inline void dp_rx_tm_walk_skb_list(qdf_nbuf_t nbuf_list)
 { }
 #endif /* DP_RX_TM_DEBUG */
 
+/**
+ * dp_rx_tm_get_soc_handle() - get soc handle from struct dp_rx_tm_handle_cmn
+ * @rx_tm_handle_cmn - rx thread manager cmn handle
+ *
+ * Returns: ol_txrx_soc_handle on success, NULL on failure.
+ */
+static inline
+ol_txrx_soc_handle dp_rx_tm_get_soc_handle(struct dp_rx_tm_handle_cmn *rx_tm_handle_cmn)
+{
+	struct dp_txrx_handle_cmn *txrx_handle_cmn;
+	ol_txrx_soc_handle soc;
+
+	txrx_handle_cmn =
+		dp_rx_thread_get_txrx_handle(rx_tm_handle_cmn);
+
+	soc = dp_txrx_get_soc_from_ext_handle(txrx_handle_cmn);
+	return soc;
+}
+
 /**
  * dp_rx_tm_thread_dump_stats() - display stats for a rx_thread
  * @rx_thread - rx_thread pointer for which the stats need to be
@@ -96,7 +115,7 @@ QDF_STATUS dp_rx_tm_dump_stats(struct dp_rx_tm_handle *rx_tm_hdl)
 {
 	int i;
 
-	for (i = 0; i < DP_MAX_RX_THREADS; i++) {
+	for (i = 0; i < rx_tm_hdl->num_dp_rx_threads; i++) {
 		if (!rx_tm_hdl->rx_thread[i])
 			continue;
 		dp_rx_tm_thread_dump_stats(rx_tm_hdl->rx_thread[i]);
@@ -396,6 +415,46 @@ static int dp_rx_thread_loop(void *arg)
 	return 0;
 }
 
+/**
+ * dp_rx_tm_thread_napi_poll() - dummy napi poll for rx_thread NAPI
+ * @napi: pointer to DP rx_thread NAPI
+ * @budget: NAPI BUDGET
+ *
+ * Return: 0 as it is not supposed to be polled at all as it is not scheduled.
+ */
+static int dp_rx_tm_thread_napi_poll(struct napi_struct *napi, int budget)
+{
+	dp_err("this napi_poll should not be polled as we don't schedule it");
+	QDF_BUG(0);
+	return 0;
+}
+
+/**
+ * dp_rx_tm_thread_napi_init() - Initialize dummy rx_thread NAPI
+ * @rx_thread: dp_rx_thread structure containing dummy napi and netdev
+ *
+ * Return: None
+ */
+static void dp_rx_tm_thread_napi_init(struct dp_rx_thread *rx_thread)
+{
+	/* Todo - optimize to use only one dummy netdev for all thread napis */
+	init_dummy_netdev(&rx_thread->netdev);
+	netif_napi_add(&rx_thread->netdev, &rx_thread->napi,
+		       dp_rx_tm_thread_napi_poll, 64);
+	napi_enable(&rx_thread->napi);
+}
+
+/**
+ * dp_rx_tm_thread_napi_deinit() - De-initialize dummy rx_thread NAPI
+ * @rx_thread: dp_rx_thread handle containing dummy napi and netdev
+ *
+ * Return: None
+ */
+static void dp_rx_tm_thread_napi_deinit(struct dp_rx_thread *rx_thread)
+{
+	netif_napi_del(&rx_thread->napi);
+}
+
 /*
  * dp_rx_tm_thread_init() - Initialize dp_rx_thread structure and thread
  *
@@ -425,6 +484,11 @@ static QDF_STATUS dp_rx_tm_thread_init(struct dp_rx_thread *rx_thread,
 	qdf_event_create(&rx_thread->shutdown_event);
 	qdf_scnprintf(thread_name, sizeof(thread_name), "dp_rx_thread_%u", id);
 	dp_info("%s %u", thread_name, id);
+
+	if (cdp_cfg_get(dp_rx_tm_get_soc_handle(rx_thread->rtm_handle_cmn),
+			cfg_dp_gro_enable))
+		dp_rx_tm_thread_napi_init(rx_thread);
+
 	rx_thread->task = qdf_create_thread(dp_rx_thread_loop,
 					    rx_thread, thread_name);
 	if (!rx_thread->task) {
@@ -455,6 +519,11 @@ static QDF_STATUS dp_rx_tm_thread_deinit(struct dp_rx_thread *rx_thread)
 	qdf_event_destroy(&rx_thread->suspend_event);
 	qdf_event_destroy(&rx_thread->resume_event);
 	qdf_event_destroy(&rx_thread->shutdown_event);
+
+	if (cdp_cfg_get(dp_rx_tm_get_soc_handle(rx_thread->rtm_handle_cmn),
+			cfg_dp_gro_enable))
+		dp_rx_tm_thread_napi_deinit(rx_thread);
+
 	return QDF_STATUS_SUCCESS;
 }
 
@@ -463,10 +532,29 @@ QDF_STATUS dp_rx_tm_init(struct dp_rx_tm_handle *rx_tm_hdl,
 {
 	int i;
 	QDF_STATUS qdf_status = QDF_STATUS_SUCCESS;
-	/* ignoring num_dp_rx_threads for now */
+
+	if (num_dp_rx_threads > DP_MAX_RX_THREADS) {
+		dp_err("unable to initialize %u number of threads. MAX %u",
+		       num_dp_rx_threads, DP_MAX_RX_THREADS);
+		return QDF_STATUS_E_INVAL;
+	}
+
+	rx_tm_hdl->num_dp_rx_threads = num_dp_rx_threads;
+
+	dp_info("initializing %u threads", num_dp_rx_threads);
+
+	/* allocate an array to contain the DP RX thread pointers */
+	rx_tm_hdl->rx_thread = qdf_mem_malloc(num_dp_rx_threads *
+					      sizeof(struct dp_rx_thread *));
+
+	if (qdf_unlikely(!rx_tm_hdl->rx_thread)) {
+		qdf_status = QDF_STATUS_E_NOMEM;
+		goto ret;
+	}
+
 	qdf_init_waitqueue_head(&rx_tm_hdl->wait_q);
 
-	for (i = 0; i < DP_MAX_RX_THREADS; i++) {
+	for (i = 0; i < rx_tm_hdl->num_dp_rx_threads; i++) {
 		rx_tm_hdl->rx_thread[i] =
 			(struct dp_rx_thread *)
 			qdf_mem_malloc(sizeof(struct dp_rx_thread));
@@ -502,7 +590,7 @@ QDF_STATUS dp_rx_tm_suspend(struct dp_rx_tm_handle *rx_tm_hdl)
 	QDF_STATUS qdf_status;
 	struct dp_rx_thread *rx_thread;
 
-	for (i = 0; i < DP_MAX_RX_THREADS; i++) {
+	for (i = 0; i < rx_tm_hdl->num_dp_rx_threads; i++) {
 		if (!rx_tm_hdl->rx_thread[i])
 			continue;
 		qdf_set_bit(RX_SUSPEND_EVENT,
@@ -511,7 +599,7 @@ QDF_STATUS dp_rx_tm_suspend(struct dp_rx_tm_handle *rx_tm_hdl)
 
 	qdf_wake_up_interruptible(&rx_tm_hdl->wait_q);
 
-	for (i = 0; i < DP_MAX_RX_THREADS; i++) {
+	for (i = 0; i < rx_tm_hdl->num_dp_rx_threads; i++) {
 		rx_thread = rx_tm_hdl->rx_thread[i];
 		if (!rx_thread)
 			continue;
@@ -548,7 +636,7 @@ QDF_STATUS dp_rx_tm_resume(struct dp_rx_tm_handle *rx_tm_hdl)
 		return QDF_STATUS_E_FAULT;
 	}
 
-	for (i = 0; i < DP_MAX_RX_THREADS; i++) {
+	for (i = 0; i < rx_tm_hdl->num_dp_rx_threads; i++) {
 		if (!rx_tm_hdl->rx_thread[i])
 			continue;
 		dp_debug("calling thread %d to resume", i);
@@ -568,7 +656,7 @@ static QDF_STATUS dp_rx_tm_shutdown(struct dp_rx_tm_handle *rx_tm_hdl)
 {
 	int i;
 
-	for (i = 0; i < DP_MAX_RX_THREADS; i++) {
+	for (i = 0; i < rx_tm_hdl->num_dp_rx_threads; i++) {
 		if (!rx_tm_hdl->rx_thread[i])
 			continue;
 		qdf_set_bit(RX_SHUTDOWN_EVENT,
@@ -579,7 +667,7 @@ static QDF_STATUS dp_rx_tm_shutdown(struct dp_rx_tm_handle *rx_tm_hdl)
 
 	qdf_wake_up_interruptible(&rx_tm_hdl->wait_q);
 
-	for (i = 0; i < DP_MAX_RX_THREADS; i++) {
+	for (i = 0; i < rx_tm_hdl->num_dp_rx_threads; i++) {
 		if (!rx_tm_hdl->rx_thread[i])
 			continue;
 		dp_debug("waiting for shutdown of thread %d", i);
@@ -599,15 +687,24 @@ static QDF_STATUS dp_rx_tm_shutdown(struct dp_rx_tm_handle *rx_tm_hdl)
 QDF_STATUS dp_rx_tm_deinit(struct dp_rx_tm_handle *rx_tm_hdl)
 {
 	int i = 0;
+	if (!rx_tm_hdl->rx_thread) {
+		dp_err("rx_tm_hdl->rx_thread not initialized!");
+		return QDF_STATUS_SUCCESS;
+	}
 
 	dp_rx_tm_shutdown(rx_tm_hdl);
 
-	for (i = 0; i < DP_MAX_RX_THREADS; i++) {
+	for (i = 0; i < rx_tm_hdl->num_dp_rx_threads; i++) {
 		if (!rx_tm_hdl->rx_thread[i])
 			continue;
 		dp_rx_tm_thread_deinit(rx_tm_hdl->rx_thread[i]);
 		qdf_mem_free(rx_tm_hdl->rx_thread[i]);
 	}
+
+	/* free the array of RX thread pointers*/
+	qdf_mem_free(rx_tm_hdl->rx_thread);
+	rx_tm_hdl->rx_thread = NULL;
+
 	return QDF_STATUS_SUCCESS;
 }
 
@@ -621,12 +718,6 @@ QDF_STATUS dp_rx_tm_deinit(struct dp_rx_tm_handle *rx_tm_hdl)
  * in the nbuf list. Depending on the RX_CTX (copy engine or reo
  * ring) on which the packet was received, the function selects
  * a corresponding rx_thread.
- * The function uses a simplistic mapping -
- *
- * RX_THREAD = RX_CTX % number of RX threads in the system.
- *
- * This also means that if RX_CTX < # rx threads, more than one
- * interrupt source may end up on the same rx_thread.
  *
  * Return: rx thread ID selected for the nbuf
  */
@@ -636,8 +727,13 @@ static uint8_t dp_rx_tm_select_thread(struct dp_rx_tm_handle *rx_tm_hdl,
 	uint8_t selected_rx_thread;
 	uint8_t reo_ring_num = QDF_NBUF_CB_RX_CTX_ID(nbuf_list);
 
-	selected_rx_thread = reo_ring_num % DP_MAX_RX_THREADS;
+	if (reo_ring_num >= rx_tm_hdl->num_dp_rx_threads) {
+		dp_err_rl("unexpected ring number");
+		QDF_BUG(0);
+		return 0;
+	}
 
+	selected_rx_thread = reo_ring_num;
 	return selected_rx_thread;
 }
 
@@ -647,9 +743,19 @@ QDF_STATUS dp_rx_tm_enqueue_pkt(struct dp_rx_tm_handle *rx_tm_hdl,
 	uint8_t selected_thread_id;
 
 	selected_thread_id = dp_rx_tm_select_thread(rx_tm_hdl, nbuf_list);
-
 	dp_rx_tm_thread_enqueue(rx_tm_hdl->rx_thread[selected_thread_id],
 				nbuf_list);
 	return QDF_STATUS_SUCCESS;
 }
 
+struct napi_struct *dp_rx_tm_get_napi_context(struct dp_rx_tm_handle *rx_tm_hdl,
+					      uint8_t rx_ctx_id)
+{
+	if (rx_ctx_id >= rx_tm_hdl->num_dp_rx_threads) {
+		dp_err_rl("unexpected rx_ctx_id %u", rx_ctx_id);
+		QDF_BUG(0);
+		return NULL;
+	}
+
+	return &rx_tm_hdl->rx_thread[rx_ctx_id]->napi;
+}
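
With this file, the RX thread pool is sized at run time: dp_rx_tm_init() allocates num_dp_rx_threads thread pointers and rejects anything above DP_MAX_RX_THREADS, dp_rx_tm_select_thread() maps a REO ring number 1:1 onto an RX thread instead of folding it with a modulo, and dp_rx_tm_get_napi_context() hands out the per-thread NAPI that GRO aggregation runs on. A hedged sketch of that contract from a caller's point of view; the sketch_* helper and the assumption of four RX contexts are illustrative, not part of the patch:

/*
 * Illustrative only: the init/lookup contract as seen by a caller, assuming
 * the driver's dp_rx_thread.h and cdp_txrx_cmn.h headers and a target whose
 * cdp_get_num_rx_contexts() reports four REO destination rings.
 */
static QDF_STATUS sketch_bring_up_rx_threads(struct dp_rx_tm_handle *rx_tm_hdl,
					     ol_txrx_soc_handle soc)
{
	uint8_t num_ctx = cdp_get_num_rx_contexts(soc);	/* e.g. 4 */
	QDF_STATUS status;
	uint8_t i;

	/* fails with QDF_STATUS_E_INVAL if num_ctx exceeds DP_MAX_RX_THREADS */
	status = dp_rx_tm_init(rx_tm_hdl, num_ctx);
	if (QDF_IS_STATUS_ERROR(status))
		return status;

	/* REO ring i maps 1:1 to rx_thread[i]; GRO for that ring uses its NAPI */
	for (i = 0; i < num_ctx; i++) {
		if (!dp_rx_tm_get_napi_context(rx_tm_hdl, i))
			return QDF_STATUS_E_FAILURE;
	}

	return QDF_STATUS_SUCCESS;
}
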

+ 20 - 4
core/dp/txrx3.0/dp_rx_thread.h

@@ -27,7 +27,7 @@
 #define DP_RX_TM_MAX_REO_RINGS 4
 
 /* Number of DP RX threads supported */
-#define DP_MAX_RX_THREADS 3
+#define DP_MAX_RX_THREADS DP_RX_TM_MAX_REO_RINGS
 
 /*
  * Macro to get to wait_queue structure. Needed since wait_q is an object.
@@ -68,6 +68,7 @@ struct dp_rx_thread_stats {
 
 /**
  * struct dp_rx_thread - structure holding variables for a single DP RX thread
+ * @id: id of the dp_rx_thread (0 or 1 or 2..DP_MAX_RX_THREADS - 1)
  * @task: task structure corresponding to the thread
  * @start_event: handle of Event for DP Rx thread to signal startup
  * @suspend_event: handle of Event for DP Rx thread to signal suspend
@@ -78,11 +79,13 @@ struct dp_rx_thread_stats {
  * @nbufq_len: length of the nbuf queue
  * @aff_mask: current affinity mask of the DP Rx thread
  * @stats: per thread stats
- * @id: id of the dp_rx_thread (0 or 1 or 2..DP_MAX_RX_THREADS - 1)
  * @rtm_handle_cmn: abstract RX TM handle. This allows access to the dp_rx_tm
  *		    structures via APIs.
+ * @napi: napi to deliver packet to stack via GRO
+ * @netdev: dummy netdev to initialize the napi structure with
  */
 struct dp_rx_thread {
+	uint8_t id;
 	qdf_thread_t *task;
 	qdf_event_t start_event;
 	qdf_event_t suspend_event;
@@ -92,8 +95,9 @@ struct dp_rx_thread {
 	qdf_nbuf_queue_head_t nbuf_queue;
 	unsigned long aff_mask;
 	struct dp_rx_thread_stats stats;
-	uint8_t id;
 	struct dp_rx_tm_handle_cmn *rtm_handle_cmn;
+	struct napi_struct napi;
+	struct net_device netdev;
 };
 
 /**
@@ -113,16 +117,18 @@ enum dp_rx_thread_state {
 
 /**
  * struct dp_rx_tm_handle - DP RX thread infrastructure handle
+ * @num_dp_rx_threads: number of DP RX threads initialized
  * @txrx_handle_cmn: opaque txrx handle to get to pdev and soc
  * wait_q: wait_queue for the rx_threads to wait on and expect an event
  * @state: state of the rx_threads. All of them should be in the same state.
  * @rx_thread: array of pointers of type struct dp_rx_thread
  */
 struct dp_rx_tm_handle {
+	uint8_t num_dp_rx_threads;
 	struct dp_txrx_handle_cmn *txrx_handle_cmn;
 	qdf_wait_queue_head_t wait_q;
 	enum dp_rx_thread_state state;
-	struct dp_rx_thread *rx_thread[DP_MAX_RX_THREADS];
+	struct dp_rx_thread **rx_thread;
 };
 
 /**
@@ -207,4 +213,14 @@ dp_rx_thread_get_wait_queue(struct dp_rx_tm_handle_cmn *rx_tm_handle_cmn)
 	return &rx_tm_handle->wait_q;
 }
 
+/**
+ * dp_rx_tm_get_napi_context() - get NAPI context for a RX CTX ID
+ * @soc: ol_txrx_soc_handle object
+ * @rx_ctx_id: RX context ID (RX thread ID) corresponding to which NAPI is
+ *             needed
+ *
+ * Return: NULL on failure, else pointer to NAPI corresponding to rx_ctx_id
+ */
+struct napi_struct *dp_rx_tm_get_napi_context(struct dp_rx_tm_handle *rx_tm_hdl,
+					      uint8_t rx_ctx_id);
 #endif /* __DP_RX_THREAD_H */

+ 5 - 1
core/dp/txrx3.0/dp_txrx.c

@@ -19,12 +19,14 @@
 #include <wlan_objmgr_pdev_obj.h>
 #include <dp_txrx.h>
 #include <cdp_txrx_cmn.h>
+#include <cdp_txrx_misc.h>
 
 QDF_STATUS dp_txrx_init(ol_txrx_soc_handle soc, struct cdp_pdev *pdev,
 			struct dp_txrx_config *config)
 {
 	struct dp_txrx_handle *dp_ext_hdl;
 	QDF_STATUS qdf_status = QDF_STATUS_SUCCESS;
+	uint8_t num_dp_rx_threads;
 
 	dp_ext_hdl = qdf_mem_malloc(sizeof(*dp_ext_hdl));
 	if (!dp_ext_hdl) {
@@ -40,9 +42,11 @@ QDF_STATUS dp_txrx_init(ol_txrx_soc_handle soc, struct cdp_pdev *pdev,
 	dp_ext_hdl->rx_tm_hdl.txrx_handle_cmn =
 				dp_txrx_get_cmn_hdl_frm_ext_hdl(dp_ext_hdl);
 
+	num_dp_rx_threads = cdp_get_num_rx_contexts(soc);
+
 	if (dp_ext_hdl->config.enable_rx_threads) {
 		qdf_status = dp_rx_tm_init(&dp_ext_hdl->rx_tm_hdl,
-					   dp_ext_hdl->config.num_rx_threads);
+					   num_dp_rx_threads);
 	}
 
 	return qdf_status;

+ 56 - 4
core/dp/txrx3.0/dp_txrx.h

@@ -28,11 +28,9 @@
 /**
  * struct dp_txrx_config - dp txrx configuration passed to dp txrx modules
  * @enable_dp_rx_threads: enable DP rx threads or not
- * @num_rx_threads: number of DP RX threads
  */
 struct dp_txrx_config {
 	bool enable_rx_threads;
-	uint8_t num_rx_threads;
 };
 
 struct dp_txrx_handle_cmn;
@@ -166,6 +164,16 @@ ret:
 	return qdf_status;
 }
 
+/**
+ * dp_rx_enqueue_pkt() - enqueue packet(s) into the thread
+ * @soc: ol_txrx_soc_handle object
+ * @nbuf_list: list of packets to be queued into the rx_thread
+ *
+ * The function accepts a list of skbs connected by the skb->next pointer and
+ * queues them into a RX thread to be sent to the stack.
+ *
+ * Return: QDF_STATUS_SUCCESS on success, error qdf status on failure
+ */
 static inline
 QDF_STATUS dp_rx_enqueue_pkt(ol_txrx_soc_handle soc, qdf_nbuf_t nbuf_list)
 {
@@ -190,7 +198,14 @@ ret:
 	return qdf_status;
 }
 
-static inline QDF_STATUS dp_txrx_dump_stats(ol_txrx_soc_handle soc)
+/**
+ * dp_txrx_ext_dump_stats() - dump txrx external module stats
+ * @soc: ol_txrx_soc_handle object
+ *
+ *
+ * Return: QDF_STATUS_SUCCESS on success, error qdf status on failure
+ */
+static inline QDF_STATUS dp_txrx_ext_dump_stats(ol_txrx_soc_handle soc)
 {
 	struct dp_txrx_handle *dp_ext_hdl;
 	QDF_STATUS qdf_status = QDF_STATUS_SUCCESS;
@@ -211,7 +226,37 @@ static inline QDF_STATUS dp_txrx_dump_stats(ol_txrx_soc_handle soc)
 ret:
 	return qdf_status;
 }
+
+/**
+ * dp_rx_get_napi_context() - get NAPI context for a RX CTX ID
+ * @soc: ol_txrx_soc_handle object
+ * @rx_ctx_id: RX context ID (RX thread ID) corresponding to which NAPI is
+ *             needed
+ *
+ * Return: NULL on failure, else pointer to NAPI corresponding to rx_ctx_id
+ */
+static inline
+struct napi_struct *dp_rx_get_napi_context(ol_txrx_soc_handle soc,
+					   uint8_t rx_ctx_id)
+{
+	struct dp_txrx_handle *dp_ext_hdl;
+
+	if (!soc) {
+		dp_err("soc is NULL!");
+		return NULL;
+	}
+
+	dp_ext_hdl = cdp_soc_get_dp_txrx_handle(soc);
+	if (!dp_ext_hdl) {
+		dp_err("dp_ext_hdl is NULL!");
+		return NULL;
+	}
+
+	return dp_rx_tm_get_napi_context(&dp_ext_hdl->rx_tm_hdl, rx_ctx_id);
+}
+
 #else
+
 static inline
 QDF_STATUS dp_txrx_init(ol_txrx_soc_handle soc, struct cdp_pdev *pdev,
 			    struct dp_txrx_config *config)
@@ -240,9 +285,16 @@ QDF_STATUS dp_rx_enqueue_pkt(ol_txrx_soc_handle soc, qdf_nbuf_t nbuf_list)
 	return QDF_STATUS_SUCCESS;
 }
 
-static inline QDF_STATUS dp_txrx_dump_stats(ol_txrx_soc_handle soc)
+static inline QDF_STATUS dp_txrx_ext_dump_stats(ol_txrx_soc_handle soc)
 {
 	return QDF_STATUS_SUCCESS;
 }
+
+static inline
+struct napi_struct *dp_rx_get_napi_context(ol_txrx_soc_handle soc,
+					   uint8_t rx_ctx_id)
+{
+	return NULL;
+}
 #endif /* FEATURE_WLAN_DP_RX_THREADS */
 #endif /* _DP_TXRX_H */
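
The header keeps dp_rx_get_napi_context() callable regardless of the build: with FEATURE_WLAN_DP_RX_THREADS it resolves the DP extension handle and forwards to dp_rx_tm_get_napi_context(), and without it the stub returns NULL so a caller can simply fall back to the regular netif path. A minimal sketch of such a caller, reusing the napi_gro_receive() pattern shown earlier; sketch_try_gro() is illustrative and not part of the patch:

/*
 * Illustrative only: an upper-layer caller that tolerates the stubbed,
 * threads-disabled build by treating a NULL NAPI as "no GRO available".
 */
static QDF_STATUS sketch_try_gro(ol_txrx_soc_handle soc, struct sk_buff *skb)
{
	struct napi_struct *napi;

	napi = dp_rx_get_napi_context(soc, QDF_NBUF_CB_RX_CTX_ID(skb));
	if (!napi)
		return QDF_STATUS_E_NOSUPPORT;	/* deliver via netif_receive_skb() instead */

	local_bh_disable();
	napi_gro_receive(napi, skb);
	local_bh_enable();

	return QDF_STATUS_SUCCESS;
}
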

+ 10 - 2
core/hdd/inc/wlan_hdd_main.h

@@ -545,6 +545,11 @@ struct hdd_tx_rx_stats {
 	__u32 rx_dropped[NUM_CPUS];
 	__u32 rx_delivered[NUM_CPUS];
 	__u32 rx_refused[NUM_CPUS];
+	/* rx gro */
+	__u32 rx_aggregated;
+	__u32 rx_non_aggregated;
+	__u32 rx_gro_flushes;
+	__u32 rx_gro_force_flushes;
 
 	/* txflow stats */
 	bool     is_txflow_paused;
@@ -1966,8 +1971,11 @@ struct hdd_context {
 	QDF_STATUS (*receive_offload_cb)(struct hdd_adapter *,
 					 struct sk_buff *);
 	qdf_atomic_t vendor_disable_lro_flag;
-	qdf_atomic_t disable_lro_in_concurrency;
-	qdf_atomic_t disable_lro_in_low_tput;
+
+	/* disable RX offload (GRO/LRO) in concurrency scenarios */
+	qdf_atomic_t disable_rx_ol_in_concurrency;
+	/* disable RX offload (GRO/LRO) in low throughput scenarios */
+	qdf_atomic_t disable_rx_ol_in_low_tput;
 	bool en_tcp_delack_no_lro;
 	bool force_rsne_override;
 	qdf_wake_lock_t monitor_mode_wakelock;

+ 16 - 2
core/hdd/inc/wlan_hdd_tx_rx.h

@@ -91,10 +91,24 @@ QDF_STATUS hdd_deinit_tx_rx(struct hdd_adapter *adapter);
  */
 QDF_STATUS hdd_rx_packet_cbk(void *adapter_context, qdf_nbuf_t rxBuf);
 
+/**
+ * hdd_rx_deliver_to_stack() - HDD helper function to deliver RX pkts to stack
+ * @adapter: pointer to HDD adapter context
+ * @skb: pointer to skb
+ *
+ * The function calls the appropriate stack function depending upon the packet
+ * type and whether GRO/LRO is enabled.
+ *
+ * Return: QDF_STATUS_E_FAILURE if any errors encountered,
+ *	   QDF_STATUS_SUCCESS otherwise
+ */
+QDF_STATUS hdd_rx_deliver_to_stack(struct hdd_adapter *adapter,
+				   struct sk_buff *skb);
+
 /**
  * hdd_rx_pkt_thread_enqueue_cbk() - receive pkt handler to enqueue into thread
  * @adapter: pointer to HDD adapter
- * @rxBuf: pointer to rx qdf_nbuf
+ * @nbuf_list: pointer to qdf_nbuf list
  *
  * Receive callback registered with DP layer which enqueues packets into dp rx
  * thread
@@ -105,7 +119,7 @@ QDF_STATUS hdd_rx_pkt_thread_enqueue_cbk(void *adapter_context,
 					 qdf_nbuf_t nbuf_list);
 
 /**
- * hdd_rx_ol_init() - Initialize Rx mode(LRO or GRO) method
+ * hdd_rx_ol_init() - Initialize Rx offload mode (LRO or GRO)
  * @hdd_ctx: pointer to HDD Station Context
  *
  * Return: 0 on success and non zero on failure.

+ 18 - 7
core/hdd/src/wlan_hdd_main.c

@@ -159,6 +159,7 @@
 #include <wlan_hdd_spectralscan.h>
 #include "wlan_green_ap_ucfg_api.h"
 #include <wlan_p2p_ucfg_api.h>
+#include <target_type.h>
 
 #ifdef MODULE
 #define WLAN_MODULE_NAME  module_name(THIS_MODULE)
@@ -7669,10 +7670,17 @@ static void hdd_display_periodic_stats(struct hdd_context *hdd_ctx,
 	static uint32_t counter;
 	static bool data_in_time_period;
 	ol_txrx_pdev_handle pdev;
+	ol_txrx_soc_handle soc;
 
 	if (hdd_ctx->config->periodic_stats_disp_time == 0)
 		return;
 
+	soc = cds_get_context(QDF_MODULE_ID_SOC);
+	if (!soc) {
+		hdd_err("soc is NULL");
+		return;
+	}
+
 	pdev = cds_get_context(QDF_MODULE_ID_TXRX);
 	if (!pdev) {
 		hdd_err("pdev is NULL");
@@ -7687,11 +7695,11 @@ static void hdd_display_periodic_stats(struct hdd_context *hdd_ctx,
 		hdd_ctx->config->periodic_stats_disp_time * 1000) {
 		if (data_in_time_period) {
 			wlan_hdd_display_txrx_stats(hdd_ctx);
-			dp_txrx_dump_stats(cds_get_context(QDF_MODULE_ID_SOC));
-			cdp_display_stats(cds_get_context(QDF_MODULE_ID_SOC),
+			dp_txrx_ext_dump_stats(soc);
+			cdp_display_stats(soc,
 					  CDP_RX_RING_STATS,
 					  QDF_STATS_VERBOSITY_LEVEL_LOW);
-			cdp_display_stats(cds_get_context(QDF_MODULE_ID_SOC),
+			cdp_display_stats(soc,
 					  CDP_TXRX_PATH_STATS,
 					  QDF_STATS_VERBOSITY_LEVEL_LOW);
 			wlan_hdd_display_netif_queue_history
@@ -9817,7 +9825,6 @@ static inline void hdd_txrx_populate_cds_config(struct cds_config_info
 		cfg_get(hdd_ctx->psoc, CFG_DP_TX_FLOW_START_QUEUE_OFFSET);
 	/* configuration for DP RX Threads */
 	cds_cfg->enable_dp_rx_threads = hdd_ctx->enable_dp_rx_threads;
-	cds_cfg->num_dp_rx_threads = hdd_ctx->config->num_dp_rx_threads;
 }
 #else
 static inline void hdd_txrx_populate_cds_config(struct cds_config_info
@@ -11284,9 +11291,12 @@ int hdd_configure_cds(struct hdd_context *hdd_ctx)
 	if (ret)
 		goto cds_disable;
 
-	if (hdd_ctx->ol_enable)
-		dp_cbs.hdd_disable_rx_ol_in_concurrency =
-				hdd_disable_rx_ol_in_concurrency;
+	/* Do not disable rx offload in concurrency for lithium based targets */
+	if (!(hdd_ctx->target_type == TARGET_TYPE_QCA6290 ||
+	      hdd_ctx->target_type == TARGET_TYPE_QCA6390))
+		if (hdd_ctx->ol_enable)
+			dp_cbs.hdd_disable_rx_ol_in_concurrency =
+					hdd_disable_rx_ol_in_concurrency;
 	dp_cbs.hdd_set_rx_mode_rps_cb = hdd_set_rx_mode_rps;
 	dp_cbs.hdd_ipa_set_mcc_mode_cb = hdd_ipa_set_mcc_mode;
 	dp_cbs.hdd_v2_flow_pool_map = hdd_v2_flow_pool_map;
@@ -14269,6 +14279,7 @@ static int hdd_update_dp_config(struct hdd_context *hdd_ctx)
 			cfg_get(hdd_ctx->psoc,
 				CFG_DP_TCP_UDP_CKSUM_OFFLOAD);
 	params.ipa_enable = ucfg_ipa_is_enabled();
+	params.gro_enable = cfg_get(hdd_ctx->psoc, CFG_DP_GRO);
 
 	status = cdp_update_config_parameters(soc, &params);
 	if (status) {

+ 3 - 8
core/hdd/src/wlan_hdd_softap_tx_rx.c

@@ -835,7 +835,7 @@ static void hdd_softap_notify_tx_compl_cbk(struct sk_buff *skb,
 QDF_STATUS hdd_softap_rx_packet_cbk(void *adapter_context, qdf_nbuf_t rx_buf)
 {
 	struct hdd_adapter *adapter = NULL;
-	int rxstat;
+	QDF_STATUS qdf_status;
 	unsigned int cpu_index;
 	struct sk_buff *skb = NULL;
 	struct sk_buff *next = NULL;
@@ -938,15 +938,10 @@ QDF_STATUS hdd_softap_rx_packet_cbk(void *adapter_context, qdf_nbuf_t rx_buf)
 		 * it to stack
 		 */
 		qdf_net_buf_debug_release_skb(skb);
-		if (hdd_napi_enabled(HDD_NAPI_ANY) &&
-			!hdd_ctx->enable_rxthread)
-			rxstat = netif_receive_skb(skb);
-		else
-			rxstat = netif_rx_ni(skb);
 
-		hdd_ctx->no_rx_offload_pkt_cnt++;
+		qdf_status = hdd_rx_deliver_to_stack(adapter, skb);
 
-		if (NET_RX_SUCCESS == rxstat)
+		if (QDF_IS_STATUS_SUCCESS(qdf_status))
 			++adapter->hdd_stats.tx_rx_stats.rx_delivered[cpu_index];
 		else
 			++adapter->hdd_stats.tx_rx_stats.rx_refused[cpu_index];

+ 9 - 5
core/hdd/src/wlan_hdd_stats.c

@@ -6131,7 +6131,7 @@ int wlan_hdd_get_temperature(struct hdd_adapter *adapter, int *temperature)
 	return 0;
 }
 
-void wlan_hdd_display_txrx_stats(struct hdd_context *hdd_ctx)
+void wlan_hdd_display_txrx_stats(struct hdd_context *ctx)
 {
 	struct hdd_adapter *adapter = NULL;
 	struct hdd_tx_rx_stats *stats;
@@ -6139,7 +6139,7 @@ void wlan_hdd_display_txrx_stats(struct hdd_context *hdd_ctx)
 	uint32_t total_rx_pkt, total_rx_dropped,
 		 total_rx_delv, total_rx_refused;
 
-	hdd_for_each_adapter(hdd_ctx, adapter) {
+	hdd_for_each_adapter(ctx, adapter) {
 		total_rx_pkt = 0;
 		total_rx_dropped = 0;
 		total_rx_delv = 0;
@@ -6153,7 +6153,7 @@ void wlan_hdd_display_txrx_stats(struct hdd_context *hdd_ctx)
 			total_rx_refused += stats->rx_refused[i];
 		}
 
-		hdd_debug("Total Transmit - called %u, dropped %u orphan %u",
+		hdd_debug("TX - called %u, dropped %u orphan %u",
 			  stats->tx_called, stats->tx_dropped,
 			  stats->tx_orphaned);
 
@@ -6164,8 +6164,12 @@ void wlan_hdd_display_txrx_stats(struct hdd_context *hdd_ctx)
 				  i, stats->rx_packets[i], stats->rx_dropped[i],
 				  stats->rx_delivered[i], stats->rx_refused[i]);
 		}
-		hdd_debug("Total Receive - packets %u, dropped %u, delivered %u, refused %u",
+		hdd_debug("RX - packets %u, dropped %u, delivered %u, refused %u\nGRO - agg %u non-agg %u flushes(%u %u) disabled(conc %u low-tput %u)",
 			  total_rx_pkt, total_rx_dropped, total_rx_delv,
-			  total_rx_refused);
+			  total_rx_refused, stats->rx_aggregated,
+			  stats->rx_non_aggregated, stats->rx_gro_flushes,
+			  stats->rx_gro_force_flushes,
+			  qdf_atomic_read(&ctx->disable_rx_ol_in_concurrency),
+			  qdf_atomic_read(&ctx->disable_rx_ol_in_low_tput));
 	}
 }

+ 229 - 96
core/hdd/src/wlan_hdd_tx_rx.c

@@ -24,7 +24,6 @@
 
 /* denote that this file does not allow legacy hddLog */
 #define HDD_DISALLOW_LEGACY_HDDLOG 1
-
 #include <wlan_hdd_tx_rx.h>
 #include <wlan_hdd_softap_tx_rx.h>
 #include <wlan_hdd_napi.h>
@@ -61,6 +60,7 @@
 #include "wlan_hdd_nud_tracking.h"
 #include "dp_txrx.h"
 #include "cfg_ucfg_api.h"
+#include "target_type.h"
 
 #ifdef QCA_LL_TX_FLOW_CONTROL_V2
 /*
@@ -1530,35 +1530,138 @@ static void hdd_resolve_rx_ol_mode(struct hdd_context *hdd_ctx)
 		cdp_cfg_get(soc, cfg_dp_lro_enable) &&
 			cdp_cfg_get(soc, cfg_dp_gro_enable) ?
 		hdd_err("Can't enable both LRO and GRO, disabling Rx offload") :
-		hdd_debug("LRO and GRO both are disabled");
+		hdd_info("LRO and GRO both are disabled");
 		hdd_ctx->ol_enable = 0;
 	} else if (cdp_cfg_get(soc, cfg_dp_lro_enable)) {
 		hdd_debug("Rx offload LRO is enabled");
 		hdd_ctx->ol_enable = CFG_LRO_ENABLED;
 	} else {
-		hdd_debug("Rx offload GRO is enabled");
+		hdd_info("Rx offload: GRO is enabled");
 		hdd_ctx->ol_enable = CFG_GRO_ENABLED;
 	}
 }
 
 /**
- * hdd_gro_rx() - Handle Rx procesing via GRO
+ * hdd_gro_rx_bh_disable() - GRO RX/flush function.
+ * @napi_to_use: napi to be used to give packets to the stack, gro flush
+ * @skb: pointer to sk_buff
+ *
+ * Function calls napi_gro_receive for the skb. If the skb indicates that a
+ * flush needs to be done (set by the lower DP layer), the function also calls
+ * napi_gro_flush. Local softirqs are disabled (and later enabled) while making
+ * napi_gro__ calls.
+ *
+ * Return: QDF_STATUS_SUCCESS if not dropped by napi_gro_receive or
+ *	   QDF error code.
+ */
+static QDF_STATUS hdd_gro_rx_bh_disable(struct hdd_adapter *adapter,
+					struct napi_struct *napi_to_use,
+					struct sk_buff *skb)
+{
+	QDF_STATUS status = QDF_STATUS_E_FAILURE;
+	gro_result_t gro_res;
+	bool flush_ind = QDF_NBUF_CB_RX_FLUSH_IND(skb);
+
+	skb_set_hash(skb, QDF_NBUF_CB_RX_FLOW_ID(skb), PKT_HASH_TYPE_L4);
+
+	local_bh_disable();
+	gro_res = napi_gro_receive(napi_to_use, skb);
+	if (flush_ind)
+		napi_gro_flush(napi_to_use, false);
+	local_bh_enable();
+
+	if (gro_res != GRO_DROP)
+		status = QDF_STATUS_SUCCESS;
+
+	if (flush_ind)
+		adapter->hdd_stats.tx_rx_stats.rx_gro_flushes++;
+
+	return status;
+}
+
+/**
+ * hdd_gro_rx_dp_thread() - Handle Rx processing via GRO for DP thread
  * @adapter: pointer to adapter context
  * @skb: pointer to sk_buff
  *
  * Return: QDF_STATUS_SUCCESS if processed via GRO or non zero return code
  */
-static QDF_STATUS hdd_gro_rx(struct hdd_adapter *adapter, struct sk_buff *skb)
+static
+QDF_STATUS hdd_gro_rx_dp_thread(struct hdd_adapter *adapter,
+				struct sk_buff *skb)
+{
+	struct napi_struct *napi_to_use = NULL;
+	QDF_STATUS status = QDF_STATUS_E_FAILURE;
+	bool gro_disabled_temp = false;
+	struct hdd_context *hdd_ctx = adapter->hdd_ctx;
+
+	if (!adapter->hdd_ctx->enable_dp_rx_threads) {
+		hdd_dp_err_rl("gro not supported without DP RX thread!");
+		status = QDF_STATUS_E_FAILURE;
+		return status;
+	}
+
+	napi_to_use =
+		dp_rx_get_napi_context(cds_get_context(QDF_MODULE_ID_SOC),
+				       QDF_NBUF_CB_RX_CTX_ID(skb));
+
+	if (!napi_to_use) {
+		hdd_dp_err_rl("no napi to use for GRO!");
+		status = QDF_STATUS_E_FAILURE;
+		return status;
+	}
+
+	gro_disabled_temp =
+		qdf_atomic_read(&hdd_ctx->disable_rx_ol_in_low_tput);
+
+	if (!gro_disabled_temp) {
+		/* nothing to do */
+	} else {
+		/*
+		 * GRO is disabled temporarily, but there is a pending
+		 * gro_list, flush it.
+		 */
+		if (napi_to_use->gro_list) {
+			QDF_NBUF_CB_RX_FLUSH_IND(skb) = 1;
+			adapter->hdd_stats.tx_rx_stats.rx_gro_force_flushes++;
+		} else {
+			hdd_err_rl("GRO disabled - return");
+			status = QDF_STATUS_E_FAILURE;
+			return status;
+		}
+	}
+
+	status = hdd_gro_rx_bh_disable(adapter, napi_to_use, skb);
+
+	return status;
+}
+
+/**
+ * hdd_gro_rx_legacy() - Handle Rx processing via GRO for ihelium based targets
+ * @adapter: pointer to adapter context
+ * @skb: pointer to sk_buff
+ *
+ * Supports GRO for only station mode
+ *
+ * Return: QDF_STATUS_SUCCESS if processed via GRO or non zero return code
+ */
+static
+QDF_STATUS hdd_gro_rx_legacy(struct hdd_adapter *adapter, struct sk_buff *skb)
 {
 	struct qca_napi_info *qca_napii;
 	struct qca_napi_data *napid;
 	struct napi_struct *napi_to_use;
 	QDF_STATUS status = QDF_STATUS_E_FAILURE;
+	struct hdd_context *hdd_ctx = adapter->hdd_ctx;
 
 	/* Only enabling it for STA mode like LRO today */
 	if (QDF_STA_MODE != adapter->device_mode)
 		return QDF_STATUS_E_NOSUPPORT;
 
+	if (qdf_atomic_read(&hdd_ctx->disable_rx_ol_in_low_tput) ||
+	    qdf_atomic_read(&hdd_ctx->disable_rx_ol_in_concurrency))
+		return QDF_STATUS_E_NOSUPPORT;
+
 	napid = hdd_napi_get_all();
 	if (unlikely(napid == NULL))
 		goto out;
@@ -1567,7 +1670,6 @@ static QDF_STATUS hdd_gro_rx(struct hdd_adapter *adapter, struct sk_buff *skb)
 	if (unlikely(qca_napii == NULL))
 		goto out;
 
-	skb_set_hash(skb, QDF_NBUF_CB_RX_FLOW_ID(skb), PKT_HASH_TYPE_L4);
 	/*
 	 * As we are breaking context in Rxthread mode, there is rx_thread NAPI
 	 * corresponds each hif_napi.
@@ -1577,11 +1679,7 @@ static QDF_STATUS hdd_gro_rx(struct hdd_adapter *adapter, struct sk_buff *skb)
 	else
 		napi_to_use = &qca_napii->napi;
 
-	local_bh_disable();
-	napi_gro_receive(napi_to_use, skb);
-	local_bh_enable();
-
-	status = QDF_STATUS_SUCCESS;
+	status = hdd_gro_rx_bh_disable(adapter, napi_to_use, skb);
 out:
 
 	return status;
@@ -1643,12 +1741,14 @@ static void hdd_qdf_lro_flush(void *data)
 
 /**
  * hdd_register_rx_ol() - Register LRO/GRO rx processing callbacks
+ * @hdd_ctx: pointer to hdd_ctx
+ * @lithium_based_target: whether it's a lithium arch based target or not
  *
  * Return: none
  */
-static void hdd_register_rx_ol(void)
+static void hdd_register_rx_ol_cb(struct hdd_context *hdd_ctx,
+				  bool lithium_based_target)
 {
-	struct hdd_context *hdd_ctx = cds_get_context(QDF_MODULE_ID_HDD);
 	void *soc = cds_get_context(QDF_MODULE_ID_SOC);
 
 	if  (!hdd_ctx) {
@@ -1663,41 +1763,52 @@ static void hdd_register_rx_ol(void)
 		hdd_ctx->receive_offload_cb = hdd_lro_rx;
 		hdd_debug("LRO is enabled");
 	} else if (hdd_ctx->ol_enable == CFG_GRO_ENABLED) {
-		if (hdd_ctx->enable_rxthread)
-			cdp_register_rx_offld_flush_cb(soc,
-						hdd_rxthread_napi_gro_flush);
-		else
-			cdp_register_rx_offld_flush_cb(soc,
-						       hdd_hif_napi_gro_flush);
-		hdd_ctx->receive_offload_cb = hdd_gro_rx;
+		if (lithium_based_target) {
+		/* no flush registration needed, it happens in DP thread */
+			hdd_ctx->receive_offload_cb = hdd_gro_rx_dp_thread;
+		} else {
+			/*ihelium based targets */
+			if (hdd_ctx->enable_rxthread)
+				cdp_register_rx_offld_flush_cb(soc,
+							       hdd_rxthread_napi_gro_flush);
+			else
+				cdp_register_rx_offld_flush_cb(soc,
+							       hdd_hif_napi_gro_flush);
+			hdd_ctx->receive_offload_cb = hdd_gro_rx_legacy;
+		}
 		hdd_debug("GRO is enabled");
 	} else if (HDD_MSM_CFG(hdd_ctx->config->enable_tcp_delack)) {
 		hdd_ctx->en_tcp_delack_no_lro = 1;
+		hdd_debug("TCP Del ACK is enabled");
 	}
 }
 
-int hdd_rx_ol_init(struct hdd_context *hdd_ctx)
+/**
+ * hdd_rx_ol_send_config() - Send RX offload configuration to FW
+ * @hdd_ctx: pointer to hdd_ctx
+ *
+ * This function is only used for non lithium targets. Lithium based targets are
+ * sending LRO config to FW in vdev attach implemented in cmn DP layer.
+ *
+ * Return: 0 on success, non zero on failure
+ */
+static int hdd_rx_ol_send_config(struct hdd_context *hdd_ctx)
 {
 	struct cdp_lro_hash_config lro_config = {0};
-
-	hdd_resolve_rx_ol_mode(hdd_ctx);
-
-	hdd_register_rx_ol();
-
 	/*
 	 * This will enable flow steering and Toeplitz hash
 	 * So enable it for LRO or GRO processing.
 	 */
-	if (hdd_napi_enabled(HDD_NAPI_ANY) == 0) {
-		hdd_warn("NAPI is disabled");
-		return 0;
+	if (cfg_get(hdd_ctx->psoc, CFG_DP_GRO) ||
+	    cfg_get(hdd_ctx->psoc, CFG_DP_LRO)) {
+		lro_config.lro_enable = 1;
+		lro_config.tcp_flag = TCPHDR_ACK;
+		lro_config.tcp_flag_mask = TCPHDR_FIN | TCPHDR_SYN |
+					   TCPHDR_RST | TCPHDR_ACK |
+					   TCPHDR_URG | TCPHDR_ECE |
+					   TCPHDR_CWR;
 	}
 
-	lro_config.lro_enable = 1;
-	lro_config.tcp_flag = TCPHDR_ACK;
-	lro_config.tcp_flag_mask = TCPHDR_FIN | TCPHDR_SYN | TCPHDR_RST |
-		TCPHDR_ACK | TCPHDR_URG | TCPHDR_ECE | TCPHDR_CWR;
-
 	get_random_bytes(lro_config.toeplitz_hash_ipv4,
 			 (sizeof(lro_config.toeplitz_hash_ipv4[0]) *
 			  LRO_IPV4_SEED_ARR_SZ));
@@ -1706,10 +1817,35 @@ int hdd_rx_ol_init(struct hdd_context *hdd_ctx)
 			 (sizeof(lro_config.toeplitz_hash_ipv6[0]) *
 			  LRO_IPV6_SEED_ARR_SZ));
 
-	if (0 != wma_lro_init(&lro_config)) {
-		hdd_err("Failed to send LRO/GRO configuration!");
-		hdd_ctx->ol_enable = 0;
+	if (wma_lro_init(&lro_config))
 		return -EAGAIN;
+	else
+		hdd_dp_info("LRO Config: lro_enable: 0x%x tcp_flag 0x%x tcp_flag_mask 0x%x",
+			    lro_config.lro_enable, lro_config.tcp_flag,
+			    lro_config.tcp_flag_mask);
+
+	return 0;
+}
+
+int hdd_rx_ol_init(struct hdd_context *hdd_ctx)
+{
+	int ret = 0;
+	bool lithium_based_target = false;
+
+	if (hdd_ctx->target_type == TARGET_TYPE_QCA6290 ||
+	    hdd_ctx->target_type == TARGET_TYPE_QCA6390)
+		lithium_based_target = true;
+
+	hdd_resolve_rx_ol_mode(hdd_ctx);
+	hdd_register_rx_ol_cb(hdd_ctx, lithium_based_target);
+
+	if (!lithium_based_target) {
+		ret = hdd_rx_ol_send_config(hdd_ctx);
+		if (ret) {
+			hdd_ctx->ol_enable = 0;
+			hdd_err("Failed to send LRO/GRO configuration! %u", ret);
+			return ret;
+		}
 	}
 
 	return 0;
@@ -1734,55 +1870,26 @@ void hdd_disable_rx_ol_in_concurrency(bool disable)
 			wlan_hdd_update_tcp_rx_param(hdd_ctx, &rx_tp_data);
 			hdd_ctx->en_tcp_delack_no_lro = 1;
 		}
-		qdf_atomic_set(&hdd_ctx->disable_lro_in_concurrency, 1);
+		qdf_atomic_set(&hdd_ctx->disable_rx_ol_in_concurrency, 1);
 	} else {
 		if (HDD_MSM_CFG(hdd_ctx->config->enable_tcp_delack)) {
 			hdd_info("Disable TCP delack as LRO is enabled");
 			hdd_ctx->en_tcp_delack_no_lro = 0;
 			hdd_reset_tcp_delack(hdd_ctx);
 		}
-		qdf_atomic_set(&hdd_ctx->disable_lro_in_concurrency, 0);
+		qdf_atomic_set(&hdd_ctx->disable_rx_ol_in_concurrency, 0);
 	}
 }
 
 void hdd_disable_rx_ol_for_low_tput(struct hdd_context *hdd_ctx, bool disable)
 {
 	if (disable)
-		qdf_atomic_set(&hdd_ctx->disable_lro_in_low_tput, 1);
+		qdf_atomic_set(&hdd_ctx->disable_rx_ol_in_low_tput, 1);
 	else
-		qdf_atomic_set(&hdd_ctx->disable_lro_in_low_tput, 0);
+		qdf_atomic_set(&hdd_ctx->disable_rx_ol_in_low_tput, 0);
 }
 
-/**
- * hdd_can_handle_receive_offload() - Check for dynamic disablement
- * @hdd_ctx: hdd context
- * @skb: pointer to sk_buff which will be processed by Rx OL
- *
- * Check for dynamic disablement of Rx offload
- *
- * Return: false if we cannot process otherwise true
- */
-static bool hdd_can_handle_receive_offload(struct hdd_context *hdd_ctx,
-					   struct sk_buff *skb)
-{
-	if (!hdd_ctx->receive_offload_cb)
-		return false;
-
-	if (!QDF_NBUF_CB_RX_TCP_PROTO(skb) ||
-	    qdf_atomic_read(&hdd_ctx->disable_lro_in_concurrency) ||
-	    QDF_NBUF_CB_RX_PEER_CACHED_FRM(skb) ||
-	    qdf_atomic_read(&hdd_ctx->disable_lro_in_low_tput))
-		return false;
-	else
-		return true;
-}
 #else /* RECEIVE_OFFLOAD */
-static bool hdd_can_handle_receive_offload(struct hdd_context *hdd_ctx,
-					   struct sk_buff *skb)
-{
-	return false;
-}
-
 int hdd_rx_ol_init(struct hdd_context *hdd_ctx)
 {
 	hdd_err("Rx_OL, LRO/GRO not supported");
@@ -1826,13 +1933,61 @@ QDF_STATUS hdd_rx_pkt_thread_enqueue_cbk(void *adapter,
 	return dp_rx_enqueue_pkt(cds_get_context(QDF_MODULE_ID_SOC), nbuf_list);
 }
 
+QDF_STATUS hdd_rx_deliver_to_stack(struct hdd_adapter *adapter,
+				   struct sk_buff *skb)
+{
+	struct hdd_context *hdd_ctx = adapter->hdd_ctx;
+	int status = QDF_STATUS_E_FAILURE;
+	int netif_status;
+	bool skb_receive_offload_ok = false;
+
+	if (QDF_NBUF_CB_RX_TCP_PROTO(skb) &&
+	    !QDF_NBUF_CB_RX_PEER_CACHED_FRM(skb))
+		skb_receive_offload_ok = true;
+
+	if (skb_receive_offload_ok && hdd_ctx->receive_offload_cb)
+		status = hdd_ctx->receive_offload_cb(adapter, skb);
+
+	if (QDF_IS_STATUS_SUCCESS(status)) {
+		adapter->hdd_stats.tx_rx_stats.rx_aggregated++;
+		return status;
+	}
+
+	adapter->hdd_stats.tx_rx_stats.rx_non_aggregated++;
+
+	/* Account for GRO/LRO ineligible packets, mostly UDP */
+	hdd_ctx->no_rx_offload_pkt_cnt++;
+
+	if (qdf_likely(hdd_ctx->enable_dp_rx_threads ||
+		       hdd_ctx->enable_rxthread)) {
+		local_bh_disable();
+		netif_status = netif_receive_skb(skb);
+		local_bh_enable();
+	} else if (qdf_unlikely(QDF_NBUF_CB_RX_PEER_CACHED_FRM(skb))) {
+		/*
+		 * Frames before peer is registered to avoid contention with
+		 * NAPI softirq.
+		 * Refer fix:
+		 * qcacld-3.0: Do netif_rx_ni() for frames received before
+		 * peer assoc
+		 */
+		netif_status = netif_rx_ni(skb);
+	} else { /* NAPI Context */
+		netif_status = netif_receive_skb(skb);
+	}
+
+	if (netif_status == NET_RX_SUCCESS)
+		status = QDF_STATUS_SUCCESS;
+
+	return status;
+}
+
 QDF_STATUS hdd_rx_packet_cbk(void *adapter_context,
 			     qdf_nbuf_t rxBuf)
 {
 	struct hdd_adapter *adapter = NULL;
 	struct hdd_context *hdd_ctx = NULL;
-	int rxstat = 0;
-	QDF_STATUS rx_ol_status = QDF_STATUS_E_FAILURE;
+	QDF_STATUS qdf_status = QDF_STATUS_E_FAILURE;
 	struct sk_buff *skb = NULL;
 	struct sk_buff *next = NULL;
 	struct hdd_station_ctx *sta_ctx = NULL;
@@ -1873,11 +2028,6 @@ QDF_STATUS hdd_rx_packet_cbk(void *adapter_context,
 		next = skb->next;
 		skb->next = NULL;
 
-/* Debug code, remove later */
-#if defined(QCA_WIFI_QCA6290) || defined(QCA_WIFI_QCA6390)
-		QDF_TRACE(QDF_MODULE_ID_HDD_DATA, QDF_TRACE_LEVEL_DEBUG,
-			 "%s: skb %pK skb->len %d\n", __func__, skb, skb->len);
-#endif
 		if (QDF_NBUF_CB_PACKET_TYPE_ARP ==
 		    QDF_NBUF_CB_GET_PACKET_TYPE(skb)) {
 			if (qdf_nbuf_data_is_arp_rsp(skb) &&
@@ -1975,26 +2125,9 @@ QDF_STATUS hdd_rx_packet_cbk(void *adapter_context,
 
 		hdd_tsf_timestamp_rx(hdd_ctx, skb, ktime_to_us(skb->tstamp));
 
-		if (hdd_can_handle_receive_offload(hdd_ctx, skb))
-			rx_ol_status = hdd_ctx->receive_offload_cb(adapter,
-								   skb);
-
-		if (rx_ol_status != QDF_STATUS_SUCCESS) {
-			/* we should optimize this per packet check, unlikely */
-			/* Account for GRO/LRO ineligible packets, mostly UDP */
-			hdd_ctx->no_rx_offload_pkt_cnt++;
-			if (hdd_napi_enabled(HDD_NAPI_ANY) &&
-			    !hdd_ctx->enable_rxthread &&
-			    !QDF_NBUF_CB_RX_PEER_CACHED_FRM(skb)) {
-				rxstat = netif_receive_skb(skb);
-			} else {
-				local_bh_disable();
-				rxstat = netif_receive_skb(skb);
-				local_bh_enable();
-			}
-		}
+		qdf_status = hdd_rx_deliver_to_stack(adapter, skb);
 
-		if (!rxstat) {
+		if (QDF_IS_STATUS_SUCCESS(qdf_status)) {
 			++adapter->hdd_stats.tx_rx_stats.
 						rx_delivered[cpu_index];
 			if (track_arp)

+ 33 - 29
core/hdd/src/wlan_hdd_wext.c

@@ -3010,6 +3010,7 @@ void hdd_wlan_get_stats(struct hdd_adapter *adapter, uint16_t *length,
 	uint32_t total_rx_pkt = 0, total_rx_dropped = 0;
 	uint32_t total_rx_delv = 0, total_rx_refused = 0;
 	int i = 0;
+	struct hdd_context *hdd_ctx = adapter->hdd_ctx;
 
 	for (; i < NUM_CPUS; i++) {
 		total_rx_pkt += stats->rx_packets[i];
@@ -3019,39 +3020,42 @@ void hdd_wlan_get_stats(struct hdd_adapter *adapter, uint16_t *length,
 	}
 
 	len = scnprintf(buffer, buf_len,
-		"\nTransmit[%lu] - "
-		"called %u, dropped %u orphan %u,"
-		"\n[dropped]    BK %u, BE %u, VI %u, VO %u"
-		"\n[classified] BK %u, BE %u, VI %u, VO %u"
-		"\n\nReceive[%lu] - "
-		"packets %u, dropped %u, delivered %u, refused %u"
-		"\n",
-		qdf_system_ticks(),
-		stats->tx_called,
-		stats->tx_dropped,
-		stats->tx_orphaned,
-
-		stats->tx_dropped_ac[SME_AC_BK],
-		stats->tx_dropped_ac[SME_AC_BE],
-		stats->tx_dropped_ac[SME_AC_VI],
-		stats->tx_dropped_ac[SME_AC_VO],
-
-		stats->tx_classified_ac[SME_AC_BK],
-		stats->tx_classified_ac[SME_AC_BE],
-		stats->tx_classified_ac[SME_AC_VI],
-		stats->tx_classified_ac[SME_AC_VO],
-		qdf_system_ticks(),
-		total_rx_pkt, total_rx_dropped, total_rx_delv, total_rx_refused
-		);
+			"\nTransmit[%lu] - "
+			"called %u, dropped %u orphan %u,"
+			"\n[dropped]    BK %u, BE %u, VI %u, VO %u"
+			"\n[classified] BK %u, BE %u, VI %u, VO %u"
+			"\n\nReceive[%lu] - "
+			"packets %u, dropped %u, delivered %u, refused %u\n"
+			"GRO - agg %u non-agg %u flushes(%u %u) disabled(conc %u low-tput %u)\n",
+			qdf_system_ticks(),
+			stats->tx_called,
+			stats->tx_dropped,
+			stats->tx_orphaned,
+			stats->tx_dropped_ac[SME_AC_BK],
+			stats->tx_dropped_ac[SME_AC_BE],
+			stats->tx_dropped_ac[SME_AC_VI],
+			stats->tx_dropped_ac[SME_AC_VO],
+			stats->tx_classified_ac[SME_AC_BK],
+			stats->tx_classified_ac[SME_AC_BE],
+			stats->tx_classified_ac[SME_AC_VI],
+			stats->tx_classified_ac[SME_AC_VO],
+			qdf_system_ticks(),
+			total_rx_pkt, total_rx_dropped, total_rx_delv,
+			total_rx_refused,
+			stats->rx_aggregated, stats->rx_non_aggregated,
+			stats->rx_gro_flushes,
+			stats->rx_gro_force_flushes,
+			qdf_atomic_read(&hdd_ctx->disable_rx_ol_in_concurrency),
+			qdf_atomic_read(&hdd_ctx->disable_rx_ol_in_low_tput));
 
 	for (i = 0; i < NUM_CPUS; i++) {
 		if (stats->rx_packets[i] == 0)
 			continue;
 		len += scnprintf(buffer + len, buf_len - len,
-			"Rx CPU[%d]:"
-			"packets %u, dropped %u, delivered %u, refused %u\n",
-			i, stats->rx_packets[i], stats->rx_dropped[i],
-			stats->rx_delivered[i], stats->rx_refused[i]);
+				 "Rx CPU[%d]:"
+				 "packets %u, dropped %u, delivered %u, refused %u\n",
+				 i, stats->rx_packets[i], stats->rx_dropped[i],
+				 stats->rx_delivered[i], stats->rx_refused[i]);
 	}
 
 	len += scnprintf(buffer + len, buf_len - len,
@@ -3065,7 +3069,7 @@ void hdd_wlan_get_stats(struct hdd_adapter *adapter, uint16_t *length,
 		stats->txflow_unpause_cnt);
 
 	len += cdp_stats(cds_get_context(QDF_MODULE_ID_SOC),
-		adapter->session_id, &buffer[len], (buf_len - len));
+			 adapter->session_id, &buffer[len], (buf_len - len));
 	*length = len + 1;
 }