
qcacmn: fast TX API and registration

This adds a new fast TX API that skips the per-packet feature checks
of the regular transmit path. The API is invoked when SFE tags a
packet as fast_forwarded and the vap's fast_tx flag is set.

This also avoids additional re-checks in the Wi-Fi TX function.

CRs-Fixed: 3218650
Change-Id: Iba17ede59652a1ff2af553f57de21dc58946298e
Tallapragada Kalyan, 3 years ago
parent commit 1b1b3adbea

+ 11 - 0
dp/inc/cdp_txrx_cmn_struct.h

@@ -806,6 +806,15 @@ typedef void
 typedef qdf_nbuf_t (*ol_txrx_tx_fp)(struct cdp_soc_t *soc, uint8_t vdev_id,
 				    qdf_nbuf_t msdu_list);
 
+/**
+ * ol_txrx_tx_fast_fp - top-level fast transmit function
+ * @soc - dp soc handle
+ * @vdev_id - handle to the virtual device object
+ * @msdu_list - list of network buffers
+ */
+typedef qdf_nbuf_t (*ol_txrx_tx_fast_fp)(struct cdp_soc_t *soc, uint8_t vdev_id,
+					 qdf_nbuf_t msdu_list);
+
 /**
  * ol_txrx_tx_exc_fp - top-level transmit function on exception path
  * @soc - dp soc handle
@@ -1033,6 +1042,7 @@ struct ol_txrx_ops {
 	/* tx function pointers - specified by txrx, stored by OS shim */
 	struct {
 		ol_txrx_tx_fp         tx;
+		ol_txrx_tx_fast_fp    tx_fast;
 		ol_txrx_tx_exc_fp     tx_exception;
 		ol_txrx_tx_free_ext_fp tx_free_ext;
 		ol_txrx_completion_fp tx_comp;
@@ -1069,6 +1079,7 @@ struct ol_txrx_ops {
  */
 struct ol_txrx_hardtart_ctxt {
 	ol_txrx_tx_fp         tx;
+	ol_txrx_tx_fast_fp    tx_fast;
 	ol_txrx_tx_exc_fp     tx_exception;
 };
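For orientation, here is a minimal sketch of how an OS shim might route frames onto the new callback once dp_vdev_register_tx_handler() has filled in txrx_ops. The osif_vdev structure, its fields, and the skb fast_forwarded mark are illustrative assumptions; SFE's marking logic and the shim internals are outside this commit.

#include <linux/netdevice.h>
#include <qdf_nbuf.h>

/* Hypothetical OS-shim per-netdev state; not part of this commit. */
struct osif_vdev {
	struct cdp_soc_t *soc;
	uint8_t vdev_id;
	bool fast_tx;                 /* vap opted into the fast path */
	struct ol_txrx_ops txrx_ops;  /* filled by dp_vdev_register_tx_handler */
};

static netdev_tx_t osif_hard_start_xmit(struct sk_buff *skb,
					struct net_device *dev)
{
	struct osif_vdev *osvdev = netdev_priv(dev);
	qdf_nbuf_t ret;

	/* fast path only when SFE marked the skb (downstream kernels
	 * carry a fast_forwarded flag) and the vap opted in
	 */
	if (osvdev->fast_tx && skb->fast_forwarded)
		ret = osvdev->txrx_ops.tx.tx_fast(osvdev->soc,
						  osvdev->vdev_id, skb);
	else
		ret = osvdev->txrx_ops.tx.tx(osvdev->soc,
					     osvdev->vdev_id, skb);

	/* both callbacks return NULL on success, the nbuf on failure */
	if (ret)
		dev_kfree_skb_any(ret);

	return NETDEV_TX_OK;
}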
 

+ 6 - 0
dp/inc/cdp_txrx_ops.h

@@ -589,6 +589,12 @@ struct cdp_cmn_ops {
 				    uint32_t value);
 
 	ol_txrx_tx_fp tx_send;
+
+	/* tx_fast_send is called only in AP mode, and only when all
+	 * transmit-path features (including extended stats) are disabled
+	 */
+	ol_txrx_tx_fast_fp tx_fast_send;
+
 	/**
 	 * txrx_get_os_rx_handles_from_vdev() - Return function, osif vdev
 	 *					to deliver pkt to stack.

+ 2 - 0
dp/inc/cdp_txrx_stats_struct.h

@@ -1743,6 +1743,8 @@ struct cdp_rx_stats {
  */
 struct cdp_tx_ingress_stats {
 	struct cdp_pkt_info rcvd;
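+	/* fast-xmit-flow and per-ring ingress counters; rcvd_per_core is
+	 * indexed by the descriptor pool id derived from the skb queue
+	 * mapping
+	 */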
+	uint64_t rcvd_in_fast_xmit_flow;
+	uint32_t rcvd_per_core[CDP_MAX_TX_DATA_RINGS];
 	struct cdp_pkt_info processed;
 	struct cdp_pkt_info reinject_pkts;
 	struct cdp_pkt_info inspect_pkts;

+ 2 - 0
dp/wifi3.0/be/dp_be.c

@@ -716,6 +716,7 @@ static QDF_STATUS dp_vdev_attach_be(struct dp_soc *soc, struct dp_vdev *vdev)
 	be_vdev->vdev_id_check_en = DP_TX_VDEV_ID_CHECK_ENABLE;
 
 	be_vdev->bank_id = dp_tx_get_bank_profile(be_soc, be_vdev);
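+	/* cache bank_id on the common vdev too, so the TX fast path can
+	 * read it without casting to the BE-specific vdev
+	 */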
+	vdev->bank_id = be_vdev->bank_id;
 
 	if (be_vdev->bank_id == DP_BE_INVALID_BANK_ID) {
 		QDF_BUG(0);
@@ -1799,6 +1800,7 @@ void dp_initialize_arch_ops_be(struct dp_arch_ops *arch_ops)
 #ifndef QCA_HOST_MODE_WIFI_DISABLED
 	arch_ops->tx_hw_enqueue = dp_tx_hw_enqueue_be;
 	arch_ops->dp_rx_process = dp_rx_process_be;
+	arch_ops->dp_tx_send_fast = dp_tx_fast_send_be;
 	arch_ops->tx_comp_get_params_from_hal_desc =
 		dp_tx_comp_get_params_from_hal_desc_be;
 	arch_ops->dp_tx_process_htt_completion =

+ 161 - 2
dp/wifi3.0/be/dp_be_tx.c

@@ -800,6 +800,7 @@ dp_tx_hw_enqueue_be(struct dp_soc *soc, struct dp_vdev *vdev,
 	uint8_t bm_id = dp_tx_get_rbm_id_be(soc, ring_id);
 	hal_ring_handle_t hal_ring_hdl = NULL;
 	QDF_STATUS status = QDF_STATUS_E_RESOURCES;
+	uint8_t num_desc_bytes = HAL_TX_DESC_LEN_BYTES;
 
 	be_vdev = dp_get_be_vdev_from_dp_vdev(vdev);
 
@@ -860,7 +861,7 @@ dp_tx_hw_enqueue_be(struct dp_soc *soc, struct dp_vdev *vdev,
 		hal_tx_desc_set_l4_checksum_en(hal_tx_desc_cached, 1);
 	}
 
-	hal_tx_desc_set_bank_id(hal_tx_desc_cached, be_vdev->bank_id);
+	hal_tx_desc_set_bank_id(hal_tx_desc_cached, vdev->bank_id);
 
 	dp_tx_vdev_id_set_hal_tx_desc(hal_tx_desc_cached, vdev, msdu_info);
 
@@ -894,7 +895,7 @@ dp_tx_hw_enqueue_be(struct dp_soc *soc, struct dp_vdev *vdev,
 	dp_vdev_peer_stats_update_protocol_cnt_tx(vdev, tx_desc->nbuf);
 
 	/* Sync cached descriptor with HW */
-	hal_tx_desc_sync(hal_tx_desc_cached, hal_tx_desc);
+	hal_tx_desc_sync(hal_tx_desc_cached, hal_tx_desc, num_desc_bytes);
 
 	coalesce = dp_tx_attempt_coalescing(soc, vdev, tx_desc, tid,
 					    msdu_info, ring_id);
@@ -1089,6 +1090,7 @@ void dp_tx_update_bank_profile(struct dp_soc_be *be_soc,
 {
 	dp_tx_put_bank_profile(be_soc, be_vdev);
 	be_vdev->bank_id = dp_tx_get_bank_profile(be_soc, be_vdev);
+	be_vdev->vdev.bank_id = be_vdev->bank_id;
 }
 
 QDF_STATUS dp_tx_desc_pool_init_be(struct dp_soc *soc,
@@ -1256,3 +1258,160 @@ QDF_STATUS dp_tx_compute_tx_delay_be(struct dp_soc *soc,
 {
 	return dp_mlo_compute_hw_delay_us(soc, vdev, ts, delay_us);
 }
+
+static inline
+qdf_dma_addr_t dp_tx_nbuf_map_be(struct dp_vdev *vdev,
+				 struct dp_tx_desc_s *tx_desc,
+				 qdf_nbuf_t nbuf)
+{
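+	/* flush only the first 256 bytes of the frame, without a DSB; the
+	 * barrier is deferred to the single qdf_dsb() issued after the HW
+	 * descriptor write in dp_tx_fast_send_be(). (256 bytes presumably
+	 * covers the header region the CPU dirtied on the forwarding path.)
+	 */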
+	qdf_nbuf_dma_clean_range_no_dsb((void *)nbuf->data,
+					(void *)(nbuf->data + 256));
+
+	return (qdf_dma_addr_t)qdf_mem_virt_to_phys(nbuf->data);
+}
+
+static inline
+void dp_tx_nbuf_unmap_be(struct dp_soc *soc,
+			 struct dp_tx_desc_s *desc)
+{
+}
+
+/**
+ * dp_tx_fast_send_be() - Transmit a frame on a given VAP
+ * @soc_hdl: DP soc handle
+ * @vdev_id: id of DP vdev handle
+ * @nbuf: skb
+ *
+ * Entry point for the Core Tx layer (DP_TX), invoked from
+ * hard_start_xmit in OSIF/HDD or from dp_rx_process for intra-vap
+ * forwarding cases
+ *
+ * Return: NULL on success,
+ *         nbuf when it fails to send
+ */
+qdf_nbuf_t dp_tx_fast_send_be(struct cdp_soc_t *soc_hdl, uint8_t vdev_id,
+			      qdf_nbuf_t nbuf)
+{
+	struct dp_soc *soc = cdp_soc_t_to_dp_soc(soc_hdl);
+	struct dp_vdev *vdev = NULL;
+	struct dp_pdev *pdev = NULL;
+	struct dp_tx_desc_s *tx_desc;
+	uint16_t desc_pool_id;
+	uint16_t pkt_len;
+	qdf_dma_addr_t paddr;
+	QDF_STATUS status = QDF_STATUS_E_RESOURCES;
+	uint8_t cached_desc[HAL_TX_DESC_LEN_BYTES] = { 0 };
+	hal_ring_handle_t hal_ring_hdl = NULL;
+	uint32_t *hal_tx_desc_cached;
+	void *hal_tx_desc;
+
+	if (qdf_unlikely(vdev_id >= MAX_VDEV_CNT))
+		return nbuf;
+
+	vdev = soc->vdev_id_map[vdev_id];
+	if (qdf_unlikely(!vdev))
+		return nbuf;
+
+	desc_pool_id = qdf_nbuf_get_queue_mapping(nbuf) & DP_TX_QUEUE_MASK;
+
+	pkt_len = qdf_nbuf_headlen(nbuf);
+	DP_STATS_INC_PKT(vdev, tx_i.rcvd, 1, pkt_len);
+	DP_STATS_INC(vdev, tx_i.rcvd_in_fast_xmit_flow, 1);
+	DP_STATS_INC(vdev, tx_i.rcvd_per_core[desc_pool_id], 1);
+
+	pdev = vdev->pdev;
+	if (dp_tx_limit_check(vdev))
+		return nbuf;
+
+	tx_desc = dp_tx_desc_alloc(soc, desc_pool_id);
+
+	if (qdf_unlikely(!tx_desc)) {
+		DP_STATS_INC(vdev, tx_i.dropped.desc_na.num, 1);
+		DP_STATS_INC(vdev, tx_i.dropped.desc_na_exc_alloc_fail.num, 1);
+		return nbuf;
+	}
+
+	dp_tx_outstanding_inc(pdev);
+
+	/* Initialize the SW tx descriptor */
+	tx_desc->nbuf = nbuf;
+	tx_desc->shinfo_addr = skb_end_pointer(nbuf);
+	tx_desc->frm_type = dp_tx_frm_std;
+	tx_desc->tx_encap_type = vdev->tx_encap_type;
+	tx_desc->vdev_id = vdev_id;
+	tx_desc->pdev = pdev;
+	tx_desc->pkt_offset = 0;
+	tx_desc->length = pkt_len;
+	tx_desc->flags |= DP_TX_DESC_FLAG_SIMPLE;
+
+	paddr = dp_tx_nbuf_map_be(vdev, tx_desc, nbuf);
+	if (!paddr) {
+		/* Handle failure */
+		dp_err("qdf_nbuf_map failed");
+		DP_STATS_INC(vdev, tx_i.dropped.dma_error, 1);
+		goto release_desc;
+	}
+
+	tx_desc->dma_addr = paddr;
+
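+	/* hand-pack the six 32-bit words of the TCL data command that the
+	 * fast path programs (DP_TX_FAST_DESC_SIZE bytes): buffer address,
+	 * SW cookie, bank id, HTT metadata, length plus checksum enables,
+	 * and pmac/vdev id
+	 */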
+	hal_tx_desc_cached = (void *)cached_desc;
+	hal_tx_desc_cached[0] = (uint32_t)tx_desc->dma_addr;
+	hal_tx_desc_cached[1] = tx_desc->id <<
+		TCL_DATA_CMD_BUF_ADDR_INFO_SW_BUFFER_COOKIE_LSB;
+
+	/* bank_id */
+	hal_tx_desc_cached[2] = vdev->bank_id << TCL_DATA_CMD_BANK_ID_LSB;
+	hal_tx_desc_cached[3] = vdev->htt_tcl_metadata <<
+		TCL_DATA_CMD_TCL_CMD_NUMBER_LSB;
+
+	hal_tx_desc_cached[4] = tx_desc->length;
+	/* l3 and l4 checksum enable */
+	hal_tx_desc_cached[4] |= DP_TX_L3_L4_CSUM_ENABLE <<
+		TCL_DATA_CMD_IPV4_CHECKSUM_EN_LSB;
+
+	hal_tx_desc_cached[5] = vdev->lmac_id << TCL_DATA_CMD_PMAC_ID_LSB;
+	hal_tx_desc_cached[5] |= vdev->vdev_id << TCL_DATA_CMD_VDEV_ID_LSB;
+
+	hal_ring_hdl = dp_tx_get_hal_ring_hdl(soc, desc_pool_id);
+
+	if (qdf_unlikely(dp_tx_hal_ring_access_start(soc, hal_ring_hdl))) {
+		dp_err("HAL RING Access Failed -- %pK", hal_ring_hdl);
+		DP_STATS_INC(soc, tx.tcl_ring_full[desc_pool_id], 1);
+		DP_STATS_INC(vdev, tx_i.dropped.enqueue_fail, 1);
+		goto ring_access_fail2;
+	}
+
+	hal_tx_desc = hal_srng_src_get_next(soc->hal_soc, hal_ring_hdl);
+	if (qdf_unlikely(!hal_tx_desc)) {
+		dp_verbose_debug("TCL ring full ring_id:%d", desc_pool_id);
+		DP_STATS_INC(soc, tx.tcl_ring_full[desc_pool_id], 1);
+		DP_STATS_INC(vdev, tx_i.dropped.enqueue_fail, 1);
+		goto ring_access_fail;
+	}
+
+	tx_desc->flags |= DP_TX_DESC_FLAG_QUEUED_TX;
+
+	/* Sync cached descriptor with HW */
+	qdf_mem_copy(hal_tx_desc, hal_tx_desc_cached, DP_TX_FAST_DESC_SIZE);
+	qdf_dsb();
+
+	DP_STATS_INC_PKT(vdev, tx_i.processed, 1, tx_desc->length);
+	DP_STATS_INC(soc, tx.tcl_enq[desc_pool_id], 1);
+	status = QDF_STATUS_SUCCESS;
+
+ring_access_fail:
+	dp_tx_ring_access_end_wrapper(soc, hal_ring_hdl, 0);
+
+ring_access_fail2:
+	if (status != QDF_STATUS_SUCCESS) {
+		dp_tx_nbuf_unmap_be(soc, tx_desc);
+		goto release_desc;
+	}
+
+	return NULL;
+
+release_desc:
+	dp_tx_desc_release(tx_desc, desc_pool_id);
+
+	return nbuf;
+}
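Two descriptor-sync strategies now coexist, both visible in this commit: the regular path syncs through hal_tx_desc_sync() with an explicit byte count, while the fast path copies only the six words it populated and issues the barrier itself. Side by side:

/* regular path (dp_tx_hw_enqueue_be): sync the whole cached descriptor */
hal_tx_desc_sync(hal_tx_desc_cached, hal_tx_desc, num_desc_bytes);

/* fast path (dp_tx_fast_send_be): copy just the 24 bytes written above,
 * then a single data synchronization barrier
 */
qdf_mem_copy(hal_tx_desc, hal_tx_desc_cached, DP_TX_FAST_DESC_SIZE);
qdf_dsb();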

+ 19 - 0
dp/wifi3.0/be/dp_be_tx.h

@@ -46,6 +46,9 @@ struct __attribute__((__packed__)) dp_tx_comp_peer_id {
 	(((_var) & 0x30) >> 4)
 #define DP_TX_FLOW_OVERRIDE_ENABLE 0x1
 
+#define DP_TX_FAST_DESC_SIZE	24
+#define DP_TX_L3_L4_CSUM_ENABLE	0x1f
+
 /**
  * dp_tx_hw_enqueue_be() - Enqueue to TCL HW for transmit for BE target
  * @soc: DP Soc Handle
@@ -67,6 +70,22 @@ QDF_STATUS dp_tx_hw_enqueue_be(struct dp_soc *soc, struct dp_vdev *vdev,
 				struct cdp_tx_exception_metadata *metadata,
 				struct dp_tx_msdu_info_s *msdu_info);
 
+/**
+ * dp_tx_fast_send_be() - Fast send API to enqueue a frame directly to HW
+ * @soc: DP Soc Handle
+ * @vdev_id: DP vdev ID
+ * @nbuf: network buffer to be transmitted
+ *
+ *  Gets the next free TCL HW DMA descriptor and sets up required parameters
+ *  from software Tx descriptor
+ *
+ * Return: NULL for success
+ *         nbuf for failure
+ */
+qdf_nbuf_t dp_tx_fast_send_be(struct cdp_soc_t *soc, uint8_t vdev_id,
+			      qdf_nbuf_t nbuf);
+
 /**
  * dp_tx_comp_get_params_from_hal_desc_be() - Get TX desc from HAL comp desc
  * @soc: DP soc handle

+ 9 - 2
dp/wifi3.0/dp_main.c

@@ -6993,10 +6993,16 @@ static inline void dp_vdev_fetch_tx_handler(struct dp_vdev *vdev,
 	if (vdev->mesh_vdev)
 		ctx->tx = dp_tx_send_mesh;
 	else if ((wlan_cfg_is_tx_per_pkt_vdev_id_check_enabled(soc->wlan_cfg_ctx)) &&
-		 (vdev->opmode == wlan_op_mode_ap))
+		 (vdev->opmode == wlan_op_mode_ap)) {
 		ctx->tx = dp_tx_send_vdev_id_check;
-	else
+		ctx->tx_fast = dp_tx_send_vdev_id_check;
+	} else {
 		ctx->tx = dp_tx_send;
+		if (vdev->opmode == wlan_op_mode_ap)
+			ctx->tx_fast = soc->arch_ops.dp_tx_send_fast;
+		else
+			ctx->tx_fast = dp_tx_send;
+	}
 
 	/* Avoid check in regular exception Path */
 	if ((wlan_cfg_is_tx_per_pkt_vdev_id_check_enabled(soc->wlan_cfg_ctx)) &&
@@ -7021,6 +7027,7 @@ static inline void dp_vdev_register_tx_handler(struct dp_vdev *vdev,
 	dp_vdev_fetch_tx_handler(vdev, soc, &ctx);
 
 	txrx_ops->tx.tx = ctx.tx;
+	txrx_ops->tx.tx_fast = ctx.tx_fast;
 	txrx_ops->tx.tx_exception = ctx.tx_exception;
 
 	dp_info("Configure tx_vdev_id_chk_handler Feature Flag: %d and mode:%d for vdev_id:%d",
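Summarizing the selection logic above, the handlers fetched per vdev are:

    vdev state                                ctx->tx                    ctx->tx_fast
    mesh vdev                                 dp_tx_send_mesh            (unchanged by this function)
    per-pkt vdev-id check enabled, AP mode    dp_tx_send_vdev_id_check   dp_tx_send_vdev_id_check
    AP mode, no per-pkt vdev-id check         dp_tx_send                 arch_ops.dp_tx_send_fast
    other opmodes, no per-pkt vdev-id check   dp_tx_send                 dp_tx_send

Only an AP vdev without per-packet vdev-id checking reaches the architecture-specific fast path; every other case falls back to an existing handler, matching the registration comment in cdp_txrx_ops.h.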

+ 14 - 0
dp/wifi3.0/dp_stats.c

@@ -7320,6 +7320,15 @@ dp_print_pdev_tx_stats(struct dp_pdev *pdev)
 		       pdev->stats.tx_i.rcvd.num);
 	DP_PRINT_STATS("	Bytes = %llu",
 		       pdev->stats.tx_i.rcvd.bytes);
+	DP_PRINT_STATS("Received from Stack in FP:");
+	DP_PRINT_STATS("	Packets = %llu",
+		       pdev->stats.tx_i.rcvd_in_fast_xmit_flow);
+	DP_PRINT_STATS("Received from Stack per core:");
+	DP_PRINT_STATS("	Packets = %u %u %u %u",
+		       pdev->stats.tx_i.rcvd_per_core[0],
+		       pdev->stats.tx_i.rcvd_per_core[1],
+		       pdev->stats.tx_i.rcvd_per_core[2],
+		       pdev->stats.tx_i.rcvd_per_core[3]);
 	DP_PRINT_STATS("Processed:");
 	DP_PRINT_STATS("	Packets = %u",
 		       pdev->stats.tx_i.processed.num);
@@ -8659,6 +8668,11 @@ void dp_update_pdev_ingress_stats(struct dp_pdev *tgtobj,
 	DP_STATS_AGGR_PKT(tgtobj, srcobj, tx_i.nawds_mcast);
 
 	DP_STATS_AGGR_PKT(tgtobj, srcobj, tx_i.rcvd);
+	DP_STATS_AGGR(tgtobj, srcobj, tx_i.rcvd_in_fast_xmit_flow);
+	DP_STATS_AGGR(tgtobj, srcobj, tx_i.rcvd_per_core[0]);
+	DP_STATS_AGGR(tgtobj, srcobj, tx_i.rcvd_per_core[1]);
+	DP_STATS_AGGR(tgtobj, srcobj, tx_i.rcvd_per_core[2]);
+	DP_STATS_AGGR(tgtobj, srcobj, tx_i.rcvd_per_core[3]);
 	DP_STATS_AGGR_PKT(tgtobj, srcobj, tx_i.processed);
 	DP_STATS_AGGR_PKT(tgtobj, srcobj, tx_i.reinject_pkts);
 	DP_STATS_AGGR_PKT(tgtobj, srcobj, tx_i.inspect_pkts);

+ 4 - 145
dp/wifi3.0/dp_tx.c

@@ -120,150 +120,6 @@ uint8_t sec_type_map[MAX_CDP_SEC_TYPE] = {HAL_TX_ENCRYPT_TYPE_NO_CIPHER,
 					  HAL_TX_ENCRYPT_TYPE_WAPI_GCM_SM4};
 qdf_export_symbol(sec_type_map);
 
-#ifdef CONFIG_WLAN_SYSFS_MEM_STATS
-/**
- * dp_update_tx_desc_stats - Update the increase or decrease in
- * outstanding tx desc count
- * values on pdev and soc
- * @vdev: DP pdev handle
- *
- * Return: void
- */
-static inline void
-dp_update_tx_desc_stats(struct dp_pdev *pdev)
-{
-	int32_t tx_descs_cnt =
-		qdf_atomic_read(&pdev->num_tx_outstanding);
-	if (pdev->tx_descs_max < tx_descs_cnt)
-		pdev->tx_descs_max = tx_descs_cnt;
-	qdf_mem_tx_desc_cnt_update(pdev->num_tx_outstanding,
-				   pdev->tx_descs_max);
-}
-
-#else /* CONFIG_WLAN_SYSFS_MEM_STATS */
-
-static inline void
-dp_update_tx_desc_stats(struct dp_pdev *pdev)
-{
-}
-#endif /* CONFIG_WLAN_SYSFS_MEM_STATS */
-
-#ifdef QCA_TX_LIMIT_CHECK
-/**
- * dp_tx_limit_check - Check if allocated tx descriptors reached
- * soc max limit and pdev max limit
- * @vdev: DP vdev handle
- *
- * Return: true if allocated tx descriptors reached max configured value, else
- * false
- */
-static inline bool
-dp_tx_limit_check(struct dp_vdev *vdev)
-{
-	struct dp_pdev *pdev = vdev->pdev;
-	struct dp_soc *soc = pdev->soc;
-
-	if (qdf_atomic_read(&soc->num_tx_outstanding) >=
-			soc->num_tx_allowed) {
-		dp_tx_info("queued packets are more than max tx, drop the frame");
-		DP_STATS_INC(vdev, tx_i.dropped.desc_na.num, 1);
-		return true;
-	}
-
-	if (qdf_atomic_read(&pdev->num_tx_outstanding) >=
-			pdev->num_tx_allowed) {
-		dp_tx_info("queued packets are more than max tx, drop the frame");
-		DP_STATS_INC(vdev, tx_i.dropped.desc_na.num, 1);
-		DP_STATS_INC(vdev, tx_i.dropped.desc_na_exc_outstand.num, 1);
-		return true;
-	}
-	return false;
-}
-
-/**
- * dp_tx_exception_limit_check - Check if allocated tx exception descriptors
- * reached soc max limit
- * @vdev: DP vdev handle
- *
- * Return: true if allocated tx descriptors reached max configured value, else
- * false
- */
-static inline bool
-dp_tx_exception_limit_check(struct dp_vdev *vdev)
-{
-	struct dp_pdev *pdev = vdev->pdev;
-	struct dp_soc *soc = pdev->soc;
-
-	if (qdf_atomic_read(&soc->num_tx_exception) >=
-			soc->num_msdu_exception_desc) {
-		dp_info("exc packets are more than max drop the exc pkt");
-		DP_STATS_INC(vdev, tx_i.dropped.exc_desc_na.num, 1);
-		return true;
-	}
-
-	return false;
-}
-
-/**
- * dp_tx_outstanding_inc - Increment outstanding tx desc values on pdev and soc
- * @vdev: DP pdev handle
- *
- * Return: void
- */
-static inline void
-dp_tx_outstanding_inc(struct dp_pdev *pdev)
-{
-	struct dp_soc *soc = pdev->soc;
-
-	qdf_atomic_inc(&pdev->num_tx_outstanding);
-	qdf_atomic_inc(&soc->num_tx_outstanding);
-	dp_update_tx_desc_stats(pdev);
-}
-
-/**
- * dp_tx_outstanding__dec - Decrement outstanding tx desc values on pdev and soc
- * @vdev: DP pdev handle
- *
- * Return: void
- */
-static inline void
-dp_tx_outstanding_dec(struct dp_pdev *pdev)
-{
-	struct dp_soc *soc = pdev->soc;
-
-	qdf_atomic_dec(&pdev->num_tx_outstanding);
-	qdf_atomic_dec(&soc->num_tx_outstanding);
-	dp_update_tx_desc_stats(pdev);
-}
-
-#else //QCA_TX_LIMIT_CHECK
-static inline bool
-dp_tx_limit_check(struct dp_vdev *vdev)
-{
-	return false;
-}
-
-static inline bool
-dp_tx_exception_limit_check(struct dp_vdev *vdev)
-{
-	return false;
-}
-
-static inline void
-dp_tx_outstanding_inc(struct dp_pdev *pdev)
-{
-	qdf_atomic_inc(&pdev->num_tx_outstanding);
-	dp_update_tx_desc_stats(pdev);
-}
-
-static inline void
-dp_tx_outstanding_dec(struct dp_pdev *pdev)
-{
-	qdf_atomic_dec(&pdev->num_tx_outstanding);
-	dp_update_tx_desc_stats(pdev);
-}
-#endif //QCA_TX_LIMIT_CHECK
-
 #ifdef WLAN_FEATURE_DP_TX_DESC_HISTORY
 static inline enum dp_tx_event_type dp_tx_get_event_type(uint32_t flags)
 {
@@ -1288,6 +1144,7 @@ struct dp_tx_desc_s *dp_tx_prepare_desc_single(struct dp_vdev *vdev,
 	tx_desc->msdu_ext_desc = NULL;
 	tx_desc->pkt_offset = 0;
 	tx_desc->length = qdf_nbuf_headlen(nbuf);
+	tx_desc->shinfo_addr = skb_end_pointer(nbuf);
 
 	dp_tx_trace_pkt(soc, nbuf, tx_desc->id, vdev->vdev_id);
 
@@ -3616,6 +3473,8 @@ qdf_nbuf_t dp_tx_send(struct cdp_soc_t *soc_hdl, uint8_t vdev_id,
 	 *  to minimize lock contention for these resources.
 	 */
 	dp_tx_get_queue(vdev, nbuf, &msdu_info.tx_queue);
+	DP_STATS_INC(vdev, tx_i.rcvd_per_core[msdu_info.tx_queue.desc_pool_id],
+		     1);
 
 	/*
 	 * TCL H/W supports 2 DSCP-TID mapping tables.
@@ -5100,11 +4959,11 @@ void dp_tx_prefetch_next_nbuf_data(struct dp_tx_desc_s *next)
 		nbuf = next->nbuf;
 	if (nbuf) {
 		/* prefetch skb->next and first few bytes of skb->cb */
+		qdf_prefetch(next->shinfo_addr);
 		qdf_prefetch(nbuf);
 		/* prefetch skb fields present in different cachelines */
 		qdf_prefetch(&nbuf->len);
 		qdf_prefetch(&nbuf->users);
-		qdf_prefetch(skb_end_pointer(nbuf));
 	}
 }
 #else
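The swap above is the point of the new shinfo_addr field: skb_end_pointer() is now computed once at descriptor setup (see the dp_tx_prepare_desc_single hunk) and the completion path prefetches that cached address from the already-resident tx descriptor, instead of dereferencing the nbuf to compute it.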

+ 144 - 0
dp/wifi3.0/dp_tx.h

@@ -1097,4 +1097,148 @@ void dp_pkt_get_timestamp(uint64_t *time)
 {
 }
 #endif
+
+#ifdef CONFIG_WLAN_SYSFS_MEM_STATS
+/**
+ * dp_update_tx_desc_stats - Update the high-water mark of the
+ * outstanding tx desc count on the pdev
+ * @pdev: DP pdev handle
+ *
+ * Return: void
+ */
+static inline void
+dp_update_tx_desc_stats(struct dp_pdev *pdev)
+{
+	int32_t tx_descs_cnt =
+		qdf_atomic_read(&pdev->num_tx_outstanding);
+	if (pdev->tx_descs_max < tx_descs_cnt)
+		pdev->tx_descs_max = tx_descs_cnt;
+	qdf_mem_tx_desc_cnt_update(pdev->num_tx_outstanding,
+				   pdev->tx_descs_max);
+}
+
+#else /* CONFIG_WLAN_SYSFS_MEM_STATS */
+
+static inline void
+dp_update_tx_desc_stats(struct dp_pdev *pdev)
+{
+}
+#endif /* CONFIG_WLAN_SYSFS_MEM_STATS */
+
+#ifdef QCA_TX_LIMIT_CHECK
+/**
+ * dp_tx_limit_check - Check if allocated tx descriptors reached
+ * soc max limit and pdev max limit
+ * @vdev: DP vdev handle
+ *
+ * Return: true if allocated tx descriptors reached max configured value, else
+ * false
+ */
+static inline bool
+dp_tx_limit_check(struct dp_vdev *vdev)
+{
+	struct dp_pdev *pdev = vdev->pdev;
+	struct dp_soc *soc = pdev->soc;
+
+	if (qdf_atomic_read(&soc->num_tx_outstanding) >=
+			soc->num_tx_allowed) {
+		dp_tx_info("queued packets are more than max tx, drop the frame");
+		DP_STATS_INC(vdev, tx_i.dropped.desc_na.num, 1);
+		return true;
+	}
+
+	if (qdf_atomic_read(&pdev->num_tx_outstanding) >=
+			pdev->num_tx_allowed) {
+		dp_tx_info("queued packets are more than max tx, drop the frame");
+		DP_STATS_INC(vdev, tx_i.dropped.desc_na.num, 1);
+		DP_STATS_INC(vdev, tx_i.dropped.desc_na_exc_outstand.num, 1);
+		return true;
+	}
+	return false;
+}
+
+/**
+ * dp_tx_exception_limit_check - Check if allocated tx exception descriptors
+ * reached soc max limit
+ * @vdev: DP vdev handle
+ *
+ * Return: true if allocated tx descriptors reached max configured value, else
+ * false
+ */
+static inline bool
+dp_tx_exception_limit_check(struct dp_vdev *vdev)
+{
+	struct dp_pdev *pdev = vdev->pdev;
+	struct dp_soc *soc = pdev->soc;
+
+	if (qdf_atomic_read(&soc->num_tx_exception) >=
+			soc->num_msdu_exception_desc) {
+		dp_info("exc packets are more than max drop the exc pkt");
+		DP_STATS_INC(vdev, tx_i.dropped.exc_desc_na.num, 1);
+		return true;
+	}
+
+	return false;
+}
+
+/**
+ * dp_tx_outstanding_inc - Increment outstanding tx desc values on pdev and soc
+ * @vdev: DP pdev handle
+ *
+ * Return: void
+ */
+static inline void
+dp_tx_outstanding_inc(struct dp_pdev *pdev)
+{
+	struct dp_soc *soc = pdev->soc;
+
+	qdf_atomic_inc(&pdev->num_tx_outstanding);
+	qdf_atomic_inc(&soc->num_tx_outstanding);
+	dp_update_tx_desc_stats(pdev);
+}
+
+/**
+ * dp_tx_outstanding_dec - Decrement outstanding tx desc values on pdev and soc
+ * @pdev: DP pdev handle
+ *
+ * Return: void
+ */
+static inline void
+dp_tx_outstanding_dec(struct dp_pdev *pdev)
+{
+	struct dp_soc *soc = pdev->soc;
+
+	qdf_atomic_dec(&pdev->num_tx_outstanding);
+	qdf_atomic_dec(&soc->num_tx_outstanding);
+	dp_update_tx_desc_stats(pdev);
+}
+
+#else //QCA_TX_LIMIT_CHECK
+static inline bool
+dp_tx_limit_check(struct dp_vdev *vdev)
+{
+	return false;
+}
+
+static inline bool
+dp_tx_exception_limit_check(struct dp_vdev *vdev)
+{
+	return false;
+}
+
+static inline void
+dp_tx_outstanding_inc(struct dp_pdev *pdev)
+{
+	qdf_atomic_inc(&pdev->num_tx_outstanding);
+	dp_update_tx_desc_stats(pdev);
+}
+
+static inline void
+dp_tx_outstanding_dec(struct dp_pdev *pdev)
+{
+	qdf_atomic_dec(&pdev->num_tx_outstanding);
+	dp_update_tx_desc_stats(pdev);
+}
+#endif //QCA_TX_LIMIT_CHECK
 #endif
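Note: the limit-check and outstanding-counter helpers above are moved verbatim from dp_tx.c into this header (the 145 lines deleted there), making them visible for inlining into dp_tx_fast_send_be() in dp_be_tx.c.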

+ 19 - 11
dp/wifi3.0/dp_types.h

@@ -594,6 +594,7 @@ struct dp_tx_desc_s {
 	uint8_t frm_type;
 	uint8_t pkt_offset;
 	uint8_t  pool_id;
+	unsigned char *shinfo_addr;
 	struct dp_tx_ext_desc_elem_s *msdu_ext_desc;
 	qdf_ktime_t timestamp;
 	struct hal_tx_desc_comp_s comp;
@@ -1848,6 +1849,10 @@ struct dp_arch_ops {
 				  hal_ring_handle_t hal_ring_hdl,
 				  uint8_t reo_ring_num, uint32_t quota);
 
+	qdf_nbuf_t (*dp_tx_send_fast)(struct cdp_soc_t *soc_hdl,
+				      uint8_t vdev_id,
+				      qdf_nbuf_t nbuf);
+
 	QDF_STATUS (*dp_tx_desc_pool_init)(struct dp_soc *soc,
 					   uint32_t num_elem,
 					   uint8_t pool_id);
@@ -3147,6 +3152,12 @@ struct dp_vdev {
 	/* TBD: check alignment constraints */
 	uint16_t htt_tcl_metadata;
 
+	/* vdev lmac_id */
+	uint8_t lmac_id;
+
+	/* vdev bank_id */
+	uint8_t bank_id;
+
 	/* Mesh mode vdev */
 	uint32_t mesh_vdev;
 
@@ -3171,8 +3182,8 @@ struct dp_vdev {
 	/* AST hash value for BSS peer in HW valid for STA VAP*/
 	uint16_t bss_ast_hash;
 
-	/* vdev lmac_id */
-	int lmac_id;
+	/* AST hash index for BSS peer in HW valid for STA VAP*/
+	uint16_t bss_ast_idx;
 
 	bool multipass_en;
 
@@ -3278,6 +3289,12 @@ struct dp_vdev {
 	struct dp_tx_desc_pool_s *tx_desc;
 	struct dp_tx_ext_desc_pool_s *tx_ext_desc;
 
+	/* Capture timestamp of previous tx packet enqueued */
+	uint64_t prev_tx_enq_tstamp;
+
+	/* Capture timestamp of previous rx packet delivered */
+	uint64_t prev_rx_deliver_tstamp;
+
 	/* VDEV Stats */
 	struct cdp_vdev_stats stats;
 
@@ -3299,15 +3316,6 @@ struct dp_vdev {
 	bool raw_mode_war;
 
 
-	/* AST hash index for BSS peer in HW valid for STA VAP*/
-	uint16_t bss_ast_idx;
-
-	/* Capture timestamp of previous tx packet enqueued */
-	uint64_t prev_tx_enq_tstamp;
-
-	/* Capture timestamp of previous rx packet delivered */
-	uint64_t prev_rx_deliver_tstamp;
-
 	/* 8021p PCP-TID mapping table ID */
 	uint8_t tidmap_tbl_id;
 

+ 1 - 0
dp/wifi3.0/li/dp_li.c

@@ -563,6 +563,7 @@ void dp_initialize_arch_ops_li(struct dp_arch_ops *arch_ops)
 #ifndef QCA_HOST_MODE_WIFI_DISABLED
 	arch_ops->tx_hw_enqueue = dp_tx_hw_enqueue_li;
 	arch_ops->dp_rx_process = dp_rx_process_li;
+	arch_ops->dp_tx_send_fast = dp_tx_send;
 	arch_ops->tx_comp_get_params_from_hal_desc =
 		dp_tx_comp_get_params_from_hal_desc_li;
 	arch_ops->dp_tx_process_htt_completion =

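Lithium targets get no dedicated fast handler; dp_tx_send_fast simply aliases the regular dp_tx_send, so the tx_fast callback is safe to invoke on any architecture.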
+ 2 - 2
hal/wifi3.0/be/hal_be_tx.h

@@ -403,9 +403,9 @@ static inline void hal_tx_desc_set_hlos_tid(void *desc,
  * @hw_desc: Hardware descriptor to be updated
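+ * @num_bytes: number of descriptor bytes to copy to the HW ring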
  */
 static inline void hal_tx_desc_sync(void *hal_tx_desc_cached,
-				    void *hw_desc)
+				    void *hw_desc, uint8_t num_bytes)
 {
-	qdf_mem_copy(hw_desc, hal_tx_desc_cached, HAL_TX_DESC_LEN_BYTES);
+	qdf_mem_copy(hw_desc, hal_tx_desc_cached, num_bytes);
 }
 
 /**