From 9d3b562c0c26c14e3988a785b40965f500397810 Mon Sep 17 00:00:00 2001 From: Venkateswara Naralasetty Date: Fri, 10 Feb 2023 13:44:18 +0530 Subject: [PATCH] qcacmn: add NAPI schedule latency and poll time histogram support Add support for NAPI schedule latency and poll time histogram stats for WCN6450. Change-Id: If982ebe4e3cfa80f47c6d2fe9d4cb9dfa318481d CRs-Fixed: 3485287 --- hif/inc/hif.h | 27 +++++++++ hif/src/hif_exec.h | 17 ------ hif/src/hif_napi.c | 136 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 163 insertions(+), 17 deletions(-) diff --git a/hif/inc/hif.h b/hif/inc/hif.h index f935d0faae..a450c83dc1 100644 --- a/hif/inc/hif.h +++ b/hif/inc/hif.h @@ -311,6 +311,23 @@ struct qca_napi_stat { #endif }; +/*Number of buckets for latency*/ +#define HIF_SCHED_LATENCY_BUCKETS 8 + +/*Buckets for latency between 0 to 2 ms*/ +#define HIF_SCHED_LATENCY_BUCKET_0_2 2 +/*Buckets for latency between 3 to 10 ms*/ +#define HIF_SCHED_LATENCY_BUCKET_3_10 10 +/*Buckets for latency between 11 to 20 ms*/ +#define HIF_SCHED_LATENCY_BUCKET_11_20 20 +/*Buckets for latency between 21 to 50 ms*/ +#define HIF_SCHED_LATENCY_BUCKET_21_50 50 +/*Buckets for latency between 50 to 100 ms*/ +#define HIF_SCHED_LATENCY_BUCKET_51_100 100 +/*Buckets for latency between 100 to 250 ms*/ +#define HIF_SCHED_LATENCY_BUCKET_101_250 250 +/*Buckets for latency between 250 to 500 ms*/ +#define HIF_SCHED_LATENCY_BUCKET_251_500 500 /** * struct qca_napi_info - per NAPI instance data structure @@ -327,6 +344,9 @@ struct qca_napi_stat { * @rx_thread_napi: * @rx_thread_netdev: * @lro_ctx: + * @poll_start_time: napi poll service start time + * @sched_latency_stats: napi schedule latency stats + * @tstamp: napi schedule start timestamp * * This data structure holds stuff per NAPI instance. 
 * Note that, in the current implementation, though scale is @@ -350,6 +370,13 @@ struct qca_napi_info { struct net_device rx_thread_netdev; #endif /* RECEIVE_OFFLOAD */ qdf_lro_ctx_t lro_ctx; +#ifdef WLAN_FEATURE_RX_SOFTIRQ_TIME_LIMIT + unsigned long long poll_start_time; +#endif +#ifdef HIF_LATENCY_PROFILE_ENABLE + uint64_t sched_latency_stats[HIF_SCHED_LATENCY_BUCKETS]; + uint64_t tstamp; +#endif }; enum qca_napi_tput_state { diff --git a/hif/src/hif_exec.h b/hif/src/hif_exec.h index 5f9f8e3d75..cb4fd11f5c 100644 --- a/hif/src/hif_exec.h +++ b/hif/src/hif_exec.h @@ -23,23 +23,6 @@ #include #include #include -/*Number of buckets for latency*/ -#define HIF_SCHED_LATENCY_BUCKETS 8 - -/*Buckets for latency between 0 to 2 ms*/ -#define HIF_SCHED_LATENCY_BUCKET_0_2 2 -/*Buckets for latency between 3 to 10 ms*/ -#define HIF_SCHED_LATENCY_BUCKET_3_10 10 -/*Buckets for latency between 11 to 20 ms*/ -#define HIF_SCHED_LATENCY_BUCKET_11_20 20 -/*Buckets for latency between 21 to 50 ms*/ -#define HIF_SCHED_LATENCY_BUCKET_21_50 50 -/*Buckets for latency between 50 to 100 ms*/ -#define HIF_SCHED_LATENCY_BUCKET_51_100 100 -/*Buckets for latency between 100 to 250 ms*/ -#define HIF_SCHED_LATENCY_BUCKET_101_250 250 -/*Buckets for latency between 250 to 500 ms*/ -#define HIF_SCHED_LATENCY_BUCKET_251_500 500 #ifndef IRQ_DISABLED_MAX_DURATION_NS #define IRQ_DISABLED_MAX_DURATION_NS 100000000 diff --git a/hif/src/hif_napi.c b/hif/src/hif_napi.c index aa27a7b649..4352f6f39b 100644 --- a/hif/src/hif_napi.c +++ b/hif/src/hif_napi.c @@ -749,6 +749,136 @@ inline void hif_napi_enable_irq(struct hif_opaque_softc *hif, int id) hif_irq_enable(scn, NAPI_ID2PIPE(id)); } +#ifdef HIF_LATENCY_PROFILE_ENABLE +/** + * hif_napi_latency_profile_start() - update the schedule start timestamp + * + * @scn: HIF context + * @ce_id: Copy engine id + * + * Return: None + */ +static inline void hif_napi_latency_profile_start(struct hif_softc *scn, + int ce_id) +{ + struct qca_napi_info *napii; + + napii =
scn->napi_data.napis[ce_id]; + if (napii) + napii->tstamp = qdf_ktime_to_ms(qdf_ktime_get()); +} + +/* + * hif_napi_latency_profile_measure() - calculate the NAPI schedule latency + * and update histogram + * + * @napi_info: pointer to qca_napi_info for the napi instance + * + * Return: None + */ +static void hif_napi_latency_profile_measure(struct qca_napi_info *napi_info) +{ + int64_t cur_tstamp; + int64_t time_elapsed; + + cur_tstamp = qdf_ktime_to_ms(qdf_ktime_get()); + + if (cur_tstamp > napi_info->tstamp) + time_elapsed = (cur_tstamp - napi_info->tstamp); + else + time_elapsed = ~0x0 - (napi_info->tstamp - cur_tstamp); + + napi_info->tstamp = cur_tstamp; + + if (time_elapsed <= HIF_SCHED_LATENCY_BUCKET_0_2) + napi_info->sched_latency_stats[0]++; + else if (time_elapsed <= HIF_SCHED_LATENCY_BUCKET_3_10) + napi_info->sched_latency_stats[1]++; + else if (time_elapsed <= HIF_SCHED_LATENCY_BUCKET_11_20) + napi_info->sched_latency_stats[2]++; + else if (time_elapsed <= HIF_SCHED_LATENCY_BUCKET_21_50) + napi_info->sched_latency_stats[3]++; + else if (time_elapsed <= HIF_SCHED_LATENCY_BUCKET_51_100) + napi_info->sched_latency_stats[4]++; + else if (time_elapsed <= HIF_SCHED_LATENCY_BUCKET_101_250) + napi_info->sched_latency_stats[5]++; + else if (time_elapsed <= HIF_SCHED_LATENCY_BUCKET_251_500) + napi_info->sched_latency_stats[6]++; + else + napi_info->sched_latency_stats[7]++; +} +#else +static inline void +hif_napi_latency_profile_start(struct hif_softc *scn, int ce_id) +{ +} + +static inline void +hif_napi_latency_profile_measure(struct qca_napi_info *napi_info) +{ +} +#endif + +#ifdef WLAN_FEATURE_RX_SOFTIRQ_TIME_LIMIT +/** + * hif_napi_update_service_start_time() - Update NAPI poll start time + * + * @napi_info: per NAPI instance data structure + * + * The function is called at the beginning of a NAPI poll to record the poll + * start time. 
+ * + * Return: None + */ +static inline void +hif_napi_update_service_start_time(struct qca_napi_info *napi_info) +{ + napi_info->poll_start_time = qdf_time_sched_clock(); +} + +/** + * hif_napi_fill_poll_time_histogram() - fills poll time histogram for a NAPI + * + * @napi_info: per NAPI instance data structure + * + * The function is called at the end of a NAPI poll to calculate poll time + * buckets. + * + * Return: void + */ +static void hif_napi_fill_poll_time_histogram(struct qca_napi_info *napi_info) +{ + struct qca_napi_stat *napi_stat; + unsigned long long poll_time_ns; + uint32_t poll_time_us; + uint32_t bucket_size_us = 500; + uint32_t bucket; + uint32_t cpu_id = qdf_get_cpu(); + + poll_time_ns = qdf_time_sched_clock() - napi_info->poll_start_time; + poll_time_us = qdf_do_div(poll_time_ns, 1000); + + napi_stat = &napi_info->stats[cpu_id]; + if (poll_time_ns > napi_info->stats[cpu_id].napi_max_poll_time) + napi_info->stats[cpu_id].napi_max_poll_time = poll_time_ns; + + bucket = poll_time_us / bucket_size_us; + if (bucket >= QCA_NAPI_NUM_BUCKETS) + bucket = QCA_NAPI_NUM_BUCKETS - 1; + + ++napi_stat->poll_time_buckets[bucket]; +} +#else +static inline void +hif_napi_update_service_start_time(struct qca_napi_info *napi_info) +{ +} + +static inline void +hif_napi_fill_poll_time_histogram(struct qca_napi_info *napi_info) +{ +} +#endif /** * hif_napi_schedule() - schedules napi, updates stats @@ -780,6 +910,7 @@ bool hif_napi_schedule(struct hif_opaque_softc *hif_ctx, int ce_id) NULL, NULL, 0, 0); napii->stats[cpu].napi_schedules++; NAPI_DEBUG("scheduling napi %d (ce:%d)", napii->id, ce_id); + hif_napi_latency_profile_start(scn, ce_id); napi_schedule(&(napii->napi)); return true; @@ -896,6 +1027,9 @@ int hif_napi_poll(struct hif_opaque_softc *hif_ctx, napi_info = (struct qca_napi_info *) container_of(napi, struct qca_napi_info, napi); + hif_napi_update_service_start_time(napi_info); + hif_napi_latency_profile_measure(napi_info); + NAPI_DEBUG("%s -->(napi(%d, 
irq=%d), budget=%d)", __func__, napi_info->id, napi_info->irq, budget); @@ -979,6 +1113,8 @@ int hif_napi_poll(struct hif_opaque_softc *hif_ctx, hif_record_ce_desc_event(hif, NAPI_ID2PIPE(napi_info->id), NAPI_POLL_EXIT, NULL, NULL, normalized, 0); + hif_napi_fill_poll_time_histogram(napi_info); + NAPI_DEBUG("%s <--[normalized=%d]", __func__, normalized); return normalized; out: