sched: Add snapshot of Window Assisted Load Tracking (WALT)

This snapshot is taken from msm-4.19 as of commit 5debecbe7195
("trace: filter out spurious preemption and IRQs disable traces").

Change-Id: I8fab4084971baadcaa037f40ab549fc073a4b1ea
Signed-off-by: Satya Durga Srinivasu Prabhala <satyap@codeaurora.org>

@@ -183,9 +183,65 @@ static struct attribute_group crash_note_cpu_attr_group = {
};
#endif
#ifdef CONFIG_SCHED_WALT
static ssize_t sched_load_boost_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
ssize_t rc;
int boost;
struct cpu *cpu = container_of(dev, struct cpu, dev);
int cpuid = cpu->dev.id;
boost = per_cpu(sched_load_boost, cpuid);
rc = scnprintf(buf, PAGE_SIZE-2, "%d\n", boost);
return rc;
}
static ssize_t __ref sched_load_boost_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
int err;
int boost;
struct cpu *cpu = container_of(dev, struct cpu, dev);
int cpuid = cpu->dev.id;
err = kstrtoint(strstrip((char *)buf), 0, &boost);
if (err)
return err;
/*
* -100 is low enough to cancel out CPU's load and make it near zero.
* 1000 is close to the maximum value that cpu_util_freq_{walt,pelt}
* can take without overflow.
*/
if (boost < -100 || boost > 1000)
return -EINVAL;
per_cpu(sched_load_boost, cpuid) = boost;
return count;
}
static DEVICE_ATTR_RW(sched_load_boost);
static struct attribute *sched_cpu_attrs[] = {
&dev_attr_sched_load_boost.attr,
NULL
};
static struct attribute_group sched_cpu_attr_group = {
.attrs = sched_cpu_attrs,
};
#endif
static const struct attribute_group *common_cpu_attr_groups[] = {
#ifdef CONFIG_KEXEC
&crash_note_cpu_attr_group,
#endif
#ifdef CONFIG_SCHED_WALT
&sched_cpu_attr_group,
#endif
NULL
};
@@ -193,6 +249,9 @@ static const struct attribute_group *common_cpu_attr_groups[] = {
static const struct attribute_group *hotplugable_cpu_attr_groups[] = {
#ifdef CONFIG_KEXEC
&crash_note_cpu_attr_group,
#endif
#ifdef CONFIG_SCHED_WALT
&sched_cpu_attr_group,
#endif
NULL
};

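The sched_load_boost attribute added above accepts a per-CPU percentage in [-100, 1000]. How the scheduler consumes the value lives in cpu_util_freq_{walt,pelt}(), which is outside this hunk; the sketch below only illustrates the percentage model implied by the comment (a hypothetical apply_load_boost() helper, not kernel code).

#include <stdint.h>
#include <stdio.h>

/*
 * Hypothetical model of the boost percentage accepted by the sysfs file
 * above: -100 cancels the load, positive values scale it up. The in-kernel
 * consumers (cpu_util_freq_walt/pelt) are not shown in this commit.
 */
static int64_t apply_load_boost(int64_t util, int boost)
{
	if (boost < -100 || boost > 1000)
		return -1;			/* mirrors the -EINVAL check */
	return util * (100 + boost) / 100;
}

int main(void)
{
	printf("%lld\n", (long long)apply_load_boost(512, -100));	/* 0 */
	printf("%lld\n", (long long)apply_load_boost(512, 100));	/* 1024 */
	printf("%lld\n", (long long)apply_load_boost(512, 1000));	/* 5632 */
	return 0;
}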

@@ -1459,6 +1459,56 @@ static const struct file_operations proc_pid_sched_operations = {
#endif
/*
* Print out various scheduling related per-task fields:
*/
#ifdef CONFIG_SCHED_WALT
extern int __weak sched_wake_up_idle_show(struct seq_file *m, void *v);
extern ssize_t __weak sched_wake_up_idle_write(struct file *file,
const char __user *buf, size_t count, loff_t *offset);
extern int __weak sched_wake_up_idle_open(struct inode *inode,
struct file *filp);
static const struct file_operations proc_pid_sched_wake_up_idle_operations = {
.open = sched_wake_up_idle_open,
.read = seq_read,
.write = sched_wake_up_idle_write,
.llseek = seq_lseek,
.release = single_release,
};
extern int __weak sched_init_task_load_show(struct seq_file *m, void *v);
extern ssize_t __weak
sched_init_task_load_write(struct file *file, const char __user *buf,
size_t count, loff_t *offset);
extern int __weak
sched_init_task_load_open(struct inode *inode, struct file *filp);
static const struct file_operations proc_pid_sched_init_task_load_operations = {
.open = sched_init_task_load_open,
.read = seq_read,
.write = sched_init_task_load_write,
.llseek = seq_lseek,
.release = single_release,
};
extern int __weak sched_group_id_show(struct seq_file *m, void *v);
extern ssize_t __weak
sched_group_id_write(struct file *file, const char __user *buf,
size_t count, loff_t *offset);
extern int __weak sched_group_id_open(struct inode *inode, struct file *filp);
static const struct file_operations proc_pid_sched_group_id_operations = {
.open = sched_group_id_open,
.read = seq_read,
.write = sched_group_id_write,
.llseek = seq_lseek,
.release = single_release,
};
#endif /* CONFIG_SCHED_WALT */
#ifdef CONFIG_SCHED_AUTOGROUP
/*
* Print out autogroup related information:
@@ -3011,6 +3061,13 @@ static const struct pid_entry tgid_base_stuff[] = {
ONE("status", S_IRUGO, proc_pid_status),
ONE("personality", S_IRUSR, proc_pid_personality),
ONE("limits", S_IRUGO, proc_pid_limits),
#ifdef CONFIG_SCHED_WALT
REG("sched_wake_up_idle", 00644,
proc_pid_sched_wake_up_idle_operations),
REG("sched_init_task_load", 00644,
proc_pid_sched_init_task_load_operations),
REG("sched_group_id", 00666, proc_pid_sched_group_id_operations),
#endif
#ifdef CONFIG_SCHED_DEBUG
REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
#endif

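The three per-task entries registered above appear under /proc/<pid>/. A minimal user-space sketch of poking them for the current task follows; the accepted value ranges (for example, treating sched_init_task_load as a percentage) are assumptions, since the handlers are only declared __weak in this hunk.

#include <stdio.h>

/* Sketch only: exercises the files registered above for the calling task. */
int main(void)
{
	FILE *f = fopen("/proc/self/sched_init_task_load", "w");

	if (f) {
		fputs("50\n", f);	/* assumed: percent of a full window */
		fclose(f);
	}

	f = fopen("/proc/self/sched_group_id", "r");
	if (f) {
		int gid;

		if (fscanf(f, "%d", &gid) == 1)
			printf("sched_group_id: %d\n", gid);
		fclose(f);
	}
	return 0;
}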

@@ -69,6 +69,9 @@ enum cpuhp_state {
CPUHP_SLAB_PREPARE,
CPUHP_MD_RAID5_PREPARE,
CPUHP_RCUTREE_PREP,
#ifdef CONFIG_SCHED_WALT
CPUHP_CORE_CTL_ISOLATION_DEAD,
#endif
CPUHP_CPUIDLE_COUPLED_PREPARE,
CPUHP_POWERPC_PMAC_PREPARE,
CPUHP_POWERPC_MMU_CTX_PREPARE,


@@ -117,6 +117,18 @@ struct task_group;
(task->flags & PF_FROZEN) == 0 && \
(task->state & TASK_NOLOAD) == 0)
/*
* Enum for display driver to provide varying refresh rates
*/
enum fps {
FPS0 = 0,
FPS30 = 30,
FPS48 = 48,
FPS60 = 60,
FPS90 = 90,
FPS120 = 120,
};
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
/*
@@ -212,6 +224,21 @@ struct task_group;
/* Task command name length: */
#define TASK_COMM_LEN 16
enum task_event {
PUT_PREV_TASK = 0,
PICK_NEXT_TASK = 1,
TASK_WAKE = 2,
TASK_MIGRATE = 3,
TASK_UPDATE = 4,
IRQ_UPDATE = 5,
};
/* Note: this needs to be in sync with migrate_type_names array */
enum migrate_types {
GROUP_TO_RQ,
RQ_TO_GROUP,
};
extern void scheduler_tick(void);
#define MAX_SCHEDULE_TIMEOUT LONG_MAX
@@ -478,6 +505,89 @@ struct sched_entity {
#endif
};
struct cpu_cycle_counter_cb {
u64 (*get_cpu_cycle_counter)(int cpu);
};
DECLARE_PER_CPU_READ_MOSTLY(int, sched_load_boost);
#ifdef CONFIG_SCHED_WALT
extern void __weak sched_exit(struct task_struct *p);
extern int __weak
register_cpu_cycle_counter_cb(struct cpu_cycle_counter_cb *cb);
extern void __weak
sched_update_cpu_freq_min_max(const cpumask_t *cpus, u32 fmin, u32 fmax);
extern void __weak free_task_load_ptrs(struct task_struct *p);
extern void __weak sched_set_refresh_rate(enum fps fps);
#define RAVG_HIST_SIZE_MAX 5
#define NUM_BUSY_BUCKETS 10
/* ravg represents frequency scaled cpu-demand of tasks */
struct ravg {
/*
* 'mark_start' marks the beginning of an event (task waking up, task
* starting to execute, task being preempted) within a window
*
* 'sum' represents how runnable a task has been within current
* window. It incorporates both running time and wait time and is
* frequency scaled.
*
* 'sum_history' keeps track of history of 'sum' seen over previous
* RAVG_HIST_SIZE windows. Windows where task was entirely sleeping are
* ignored.
*
* 'demand' represents maximum sum seen over previous
* sysctl_sched_ravg_hist_size windows. 'demand' could drive frequency
* demand for tasks.
*
* 'curr_window_cpu' represents task's contribution to cpu busy time on
* various CPUs in the current window
*
* 'prev_window_cpu' represents task's contribution to cpu busy time on
* various CPUs in the previous window
*
* 'curr_window' represents the sum of all entries in curr_window_cpu
*
* 'prev_window' represents the sum of all entries in prev_window_cpu
*
* 'pred_demand' represents task's current predicted cpu busy time
*
* 'busy_buckets' groups historical busy time into different buckets
* used for prediction
*
* 'demand_scaled' represents task's demand scaled to 1024
*/
u64 mark_start;
u32 sum, demand;
u32 coloc_demand;
u32 sum_history[RAVG_HIST_SIZE_MAX];
u32 *curr_window_cpu, *prev_window_cpu;
u32 curr_window, prev_window;
u32 pred_demand;
u8 busy_buckets[NUM_BUSY_BUCKETS];
u16 demand_scaled;
u16 pred_demand_scaled;
u64 active_time;
u64 last_win_size;
};
#else
static inline void sched_exit(struct task_struct *p) { }
static inline int
register_cpu_cycle_counter_cb(struct cpu_cycle_counter_cb *cb)
{
return 0;
}
static inline void free_task_load_ptrs(struct task_struct *p) { }
static inline void sched_update_cpu_freq_min_max(const cpumask_t *cpus,
u32 fmin, u32 fmax) { }
static inline void sched_set_refresh_rate(enum fps fps) { }
#endif /* CONFIG_SCHED_WALT */
struct sched_rt_entity {
struct list_head run_list;
unsigned long timeout;
@@ -675,6 +785,20 @@ struct task_struct {
const struct sched_class *sched_class;
struct sched_entity se;
struct sched_rt_entity rt;
#ifdef CONFIG_SCHED_WALT
u64 last_sleep_ts;
bool wake_up_idle;
struct ravg ravg;
u32 init_load_pct;
u64 last_wake_ts;
u64 last_enqueued_ts;
struct related_thread_group *grp;
struct list_head grp_list;
u64 cpu_cycles;
bool misfit;
u8 unfilter;
#endif
#ifdef CONFIG_CGROUP_SCHED
struct task_group *sched_task_group;
#endif
@@ -2000,4 +2124,37 @@ int sched_trace_rq_cpu(struct rq *rq);
const struct cpumask *sched_trace_rd_span(struct root_domain *rd);
#ifdef CONFIG_SCHED_WALT
#define PF_WAKE_UP_IDLE 1
static inline u32 sched_get_wake_up_idle(struct task_struct *p)
{
return p->wake_up_idle;
}
static inline int sched_set_wake_up_idle(struct task_struct *p,
int wake_up_idle)
{
p->wake_up_idle = !!wake_up_idle;
return 0;
}
static inline void set_wake_up_idle(bool enabled)
{
current->wake_up_idle = enabled;
}
#else
static inline u32 sched_get_wake_up_idle(struct task_struct *p)
{
return 0;
}
static inline int sched_set_wake_up_idle(struct task_struct *p,
int wake_up_idle)
{
return 0;
}
static inline void set_wake_up_idle(bool enabled) {}
#endif
#endif

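The struct ravg comments above describe 'demand' as being derived from the last few window sums and 'demand_scaled' as that figure scaled to 1024. A small stand-alone sketch of one plausible policy (take the maximum of the recent windows, then scale by an assumed 20 ms window) is shown below; the real selection is governed by sysctl_sched_window_stats_policy, which is not part of this hunk.

#include <stdint.h>
#include <stdio.h>

#define RAVG_HIST_SIZE_MAX 5

/* One plausible policy: demand is the maximum recent window sum. */
static uint32_t demand_from_history(const uint32_t *hist, int n)
{
	uint32_t max = 0;
	int i;

	for (i = 0; i < n; i++)
		if (hist[i] > max)
			max = hist[i];
	return max;
}

/* Assumed scaling to the 0..1024 range used by demand_scaled. */
static uint16_t scale_demand(uint32_t demand_ns, uint32_t window_ns)
{
	return (uint16_t)((uint64_t)demand_ns * 1024 / window_ns);
}

int main(void)
{
	uint32_t hist[RAVG_HIST_SIZE_MAX] = {
		4000000, 2500000, 6000000, 1000000, 3000000	/* ns busy per window */
	};
	uint32_t window_ns = 20000000;		/* assumed 20 ms window */
	uint32_t demand = demand_from_history(hist, RAVG_HIST_SIZE_MAX);

	printf("demand=%u ns, scaled=%u/1024\n", demand,
	       scale_demand(demand, window_ns));
	return 0;
}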

@@ -0,0 +1,31 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2016, 2019, The Linux Foundation. All rights reserved.
*/
#ifndef __CORE_CTL_H
#define __CORE_CTL_H
#define MAX_CPUS_PER_CLUSTER 6
#define MAX_CLUSTERS 3
struct core_ctl_notif_data {
unsigned int nr_big;
unsigned int coloc_load_pct;
unsigned int ta_util_pct[MAX_CLUSTERS];
unsigned int cur_cap_pct[MAX_CLUSTERS];
};
#ifdef CONFIG_SCHED_WALT
extern int __weak core_ctl_set_boost(bool boost);
extern void __weak core_ctl_notifier_register(struct notifier_block *n);
extern void __weak core_ctl_notifier_unregister(struct notifier_block *n);
#else
static inline int core_ctl_set_boost(bool boost)
{
return 0;
}
static inline void core_ctl_notifier_register(struct notifier_block *n) {}
static inline void core_ctl_notifier_unregister(struct notifier_block *n) {}
#endif
#endif

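The new core_ctl.h exposes a boost hook and a notifier pair. A hypothetical kernel-module client might look like the sketch below; the include path and the assumption that the notifier payload is a struct core_ctl_notif_data are not established by this commit.

#include <linux/module.h>
#include <linux/notifier.h>
#include "core_ctl.h"	/* include path depends on where this header lands */

/* Hypothetical payload interpretation; the callers are outside this commit. */
static int demo_core_ctl_cb(struct notifier_block *nb, unsigned long action,
			    void *data)
{
	struct core_ctl_notif_data *nd = data;

	pr_info("core_ctl: nr_big=%u coloc_load=%u%%\n",
		nd->nr_big, nd->coloc_load_pct);
	return NOTIFY_OK;
}

static struct notifier_block demo_core_ctl_nb = {
	.notifier_call = demo_core_ctl_cb,
};

static int __init demo_init(void)
{
	core_ctl_notifier_register(&demo_core_ctl_nb);
	return 0;
}

static void __exit demo_exit(void)
{
	core_ctl_notifier_unregister(&demo_core_ctl_nb);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");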

@@ -10,6 +10,11 @@
#define SCHED_CPUFREQ_IOWAIT (1U << 0)
#define SCHED_CPUFREQ_MIGRATION (1U << 1)
#define SCHED_CPUFREQ_INTERCLUSTER_MIG (1U << 3)
#define SCHED_CPUFREQ_WALT (1U << 4)
#define SCHED_CPUFREQ_PL (1U << 5)
#define SCHED_CPUFREQ_EARLY_DET (1U << 6)
#define SCHED_CPUFREQ_CONTINUE (1U << 8)
#ifdef CONFIG_CPU_FREQ
struct update_util_data {


@@ -21,6 +21,28 @@ extern bool single_task_running(void);
extern unsigned long nr_iowait(void);
extern unsigned long nr_iowait_cpu(int cpu);
#ifdef CONFIG_SCHED_WALT
extern void __weak sched_update_nr_prod(int cpu, long delta, bool inc);
extern unsigned int __weak sched_get_cpu_util(int cpu);
extern void __weak sched_update_hyst_times(void);
extern u64 __weak sched_lpm_disallowed_time(int cpu);
#else
static inline void sched_update_nr_prod(int cpu, long delta, bool inc) {}
static inline unsigned int sched_get_cpu_util(int cpu)
{
return 0;
}
static inline u64 sched_get_cpu_last_busy_time(int cpu)
{
return 0;
}
static inline void sched_update_hyst_times(void) {}
static inline u64 sched_lpm_disallowed_time(int cpu)
{
return 0;
}
#endif
static inline int sched_info_on(void)
{
#ifdef CONFIG_SCHEDSTATS


@@ -24,6 +24,42 @@ extern unsigned int sysctl_sched_latency;
extern unsigned int sysctl_sched_min_granularity;
extern unsigned int sysctl_sched_wakeup_granularity;
extern unsigned int sysctl_sched_child_runs_first;
#ifdef CONFIG_SCHED_WALT
extern unsigned int __weak sysctl_sched_user_hint;
extern const int __weak sched_user_hint_max;
extern unsigned int __weak sysctl_sched_cpu_high_irqload;
extern unsigned int __weak sysctl_sched_boost;
extern unsigned int __weak sysctl_sched_group_upmigrate_pct;
extern unsigned int __weak sysctl_sched_group_downmigrate_pct;
extern unsigned int __weak sysctl_sched_conservative_pl;
extern unsigned int __weak sysctl_sched_walt_rotate_big_tasks;
extern unsigned int __weak sysctl_sched_min_task_util_for_boost;
extern unsigned int __weak sysctl_sched_min_task_util_for_colocation;
extern unsigned int __weak sysctl_sched_asym_cap_sibling_freq_match_pct;
extern unsigned int __weak sysctl_sched_coloc_downmigrate_ns;
extern unsigned int __weak sysctl_sched_task_unfilter_nr_windows;
extern unsigned int __weak sysctl_sched_busy_hyst_enable_cpus;
extern unsigned int __weak sysctl_sched_busy_hyst;
extern unsigned int __weak sysctl_sched_coloc_busy_hyst_enable_cpus;
extern unsigned int __weak sysctl_sched_coloc_busy_hyst;
extern unsigned int __weak sysctl_sched_coloc_busy_hyst_max_ms;
extern unsigned int __weak sysctl_sched_window_stats_policy;
extern unsigned int __weak sysctl_sched_ravg_window_nr_ticks;
extern int __weak
walt_proc_group_thresholds_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos);
extern int __weak
walt_proc_user_hint_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos);
extern int __weak
sched_ravg_window_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos);
#endif
enum sched_tunable_scaling {
SCHED_TUNABLESCALING_NONE,
@@ -47,6 +83,10 @@ int sched_proc_update_handler(struct ctl_table *table, int write,
loff_t *ppos);
#endif
#ifdef CONFIG_SCHED_WALT
extern int __weak sched_boost_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
#endif
/*
* control realtime throttling:
*


@@ -73,6 +73,9 @@ extern int proc_do_large_bitmap(struct ctl_table *, int,
extern int proc_do_static_key(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos);
extern int proc_douintvec_ravg_window(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos);
/*
* Register a set of sysctl names by calling register_sysctl_table


@@ -470,6 +470,15 @@ config HAVE_SCHED_AVG_IRQ
depends on IRQ_TIME_ACCOUNTING || PARAVIRT_TIME_ACCOUNTING
depends on SMP
config SCHED_WALT
bool "Support window based load tracking"
depends on SMP
help
This feature will allow the scheduler to maintain a tunable window
based set of metrics for tasks and runqueues. These metrics can be
used to guide task placement as well as task frequency requirements
for cpufreq governors.
config BSD_PROCESS_ACCT
bool "BSD Process Accounting"
depends on MULTIUSER


@@ -91,6 +91,9 @@ struct task_struct init_task
#endif
#ifdef CONFIG_CGROUP_SCHED
.sched_task_group = &root_task_group,
#endif
#ifdef CONFIG_SCHED_WALT
.wake_up_idle = false,
#endif
.ptraced = LIST_HEAD_INIT(init_task.ptraced),
.ptrace_entry = LIST_HEAD_INIT(init_task.ptrace_entry),


@@ -761,6 +761,7 @@ void __noreturn do_exit(long code)
}
exit_signals(tsk); /* sets PF_EXITING */
sched_exit(tsk);
/*
* Ensure that all new tsk->pi_lock acquisitions must observe
* PF_EXITING. Serializes against futex.c:attach_to_pi_owner().


@@ -2283,6 +2283,7 @@ bad_fork_cleanup_perf:
perf_event_free_task(p);
bad_fork_cleanup_policy:
lockdep_free_task(p);
free_task_load_ptrs(p);
#ifdef CONFIG_NUMA
mpol_put(p->mempolicy);
bad_fork_cleanup_threadgroup_lock:


@@ -19,6 +19,7 @@
#include "../smpboot.h"
#include "pelt.h"
#include "walt.h"
#define CREATE_TRACE_POINTS
#include <trace/events/sched.h>
@@ -1298,6 +1299,7 @@ static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
uclamp_rq_inc(rq, p);
p->sched_class->enqueue_task(rq, p, flags);
walt_update_last_enqueue(p);
}
static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
@@ -1312,6 +1314,10 @@ static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
uclamp_rq_dec(rq, p);
p->sched_class->dequeue_task(rq, p, flags);
#ifdef CONFIG_SCHED_WALT
if (p == rq->ed_task)
early_detection_notify(rq, sched_ktime_clock());
#endif
}
void activate_task(struct rq *rq, struct task_struct *p, int flags)
@@ -1331,6 +1337,11 @@ void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
if (task_contributes_to_load(p))
rq->nr_uninterruptible++;
#ifdef CONFIG_SCHED_WALT
if (flags & DEQUEUE_SLEEP)
clear_ed_task(p, rq);
#endif
dequeue_task(rq, p, flags);
}
@@ -1492,8 +1503,11 @@ static struct rq *move_queued_task(struct rq *rq, struct rq_flags *rf,
WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING);
dequeue_task(rq, p, DEQUEUE_NOCLOCK);
double_lock_balance(rq, cpu_rq(new_cpu));
if (!(rq->clock_update_flags & RQCF_UPDATED))
update_rq_clock(rq);
set_task_cpu(p, new_cpu);
double_rq_unlock(cpu_rq(new_cpu), rq);
rq = cpu_rq(new_cpu);
@@ -1750,12 +1764,13 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
p->se.nr_migrations++;
rseq_migrate(p);
perf_event_task_migrate(p);
fixup_busy_time(p, new_cpu);
}
__set_task_cpu(p, new_cpu);
}
#ifdef CONFIG_NUMA_BALANCING
static void __migrate_swap_task(struct task_struct *p, int cpu)
{
if (task_on_rq_queued(p)) {
@@ -1870,7 +1885,6 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p,
out:
return ret;
}
#endif /* CONFIG_NUMA_BALANCING */
/*
* wait_task_inactive - wait for a thread to unschedule.
@@ -2616,6 +2630,8 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
*/
smp_cond_load_acquire(&p->on_cpu, !VAL);
walt_try_to_wake_up(p);
p->sched_contributes_to_load = !!task_contributes_to_load(p);
p->state = TASK_WAKING;
@@ -2644,6 +2660,17 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
unlock:
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
out:
#ifdef CONFIG_SCHED_WALT
if (success && sched_predl) {
raw_spin_lock_irqsave(&cpu_rq(cpu)->lock, flags);
if (do_pl_notif(cpu_rq(cpu)))
cpufreq_update_util(cpu_rq(cpu),
SCHED_CPUFREQ_WALT |
SCHED_CPUFREQ_PL);
raw_spin_unlock_irqrestore(&cpu_rq(cpu)->lock, flags);
}
#endif
if (success)
ttwu_stat(p, cpu, wake_flags);
preempt_enable();
@@ -2689,6 +2716,10 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
p->se.prev_sum_exec_runtime = 0;
p->se.nr_migrations = 0;
p->se.vruntime = 0;
#ifdef CONFIG_SCHED_WALT
p->last_sleep_ts = 0;
p->wake_up_idle = false;
#endif
INIT_LIST_HEAD(&p->se.group_node);
#ifdef CONFIG_FAIR_GROUP_SCHED
@@ -2840,6 +2871,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
{
unsigned long flags;
init_new_task_load(p);
__sched_fork(clone_flags, p);
/*
* We mark the process as NEW here. This guarantees that
@@ -2945,7 +2977,9 @@ void wake_up_new_task(struct task_struct *p)
struct rq_flags rf;
struct rq *rq;
add_new_task_to_grp(p);
raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
p->state = TASK_RUNNING;
#ifdef CONFIG_SMP
/*
@@ -2963,6 +2997,7 @@ void wake_up_new_task(struct task_struct *p)
update_rq_clock(rq);
post_init_entity_util_avg(p);
mark_task_starting(p);
activate_task(rq, p, ENQUEUE_NOCLOCK);
trace_sched_wakeup_new(p);
check_preempt_curr(rq, p, WF_FORK);
@@ -3497,6 +3532,9 @@ void sched_exec(void)
unsigned long flags;
int dest_cpu;
if (sched_energy_enabled())
return;
raw_spin_lock_irqsave(&p->pi_lock, flags);
dest_cpu = p->sched_class->select_task_rq(p, task_cpu(p), SD_BALANCE_EXEC, 0);
if (dest_cpu == smp_processor_id())
@@ -3592,16 +3630,30 @@ void scheduler_tick(void)
struct rq *rq = cpu_rq(cpu);
struct task_struct *curr = rq->curr;
struct rq_flags rf;
u64 wallclock;
bool early_notif;
u32 old_load;
struct related_thread_group *grp;
unsigned int flag = 0;
sched_clock_tick();
rq_lock(rq, &rf);
old_load = task_load(curr);
set_window_start(rq);
wallclock = sched_ktime_clock();
walt_update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
update_rq_clock(rq);
curr->sched_class->task_tick(rq, curr, 0);
calc_global_load_tick(rq);
psi_task_tick(rq);
early_notif = early_detection_notify(rq, wallclock);
if (early_notif)
flag = SCHED_CPUFREQ_WALT | SCHED_CPUFREQ_EARLY_DET;
cpufreq_update_util(rq, flag);
rq_unlock(rq, &rf);
perf_event_task_tick();
@@ -3610,6 +3662,15 @@ void scheduler_tick(void)
rq->idle_balance = idle_cpu(cpu);
trigger_load_balance(rq);
#endif
rcu_read_lock();
grp = task_related_thread_group(curr);
if (update_preferred_cluster(grp, curr, old_load, true))
set_preferred_cluster(grp);
rcu_read_unlock();
if (curr->sched_class == &fair_sched_class)
check_for_migration(rq, curr);
}
#ifdef CONFIG_NO_HZ_FULL
@@ -4005,6 +4066,7 @@ static void __sched notrace __schedule(bool preempt)
struct rq_flags rf;
struct rq *rq;
int cpu;
u64 wallclock;
cpu = smp_processor_id();
rq = cpu_rq(cpu);
@@ -4052,7 +4114,15 @@ static void __sched notrace __schedule(bool preempt)
clear_tsk_need_resched(prev);
clear_preempt_need_resched();
wallclock = sched_ktime_clock();
if (likely(prev != next)) {
#ifdef CONFIG_SCHED_WALT
if (!prev->on_rq)
prev->last_sleep_ts = wallclock;
#endif
walt_update_task_ravg(prev, rq, PUT_PREV_TASK, wallclock, 0);
walt_update_task_ravg(next, rq, PICK_NEXT_TASK, wallclock, 0);
rq->nr_switches++;
/*
* RCU users of rcu_dereference(rq->curr) may not see
@@ -4080,6 +4150,7 @@ static void __sched notrace __schedule(bool preempt)
/* Also unlocks the rq: */
rq = context_switch(rq, prev, next, &rf);
} else {
walt_update_task_ravg(prev, rq, TASK_UPDATE, wallclock, 0);
rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);
rq_unlock_irq(rq, &rf);
}
@@ -4669,7 +4740,7 @@ struct task_struct *idle_task(int cpu)
*
* The task of @pid, if found. %NULL otherwise.
*/
struct task_struct *find_process_by_pid(pid_t pid)
{
return pid ? find_task_by_vpid(pid) : current;
}
@@ -6253,7 +6324,7 @@ void idle_task_exit(void)
*
* Also see the comment "Global load-average calculations".
*/
void calc_load_migrate(struct rq *rq)
{
long delta = calc_load_fold_active(rq, 1);
if (delta)
@@ -6285,7 +6356,7 @@ static struct task_struct *__pick_migrate_task(struct rq *rq)
* there's no concurrency possible, we hold the required locks anyway
* because of lock validation efforts.
*/
void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf)
{
struct rq *rq = dead_rq;
struct task_struct *next, *stop = rq->stop;
@@ -6512,6 +6583,11 @@ int sched_cpu_deactivate(unsigned int cpu)
static void sched_rq_cpu_starting(unsigned int cpu)
{
struct rq *rq = cpu_rq(cpu);
unsigned long flags;
raw_spin_lock_irqsave(&rq->lock, flags);
set_window_start(rq);
raw_spin_unlock_irqrestore(&rq->lock, flags);
rq->calc_load_update = calc_load_update;
update_max_interval();
@@ -6521,6 +6597,7 @@ int sched_cpu_starting(unsigned int cpu)
{
sched_rq_cpu_starting(cpu);
sched_tick_start(cpu);
clear_walt_request(cpu);
return 0;
}
@@ -6535,6 +6612,7 @@ int sched_cpu_dying(unsigned int cpu)
sched_tick_stop(cpu);
rq_lock_irqsave(rq, &rf);
if (rq->rd) {
BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
set_rq_offline(rq);
@@ -6543,6 +6621,8 @@ int sched_cpu_dying(unsigned int cpu)
BUG_ON(rq->nr_running != 1);
rq_unlock_irqrestore(rq, &rf);
clear_walt_request(cpu);
calc_load_migrate(rq);
update_max_interval();
nohz_balance_exit_idle(rq);
@@ -6564,6 +6644,8 @@ void __init sched_init_smp(void)
sched_init_domains(cpu_active_mask);
mutex_unlock(&sched_domains_mutex);
update_cluster_topology();
/* Move init over to a non-isolated CPU */
if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0)
BUG();
@@ -6618,6 +6700,8 @@ void __init sched_init(void)
wait_bit_init();
init_clusters();
#ifdef CONFIG_FAIR_GROUP_SCHED
ptr += 2 * nr_cpu_ids * sizeof(void **);
#endif
@@ -6729,6 +6813,7 @@ void __init sched_init(void)
rq->idle_stamp = 0;
rq->avg_idle = 2*sysctl_sched_migration_cost;
rq->max_idle_balance_cost = sysctl_sched_migration_cost;
walt_sched_init_rq(rq);
INIT_LIST_HEAD(&rq->cfs_tasks);
@@ -6743,6 +6828,8 @@ void __init sched_init(void)
atomic_set(&rq->nr_iowait, 0);
}
BUG_ON(alloc_related_thread_groups());
set_load_weight(&init_task, false);
/*
@@ -6758,6 +6845,7 @@ void __init sched_init(void)
* when this runqueue becomes "idle".
*/
init_idle(current, smp_processor_id());
init_new_task_load(current);
calc_load_update = jiffies + LOAD_FREQ;
@@ -6972,6 +7060,97 @@ void ia64_set_curr_task(int cpu, struct task_struct *p)
/* task_group_lock serializes the addition/removal of task groups */
static DEFINE_SPINLOCK(task_group_lock);
#if defined(CONFIG_SCHED_WALT) && defined(CONFIG_UCLAMP_TASK_GROUP)
static inline void walt_init_sched_boost(struct task_group *tg)
{
tg->sched_boost_no_override = false;
tg->sched_boost_enabled = true;
tg->colocate = false;
tg->colocate_update_disabled = false;
}
void update_cgroup_boost_settings(void)
{
struct task_group *tg;
rcu_read_lock();
list_for_each_entry_rcu(tg, &task_groups, list) {
if (tg->sched_boost_no_override)
continue;
tg->sched_boost_enabled = false;
}
rcu_read_unlock();
}
void restore_cgroup_boost_settings(void)
{
struct task_group *tg;
rcu_read_lock();
list_for_each_entry_rcu(tg, &task_groups, list)
tg->sched_boost_enabled = true;
rcu_read_unlock();
}
static void walt_schedgp_attach(struct cgroup_taskset *tset)
{
struct task_struct *task;
struct cgroup_subsys_state *css;
struct task_group *tg;
bool colocate;
cgroup_taskset_first(tset, &css);
tg = css_tg(css);
colocate = tg->colocate;
cgroup_taskset_for_each(task, css, tset)
sync_cgroup_colocation(task, colocate);
}
static u64
sched_boost_override_read(struct cgroup_subsys_state *css,
struct cftype *cft)
{
struct task_group *tg = css_tg(css);
return (u64) tg->sched_boost_no_override;
}
static int sched_boost_override_write(struct cgroup_subsys_state *css,
struct cftype *cft, u64 override)
{
struct task_group *tg = css_tg(css);
tg->sched_boost_no_override = !!override;
return 0;
}
static u64 sched_colocate_read(struct cgroup_subsys_state *css,
struct cftype *cft)
{
struct task_group *tg = css_tg(css);
return (u64) tg->colocate;
}
static int sched_colocate_write(struct cgroup_subsys_state *css,
struct cftype *cft, u64 colocate)
{
struct task_group *tg = css_tg(css);
if (tg->colocate_update_disabled)
return -EPERM;
tg->colocate = !!colocate;
tg->colocate_update_disabled = true;
return 0;
}
#else
static inline void walt_init_sched_boost(struct task_group *tg) { }
static void walt_schedgp_attach(struct cgroup_taskset *tset) { }
#endif /* CONFIG_SCHED_WALT */
static inline void alloc_uclamp_sched_group(struct task_group *tg,
struct task_group *parent)
{
@@ -7139,6 +7318,7 @@ cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
if (IS_ERR(tg))
return ERR_PTR(-ENOMEM);
walt_init_sched_boost(tg);
return &tg->css;
}
@@ -7225,6 +7405,8 @@ static void cpu_cgroup_attach(struct cgroup_taskset *tset)
cgroup_taskset_for_each(task, css, tset)
sched_move_task(task);
walt_schedgp_attach(tset);
}
#ifdef CONFIG_UCLAMP_TASK_GROUP
@@ -7784,7 +7966,21 @@ static struct cftype cpu_legacy_files[] = {
.read_u64 = cpu_uclamp_ls_read_u64,
.write_u64 = cpu_uclamp_ls_write_u64,
},
#ifdef CONFIG_SCHED_WALT
{
.name = "uclamp.sched_boost_no_override",
.flags = CFTYPE_NOT_ON_ROOT,
.read_u64 = sched_boost_override_read,
.write_u64 = sched_boost_override_write,
},
{
.name = "uclamp.colocate",
.flags = CFTYPE_NOT_ON_ROOT,
.read_u64 = sched_colocate_read,
.write_u64 = sched_colocate_write,
},
#endif /* CONFIG_SCHED_WALT */
#endif /* CONFIG_UCLAMP_TASK_GROUP */
{ } /* Terminate */
};
@@ -7971,7 +8167,21 @@ static struct cftype cpu_files[] = {
.read_u64 = cpu_uclamp_ls_read_u64,
.write_u64 = cpu_uclamp_ls_write_u64,
},
#ifdef CONFIG_SCHED_WALT
{
.name = "uclamp.sched_boost_no_override",
.flags = CFTYPE_NOT_ON_ROOT,
.read_u64 = sched_boost_override_read,
.write_u64 = sched_boost_override_write,
},
{
.name = "uclamp.colocate",
.flags = CFTYPE_NOT_ON_ROOT,
.read_u64 = sched_colocate_read,
.write_u64 = sched_colocate_write,
},
#endif /* CONFIG_SCHED_WALT */
#endif /* CONFIG_UCLAMP_TASK_GROUP */
{ } /* terminate */
};
@@ -8040,3 +8250,57 @@ const u32 sched_prio_to_wmult[40] = {
};
#undef CREATE_TRACE_POINTS
__read_mostly bool sched_predl = 1;
void enqueue_task_core(struct rq *rq, struct task_struct *p, int flags)
{
enqueue_task(rq, p, 0);
}
void dequeue_task_core(struct rq *rq, struct task_struct *p, int flags)
{
dequeue_task(rq, p, 0);
}
#ifdef CONFIG_SCHED_WALT
void sched_account_irqtime(int cpu, struct task_struct *curr,
u64 delta, u64 wallclock)
{
struct rq *rq = cpu_rq(cpu);
unsigned long flags, nr_windows;
u64 cur_jiffies_ts;
raw_spin_lock_irqsave(&rq->lock, flags);
/*
* cputime (wallclock) uses sched_clock so use the same here for
* consistency.
*/
delta += sched_clock() - wallclock;
cur_jiffies_ts = get_jiffies_64();
if (is_idle_task(curr))
walt_update_task_ravg(curr, rq, IRQ_UPDATE, sched_ktime_clock(),
delta);
nr_windows = cur_jiffies_ts - rq->irqload_ts;
if (nr_windows) {
if (nr_windows < 10) {
/* Decay CPU's irqload by 3/4 for each window. */
rq->avg_irqload *= (3 * nr_windows);
rq->avg_irqload = div64_u64(rq->avg_irqload,
4 * nr_windows);
} else {
rq->avg_irqload = 0;
}
rq->avg_irqload += rq->cur_irqload;
rq->cur_irqload = 0;
}
rq->cur_irqload += delta;
rq->irqload_ts = cur_jiffies_ts;
raw_spin_unlock_irqrestore(&rq->lock, flags);
}
#endif

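sched_account_irqtime() above folds the current window's IRQ time into rq->avg_irqload once at least one jiffy has passed. The stand-alone sketch below reproduces just that decay arithmetic; note that (3 * nr_windows) / (4 * nr_windows) reduces to a single 3/4 factor for any elapsed count below ten.

#include <stdint.h>
#include <stdio.h>

/* Reproduces only the decay step from sched_account_irqtime() above. */
static uint64_t decay_irqload(uint64_t avg_irqload, uint64_t cur_irqload,
			      uint64_t nr_windows)
{
	if (!nr_windows)
		return avg_irqload;	/* still in the same jiffy window */
	if (nr_windows < 10)
		avg_irqload = avg_irqload * (3 * nr_windows) / (4 * nr_windows);
	else
		avg_irqload = 0;
	return avg_irqload + cur_irqload;
}

int main(void)
{
	/* 1000 decays to 750 whether 1 or 5 windows passed, then adds 200. */
	printf("%llu\n", (unsigned long long)decay_irqload(1000, 200, 1));	/* 950 */
	printf("%llu\n", (unsigned long long)decay_irqload(1000, 200, 5));	/* 950 */
	printf("%llu\n", (unsigned long long)decay_irqload(1000, 200, 12));	/* 200 */
	return 0;
}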

@@ -4,6 +4,7 @@
*/
#include <linux/cpufreq_times.h>
#include "sched.h"
#include "walt.h"
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
@@ -53,11 +54,18 @@ void irqtime_account_irq(struct task_struct *curr)
struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
s64 delta;
int cpu;
#ifdef CONFIG_SCHED_WALT
u64 wallclock;
bool account = true;
#endif
if (!sched_clock_irqtime)
return;
cpu = smp_processor_id();
#ifdef CONFIG_SCHED_WALT
wallclock = sched_clock_cpu(cpu);
#endif
delta = sched_clock_cpu(cpu) - irqtime->irq_start_time;
irqtime->irq_start_time += delta;
@@ -71,6 +79,15 @@ void irqtime_account_irq(struct task_struct *curr)
irqtime_account_delta(irqtime, delta, CPUTIME_IRQ);
else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
irqtime_account_delta(irqtime, delta, CPUTIME_SOFTIRQ);
#ifdef CONFIG_SCHED_WALT
else
account = false;
if (account)
sched_account_irqtime(cpu, curr, delta, wallclock);
else if (curr != this_cpu_ksoftirqd())
sched_account_irqstart(cpu, curr, wallclock);
#endif
}
EXPORT_SYMBOL_GPL(irqtime_account_irq);


@@ -17,6 +17,7 @@
*/
#include "sched.h"
#include "pelt.h"
#include "walt.h"
struct dl_bandwidth def_dl_bandwidth;
@@ -1380,6 +1381,7 @@ void inc_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
WARN_ON(!dl_prio(prio));
dl_rq->dl_nr_running++;
add_nr_running(rq_of_dl_rq(dl_rq), 1);
walt_inc_cumulative_runnable_avg(rq_of_dl_rq(dl_rq), dl_task_of(dl_se));
inc_dl_deadline(dl_rq, deadline);
inc_dl_migration(dl_se, dl_rq);
@@ -1394,6 +1396,7 @@ void dec_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
WARN_ON(!dl_rq->dl_nr_running);
dl_rq->dl_nr_running--;
sub_nr_running(rq_of_dl_rq(dl_rq), 1);
walt_dec_cumulative_runnable_avg(rq_of_dl_rq(dl_rq), dl_task_of(dl_se));
dec_dl_deadline(dl_rq, dl_se->deadline);
dec_dl_migration(dl_se, dl_rq);
@@ -2101,7 +2104,9 @@ retry:
}
deactivate_task(rq, next_task, 0);
next_task->on_rq = TASK_ON_RQ_MIGRATING;
set_task_cpu(next_task, later_rq->cpu);
next_task->on_rq = TASK_ON_RQ_QUEUED;
/*
* Update the later_rq clock here, because the clock is used
@@ -2195,7 +2200,9 @@ static void pull_dl_task(struct rq *this_rq)
resched = true;
deactivate_task(src_rq, p, 0);
p->on_rq = TASK_ON_RQ_MIGRATING;
set_task_cpu(p, this_cpu);
p->on_rq = TASK_ON_RQ_QUEUED;
activate_task(this_rq, p, 0);
dmin = p->dl.deadline;
@@ -2458,6 +2465,9 @@ const struct sched_class dl_sched_class = {
.switched_to = switched_to_dl,
.update_curr = update_curr_dl,
#ifdef CONFIG_SCHED_WALT
.fixup_walt_sched_stats = fixup_walt_sched_stats_common,
#endif
};
int sched_dl_global_validate(void)


@@ -646,6 +646,19 @@ do { \
SEQ_printf(m, " .%-30s: %ld\n", "curr->pid", (long)(task_pid_nr(rq->curr)));
PN(clock);
PN(clock_task);
#ifdef CONFIG_SMP
P(cpu_capacity);
#endif
#ifdef CONFIG_SCHED_WALT
P(cluster->max_possible_capacity);
P(cluster->efficiency);
P(cluster->cur_freq);
P(cluster->max_freq);
P(cluster->exec_scale_factor);
P(walt_stats.nr_big_tasks);
SEQ_printf(m, " .%-30s: %llu\n", "walt_stats.cumulative_runnable_avg",
rq->walt_stats.cumulative_runnable_avg_scaled);
#endif
#undef P
#undef PN
@@ -724,6 +737,11 @@ static void sched_debug_header(struct seq_file *m)
PN(sysctl_sched_wakeup_granularity);
P(sysctl_sched_child_runs_first);
P(sysctl_sched_features);
#ifdef CONFIG_SCHED_WALT
P(sched_init_task_load_windows);
P(sched_ravg_window);
P(sched_load_granule);
#endif
#undef PN
#undef P
@@ -915,6 +933,9 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
P_SCHEDSTAT(se.statistics.nr_wakeups_passive);
P_SCHEDSTAT(se.statistics.nr_wakeups_idle);
#ifdef CONFIG_SCHED_WALT
P(ravg.demand);
#endif
avg_atom = p->se.sum_exec_runtime;
if (nr_switches)
avg_atom = div64_ul(avg_atom, nr_switches);


@@ -24,6 +24,12 @@
#include <trace/events/sched.h>
#include "walt.h"
#ifdef CONFIG_SMP
static inline bool task_fits_max(struct task_struct *p, int cpu);
#endif /* CONFIG_SMP */
/*
* Targeted preemption latency for CPU-bound tasks:
*
@@ -85,6 +91,7 @@ unsigned int sysctl_sched_wakeup_granularity = 1000000UL;
static unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL;
const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
DEFINE_PER_CPU_READ_MOSTLY(int, sched_load_boost);
#ifdef CONFIG_SMP
/*
@@ -118,6 +125,8 @@ int __weak arch_asym_cpu_priority(int cpu)
unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL;
#endif
unsigned int sched_small_task_threshold = 102;
static inline void update_load_add(struct load_weight *lw, unsigned long inc)
{
lw->weight += inc;
@@ -3689,11 +3698,6 @@ static inline unsigned long cfs_rq_load_avg(struct cfs_rq *cfs_rq)
return cfs_rq->avg.load_avg;
}
static inline unsigned long task_util(struct task_struct *p)
{
return READ_ONCE(p->se.avg.util_avg);
}
static inline unsigned long _task_util_est(struct task_struct *p)
{
struct util_est ue = READ_ONCE(p->se.avg.util_est);
@@ -3703,6 +3707,9 @@ static inline unsigned long _task_util_est(struct task_struct *p)
static inline unsigned long task_util_est(struct task_struct *p)
{
#ifdef CONFIG_SCHED_WALT
return p->ravg.demand_scaled;
#endif
return max(task_util(p), _task_util_est(p));
}
@@ -4514,13 +4521,16 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq)
dequeue_entity(qcfs_rq, se, DEQUEUE_SLEEP);
qcfs_rq->h_nr_running -= task_delta;
qcfs_rq->idle_h_nr_running -= idle_task_delta;
walt_dec_throttled_cfs_rq_stats(&qcfs_rq->walt_stats, cfs_rq);
if (qcfs_rq->load.weight)
dequeue = 0;
}
if (!se) {
sub_nr_running(rq, task_delta);
walt_dec_throttled_cfs_rq_stats(&rq->walt_stats, cfs_rq);
}
cfs_rq->throttled = 1;
cfs_rq->throttled_clock = rq_clock(rq);
@@ -4554,6 +4564,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
struct sched_entity *se;
int enqueue = 1;
long task_delta, idle_task_delta;
struct cfs_rq *tcfs_rq __maybe_unused = cfs_rq;
se = cfs_rq->tg->se[cpu_of(rq)];
@@ -4583,6 +4594,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
enqueue_entity(cfs_rq, se, ENQUEUE_WAKEUP);
cfs_rq->h_nr_running += task_delta;
cfs_rq->idle_h_nr_running += idle_task_delta;
walt_inc_throttled_cfs_rq_stats(&cfs_rq->walt_stats, tcfs_rq);
if (cfs_rq_throttled(cfs_rq))
break;
@@ -4590,8 +4602,10 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
assert_list_leaf_cfs_rq(rq);
if (!se) {
add_nr_running(rq, task_delta);
walt_inc_throttled_cfs_rq_stats(&rq->walt_stats, tcfs_rq);
}
/* Determine whether we need to wake up potentially idle CPU: */
if (rq->curr == rq->idle && rq->cfs.nr_running)
@@ -4982,6 +4996,7 @@ static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
{
cfs_rq->runtime_enabled = 0;
INIT_LIST_HEAD(&cfs_rq->throttled_list);
walt_init_cfs_rq_stats(cfs_rq);
}
void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
@@ -5161,8 +5176,6 @@ static inline void hrtick_update(struct rq *rq)
#endif
#ifdef CONFIG_SMP
static inline unsigned long cpu_util(int cpu);
static inline bool cpu_overutilized(int cpu)
{
return !fits_capacity(cpu_util(cpu), capacity_of(cpu));
@@ -5223,6 +5236,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
break;
cfs_rq->h_nr_running++;
cfs_rq->idle_h_nr_running += idle_h_nr_running;
walt_inc_cfs_rq_stats(cfs_rq, p);
flags = ENQUEUE_WAKEUP;
}
@@ -5231,6 +5245,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
cfs_rq = cfs_rq_of(se);
cfs_rq->h_nr_running++;
cfs_rq->idle_h_nr_running += idle_h_nr_running;
walt_inc_cfs_rq_stats(cfs_rq, p);
if (cfs_rq_throttled(cfs_rq))
break;
@@ -5241,6 +5256,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
if (!se) {
add_nr_running(rq, 1);
inc_rq_walt_stats(rq, p);
/*
* Since new tasks are assigned an initial util_avg equal to
* half of the spare capacity of their CPU, tiny tasks have the
@@ -5308,6 +5324,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
break;
cfs_rq->h_nr_running--;
cfs_rq->idle_h_nr_running -= idle_h_nr_running;
walt_dec_cfs_rq_stats(cfs_rq, p);
/* Don't dequeue parent if it has other entities besides us */
if (cfs_rq->load.weight) {
@@ -5328,6 +5345,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
cfs_rq = cfs_rq_of(se);
cfs_rq->h_nr_running--;
cfs_rq->idle_h_nr_running -= idle_h_nr_running;
walt_dec_cfs_rq_stats(cfs_rq, p);
if (cfs_rq_throttled(cfs_rq))
break;
@@ -5336,8 +5354,10 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
update_cfs_group(se);
}
if (!se) {
sub_nr_running(rq, 1);
dec_rq_walt_stats(rq, p);
}
util_est_dequeue(&rq->cfs, p, task_sleep);
hrtick_update(rq);
@@ -5375,11 +5395,6 @@ static unsigned long cpu_runnable_load(struct rq *rq)
return cfs_rq_runnable_load_avg(&rq->cfs);
}
static unsigned long capacity_of(int cpu)
{
return cpu_rq(cpu)->cpu_capacity;
}
static unsigned long cpu_avg_load_per_task(int cpu)
{
struct rq *rq = cpu_rq(cpu);
@@ -6047,58 +6062,6 @@ static unsigned int uclamp_task_util(struct task_struct *p)
#endif
}
/**
* Amount of capacity of a CPU that is (estimated to be) used by CFS tasks
* @cpu: the CPU to get the utilization of
*
* The unit of the return value must be the one of capacity so we can compare
* the utilization with the capacity of the CPU that is available for CFS task
* (ie cpu_capacity).
*
* cfs_rq.avg.util_avg is the sum of running time of runnable tasks plus the
* recent utilization of currently non-runnable tasks on a CPU. It represents
* the amount of utilization of a CPU in the range [0..capacity_orig] where
* capacity_orig is the cpu_capacity available at the highest frequency
* (arch_scale_freq_capacity()).
* The utilization of a CPU converges towards a sum equal to or less than the
* current capacity (capacity_curr <= capacity_orig) of the CPU because it is
* the running time on this CPU scaled by capacity_curr.
*
* The estimated utilization of a CPU is defined to be the maximum between its
* cfs_rq.avg.util_avg and the sum of the estimated utilization of the tasks
* currently RUNNABLE on that CPU.
* This allows to properly represent the expected utilization of a CPU which
* has just got a big task running since a long sleep period. At the same time
* however it preserves the benefits of the "blocked utilization" in
* describing the potential for other tasks waking up on the same CPU.
*
* Nevertheless, cfs_rq.avg.util_avg can be higher than capacity_curr or even
* higher than capacity_orig because of unfortunate rounding in
* cfs.avg.util_avg or just after migrating tasks and new task wakeups until
* the average stabilizes with the new running time. We need to check that the
* utilization stays within the range of [0..capacity_orig] and cap it if
* necessary. Without utilization capping, a group could be seen as overloaded
* (CPU0 utilization at 121% + CPU1 utilization at 80%) whereas CPU1 has 20% of
* available capacity. We allow utilization to overshoot capacity_curr (but not
* capacity_orig) as it useful for predicting the capacity required after task
* migrations (scheduler-driven DVFS).
*
* Return: the (estimated) utilization for the specified CPU
*/
static inline unsigned long cpu_util(int cpu)
{
struct cfs_rq *cfs_rq;
unsigned int util;
cfs_rq = &cpu_rq(cpu)->cfs;
util = READ_ONCE(cfs_rq->avg.util_avg);
if (sched_feat(UTIL_EST))
util = max(util, READ_ONCE(cfs_rq->avg.util_est.enqueued));
return min_t(unsigned long, util, capacity_orig_of(cpu));
}
/*
* cpu_util_without: compute cpu utilization without any contributions from *p
* @cpu: the CPU which utilization is requested
@@ -6114,13 +6077,29 @@ static inline unsigned long cpu_util(int cpu)
*/
static unsigned long cpu_util_without(int cpu, struct task_struct *p)
{
#ifndef CONFIG_SCHED_WALT
struct cfs_rq *cfs_rq;
#endif
unsigned int util;
#ifdef CONFIG_SCHED_WALT
/*
* WALT does not decay idle tasks in the same manner
* as PELT, so it makes little sense to subtract task
* utilization from cpu utilization. Instead just use
* cpu_util for this case.
*/
if (likely(p->state == TASK_WAKING))
return cpu_util(cpu);
#endif
/* Task has no contribution or is new */
if (cpu != task_cpu(p) || !READ_ONCE(p->se.avg.last_update_time))
return cpu_util(cpu);
#ifdef CONFIG_SCHED_WALT
util = max_t(long, cpu_util(cpu) - task_util(p), 0);
#else
cfs_rq = &cpu_rq(cpu)->cfs;
util = READ_ONCE(cfs_rq->avg.util_avg);
@@ -6179,6 +6158,7 @@ static unsigned long cpu_util_without(int cpu, struct task_struct *p)
util = max(util, estimated);
}
#endif
/*
* Utilization (estimated) can exceed the CPU capacity, thus let's
@@ -6188,6 +6168,18 @@ static unsigned long cpu_util_without(int cpu, struct task_struct *p)
return min_t(unsigned long, util, capacity_orig_of(cpu));
}
/*
* Returns the current capacity of cpu after applying both
* cpu and freq scaling.
*/
unsigned long capacity_curr_of(int cpu)
{
unsigned long max_cap = cpu_rq(cpu)->cpu_capacity_orig;
unsigned long scale_freq = arch_scale_freq_capacity(cpu);
return cap_scale(max_cap, scale_freq);
}
/*
* Disable WAKE_AFFINE in the case where task @p doesn't fit in the
* capacity of either the waking CPU @cpu or the previous CPU @prev_cpu.
@@ -6344,7 +6336,7 @@ compute_energy(struct task_struct *p, int dst_cpu, struct perf_domain *pd)
* other use-cases too. So, until someone finds a better way to solve this,
* let's keep things simple by re-using the existing slow path.
*/
int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu, int sync)
{
unsigned long prev_delta = ULONG_MAX, best_delta = ULONG_MAX;
struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
@@ -7363,7 +7355,13 @@ static void detach_task(struct task_struct *p, struct lb_env *env)
lockdep_assert_held(&env->src_rq->lock);
deactivate_task(env->src_rq, p, DEQUEUE_NOCLOCK);
lockdep_off();
double_lock_balance(env->src_rq, env->dst_rq);
if (!(env->src_rq->clock_update_flags & RQCF_UPDATED))
update_rq_clock(env->src_rq);
set_task_cpu(p, env->dst_cpu);
double_unlock_balance(env->src_rq, env->dst_rq);
lockdep_on();
}
/*
@@ -8891,8 +8889,6 @@ static int need_active_balance(struct lb_env *env)
return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2);
}
static int active_load_balance_cpu_stop(void *data);
static int should_we_balance(struct lb_env *env)
{
struct sched_group *sg = env->sd->groups;
@@ -9244,7 +9240,7 @@ update_next_balance(struct sched_domain *sd, unsigned long *next_balance)
* least 1 task to be running on each physical CPU where possible, and
* avoids physical / logical imbalances.
*/
int active_load_balance_cpu_stop(void *data)
{
struct rq *busiest_rq = data;
int busiest_cpu = cpu_of(busiest_rq);
@@ -10615,6 +10611,10 @@ const struct sched_class fair_sched_class = {
#ifdef CONFIG_UCLAMP_TASK #ifdef CONFIG_UCLAMP_TASK
.uclamp_enabled = 1, .uclamp_enabled = 1,
#endif #endif
#ifdef CONFIG_SCHED_WALT
.fixup_walt_sched_stats = walt_fixup_sched_stats_fair,
#endif
};
#ifdef CONFIG_SCHED_DEBUG
@@ -61,7 +61,8 @@ static noinline int __cpuidle cpu_idle_poll(void)
stop_critical_timings();
while (!tif_need_resched() &&
(cpu_idle_force_poll || tick_check_broadcast_expired() ||
is_reserved(smp_processor_id())))
cpu_relax();
start_critical_timings();
trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
@@ -257,7 +258,8 @@ static void do_idle(void)
* broadcast device expired for us, we don't want to go deep
* idle as we know that the IPI is going to arrive right away.
*/
if (cpu_idle_force_poll || tick_check_broadcast_expired() ||
is_reserved(smp_processor_id())) {
tick_nohz_idle_restart_tick();
cpu_idle_poll();
} else {
@@ -6,6 +6,7 @@
#include "sched.h" #include "sched.h"
#include "pelt.h" #include "pelt.h"
#include "walt.h"
int sched_rr_timeslice = RR_TIMESLICE; int sched_rr_timeslice = RR_TIMESLICE;
int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE; int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
@@ -2388,6 +2389,10 @@ const struct sched_class rt_sched_class = {
#ifdef CONFIG_UCLAMP_TASK
.uclamp_enabled = 1,
#endif
#ifdef CONFIG_SCHED_WALT
.fixup_walt_sched_stats = fixup_walt_sched_stats_common,
#endif
};
#ifdef CONFIG_RT_GROUP_SCHED
@@ -84,6 +84,73 @@
struct rq;
struct cpuidle_state;
extern __read_mostly bool sched_predl;
struct sched_walt_cpu_load {
unsigned long prev_window_util;
unsigned long nl;
unsigned long pl;
bool rtgb_active;
u64 ws;
};
#ifdef CONFIG_SCHED_WALT
#define DECLARE_BITMAP_ARRAY(name, nr, bits) \
unsigned long name[nr][BITS_TO_LONGS(bits)]
extern unsigned int __weak sched_ravg_window;
struct walt_sched_stats {
int nr_big_tasks;
u64 cumulative_runnable_avg_scaled;
u64 pred_demands_sum_scaled;
};
struct cpu_cycle {
u64 cycles;
u64 time;
};
struct group_cpu_time {
u64 curr_runnable_sum;
u64 prev_runnable_sum;
u64 nt_curr_runnable_sum;
u64 nt_prev_runnable_sum;
};
struct load_subtractions {
u64 window_start;
u64 subs;
u64 new_subs;
};
#define NUM_TRACKED_WINDOWS 2
#define NUM_LOAD_INDICES 1000
struct sched_cluster {
raw_spinlock_t load_lock;
struct list_head list;
struct cpumask cpus;
int id;
int max_power_cost;
int min_power_cost;
int max_possible_capacity;
int efficiency; /* Differentiate cpus with different IPC capability */
unsigned int exec_scale_factor;
/*
* max_freq = user maximum
* max_mitigated_freq = thermal defined maximum
* max_possible_freq = maximum supported by hardware
*/
unsigned int cur_freq, max_freq, max_mitigated_freq, min_freq;
unsigned int max_possible_freq;
bool freq_init_done;
u64 aggr_grp_load;
};
extern __weak cpumask_t asym_cap_sibling_cpus;
#endif /* CONFIG_SCHED_WALT */
/* task_struct::on_rq states: */
#define TASK_ON_RQ_QUEUED 1
#define TASK_ON_RQ_MIGRATING 2
@@ -401,7 +468,24 @@ struct task_group {
struct uclamp_se uclamp[UCLAMP_CNT];
/* Latency-sensitive flag used for a task group */
unsigned int latency_sensitive;
#ifdef CONFIG_SCHED_WALT
/* Toggle ability to override sched boost enabled */
bool sched_boost_no_override;
/*
* Controls whether a cgroup is eligible for sched boost or not. This
* can temporarily be disabled by the kernel based on the no_override
* flag above.
*/
bool sched_boost_enabled;
/*
* Controls whether tasks of this cgroup should be colocated with each
* other and tasks of other cgroups that have the same flag turned on.
*/
bool colocate;
/* Controls whether further updates are allowed to the colocate flag */
bool colocate_update_disabled;
#endif /* CONFIG_SCHED_WALT */
#endif /* CONFIG_UCLAMP_TASK_GROUP */
};
@@ -565,6 +649,10 @@ struct cfs_rq {
struct list_head leaf_cfs_rq_list;
struct task_group *tg; /* group that "owns" this runqueue */
#ifdef CONFIG_SCHED_WALT
struct walt_sched_stats walt_stats;
#endif
#ifdef CONFIG_CFS_BANDWIDTH
int runtime_enabled;
s64 runtime_remaining;
@@ -961,6 +1049,41 @@ struct rq {
u64 max_idle_balance_cost;
#endif
#ifdef CONFIG_SCHED_WALT
struct task_struct *push_task;
struct sched_cluster *cluster;
struct cpumask freq_domain_cpumask;
struct walt_sched_stats walt_stats;
u64 window_start;
u32 prev_window_size;
unsigned long walt_flags;
u64 cur_irqload;
u64 avg_irqload;
u64 irqload_ts;
struct task_struct *ed_task;
struct cpu_cycle cc;
u64 old_busy_time, old_busy_time_group;
u64 old_estimated_time;
u64 curr_runnable_sum;
u64 prev_runnable_sum;
u64 nt_curr_runnable_sum;
u64 nt_prev_runnable_sum;
u64 cum_window_demand_scaled;
struct group_cpu_time grp_time;
struct load_subtractions load_subs[NUM_TRACKED_WINDOWS];
DECLARE_BITMAP_ARRAY(top_tasks_bitmap,
NUM_TRACKED_WINDOWS, NUM_LOAD_INDICES);
u8 *top_tasks[NUM_TRACKED_WINDOWS];
u8 curr_table;
int prev_top;
int curr_top;
bool notif_pending;
u64 last_cc_update;
u64 cycles;
#endif /* CONFIG_SCHED_WALT */
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
u64 prev_irq_time;
#endif
@@ -1306,8 +1429,6 @@ enum numa_faults_stats {
};
extern void sched_setnuma(struct task_struct *p, int node);
extern int migrate_task_to(struct task_struct *p, int cpu);
extern int migrate_swap(struct task_struct *p, struct task_struct *t,
int cpu, int scpu);
extern void init_numa_balancing(unsigned long clone_flags, struct task_struct *p);
#else
static inline void
@@ -1316,6 +1437,9 @@ init_numa_balancing(unsigned long clone_flags, struct task_struct *p)
}
#endif /* CONFIG_NUMA_BALANCING */
extern int migrate_swap(struct task_struct *p, struct task_struct *t,
int cpu, int scpu);
#ifdef CONFIG_SMP
static inline void
@@ -1782,8 +1906,15 @@ struct sched_class {
#ifdef CONFIG_FAIR_GROUP_SCHED
void (*task_change_group)(struct task_struct *p, int type);
#endif
#ifdef CONFIG_SCHED_WALT
void (*fixup_walt_sched_stats)(struct rq *rq, struct task_struct *p,
u16 updated_demand_scaled,
u16 updated_pred_demand_scaled);
#endif
};
static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
{
WARN_ON_ONCE(rq->curr != prev);
@@ -1960,6 +2091,7 @@ static inline void add_nr_running(struct rq *rq, unsigned count)
{
unsigned prev_nr = rq->nr_running;
sched_update_nr_prod(cpu_of(rq), count, true);
rq->nr_running = prev_nr + count;
#ifdef CONFIG_SMP
@@ -1974,6 +2106,7 @@ static inline void add_nr_running(struct rq *rq, unsigned count)
static inline void sub_nr_running(struct rq *rq, unsigned count)
{
sched_update_nr_prod(cpu_of(rq), count, false);
rq->nr_running -= count;
/* Check if we still need preemption */
sched_update_tick_dependency(rq);
@@ -2014,6 +2147,18 @@ static inline int hrtick_enabled(struct rq *rq)
#endif /* CONFIG_SCHED_HRTICK */
#ifdef CONFIG_SCHED_WALT
u64 __weak sched_ktime_clock(void);
unsigned long __weak
cpu_util_freq_walt(int cpu, struct sched_walt_cpu_load *walt_load);
#else
#define sched_ravg_window TICK_NSEC
static inline u64 sched_ktime_clock(void)
{
return 0;
}
#endif
#ifndef arch_scale_freq_capacity
static __always_inline
unsigned long arch_scale_freq_capacity(int cpu)
@@ -2031,8 +2176,127 @@ unsigned long arch_scale_max_freq_capacity(struct sched_domain *sd, int cpu)
}
#endif
unsigned long capacity_curr_of(int cpu);
#ifdef CONFIG_SMP
static inline unsigned long capacity_of(int cpu)
{
return cpu_rq(cpu)->cpu_capacity;
}
static inline unsigned long capacity_orig_of(int cpu)
{
return cpu_rq(cpu)->cpu_capacity_orig;
}
static inline unsigned long task_util(struct task_struct *p)
{
#ifdef CONFIG_SCHED_WALT
return p->ravg.demand_scaled;
#endif
return READ_ONCE(p->se.avg.util_avg);
}
/**
* Amount of capacity of a CPU that is (estimated to be) used by CFS tasks
* @cpu: the CPU to get the utilization of
*
* The unit of the return value must be the one of capacity so we can compare
* the utilization with the capacity of the CPU that is available for CFS task
* (ie cpu_capacity).
*
* cfs_rq.avg.util_avg is the sum of running time of runnable tasks plus the
* recent utilization of currently non-runnable tasks on a CPU. It represents
* the amount of utilization of a CPU in the range [0..capacity_orig] where
* capacity_orig is the cpu_capacity available at the highest frequency
* (arch_scale_freq_capacity()).
* The utilization of a CPU converges towards a sum equal to or less than the
* current capacity (capacity_curr <= capacity_orig) of the CPU because it is
* the running time on this CPU scaled by capacity_curr.
*
* The estimated utilization of a CPU is defined to be the maximum between its
* cfs_rq.avg.util_avg and the sum of the estimated utilization of the tasks
* currently RUNNABLE on that CPU.
* This allows to properly represent the expected utilization of a CPU which
* has just got a big task running since a long sleep period. At the same time
* however it preserves the benefits of the "blocked utilization" in
* describing the potential for other tasks waking up on the same CPU.
*
* Nevertheless, cfs_rq.avg.util_avg can be higher than capacity_curr or even
* higher than capacity_orig because of unfortunate rounding in
* cfs.avg.util_avg or just after migrating tasks and new task wakeups until
* the average stabilizes with the new running time. We need to check that the
* utilization stays within the range of [0..capacity_orig] and cap it if
* necessary. Without utilization capping, a group could be seen as overloaded
* (CPU0 utilization at 121% + CPU1 utilization at 80%) whereas CPU1 has 20% of
* available capacity. We allow utilization to overshoot capacity_curr (but not
* capacity_orig) as it is useful for predicting the capacity required after task
* migrations (scheduler-driven DVFS).
*
* Return: the (estimated) utilization for the specified CPU
*/
static inline unsigned long cpu_util(int cpu)
{
struct cfs_rq *cfs_rq;
unsigned int util;
#ifdef CONFIG_SCHED_WALT
u64 walt_cpu_util =
cpu_rq(cpu)->walt_stats.cumulative_runnable_avg_scaled;
return min_t(unsigned long, walt_cpu_util, capacity_orig_of(cpu));
#endif
cfs_rq = &cpu_rq(cpu)->cfs;
util = READ_ONCE(cfs_rq->avg.util_avg);
if (sched_feat(UTIL_EST))
util = max(util, READ_ONCE(cfs_rq->avg.util_est.enqueued));
return min_t(unsigned long, util, capacity_orig_of(cpu));
}
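A small worked example of the clamping on the PELT side of this #ifdef may help (numbers invented); on a CONFIG_SCHED_WALT build the early return above is taken instead, clamping the WALT demand sum the same way:
/*
 * Hypothetical PELT-path example with UTIL_EST enabled:
 *   cfs_rq->avg.util_avg          = 1300   transient overshoot
 *   cfs_rq->avg.util_est.enqueued =  900
 *   capacity_orig_of(cpu)         = 1024
 *   util = max(1300, 900) = 1300, clamped to 1024 on return
 */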
static inline unsigned long cpu_util_cum(int cpu, int delta)
{
u64 util = cpu_rq(cpu)->cfs.avg.util_avg;
unsigned long capacity = capacity_orig_of(cpu);
#ifdef CONFIG_SCHED_WALT
util = cpu_rq(cpu)->cum_window_demand_scaled;
#endif
delta += util;
if (delta < 0)
return 0;
return (delta >= capacity) ? capacity : delta;
}
static inline unsigned long
cpu_util_freq(int cpu, struct sched_walt_cpu_load *walt_load)
{
#ifdef CONFIG_SCHED_WALT
return cpu_util_freq_walt(cpu, walt_load);
#else
return cpu_util(cpu);
#endif
}
extern unsigned int capacity_margin_freq;
static inline unsigned long
add_capacity_margin(unsigned long cpu_capacity, int cpu)
{
cpu_capacity = cpu_capacity * capacity_margin_freq *
(100 + per_cpu(sched_load_boost, cpu));
cpu_capacity /= 100;
cpu_capacity /= SCHED_CAPACITY_SCALE;
return cpu_capacity;
}
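A rough sketch of the margin arithmetic follows; the capacity_margin_freq value is an assumption for illustration, not a default taken from this patch:
/*
 * Hypothetical example:
 *   cpu_capacity         =  800
 *   capacity_margin_freq = 1280   assumed ~25% over SCHED_CAPACITY_SCALE
 *   sched_load_boost     =    0
 *
 *   800 * 1280 * (100 + 0) / 100 / 1024 = 1000
 *
 * i.e. a value of 800 is inflated to about 1000 before being compared
 * against a CPU's available capacity.
 */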
#endif /* CONFIG_SMP */
#ifdef CONFIG_SMP
#ifdef CONFIG_PREEMPT
static inline void double_rq_lock(struct rq *rq1, struct rq *rq2);
@@ -2345,6 +2609,11 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
{
struct update_util_data *data;
#ifdef CONFIG_SCHED_WALT
if (!(flags & SCHED_CPUFREQ_WALT))
return;
#endif
data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data,
cpu_of(rq)));
if (data)
@@ -2432,13 +2701,6 @@ static inline bool uclamp_latency_sensitive(struct task_struct *p)
# define arch_scale_freq_invariant() false
#endif
#ifdef CONFIG_SMP
static inline unsigned long capacity_orig_of(int cpu)
{
return cpu_rq(cpu)->cpu_capacity_orig;
}
#endif
/**
* enum schedutil_type - CPU utilization type
* @FREQUENCY_UTIL: Utilization used to select frequency
@@ -2570,3 +2832,499 @@ static inline void membarrier_switch_mm(struct rq *rq,
{
}
#endif
enum sched_boost_policy {
SCHED_BOOST_NONE,
SCHED_BOOST_ON_BIG,
SCHED_BOOST_ON_ALL,
};
#ifdef CONFIG_SCHED_WALT
static inline int cluster_first_cpu(struct sched_cluster *cluster)
{
return cpumask_first(&cluster->cpus);
}
struct related_thread_group {
int id;
raw_spinlock_t lock;
struct list_head tasks;
struct list_head list;
bool skip_min;
struct rcu_head rcu;
u64 last_update;
u64 downmigrate_ts;
u64 start_ts;
};
extern struct sched_cluster *sched_cluster[NR_CPUS];
extern unsigned int __weak sched_disable_window_stats;
extern unsigned int max_possible_freq;
extern unsigned int min_max_freq;
extern unsigned int max_possible_efficiency;
extern unsigned int min_possible_efficiency;
extern unsigned int max_possible_capacity;
extern unsigned int __weak min_max_possible_capacity;
extern unsigned int max_power_cost;
extern unsigned int __read_mostly __weak sched_init_task_load_windows;
extern unsigned int __read_mostly __weak sched_load_granule;
extern int __weak update_preferred_cluster(struct related_thread_group *grp,
struct task_struct *p, u32 old_load, bool from_tick);
extern void __weak set_preferred_cluster(struct related_thread_group *grp);
extern void __weak add_new_task_to_grp(struct task_struct *new);
#define NO_BOOST 0
#define FULL_THROTTLE_BOOST 1
#define CONSERVATIVE_BOOST 2
#define RESTRAINED_BOOST 3
#define FULL_THROTTLE_BOOST_DISABLE -1
#define CONSERVATIVE_BOOST_DISABLE -2
#define RESTRAINED_BOOST_DISABLE -3
#define MAX_NUM_BOOST_TYPE (RESTRAINED_BOOST+1)
static inline int asym_cap_siblings(int cpu1, int cpu2)
{
return (cpumask_test_cpu(cpu1, &asym_cap_sibling_cpus) &&
cpumask_test_cpu(cpu2, &asym_cap_sibling_cpus));
}
static inline int cpu_max_possible_capacity(int cpu)
{
return cpu_rq(cpu)->cluster->max_possible_capacity;
}
static inline unsigned int cluster_max_freq(struct sched_cluster *cluster)
{
/*
* Governor and thermal driver don't know the other party's mitigation
* voting. So struct cluster saves both and returns min() for the current
* cluster fmax.
*/
return min(cluster->max_mitigated_freq, cluster->max_freq);
}
static inline unsigned int cpu_max_freq(int cpu)
{
return cluster_max_freq(cpu_rq(cpu)->cluster);
}
static inline unsigned int cpu_max_possible_freq(int cpu)
{
return cpu_rq(cpu)->cluster->max_possible_freq;
}
static inline bool hmp_capable(void)
{
return max_possible_capacity != min_max_possible_capacity;
}
static inline bool is_max_capacity_cpu(int cpu)
{
return cpu_max_possible_capacity(cpu) == max_possible_capacity;
}
static inline bool is_min_capacity_cpu(int cpu)
{
return cpu_max_possible_capacity(cpu) == min_max_possible_capacity;
}
static inline unsigned int task_load(struct task_struct *p)
{
return p->ravg.demand;
}
static inline unsigned int task_pl(struct task_struct *p)
{
return p->ravg.pred_demand;
}
static inline bool task_in_related_thread_group(struct task_struct *p)
{
return !!(rcu_access_pointer(p->grp) != NULL);
}
static inline
struct related_thread_group *task_related_thread_group(struct task_struct *p)
{
return rcu_dereference(p->grp);
}
/* Is frequency of two cpus synchronized with each other? */
static inline int same_freq_domain(int src_cpu, int dst_cpu)
{
struct rq *rq = cpu_rq(src_cpu);
if (src_cpu == dst_cpu)
return 1;
if (asym_cap_siblings(src_cpu, dst_cpu))
return 1;
return cpumask_test_cpu(dst_cpu, &rq->freq_domain_cpumask);
}
#define CPU_RESERVED 1
extern enum sched_boost_policy __weak boost_policy;
extern unsigned int __weak sched_task_filter_util;
static inline enum sched_boost_policy sched_boost_policy(void)
{
return boost_policy;
}
extern unsigned int __weak sched_boost_type;
static inline int sched_boost(void)
{
return sched_boost_type;
}
static inline bool rt_boost_on_big(void)
{
return sched_boost() == FULL_THROTTLE_BOOST ?
(sched_boost_policy() == SCHED_BOOST_ON_BIG) : false;
}
static inline bool is_full_throttle_boost(void)
{
return sched_boost() == FULL_THROTTLE_BOOST;
}
extern int __weak preferred_cluster(struct sched_cluster *cluster,
struct task_struct *p);
extern struct sched_cluster *rq_cluster(struct rq *rq);
#ifdef CONFIG_UCLAMP_TASK_GROUP
static inline bool task_sched_boost(struct task_struct *p)
{
struct cgroup_subsys_state *css = task_css(p, cpu_cgrp_id);
struct task_group *tg;
if (!css)
return false;
tg = container_of(css, struct task_group, css);
return tg->sched_boost_enabled;
}
extern int __weak sync_cgroup_colocation(struct task_struct *p, bool insert);
extern void update_cgroup_boost_settings(void);
extern void restore_cgroup_boost_settings(void);
#else
static inline bool
same_schedtg(struct task_struct *tsk1, struct task_struct *tsk2)
{
return true;
}
static inline bool task_sched_boost(struct task_struct *p)
{
return true;
}
static inline void update_cgroup_boost_settings(void) { }
static inline void restore_cgroup_boost_settings(void) { }
#endif
extern int __weak alloc_related_thread_groups(void);
extern void __weak check_for_migration(struct rq *rq, struct task_struct *p);
static inline int is_reserved(int cpu)
{
struct rq *rq = cpu_rq(cpu);
return test_bit(CPU_RESERVED, &rq->walt_flags);
}
static inline int mark_reserved(int cpu)
{
struct rq *rq = cpu_rq(cpu);
return test_and_set_bit(CPU_RESERVED, &rq->walt_flags);
}
static inline void clear_reserved(int cpu)
{
struct rq *rq = cpu_rq(cpu);
clear_bit(CPU_RESERVED, &rq->walt_flags);
}
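A minimal sketch of how these helpers are intended to pair up; the caller below is hypothetical (the real users are WALT's migration/active-balance paths), and the idle-loop hunks earlier in this patch are what poll on is_reserved():
static inline void example_reserve_for_push(int target_cpu)
{
	/* test_and_set_bit() returns the old bit: 0 means we own the reservation */
	if (!mark_reserved(target_cpu)) {
		/*
		 * While reserved, do_idle()/cpu_idle_poll() on target_cpu keep
		 * polling instead of entering a deep idle state, so a pushed
		 * task is picked up promptly.
		 */
		/* ... queue stopper work / push the task here ... */
		clear_reserved(target_cpu);
	}
}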
static inline bool
task_in_cum_window_demand(struct rq *rq, struct task_struct *p)
{
return cpu_of(rq) == task_cpu(p) && (p->on_rq || p->last_sleep_ts >=
rq->window_start);
}
static inline void walt_fixup_cum_window_demand(struct rq *rq, s64 scaled_delta)
{
rq->cum_window_demand_scaled += scaled_delta;
if (unlikely((s64)rq->cum_window_demand_scaled < 0))
rq->cum_window_demand_scaled = 0;
}
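A quick numeric example of the underflow clamp (values invented):
/*
 *   cum_window_demand_scaled = 100, scaled_delta = -150
 *   100 + (-150) = -50, caught by the (s64) check and reset to 0
 *   instead of wrapping around as a u64
 */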
extern unsigned long __weak thermal_cap(int cpu);
extern void __weak clear_walt_request(int cpu);
extern enum sched_boost_policy sched_boost_policy(void);
extern void sched_boost_parse_dt(void);
extern void __weak clear_ed_task(struct task_struct *p, struct rq *rq);
extern bool __weak early_detection_notify(struct rq *rq, u64 wallclock);
static inline unsigned int power_cost(int cpu, u64 demand)
{
return cpu_max_possible_capacity(cpu);
}
void __weak note_task_waking(struct task_struct *p, u64 wallclock);
static inline bool task_placement_boost_enabled(struct task_struct *p)
{
if (task_sched_boost(p))
return sched_boost_policy() != SCHED_BOOST_NONE;
return false;
}
static inline enum sched_boost_policy task_boost_policy(struct task_struct *p)
{
enum sched_boost_policy policy = task_sched_boost(p) ?
sched_boost_policy() :
SCHED_BOOST_NONE;
if (policy == SCHED_BOOST_ON_BIG) {
/*
* Filter out tasks less than min task util threshold
* under conservative boost.
*/
if (sched_boost() == CONSERVATIVE_BOOST &&
task_util(p) <= sched_task_filter_util)
policy = SCHED_BOOST_NONE;
}
return policy;
}
static inline bool is_min_capacity_cluster(struct sched_cluster *cluster)
{
return is_min_capacity_cpu(cluster_first_cpu(cluster));
}
extern void __weak walt_fixup_sched_stats_fair(struct rq *rq,
struct task_struct *p,
u16 updated_demand_scaled,
u16 updated_pred_demand_scaled);
extern void __weak walt_fixup_nr_big_tasks(struct rq *rq, struct task_struct *p,
int delta, bool inc);
#else /* CONFIG_SCHED_WALT */
struct walt_sched_stats;
struct related_thread_group;
struct sched_cluster;
static inline bool task_sched_boost(struct task_struct *p)
{
return false;
}
static inline bool task_placement_boost_enabled(struct task_struct *p)
{
return false;
}
static inline void check_for_migration(struct rq *rq, struct task_struct *p) { }
static inline int sched_boost(void)
{
return 0;
}
static inline bool rt_boost_on_big(void)
{
return false;
}
static inline bool is_full_throttle_boost(void)
{
return false;
}
static inline enum sched_boost_policy task_boost_policy(struct task_struct *p)
{
return SCHED_BOOST_NONE;
}
static inline bool
task_in_cum_window_demand(struct rq *rq, struct task_struct *p)
{
return false;
}
static inline bool hmp_capable(void) { return false; }
static inline bool is_max_capacity_cpu(int cpu) { return true; }
static inline bool is_min_capacity_cpu(int cpu) { return true; }
static inline int
preferred_cluster(struct sched_cluster *cluster, struct task_struct *p)
{
return -1;
}
static inline struct sched_cluster *rq_cluster(struct rq *rq)
{
return NULL;
}
static inline int asym_cap_siblings(int cpu1, int cpu2) { return 0; }
static inline void set_preferred_cluster(struct related_thread_group *grp) { }
static inline bool task_in_related_thread_group(struct task_struct *p)
{
return false;
}
static inline
struct related_thread_group *task_related_thread_group(struct task_struct *p)
{
return NULL;
}
static inline u32 task_load(struct task_struct *p) { return 0; }
static inline u32 task_pl(struct task_struct *p) { return 0; }
static inline int update_preferred_cluster(struct related_thread_group *grp,
struct task_struct *p, u32 old_load, bool from_tick)
{
return 0;
}
static inline void add_new_task_to_grp(struct task_struct *new) {}
static inline int same_freq_domain(int src_cpu, int dst_cpu)
{
return 1;
}
static inline int mark_reserved(int cpu)
{
return 0;
}
static inline void clear_reserved(int cpu) { }
static inline int alloc_related_thread_groups(void) { return 0; }
static inline void walt_fixup_cum_window_demand(struct rq *rq,
s64 scaled_delta) { }
#ifdef CONFIG_SMP
static inline unsigned long thermal_cap(int cpu)
{
return cpu_rq(cpu)->cpu_capacity_orig;
}
#endif
static inline void clear_walt_request(int cpu) { }
static inline int is_reserved(int cpu)
{
return 0;
}
static inline enum sched_boost_policy sched_boost_policy(void)
{
return SCHED_BOOST_NONE;
}
static inline void sched_boost_parse_dt(void) { }
static inline void clear_ed_task(struct task_struct *p, struct rq *rq) { }
static inline bool early_detection_notify(struct rq *rq, u64 wallclock)
{
return 0;
}
#ifdef CONFIG_SMP
static inline unsigned int power_cost(int cpu, u64 demand)
{
return SCHED_CAPACITY_SCALE;
}
#endif
static inline void note_task_waking(struct task_struct *p, u64 wallclock) { }
#endif /* CONFIG_SCHED_WALT */
struct sched_avg_stats {
int nr;
int nr_misfit;
int nr_max;
int nr_scaled;
};
extern void sched_get_nr_running_avg(struct sched_avg_stats *stats);
#if defined(CONFIG_SCHED_WALT) && defined(CONFIG_CFS_BANDWIDTH)
extern void walt_init_cfs_rq_stats(struct cfs_rq *cfs_rq);
extern void walt_inc_cfs_rq_stats(struct cfs_rq *cfs_rq, struct task_struct *p);
extern void walt_dec_cfs_rq_stats(struct cfs_rq *cfs_rq, struct task_struct *p);
extern void walt_inc_throttled_cfs_rq_stats(struct walt_sched_stats *stats,
struct cfs_rq *cfs_rq);
extern void walt_dec_throttled_cfs_rq_stats(struct walt_sched_stats *stats,
struct cfs_rq *cfs_rq);
#else
static inline void walt_init_cfs_rq_stats(struct cfs_rq *cfs_rq) {}
static inline void
walt_inc_cfs_rq_stats(struct cfs_rq *cfs_rq, struct task_struct *p) {}
static inline void
walt_dec_cfs_rq_stats(struct cfs_rq *cfs_rq, struct task_struct *p) {}
#define walt_inc_throttled_cfs_rq_stats(...)
#define walt_dec_throttled_cfs_rq_stats(...)
#endif
#ifdef CONFIG_SMP
#ifdef CONFIG_SCHED_WALT
extern int __weak group_balance_cpu_not_isolated(struct sched_group *sg);
#else
static inline int group_balance_cpu_not_isolated(struct sched_group *sg)
{
return group_balance_cpu(sg);
}
#endif /* CONFIG_SCHED_WALT */
#endif /* CONFIG_SMP */
extern int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu,
int sync);
extern int active_load_balance_cpu_stop(void *data);
#ifdef CONFIG_HOTPLUG_CPU
extern void set_rq_online(struct rq *rq);
extern void set_rq_offline(struct rq *rq);
extern void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf);
extern void calc_load_migrate(struct rq *rq);
#ifdef CONFIG_SCHED_WALT
extern void __weak
detach_one_task_core(struct task_struct *p, struct rq *rq,
struct list_head *tasks);
extern void __weak attach_tasks_core(struct list_head *tasks, struct rq *rq);
#else
static inline void
detach_one_task_core(struct task_struct *p, struct rq *rq,
struct list_head *tasks)
{
}
static inline void attach_tasks_core(struct list_head *tasks, struct rq *rq) {}
#endif
#endif
extern struct task_struct *find_process_by_pid(pid_t pid);
extern void enqueue_task_core(struct rq *rq, struct task_struct *p, int flags);
extern void dequeue_task_core(struct rq *rq, struct task_struct *p, int flags);
@@ -8,6 +8,7 @@
* See kernel/stop_machine.c * See kernel/stop_machine.c
*/ */
#include "sched.h" #include "sched.h"
#include "walt.h"
#ifdef CONFIG_SMP
static int
@@ -50,12 +51,14 @@ static void
enqueue_task_stop(struct rq *rq, struct task_struct *p, int flags)
{
add_nr_running(rq, 1);
walt_inc_cumulative_runnable_avg(rq, p);
}
static void
dequeue_task_stop(struct rq *rq, struct task_struct *p, int flags)
{
sub_nr_running(rq, 1);
walt_dec_cumulative_runnable_avg(rq, p);
}
static void yield_task_stop(struct rq *rq)
@@ -144,4 +147,7 @@ const struct sched_class stop_sched_class = {
.prio_changed = prio_changed_stop,
.switched_to = switched_to_stop,
.update_curr = update_curr_stop,
#ifdef CONFIG_SCHED_WALT
.fixup_walt_sched_stats = fixup_walt_sched_stats_common,
#endif
};
kernel/sched/walt.h (new file, 265 lines)
@@ -0,0 +1,265 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2016-2019, The Linux Foundation. All rights reserved.
*/
#ifndef __WALT_H
#define __WALT_H
#ifdef CONFIG_SCHED_WALT
#include <linux/sched/sysctl.h>
#include <linux/sched/core_ctl.h>
#define EXITING_TASK_MARKER 0xdeaddead
extern void __weak
walt_update_task_ravg(struct task_struct *p, struct rq *rq, int event,
u64 wallclock, u64 irqtime);
static inline void
fixup_cumulative_runnable_avg(struct walt_sched_stats *stats,
s64 demand_scaled_delta,
s64 pred_demand_scaled_delta)
{
if (sched_disable_window_stats)
return;
stats->cumulative_runnable_avg_scaled += demand_scaled_delta;
BUG_ON((s64)stats->cumulative_runnable_avg_scaled < 0);
stats->pred_demands_sum_scaled += pred_demand_scaled_delta;
BUG_ON((s64)stats->pred_demands_sum_scaled < 0);
}
static inline void
walt_inc_cumulative_runnable_avg(struct rq *rq, struct task_struct *p)
{
if (sched_disable_window_stats)
return;
fixup_cumulative_runnable_avg(&rq->walt_stats, p->ravg.demand_scaled,
p->ravg.pred_demand_scaled);
/*
* Add a task's contribution to the cumulative window demand when
*
* (1) task is enqueued with on_rq = 1 i.e migration,
* prio/cgroup/class change.
* (2) task is waking for the first time in this window.
*/
if (p->on_rq || (p->last_sleep_ts < rq->window_start))
walt_fixup_cum_window_demand(rq, p->ravg.demand_scaled);
}
static inline void
walt_dec_cumulative_runnable_avg(struct rq *rq, struct task_struct *p)
{
if (sched_disable_window_stats)
return;
fixup_cumulative_runnable_avg(&rq->walt_stats,
-(s64)p->ravg.demand_scaled,
-(s64)p->ravg.pred_demand_scaled);
/*
* on_rq will be 1 for sleeping tasks. So check if the task
* is migrating or dequeuing in RUNNING state to change the
* prio/cgroup/class.
*/
if (task_on_rq_migrating(p) || p->state == TASK_RUNNING)
walt_fixup_cum_window_demand(rq, -(s64)p->ravg.demand_scaled);
}
extern void __weak
fixup_walt_sched_stats_common(struct rq *rq, struct task_struct *p,
u16 updated_demand_scaled,
u16 updated_pred_demand_scaled);
extern void __weak inc_rq_walt_stats(struct rq *rq, struct task_struct *p);
extern void __weak dec_rq_walt_stats(struct rq *rq, struct task_struct *p);
extern void __weak fixup_busy_time(struct task_struct *p, int new_cpu);
extern void __weak init_new_task_load(struct task_struct *p);
extern void __weak mark_task_starting(struct task_struct *p);
extern void __weak set_window_start(struct rq *rq);
extern bool __weak do_pl_notif(struct rq *rq);
#define SCHED_HIGH_IRQ_TIMEOUT 3
static inline u64 sched_irqload(int cpu)
{
struct rq *rq = cpu_rq(cpu);
s64 delta;
delta = get_jiffies_64() - rq->irqload_ts;
/*
* Current context can be preempted by irq and rq->irqload_ts can be
* updated by irq context so that delta can be negative.
* But this is okay and we can safely return as this means there
* was recent irq occurrence.
*/
if (delta < SCHED_HIGH_IRQ_TIMEOUT)
return rq->avg_irqload;
else
return 0;
}
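A worked example of the staleness check (timestamps and HZ are invented; SCHED_HIGH_IRQ_TIMEOUT is the 3-jiffy threshold defined above):
/*
 *   irqload_ts updated 2 jiffies ago  -> delta = 2  < 3: return avg_irqload
 *   irqload_ts updated 10 jiffies ago -> delta = 10 >= 3: return 0 (stale)
 *   With HZ = 250 a jiffy is 4 ms, so IRQ load older than ~12 ms is ignored.
 */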
static inline int sched_cpu_high_irqload(int cpu)
{
return sched_irqload(cpu) >= sysctl_sched_cpu_high_irqload;
}
static inline int exiting_task(struct task_struct *p)
{
return (p->ravg.sum_history[0] == EXITING_TASK_MARKER);
}
static inline u64
scale_load_to_freq(u64 load, unsigned int src_freq, unsigned int dst_freq)
{
return div64_u64(load * (u64)src_freq, (u64)dst_freq);
}
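A small worked example of the frequency normalization (frequencies invented):
/*
 *   10000000 ns of runtime observed at src_freq = 1000000 kHz,
 *   normalized to dst_freq = 2000000 kHz:
 *     scale_load_to_freq(10000000, 1000000, 2000000) = 5000000 ns
 *   i.e. the same work would take about half the time on the faster clock.
 */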
extern void __weak sched_account_irqstart(int cpu, struct task_struct *curr,
u64 wallclock);
static inline unsigned int max_task_load(void)
{
return sched_ravg_window;
}
extern void __weak update_cluster_topology(void);
extern void __weak init_clusters(void);
extern void sched_account_irqtime(int cpu, struct task_struct *curr,
u64 delta, u64 wallclock);
static inline int same_cluster(int src_cpu, int dst_cpu)
{
return cpu_rq(src_cpu)->cluster == cpu_rq(dst_cpu)->cluster;
}
void __weak walt_sched_init_rq(struct rq *rq);
static inline void walt_update_last_enqueue(struct task_struct *p)
{
p->last_enqueued_ts = sched_ktime_clock();
}
static inline bool is_suh_max(void)
{
return sysctl_sched_user_hint == sched_user_hint_max;
}
#define DEFAULT_CGROUP_COLOC_ID 1
static inline bool walt_should_kick_upmigrate(struct task_struct *p, int cpu)
{
struct related_thread_group *rtg = p->grp;
if (is_suh_max() && rtg && rtg->id == DEFAULT_CGROUP_COLOC_ID &&
rtg->skip_min && p->unfilter)
return is_min_capacity_cpu(cpu);
return false;
}
extern bool is_rtgb_active(void);
extern u64 get_rtgb_active_time(void);
/* utility function to update walt signals at wakeup */
static inline void walt_try_to_wake_up(struct task_struct *p)
{
struct rq *rq = cpu_rq(task_cpu(p));
struct rq_flags rf;
u64 wallclock;
unsigned int old_load;
struct related_thread_group *grp = NULL;
rq_lock_irqsave(rq, &rf);
old_load = task_load(p);
wallclock = sched_ktime_clock();
walt_update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
walt_update_task_ravg(p, rq, TASK_WAKE, wallclock, 0);
note_task_waking(p, wallclock);
rq_unlock_irqrestore(rq, &rf);
rcu_read_lock();
grp = task_related_thread_group(p);
if (update_preferred_cluster(grp, p, old_load, false))
set_preferred_cluster(grp);
rcu_read_unlock();
}
#else /* CONFIG_SCHED_WALT */
static inline void walt_sched_init_rq(struct rq *rq) { }
static inline void walt_update_last_enqueue(struct task_struct *p) { }
static inline void walt_update_task_ravg(struct task_struct *p, struct rq *rq,
int event, u64 wallclock, u64 irqtime) { }
static inline void walt_inc_cumulative_runnable_avg(struct rq *rq,
struct task_struct *p)
{
}
static inline void walt_dec_cumulative_runnable_avg(struct rq *rq,
struct task_struct *p)
{
}
static inline void fixup_busy_time(struct task_struct *p, int new_cpu) { }
static inline void init_new_task_load(struct task_struct *p)
{
}
static inline void mark_task_starting(struct task_struct *p) { }
static inline void set_window_start(struct rq *rq) { }
static inline int sched_cpu_high_irqload(int cpu) { return 0; }
static inline void sched_account_irqstart(int cpu, struct task_struct *curr,
u64 wallclock)
{
}
static inline void update_cluster_topology(void) { }
static inline void init_clusters(void) {}
static inline void sched_account_irqtime(int cpu, struct task_struct *curr,
u64 delta, u64 wallclock)
{
}
static inline int same_cluster(int src_cpu, int dst_cpu) { return 1; }
static inline bool do_pl_notif(struct rq *rq) { return false; }
static inline void
inc_rq_walt_stats(struct rq *rq, struct task_struct *p) { }
static inline void
dec_rq_walt_stats(struct rq *rq, struct task_struct *p) { }
static inline void
fixup_walt_sched_stats_common(struct rq *rq, struct task_struct *p,
u16 updated_demand_scaled,
u16 updated_pred_demand_scaled)
{
}
static inline u64 sched_irqload(int cpu)
{
return 0;
}
static inline bool walt_should_kick_upmigrate(struct task_struct *p, int cpu)
{
return false;
}
static inline u64 get_rtgb_active_time(void)
{
return 0;
}
#define walt_try_to_wake_up(a) {}
#endif /* CONFIG_SCHED_WALT */
#endif
@@ -64,6 +64,7 @@
#include <linux/binfmts.h>
#include <linux/sched/sysctl.h>
#include <linux/sched/coredump.h>
#include <linux/sched/stat.h>
#include <linux/kexec.h>
#include <linux/bpf.h>
#include <linux/mount.h>
@@ -126,6 +127,7 @@ static int sixty = 60;
#endif
static int __maybe_unused neg_one = -1;
static int __maybe_unused two = 2;
static int __maybe_unused four = 4;
static unsigned long zero_ul;
@@ -140,7 +142,12 @@ static int ten_thousand = 10000;
static int six_hundred_forty_kb = 640 * 1024;
#endif
#ifdef CONFIG_SCHED_WALT
static int neg_three = -3;
static int three = 3;
static int two_hundred_fifty_five = 255;
const int sched_user_hint_max = 1000;
static unsigned int ns_per_sec = NSEC_PER_SEC;
static unsigned int one_hundred_thousand = 100000;
#endif
/* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
@@ -231,6 +238,10 @@ static int proc_dostring_coredump(struct ctl_table *table, int write,
#endif
static int proc_dopipe_max_size(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
#ifdef CONFIG_SCHED_WALT
static int proc_douintvec_minmax_schedhyst(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
#endif
#ifdef CONFIG_MAGIC_SYSRQ
/* Note: sysrq code uses its own private copy */
@@ -328,6 +339,172 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
#ifdef CONFIG_SCHED_WALT
{
.procname = "sched_user_hint",
.data = &sysctl_sched_user_hint,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = walt_proc_user_hint_handler,
.extra1 = SYSCTL_ZERO,
.extra2 = (void *)&sched_user_hint_max,
},
{
.procname = "sched_window_stats_policy",
.data = &sysctl_sched_window_stats_policy,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = &four,
},
{
.procname = "sched_cpu_high_irqload",
.data = &sysctl_sched_cpu_high_irqload,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "sched_group_upmigrate",
.data = &sysctl_sched_group_upmigrate_pct,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = walt_proc_group_thresholds_handler,
.extra1 = &sysctl_sched_group_downmigrate_pct,
},
{
.procname = "sched_group_downmigrate",
.data = &sysctl_sched_group_downmigrate_pct,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = walt_proc_group_thresholds_handler,
.extra1 = SYSCTL_ZERO,
.extra2 = &sysctl_sched_group_upmigrate_pct,
},
{
.procname = "sched_boost",
.data = &sysctl_sched_boost,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = sched_boost_handler,
.extra1 = &neg_three,
.extra2 = &three,
},
{
.procname = "sched_conservative_pl",
.data = &sysctl_sched_conservative_pl,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
{
.procname = "sched_walt_rotate_big_tasks",
.data = &sysctl_sched_walt_rotate_big_tasks,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
{
.procname = "sched_min_task_util_for_boost",
.data = &sysctl_sched_min_task_util_for_boost,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = &one_thousand,
},
{
.procname = "sched_min_task_util_for_colocation",
.data = &sysctl_sched_min_task_util_for_colocation,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = &one_thousand,
},
{
.procname = "sched_asym_cap_sibling_freq_match_pct",
.data = &sysctl_sched_asym_cap_sibling_freq_match_pct,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ONE,
.extra2 = &one_hundred,
},
{
.procname = "sched_coloc_downmigrate_ns",
.data = &sysctl_sched_coloc_downmigrate_ns,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_douintvec_minmax,
},
{
.procname = "sched_task_unfilter_nr_windows",
.data = &sysctl_sched_task_unfilter_nr_windows,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ONE,
.extra2 = &two_hundred_fifty_five,
},
{
.procname = "sched_busy_hysteresis_enable_cpus",
.data = &sysctl_sched_busy_hyst_enable_cpus,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_douintvec_minmax_schedhyst,
.extra1 = SYSCTL_ZERO,
.extra2 = &two_hundred_fifty_five,
},
{
.procname = "sched_busy_hyst_ns",
.data = &sysctl_sched_busy_hyst,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_douintvec_minmax_schedhyst,
.extra1 = SYSCTL_ZERO,
.extra2 = &ns_per_sec,
},
{
.procname = "sched_coloc_busy_hysteresis_enable_cpus",
.data = &sysctl_sched_coloc_busy_hyst_enable_cpus,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_douintvec_minmax_schedhyst,
.extra1 = SYSCTL_ZERO,
.extra2 = &two_hundred_fifty_five,
},
{
.procname = "sched_coloc_busy_hyst_ns",
.data = &sysctl_sched_coloc_busy_hyst,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_douintvec_minmax_schedhyst,
.extra1 = SYSCTL_ZERO,
.extra2 = &ns_per_sec,
},
{
.procname = "sched_coloc_busy_hyst_max_ms",
.data = &sysctl_sched_coloc_busy_hyst_max_ms,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_douintvec_minmax_schedhyst,
.extra1 = SYSCTL_ZERO,
.extra2 = &one_hundred_thousand,
},
{
.procname = "sched_ravg_window_nr_ticks",
.data = &sysctl_sched_ravg_window_nr_ticks,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = sched_ravg_window_handler,
},
#endif
#ifdef CONFIG_SCHED_DEBUG
{
.procname = "sched_min_granularity_ns",
@@ -2874,6 +3051,19 @@ static int proc_dostring_coredump(struct ctl_table *table, int write,
}
#endif
#ifdef CONFIG_SCHED_WALT
static int proc_douintvec_minmax_schedhyst(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int ret = proc_douintvec_minmax(table, write, buffer, lenp, ppos);
if (!ret && write)
sched_update_hyst_times();
return ret;
}
#endif
static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
void __user *buffer,
size_t *lenp, loff_t *ppos,
@@ -3341,6 +3531,29 @@ int proc_do_large_bitmap(struct ctl_table *table, int write,
return err;
}
static int do_proc_douintvec_rwin(bool *negp, unsigned long *lvalp,
int *valp, int write, void *data)
{
if (write) {
if (*lvalp == 0 || *lvalp == 2 || *lvalp == 5)
*valp = *lvalp;
else
return -EINVAL;
} else {
*negp = false;
*lvalp = *valp;
}
return 0;
}
int proc_douintvec_ravg_window(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
return do_proc_dointvec(table, write, buffer, lenp, ppos,
do_proc_douintvec_rwin, NULL);
}
#else /* CONFIG_PROC_SYSCTL */
int proc_dostring(struct ctl_table *table, int write,
@@ -3410,6 +3623,12 @@ int proc_do_large_bitmap(struct ctl_table *table, int write,
return -ENOSYS;
}
int proc_douintvec_ravg_window(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
return -ENOSYS;
}
#endif /* CONFIG_PROC_SYSCTL */
#if defined(CONFIG_SYSCTL)