sched: Add snapshot of Window Assisted Load Tracking (WALT)

This snapshot is taken from msm-4.19 as of commit 5debecbe7195
("trace: filter out spurious preemption and IRQs disable traces").

Change-Id: I8fab4084971baadcaa037f40ab549fc073a4b1ea
Signed-off-by: Satya Durga Srinivasu Prabhala <satyap@codeaurora.org>

@@ -183,9 +183,65 @@ static struct attribute_group crash_note_cpu_attr_group = {
};
#endif
#ifdef CONFIG_SCHED_WALT
static ssize_t sched_load_boost_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
ssize_t rc;
int boost;
struct cpu *cpu = container_of(dev, struct cpu, dev);
int cpuid = cpu->dev.id;
boost = per_cpu(sched_load_boost, cpuid);
rc = scnprintf(buf, PAGE_SIZE-2, "%d\n", boost);
return rc;
}
static ssize_t __ref sched_load_boost_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
int err;
int boost;
struct cpu *cpu = container_of(dev, struct cpu, dev);
int cpuid = cpu->dev.id;
err = kstrtoint(strstrip((char *)buf), 0, &boost);
if (err)
return err;
/*
* -100 is low enough to cancel out CPU's load and make it near zero.
* 1000 is close to the maximum value that cpu_util_freq_{walt,pelt}
* can take without overflow.
*/
if (boost < -100 || boost > 1000)
return -EINVAL;
per_cpu(sched_load_boost, cpuid) = boost;
return count;
}
static DEVICE_ATTR_RW(sched_load_boost);
static struct attribute *sched_cpu_attrs[] = {
&dev_attr_sched_load_boost.attr,
NULL
};
static struct attribute_group sched_cpu_attr_group = {
.attrs = sched_cpu_attrs,
};
#endif
static const struct attribute_group *common_cpu_attr_groups[] = {
#ifdef CONFIG_KEXEC
&crash_note_cpu_attr_group,
#endif
#ifdef CONFIG_SCHED_WALT
&sched_cpu_attr_group,
#endif
NULL
};
@@ -193,6 +249,9 @@ static const struct attribute_group *common_cpu_attr_groups[] = {
static const struct attribute_group *hotplugable_cpu_attr_groups[] = {
#ifdef CONFIG_KEXEC
&crash_note_cpu_attr_group,
#endif
#ifdef CONFIG_SCHED_WALT
&sched_cpu_attr_group,
#endif
NULL
};

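The sched_load_boost attribute added above accepts a per-CPU percentage in [-100, 1000]. How the scheduler consumes the value lives in cpu_util_freq_{walt,pelt}(), which is outside this hunk; the sketch below only illustrates the percentage model implied by the comment (a hypothetical apply_load_boost() helper, not kernel code).

#include <stdint.h>
#include <stdio.h>

/*
 * Hypothetical model of the boost percentage accepted by the sysfs file
 * above: -100 cancels the load, positive values scale it up. The in-kernel
 * consumers (cpu_util_freq_walt/pelt) are not shown in this commit.
 */
static int64_t apply_load_boost(int64_t util, int boost)
{
	if (boost < -100 || boost > 1000)
		return -1;			/* mirrors the -EINVAL check */
	return util * (100 + boost) / 100;
}

int main(void)
{
	printf("%lld\n", (long long)apply_load_boost(512, -100));	/* 0 */
	printf("%lld\n", (long long)apply_load_boost(512, 100));	/* 1024 */
	printf("%lld\n", (long long)apply_load_boost(512, 1000));	/* 5632 */
	return 0;
}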

@@ -1459,6 +1459,56 @@ static const struct file_operations proc_pid_sched_operations = {
#endif
/*
* Print out various scheduling related per-task fields:
*/
#ifdef CONFIG_SCHED_WALT
extern int __weak sched_wake_up_idle_show(struct seq_file *m, void *v);
extern ssize_t __weak sched_wake_up_idle_write(struct file *file,
const char __user *buf, size_t count, loff_t *offset);
extern int __weak sched_wake_up_idle_open(struct inode *inode,
struct file *filp);
static const struct file_operations proc_pid_sched_wake_up_idle_operations = {
.open = sched_wake_up_idle_open,
.read = seq_read,
.write = sched_wake_up_idle_write,
.llseek = seq_lseek,
.release = single_release,
};
extern int __weak sched_init_task_load_show(struct seq_file *m, void *v);
extern ssize_t __weak
sched_init_task_load_write(struct file *file, const char __user *buf,
size_t count, loff_t *offset);
extern int __weak
sched_init_task_load_open(struct inode *inode, struct file *filp);
static const struct file_operations proc_pid_sched_init_task_load_operations = {
.open = sched_init_task_load_open,
.read = seq_read,
.write = sched_init_task_load_write,
.llseek = seq_lseek,
.release = single_release,
};
extern int __weak sched_group_id_show(struct seq_file *m, void *v);
extern ssize_t __weak
sched_group_id_write(struct file *file, const char __user *buf,
size_t count, loff_t *offset);
extern int __weak sched_group_id_open(struct inode *inode, struct file *filp);
static const struct file_operations proc_pid_sched_group_id_operations = {
.open = sched_group_id_open,
.read = seq_read,
.write = sched_group_id_write,
.llseek = seq_lseek,
.release = single_release,
};
#endif /* CONFIG_SCHED_WALT */
#ifdef CONFIG_SCHED_AUTOGROUP
/*
* Print out autogroup related information:
@@ -3011,6 +3061,13 @@ static const struct pid_entry tgid_base_stuff[] = {
ONE("status", S_IRUGO, proc_pid_status),
ONE("personality", S_IRUSR, proc_pid_personality),
ONE("limits", S_IRUGO, proc_pid_limits),
#ifdef CONFIG_SCHED_WALT
REG("sched_wake_up_idle", 00644,
proc_pid_sched_wake_up_idle_operations),
REG("sched_init_task_load", 00644,
proc_pid_sched_init_task_load_operations),
REG("sched_group_id", 00666, proc_pid_sched_group_id_operations),
#endif
#ifdef CONFIG_SCHED_DEBUG
REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
#endif

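The three per-task entries registered above appear under /proc/<pid>/. A minimal user-space sketch of poking them for the current task follows; the accepted value ranges (for example, treating sched_init_task_load as a percentage) are assumptions, since the handlers are only declared __weak in this hunk.

#include <stdio.h>

/* Sketch only: exercises the files registered above for the calling task. */
int main(void)
{
	FILE *f = fopen("/proc/self/sched_init_task_load", "w");

	if (f) {
		fputs("50\n", f);	/* assumed: percent of a full window */
		fclose(f);
	}

	f = fopen("/proc/self/sched_group_id", "r");
	if (f) {
		int gid;

		if (fscanf(f, "%d", &gid) == 1)
			printf("sched_group_id: %d\n", gid);
		fclose(f);
	}
	return 0;
}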

@@ -69,6 +69,9 @@ enum cpuhp_state {
CPUHP_SLAB_PREPARE,
CPUHP_MD_RAID5_PREPARE,
CPUHP_RCUTREE_PREP,
#ifdef CONFIG_SCHED_WALT
CPUHP_CORE_CTL_ISOLATION_DEAD,
#endif
CPUHP_CPUIDLE_COUPLED_PREPARE,
CPUHP_POWERPC_PMAC_PREPARE,
CPUHP_POWERPC_MMU_CTX_PREPARE,


@@ -117,6 +117,18 @@ struct task_group;
(task->flags & PF_FROZEN) == 0 && \
(task->state & TASK_NOLOAD) == 0)
/*
* Enum for display driver to provide varying refresh rates
*/
enum fps {
FPS0 = 0,
FPS30 = 30,
FPS48 = 48,
FPS60 = 60,
FPS90 = 90,
FPS120 = 120,
};
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
/*
@@ -212,6 +224,21 @@ struct task_group;
/* Task command name length: */
#define TASK_COMM_LEN 16
enum task_event {
PUT_PREV_TASK = 0,
PICK_NEXT_TASK = 1,
TASK_WAKE = 2,
TASK_MIGRATE = 3,
TASK_UPDATE = 4,
IRQ_UPDATE = 5,
};
/* Note: this needs to be in sync with migrate_type_names array */
enum migrate_types {
GROUP_TO_RQ,
RQ_TO_GROUP,
};
extern void scheduler_tick(void);
#define MAX_SCHEDULE_TIMEOUT LONG_MAX
@@ -478,6 +505,89 @@ struct sched_entity {
#endif
};
struct cpu_cycle_counter_cb {
u64 (*get_cpu_cycle_counter)(int cpu);
};
DECLARE_PER_CPU_READ_MOSTLY(int, sched_load_boost);
#ifdef CONFIG_SCHED_WALT
extern void __weak sched_exit(struct task_struct *p);
extern int __weak
register_cpu_cycle_counter_cb(struct cpu_cycle_counter_cb *cb);
extern void __weak
sched_update_cpu_freq_min_max(const cpumask_t *cpus, u32 fmin, u32 fmax);
extern void __weak free_task_load_ptrs(struct task_struct *p);
extern void __weak sched_set_refresh_rate(enum fps fps);
#define RAVG_HIST_SIZE_MAX 5
#define NUM_BUSY_BUCKETS 10
/* ravg represents frequency scaled cpu-demand of tasks */
struct ravg {
/*
* 'mark_start' marks the beginning of an event (task waking up, task
* starting to execute, task being preempted) within a window
*
* 'sum' represents how runnable a task has been within current
* window. It incorporates both running time and wait time and is
* frequency scaled.
*
* 'sum_history' keeps track of history of 'sum' seen over previous
* RAVG_HIST_SIZE windows. Windows where task was entirely sleeping are
* ignored.
*
* 'demand' represents maximum sum seen over previous
* sysctl_sched_ravg_hist_size windows. 'demand' could drive frequency
* demand for tasks.
*
* 'curr_window_cpu' represents task's contribution to cpu busy time on
* various CPUs in the current window
*
* 'prev_window_cpu' represents task's contribution to cpu busy time on
* various CPUs in the previous window
*
* 'curr_window' represents the sum of all entries in curr_window_cpu
*
* 'prev_window' represents the sum of all entries in prev_window_cpu
*
* 'pred_demand' represents task's current predicted cpu busy time
*
* 'busy_buckets' groups historical busy time into different buckets
* used for prediction
*
* 'demand_scaled' represents task's demand scaled to 1024
*/
u64 mark_start;
u32 sum, demand;
u32 coloc_demand;
u32 sum_history[RAVG_HIST_SIZE_MAX];
u32 *curr_window_cpu, *prev_window_cpu;
u32 curr_window, prev_window;
u32 pred_demand;
u8 busy_buckets[NUM_BUSY_BUCKETS];
u16 demand_scaled;
u16 pred_demand_scaled;
u64 active_time;
u64 last_win_size;
};
#else
static inline void sched_exit(struct task_struct *p) { }
static inline int
register_cpu_cycle_counter_cb(struct cpu_cycle_counter_cb *cb)
{
return 0;
}
static inline void free_task_load_ptrs(struct task_struct *p) { }
static inline void sched_update_cpu_freq_min_max(const cpumask_t *cpus,
u32 fmin, u32 fmax) { }
static inline void sched_set_refresh_rate(enum fps fps) { }
#endif /* CONFIG_SCHED_WALT */
struct sched_rt_entity {
struct list_head run_list;
unsigned long timeout;
@@ -675,6 +785,20 @@ struct task_struct {
const struct sched_class *sched_class;
struct sched_entity se;
struct sched_rt_entity rt;
#ifdef CONFIG_SCHED_WALT
u64 last_sleep_ts;
bool wake_up_idle;
struct ravg ravg;
u32 init_load_pct;
u64 last_wake_ts;
u64 last_enqueued_ts;
struct related_thread_group *grp;
struct list_head grp_list;
u64 cpu_cycles;
bool misfit;
u8 unfilter;
#endif
#ifdef CONFIG_CGROUP_SCHED
struct task_group *sched_task_group;
#endif
@@ -2000,4 +2124,37 @@ int sched_trace_rq_cpu(struct rq *rq);
const struct cpumask *sched_trace_rd_span(struct root_domain *rd);
#ifdef CONFIG_SCHED_WALT
#define PF_WAKE_UP_IDLE 1
static inline u32 sched_get_wake_up_idle(struct task_struct *p)
{
return p->wake_up_idle;
}
static inline int sched_set_wake_up_idle(struct task_struct *p,
int wake_up_idle)
{
p->wake_up_idle = !!wake_up_idle;
return 0;
}
static inline void set_wake_up_idle(bool enabled)
{
current->wake_up_idle = enabled;
}
#else
static inline u32 sched_get_wake_up_idle(struct task_struct *p)
{
return 0;
}
static inline int sched_set_wake_up_idle(struct task_struct *p,
int wake_up_idle)
{
return 0;
}
static inline void set_wake_up_idle(bool enabled) {}
#endif
#endif

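The struct ravg comments above describe 'demand' as being derived from the last few window sums and 'demand_scaled' as that figure scaled to 1024. A small stand-alone sketch of one plausible policy (take the maximum of the recent windows, then scale by an assumed 20 ms window) is shown below; the real selection is governed by sysctl_sched_window_stats_policy, which is not part of this hunk.

#include <stdint.h>
#include <stdio.h>

#define RAVG_HIST_SIZE_MAX 5

/* One plausible policy: demand is the maximum recent window sum. */
static uint32_t demand_from_history(const uint32_t *hist, int n)
{
	uint32_t max = 0;
	int i;

	for (i = 0; i < n; i++)
		if (hist[i] > max)
			max = hist[i];
	return max;
}

/* Assumed scaling to the 0..1024 range used by demand_scaled. */
static uint16_t scale_demand(uint32_t demand_ns, uint32_t window_ns)
{
	return (uint16_t)((uint64_t)demand_ns * 1024 / window_ns);
}

int main(void)
{
	uint32_t hist[RAVG_HIST_SIZE_MAX] = {
		4000000, 2500000, 6000000, 1000000, 3000000	/* ns busy per window */
	};
	uint32_t window_ns = 20000000;		/* assumed 20 ms window */
	uint32_t demand = demand_from_history(hist, RAVG_HIST_SIZE_MAX);

	printf("demand=%u ns, scaled=%u/1024\n", demand,
	       scale_demand(demand, window_ns));
	return 0;
}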

@@ -0,0 +1,31 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2016, 2019, The Linux Foundation. All rights reserved.
*/
#ifndef __CORE_CTL_H
#define __CORE_CTL_H
#define MAX_CPUS_PER_CLUSTER 6
#define MAX_CLUSTERS 3
struct core_ctl_notif_data {
unsigned int nr_big;
unsigned int coloc_load_pct;
unsigned int ta_util_pct[MAX_CLUSTERS];
unsigned int cur_cap_pct[MAX_CLUSTERS];
};
#ifdef CONFIG_SCHED_WALT
extern int __weak core_ctl_set_boost(bool boost);
extern void __weak core_ctl_notifier_register(struct notifier_block *n);
extern void __weak core_ctl_notifier_unregister(struct notifier_block *n);
#else
static inline int core_ctl_set_boost(bool boost)
{
return 0;
}
static inline void core_ctl_notifier_register(struct notifier_block *n) {}
static inline void core_ctl_notifier_unregister(struct notifier_block *n) {}
#endif
#endif

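The new core_ctl.h exposes a boost hook and a notifier pair. A hypothetical kernel-module client might look like the sketch below; the include path and the assumption that the notifier payload is a struct core_ctl_notif_data are not established by this commit.

#include <linux/module.h>
#include <linux/notifier.h>
#include "core_ctl.h"	/* include path depends on where this header lands */

/* Hypothetical payload interpretation; the callers are outside this commit. */
static int demo_core_ctl_cb(struct notifier_block *nb, unsigned long action,
			    void *data)
{
	struct core_ctl_notif_data *nd = data;

	pr_info("core_ctl: nr_big=%u coloc_load=%u%%\n",
		nd->nr_big, nd->coloc_load_pct);
	return NOTIFY_OK;
}

static struct notifier_block demo_core_ctl_nb = {
	.notifier_call = demo_core_ctl_cb,
};

static int __init demo_init(void)
{
	core_ctl_notifier_register(&demo_core_ctl_nb);
	return 0;
}

static void __exit demo_exit(void)
{
	core_ctl_notifier_unregister(&demo_core_ctl_nb);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");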

@@ -10,6 +10,11 @@
#define SCHED_CPUFREQ_IOWAIT (1U << 0)
#define SCHED_CPUFREQ_MIGRATION (1U << 1)
#define SCHED_CPUFREQ_INTERCLUSTER_MIG (1U << 3)
#define SCHED_CPUFREQ_WALT (1U << 4)
#define SCHED_CPUFREQ_PL (1U << 5)
#define SCHED_CPUFREQ_EARLY_DET (1U << 6)
#define SCHED_CPUFREQ_CONTINUE (1U << 8)
#ifdef CONFIG_CPU_FREQ
struct update_util_data {


@@ -21,6 +21,28 @@ extern bool single_task_running(void);
extern unsigned long nr_iowait(void);
extern unsigned long nr_iowait_cpu(int cpu);
#ifdef CONFIG_SCHED_WALT
extern void __weak sched_update_nr_prod(int cpu, long delta, bool inc);
extern unsigned int __weak sched_get_cpu_util(int cpu);
extern void __weak sched_update_hyst_times(void);
extern u64 __weak sched_lpm_disallowed_time(int cpu);
#else
static inline void sched_update_nr_prod(int cpu, long delta, bool inc) {}
static inline unsigned int sched_get_cpu_util(int cpu)
{
return 0;
}
static inline u64 sched_get_cpu_last_busy_time(int cpu)
{
return 0;
}
static inline void sched_update_hyst_times(void) {}
static inline u64 sched_lpm_disallowed_time(int cpu)
{
return 0;
}
#endif
static inline int sched_info_on(void)
{
#ifdef CONFIG_SCHEDSTATS


@@ -24,6 +24,42 @@ extern unsigned int sysctl_sched_latency;
extern unsigned int sysctl_sched_min_granularity;
extern unsigned int sysctl_sched_wakeup_granularity;
extern unsigned int sysctl_sched_child_runs_first;
#ifdef CONFIG_SCHED_WALT
extern unsigned int __weak sysctl_sched_user_hint;
extern const int __weak sched_user_hint_max;
extern unsigned int __weak sysctl_sched_cpu_high_irqload;
extern unsigned int __weak sysctl_sched_boost;
extern unsigned int __weak sysctl_sched_group_upmigrate_pct;
extern unsigned int __weak sysctl_sched_group_downmigrate_pct;
extern unsigned int __weak sysctl_sched_conservative_pl;
extern unsigned int __weak sysctl_sched_walt_rotate_big_tasks;
extern unsigned int __weak sysctl_sched_min_task_util_for_boost;
extern unsigned int __weak sysctl_sched_min_task_util_for_colocation;
extern unsigned int __weak sysctl_sched_asym_cap_sibling_freq_match_pct;
extern unsigned int __weak sysctl_sched_coloc_downmigrate_ns;
extern unsigned int __weak sysctl_sched_task_unfilter_nr_windows;
extern unsigned int __weak sysctl_sched_busy_hyst_enable_cpus;
extern unsigned int __weak sysctl_sched_busy_hyst;
extern unsigned int __weak sysctl_sched_coloc_busy_hyst_enable_cpus;
extern unsigned int __weak sysctl_sched_coloc_busy_hyst;
extern unsigned int __weak sysctl_sched_coloc_busy_hyst_max_ms;
extern unsigned int __weak sysctl_sched_window_stats_policy;
extern unsigned int __weak sysctl_sched_ravg_window_nr_ticks;
extern int __weak
walt_proc_group_thresholds_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos);
extern int __weak
walt_proc_user_hint_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos);
extern int __weak
sched_ravg_window_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos);
#endif
enum sched_tunable_scaling {
SCHED_TUNABLESCALING_NONE,
@@ -47,6 +83,10 @@ int sched_proc_update_handler(struct ctl_table *table, int write,
loff_t *ppos);
#endif
#ifdef CONFIG_SCHED_WALT
extern int __weak sched_boost_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
#endif
/*
* control realtime throttling:
*


@@ -73,6 +73,9 @@ extern int proc_do_large_bitmap(struct ctl_table *, int,
extern int proc_do_static_key(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos);
extern int proc_douintvec_ravg_window(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos);
/*
* Register a set of sysctl names by calling register_sysctl_table


@@ -470,6 +470,15 @@ config HAVE_SCHED_AVG_IRQ
depends on IRQ_TIME_ACCOUNTING || PARAVIRT_TIME_ACCOUNTING
depends on SMP
config SCHED_WALT
bool "Support window based load tracking"
depends on SMP
help
This feature will allow the scheduler to maintain a tunable window
based set of metrics for tasks and runqueues. These metrics can be
used to guide task placement as well as task frequency requirements
for cpufreq governors.
config BSD_PROCESS_ACCT
bool "BSD Process Accounting"
depends on MULTIUSER


@@ -91,6 +91,9 @@ struct task_struct init_task
#endif
#ifdef CONFIG_CGROUP_SCHED
.sched_task_group = &root_task_group,
#endif
#ifdef CONFIG_SCHED_WALT
.wake_up_idle = false,
#endif
.ptraced = LIST_HEAD_INIT(init_task.ptraced),
.ptrace_entry = LIST_HEAD_INIT(init_task.ptrace_entry),


@@ -761,6 +761,7 @@ void __noreturn do_exit(long code)
}
exit_signals(tsk); /* sets PF_EXITING */
sched_exit(tsk);
/*
* Ensure that all new tsk->pi_lock acquisitions must observe
* PF_EXITING. Serializes against futex.c:attach_to_pi_owner().


@@ -2283,6 +2283,7 @@ bad_fork_cleanup_perf:
perf_event_free_task(p);
bad_fork_cleanup_policy:
lockdep_free_task(p);
free_task_load_ptrs(p);
#ifdef CONFIG_NUMA
mpol_put(p->mempolicy);
bad_fork_cleanup_threadgroup_lock:


@@ -19,6 +19,7 @@
#include "../smpboot.h"
#include "pelt.h"
#include "walt.h"
#define CREATE_TRACE_POINTS
#include <trace/events/sched.h>
@@ -1298,6 +1299,7 @@ static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
uclamp_rq_inc(rq, p);
p->sched_class->enqueue_task(rq, p, flags);
walt_update_last_enqueue(p);
}
static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
@@ -1312,6 +1314,10 @@ static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
uclamp_rq_dec(rq, p);
p->sched_class->dequeue_task(rq, p, flags);
#ifdef CONFIG_SCHED_WALT
if (p == rq->ed_task)
early_detection_notify(rq, sched_ktime_clock());
#endif
}
void activate_task(struct rq *rq, struct task_struct *p, int flags)
@@ -1331,6 +1337,11 @@ void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
if (task_contributes_to_load(p))
rq->nr_uninterruptible++;
#ifdef CONFIG_SCHED_WALT
if (flags & DEQUEUE_SLEEP)
clear_ed_task(p, rq);
#endif
dequeue_task(rq, p, flags);
}
@@ -1492,8 +1503,11 @@ static struct rq *move_queued_task(struct rq *rq, struct rq_flags *rf,
WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING);
dequeue_task(rq, p, DEQUEUE_NOCLOCK);
double_lock_balance(rq, cpu_rq(new_cpu));
if (!(rq->clock_update_flags & RQCF_UPDATED))
update_rq_clock(rq);
set_task_cpu(p, new_cpu);
double_rq_unlock(cpu_rq(new_cpu), rq);
rq = cpu_rq(new_cpu);
@@ -1750,12 +1764,13 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
p->se.nr_migrations++;
rseq_migrate(p);
perf_event_task_migrate(p);
fixup_busy_time(p, new_cpu);
}
__set_task_cpu(p, new_cpu);
}
#ifdef CONFIG_NUMA_BALANCING
static void __migrate_swap_task(struct task_struct *p, int cpu)
{
if (task_on_rq_queued(p)) {
@@ -1870,7 +1885,6 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p,
out:
return ret;
}
#endif /* CONFIG_NUMA_BALANCING */
/*
* wait_task_inactive - wait for a thread to unschedule.
@@ -2616,6 +2630,8 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
*/
smp_cond_load_acquire(&p->on_cpu, !VAL);
walt_try_to_wake_up(p);
p->sched_contributes_to_load = !!task_contributes_to_load(p);
p->state = TASK_WAKING;
@@ -2644,6 +2660,17 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
unlock:
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
out:
#ifdef CONFIG_SCHED_WALT
if (success && sched_predl) {
raw_spin_lock_irqsave(&cpu_rq(cpu)->lock, flags);
if (do_pl_notif(cpu_rq(cpu)))
cpufreq_update_util(cpu_rq(cpu),
SCHED_CPUFREQ_WALT |
SCHED_CPUFREQ_PL);
raw_spin_unlock_irqrestore(&cpu_rq(cpu)->lock, flags);
}
#endif
if (success)
ttwu_stat(p, cpu, wake_flags);
preempt_enable();
@@ -2689,6 +2716,10 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
p->se.prev_sum_exec_runtime = 0;
p->se.nr_migrations = 0;
p->se.vruntime = 0;
#ifdef CONFIG_SCHED_WALT
p->last_sleep_ts = 0;
p->wake_up_idle = false;
#endif
INIT_LIST_HEAD(&p->se.group_node);
#ifdef CONFIG_FAIR_GROUP_SCHED
@@ -2840,6 +2871,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
{
unsigned long flags;
init_new_task_load(p);
__sched_fork(clone_flags, p);
/*
* We mark the process as NEW here. This guarantees that
@@ -2945,7 +2977,9 @@ void wake_up_new_task(struct task_struct *p)
struct rq_flags rf;
struct rq *rq;
add_new_task_to_grp(p);
raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
p->state = TASK_RUNNING;
#ifdef CONFIG_SMP
/*
@@ -2963,6 +2997,7 @@ void wake_up_new_task(struct task_struct *p)
update_rq_clock(rq);
post_init_entity_util_avg(p);
mark_task_starting(p);
activate_task(rq, p, ENQUEUE_NOCLOCK);
trace_sched_wakeup_new(p);
check_preempt_curr(rq, p, WF_FORK);
@@ -3497,6 +3532,9 @@ void sched_exec(void)
unsigned long flags;
int dest_cpu;
if (sched_energy_enabled())
return;
raw_spin_lock_irqsave(&p->pi_lock, flags);
dest_cpu = p->sched_class->select_task_rq(p, task_cpu(p), SD_BALANCE_EXEC, 0);
if (dest_cpu == smp_processor_id())
@@ -3592,16 +3630,30 @@ void scheduler_tick(void)
struct rq *rq = cpu_rq(cpu);
struct task_struct *curr = rq->curr;
struct rq_flags rf;
u64 wallclock;
bool early_notif;
u32 old_load;
struct related_thread_group *grp;
unsigned int flag = 0;
sched_clock_tick();
rq_lock(rq, &rf);
old_load = task_load(curr);
set_window_start(rq);
wallclock = sched_ktime_clock();
walt_update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
update_rq_clock(rq);
curr->sched_class->task_tick(rq, curr, 0);
calc_global_load_tick(rq);
psi_task_tick(rq);
early_notif = early_detection_notify(rq, wallclock);
if (early_notif)
flag = SCHED_CPUFREQ_WALT | SCHED_CPUFREQ_EARLY_DET;
cpufreq_update_util(rq, flag);
rq_unlock(rq, &rf);
perf_event_task_tick();
@@ -3610,6 +3662,15 @@ void scheduler_tick(void)
rq->idle_balance = idle_cpu(cpu);
trigger_load_balance(rq);
#endif
rcu_read_lock();
grp = task_related_thread_group(curr);
if (update_preferred_cluster(grp, curr, old_load, true))
set_preferred_cluster(grp);
rcu_read_unlock();
if (curr->sched_class == &fair_sched_class)
check_for_migration(rq, curr);
}
#ifdef CONFIG_NO_HZ_FULL
@@ -4005,6 +4066,7 @@ static void __sched notrace __schedule(bool preempt)
struct rq_flags rf;
struct rq *rq;
int cpu;
u64 wallclock;
cpu = smp_processor_id();
rq = cpu_rq(cpu);
@@ -4052,7 +4114,15 @@ static void __sched notrace __schedule(bool preempt)
clear_tsk_need_resched(prev);
clear_preempt_need_resched();
wallclock = sched_ktime_clock();
if (likely(prev != next)) {
#ifdef CONFIG_SCHED_WALT
if (!prev->on_rq)
prev->last_sleep_ts = wallclock;
#endif
walt_update_task_ravg(prev, rq, PUT_PREV_TASK, wallclock, 0);
walt_update_task_ravg(next, rq, PICK_NEXT_TASK, wallclock, 0);
rq->nr_switches++;
/*
* RCU users of rcu_dereference(rq->curr) may not see
@@ -4080,6 +4150,7 @@ static void __sched notrace __schedule(bool preempt)
/* Also unlocks the rq: */
rq = context_switch(rq, prev, next, &rf);
} else {
walt_update_task_ravg(prev, rq, TASK_UPDATE, wallclock, 0);
rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);
rq_unlock_irq(rq, &rf);
}
@@ -4669,7 +4740,7 @@ struct task_struct *idle_task(int cpu)
*
* The task of @pid, if found. %NULL otherwise.
*/
struct task_struct *find_process_by_pid(pid_t pid)
{
return pid ? find_task_by_vpid(pid) : current;
}
@@ -6253,7 +6324,7 @@ void idle_task_exit(void)
*
* Also see the comment "Global load-average calculations".
*/
void calc_load_migrate(struct rq *rq)
{
long delta = calc_load_fold_active(rq, 1);
if (delta)
@@ -6285,7 +6356,7 @@ static struct task_struct *__pick_migrate_task(struct rq *rq)
* there's no concurrency possible, we hold the required locks anyway
* because of lock validation efforts.
*/
void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf)
{
struct rq *rq = dead_rq;
struct task_struct *next, *stop = rq->stop;
@@ -6512,6 +6583,11 @@ int sched_cpu_deactivate(unsigned int cpu)
static void sched_rq_cpu_starting(unsigned int cpu)
{
struct rq *rq = cpu_rq(cpu);
unsigned long flags;
raw_spin_lock_irqsave(&rq->lock, flags);
set_window_start(rq);
raw_spin_unlock_irqrestore(&rq->lock, flags);
rq->calc_load_update = calc_load_update;
update_max_interval();
@@ -6521,6 +6597,7 @@ int sched_cpu_starting(unsigned int cpu)
{
sched_rq_cpu_starting(cpu);
sched_tick_start(cpu);
clear_walt_request(cpu);
return 0;
}
@@ -6535,6 +6612,7 @@ int sched_cpu_dying(unsigned int cpu)
sched_tick_stop(cpu);
rq_lock_irqsave(rq, &rf);
if (rq->rd) {
BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
set_rq_offline(rq);
@@ -6543,6 +6621,8 @@ int sched_cpu_dying(unsigned int cpu)
BUG_ON(rq->nr_running != 1);
rq_unlock_irqrestore(rq, &rf);
clear_walt_request(cpu);
calc_load_migrate(rq);
update_max_interval();
nohz_balance_exit_idle(rq);
@@ -6564,6 +6644,8 @@ void __init sched_init_smp(void)
sched_init_domains(cpu_active_mask);
mutex_unlock(&sched_domains_mutex);
update_cluster_topology();
/* Move init over to a non-isolated CPU */
if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0)
BUG();
@@ -6618,6 +6700,8 @@ void __init sched_init(void)
wait_bit_init();
init_clusters();
#ifdef CONFIG_FAIR_GROUP_SCHED
ptr += 2 * nr_cpu_ids * sizeof(void **);
#endif
@@ -6729,6 +6813,7 @@ void __init sched_init(void)
rq->idle_stamp = 0;
rq->avg_idle = 2*sysctl_sched_migration_cost;
rq->max_idle_balance_cost = sysctl_sched_migration_cost;
walt_sched_init_rq(rq);
INIT_LIST_HEAD(&rq->cfs_tasks);
@@ -6743,6 +6828,8 @@ void __init sched_init(void)
atomic_set(&rq->nr_iowait, 0);
}
BUG_ON(alloc_related_thread_groups());
set_load_weight(&init_task, false);
/*
@@ -6758,6 +6845,7 @@ void __init sched_init(void)
* when this runqueue becomes "idle".
*/
init_idle(current, smp_processor_id());
init_new_task_load(current);
calc_load_update = jiffies + LOAD_FREQ;
@@ -6972,6 +7060,97 @@ void ia64_set_curr_task(int cpu, struct task_struct *p)
/* task_group_lock serializes the addition/removal of task groups */
static DEFINE_SPINLOCK(task_group_lock);
#if defined(CONFIG_SCHED_WALT) && defined(CONFIG_UCLAMP_TASK_GROUP)
static inline void walt_init_sched_boost(struct task_group *tg)
{
tg->sched_boost_no_override = false;
tg->sched_boost_enabled = true;
tg->colocate = false;
tg->colocate_update_disabled = false;
}
void update_cgroup_boost_settings(void)
{
struct task_group *tg;
rcu_read_lock();
list_for_each_entry_rcu(tg, &task_groups, list) {
if (tg->sched_boost_no_override)
continue;
tg->sched_boost_enabled = false;
}
rcu_read_unlock();
}
void restore_cgroup_boost_settings(void)
{
struct task_group *tg;
rcu_read_lock();
list_for_each_entry_rcu(tg, &task_groups, list)
tg->sched_boost_enabled = true;
rcu_read_unlock();
}
static void walt_schedgp_attach(struct cgroup_taskset *tset)
{
struct task_struct *task;
struct cgroup_subsys_state *css;
struct task_group *tg;
bool colocate;
cgroup_taskset_first(tset, &css);
tg = css_tg(css);
colocate = tg->colocate;
cgroup_taskset_for_each(task, css, tset)
sync_cgroup_colocation(task, colocate);
}
static u64
sched_boost_override_read(struct cgroup_subsys_state *css,
struct cftype *cft)
{
struct task_group *tg = css_tg(css);
return (u64) tg->sched_boost_no_override;
}
static int sched_boost_override_write(struct cgroup_subsys_state *css,
struct cftype *cft, u64 override)
{
struct task_group *tg = css_tg(css);
tg->sched_boost_no_override = !!override;
return 0;
}
static u64 sched_colocate_read(struct cgroup_subsys_state *css,
struct cftype *cft)
{
struct task_group *tg = css_tg(css);
return (u64) tg->colocate;
}
static int sched_colocate_write(struct cgroup_subsys_state *css,
struct cftype *cft, u64 colocate)
{
struct task_group *tg = css_tg(css);
if (tg->colocate_update_disabled)
return -EPERM;
tg->colocate = !!colocate;
tg->colocate_update_disabled = true;
return 0;
}
#else
static inline void walt_init_sched_boost(struct task_group *tg) { }
static void walt_schedgp_attach(struct cgroup_taskset *tset) { }
#endif /* CONFIG_SCHED_WALT */
static inline void alloc_uclamp_sched_group(struct task_group *tg,
struct task_group *parent)
{
@@ -7139,6 +7318,7 @@ cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
if (IS_ERR(tg))
return ERR_PTR(-ENOMEM);
walt_init_sched_boost(tg);
return &tg->css;
}
@@ -7225,6 +7405,8 @@ static void cpu_cgroup_attach(struct cgroup_taskset *tset)
cgroup_taskset_for_each(task, css, tset)
sched_move_task(task);
walt_schedgp_attach(tset);
}
#ifdef CONFIG_UCLAMP_TASK_GROUP
@@ -7784,7 +7966,21 @@ static struct cftype cpu_legacy_files[] = {
.read_u64 = cpu_uclamp_ls_read_u64,
.write_u64 = cpu_uclamp_ls_write_u64,
},
#ifdef CONFIG_SCHED_WALT
{
.name = "uclamp.sched_boost_no_override",
.flags = CFTYPE_NOT_ON_ROOT,
.read_u64 = sched_boost_override_read,
.write_u64 = sched_boost_override_write,
},
{
.name = "uclamp.colocate",
.flags = CFTYPE_NOT_ON_ROOT,
.read_u64 = sched_colocate_read,
.write_u64 = sched_colocate_write,
},
#endif /* CONFIG_SCHED_WALT */
#endif /* CONFIG_UCLAMP_TASK_GROUP */
{ } /* Terminate */
};
@@ -7971,7 +8167,21 @@ static struct cftype cpu_files[] = {
.read_u64 = cpu_uclamp_ls_read_u64,
.write_u64 = cpu_uclamp_ls_write_u64,
},
#ifdef CONFIG_SCHED_WALT
{
.name = "uclamp.sched_boost_no_override",
.flags = CFTYPE_NOT_ON_ROOT,
.read_u64 = sched_boost_override_read,
.write_u64 = sched_boost_override_write,
},
{
.name = "uclamp.colocate",
.flags = CFTYPE_NOT_ON_ROOT,
.read_u64 = sched_colocate_read,
.write_u64 = sched_colocate_write,
},
#endif /* CONFIG_SCHED_WALT */
#endif /* CONFIG_UCLAMP_TASK_GROUP */
{ } /* terminate */
};
@@ -8040,3 +8250,57 @@ const u32 sched_prio_to_wmult[40] = {
};
#undef CREATE_TRACE_POINTS
__read_mostly bool sched_predl = 1;
void enqueue_task_core(struct rq *rq, struct task_struct *p, int flags)
{
enqueue_task(rq, p, 0);
}
void dequeue_task_core(struct rq *rq, struct task_struct *p, int flags)
{
dequeue_task(rq, p, 0);
}
#ifdef CONFIG_SCHED_WALT
void sched_account_irqtime(int cpu, struct task_struct *curr,
u64 delta, u64 wallclock)
{
struct rq *rq = cpu_rq(cpu);
unsigned long flags, nr_windows;
u64 cur_jiffies_ts;
raw_spin_lock_irqsave(&rq->lock, flags);
/*
* cputime (wallclock) uses sched_clock so use the same here for
* consistency.
*/
delta += sched_clock() - wallclock;
cur_jiffies_ts = get_jiffies_64();
if (is_idle_task(curr))
walt_update_task_ravg(curr, rq, IRQ_UPDATE, sched_ktime_clock(),
delta);
nr_windows = cur_jiffies_ts - rq->irqload_ts;
if (nr_windows) {
if (nr_windows < 10) {
/* Decay CPU's irqload by 3/4 for each window. */
rq->avg_irqload *= (3 * nr_windows);
rq->avg_irqload = div64_u64(rq->avg_irqload,
4 * nr_windows);
} else {
rq->avg_irqload = 0;
}
rq->avg_irqload += rq->cur_irqload;
rq->cur_irqload = 0;
}
rq->cur_irqload += delta;
rq->irqload_ts = cur_jiffies_ts;
raw_spin_unlock_irqrestore(&rq->lock, flags);
}
#endif

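sched_account_irqtime() above folds the current window's IRQ time into rq->avg_irqload once at least one jiffy has passed. The stand-alone sketch below reproduces just that decay arithmetic; note that (3 * nr_windows) / (4 * nr_windows) reduces to a single 3/4 factor for any elapsed count below ten.

#include <stdint.h>
#include <stdio.h>

/* Reproduces only the decay step from sched_account_irqtime() above. */
static uint64_t decay_irqload(uint64_t avg_irqload, uint64_t cur_irqload,
			      uint64_t nr_windows)
{
	if (!nr_windows)
		return avg_irqload;	/* still in the same jiffy window */
	if (nr_windows < 10)
		avg_irqload = avg_irqload * (3 * nr_windows) / (4 * nr_windows);
	else
		avg_irqload = 0;
	return avg_irqload + cur_irqload;
}

int main(void)
{
	/* 1000 decays to 750 whether 1 or 5 windows passed, then adds 200. */
	printf("%llu\n", (unsigned long long)decay_irqload(1000, 200, 1));	/* 950 */
	printf("%llu\n", (unsigned long long)decay_irqload(1000, 200, 5));	/* 950 */
	printf("%llu\n", (unsigned long long)decay_irqload(1000, 200, 12));	/* 200 */
	return 0;
}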

@@ -4,6 +4,7 @@
*/
#include <linux/cpufreq_times.h>
#include "sched.h"
#include "walt.h"
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
@@ -53,11 +54,18 @@ void irqtime_account_irq(struct task_struct *curr)
struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
s64 delta;
int cpu;
#ifdef CONFIG_SCHED_WALT
u64 wallclock;
bool account = true;
#endif
if (!sched_clock_irqtime)
return;
cpu = smp_processor_id();
#ifdef CONFIG_SCHED_WALT
wallclock = sched_clock_cpu(cpu);
#endif
delta = sched_clock_cpu(cpu) - irqtime->irq_start_time;
irqtime->irq_start_time += delta;
@@ -71,6 +79,15 @@ void irqtime_account_irq(struct task_struct *curr)
irqtime_account_delta(irqtime, delta, CPUTIME_IRQ);
else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
irqtime_account_delta(irqtime, delta, CPUTIME_SOFTIRQ);
#ifdef CONFIG_SCHED_WALT
else
account = false;
if (account)
sched_account_irqtime(cpu, curr, delta, wallclock);
else if (curr != this_cpu_ksoftirqd())
sched_account_irqstart(cpu, curr, wallclock);
#endif
}
EXPORT_SYMBOL_GPL(irqtime_account_irq);


@@ -17,6 +17,7 @@
*/
#include "sched.h"
#include "pelt.h"
#include "walt.h"
struct dl_bandwidth def_dl_bandwidth;
@@ -1380,6 +1381,7 @@ void inc_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
WARN_ON(!dl_prio(prio));
dl_rq->dl_nr_running++;
add_nr_running(rq_of_dl_rq(dl_rq), 1);
walt_inc_cumulative_runnable_avg(rq_of_dl_rq(dl_rq), dl_task_of(dl_se));
inc_dl_deadline(dl_rq, deadline);
inc_dl_migration(dl_se, dl_rq);
@@ -1394,6 +1396,7 @@ void dec_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
WARN_ON(!dl_rq->dl_nr_running);
dl_rq->dl_nr_running--;
sub_nr_running(rq_of_dl_rq(dl_rq), 1);
walt_dec_cumulative_runnable_avg(rq_of_dl_rq(dl_rq), dl_task_of(dl_se));
dec_dl_deadline(dl_rq, dl_se->deadline);
dec_dl_migration(dl_se, dl_rq);
@@ -2101,7 +2104,9 @@ retry:
}
deactivate_task(rq, next_task, 0);
next_task->on_rq = TASK_ON_RQ_MIGRATING;
set_task_cpu(next_task, later_rq->cpu);
next_task->on_rq = TASK_ON_RQ_QUEUED;
/*
* Update the later_rq clock here, because the clock is used
@@ -2195,7 +2200,9 @@ static void pull_dl_task(struct rq *this_rq)
resched = true;
deactivate_task(src_rq, p, 0);
p->on_rq = TASK_ON_RQ_MIGRATING;
set_task_cpu(p, this_cpu);
p->on_rq = TASK_ON_RQ_QUEUED;
activate_task(this_rq, p, 0);
dmin = p->dl.deadline;
@@ -2458,6 +2465,9 @@ const struct sched_class dl_sched_class = {
.switched_to = switched_to_dl,
.update_curr = update_curr_dl,
#ifdef CONFIG_SCHED_WALT
.fixup_walt_sched_stats = fixup_walt_sched_stats_common,
#endif
};
int sched_dl_global_validate(void)


@@ -646,6 +646,19 @@ do { \
SEQ_printf(m, " .%-30s: %ld\n", "curr->pid", (long)(task_pid_nr(rq->curr)));
PN(clock);
PN(clock_task);
#ifdef CONFIG_SMP
P(cpu_capacity);
#endif
#ifdef CONFIG_SCHED_WALT
P(cluster->max_possible_capacity);
P(cluster->efficiency);
P(cluster->cur_freq);
P(cluster->max_freq);
P(cluster->exec_scale_factor);
P(walt_stats.nr_big_tasks);
SEQ_printf(m, " .%-30s: %llu\n", "walt_stats.cumulative_runnable_avg",
rq->walt_stats.cumulative_runnable_avg_scaled);
#endif
#undef P
#undef PN
@@ -724,6 +737,11 @@ static void sched_debug_header(struct seq_file *m)
PN(sysctl_sched_wakeup_granularity);
P(sysctl_sched_child_runs_first);
P(sysctl_sched_features);
#ifdef CONFIG_SCHED_WALT
P(sched_init_task_load_windows);
P(sched_ravg_window);
P(sched_load_granule);
#endif
#undef PN
#undef P
@@ -915,6 +933,9 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
P_SCHEDSTAT(se.statistics.nr_wakeups_passive);
P_SCHEDSTAT(se.statistics.nr_wakeups_idle);
#ifdef CONFIG_SCHED_WALT
P(ravg.demand);
#endif
avg_atom = p->se.sum_exec_runtime;
if (nr_switches)
avg_atom = div64_ul(avg_atom, nr_switches);


@@ -24,6 +24,12 @@
#include <trace/events/sched.h>
#include "walt.h"
#ifdef CONFIG_SMP
static inline bool task_fits_max(struct task_struct *p, int cpu);
#endif /* CONFIG_SMP */
/*
* Targeted preemption latency for CPU-bound tasks:
*
@@ -85,6 +91,7 @@ unsigned int sysctl_sched_wakeup_granularity = 1000000UL;
static unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL;
const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
DEFINE_PER_CPU_READ_MOSTLY(int, sched_load_boost);
#ifdef CONFIG_SMP
/*
@@ -118,6 +125,8 @@ int __weak arch_asym_cpu_priority(int cpu)
unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL;
#endif
unsigned int sched_small_task_threshold = 102;
static inline void update_load_add(struct load_weight *lw, unsigned long inc)
{
lw->weight += inc;
@@ -3689,11 +3698,6 @@ static inline unsigned long cfs_rq_load_avg(struct cfs_rq *cfs_rq)
return cfs_rq->avg.load_avg;
}
static inline unsigned long task_util(struct task_struct *p)
{
return READ_ONCE(p->se.avg.util_avg);
}
static inline unsigned long _task_util_est(struct task_struct *p)
{
struct util_est ue = READ_ONCE(p->se.avg.util_est);
@@ -3703,6 +3707,9 @@ static inline unsigned long _task_util_est(struct task_struct *p)
static inline unsigned long task_util_est(struct task_struct *p)
{
#ifdef CONFIG_SCHED_WALT
return p->ravg.demand_scaled;
#endif
return max(task_util(p), _task_util_est(p));
}
@@ -4514,13 +4521,16 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq)
dequeue_entity(qcfs_rq, se, DEQUEUE_SLEEP);
qcfs_rq->h_nr_running -= task_delta;
qcfs_rq->idle_h_nr_running -= idle_task_delta;
walt_dec_throttled_cfs_rq_stats(&qcfs_rq->walt_stats, cfs_rq);
if (qcfs_rq->load.weight)
dequeue = 0;
}
if (!se) {
sub_nr_running(rq, task_delta);
walt_dec_throttled_cfs_rq_stats(&rq->walt_stats, cfs_rq);
}
cfs_rq->throttled = 1;
cfs_rq->throttled_clock = rq_clock(rq);
@@ -4554,6 +4564,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
struct sched_entity *se;
int enqueue = 1;
long task_delta, idle_task_delta;
struct cfs_rq *tcfs_rq __maybe_unused = cfs_rq;
se = cfs_rq->tg->se[cpu_of(rq)];
@@ -4583,6 +4594,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
enqueue_entity(cfs_rq, se, ENQUEUE_WAKEUP);
cfs_rq->h_nr_running += task_delta;
cfs_rq->idle_h_nr_running += idle_task_delta;
walt_inc_throttled_cfs_rq_stats(&cfs_rq->walt_stats, tcfs_rq);
if (cfs_rq_throttled(cfs_rq))
break;
@@ -4590,8 +4602,10 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
assert_list_leaf_cfs_rq(rq);
if (!se) {
add_nr_running(rq, task_delta);
walt_inc_throttled_cfs_rq_stats(&rq->walt_stats, tcfs_rq);
}
/* Determine whether we need to wake up potentially idle CPU: */
if (rq->curr == rq->idle && rq->cfs.nr_running)
@@ -4982,6 +4996,7 @@ static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
{
cfs_rq->runtime_enabled = 0;
INIT_LIST_HEAD(&cfs_rq->throttled_list);
walt_init_cfs_rq_stats(cfs_rq);
}
void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
@@ -5161,8 +5176,6 @@ static inline void hrtick_update(struct rq *rq)
#endif
#ifdef CONFIG_SMP
static inline unsigned long cpu_util(int cpu);
static inline bool cpu_overutilized(int cpu)
{
return !fits_capacity(cpu_util(cpu), capacity_of(cpu));
@@ -5223,6 +5236,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
break;
cfs_rq->h_nr_running++;
cfs_rq->idle_h_nr_running += idle_h_nr_running;
walt_inc_cfs_rq_stats(cfs_rq, p);
flags = ENQUEUE_WAKEUP;
}
@@ -5231,6 +5245,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
cfs_rq = cfs_rq_of(se);
cfs_rq->h_nr_running++;
cfs_rq->idle_h_nr_running += idle_h_nr_running;
walt_inc_cfs_rq_stats(cfs_rq, p);
if (cfs_rq_throttled(cfs_rq))
break;
@@ -5241,6 +5256,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
if (!se) {
add_nr_running(rq, 1);
inc_rq_walt_stats(rq, p);
/*
* Since new tasks are assigned an initial util_avg equal to
* half of the spare capacity of their CPU, tiny tasks have the
@@ -5308,6 +5324,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
break;
cfs_rq->h_nr_running--;
cfs_rq->idle_h_nr_running -= idle_h_nr_running;
walt_dec_cfs_rq_stats(cfs_rq, p);
/* Don't dequeue parent if it has other entities besides us */
if (cfs_rq->load.weight) {
@@ -5328,6 +5345,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
cfs_rq = cfs_rq_of(se);
cfs_rq->h_nr_running--;
cfs_rq->idle_h_nr_running -= idle_h_nr_running;
walt_dec_cfs_rq_stats(cfs_rq, p);
if (cfs_rq_throttled(cfs_rq))
break;
@@ -5336,8 +5354,10 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
update_cfs_group(se);
}
if (!se) {
sub_nr_running(rq, 1);
dec_rq_walt_stats(rq, p);
}
util_est_dequeue(&rq->cfs, p, task_sleep);
hrtick_update(rq);
@@ -5375,11 +5395,6 @@ static unsigned long cpu_runnable_load(struct rq *rq)
return cfs_rq_runnable_load_avg(&rq->cfs);
}
static unsigned long capacity_of(int cpu)
{
return cpu_rq(cpu)->cpu_capacity;
}
static unsigned long cpu_avg_load_per_task(int cpu)
{
struct rq *rq = cpu_rq(cpu);
@@ -6047,58 +6062,6 @@ static unsigned int uclamp_task_util(struct task_struct *p)
#endif
}
/**
* Amount of capacity of a CPU that is (estimated to be) used by CFS tasks
* @cpu: the CPU to get the utilization of
*
* The unit of the return value must be the one of capacity so we can compare
* the utilization with the capacity of the CPU that is available for CFS task
* (ie cpu_capacity).
*
* cfs_rq.avg.util_avg is the sum of running time of runnable tasks plus the
* recent utilization of currently non-runnable tasks on a CPU. It represents
* the amount of utilization of a CPU in the range [0..capacity_orig] where
* capacity_orig is the cpu_capacity available at the highest frequency
* (arch_scale_freq_capacity()).
* The utilization of a CPU converges towards a sum equal to or less than the
* current capacity (capacity_curr <= capacity_orig) of the CPU because it is
* the running time on this CPU scaled by capacity_curr.
*
* The estimated utilization of a CPU is defined to be the maximum between its
* cfs_rq.avg.util_avg and the sum of the estimated utilization of the tasks
* currently RUNNABLE on that CPU.
* This allows to properly represent the expected utilization of a CPU which
* has just got a big task running since a long sleep period. At the same time
* however it preserves the benefits of the "blocked utilization" in
* describing the potential for other tasks waking up on the same CPU.
*
* Nevertheless, cfs_rq.avg.util_avg can be higher than capacity_curr or even
* higher than capacity_orig because of unfortunate rounding in
* cfs.avg.util_avg or just after migrating tasks and new task wakeups until
* the average stabilizes with the new running time. We need to check that the
* utilization stays within the range of [0..capacity_orig] and cap it if
* necessary. Without utilization capping, a group could be seen as overloaded
* (CPU0 utilization at 121% + CPU1 utilization at 80%) whereas CPU1 has 20% of
* available capacity. We allow utilization to overshoot capacity_curr (but not
* capacity_orig) as it useful for predicting the capacity required after task
* migrations (scheduler-driven DVFS).
*
* Return: the (estimated) utilization for the specified CPU
*/
static inline unsigned long cpu_util(int cpu)
{
struct cfs_rq *cfs_rq;
unsigned int util;
cfs_rq = &cpu_rq(cpu)->cfs;
util = READ_ONCE(cfs_rq->avg.util_avg);
if (sched_feat(UTIL_EST))
util = max(util, READ_ONCE(cfs_rq->avg.util_est.enqueued));
return min_t(unsigned long, util, capacity_orig_of(cpu));
}
/*
* cpu_util_without: compute cpu utilization without any contributions from *p
* @cpu: the CPU which utilization is requested
@@ -6114,13 +6077,29 @@ static inline unsigned long cpu_util(int cpu)
*/
static unsigned long cpu_util_without(int cpu, struct task_struct *p)
{
#ifndef CONFIG_SCHED_WALT
struct cfs_rq *cfs_rq;
#endif
unsigned int util;
#ifdef CONFIG_SCHED_WALT
/*
* WALT does not decay idle tasks in the same manner
* as PELT, so it makes little sense to subtract task
* utilization from cpu utilization. Instead just use
* cpu_util for this case.
*/
if (likely(p->state == TASK_WAKING))
return cpu_util(cpu);
#endif
/* Task has no contribution or is new */
if (cpu != task_cpu(p) || !READ_ONCE(p->se.avg.last_update_time))
return cpu_util(cpu);
#ifdef CONFIG_SCHED_WALT
util = max_t(long, cpu_util(cpu) - task_util(p), 0);
#else
cfs_rq = &cpu_rq(cpu)->cfs;
util = READ_ONCE(cfs_rq->avg.util_avg);
@@ -6179,6 +6158,7 @@ static unsigned long cpu_util_without(int cpu, struct task_struct *p)
util = max(util, estimated);
}
#endif
/*
* Utilization (estimated) can exceed the CPU capacity, thus let's
@@ -6188,6 +6168,18 @@ static unsigned long cpu_util_without(int cpu, struct task_struct *p)
return min_t(unsigned long, util, capacity_orig_of(cpu));
}
/*
* Returns the current capacity of cpu after applying both
* cpu and freq scaling.
*/
unsigned long capacity_curr_of(int cpu)
{
unsigned long max_cap = cpu_rq(cpu)->cpu_capacity_orig;
unsigned long scale_freq = arch_scale_freq_capacity(cpu);
return cap_scale(max_cap, scale_freq);
}
/*
* Disable WAKE_AFFINE in the case where task @p doesn't fit in the
* capacity of either the waking CPU @cpu or the previous CPU @prev_cpu.
@@ -6344,7 +6336,7 @@ compute_energy(struct task_struct *p, int dst_cpu, struct perf_domain *pd)
* other use-cases too. So, until someone finds a better way to solve this,
* let's keep things simple by re-using the existing slow path.
*/
int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu, int sync)
{
unsigned long prev_delta = ULONG_MAX, best_delta = ULONG_MAX;
struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
@@ -7363,7 +7355,13 @@ static void detach_task(struct task_struct *p, struct lb_env *env)
lockdep_assert_held(&env->src_rq->lock);
deactivate_task(env->src_rq, p, DEQUEUE_NOCLOCK);
lockdep_off();
double_lock_balance(env->src_rq, env->dst_rq);
if (!(env->src_rq->clock_update_flags & RQCF_UPDATED))
update_rq_clock(env->src_rq);
set_task_cpu(p, env->dst_cpu);
double_unlock_balance(env->src_rq, env->dst_rq);
lockdep_on();
}
/*
@@ -8891,8 +8889,6 @@ static int need_active_balance(struct lb_env *env)
return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2);
}
static int active_load_balance_cpu_stop(void *data);
static int should_we_balance(struct lb_env *env)
{
struct sched_group *sg = env->sd->groups;
@@ -9244,7 +9240,7 @@ update_next_balance(struct sched_domain *sd, unsigned long *next_balance)
* least 1 task to be running on each physical CPU where possible, and
* avoids physical / logical imbalances.
*/
int active_load_balance_cpu_stop(void *data)
{
struct rq *busiest_rq = data;
int busiest_cpu = cpu_of(busiest_rq);
@@ -10615,6 +10611,10 @@ const struct sched_class fair_sched_class = {
#ifdef CONFIG_UCLAMP_TASK #ifdef CONFIG_UCLAMP_TASK
.uclamp_enabled = 1, .uclamp_enabled = 1,
#endif #endif
#ifdef CONFIG_SCHED_WALT
.fixup_walt_sched_stats = walt_fixup_sched_stats_fair,
#endif
};
#ifdef CONFIG_SCHED_DEBUG
@@ -61,7 +61,8 @@ static noinline int __cpuidle cpu_idle_poll(void)
stop_critical_timings();
while (!tif_need_resched() &&
(cpu_idle_force_poll || tick_check_broadcast_expired() ||
is_reserved(smp_processor_id())))
cpu_relax();
start_critical_timings();
trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
@@ -257,7 +258,8 @@ static void do_idle(void)
* broadcast device expired for us, we don't want to go deep
* idle as we know that the IPI is going to arrive right away.
*/
if (cpu_idle_force_poll || tick_check_broadcast_expired() ||
is_reserved(smp_processor_id())) {
tick_nohz_idle_restart_tick();
cpu_idle_poll();
} else {
@@ -6,6 +6,7 @@
#include "sched.h" #include "sched.h"
#include "pelt.h" #include "pelt.h"
#include "walt.h"
int sched_rr_timeslice = RR_TIMESLICE; int sched_rr_timeslice = RR_TIMESLICE;
int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE; int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
@@ -2388,6 +2389,10 @@ const struct sched_class rt_sched_class = {
#ifdef CONFIG_UCLAMP_TASK
.uclamp_enabled = 1,
#endif
#ifdef CONFIG_SCHED_WALT
.fixup_walt_sched_stats = fixup_walt_sched_stats_common,
#endif
};
#ifdef CONFIG_RT_GROUP_SCHED
@@ -84,6 +84,73 @@
struct rq;
struct cpuidle_state;
extern __read_mostly bool sched_predl;
struct sched_walt_cpu_load {
unsigned long prev_window_util;
unsigned long nl;
unsigned long pl;
bool rtgb_active;
u64 ws;
};
#ifdef CONFIG_SCHED_WALT
#define DECLARE_BITMAP_ARRAY(name, nr, bits) \
unsigned long name[nr][BITS_TO_LONGS(bits)]
extern unsigned int __weak sched_ravg_window;
struct walt_sched_stats {
int nr_big_tasks;
u64 cumulative_runnable_avg_scaled;
u64 pred_demands_sum_scaled;
};
struct cpu_cycle {
u64 cycles;
u64 time;
};
struct group_cpu_time {
u64 curr_runnable_sum;
u64 prev_runnable_sum;
u64 nt_curr_runnable_sum;
u64 nt_prev_runnable_sum;
};
struct load_subtractions {
u64 window_start;
u64 subs;
u64 new_subs;
};
#define NUM_TRACKED_WINDOWS 2
#define NUM_LOAD_INDICES 1000
struct sched_cluster {
raw_spinlock_t load_lock;
struct list_head list;
struct cpumask cpus;
int id;
int max_power_cost;
int min_power_cost;
int max_possible_capacity;
int efficiency; /* Differentiate cpus with different IPC capability */
unsigned int exec_scale_factor;
/*
* max_freq = user maximum
* max_mitigated_freq = thermal defined maximum
* max_possible_freq = maximum supported by hardware
*/
unsigned int cur_freq, max_freq, max_mitigated_freq, min_freq;
unsigned int max_possible_freq;
bool freq_init_done;
u64 aggr_grp_load;
};
extern __weak cpumask_t asym_cap_sibling_cpus;
#endif /* CONFIG_SCHED_WALT */
/* task_struct::on_rq states: */
#define TASK_ON_RQ_QUEUED 1
#define TASK_ON_RQ_MIGRATING 2
@@ -401,7 +468,24 @@ struct task_group {
struct uclamp_se uclamp[UCLAMP_CNT];
/* Latency-sensitive flag used for a task group */
unsigned int latency_sensitive;
#ifdef CONFIG_SCHED_WALT
/* Toggle ability to override sched boost enabled */
bool sched_boost_no_override;
/*
* Controls whether a cgroup is eligible for sched boost or not. This
* can temporarily be disabled by the kernel based on the no_override
* flag above.
*/
bool sched_boost_enabled;
/*
* Controls whether tasks of this cgroup should be colocated with each
* other and tasks of other cgroups that have the same flag turned on.
*/
bool colocate;
/* Controls whether further updates are allowed to the colocate flag */
bool colocate_update_disabled;
#endif /* CONFIG_SCHED_WALT */
#endif /* CONFIG_UCLAMP_TASK_GROUP */
};
@@ -565,6 +649,10 @@ struct cfs_rq {
struct list_head leaf_cfs_rq_list;
struct task_group *tg; /* group that "owns" this runqueue */
#ifdef CONFIG_SCHED_WALT
struct walt_sched_stats walt_stats;
#endif
#ifdef CONFIG_CFS_BANDWIDTH
int runtime_enabled;
s64 runtime_remaining;
@@ -961,6 +1049,41 @@ struct rq {
u64 max_idle_balance_cost;
#endif
#ifdef CONFIG_SCHED_WALT
struct task_struct *push_task;
struct sched_cluster *cluster;
struct cpumask freq_domain_cpumask;
struct walt_sched_stats walt_stats;
u64 window_start;
u32 prev_window_size;
unsigned long walt_flags;
u64 cur_irqload;
u64 avg_irqload;
u64 irqload_ts;
struct task_struct *ed_task;
struct cpu_cycle cc;
u64 old_busy_time, old_busy_time_group;
u64 old_estimated_time;
u64 curr_runnable_sum;
u64 prev_runnable_sum;
u64 nt_curr_runnable_sum;
u64 nt_prev_runnable_sum;
u64 cum_window_demand_scaled;
struct group_cpu_time grp_time;
struct load_subtractions load_subs[NUM_TRACKED_WINDOWS];
DECLARE_BITMAP_ARRAY(top_tasks_bitmap,
NUM_TRACKED_WINDOWS, NUM_LOAD_INDICES);
u8 *top_tasks[NUM_TRACKED_WINDOWS];
u8 curr_table;
int prev_top;
int curr_top;
bool notif_pending;
u64 last_cc_update;
u64 cycles;
#endif /* CONFIG_SCHED_WALT */
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
u64 prev_irq_time;
#endif
@@ -1306,8 +1429,6 @@ enum numa_faults_stats {
};
extern void sched_setnuma(struct task_struct *p, int node);
extern int migrate_task_to(struct task_struct *p, int cpu);
extern int migrate_swap(struct task_struct *p, struct task_struct *t,
int cpu, int scpu);
extern void init_numa_balancing(unsigned long clone_flags, struct task_struct *p);
#else
static inline void
@@ -1316,6 +1437,9 @@ init_numa_balancing(unsigned long clone_flags, struct task_struct *p)
}
#endif /* CONFIG_NUMA_BALANCING */
extern int migrate_swap(struct task_struct *p, struct task_struct *t,
int cpu, int scpu);
#ifdef CONFIG_SMP
static inline void
@@ -1782,8 +1906,15 @@ struct sched_class {
#ifdef CONFIG_FAIR_GROUP_SCHED
void (*task_change_group)(struct task_struct *p, int type);
#endif
#ifdef CONFIG_SCHED_WALT
void (*fixup_walt_sched_stats)(struct rq *rq, struct task_struct *p,
u16 updated_demand_scaled,
u16 updated_pred_demand_scaled);
#endif
};
static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
{
WARN_ON_ONCE(rq->curr != prev);
@@ -1960,6 +2091,7 @@ static inline void add_nr_running(struct rq *rq, unsigned count)
{
unsigned prev_nr = rq->nr_running;
sched_update_nr_prod(cpu_of(rq), count, true);
rq->nr_running = prev_nr + count;
#ifdef CONFIG_SMP
@@ -1974,6 +2106,7 @@ static inline void add_nr_running(struct rq *rq, unsigned count)
static inline void sub_nr_running(struct rq *rq, unsigned count)
{
sched_update_nr_prod(cpu_of(rq), count, false);
rq->nr_running -= count;
/* Check if we still need preemption */
sched_update_tick_dependency(rq);
@@ -2014,6 +2147,18 @@ static inline int hrtick_enabled(struct rq *rq)
#endif /* CONFIG_SCHED_HRTICK */
#ifdef CONFIG_SCHED_WALT
u64 __weak sched_ktime_clock(void);
unsigned long __weak
cpu_util_freq_walt(int cpu, struct sched_walt_cpu_load *walt_load);
#else
#define sched_ravg_window TICK_NSEC
static inline u64 sched_ktime_clock(void)
{
return 0;
}
#endif
#ifndef arch_scale_freq_capacity
static __always_inline
unsigned long arch_scale_freq_capacity(int cpu)
@@ -2031,8 +2176,127 @@ unsigned long arch_scale_max_freq_capacity(struct sched_domain *sd, int cpu)
}
#endif
unsigned long capacity_curr_of(int cpu);
#ifdef CONFIG_SMP
static inline unsigned long capacity_of(int cpu)
{
return cpu_rq(cpu)->cpu_capacity;
}
static inline unsigned long capacity_orig_of(int cpu)
{
return cpu_rq(cpu)->cpu_capacity_orig;
}
static inline unsigned long task_util(struct task_struct *p)
{
#ifdef CONFIG_SCHED_WALT
return p->ravg.demand_scaled;
#endif
return READ_ONCE(p->se.avg.util_avg);
}
/**
* Amount of capacity of a CPU that is (estimated to be) used by CFS tasks
* @cpu: the CPU to get the utilization of
*
* The unit of the return value must be the one of capacity so we can compare
* the utilization with the capacity of the CPU that is available for CFS task
* (ie cpu_capacity).
*
* cfs_rq.avg.util_avg is the sum of running time of runnable tasks plus the
* recent utilization of currently non-runnable tasks on a CPU. It represents
* the amount of utilization of a CPU in the range [0..capacity_orig] where
* capacity_orig is the cpu_capacity available at the highest frequency
* (arch_scale_freq_capacity()).
* The utilization of a CPU converges towards a sum equal to or less than the
* current capacity (capacity_curr <= capacity_orig) of the CPU because it is
* the running time on this CPU scaled by capacity_curr.
*
* The estimated utilization of a CPU is defined to be the maximum between its
* cfs_rq.avg.util_avg and the sum of the estimated utilization of the tasks
* currently RUNNABLE on that CPU.
* This allows to properly represent the expected utilization of a CPU which
* has just got a big task running since a long sleep period. At the same time
* however it preserves the benefits of the "blocked utilization" in
* describing the potential for other tasks waking up on the same CPU.
*
* Nevertheless, cfs_rq.avg.util_avg can be higher than capacity_curr or even
* higher than capacity_orig because of unfortunate rounding in
* cfs.avg.util_avg or just after migrating tasks and new task wakeups until
* the average stabilizes with the new running time. We need to check that the
* utilization stays within the range of [0..capacity_orig] and cap it if
* necessary. Without utilization capping, a group could be seen as overloaded
* (CPU0 utilization at 121% + CPU1 utilization at 80%) whereas CPU1 has 20% of
* available capacity. We allow utilization to overshoot capacity_curr (but not
* capacity_orig) as it is useful for predicting the capacity required after task
* migrations (scheduler-driven DVFS).
*
* Return: the (estimated) utilization for the specified CPU
*/
static inline unsigned long cpu_util(int cpu)
{
struct cfs_rq *cfs_rq;
unsigned int util;
#ifdef CONFIG_SCHED_WALT
u64 walt_cpu_util =
cpu_rq(cpu)->walt_stats.cumulative_runnable_avg_scaled;
return min_t(unsigned long, walt_cpu_util, capacity_orig_of(cpu));
#endif
cfs_rq = &cpu_rq(cpu)->cfs;
util = READ_ONCE(cfs_rq->avg.util_avg);
if (sched_feat(UTIL_EST))
util = max(util, READ_ONCE(cfs_rq->avg.util_est.enqueued));
return min_t(unsigned long, util, capacity_orig_of(cpu));
}
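A small worked example of the clamping on the PELT side of this #ifdef may help (numbers invented); on a CONFIG_SCHED_WALT build the early return above is taken instead, clamping the WALT demand sum the same way:
/*
 * Hypothetical PELT-path example with UTIL_EST enabled:
 *   cfs_rq->avg.util_avg          = 1300   transient overshoot
 *   cfs_rq->avg.util_est.enqueued =  900
 *   capacity_orig_of(cpu)         = 1024
 *   util = max(1300, 900) = 1300, clamped to 1024 on return
 */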
static inline unsigned long cpu_util_cum(int cpu, int delta)
{
u64 util = cpu_rq(cpu)->cfs.avg.util_avg;
unsigned long capacity = capacity_orig_of(cpu);
#ifdef CONFIG_SCHED_WALT
util = cpu_rq(cpu)->cum_window_demand_scaled;
#endif
delta += util;
if (delta < 0)
return 0;
return (delta >= capacity) ? capacity : delta;
}
static inline unsigned long
cpu_util_freq(int cpu, struct sched_walt_cpu_load *walt_load)
{
#ifdef CONFIG_SCHED_WALT
return cpu_util_freq_walt(cpu, walt_load);
#else
return cpu_util(cpu);
#endif
}
extern unsigned int capacity_margin_freq;
static inline unsigned long
add_capacity_margin(unsigned long cpu_capacity, int cpu)
{
cpu_capacity = cpu_capacity * capacity_margin_freq *
(100 + per_cpu(sched_load_boost, cpu));
cpu_capacity /= 100;
cpu_capacity /= SCHED_CAPACITY_SCALE;
return cpu_capacity;
}
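A rough sketch of the margin arithmetic follows; the capacity_margin_freq value is an assumption for illustration, not a default taken from this patch:
/*
 * Hypothetical example:
 *   cpu_capacity         =  800
 *   capacity_margin_freq = 1280   assumed ~25% over SCHED_CAPACITY_SCALE
 *   sched_load_boost     =    0
 *
 *   800 * 1280 * (100 + 0) / 100 / 1024 = 1000
 *
 * i.e. a value of 800 is inflated to about 1000 before being compared
 * against a CPU's available capacity.
 */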
#endif /* CONFIG_SMP */
#ifdef CONFIG_SMP
#ifdef CONFIG_PREEMPT
static inline void double_rq_lock(struct rq *rq1, struct rq *rq2);
@@ -2345,6 +2609,11 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
{
struct update_util_data *data;
#ifdef CONFIG_SCHED_WALT
if (!(flags & SCHED_CPUFREQ_WALT))
return;
#endif
data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data,
cpu_of(rq)));
if (data)
@@ -2432,13 +2701,6 @@ static inline bool uclamp_latency_sensitive(struct task_struct *p)
# define arch_scale_freq_invariant() false
#endif
#ifdef CONFIG_SMP
static inline unsigned long capacity_orig_of(int cpu)
{
return cpu_rq(cpu)->cpu_capacity_orig;
}
#endif
/**
* enum schedutil_type - CPU utilization type
* @FREQUENCY_UTIL: Utilization used to select frequency
@@ -2570,3 +2832,499 @@ static inline void membarrier_switch_mm(struct rq *rq,
{
}
#endif
enum sched_boost_policy {
SCHED_BOOST_NONE,
SCHED_BOOST_ON_BIG,
SCHED_BOOST_ON_ALL,
};
#ifdef CONFIG_SCHED_WALT
static inline int cluster_first_cpu(struct sched_cluster *cluster)
{
return cpumask_first(&cluster->cpus);
}
struct related_thread_group {
int id;
raw_spinlock_t lock;
struct list_head tasks;
struct list_head list;
bool skip_min;
struct rcu_head rcu;
u64 last_update;
u64 downmigrate_ts;
u64 start_ts;
};
extern struct sched_cluster *sched_cluster[NR_CPUS];
extern unsigned int __weak sched_disable_window_stats;
extern unsigned int max_possible_freq;
extern unsigned int min_max_freq;
extern unsigned int max_possible_efficiency;
extern unsigned int min_possible_efficiency;
extern unsigned int max_possible_capacity;
extern unsigned int __weak min_max_possible_capacity;
extern unsigned int max_power_cost;
extern unsigned int __read_mostly __weak sched_init_task_load_windows;
extern unsigned int __read_mostly __weak sched_load_granule;
extern int __weak update_preferred_cluster(struct related_thread_group *grp,
struct task_struct *p, u32 old_load, bool from_tick);
extern void __weak set_preferred_cluster(struct related_thread_group *grp);
extern void __weak add_new_task_to_grp(struct task_struct *new);
#define NO_BOOST 0
#define FULL_THROTTLE_BOOST 1
#define CONSERVATIVE_BOOST 2
#define RESTRAINED_BOOST 3
#define FULL_THROTTLE_BOOST_DISABLE -1
#define CONSERVATIVE_BOOST_DISABLE -2
#define RESTRAINED_BOOST_DISABLE -3
#define MAX_NUM_BOOST_TYPE (RESTRAINED_BOOST+1)
static inline int asym_cap_siblings(int cpu1, int cpu2)
{
return (cpumask_test_cpu(cpu1, &asym_cap_sibling_cpus) &&
cpumask_test_cpu(cpu2, &asym_cap_sibling_cpus));
}
static inline int cpu_max_possible_capacity(int cpu)
{
return cpu_rq(cpu)->cluster->max_possible_capacity;
}
static inline unsigned int cluster_max_freq(struct sched_cluster *cluster)
{
/*
* Governor and thermal driver don't know the other party's mitigation
* voting. So struct cluster saves both and returns min() for the current
* cluster fmax.
*/
return min(cluster->max_mitigated_freq, cluster->max_freq);
}
static inline unsigned int cpu_max_freq(int cpu)
{
return cluster_max_freq(cpu_rq(cpu)->cluster);
}
static inline unsigned int cpu_max_possible_freq(int cpu)
{
return cpu_rq(cpu)->cluster->max_possible_freq;
}
static inline bool hmp_capable(void)
{
return max_possible_capacity != min_max_possible_capacity;
}
static inline bool is_max_capacity_cpu(int cpu)
{
return cpu_max_possible_capacity(cpu) == max_possible_capacity;
}
static inline bool is_min_capacity_cpu(int cpu)
{
return cpu_max_possible_capacity(cpu) == min_max_possible_capacity;
}
static inline unsigned int task_load(struct task_struct *p)
{
return p->ravg.demand;
}
static inline unsigned int task_pl(struct task_struct *p)
{
return p->ravg.pred_demand;
}
static inline bool task_in_related_thread_group(struct task_struct *p)
{
return !!(rcu_access_pointer(p->grp) != NULL);
}
static inline
struct related_thread_group *task_related_thread_group(struct task_struct *p)
{
return rcu_dereference(p->grp);
}
/* Is frequency of two cpus synchronized with each other? */
static inline int same_freq_domain(int src_cpu, int dst_cpu)
{
struct rq *rq = cpu_rq(src_cpu);
if (src_cpu == dst_cpu)
return 1;
if (asym_cap_siblings(src_cpu, dst_cpu))
return 1;
return cpumask_test_cpu(dst_cpu, &rq->freq_domain_cpumask);
}
#define CPU_RESERVED 1
extern enum sched_boost_policy __weak boost_policy;
extern unsigned int __weak sched_task_filter_util;
static inline enum sched_boost_policy sched_boost_policy(void)
{
return boost_policy;
}
extern unsigned int __weak sched_boost_type;
static inline int sched_boost(void)
{
return sched_boost_type;
}
static inline bool rt_boost_on_big(void)
{
return sched_boost() == FULL_THROTTLE_BOOST ?
(sched_boost_policy() == SCHED_BOOST_ON_BIG) : false;
}
static inline bool is_full_throttle_boost(void)
{
return sched_boost() == FULL_THROTTLE_BOOST;
}
extern int __weak preferred_cluster(struct sched_cluster *cluster,
struct task_struct *p);
extern struct sched_cluster *rq_cluster(struct rq *rq);
#ifdef CONFIG_UCLAMP_TASK_GROUP
static inline bool task_sched_boost(struct task_struct *p)
{
struct cgroup_subsys_state *css = task_css(p, cpu_cgrp_id);
struct task_group *tg;
if (!css)
return false;
tg = container_of(css, struct task_group, css);
return tg->sched_boost_enabled;
}
extern int __weak sync_cgroup_colocation(struct task_struct *p, bool insert);
extern void update_cgroup_boost_settings(void);
extern void restore_cgroup_boost_settings(void);
#else
static inline bool
same_schedtg(struct task_struct *tsk1, struct task_struct *tsk2)
{
return true;
}
static inline bool task_sched_boost(struct task_struct *p)
{
return true;
}
static inline void update_cgroup_boost_settings(void) { }
static inline void restore_cgroup_boost_settings(void) { }
#endif
extern int __weak alloc_related_thread_groups(void);
extern void __weak check_for_migration(struct rq *rq, struct task_struct *p);
static inline int is_reserved(int cpu)
{
struct rq *rq = cpu_rq(cpu);
return test_bit(CPU_RESERVED, &rq->walt_flags);
}
static inline int mark_reserved(int cpu)
{
struct rq *rq = cpu_rq(cpu);
return test_and_set_bit(CPU_RESERVED, &rq->walt_flags);
}
static inline void clear_reserved(int cpu)
{
struct rq *rq = cpu_rq(cpu);
clear_bit(CPU_RESERVED, &rq->walt_flags);
}
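A minimal sketch of how these helpers are intended to pair up; the caller below is hypothetical (the real users are WALT's migration/active-balance paths), and the idle-loop hunks earlier in this patch are what poll on is_reserved():
static inline void example_reserve_for_push(int target_cpu)
{
	/* test_and_set_bit() returns the old bit: 0 means we own the reservation */
	if (!mark_reserved(target_cpu)) {
		/*
		 * While reserved, do_idle()/cpu_idle_poll() on target_cpu keep
		 * polling instead of entering a deep idle state, so a pushed
		 * task is picked up promptly.
		 */
		/* ... queue stopper work / push the task here ... */
		clear_reserved(target_cpu);
	}
}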
static inline bool
task_in_cum_window_demand(struct rq *rq, struct task_struct *p)
{
return cpu_of(rq) == task_cpu(p) && (p->on_rq || p->last_sleep_ts >=
rq->window_start);
}
static inline void walt_fixup_cum_window_demand(struct rq *rq, s64 scaled_delta)
{
rq->cum_window_demand_scaled += scaled_delta;
if (unlikely((s64)rq->cum_window_demand_scaled < 0))
rq->cum_window_demand_scaled = 0;
}
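A quick numeric example of the underflow clamp (values invented):
/*
 *   cum_window_demand_scaled = 100, scaled_delta = -150
 *   100 + (-150) = -50, caught by the (s64) check and reset to 0
 *   instead of wrapping around as a u64
 */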
extern unsigned long __weak thermal_cap(int cpu);
extern void __weak clear_walt_request(int cpu);
extern enum sched_boost_policy sched_boost_policy(void);
extern void sched_boost_parse_dt(void);
extern void __weak clear_ed_task(struct task_struct *p, struct rq *rq);
extern bool __weak early_detection_notify(struct rq *rq, u64 wallclock);
static inline unsigned int power_cost(int cpu, u64 demand)
{
return cpu_max_possible_capacity(cpu);
}
void __weak note_task_waking(struct task_struct *p, u64 wallclock);
static inline bool task_placement_boost_enabled(struct task_struct *p)
{
if (task_sched_boost(p))
return sched_boost_policy() != SCHED_BOOST_NONE;
return false;
}
static inline enum sched_boost_policy task_boost_policy(struct task_struct *p)
{
enum sched_boost_policy policy = task_sched_boost(p) ?
sched_boost_policy() :
SCHED_BOOST_NONE;
if (policy == SCHED_BOOST_ON_BIG) {
/*
* Filter out tasks less than min task util threshold
* under conservative boost.
*/
if (sched_boost() == CONSERVATIVE_BOOST &&
task_util(p) <= sched_task_filter_util)
policy = SCHED_BOOST_NONE;
}
return policy;
}
static inline bool is_min_capacity_cluster(struct sched_cluster *cluster)
{
return is_min_capacity_cpu(cluster_first_cpu(cluster));
}
extern void __weak walt_fixup_sched_stats_fair(struct rq *rq,
struct task_struct *p,
u16 updated_demand_scaled,
u16 updated_pred_demand_scaled);
extern void __weak walt_fixup_nr_big_tasks(struct rq *rq, struct task_struct *p,
int delta, bool inc);
#else /* CONFIG_SCHED_WALT */
struct walt_sched_stats;
struct related_thread_group;
struct sched_cluster;
static inline bool task_sched_boost(struct task_struct *p)
{
return false;
}
static inline bool task_placement_boost_enabled(struct task_struct *p)
{
return false;
}
static inline void check_for_migration(struct rq *rq, struct task_struct *p) { }
static inline int sched_boost(void)
{
return 0;
}
static inline bool rt_boost_on_big(void)
{
return false;
}
static inline bool is_full_throttle_boost(void)
{
return false;
}
static inline enum sched_boost_policy task_boost_policy(struct task_struct *p)
{
return SCHED_BOOST_NONE;
}
static inline bool
task_in_cum_window_demand(struct rq *rq, struct task_struct *p)
{
return false;
}
static inline bool hmp_capable(void) { return false; }
static inline bool is_max_capacity_cpu(int cpu) { return true; }
static inline bool is_min_capacity_cpu(int cpu) { return true; }
static inline int
preferred_cluster(struct sched_cluster *cluster, struct task_struct *p)
{
return -1;
}
static inline struct sched_cluster *rq_cluster(struct rq *rq)
{
return NULL;
}
static inline int asym_cap_siblings(int cpu1, int cpu2) { return 0; }
static inline void set_preferred_cluster(struct related_thread_group *grp) { }
static inline bool task_in_related_thread_group(struct task_struct *p)
{
return false;
}
static inline
struct related_thread_group *task_related_thread_group(struct task_struct *p)
{
return NULL;
}
static inline u32 task_load(struct task_struct *p) { return 0; }
static inline u32 task_pl(struct task_struct *p) { return 0; }
static inline int update_preferred_cluster(struct related_thread_group *grp,
struct task_struct *p, u32 old_load, bool from_tick)
{
return 0;
}
static inline void add_new_task_to_grp(struct task_struct *new) {}
static inline int same_freq_domain(int src_cpu, int dst_cpu)
{
return 1;
}
static inline int mark_reserved(int cpu)
{
return 0;
}
static inline void clear_reserved(int cpu) { }
static inline int alloc_related_thread_groups(void) { return 0; }
static inline void walt_fixup_cum_window_demand(struct rq *rq,
s64 scaled_delta) { }
#ifdef CONFIG_SMP
static inline unsigned long thermal_cap(int cpu)
{
return cpu_rq(cpu)->cpu_capacity_orig;
}
#endif
static inline void clear_walt_request(int cpu) { }
static inline int is_reserved(int cpu)
{
return 0;
}
static inline enum sched_boost_policy sched_boost_policy(void)
{
return SCHED_BOOST_NONE;
}
static inline void sched_boost_parse_dt(void) { }
static inline void clear_ed_task(struct task_struct *p, struct rq *rq) { }
static inline bool early_detection_notify(struct rq *rq, u64 wallclock)
{
return 0;
}
#ifdef CONFIG_SMP
static inline unsigned int power_cost(int cpu, u64 demand)
{
return SCHED_CAPACITY_SCALE;
}
#endif
static inline void note_task_waking(struct task_struct *p, u64 wallclock) { }
#endif /* CONFIG_SCHED_WALT */
struct sched_avg_stats {
int nr;
int nr_misfit;
int nr_max;
int nr_scaled;
};
extern void sched_get_nr_running_avg(struct sched_avg_stats *stats);
#if defined(CONFIG_SCHED_WALT) && defined(CONFIG_CFS_BANDWIDTH)
extern void walt_init_cfs_rq_stats(struct cfs_rq *cfs_rq);
extern void walt_inc_cfs_rq_stats(struct cfs_rq *cfs_rq, struct task_struct *p);
extern void walt_dec_cfs_rq_stats(struct cfs_rq *cfs_rq, struct task_struct *p);
extern void walt_inc_throttled_cfs_rq_stats(struct walt_sched_stats *stats,
struct cfs_rq *cfs_rq);
extern void walt_dec_throttled_cfs_rq_stats(struct walt_sched_stats *stats,
struct cfs_rq *cfs_rq);
#else
static inline void walt_init_cfs_rq_stats(struct cfs_rq *cfs_rq) {}
static inline void
walt_inc_cfs_rq_stats(struct cfs_rq *cfs_rq, struct task_struct *p) {}
static inline void
walt_dec_cfs_rq_stats(struct cfs_rq *cfs_rq, struct task_struct *p) {}
#define walt_inc_throttled_cfs_rq_stats(...)
#define walt_dec_throttled_cfs_rq_stats(...)
#endif
#ifdef CONFIG_SMP
#ifdef CONFIG_SCHED_WALT
extern int __weak group_balance_cpu_not_isolated(struct sched_group *sg);
#else
static inline int group_balance_cpu_not_isolated(struct sched_group *sg)
{
return group_balance_cpu(sg);
}
#endif /* CONFIG_SCHED_WALT */
#endif /* CONFIG_SMP */
extern int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu,
int sync);
extern int active_load_balance_cpu_stop(void *data);
#ifdef CONFIG_HOTPLUG_CPU
extern void set_rq_online(struct rq *rq);
extern void set_rq_offline(struct rq *rq);
extern void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf);
extern void calc_load_migrate(struct rq *rq);
#ifdef CONFIG_SCHED_WALT
extern void __weak
detach_one_task_core(struct task_struct *p, struct rq *rq,
struct list_head *tasks);
extern void __weak attach_tasks_core(struct list_head *tasks, struct rq *rq);
#else
static inline void
detach_one_task_core(struct task_struct *p, struct rq *rq,
struct list_head *tasks)
{
}
static inline void attach_tasks_core(struct list_head *tasks, struct rq *rq) {}
#endif
#endif
extern struct task_struct *find_process_by_pid(pid_t pid);
extern void enqueue_task_core(struct rq *rq, struct task_struct *p, int flags);
extern void dequeue_task_core(struct rq *rq, struct task_struct *p, int flags);
@@ -8,6 +8,7 @@
* See kernel/stop_machine.c * See kernel/stop_machine.c
*/ */
#include "sched.h" #include "sched.h"
#include "walt.h"
#ifdef CONFIG_SMP
static int
@@ -50,12 +51,14 @@ static void
enqueue_task_stop(struct rq *rq, struct task_struct *p, int flags)
{
add_nr_running(rq, 1);
walt_inc_cumulative_runnable_avg(rq, p);
}
static void
dequeue_task_stop(struct rq *rq, struct task_struct *p, int flags)
{
sub_nr_running(rq, 1);
walt_dec_cumulative_runnable_avg(rq, p);
}
static void yield_task_stop(struct rq *rq)
@@ -144,4 +147,7 @@ const struct sched_class stop_sched_class = {
.prio_changed = prio_changed_stop,
.switched_to = switched_to_stop,
.update_curr = update_curr_stop,
#ifdef CONFIG_SCHED_WALT
.fixup_walt_sched_stats = fixup_walt_sched_stats_common,
#endif
};
kernel/sched/walt.h (new file, 265 lines)
@@ -0,0 +1,265 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2016-2019, The Linux Foundation. All rights reserved.
*/
#ifndef __WALT_H
#define __WALT_H
#ifdef CONFIG_SCHED_WALT
#include <linux/sched/sysctl.h>
#include <linux/sched/core_ctl.h>
#define EXITING_TASK_MARKER 0xdeaddead
extern void __weak
walt_update_task_ravg(struct task_struct *p, struct rq *rq, int event,
u64 wallclock, u64 irqtime);
static inline void
fixup_cumulative_runnable_avg(struct walt_sched_stats *stats,
s64 demand_scaled_delta,
s64 pred_demand_scaled_delta)
{
if (sched_disable_window_stats)
return;
stats->cumulative_runnable_avg_scaled += demand_scaled_delta;
BUG_ON((s64)stats->cumulative_runnable_avg_scaled < 0);
stats->pred_demands_sum_scaled += pred_demand_scaled_delta;
BUG_ON((s64)stats->pred_demands_sum_scaled < 0);
}
static inline void
walt_inc_cumulative_runnable_avg(struct rq *rq, struct task_struct *p)
{
if (sched_disable_window_stats)
return;
fixup_cumulative_runnable_avg(&rq->walt_stats, p->ravg.demand_scaled,
p->ravg.pred_demand_scaled);
/*
* Add a task's contribution to the cumulative window demand when
*
* (1) task is enqueued with on_rq = 1 i.e migration,
* prio/cgroup/class change.
* (2) task is waking for the first time in this window.
*/
if (p->on_rq || (p->last_sleep_ts < rq->window_start))
walt_fixup_cum_window_demand(rq, p->ravg.demand_scaled);
}
static inline void
walt_dec_cumulative_runnable_avg(struct rq *rq, struct task_struct *p)
{
if (sched_disable_window_stats)
return;
fixup_cumulative_runnable_avg(&rq->walt_stats,
-(s64)p->ravg.demand_scaled,
-(s64)p->ravg.pred_demand_scaled);
/*
* on_rq will be 1 for sleeping tasks. So check if the task
* is migrating or dequeuing in RUNNING state to change the
* prio/cgroup/class.
*/
if (task_on_rq_migrating(p) || p->state == TASK_RUNNING)
walt_fixup_cum_window_demand(rq, -(s64)p->ravg.demand_scaled);
}
extern void __weak
fixup_walt_sched_stats_common(struct rq *rq, struct task_struct *p,
u16 updated_demand_scaled,
u16 updated_pred_demand_scaled);
extern void __weak inc_rq_walt_stats(struct rq *rq, struct task_struct *p);
extern void __weak dec_rq_walt_stats(struct rq *rq, struct task_struct *p);
extern void __weak fixup_busy_time(struct task_struct *p, int new_cpu);
extern void __weak init_new_task_load(struct task_struct *p);
extern void __weak mark_task_starting(struct task_struct *p);
extern void __weak set_window_start(struct rq *rq);
extern bool __weak do_pl_notif(struct rq *rq);
#define SCHED_HIGH_IRQ_TIMEOUT 3
static inline u64 sched_irqload(int cpu)
{
struct rq *rq = cpu_rq(cpu);
s64 delta;
delta = get_jiffies_64() - rq->irqload_ts;
/*
* Current context can be preempted by irq and rq->irqload_ts can be
* updated by irq context so that delta can be negative.
* But this is okay and we can safely return as this means there
* was recent irq occurrence.
*/
if (delta < SCHED_HIGH_IRQ_TIMEOUT)
return rq->avg_irqload;
else
return 0;
}
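A worked example of the staleness check (timestamps and HZ are invented; SCHED_HIGH_IRQ_TIMEOUT is the 3-jiffy threshold defined above):
/*
 *   irqload_ts updated 2 jiffies ago  -> delta = 2  < 3: return avg_irqload
 *   irqload_ts updated 10 jiffies ago -> delta = 10 >= 3: return 0 (stale)
 *   With HZ = 250 a jiffy is 4 ms, so IRQ load older than ~12 ms is ignored.
 */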
static inline int sched_cpu_high_irqload(int cpu)
{
return sched_irqload(cpu) >= sysctl_sched_cpu_high_irqload;
}
static inline int exiting_task(struct task_struct *p)
{
return (p->ravg.sum_history[0] == EXITING_TASK_MARKER);
}
static inline u64
scale_load_to_freq(u64 load, unsigned int src_freq, unsigned int dst_freq)
{
return div64_u64(load * (u64)src_freq, (u64)dst_freq);
}
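A small worked example of the frequency normalization (frequencies invented):
/*
 *   10000000 ns of runtime observed at src_freq = 1000000 kHz,
 *   normalized to dst_freq = 2000000 kHz:
 *     scale_load_to_freq(10000000, 1000000, 2000000) = 5000000 ns
 *   i.e. the same work would take about half the time on the faster clock.
 */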
extern void __weak sched_account_irqstart(int cpu, struct task_struct *curr,
u64 wallclock);
static inline unsigned int max_task_load(void)
{
return sched_ravg_window;
}
extern void __weak update_cluster_topology(void);
extern void __weak init_clusters(void);
extern void sched_account_irqtime(int cpu, struct task_struct *curr,
u64 delta, u64 wallclock);
static inline int same_cluster(int src_cpu, int dst_cpu)
{
return cpu_rq(src_cpu)->cluster == cpu_rq(dst_cpu)->cluster;
}
void __weak walt_sched_init_rq(struct rq *rq);
static inline void walt_update_last_enqueue(struct task_struct *p)
{
p->last_enqueued_ts = sched_ktime_clock();
}
static inline bool is_suh_max(void)
{
return sysctl_sched_user_hint == sched_user_hint_max;
}
#define DEFAULT_CGROUP_COLOC_ID 1
static inline bool walt_should_kick_upmigrate(struct task_struct *p, int cpu)
{
struct related_thread_group *rtg = p->grp;
if (is_suh_max() && rtg && rtg->id == DEFAULT_CGROUP_COLOC_ID &&
rtg->skip_min && p->unfilter)
return is_min_capacity_cpu(cpu);
return false;
}
extern bool is_rtgb_active(void);
extern u64 get_rtgb_active_time(void);
/* utility function to update walt signals at wakeup */
static inline void walt_try_to_wake_up(struct task_struct *p)
{
struct rq *rq = cpu_rq(task_cpu(p));
struct rq_flags rf;
u64 wallclock;
unsigned int old_load;
struct related_thread_group *grp = NULL;
rq_lock_irqsave(rq, &rf);
old_load = task_load(p);
wallclock = sched_ktime_clock();
walt_update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
walt_update_task_ravg(p, rq, TASK_WAKE, wallclock, 0);
note_task_waking(p, wallclock);
rq_unlock_irqrestore(rq, &rf);
rcu_read_lock();
grp = task_related_thread_group(p);
if (update_preferred_cluster(grp, p, old_load, false))
set_preferred_cluster(grp);
rcu_read_unlock();
}
#else /* CONFIG_SCHED_WALT */
static inline void walt_sched_init_rq(struct rq *rq) { }
static inline void walt_update_last_enqueue(struct task_struct *p) { }
static inline void walt_update_task_ravg(struct task_struct *p, struct rq *rq,
int event, u64 wallclock, u64 irqtime) { }
static inline void walt_inc_cumulative_runnable_avg(struct rq *rq,
struct task_struct *p)
{
}
static inline void walt_dec_cumulative_runnable_avg(struct rq *rq,
struct task_struct *p)
{
}
static inline void fixup_busy_time(struct task_struct *p, int new_cpu) { }
static inline void init_new_task_load(struct task_struct *p)
{
}
static inline void mark_task_starting(struct task_struct *p) { }
static inline void set_window_start(struct rq *rq) { }
static inline int sched_cpu_high_irqload(int cpu) { return 0; }
static inline void sched_account_irqstart(int cpu, struct task_struct *curr,
u64 wallclock)
{
}
static inline void update_cluster_topology(void) { }
static inline void init_clusters(void) {}
static inline void sched_account_irqtime(int cpu, struct task_struct *curr,
u64 delta, u64 wallclock)
{
}
static inline int same_cluster(int src_cpu, int dst_cpu) { return 1; }
static inline bool do_pl_notif(struct rq *rq) { return false; }
static inline void
inc_rq_walt_stats(struct rq *rq, struct task_struct *p) { }
static inline void
dec_rq_walt_stats(struct rq *rq, struct task_struct *p) { }
static inline void
fixup_walt_sched_stats_common(struct rq *rq, struct task_struct *p,
u16 updated_demand_scaled,
u16 updated_pred_demand_scaled)
{
}
static inline u64 sched_irqload(int cpu)
{
return 0;
}
static inline bool walt_should_kick_upmigrate(struct task_struct *p, int cpu)
{
return false;
}
static inline u64 get_rtgb_active_time(void)
{
return 0;
}
#define walt_try_to_wake_up(a) {}
#endif /* CONFIG_SCHED_WALT */
#endif
@@ -64,6 +64,7 @@
#include <linux/binfmts.h>
#include <linux/sched/sysctl.h>
#include <linux/sched/coredump.h>
#include <linux/sched/stat.h>
#include <linux/kexec.h>
#include <linux/bpf.h>
#include <linux/mount.h>
@@ -126,6 +127,7 @@ static int sixty = 60;
#endif
static int __maybe_unused neg_one = -1;
static int __maybe_unused two = 2;
static int __maybe_unused four = 4;
static unsigned long zero_ul;
@@ -140,7 +142,12 @@ static int ten_thousand = 10000;
static int six_hundred_forty_kb = 640 * 1024;
#endif
#ifdef CONFIG_SCHED_WALT
static int neg_three = -3;
static int three = 3;
static int two_hundred_fifty_five = 255;
const int sched_user_hint_max = 1000;
static unsigned int ns_per_sec = NSEC_PER_SEC;
static unsigned int one_hundred_thousand = 100000;
#endif
/* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
@@ -231,6 +238,10 @@ static int proc_dostring_coredump(struct ctl_table *table, int write,
#endif
static int proc_dopipe_max_size(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
#ifdef CONFIG_SCHED_WALT
static int proc_douintvec_minmax_schedhyst(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
#endif
#ifdef CONFIG_MAGIC_SYSRQ
/* Note: sysrq code uses its own private copy */
@@ -328,6 +339,172 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
#ifdef CONFIG_SCHED_WALT
{
.procname = "sched_user_hint",
.data = &sysctl_sched_user_hint,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = walt_proc_user_hint_handler,
.extra1 = SYSCTL_ZERO,
.extra2 = (void *)&sched_user_hint_max,
},
{
.procname = "sched_window_stats_policy",
.data = &sysctl_sched_window_stats_policy,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = &four,
},
{
.procname = "sched_cpu_high_irqload",
.data = &sysctl_sched_cpu_high_irqload,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "sched_group_upmigrate",
.data = &sysctl_sched_group_upmigrate_pct,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = walt_proc_group_thresholds_handler,
.extra1 = &sysctl_sched_group_downmigrate_pct,
},
{
.procname = "sched_group_downmigrate",
.data = &sysctl_sched_group_downmigrate_pct,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = walt_proc_group_thresholds_handler,
.extra1 = SYSCTL_ZERO,
.extra2 = &sysctl_sched_group_upmigrate_pct,
},
{
.procname = "sched_boost",
.data = &sysctl_sched_boost,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = sched_boost_handler,
.extra1 = &neg_three,
.extra2 = &three,
},
{
.procname = "sched_conservative_pl",
.data = &sysctl_sched_conservative_pl,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
{
.procname = "sched_walt_rotate_big_tasks",
.data = &sysctl_sched_walt_rotate_big_tasks,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
{
.procname = "sched_min_task_util_for_boost",
.data = &sysctl_sched_min_task_util_for_boost,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = &one_thousand,
},
{
.procname = "sched_min_task_util_for_colocation",
.data = &sysctl_sched_min_task_util_for_colocation,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = &one_thousand,
},
{
.procname = "sched_asym_cap_sibling_freq_match_pct",
.data = &sysctl_sched_asym_cap_sibling_freq_match_pct,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ONE,
.extra2 = &one_hundred,
},
{
.procname = "sched_coloc_downmigrate_ns",
.data = &sysctl_sched_coloc_downmigrate_ns,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_douintvec_minmax,
},
{
.procname = "sched_task_unfilter_nr_windows",
.data = &sysctl_sched_task_unfilter_nr_windows,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ONE,
.extra2 = &two_hundred_fifty_five,
},
{
.procname = "sched_busy_hysteresis_enable_cpus",
.data = &sysctl_sched_busy_hyst_enable_cpus,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_douintvec_minmax_schedhyst,
.extra1 = SYSCTL_ZERO,
.extra2 = &two_hundred_fifty_five,
},
{
.procname = "sched_busy_hyst_ns",
.data = &sysctl_sched_busy_hyst,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_douintvec_minmax_schedhyst,
.extra1 = SYSCTL_ZERO,
.extra2 = &ns_per_sec,
},
{
.procname = "sched_coloc_busy_hysteresis_enable_cpus",
.data = &sysctl_sched_coloc_busy_hyst_enable_cpus,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_douintvec_minmax_schedhyst,
.extra1 = SYSCTL_ZERO,
.extra2 = &two_hundred_fifty_five,
},
{
.procname = "sched_coloc_busy_hyst_ns",
.data = &sysctl_sched_coloc_busy_hyst,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_douintvec_minmax_schedhyst,
.extra1 = SYSCTL_ZERO,
.extra2 = &ns_per_sec,
},
{
.procname = "sched_coloc_busy_hyst_max_ms",
.data = &sysctl_sched_coloc_busy_hyst_max_ms,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_douintvec_minmax_schedhyst,
.extra1 = SYSCTL_ZERO,
.extra2 = &one_hundred_thousand,
},
{
.procname = "sched_ravg_window_nr_ticks",
.data = &sysctl_sched_ravg_window_nr_ticks,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = sched_ravg_window_handler,
},
#endif
#ifdef CONFIG_SCHED_DEBUG
{
.procname = "sched_min_granularity_ns",
@@ -2874,6 +3051,19 @@ static int proc_dostring_coredump(struct ctl_table *table, int write,
}
#endif
#ifdef CONFIG_SCHED_WALT
static int proc_douintvec_minmax_schedhyst(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int ret = proc_douintvec_minmax(table, write, buffer, lenp, ppos);
if (!ret && write)
sched_update_hyst_times();
return ret;
}
#endif
static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
void __user *buffer,
size_t *lenp, loff_t *ppos,
@@ -3341,6 +3531,29 @@ int proc_do_large_bitmap(struct ctl_table *table, int write,
return err;
}
static int do_proc_douintvec_rwin(bool *negp, unsigned long *lvalp,
int *valp, int write, void *data)
{
if (write) {
if (*lvalp == 0 || *lvalp == 2 || *lvalp == 5)
*valp = *lvalp;
else
return -EINVAL;
} else {
*negp = false;
*lvalp = *valp;
}
return 0;
}
int proc_douintvec_ravg_window(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
return do_proc_dointvec(table, write, buffer, lenp, ppos,
do_proc_douintvec_rwin, NULL);
}
#else /* CONFIG_PROC_SYSCTL */
int proc_dostring(struct ctl_table *table, int write,
@@ -3410,6 +3623,12 @@ int proc_do_large_bitmap(struct ctl_table *table, int write,
return -ENOSYS;
}
int proc_douintvec_ravg_window(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
return -ENOSYS;
}
#endif /* CONFIG_PROC_SYSCTL */
#if defined(CONFIG_SYSCTL)