kernel: Add snapshot of changes to support cpu isolation

This snapshot is taken from msm-4.19 as of commit 5debecbe7195
("trace: filter out spurious preemption and IRQs disable traces").

Change-Id: I222aa448ac68f7365065f62dba9db94925da38a0
Signed-off-by: Satya Durga Srinivasu Prabhala <satyap@codeaurora.org>
Author: Satya Durga Srinivasu Prabhala
Date:   2019-09-17 10:34:18 -07:00
Commit: 201ea48219 (parent 8573d7bfaa)
20 changed files with 398 additions and 77 deletions

View File

@@ -1156,6 +1156,11 @@ int lock_device_hotplug_sysfs(void)
return restart_syscall();
}
void lock_device_hotplug_assert(void)
{
lockdep_assert_held(&device_hotplug_lock);
}
#ifdef CONFIG_BLOCK
static inline int device_is_not_partition(struct device *dev)
{

View File

@@ -183,6 +183,32 @@ static struct attribute_group crash_note_cpu_attr_group = {
};
#endif
#ifdef CONFIG_HOTPLUG_CPU
static ssize_t isolate_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct cpu *cpu = container_of(dev, struct cpu, dev);
ssize_t rc;
int cpuid = cpu->dev.id;
unsigned int isolated = cpu_isolated(cpuid);
rc = scnprintf(buf, PAGE_SIZE-2, "%d\n", isolated);
return rc;
}
static DEVICE_ATTR_RO(isolate);
static struct attribute *cpu_isolated_attrs[] = {
&dev_attr_isolate.attr,
NULL
};
static struct attribute_group cpu_isolated_attr_group = {
.attrs = cpu_isolated_attrs,
};
#endif
#ifdef CONFIG_SCHED_WALT
static ssize_t sched_load_boost_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -240,6 +266,9 @@ static const struct attribute_group *common_cpu_attr_groups[] = {
#ifdef CONFIG_KEXEC
&crash_note_cpu_attr_group,
#endif
#ifdef CONFIG_HOTPLUG_CPU
&cpu_isolated_attr_group,
#endif
#ifdef CONFIG_SCHED_WALT
&sched_cpu_attr_group,
#endif
@@ -250,6 +279,9 @@ static const struct attribute_group *hotplugable_cpu_attr_groups[] = {
#ifdef CONFIG_KEXEC
&crash_note_cpu_attr_group,
#endif
#ifdef CONFIG_HOTPLUG_CPU
&cpu_isolated_attr_group,
#endif
#ifdef CONFIG_SCHED_WALT
&sched_cpu_attr_group,
#endif
@@ -282,6 +314,7 @@ static struct cpu_attr cpu_attrs[] = {
_CPU_ATTR(online, &__cpu_online_mask),
_CPU_ATTR(possible, &__cpu_possible_mask),
_CPU_ATTR(present, &__cpu_present_mask),
_CPU_ATTR(core_ctl_isolated, &__cpu_isolated_mask),
};
/*
@@ -531,6 +564,7 @@ static struct attribute *cpu_root_attrs[] = {
&cpu_attrs[0].attr.attr,
&cpu_attrs[1].attr.attr,
&cpu_attrs[2].attr.attr,
&cpu_attrs[3].attr.attr,
&dev_attr_kernel_max.attr,
&dev_attr_offline.attr,
&dev_attr_isolated.attr,

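The hunk above adds a read-only per-CPU "isolate" attribute to the common and hotpluggable CPU attribute groups. As an illustration only (not part of the patch), userspace could read it roughly like this, assuming the attribute lands at the usual /sys/devices/system/cpu/cpuN/ location:

#include <stdio.h>

/* Returns 1 if the CPU is isolated, 0 if not, -1 on error. */
static int cpu_is_isolated(int cpu)
{
	char path[64];
	FILE *f;
	int val = -1;

	snprintf(path, sizeof(path),
		 "/sys/devices/system/cpu/cpu%d/isolate", cpu);
	f = fopen(path, "r");
	if (!f)
		return -1;
	if (fscanf(f, "%d", &val) != 1)
		val = -1;
	fclose(f);
	return val;
}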
View File

@@ -55,6 +55,7 @@ extern unsigned int nr_cpu_ids;
* cpu_present_mask - has bit 'cpu' set iff cpu is populated
* cpu_online_mask - has bit 'cpu' set iff cpu available to scheduler
* cpu_active_mask - has bit 'cpu' set iff cpu available to migration
* cpu_isolated_mask - has bit 'cpu' set iff cpu isolated
*
* If !CONFIG_HOTPLUG_CPU, present == possible, and active == online.
*
@@ -91,10 +92,12 @@ extern struct cpumask __cpu_possible_mask;
extern struct cpumask __cpu_online_mask;
extern struct cpumask __cpu_present_mask;
extern struct cpumask __cpu_active_mask;
extern struct cpumask __cpu_isolated_mask;
#define cpu_possible_mask ((const struct cpumask *)&__cpu_possible_mask)
#define cpu_online_mask ((const struct cpumask *)&__cpu_online_mask)
#define cpu_present_mask ((const struct cpumask *)&__cpu_present_mask)
#define cpu_active_mask ((const struct cpumask *)&__cpu_active_mask)
#define cpu_isolated_mask ((const struct cpumask *)&__cpu_isolated_mask)
extern atomic_t __num_online_cpus;
@@ -114,19 +117,31 @@ static inline unsigned int num_online_cpus(void)
#define num_possible_cpus() cpumask_weight(cpu_possible_mask)
#define num_present_cpus() cpumask_weight(cpu_present_mask)
#define num_active_cpus() cpumask_weight(cpu_active_mask)
#define num_isolated_cpus() cpumask_weight(cpu_isolated_mask)
#define num_online_uniso_cpus() \
({ \
cpumask_t mask; \
\
cpumask_andnot(&mask, cpu_online_mask, cpu_isolated_mask); \
cpumask_weight(&mask); \
})
#define cpu_online(cpu) cpumask_test_cpu((cpu), cpu_online_mask)
#define cpu_possible(cpu) cpumask_test_cpu((cpu), cpu_possible_mask)
#define cpu_present(cpu) cpumask_test_cpu((cpu), cpu_present_mask)
#define cpu_active(cpu) cpumask_test_cpu((cpu), cpu_active_mask)
#define cpu_isolated(cpu) cpumask_test_cpu((cpu), cpu_isolated_mask)
#else
#define num_online_cpus() 1U
#define num_possible_cpus() 1U
#define num_present_cpus() 1U
#define num_active_cpus() 1U
#define num_isolated_cpus() 0U
#define num_online_uniso_cpus() 1U
#define cpu_online(cpu) ((cpu) == 0)
#define cpu_possible(cpu) ((cpu) == 0)
#define cpu_present(cpu) ((cpu) == 0)
#define cpu_active(cpu) ((cpu) == 0)
#define cpu_isolated(cpu) ((cpu) != 0)
#endif
extern cpumask_t cpus_booted_once_mask;
@@ -806,6 +821,7 @@ extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS);
#define for_each_possible_cpu(cpu) for_each_cpu((cpu), cpu_possible_mask)
#define for_each_online_cpu(cpu) for_each_cpu((cpu), cpu_online_mask)
#define for_each_present_cpu(cpu) for_each_cpu((cpu), cpu_present_mask)
#define for_each_isolated_cpu(cpu) for_each_cpu((cpu), cpu_isolated_mask)
/* Wrappers for arch boot code to manipulate normally-constant masks */
void init_cpu_present(const struct cpumask *src);
@@ -846,6 +862,15 @@ set_cpu_active(unsigned int cpu, bool active)
cpumask_clear_cpu(cpu, &__cpu_active_mask);
}
static inline void
set_cpu_isolated(unsigned int cpu, bool isolated)
{
if (isolated)
cpumask_set_cpu(cpu, &__cpu_isolated_mask);
else
cpumask_clear_cpu(cpu, &__cpu_isolated_mask);
}
/**
* to_cpumask - convert an NR_CPUS bitmap to a struct cpumask *

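The new mask is consumed exactly like the existing cpu_online/cpu_active helpers. A minimal sketch of a kernel-side consumer (illustrative only, not part of the patch):

#include <linux/cpumask.h>
#include <linux/printk.h>

static void report_isolation(void)
{
	int cpu;

	/* Counters defined by the hunk above. */
	pr_info("online=%u unisolated=%u isolated=%u\n",
		num_online_cpus(), num_online_uniso_cpus(),
		num_isolated_cpus());

	for_each_isolated_cpu(cpu)
		pr_info("cpu%d is isolated\n", cpu);
}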
View File

@@ -1551,6 +1551,7 @@ static inline bool device_supports_offline(struct device *dev)
extern void lock_device_hotplug(void);
extern void unlock_device_hotplug(void);
extern int lock_device_hotplug_sysfs(void);
extern void lock_device_hotplug_assert(void);
extern int device_offline(struct device *dev);
extern int device_online(struct device *dev);
extern void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode);

View File

@@ -13,6 +13,9 @@
#ifdef CONFIG_LOCKUP_DETECTOR
void lockup_detector_init(void);
extern void watchdog_enable(unsigned int cpu);
extern void watchdog_disable(unsigned int cpu);
extern bool watchdog_configured(unsigned int cpu);
void lockup_detector_soft_poweroff(void);
void lockup_detector_cleanup(void);
bool is_hardlockup(void);
@@ -37,6 +40,20 @@ extern int sysctl_hardlockup_all_cpu_backtrace;
static inline void lockup_detector_init(void) { }
static inline void lockup_detector_soft_poweroff(void) { }
static inline void lockup_detector_cleanup(void) { }
static inline void watchdog_enable(unsigned int cpu)
{
}
static inline void watchdog_disable(unsigned int cpu)
{
}
static inline bool watchdog_configured(unsigned int cpu)
{
/*
* Pretend the watchdog is always configured; core isolation
* waits for the watchdog to be enabled before proceeding.
*/
return true;
}
#endif /* !CONFIG_LOCKUP_DETECTOR */
#ifdef CONFIG_SOFTLOCKUP_DETECTOR

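These stubs exist because the core-isolation path waits for the per-CPU watchdog to be configured again before it considers a CPU usable. A hedged sketch of such a wait (illustrative; the real caller lives in the isolation/core-control code, which is not part of this snapshot):

#include <linux/nmi.h>
#include <linux/delay.h>

static void wait_for_watchdog(unsigned int cpu)
{
	/*
	 * With CONFIG_LOCKUP_DETECTOR=n the stub above always reports
	 * "configured", so this loop falls through immediately.
	 */
	while (!watchdog_configured(cpu))
		usleep_range(1000, 2000);
}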
View File

@@ -239,6 +239,27 @@ enum migrate_types {
RQ_TO_GROUP,
};
#ifdef CONFIG_HOTPLUG_CPU
extern int __weak sched_isolate_cpu(int cpu);
extern int __weak sched_unisolate_cpu(int cpu);
extern int __weak sched_unisolate_cpu_unlocked(int cpu);
#else
static inline int sched_isolate_cpu(int cpu)
{
return 0;
}
static inline int sched_unisolate_cpu(int cpu)
{
return 0;
}
static inline int sched_unisolate_cpu_unlocked(int cpu)
{
return 0;
}
#endif
extern void scheduler_tick(void);
#define MAX_SCHEDULE_TIMEOUT LONG_MAX

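These weak hooks give core-control style drivers a stable entry point for isolating and un-isolating a CPU; the !CONFIG_HOTPLUG_CPU stubs simply return 0. A sketch of a caller, assuming 0 means success as in those stubs:

#include <linux/sched.h>
#include <linux/printk.h>

static void quiesce_cpu(int cpu)
{
	if (sched_isolate_cpu(cpu))
		pr_warn("failed to isolate cpu%d\n", cpu);
}

static void restore_cpu(int cpu)
{
	if (sched_unisolate_cpu(cpu))
		pr_warn("failed to unisolate cpu%d\n", cpu);
}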
View File

@@ -28,7 +28,15 @@ extern void __init housekeeping_init(void);
static inline int housekeeping_any_cpu(enum hk_flags flags)
{
return smp_processor_id();
cpumask_t available;
int cpu;
cpumask_andnot(&available, cpu_online_mask, cpu_isolated_mask);
cpu = cpumask_any(&available);
if (cpu >= nr_cpu_ids)
cpu = smp_processor_id();
return cpu;
}
static inline const struct cpumask *housekeeping_cpumask(enum hk_flags flags)
@@ -52,7 +60,7 @@ static inline bool housekeeping_cpu(int cpu, enum hk_flags flags)
if (static_branch_unlikely(&housekeeping_overridden))
return housekeeping_test_cpu(cpu, flags);
#endif
return true;
return !cpu_isolated(cpu);
}
#endif /* _LINUX_SCHED_ISOLATION_H */
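With housekeeping_any_cpu() now steering away from isolated CPUs, callers can place deferred work without masking out the isolated set themselves. Illustrative sketch only (HK_FLAG_MISC is just an example flag, not something this patch introduces):

#include <linux/sched/isolation.h>
#include <linux/workqueue.h>

static void queue_on_housekeeper(struct work_struct *work)
{
	int cpu = housekeeping_any_cpu(HK_FLAG_MISC);

	/* Lands on an online, un-isolated CPU whenever one exists. */
	queue_work_on(cpu, system_wq, work);
}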

View File

@@ -990,6 +990,9 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
if (!cpu_present(cpu))
return -EINVAL;
if (!tasks_frozen && !cpu_isolated(cpu) && num_online_uniso_cpus() == 1)
return -EBUSY;
cpus_write_lock();
if (trace_cpuhp_latency_enabled())
start_time = sched_clock();
@@ -2377,6 +2380,9 @@ EXPORT_SYMBOL(__cpu_present_mask);
struct cpumask __cpu_active_mask __read_mostly;
EXPORT_SYMBOL(__cpu_active_mask);
struct cpumask __cpu_isolated_mask __read_mostly;
EXPORT_SYMBOL(__cpu_isolated_mask);
atomic_t __num_online_cpus __read_mostly;
EXPORT_SYMBOL(__num_online_cpus);
@@ -2395,6 +2401,11 @@ void init_cpu_online(const struct cpumask *src)
cpumask_copy(&__cpu_online_mask, src);
}
void init_cpu_isolated(const struct cpumask *src)
{
cpumask_copy(&__cpu_isolated_mask, src);
}
void set_cpu_online(unsigned int cpu, bool online)
{
/*

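init_cpu_isolated() mirrors init_cpu_online() and friends and is intended for early boot. A sketch, assuming the declaration is exported next to the other init_cpu_*() wrappers in cpumask.h:

#include <linux/cpumask.h>
#include <linux/init.h>

static void __init seed_isolation_mask(void)
{
	/* Start with no CPUs isolated, mirroring the other boot-time masks. */
	init_cpu_isolated(cpu_none_mask);
}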
View File

@@ -12,6 +12,7 @@
#include <linux/interrupt.h>
#include <linux/ratelimit.h>
#include <linux/irq.h>
#include <linux/cpumask.h>
#include "internals.h"
@@ -57,6 +58,7 @@ static bool migrate_one_irq(struct irq_desc *desc)
const struct cpumask *affinity;
bool brokeaff = false;
int err;
struct cpumask available_cpus;
/*
* IRQ chip might be already torn down, but the irq descriptor is
@@ -109,6 +111,10 @@ static bool migrate_one_irq(struct irq_desc *desc)
if (maskchip && chip->irq_mask)
chip->irq_mask(d);
cpumask_copy(&available_cpus, affinity);
cpumask_andnot(&available_cpus, &available_cpus, cpu_isolated_mask);
affinity = &available_cpus;
if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
/*
* If the interrupt is managed, then shut it down and leave
@@ -119,16 +125,41 @@ static bool migrate_one_irq(struct irq_desc *desc)
irq_shutdown_and_deactivate(desc);
return false;
}
affinity = cpu_online_mask;
/*
* The order of preference for selecting a fallback CPU is
*
* (1) online and un-isolated CPU from default affinity
* (2) online and un-isolated CPU
* (3) online CPU
*/
cpumask_andnot(&available_cpus, cpu_online_mask,
cpu_isolated_mask);
if (cpumask_intersects(&available_cpus, irq_default_affinity))
cpumask_and(&available_cpus, &available_cpus,
irq_default_affinity);
else if (cpumask_empty(&available_cpus))
affinity = cpu_online_mask;
/*
* We are overriding the affinity with all online and
* un-isolated CPUs. The irq_set_affinity_locked() call
* below notifies the PM QoS affinity listener of this
* mask, which results in applying the CPU_DMA_LATENCY
* QoS to all the CPUs specified in the mask. But the
* low level irqchip driver sets the affinity of an irq
* to only one CPU, so pick only one CPU from the
* prepared mask while overriding the user affinity.
*/
affinity = cpumask_of(cpumask_any(affinity));
brokeaff = true;
}
/*
* Do not set the force argument of irq_do_set_affinity() as this
* Do not set the force argument of irq_set_affinity_locked() as this
* disables the masking of offline CPUs from the supplied affinity
* mask and therefore might keep/reassign the irq to the outgoing
* CPU.
*/
err = irq_do_set_affinity(d, affinity, false);
err = irq_set_affinity_locked(d, affinity, false);
if (err) {
pr_warn_ratelimited("IRQ%u: set affinity failed(%d).\n",
d->irq, err);

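Stripped of locking and the managed-irq shutdown path, the fallback selection above boils down to the three-step preference the comment describes. A standalone sketch (illustrative only, not part of the patch):

#include <linux/cpumask.h>
#include <linux/interrupt.h>

static int pick_irq_fallback_cpu(void)
{
	cpumask_t avail;

	/* (1)/(2): online and un-isolated, preferring the default affinity. */
	cpumask_andnot(&avail, cpu_online_mask, cpu_isolated_mask);
	if (cpumask_intersects(&avail, irq_default_affinity))
		cpumask_and(&avail, &avail, irq_default_affinity);

	/* (3): any online CPU if every usable one is isolated. */
	if (cpumask_empty(&avail))
		cpumask_copy(&avail, cpu_online_mask);

	return cpumask_any(&avail);
}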
View File

@@ -132,6 +132,11 @@ static ssize_t write_irq_affinity(int type, struct file *file,
if (err)
goto free_cpumask;
if (cpumask_subset(new_value, cpu_isolated_mask)) {
err = -EINVAL;
goto free_cpumask;
}
/*
* Do not allow disabling IRQs completely - it's a too easy
* way to make the system unusable accidentally :-) At least

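In practice this means that writing a mask naming only isolated CPUs to /proc/irq/<N>/smp_affinity (or smp_affinity_list) now fails with EINVAL; masks that include at least one un-isolated CPU are still accepted.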
View File

@@ -11,6 +11,7 @@
#include <linux/nospec.h>
#include <linux/kcov.h>
#include <linux/irq.h>
#include <asm/switch_to.h>
#include <asm/tlb.h>
@@ -1649,6 +1650,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
struct rq_flags rf;
struct rq *rq;
int ret = 0;
cpumask_t allowed_mask;
rq = task_rq_lock(p, &rf);
update_rq_clock(rq);
@@ -1672,10 +1674,17 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
if (cpumask_equal(p->cpus_ptr, new_mask))
goto out;
dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask);
cpumask_andnot(&allowed_mask, new_mask, cpu_isolated_mask);
cpumask_and(&allowed_mask, &allowed_mask, cpu_valid_mask);
dest_cpu = cpumask_any(&allowed_mask);
if (dest_cpu >= nr_cpu_ids) {
ret = -EINVAL;
goto out;
cpumask_and(&allowed_mask, cpu_valid_mask, new_mask);
dest_cpu = cpumask_any(&allowed_mask);
if (!cpumask_intersects(new_mask, cpu_valid_mask)) {
ret = -EINVAL;
goto out;
}
}
do_set_cpus_allowed(p, new_mask);
@@ -1691,7 +1700,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
}
/* Can the task run on the task's current CPU? If so, we're done */
if (cpumask_test_cpu(task_cpu(p), new_mask))
if (cpumask_test_cpu(task_cpu(p), &allowed_mask))
goto out;
if (task_running(rq, p) || p->state == TASK_WAKING) {
@@ -2043,12 +2052,13 @@ EXPORT_SYMBOL_GPL(kick_process);
* select_task_rq() below may allow selection of !active CPUs in order
* to satisfy the above rules.
*/
static int select_fallback_rq(int cpu, struct task_struct *p)
static int select_fallback_rq(int cpu, struct task_struct *p, bool allow_iso)
{
int nid = cpu_to_node(cpu);
const struct cpumask *nodemask = NULL;
enum { cpuset, possible, fail } state = cpuset;
enum { cpuset, possible, fail, bug } state = cpuset;
int dest_cpu;
int isolated_candidate = -1;
/*
* If the node that the CPU is on has been offlined, cpu_to_node()
@@ -2062,6 +2072,8 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
for_each_cpu(dest_cpu, nodemask) {
if (!cpu_active(dest_cpu))
continue;
if (cpu_isolated(dest_cpu))
continue;
if (cpumask_test_cpu(dest_cpu, p->cpus_ptr))
return dest_cpu;
}
@@ -2072,7 +2084,16 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
for_each_cpu(dest_cpu, p->cpus_ptr) {
if (!is_cpu_allowed(p, dest_cpu))
continue;
if (cpu_isolated(dest_cpu)) {
if (allow_iso)
isolated_candidate = dest_cpu;
continue;
}
goto out;
}
if (isolated_candidate != -1) {
dest_cpu = isolated_candidate;
goto out;
}
@@ -2091,6 +2112,11 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
break;
case fail:
allow_iso = true;
state = bug;
break;
case bug:
BUG();
break;
}
@@ -2118,6 +2144,8 @@ out:
static inline
int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
{
bool allow_isolated = (p->flags & PF_KTHREAD);
lockdep_assert_held(&p->pi_lock);
if (p->nr_cpus_allowed > 1)
@@ -2135,8 +2163,9 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
* [ this allows ->select_task() to simply return task_cpu(p) and
* not worry about this generic constraint ]
*/
if (unlikely(!is_cpu_allowed(p, cpu)))
cpu = select_fallback_rq(task_cpu(p), p);
if (unlikely(!is_cpu_allowed(p, cpu)) ||
(cpu_isolated(cpu) && !allow_isolated))
cpu = select_fallback_rq(task_cpu(p), p, allow_isolated);
return cpu;
}
@@ -2327,6 +2356,7 @@ void sched_ttwu_pending(void)
void scheduler_ipi(void)
{
int cpu = smp_processor_id();
/*
* Fold TIF_NEED_RESCHED into the preempt_count; anybody setting
* TIF_NEED_RESCHED remotely (for the first time) will also send
@@ -2356,7 +2386,7 @@ void scheduler_ipi(void)
/*
* Check if someone kicked us for doing the nohz idle load balance.
*/
if (unlikely(got_nohz_idle_kick())) {
if (unlikely(got_nohz_idle_kick()) && !cpu_isolated(cpu)) {
this_rq()->idle_balance = 1;
raise_softirq_irqoff(SCHED_SOFTIRQ);
}
@@ -3542,7 +3572,7 @@ void sched_exec(void)
if (dest_cpu == smp_processor_id())
goto unlock;
if (likely(cpu_active(dest_cpu))) {
if (likely(cpu_active(dest_cpu) && likely(!cpu_isolated(dest_cpu)))) {
struct migration_arg arg = { p, dest_cpu };
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
@@ -5463,6 +5493,8 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
cpumask_var_t cpus_allowed, new_mask;
struct task_struct *p;
int retval;
int dest_cpu;
cpumask_t allowed_mask;
rcu_read_lock();
@@ -5524,20 +5556,26 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
}
#endif
again:
retval = __set_cpus_allowed_ptr(p, new_mask, true);
if (!retval) {
cpuset_cpus_allowed(p, cpus_allowed);
if (!cpumask_subset(new_mask, cpus_allowed)) {
/*
* We must have raced with a concurrent cpuset
* update. Just reset the cpus_allowed to the
* cpuset's cpus_allowed
*/
cpumask_copy(new_mask, cpus_allowed);
goto again;
cpumask_andnot(&allowed_mask, new_mask, cpu_isolated_mask);
dest_cpu = cpumask_any_and(cpu_active_mask, &allowed_mask);
if (dest_cpu < nr_cpu_ids) {
retval = __set_cpus_allowed_ptr(p, new_mask, true);
if (!retval) {
cpuset_cpus_allowed(p, cpus_allowed);
if (!cpumask_subset(new_mask, cpus_allowed)) {
/*
* We must have raced with a concurrent cpuset
* update. Just reset the cpus_allowed to the
* cpuset's cpus_allowed
*/
cpumask_copy(new_mask, cpus_allowed);
goto again;
}
}
} else {
retval = -EINVAL;
}
out_free_new_mask:
free_cpumask_var(new_mask);
out_free_cpus_allowed:
@@ -5655,6 +5693,14 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
raw_spin_lock_irqsave(&p->pi_lock, flags);
cpumask_and(mask, &p->cpus_mask, cpu_active_mask);
/* The userspace tasks are forbidden to run on
* isolated CPUs. So exclude isolated CPUs from
* the getaffinity.
*/
if (!(p->flags & PF_KTHREAD))
cpumask_andnot(mask, mask, cpu_isolated_mask);
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
out_unlock:
@@ -6351,19 +6397,25 @@ static struct task_struct *__pick_migrate_task(struct rq *rq)
}
/*
* Migrate all tasks from the rq, sleeping tasks will be migrated by
* try_to_wake_up()->select_task_rq().
* Migrate all tasks from the rq (skipping pinned tasks if the
* migrate_pinned_tasks argument says so); sleeping tasks will be
* migrated by try_to_wake_up()->select_task_rq().
*
* Called with rq->lock held even though we're in stop_machine() and
* there's no concurrency possible, we hold the required locks anyway
* because of lock validation efforts.
*/
void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf)
void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf,
bool migrate_pinned_tasks)
{
struct rq *rq = dead_rq;
struct task_struct *next, *stop = rq->stop;
struct rq_flags orf = *rf;
int dest_cpu;
unsigned int num_pinned_kthreads = 1; /* this thread */
LIST_HEAD(tasks);
cpumask_t avail_cpus;
cpumask_andnot(&avail_cpus, cpu_online_mask, cpu_isolated_mask);
/*
* Fudge the rq selection such that the below task selection loop
@@ -6386,13 +6438,20 @@ void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf)
for (;;) {
/*
* There's this thread running, bail when that's the only
* remaining thread:
* remaining thread.
*/
if (rq->nr_running == 1)
break;
next = __pick_migrate_task(rq);
if (!migrate_pinned_tasks && next->flags & PF_KTHREAD &&
!cpumask_intersects(&avail_cpus, &next->cpus_mask)) {
detach_one_task_core(next, rq, &tasks);
num_pinned_kthreads += 1;
continue;
}
/*
* Rules for changing task_struct::cpus_mask are holding
* both pi_lock and rq->lock, such that holding either
@@ -6405,31 +6464,43 @@ void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf)
rq_unlock(rq, rf);
raw_spin_lock(&next->pi_lock);
rq_relock(rq, rf);
if (!(rq->clock_update_flags & RQCF_UPDATED))
update_rq_clock(rq);
/*
* Since we're inside stop-machine, _nothing_ should have
* changed the task, WARN if weird stuff happened, because in
* that case the above rq->lock drop is a fail too.
* However, during cpu isolation the load balancer might have
* interfered since we don't stop all CPUs. Ignore the warning
* for this case.
*/
if (WARN_ON(task_rq(next) != rq || !task_on_rq_queued(next))) {
if (task_rq(next) != rq || !task_on_rq_queued(next)) {
WARN_ON(migrate_pinned_tasks);
raw_spin_unlock(&next->pi_lock);
continue;
}
/* Find suitable destination for @next, with force if needed. */
dest_cpu = select_fallback_rq(dead_rq->cpu, next);
dest_cpu = select_fallback_rq(dead_rq->cpu, next, false);
rq = __migrate_task(rq, rf, next, dest_cpu);
if (rq != dead_rq) {
rq_unlock(rq, rf);
rq = dead_rq;
*rf = orf;
rq_relock(rq, rf);
if (!(rq->clock_update_flags & RQCF_UPDATED))
update_rq_clock(rq);
}
raw_spin_unlock(&next->pi_lock);
}
rq->stop = stop;
if (num_pinned_kthreads > 1)
attach_tasks_core(&tasks, rq);
}
#endif /* CONFIG_HOTPLUG_CPU */
void set_rq_online(struct rq *rq)
@@ -6619,7 +6690,7 @@ int sched_cpu_dying(unsigned int cpu)
BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
set_rq_offline(rq);
}
migrate_tasks(rq, &rf);
migrate_tasks(rq, &rf, true);
BUG_ON(rq->nr_running != 1);
rq_unlock_irqrestore(rq, &rf);

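One user-visible consequence of the sched_setaffinity() change above: an affinity request that contains only isolated CPUs is rejected with -EINVAL (kernel threads can still end up on isolated CPUs via select_task_rq()). Illustrative userspace sketch:

#define _GNU_SOURCE
#include <errno.h>
#include <sched.h>
#include <stdio.h>
#include <sys/types.h>

static void try_pin(pid_t pid, int isolated_cpu)
{
	cpu_set_t set;

	CPU_ZERO(&set);
	CPU_SET(isolated_cpu, &set);
	if (sched_setaffinity(pid, sizeof(set), &set) && errno == EINVAL)
		printf("cpu%d rejected: mask has no un-isolated CPU\n",
		       isolated_cpu);
}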
View File

@@ -5983,6 +5983,8 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
return si_cpu;
if (!cpumask_test_cpu(cpu, p->cpus_ptr))
continue;
if (cpu_isolated(cpu))
continue;
if (available_idle_cpu(cpu))
break;
if (si_cpu == -1 && sched_idle_cpu(cpu))
@@ -6005,14 +6007,16 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
struct sched_domain *sd;
int i, recent_used_cpu;
if (available_idle_cpu(target) || sched_idle_cpu(target))
if ((available_idle_cpu(target) || sched_idle_cpu(target)) &&
!cpu_isolated(target))
return target;
/*
* If the previous CPU is cache affine and idle, don't be stupid:
*/
if (prev != target && cpus_share_cache(prev, target) &&
(available_idle_cpu(prev) || sched_idle_cpu(prev)))
((available_idle_cpu(prev) || sched_idle_cpu(prev)) &&
!cpu_isolated(prev)))
return prev;
/* Check a recently used CPU as a potential idle candidate: */
@@ -7892,6 +7896,8 @@ void update_group_capacity(struct sched_domain *sd, int cpu)
struct sched_group_capacity *sgc;
struct rq *rq = cpu_rq(cpu);
if (cpumask_test_cpu(cpu, cpu_isolated_mask))
continue;
/*
* build_sched_domains() -> init_sched_groups_capacity()
* gets here before we've attached the domains to the
@@ -7922,10 +7928,15 @@ void update_group_capacity(struct sched_domain *sd, int cpu)
group = child->groups;
do {
struct sched_group_capacity *sgc = group->sgc;
cpumask_t *cpus = sched_group_span(group);
capacity += sgc->capacity;
min_capacity = min(sgc->min_capacity, min_capacity);
max_capacity = max(sgc->max_capacity, max_capacity);
if (!cpu_isolated(cpumask_first(cpus))) {
capacity += sgc->capacity;
min_capacity = min(sgc->min_capacity,
min_capacity);
max_capacity = max(sgc->max_capacity,
max_capacity);
}
group = group->next;
} while (group != child->groups);
}
@@ -8129,6 +8140,9 @@ static inline void update_sg_lb_stats(struct lb_env *env,
for_each_cpu_and(i, sched_group_span(group), env->cpus) {
struct rq *rq = cpu_rq(i);
if (cpu_isolated(i))
continue;
if ((env->flags & LBF_NOHZ_STATS) && update_nohz_stats(rq, false))
env->flags |= LBF_NOHZ_AGAIN;
@@ -8160,17 +8174,27 @@ static inline void update_sg_lb_stats(struct lb_env *env,
}
}
/* Adjust by relative CPU capacity of the group */
sgs->group_capacity = group->sgc->capacity;
sgs->avg_load = (sgs->group_load*SCHED_CAPACITY_SCALE) / sgs->group_capacity;
/* Isolated CPU has no weight */
if (!group->group_weight) {
sgs->group_capacity = 0;
sgs->avg_load = 0;
sgs->group_no_capacity = 1;
sgs->group_type = group_other;
sgs->group_weight = group->group_weight;
} else {
/* Adjust by relative CPU capacity of the group */
sgs->group_capacity = group->sgc->capacity;
sgs->avg_load = (sgs->group_load*SCHED_CAPACITY_SCALE) /
sgs->group_capacity;
sgs->group_weight = group->group_weight;
sgs->group_no_capacity = group_is_overloaded(env, sgs);
sgs->group_type = group_classify(group, sgs);
}
if (sgs->sum_nr_running)
sgs->load_per_task = sgs->group_load / sgs->sum_nr_running;
sgs->group_weight = group->group_weight;
sgs->group_no_capacity = group_is_overloaded(env, sgs);
sgs->group_type = group_classify(group, sgs);
}
/**
@@ -8910,7 +8934,7 @@ static int should_we_balance(struct lb_env *env)
/* Try to find first idle CPU */
for_each_cpu_and(cpu, group_balance_mask(sg), env->cpus) {
if (!idle_cpu(cpu))
if (!idle_cpu(cpu) || cpu_isolated(cpu))
continue;
balance_cpu = cpu;
@@ -8918,7 +8942,7 @@ static int should_we_balance(struct lb_env *env)
}
if (balance_cpu == -1)
balance_cpu = group_balance_cpu(sg);
balance_cpu = group_balance_cpu_not_isolated(sg);
/*
* First idle CPU or the first CPU(busiest) in this sched group
@@ -9127,7 +9151,8 @@ more_balance:
* ->active_balance_work. Once set, it's cleared
* only after active load balance is finished.
*/
if (!busiest->active_balance) {
if (!busiest->active_balance &&
!cpu_isolated(cpu_of(busiest))) {
busiest->active_balance = 1;
busiest->push_cpu = this_cpu;
active_balance = 1;
@@ -9333,7 +9358,13 @@ static DEFINE_SPINLOCK(balancing);
*/
void update_max_interval(void)
{
max_load_balance_interval = HZ*num_online_cpus()/10;
cpumask_t avail_mask;
unsigned int available_cpus;
cpumask_andnot(&avail_mask, cpu_online_mask, cpu_isolated_mask);
available_cpus = cpumask_weight(&avail_mask);
max_load_balance_interval = HZ*available_cpus/10;
}
/*
@@ -9510,6 +9541,7 @@ static void nohz_balancer_kick(struct rq *rq)
struct sched_domain *sd;
int nr_busy, i, cpu = rq->cpu;
unsigned int flags = 0;
cpumask_t cpumask;
if (unlikely(rq->idle_balance))
return;
@@ -9524,7 +9556,8 @@ static void nohz_balancer_kick(struct rq *rq)
* None are in tickless mode and hence no need for NOHZ idle load
* balancing.
*/
if (likely(!atomic_read(&nohz.nr_cpus)))
cpumask_andnot(&cpumask, nohz.idle_cpus_mask, cpu_isolated_mask);
if (cpumask_empty(&cpumask))
return;
if (READ_ONCE(nohz.has_blocked) &&
@@ -9561,7 +9594,7 @@ static void nohz_balancer_kick(struct rq *rq)
* currently idle; in which case, kick the ILB to move tasks
* around.
*/
for_each_cpu_and(i, sched_domain_span(sd), nohz.idle_cpus_mask) {
for_each_cpu_and(i, sched_domain_span(sd), &cpumask) {
if (sched_asym_prefer(i, cpu)) {
flags = NOHZ_KICK_MASK;
goto unlock;
@@ -9739,6 +9772,7 @@ static bool _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
int balance_cpu;
int ret = false;
struct rq *rq;
cpumask_t cpus;
SCHED_WARN_ON((flags & NOHZ_KICK_MASK) == NOHZ_BALANCE_KICK);
@@ -9758,7 +9792,9 @@ static bool _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
*/
smp_mb();
for_each_cpu(balance_cpu, nohz.idle_cpus_mask) {
cpumask_andnot(&cpus, nohz.idle_cpus_mask, cpu_isolated_mask);
for_each_cpu(balance_cpu, &cpus) {
if (balance_cpu == this_cpu || !idle_cpu(balance_cpu))
continue;
@@ -9910,6 +9946,9 @@ int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
int pulled_task = 0;
u64 curr_cost = 0;
if (cpu_isolated(this_cpu))
return 0;
update_misfit_status(NULL, this_rq);
/*
* We must set idle_stamp _before_ calling idle_balance(), such that we
@@ -10026,6 +10065,14 @@ static __latent_entropy void run_rebalance_domains(struct softirq_action *h)
enum cpu_idle_type idle = this_rq->idle_balance ?
CPU_IDLE : CPU_NOT_IDLE;
/*
* Since core isolation doesn't update nohz.idle_cpus_mask, there
* is a possibility this nohz kicked cpu could be isolated. Hence
* return if the cpu is isolated.
*/
if (cpu_isolated(this_rq->cpu))
return;
/*
* If this CPU has a pending nohz_balance_kick, then do the
* balancing on behalf of the other idle CPUs whose ticks are
@@ -10047,8 +10094,10 @@ static __latent_entropy void run_rebalance_domains(struct softirq_action *h)
*/
void trigger_load_balance(struct rq *rq)
{
/* Don't need to rebalance while attached to NULL domain */
if (unlikely(on_null_domain(rq)))
/* Don't need to rebalance while attached to NULL domain or
* the cpu is isolated.
*/
if (unlikely(on_null_domain(rq)) || cpu_isolated(cpu_of(rq)))
return;
if (time_after_eq(jiffies, rq->next_balance))

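As a worked example of update_max_interval() above: with HZ=250, eight CPUs online and two of them isolated, max_load_balance_interval becomes 250 * 6 / 10 = 150 jiffies instead of the previous 250 * 8 / 10 = 200, so the balancing cadence is paced by the CPUs that can actually participate.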
View File

@@ -263,8 +263,12 @@ static void pull_rt_task(struct rq *this_rq);
static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
{
/* Try to pull RT tasks here if we lower this rq's prio */
return rq->rt.highest_prio.curr > prev->prio;
/*
* Try to pull RT tasks here if we lower this rq's prio and the cpu is
* not isolated.
*/
return rq->rt.highest_prio.curr > prev->prio &&
!cpu_isolated(cpu_of(rq));
}
static inline int rt_overloaded(struct rq *rq)
@@ -2192,7 +2196,8 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p)
* we may need to handle the pulling of RT tasks
* now.
*/
if (!task_on_rq_queued(p) || rq->rt.rt_nr_running)
if (!task_on_rq_queued(p) || rq->rt.rt_nr_running ||
cpu_isolated(cpu_of(rq)))
return;
rt_queue_pull_task(rq);

View File

@@ -163,6 +163,10 @@ extern atomic_long_t calc_load_tasks;
extern void calc_global_load_tick(struct rq *this_rq);
extern long calc_load_fold_active(struct rq *this_rq, long adjust);
#ifdef CONFIG_SMP
extern void init_sched_groups_capacity(int cpu, struct sched_domain *sd);
#endif
/*
* Helpers for converting nanosecond timing to jiffy resolution
*/
@@ -3307,7 +3311,8 @@ extern int active_load_balance_cpu_stop(void *data);
#ifdef CONFIG_HOTPLUG_CPU
extern void set_rq_online(struct rq *rq);
extern void set_rq_offline(struct rq *rq);
extern void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf);
extern void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf,
bool migrate_pinned_tasks);
extern void calc_load_migrate(struct rq *rq);
#ifdef CONFIG_SCHED_WALT
extern void __weak

View File

@@ -1163,16 +1163,19 @@ build_sched_groups(struct sched_domain *sd, int cpu)
* group having more cpu_capacity will pickup more load compared to the
* group having less cpu_capacity.
*/
static void init_sched_groups_capacity(int cpu, struct sched_domain *sd)
void init_sched_groups_capacity(int cpu, struct sched_domain *sd)
{
struct sched_group *sg = sd->groups;
cpumask_t avail_mask;
WARN_ON(!sg);
do {
int cpu, max_cpu = -1;
sg->group_weight = cpumask_weight(sched_group_span(sg));
cpumask_andnot(&avail_mask, sched_group_span(sg),
cpu_isolated_mask);
sg->group_weight = cpumask_weight(&avail_mask);
if (!(sd->flags & SD_ASYM_PACKING))
goto next;

View File

@@ -792,7 +792,8 @@ void wake_up_all_idle_cpus(void)
if (cpu == smp_processor_id())
continue;
wake_up_if_idle(cpu);
if (!cpu_isolated(cpu))
wake_up_if_idle(cpu);
}
preempt_enable();
}

View File

@@ -2014,7 +2014,7 @@ int hrtimers_prepare_cpu(unsigned int cpu)
return 0;
}
#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_CPUSETS)
#ifdef CONFIG_HOTPLUG_CPU
static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
struct hrtimer_clock_base *new_base,
bool remove_pinned)
@@ -2023,12 +2023,14 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
struct timerqueue_node *node;
struct timerqueue_head pinned;
int is_pinned;
bool is_hotplug = !cpu_online(old_base->cpu_base->cpu);
timerqueue_init_head(&pinned);
while ((node = timerqueue_getnext(&old_base->active))) {
timer = container_of(node, struct hrtimer, node);
BUG_ON(hrtimer_callback_running(timer));
if (is_hotplug)
BUG_ON(hrtimer_callback_running(timer));
debug_deactivate(timer);
/*
@@ -2106,9 +2108,7 @@ static void __migrate_hrtimers(unsigned int scpu, bool remove_pinned)
local_irq_restore(flags);
local_bh_enable();
}
#endif /* CONFIG_HOTPLUG_CPU || CONFIG_CPUSETS */
#ifdef CONFIG_HOTPLUG_CPU
int hrtimers_dead_cpu(unsigned int scpu)
{
BUG_ON(cpu_online(scpu));
@@ -2117,14 +2117,13 @@ int hrtimers_dead_cpu(unsigned int scpu)
__migrate_hrtimers(scpu, true);
return 0;
}
#endif /* CONFIG_HOTPLUG_CPU */
#ifdef CONFIG_CPUSETS
void hrtimer_quiesce_cpu(void *cpup)
{
__migrate_hrtimers(*(int *)cpup, false);
}
#endif /* CONFIG_CPUSETS */
#endif /* CONFIG_HOTPLUG_CPU */
void __init hrtimers_init(void)
{

View File

@@ -2038,7 +2038,8 @@ static void __migrate_timers(unsigned int cpu, bool remove_pinned)
*/
forward_timer_base(new_base);
BUG_ON(old_base->running_timer);
if (!cpu_online(cpu))
BUG_ON(old_base->running_timer);
for (i = 0; i < WHEEL_SIZE; i++)
migrate_timer_list(new_base, old_base->vectors + i,
@@ -2057,12 +2058,10 @@ int timers_dead_cpu(unsigned int cpu)
return 0;
}
#ifdef CONFIG_CPUSETS
void timer_quiesce_cpu(void *cpup)
{
__migrate_timers(*(unsigned int *)cpup, false);
}
#endif /* CONFIG_CPUSETS */
#endif /* CONFIG_HOTPLUG_CPU */

View File

@@ -14,6 +14,7 @@
#include <linux/mm.h>
#include <linux/cpu.h>
#include <linux/device.h>
#include <linux/nmi.h>
#include <linux/init.h>
#include <linux/module.h>
@@ -170,6 +171,7 @@ static u64 __read_mostly sample_period;
static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
static DEFINE_PER_CPU(unsigned int, watchdog_en);
static DEFINE_PER_CPU(bool, softlockup_touch_sync);
static DEFINE_PER_CPU(bool, soft_watchdog_warn);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
@@ -476,16 +478,20 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
return HRTIMER_RESTART;
}
static void watchdog_enable(unsigned int cpu)
void watchdog_enable(unsigned int cpu)
{
struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);
struct completion *done = this_cpu_ptr(&softlockup_completion);
unsigned int *enabled = this_cpu_ptr(&watchdog_en);
WARN_ON_ONCE(cpu != smp_processor_id());
init_completion(done);
complete(done);
if (*enabled)
return;
/*
* Start the timer first to prevent the NMI watchdog triggering
* before the timer has a chance to fire.
@@ -500,11 +506,24 @@ static void watchdog_enable(unsigned int cpu)
/* Enable the perf event */
if (watchdog_enabled & NMI_WATCHDOG_ENABLED)
watchdog_nmi_enable(cpu);
/*
* Need to ensure above operations are observed by other CPUs before
* indicating that timer is enabled. This is to synchronize core
* isolation and hotplug. Core isolation will wait for this flag to be
* set.
*/
mb();
*enabled = 1;
}
static void watchdog_disable(unsigned int cpu)
void watchdog_disable(unsigned int cpu)
{
struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);
unsigned int *enabled = this_cpu_ptr(&watchdog_en);
if (!*enabled)
return;
WARN_ON_ONCE(cpu != smp_processor_id());
@@ -516,6 +535,17 @@ static void watchdog_disable(unsigned int cpu)
watchdog_nmi_disable(cpu);
hrtimer_cancel(hrtimer);
wait_for_completion(this_cpu_ptr(&softlockup_completion));
/*
* No need for a barrier here since disabling the watchdog is
* synchronized with the hotplug lock.
*/
*enabled = 0;
}
bool watchdog_configured(unsigned int cpu)
{
return *per_cpu_ptr(&watchdog_en, cpu);
}
static int softlockup_stop_fn(void *data)

View File

@@ -1807,7 +1807,7 @@ int vmstat_refresh(struct ctl_table *table, int write,
static void vmstat_update(struct work_struct *w)
{
if (refresh_cpu_vm_stats(true)) {
if (refresh_cpu_vm_stats(true) && !cpu_isolated(smp_processor_id())) {
/*
* Counters were updated so we expect more updates
* to occur in the future. Keep on running the
@@ -1899,7 +1899,8 @@ static void vmstat_shepherd(struct work_struct *w)
for_each_online_cpu(cpu) {
struct delayed_work *dw = &per_cpu(vmstat_work, cpu);
if (!delayed_work_pending(dw) && need_update(cpu))
if (!delayed_work_pending(dw) && need_update(cpu) &&
!cpu_isolated(cpu))
queue_delayed_work_on(cpu, mm_percpu_wq, dw, 0);
}
put_online_cpus();