kernel: Add snapshot of changes to support cpu isolation

This snapshot is taken from msm-4.19 as of commit 5debecbe7195
("trace: filter out spurious preemption and IRQs disable traces").

Change-Id: I222aa448ac68f7365065f62dba9db94925da38a0
Signed-off-by: Satya Durga Srinivasu Prabhala <satyap@codeaurora.org>
Author:  Satya Durga Srinivasu Prabhala <satyap@codeaurora.org>
Date:    2019-09-17 10:34:18 -07:00
Commit:  201ea48219
Parent:  8573d7bfaa
20 changed files with 398 additions and 77 deletions

drivers/base/core.c

@@ -1156,6 +1156,11 @@ int lock_device_hotplug_sysfs(void)
 	return restart_syscall();
 }
 
+void lock_device_hotplug_assert(void)
+{
+	lockdep_assert_held(&device_hotplug_lock);
+}
+
 #ifdef CONFIG_BLOCK
 static inline int device_is_not_partition(struct device *dev)
 {

drivers/base/cpu.c

@@ -183,6 +183,32 @@ static struct attribute_group crash_note_cpu_attr_group = {
 };
 #endif
 
+#ifdef CONFIG_HOTPLUG_CPU
+static ssize_t isolate_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	struct cpu *cpu = container_of(dev, struct cpu, dev);
+	ssize_t rc;
+	int cpuid = cpu->dev.id;
+	unsigned int isolated = cpu_isolated(cpuid);
+
+	rc = scnprintf(buf, PAGE_SIZE-2, "%d\n", isolated);
+
+	return rc;
+}
+
+static DEVICE_ATTR_RO(isolate);
+
+static struct attribute *cpu_isolated_attrs[] = {
+	&dev_attr_isolate.attr,
+	NULL
+};
+
+static struct attribute_group cpu_isolated_attr_group = {
+	.attrs = cpu_isolated_attrs,
+};
+#endif
+
 #ifdef CONFIG_SCHED_WALT
 static ssize_t sched_load_boost_show(struct device *dev,
 				     struct device_attribute *attr, char *buf)

@@ -240,6 +266,9 @@ static const struct attribute_group *common_cpu_attr_groups[] = {
 #ifdef CONFIG_KEXEC
 	&crash_note_cpu_attr_group,
 #endif
+#ifdef CONFIG_HOTPLUG_CPU
+	&cpu_isolated_attr_group,
+#endif
 #ifdef CONFIG_SCHED_WALT
 	&sched_cpu_attr_group,
 #endif

@@ -250,6 +279,9 @@ static const struct attribute_group *hotplugable_cpu_attr_groups[] = {
 #ifdef CONFIG_KEXEC
 	&crash_note_cpu_attr_group,
 #endif
+#ifdef CONFIG_HOTPLUG_CPU
+	&cpu_isolated_attr_group,
+#endif
 #ifdef CONFIG_SCHED_WALT
 	&sched_cpu_attr_group,
 #endif

@@ -282,6 +314,7 @@ static struct cpu_attr cpu_attrs[] = {
 	_CPU_ATTR(online, &__cpu_online_mask),
 	_CPU_ATTR(possible, &__cpu_possible_mask),
 	_CPU_ATTR(present, &__cpu_present_mask),
+	_CPU_ATTR(core_ctl_isolated, &__cpu_isolated_mask),
 };
 
 /*

@@ -531,6 +564,7 @@ static struct attribute *cpu_root_attrs[] = {
 	&cpu_attrs[0].attr.attr,
 	&cpu_attrs[1].attr.attr,
 	&cpu_attrs[2].attr.attr,
+	&cpu_attrs[3].attr.attr,
 	&dev_attr_kernel_max.attr,
 	&dev_attr_offline.attr,
 	&dev_attr_isolated.attr,
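
(Aside, not part of the commit: the registration above means each CPU device should gain an "isolate" node and the new cpu_attrs[] entry a global "core_ctl_isolated" mask file, presumably /sys/devices/system/cpu/cpuN/isolate and /sys/devices/system/cpu/core_ctl_isolated. A minimal user-space sketch that reads the per-CPU node; the exact path and CPU number are assumptions, not spelled out in this patch.)

    /* Hypothetical reader for the per-CPU "isolate" attribute added above. */
    #include <stdio.h>

    int main(void)
    {
            int isolated = -1;
            FILE *f = fopen("/sys/devices/system/cpu/cpu2/isolate", "r");

            if (!f)
                    return 1;               /* attribute not present */
            if (fscanf(f, "%d", &isolated) == 1)
                    printf("cpu2 isolated: %d\n", isolated);
            fclose(f);
            return 0;
    }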

include/linux/cpumask.h

@@ -55,6 +55,7 @@ extern unsigned int nr_cpu_ids;
  *  cpu_present_mask - has bit 'cpu' set iff cpu is populated
  *  cpu_online_mask  - has bit 'cpu' set iff cpu available to scheduler
  *  cpu_active_mask  - has bit 'cpu' set iff cpu available to migration
+ *  cpu_isolated_mask- has bit 'cpu' set iff cpu isolated
  *
  *  If !CONFIG_HOTPLUG_CPU, present == possible, and active == online.
  *

@@ -91,10 +92,12 @@ extern struct cpumask __cpu_possible_mask;
 extern struct cpumask __cpu_online_mask;
 extern struct cpumask __cpu_present_mask;
 extern struct cpumask __cpu_active_mask;
+extern struct cpumask __cpu_isolated_mask;
 #define cpu_possible_mask ((const struct cpumask *)&__cpu_possible_mask)
 #define cpu_online_mask   ((const struct cpumask *)&__cpu_online_mask)
 #define cpu_present_mask  ((const struct cpumask *)&__cpu_present_mask)
 #define cpu_active_mask   ((const struct cpumask *)&__cpu_active_mask)
+#define cpu_isolated_mask ((const struct cpumask *)&__cpu_isolated_mask)
 
 extern atomic_t __num_online_cpus;

@@ -114,19 +117,31 @@ static inline unsigned int num_online_cpus(void)
 #define num_possible_cpus()	cpumask_weight(cpu_possible_mask)
 #define num_present_cpus()	cpumask_weight(cpu_present_mask)
 #define num_active_cpus()	cpumask_weight(cpu_active_mask)
+#define num_isolated_cpus()	cpumask_weight(cpu_isolated_mask)
+#define num_online_uniso_cpus()						\
+({									\
+	cpumask_t mask;							\
+									\
+	cpumask_andnot(&mask, cpu_online_mask, cpu_isolated_mask);	\
+	cpumask_weight(&mask);						\
+})
 #define cpu_online(cpu)		cpumask_test_cpu((cpu), cpu_online_mask)
 #define cpu_possible(cpu)	cpumask_test_cpu((cpu), cpu_possible_mask)
 #define cpu_present(cpu)	cpumask_test_cpu((cpu), cpu_present_mask)
 #define cpu_active(cpu)		cpumask_test_cpu((cpu), cpu_active_mask)
+#define cpu_isolated(cpu)	cpumask_test_cpu((cpu), cpu_isolated_mask)
 #else
 #define num_online_cpus()	1U
 #define num_possible_cpus()	1U
 #define num_present_cpus()	1U
 #define num_active_cpus()	1U
+#define num_isolated_cpus()	0U
+#define num_online_uniso_cpus()	1U
 #define cpu_online(cpu)		((cpu) == 0)
 #define cpu_possible(cpu)	((cpu) == 0)
 #define cpu_present(cpu)	((cpu) == 0)
 #define cpu_active(cpu)		((cpu) == 0)
+#define cpu_isolated(cpu)	((cpu) != 0)
 #endif
 
 extern cpumask_t cpus_booted_once_mask;

@@ -806,6 +821,7 @@ extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS);
 #define for_each_possible_cpu(cpu) for_each_cpu((cpu), cpu_possible_mask)
 #define for_each_online_cpu(cpu)   for_each_cpu((cpu), cpu_online_mask)
 #define for_each_present_cpu(cpu)  for_each_cpu((cpu), cpu_present_mask)
+#define for_each_isolated_cpu(cpu) for_each_cpu((cpu), cpu_isolated_mask)
 
 /* Wrappers for arch boot code to manipulate normally-constant masks */
 void init_cpu_present(const struct cpumask *src);

@@ -846,6 +862,15 @@ set_cpu_active(unsigned int cpu, bool active)
 		cpumask_clear_cpu(cpu, &__cpu_active_mask);
 }
 
+static inline void
+set_cpu_isolated(unsigned int cpu, bool isolated)
+{
+	if (isolated)
+		cpumask_set_cpu(cpu, &__cpu_isolated_mask);
+	else
+		cpumask_clear_cpu(cpu, &__cpu_isolated_mask);
+}
+
 /**
  * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask *
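
(Aside, not part of the commit: a sketch of how kernel code is expected to consume the new mask helpers; the function below is illustrative only and does not exist in the patch.)

    /* Illustrative only: prefer an online, un-isolated CPU for deferred work. */
    #include <linux/cpumask.h>

    static int pick_unisolated_cpu(void)
    {
            int cpu;

            for_each_online_cpu(cpu)
                    if (!cpu_isolated(cpu))
                            return cpu;     /* num_online_uniso_cpus() > 0 */

            /* Every online CPU is isolated; fall back to the first one. */
            return cpumask_first(cpu_online_mask);
    }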

include/linux/device.h

@@ -1551,6 +1551,7 @@ static inline bool device_supports_offline(struct device *dev)
 extern void lock_device_hotplug(void);
 extern void unlock_device_hotplug(void);
 extern int lock_device_hotplug_sysfs(void);
+extern void lock_device_hotplug_assert(void);
 extern int device_offline(struct device *dev);
 extern int device_online(struct device *dev);
 extern void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode);

include/linux/nmi.h

@@ -13,6 +13,9 @@
 #ifdef CONFIG_LOCKUP_DETECTOR
 void lockup_detector_init(void);
+extern void watchdog_enable(unsigned int cpu);
+extern void watchdog_disable(unsigned int cpu);
+extern bool watchdog_configured(unsigned int cpu);
 void lockup_detector_soft_poweroff(void);
 void lockup_detector_cleanup(void);
 bool is_hardlockup(void);

@@ -37,6 +40,20 @@ extern int sysctl_hardlockup_all_cpu_backtrace;
 static inline void lockup_detector_init(void) { }
 static inline void lockup_detector_soft_poweroff(void) { }
 static inline void lockup_detector_cleanup(void) { }
+static inline void watchdog_enable(unsigned int cpu)
+{
+}
+static inline void watchdog_disable(unsigned int cpu)
+{
+}
+static inline bool watchdog_configured(unsigned int cpu)
+{
+	/*
+	 * Pretend the watchdog is always configured.
+	 * We will be waiting for the watchdog to be enabled in core isolation
+	 */
+	return true;
+}
 #endif /* !CONFIG_LOCKUP_DETECTOR */
 
 #ifdef CONFIG_SOFTLOCKUP_DETECTOR
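
(Aside, not part of the commit: the comment in watchdog_configured() above refers to core isolation waiting for the watchdog. A sketch of what that wait might look like on the isolation side; the helper below is an assumption, not code from this snapshot.)

    /* Assumed caller: spin until the per-CPU softlockup watchdog is up. */
    #include <linux/nmi.h>
    #include <linux/delay.h>

    static void wait_for_watchdog_ready(unsigned int cpu)
    {
            /* With !CONFIG_LOCKUP_DETECTOR this returns true immediately. */
            while (!watchdog_configured(cpu))
                    usleep_range(100, 200);
    }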

include/linux/sched.h

@@ -239,6 +239,27 @@ enum migrate_types {
 	RQ_TO_GROUP,
 };
 
+#ifdef CONFIG_HOTPLUG_CPU
+extern int __weak sched_isolate_cpu(int cpu);
+extern int __weak sched_unisolate_cpu(int cpu);
+extern int __weak sched_unisolate_cpu_unlocked(int cpu);
+#else
+static inline int sched_isolate_cpu(int cpu)
+{
+	return 0;
+}
+
+static inline int sched_unisolate_cpu(int cpu)
+{
+	return 0;
+}
+
+static inline int sched_unisolate_cpu_unlocked(int cpu)
+{
+	return 0;
+}
+#endif
+
 extern void scheduler_tick(void);
 
 #define	MAX_SCHEDULE_TIMEOUT	LONG_MAX
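
(Aside, not part of the commit: the declarations above are the entry points a caller such as core_ctl would use; their definitions come from elsewhere in the msm snapshot and are not shown in these hunks. A hedged usage sketch:)

    /* Illustrative only: keep 'cpu' free of scheduler activity for a while. */
    #include <linux/sched.h>

    static int run_isolated(int cpu)
    {
            int ret = sched_isolate_cpu(cpu);       /* 0 on success (assumed) */

            if (ret)
                    return ret;

            /* ... latency-sensitive work relies on 'cpu' being left alone ... */

            return sched_unisolate_cpu(cpu);
    }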

include/linux/sched/isolation.h

@@ -28,7 +28,15 @@ extern void __init housekeeping_init(void);
 static inline int housekeeping_any_cpu(enum hk_flags flags)
 {
-	return smp_processor_id();
+	cpumask_t available;
+	int cpu;
+
+	cpumask_andnot(&available, cpu_online_mask, cpu_isolated_mask);
+	cpu = cpumask_any(&available);
+	if (cpu >= nr_cpu_ids)
+		cpu = smp_processor_id();
+
+	return cpu;
 }
 
 static inline const struct cpumask *housekeeping_cpumask(enum hk_flags flags)

@@ -52,7 +60,7 @@ static inline bool housekeeping_cpu(int cpu, enum hk_flags flags)
 	if (static_branch_unlikely(&housekeeping_overridden))
 		return housekeeping_test_cpu(cpu, flags);
 #endif
-	return true;
+	return !cpu_isolated(cpu);
 }
 #endif /* _LINUX_SCHED_ISOLATION_H */

kernel/cpu.c

@@ -990,6 +990,9 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
 	if (!cpu_present(cpu))
 		return -EINVAL;
 
+	if (!tasks_frozen && !cpu_isolated(cpu) && num_online_uniso_cpus() == 1)
+		return -EBUSY;
+
 	cpus_write_lock();
 	if (trace_cpuhp_latency_enabled())
 		start_time = sched_clock();

@@ -2377,6 +2380,9 @@ EXPORT_SYMBOL(__cpu_present_mask);
 struct cpumask __cpu_active_mask __read_mostly;
 EXPORT_SYMBOL(__cpu_active_mask);
 
+struct cpumask __cpu_isolated_mask __read_mostly;
+EXPORT_SYMBOL(__cpu_isolated_mask);
+
 atomic_t __num_online_cpus __read_mostly;
 EXPORT_SYMBOL(__num_online_cpus);

@@ -2395,6 +2401,11 @@ void init_cpu_online(const struct cpumask *src)
 	cpumask_copy(&__cpu_online_mask, src);
 }
 
+void init_cpu_isolated(const struct cpumask *src)
+{
+	cpumask_copy(&__cpu_isolated_mask, src);
+}
+
 void set_cpu_online(unsigned int cpu, bool online)
 {
 	/*

kernel/irq/cpuhotplug.c

@@ -12,6 +12,7 @@
 #include <linux/interrupt.h>
 #include <linux/ratelimit.h>
 #include <linux/irq.h>
+#include <linux/cpumask.h>
 
 #include "internals.h"

@@ -57,6 +58,7 @@ static bool migrate_one_irq(struct irq_desc *desc)
 	const struct cpumask *affinity;
 	bool brokeaff = false;
 	int err;
+	struct cpumask available_cpus;
 
 	/*
 	 * IRQ chip might be already torn down, but the irq descriptor is

@@ -109,6 +111,10 @@ static bool migrate_one_irq(struct irq_desc *desc)
 	if (maskchip && chip->irq_mask)
 		chip->irq_mask(d);
 
+	cpumask_copy(&available_cpus, affinity);
+	cpumask_andnot(&available_cpus, &available_cpus, cpu_isolated_mask);
+	affinity = &available_cpus;
+
 	if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
 		/*
 		 * If the interrupt is managed, then shut it down and leave

@@ -119,16 +125,41 @@ static bool migrate_one_irq(struct irq_desc *desc)
 			irq_shutdown_and_deactivate(desc);
 			return false;
 		}
-		affinity = cpu_online_mask;
+		/*
+		 * The order of preference for selecting a fallback CPU is
+		 *
+		 * (1) online and un-isolated CPU from default affinity
+		 * (2) online and un-isolated CPU
+		 * (3) online CPU
+		 */
+		cpumask_andnot(&available_cpus, cpu_online_mask,
+			       cpu_isolated_mask);
+		if (cpumask_intersects(&available_cpus, irq_default_affinity))
+			cpumask_and(&available_cpus, &available_cpus,
+				    irq_default_affinity);
+		else if (cpumask_empty(&available_cpus))
+			affinity = cpu_online_mask;
+
+		/*
+		 * We are overriding the affinity with all online and
+		 * un-isolated cpus. irq_set_affinity_locked() call
+		 * below notify this mask to PM QOS affinity listener.
+		 * That results in applying the CPU_DMA_LATENCY QOS
+		 * to all the CPUs specified in the mask. But the low
+		 * level irqchip driver sets the affinity of an irq
+		 * to only one CPU. So pick only one CPU from the
+		 * prepared mask while overriding the user affinity.
+		 */
+		affinity = cpumask_of(cpumask_any(affinity));
 		brokeaff = true;
 	}
 
 	/*
-	 * Do not set the force argument of irq_do_set_affinity() as this
+	 * Do not set the force argument of irq_set_affinity_locked() as this
 	 * disables the masking of offline CPUs from the supplied affinity
 	 * mask and therefore might keep/reassign the irq to the outgoing
 	 * CPU.
 	 */
-	err = irq_do_set_affinity(d, affinity, false);
+	err = irq_set_affinity_locked(d, affinity, false);
 	if (err) {
 		pr_warn_ratelimited("IRQ%u: set affinity failed(%d).\n",
 				    d->irq, err);

kernel/irq/proc.c

@@ -132,6 +132,11 @@ static ssize_t write_irq_affinity(int type, struct file *file,
 	if (err)
 		goto free_cpumask;
 
+	if (cpumask_subset(new_value, cpu_isolated_mask)) {
+		err = -EINVAL;
+		goto free_cpumask;
+	}
+
 	/*
 	 * Do not allow disabling IRQs completely - it's a too easy
 	 * way to make the system unusable accidentally :-) At least
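
(Aside, not part of the commit: with the check above, a /proc/irq/N/smp_affinity write whose mask is a subset of the isolated CPUs now fails with EINVAL. An illustrative user-space probe; the IRQ number and mask below are made up.)

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            FILE *f = fopen("/proc/irq/100/smp_affinity", "w");

            if (!f)
                    return 1;
            /* "4" = CPU2 only; rejected if CPU2 is isolated. */
            if (fprintf(f, "4\n") < 0 || fflush(f) == EOF)
                    printf("write rejected: %s\n", strerror(errno));
            fclose(f);
            return 0;
    }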

kernel/sched/core.c

@@ -11,6 +11,7 @@
 #include <linux/nospec.h>
 #include <linux/kcov.h>
+#include <linux/irq.h>
 
 #include <asm/switch_to.h>
 #include <asm/tlb.h>

@@ -1649,6 +1650,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
 	struct rq_flags rf;
 	struct rq *rq;
 	int ret = 0;
+	cpumask_t allowed_mask;
 
 	rq = task_rq_lock(p, &rf);
 	update_rq_clock(rq);

@@ -1672,10 +1674,17 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
 	if (cpumask_equal(p->cpus_ptr, new_mask))
 		goto out;
 
-	dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask);
+	cpumask_andnot(&allowed_mask, new_mask, cpu_isolated_mask);
+	cpumask_and(&allowed_mask, &allowed_mask, cpu_valid_mask);
+
+	dest_cpu = cpumask_any(&allowed_mask);
 	if (dest_cpu >= nr_cpu_ids) {
-		ret = -EINVAL;
-		goto out;
+		cpumask_and(&allowed_mask, cpu_valid_mask, new_mask);
+		dest_cpu = cpumask_any(&allowed_mask);
+		if (!cpumask_intersects(new_mask, cpu_valid_mask)) {
+			ret = -EINVAL;
+			goto out;
+		}
 	}
 
 	do_set_cpus_allowed(p, new_mask);

@@ -1691,7 +1700,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
 	}
 
 	/* Can the task run on the task's current CPU? If so, we're done */
-	if (cpumask_test_cpu(task_cpu(p), new_mask))
+	if (cpumask_test_cpu(task_cpu(p), &allowed_mask))
 		goto out;
 
 	if (task_running(rq, p) || p->state == TASK_WAKING) {
@@ -2043,12 +2052,13 @@ EXPORT_SYMBOL_GPL(kick_process);
  * select_task_rq() below may allow selection of !active CPUs in order
  * to satisfy the above rules.
  */
-static int select_fallback_rq(int cpu, struct task_struct *p)
+static int select_fallback_rq(int cpu, struct task_struct *p, bool allow_iso)
 {
 	int nid = cpu_to_node(cpu);
 	const struct cpumask *nodemask = NULL;
-	enum { cpuset, possible, fail } state = cpuset;
+	enum { cpuset, possible, fail, bug } state = cpuset;
 	int dest_cpu;
+	int isolated_candidate = -1;
 
 	/*
 	 * If the node that the CPU is on has been offlined, cpu_to_node()

@@ -2062,6 +2072,8 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
 		for_each_cpu(dest_cpu, nodemask) {
 			if (!cpu_active(dest_cpu))
 				continue;
+			if (cpu_isolated(dest_cpu))
+				continue;
 			if (cpumask_test_cpu(dest_cpu, p->cpus_ptr))
 				return dest_cpu;
 		}

@@ -2072,7 +2084,16 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
 		for_each_cpu(dest_cpu, p->cpus_ptr) {
 			if (!is_cpu_allowed(p, dest_cpu))
 				continue;
+			if (cpu_isolated(dest_cpu)) {
+				if (allow_iso)
+					isolated_candidate = dest_cpu;
+				continue;
+			}
+			goto out;
+		}
+
+		if (isolated_candidate != -1) {
+			dest_cpu = isolated_candidate;
 			goto out;
 		}

@@ -2091,6 +2112,11 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
 			break;
 
 		case fail:
+			allow_iso = true;
+			state = bug;
+			break;
+
+		case bug:
 			BUG();
 			break;
 		}

@@ -2118,6 +2144,8 @@ out:
 static inline
 int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
 {
+	bool allow_isolated = (p->flags & PF_KTHREAD);
+
 	lockdep_assert_held(&p->pi_lock);
 
 	if (p->nr_cpus_allowed > 1)

@@ -2135,8 +2163,9 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
 	 * [ this allows ->select_task() to simply return task_cpu(p) and
 	 *   not worry about this generic constraint ]
 	 */
-	if (unlikely(!is_cpu_allowed(p, cpu)))
-		cpu = select_fallback_rq(task_cpu(p), p);
+	if (unlikely(!is_cpu_allowed(p, cpu)) ||
+	    (cpu_isolated(cpu) && !allow_isolated))
+		cpu = select_fallback_rq(task_cpu(p), p, allow_isolated);
 
 	return cpu;
 }
@@ -2327,6 +2356,7 @@ void sched_ttwu_pending(void)
 void scheduler_ipi(void)
 {
+	int cpu = smp_processor_id();
 	/*
 	 * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting
 	 * TIF_NEED_RESCHED remotely (for the first time) will also send

@@ -2356,7 +2386,7 @@ void scheduler_ipi(void)
 	/*
 	 * Check if someone kicked us for doing the nohz idle load balance.
 	 */
-	if (unlikely(got_nohz_idle_kick())) {
+	if (unlikely(got_nohz_idle_kick()) && !cpu_isolated(cpu)) {
 		this_rq()->idle_balance = 1;
 		raise_softirq_irqoff(SCHED_SOFTIRQ);
 	}

@@ -3542,7 +3572,7 @@ void sched_exec(void)
 	if (dest_cpu == smp_processor_id())
 		goto unlock;
 
-	if (likely(cpu_active(dest_cpu))) {
+	if (likely(cpu_active(dest_cpu) && likely(!cpu_isolated(dest_cpu)))) {
 		struct migration_arg arg = { p, dest_cpu };
 
 		raw_spin_unlock_irqrestore(&p->pi_lock, flags);

@@ -5463,6 +5493,8 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
 	cpumask_var_t cpus_allowed, new_mask;
 	struct task_struct *p;
 	int retval;
+	int dest_cpu;
+	cpumask_t allowed_mask;
 
 	rcu_read_lock();

@@ -5524,20 +5556,26 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
 	}
 #endif
 again:
-	retval = __set_cpus_allowed_ptr(p, new_mask, true);
-
-	if (!retval) {
-		cpuset_cpus_allowed(p, cpus_allowed);
-		if (!cpumask_subset(new_mask, cpus_allowed)) {
-			/*
-			 * We must have raced with a concurrent cpuset
-			 * update. Just reset the cpus_allowed to the
-			 * cpuset's cpus_allowed
-			 */
-			cpumask_copy(new_mask, cpus_allowed);
-			goto again;
-		}
-	}
+	cpumask_andnot(&allowed_mask, new_mask, cpu_isolated_mask);
+	dest_cpu = cpumask_any_and(cpu_active_mask, &allowed_mask);
+	if (dest_cpu < nr_cpu_ids) {
+		retval = __set_cpus_allowed_ptr(p, new_mask, true);
+		if (!retval) {
+			cpuset_cpus_allowed(p, cpus_allowed);
+			if (!cpumask_subset(new_mask, cpus_allowed)) {
+				/*
+				 * We must have raced with a concurrent cpuset
+				 * update. Just reset the cpus_allowed to the
+				 * cpuset's cpus_allowed
+				 */
+				cpumask_copy(new_mask, cpus_allowed);
+				goto again;
+			}
+		}
+	} else {
+		retval = -EINVAL;
+	}
 out_free_new_mask:
 	free_cpumask_var(new_mask);
 out_free_cpus_allowed:

@@ -5655,6 +5693,14 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
 	raw_spin_lock_irqsave(&p->pi_lock, flags);
 	cpumask_and(mask, &p->cpus_mask, cpu_active_mask);
+
+	/* The userspace tasks are forbidden to run on
+	 * isolated CPUs. So exclude isolated CPUs from
+	 * the getaffinity.
+	 */
+	if (!(p->flags & PF_KTHREAD))
+		cpumask_andnot(mask, mask, cpu_isolated_mask);
+
 	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 
 out_unlock:
@@ -6351,19 +6397,25 @@ static struct task_struct *__pick_migrate_task(struct rq *rq)
 }
 
 /*
- * Migrate all tasks from the rq, sleeping tasks will be migrated by
- * try_to_wake_up()->select_task_rq().
+ * Migrate all tasks (not pinned if pinned argument say so) from the rq,
+ * sleeping tasks will be migrated by try_to_wake_up()->select_task_rq().
  *
  * Called with rq->lock held even though we'er in stop_machine() and
  * there's no concurrency possible, we hold the required locks anyway
  * because of lock validation efforts.
  */
-void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf)
+void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf,
+		   bool migrate_pinned_tasks)
 {
 	struct rq *rq = dead_rq;
 	struct task_struct *next, *stop = rq->stop;
 	struct rq_flags orf = *rf;
 	int dest_cpu;
+	unsigned int num_pinned_kthreads = 1; /* this thread */
+	LIST_HEAD(tasks);
+	cpumask_t avail_cpus;
+
+	cpumask_andnot(&avail_cpus, cpu_online_mask, cpu_isolated_mask);
 
 	/*
 	 * Fudge the rq selection such that the below task selection loop

@@ -6386,13 +6438,20 @@ void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf)
 	for (;;) {
 		/*
 		 * There's this thread running, bail when that's the only
-		 * remaining thread:
+		 * remaining thread.
 		 */
 		if (rq->nr_running == 1)
 			break;
 
 		next = __pick_migrate_task(rq);
+		if (!migrate_pinned_tasks && next->flags & PF_KTHREAD &&
+		    !cpumask_intersects(&avail_cpus, &next->cpus_mask)) {
+			detach_one_task_core(next, rq, &tasks);
+			num_pinned_kthreads += 1;
+			continue;
+		}
 
 		/*
 		 * Rules for changing task_struct::cpus_mask are holding
 		 * both pi_lock and rq->lock, such that holding either

@@ -6405,31 +6464,43 @@ void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf)
 		rq_unlock(rq, rf);
 		raw_spin_lock(&next->pi_lock);
 		rq_relock(rq, rf);
+		if (!(rq->clock_update_flags & RQCF_UPDATED))
+			update_rq_clock(rq);
 
 		/*
 		 * Since we're inside stop-machine, _nothing_ should have
 		 * changed the task, WARN if weird stuff happened, because in
 		 * that case the above rq->lock drop is a fail too.
+		 * However, during cpu isolation the load balancer might have
+		 * interferred since we don't stop all CPUs. Ignore warning for
+		 * this case.
 		 */
-		if (WARN_ON(task_rq(next) != rq || !task_on_rq_queued(next))) {
+		if (task_rq(next) != rq || !task_on_rq_queued(next)) {
+			WARN_ON(migrate_pinned_tasks);
 			raw_spin_unlock(&next->pi_lock);
 			continue;
 		}
 
 		/* Find suitable destination for @next, with force if needed. */
-		dest_cpu = select_fallback_rq(dead_rq->cpu, next);
+		dest_cpu = select_fallback_rq(dead_rq->cpu, next, false);
 		rq = __migrate_task(rq, rf, next, dest_cpu);
 		if (rq != dead_rq) {
 			rq_unlock(rq, rf);
 			rq = dead_rq;
 			*rf = orf;
 			rq_relock(rq, rf);
+			if (!(rq->clock_update_flags & RQCF_UPDATED))
+				update_rq_clock(rq);
 		}
 		raw_spin_unlock(&next->pi_lock);
 	}
 
 	rq->stop = stop;
+
+	if (num_pinned_kthreads > 1)
+		attach_tasks_core(&tasks, rq);
 }
 #endif /* CONFIG_HOTPLUG_CPU */
 
 void set_rq_online(struct rq *rq)

@@ -6619,7 +6690,7 @@ int sched_cpu_dying(unsigned int cpu)
 		BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
 		set_rq_offline(rq);
 	}
-	migrate_tasks(rq, &rf);
+	migrate_tasks(rq, &rf, true);
 	BUG_ON(rq->nr_running != 1);
 	rq_unlock_irqrestore(rq, &rf);

kernel/sched/fair.c

@@ -5983,6 +5983,8 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 			return si_cpu;
 		if (!cpumask_test_cpu(cpu, p->cpus_ptr))
 			continue;
+		if (cpu_isolated(cpu))
+			continue;
 		if (available_idle_cpu(cpu))
 			break;
 		if (si_cpu == -1 && sched_idle_cpu(cpu))

@@ -6005,14 +6007,16 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 	struct sched_domain *sd;
 	int i, recent_used_cpu;
 
-	if (available_idle_cpu(target) || sched_idle_cpu(target))
+	if ((available_idle_cpu(target) || sched_idle_cpu(target)) &&
+	    !cpu_isolated(target))
 		return target;
 
 	/*
 	 * If the previous CPU is cache affine and idle, don't be stupid:
 	 */
 	if (prev != target && cpus_share_cache(prev, target) &&
-	    (available_idle_cpu(prev) || sched_idle_cpu(prev)))
+	    ((available_idle_cpu(prev) || sched_idle_cpu(prev)) &&
+	    !cpu_isolated(prev)))
 		return prev;
 
 	/* Check a recently used CPU as a potential idle candidate: */
@@ -7892,6 +7896,8 @@ void update_group_capacity(struct sched_domain *sd, int cpu)
 			struct sched_group_capacity *sgc;
 			struct rq *rq = cpu_rq(cpu);
 
+			if (cpumask_test_cpu(cpu, cpu_isolated_mask))
+				continue;
+
 			/*
 			 * build_sched_domains() -> init_sched_groups_capacity()
 			 * gets here before we've attached the domains to the

@@ -7922,10 +7928,15 @@ void update_group_capacity(struct sched_domain *sd, int cpu)
 		group = child->groups;
 		do {
 			struct sched_group_capacity *sgc = group->sgc;
+			cpumask_t *cpus = sched_group_span(group);
 
-			capacity += sgc->capacity;
-			min_capacity = min(sgc->min_capacity, min_capacity);
-			max_capacity = max(sgc->max_capacity, max_capacity);
+			if (!cpu_isolated(cpumask_first(cpus))) {
+				capacity += sgc->capacity;
+				min_capacity = min(sgc->min_capacity,
+						   min_capacity);
+				max_capacity = max(sgc->max_capacity,
+						   max_capacity);
+			}
 			group = group->next;
 		} while (group != child->groups);
 	}

@@ -8129,6 +8140,9 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 	for_each_cpu_and(i, sched_group_span(group), env->cpus) {
 		struct rq *rq = cpu_rq(i);
 
+		if (cpu_isolated(i))
+			continue;
+
 		if ((env->flags & LBF_NOHZ_STATS) && update_nohz_stats(rq, false))
 			env->flags |= LBF_NOHZ_AGAIN;

@@ -8160,17 +8174,27 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 		}
 	}
 
-	/* Adjust by relative CPU capacity of the group */
-	sgs->group_capacity = group->sgc->capacity;
-	sgs->avg_load = (sgs->group_load*SCHED_CAPACITY_SCALE) / sgs->group_capacity;
+	/* Isolated CPU has no weight */
+	if (!group->group_weight) {
+		sgs->group_capacity = 0;
+		sgs->avg_load = 0;
+		sgs->group_no_capacity = 1;
+		sgs->group_type = group_other;
+		sgs->group_weight = group->group_weight;
+	} else {
+		/* Adjust by relative CPU capacity of the group */
+		sgs->group_capacity = group->sgc->capacity;
+		sgs->avg_load = (sgs->group_load*SCHED_CAPACITY_SCALE) /
+						sgs->group_capacity;
+
+		sgs->group_weight = group->group_weight;
+
+		sgs->group_no_capacity = group_is_overloaded(env, sgs);
+		sgs->group_type = group_classify(group, sgs);
+	}
 
 	if (sgs->sum_nr_running)
 		sgs->load_per_task = sgs->group_load / sgs->sum_nr_running;
-
-	sgs->group_weight = group->group_weight;
-
-	sgs->group_no_capacity = group_is_overloaded(env, sgs);
-	sgs->group_type = group_classify(group, sgs);
 }
 
 /**
@@ -8910,7 +8934,7 @@ static int should_we_balance(struct lb_env *env)
 	/* Try to find first idle CPU */
 	for_each_cpu_and(cpu, group_balance_mask(sg), env->cpus) {
-		if (!idle_cpu(cpu))
+		if (!idle_cpu(cpu) || cpu_isolated(cpu))
 			continue;
 
 		balance_cpu = cpu;

@@ -8918,7 +8942,7 @@ static int should_we_balance(struct lb_env *env)
 	}
 
 	if (balance_cpu == -1)
-		balance_cpu = group_balance_cpu(sg);
+		balance_cpu = group_balance_cpu_not_isolated(sg);
 
 	/*
 	 * First idle CPU or the first CPU(busiest) in this sched group

@@ -9127,7 +9151,8 @@ more_balance:
 			 * ->active_balance_work. Once set, it's cleared
 			 * only after active load balance is finished.
 			 */
-			if (!busiest->active_balance) {
+			if (!busiest->active_balance &&
+			    !cpu_isolated(cpu_of(busiest))) {
 				busiest->active_balance = 1;
 				busiest->push_cpu = this_cpu;
 				active_balance = 1;
@@ -9333,7 +9358,13 @@ static DEFINE_SPINLOCK(balancing);
  */
 void update_max_interval(void)
 {
-	max_load_balance_interval = HZ*num_online_cpus()/10;
+	cpumask_t avail_mask;
+	unsigned int available_cpus;
+
+	cpumask_andnot(&avail_mask, cpu_online_mask, cpu_isolated_mask);
+	available_cpus = cpumask_weight(&avail_mask);
+
+	max_load_balance_interval = HZ*available_cpus/10;
 }
 
 /*

@@ -9510,6 +9541,7 @@ static void nohz_balancer_kick(struct rq *rq)
 	struct sched_domain *sd;
 	int nr_busy, i, cpu = rq->cpu;
 	unsigned int flags = 0;
+	cpumask_t cpumask;
 
 	if (unlikely(rq->idle_balance))
 		return;

@@ -9524,7 +9556,8 @@ static void nohz_balancer_kick(struct rq *rq)
 	 * None are in tickless mode and hence no need for NOHZ idle load
 	 * balancing.
 	 */
-	if (likely(!atomic_read(&nohz.nr_cpus)))
+	cpumask_andnot(&cpumask, nohz.idle_cpus_mask, cpu_isolated_mask);
+	if (cpumask_empty(&cpumask))
 		return;
 
 	if (READ_ONCE(nohz.has_blocked) &&

@@ -9561,7 +9594,7 @@ static void nohz_balancer_kick(struct rq *rq)
 		 * currently idle; in which case, kick the ILB to move tasks
 		 * around.
 		 */
-		for_each_cpu_and(i, sched_domain_span(sd), nohz.idle_cpus_mask) {
+		for_each_cpu_and(i, sched_domain_span(sd), &cpumask) {
 			if (sched_asym_prefer(i, cpu)) {
 				flags = NOHZ_KICK_MASK;
 				goto unlock;

@@ -9739,6 +9772,7 @@ static bool _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
 	int balance_cpu;
 	int ret = false;
 	struct rq *rq;
+	cpumask_t cpus;
 
 	SCHED_WARN_ON((flags & NOHZ_KICK_MASK) == NOHZ_BALANCE_KICK);

@@ -9758,7 +9792,9 @@ static bool _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
 	 */
 	smp_mb();
 
-	for_each_cpu(balance_cpu, nohz.idle_cpus_mask) {
+	cpumask_andnot(&cpus, nohz.idle_cpus_mask, cpu_isolated_mask);
+
+	for_each_cpu(balance_cpu, &cpus) {
 		if (balance_cpu == this_cpu || !idle_cpu(balance_cpu))
 			continue;

@@ -9910,6 +9946,9 @@ int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
 	int pulled_task = 0;
 	u64 curr_cost = 0;
 
+	if (cpu_isolated(this_cpu))
+		return 0;
+
 	update_misfit_status(NULL, this_rq);
 	/*
 	 * We must set idle_stamp _before_ calling idle_balance(), such that we

@@ -10026,6 +10065,14 @@ static __latent_entropy void run_rebalance_domains(struct softirq_action *h)
 	enum cpu_idle_type idle = this_rq->idle_balance ?
 						CPU_IDLE : CPU_NOT_IDLE;
 
+	/*
+	 * Since core isolation doesn't update nohz.idle_cpus_mask, there
+	 * is a possibility this nohz kicked cpu could be isolated. Hence
+	 * return if the cpu is isolated.
+	 */
+	if (cpu_isolated(this_rq->cpu))
+		return;
+
 	/*
 	 * If this CPU has a pending nohz_balance_kick, then do the
 	 * balancing on behalf of the other idle CPUs whose ticks are

@@ -10047,8 +10094,10 @@ static __latent_entropy void run_rebalance_domains(struct softirq_action *h)
  */
 void trigger_load_balance(struct rq *rq)
 {
-	/* Don't need to rebalance while attached to NULL domain */
-	if (unlikely(on_null_domain(rq)))
+	/* Don't need to rebalance while attached to NULL domain or
+	 * cpu is isolated.
+	 */
+	if (unlikely(on_null_domain(rq)) || cpu_isolated(cpu_of(rq)))
 		return;
 
 	if (time_after_eq(jiffies, rq->next_balance))

kernel/sched/rt.c

@@ -263,8 +263,12 @@ static void pull_rt_task(struct rq *this_rq);
 static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
 {
-	/* Try to pull RT tasks here if we lower this rq's prio */
-	return rq->rt.highest_prio.curr > prev->prio;
+	/*
+	 * Try to pull RT tasks here if we lower this rq's prio and cpu is not
+	 * isolated
+	 */
+	return rq->rt.highest_prio.curr > prev->prio &&
+	       !cpu_isolated(cpu_of(rq));
 }
 
 static inline int rt_overloaded(struct rq *rq)

@@ -2192,7 +2196,8 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p)
 	 * we may need to handle the pulling of RT tasks
 	 * now.
 	 */
-	if (!task_on_rq_queued(p) || rq->rt.rt_nr_running)
+	if (!task_on_rq_queued(p) || rq->rt.rt_nr_running ||
+	    cpu_isolated(cpu_of(rq)))
 		return;
 
 	rt_queue_pull_task(rq);

kernel/sched/sched.h

@@ -163,6 +163,10 @@ extern atomic_long_t calc_load_tasks;
 extern void calc_global_load_tick(struct rq *this_rq);
 extern long calc_load_fold_active(struct rq *this_rq, long adjust);
 
+#ifdef CONFIG_SMP
+extern void init_sched_groups_capacity(int cpu, struct sched_domain *sd);
+#endif
+
 /*
  * Helpers for converting nanosecond timing to jiffy resolution
  */

@@ -3307,7 +3311,8 @@ extern int active_load_balance_cpu_stop(void *data);
 #ifdef CONFIG_HOTPLUG_CPU
 extern void set_rq_online(struct rq *rq);
 extern void set_rq_offline(struct rq *rq);
-extern void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf);
+extern void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf,
+			  bool migrate_pinned_tasks);
 extern void calc_load_migrate(struct rq *rq);
 #ifdef CONFIG_SCHED_WALT
 extern void __weak

kernel/sched/topology.c

@@ -1163,16 +1163,19 @@ build_sched_groups(struct sched_domain *sd, int cpu)
  * group having more cpu_capacity will pickup more load compared to the
  * group having less cpu_capacity.
  */
-static void init_sched_groups_capacity(int cpu, struct sched_domain *sd)
+void init_sched_groups_capacity(int cpu, struct sched_domain *sd)
 {
 	struct sched_group *sg = sd->groups;
+	cpumask_t avail_mask;
 
 	WARN_ON(!sg);
 
 	do {
 		int cpu, max_cpu = -1;
 
-		sg->group_weight = cpumask_weight(sched_group_span(sg));
+		cpumask_andnot(&avail_mask, sched_group_span(sg),
+			       cpu_isolated_mask);
+		sg->group_weight = cpumask_weight(&avail_mask);
 
 		if (!(sd->flags & SD_ASYM_PACKING))
 			goto next;

kernel/smp.c

@@ -792,7 +792,8 @@ void wake_up_all_idle_cpus(void)
 		if (cpu == smp_processor_id())
 			continue;
 
-		wake_up_if_idle(cpu);
+		if (!cpu_isolated(cpu))
+			wake_up_if_idle(cpu);
 	}
 	preempt_enable();
 }

kernel/time/hrtimer.c

@@ -2014,7 +2014,7 @@ int hrtimers_prepare_cpu(unsigned int cpu)
 	return 0;
 }
 
-#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_CPUSETS)
+#ifdef CONFIG_HOTPLUG_CPU
 static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
 				 struct hrtimer_clock_base *new_base,
 				 bool remove_pinned)

@@ -2023,12 +2023,14 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
 	struct timerqueue_node *node;
 	struct timerqueue_head pinned;
 	int is_pinned;
+	bool is_hotplug = !cpu_online(old_base->cpu_base->cpu);
 
 	timerqueue_init_head(&pinned);
 
 	while ((node = timerqueue_getnext(&old_base->active))) {
 		timer = container_of(node, struct hrtimer, node);
-		BUG_ON(hrtimer_callback_running(timer));
+		if (is_hotplug)
+			BUG_ON(hrtimer_callback_running(timer));
 		debug_deactivate(timer);
 
 		/*

@@ -2106,9 +2108,7 @@ static void __migrate_hrtimers(unsigned int scpu, bool remove_pinned)
 	local_irq_restore(flags);
 	local_bh_enable();
 }
-#endif /* CONFIG_HOTPLUG_CPU || CONFIG_CPUSETS */
 
-#ifdef CONFIG_HOTPLUG_CPU
 int hrtimers_dead_cpu(unsigned int scpu)
 {
 	BUG_ON(cpu_online(scpu));

@@ -2117,14 +2117,13 @@ int hrtimers_dead_cpu(unsigned int scpu)
 	__migrate_hrtimers(scpu, true);
 	return 0;
 }
-#endif /* CONFIG_HOTPLUG_CPU */
 
-#ifdef CONFIG_CPUSETS
 void hrtimer_quiesce_cpu(void *cpup)
 {
 	__migrate_hrtimers(*(int *)cpup, false);
 }
-#endif /* CONFIG_CPUSETS */
+#endif /* CONFIG_HOTPLUG_CPU */
 
 void __init hrtimers_init(void)
 {

kernel/time/timer.c

@@ -2038,7 +2038,8 @@ static void __migrate_timers(unsigned int cpu, bool remove_pinned)
 	 */
 	forward_timer_base(new_base);
 
-	BUG_ON(old_base->running_timer);
+	if (!cpu_online(cpu))
+		BUG_ON(old_base->running_timer);
 
 	for (i = 0; i < WHEEL_SIZE; i++)
 		migrate_timer_list(new_base, old_base->vectors + i,

@@ -2057,12 +2058,10 @@
 	return 0;
 }
 
-#ifdef CONFIG_CPUSETS
 void timer_quiesce_cpu(void *cpup)
 {
 	__migrate_timers(*(unsigned int *)cpup, false);
 }
-#endif /* CONFIG_CPUSETS */
 
 #endif /* CONFIG_HOTPLUG_CPU */

kernel/watchdog.c

@@ -14,6 +14,7 @@
 #include <linux/mm.h>
 #include <linux/cpu.h>
+#include <linux/device.h>
 #include <linux/nmi.h>
 #include <linux/init.h>
 #include <linux/module.h>

@@ -170,6 +171,7 @@ static u64 __read_mostly sample_period;
 static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
 static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
+static DEFINE_PER_CPU(unsigned int, watchdog_en);
 static DEFINE_PER_CPU(bool, softlockup_touch_sync);
 static DEFINE_PER_CPU(bool, soft_watchdog_warn);
 static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);

@@ -476,16 +478,20 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	return HRTIMER_RESTART;
 }
 
-static void watchdog_enable(unsigned int cpu)
+void watchdog_enable(unsigned int cpu)
 {
 	struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);
 	struct completion *done = this_cpu_ptr(&softlockup_completion);
+	unsigned int *enabled = this_cpu_ptr(&watchdog_en);
 
 	WARN_ON_ONCE(cpu != smp_processor_id());
 
 	init_completion(done);
 	complete(done);
 
+	if (*enabled)
+		return;
+
 	/*
 	 * Start the timer first to prevent the NMI watchdog triggering
 	 * before the timer has a chance to fire.

@@ -500,11 +506,24 @@ static void watchdog_enable(unsigned int cpu)
 	/* Enable the perf event */
 	if (watchdog_enabled & NMI_WATCHDOG_ENABLED)
 		watchdog_nmi_enable(cpu);
+
+	/*
+	 * Need to ensure above operations are observed by other CPUs before
+	 * indicating that timer is enabled. This is to synchronize core
+	 * isolation and hotplug. Core isolation will wait for this flag to be
+	 * set.
+	 */
+	mb();
+	*enabled = 1;
 }
 
-static void watchdog_disable(unsigned int cpu)
+void watchdog_disable(unsigned int cpu)
 {
 	struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);
+	unsigned int *enabled = this_cpu_ptr(&watchdog_en);
+
+	if (!*enabled)
+		return;
 
 	WARN_ON_ONCE(cpu != smp_processor_id());

@@ -516,6 +535,17 @@ static void watchdog_disable(unsigned int cpu)
 	watchdog_nmi_disable(cpu);
 	hrtimer_cancel(hrtimer);
 	wait_for_completion(this_cpu_ptr(&softlockup_completion));
+
+	/*
+	 * No need for barrier here since disabling the watchdog is
+	 * synchronized with hotplug lock
+	 */
+	*enabled = 0;
+}
+
+bool watchdog_configured(unsigned int cpu)
+{
+	return *per_cpu_ptr(&watchdog_en, cpu);
 }
 
 static int softlockup_stop_fn(void *data)

mm/vmstat.c

@@ -1807,7 +1807,7 @@ int vmstat_refresh(struct ctl_table *table, int write,
 static void vmstat_update(struct work_struct *w)
 {
-	if (refresh_cpu_vm_stats(true)) {
+	if (refresh_cpu_vm_stats(true) && !cpu_isolated(smp_processor_id())) {
 		/*
 		 * Counters were updated so we expect more updates
 		 * to occur in the future. Keep on running the

@@ -1899,7 +1899,8 @@ static void vmstat_shepherd(struct work_struct *w)
 	for_each_online_cpu(cpu) {
 		struct delayed_work *dw = &per_cpu(vmstat_work, cpu);
 
-		if (!delayed_work_pending(dw) && need_update(cpu))
+		if (!delayed_work_pending(dw) && need_update(cpu) &&
+		    !cpu_isolated(cpu))
 			queue_delayed_work_on(cpu, mm_percpu_wq, dw, 0);
 	}
 	put_online_cpus();