kernel: Add snapshot of changes to support cpu isolation
This snapshot is taken from msm-4.19 as of commit 5debecbe7195
("trace: filter out spurious preemption and IRQs disable traces").

Change-Id: I222aa448ac68f7365065f62dba9db94925da38a0
Signed-off-by: Satya Durga Srinivasu Prabhala <satyap@codeaurora.org>
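For context, a minimal userspace sketch (not part of the patch) of how the new per-CPU "isolate" attribute registered below via common_cpu_attr_groups could be read. The /sys/devices/system/cpu/cpuN/isolate path and the 8-CPU probe limit are assumptions for illustration; the value is the 0/1 result of cpu_isolated() as produced by isolate_show().

/*
 * Illustrative only -- not part of this patch. Reads the per-CPU "isolate"
 * attribute exposed by isolate_show() in the diff below. The sysfs location
 * (/sys/devices/system/cpu/cpuN/isolate) and the CPU count probed here are
 * assumptions made for this example.
 */
#include <stdio.h>

int main(void)
{
	int cpu;

	for (cpu = 0; cpu < 8; cpu++) {	/* probe the first 8 CPUs */
		char path[64];
		FILE *f;
		int isolated;

		snprintf(path, sizeof(path),
			 "/sys/devices/system/cpu/cpu%d/isolate", cpu);
		f = fopen(path, "r");
		if (!f)
			break;	/* CPU not present or attribute missing */
		if (fscanf(f, "%d", &isolated) == 1)
			printf("cpu%d isolated=%d\n", cpu, isolated);
		fclose(f);
	}
	return 0;
}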
@@ -1156,6 +1156,11 @@ int lock_device_hotplug_sysfs(void)
return restart_syscall();
}

void lock_device_hotplug_assert(void)
{
lockdep_assert_held(&device_hotplug_lock);
}

#ifdef CONFIG_BLOCK
static inline int device_is_not_partition(struct device *dev)
{

@@ -183,6 +183,32 @@ static struct attribute_group crash_note_cpu_attr_group = {
};
#endif

#ifdef CONFIG_HOTPLUG_CPU
static ssize_t isolate_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct cpu *cpu = container_of(dev, struct cpu, dev);
ssize_t rc;
int cpuid = cpu->dev.id;
unsigned int isolated = cpu_isolated(cpuid);

rc = scnprintf(buf, PAGE_SIZE-2, "%d\n", isolated);

return rc;
}

static DEVICE_ATTR_RO(isolate);

static struct attribute *cpu_isolated_attrs[] = {
&dev_attr_isolate.attr,
NULL
};

static struct attribute_group cpu_isolated_attr_group = {
.attrs = cpu_isolated_attrs,
};
#endif

#ifdef CONFIG_SCHED_WALT
static ssize_t sched_load_boost_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -240,6 +266,9 @@ static const struct attribute_group *common_cpu_attr_groups[] = {
#ifdef CONFIG_KEXEC
&crash_note_cpu_attr_group,
#endif
#ifdef CONFIG_HOTPLUG_CPU
&cpu_isolated_attr_group,
#endif
#ifdef CONFIG_SCHED_WALT
&sched_cpu_attr_group,
#endif
@@ -250,6 +279,9 @@ static const struct attribute_group *hotplugable_cpu_attr_groups[] = {
#ifdef CONFIG_KEXEC
&crash_note_cpu_attr_group,
#endif
#ifdef CONFIG_HOTPLUG_CPU
&cpu_isolated_attr_group,
#endif
#ifdef CONFIG_SCHED_WALT
&sched_cpu_attr_group,
#endif
@@ -282,6 +314,7 @@ static struct cpu_attr cpu_attrs[] = {
_CPU_ATTR(online, &__cpu_online_mask),
_CPU_ATTR(possible, &__cpu_possible_mask),
_CPU_ATTR(present, &__cpu_present_mask),
_CPU_ATTR(core_ctl_isolated, &__cpu_isolated_mask),
};

/*
@@ -531,6 +564,7 @@ static struct attribute *cpu_root_attrs[] = {
&cpu_attrs[0].attr.attr,
&cpu_attrs[1].attr.attr,
&cpu_attrs[2].attr.attr,
&cpu_attrs[3].attr.attr,
&dev_attr_kernel_max.attr,
&dev_attr_offline.attr,
&dev_attr_isolated.attr,

@@ -55,6 +55,7 @@ extern unsigned int nr_cpu_ids;
* cpu_present_mask - has bit 'cpu' set iff cpu is populated
* cpu_online_mask - has bit 'cpu' set iff cpu available to scheduler
* cpu_active_mask - has bit 'cpu' set iff cpu available to migration
* cpu_isolated_mask- has bit 'cpu' set iff cpu isolated
*
* If !CONFIG_HOTPLUG_CPU, present == possible, and active == online.
*
@@ -91,10 +92,12 @@ extern struct cpumask __cpu_possible_mask;
extern struct cpumask __cpu_online_mask;
extern struct cpumask __cpu_present_mask;
extern struct cpumask __cpu_active_mask;
extern struct cpumask __cpu_isolated_mask;
#define cpu_possible_mask ((const struct cpumask *)&__cpu_possible_mask)
#define cpu_online_mask ((const struct cpumask *)&__cpu_online_mask)
#define cpu_present_mask ((const struct cpumask *)&__cpu_present_mask)
#define cpu_active_mask ((const struct cpumask *)&__cpu_active_mask)
#define cpu_isolated_mask ((const struct cpumask *)&__cpu_isolated_mask)

extern atomic_t __num_online_cpus;

@@ -114,19 +117,31 @@ static inline unsigned int num_online_cpus(void)
#define num_possible_cpus() cpumask_weight(cpu_possible_mask)
#define num_present_cpus() cpumask_weight(cpu_present_mask)
#define num_active_cpus() cpumask_weight(cpu_active_mask)
#define num_isolated_cpus() cpumask_weight(cpu_isolated_mask)
#define num_online_uniso_cpus() \
({ \
cpumask_t mask; \
\
cpumask_andnot(&mask, cpu_online_mask, cpu_isolated_mask); \
cpumask_weight(&mask); \
})
#define cpu_online(cpu) cpumask_test_cpu((cpu), cpu_online_mask)
#define cpu_possible(cpu) cpumask_test_cpu((cpu), cpu_possible_mask)
#define cpu_present(cpu) cpumask_test_cpu((cpu), cpu_present_mask)
#define cpu_active(cpu) cpumask_test_cpu((cpu), cpu_active_mask)
#define cpu_isolated(cpu) cpumask_test_cpu((cpu), cpu_isolated_mask)
#else
#define num_online_cpus() 1U
#define num_possible_cpus() 1U
#define num_present_cpus() 1U
#define num_active_cpus() 1U
#define num_isolated_cpus() 0U
#define num_online_uniso_cpus() 1U
#define cpu_online(cpu) ((cpu) == 0)
#define cpu_possible(cpu) ((cpu) == 0)
#define cpu_present(cpu) ((cpu) == 0)
#define cpu_active(cpu) ((cpu) == 0)
#define cpu_isolated(cpu) ((cpu) != 0)
#endif

extern cpumask_t cpus_booted_once_mask;
@@ -806,6 +821,7 @@ extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS);
#define for_each_possible_cpu(cpu) for_each_cpu((cpu), cpu_possible_mask)
#define for_each_online_cpu(cpu) for_each_cpu((cpu), cpu_online_mask)
#define for_each_present_cpu(cpu) for_each_cpu((cpu), cpu_present_mask)
#define for_each_isolated_cpu(cpu) for_each_cpu((cpu), cpu_isolated_mask)

/* Wrappers for arch boot code to manipulate normally-constant masks */
void init_cpu_present(const struct cpumask *src);
@@ -846,6 +862,15 @@ set_cpu_active(unsigned int cpu, bool active)
cpumask_clear_cpu(cpu, &__cpu_active_mask);
}

static inline void
set_cpu_isolated(unsigned int cpu, bool isolated)
{
if (isolated)
cpumask_set_cpu(cpu, &__cpu_isolated_mask);
else
cpumask_clear_cpu(cpu, &__cpu_isolated_mask);
}


/**
* to_cpumask - convert an NR_CPUS bitmap to a struct cpumask *

@@ -1551,6 +1551,7 @@ static inline bool device_supports_offline(struct device *dev)
extern void lock_device_hotplug(void);
extern void unlock_device_hotplug(void);
extern int lock_device_hotplug_sysfs(void);
extern void lock_device_hotplug_assert(void);
extern int device_offline(struct device *dev);
extern int device_online(struct device *dev);
extern void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode);

@@ -13,6 +13,9 @@

#ifdef CONFIG_LOCKUP_DETECTOR
void lockup_detector_init(void);
extern void watchdog_enable(unsigned int cpu);
extern void watchdog_disable(unsigned int cpu);
extern bool watchdog_configured(unsigned int cpu);
void lockup_detector_soft_poweroff(void);
void lockup_detector_cleanup(void);
bool is_hardlockup(void);
@@ -37,6 +40,20 @@ extern int sysctl_hardlockup_all_cpu_backtrace;
static inline void lockup_detector_init(void) { }
static inline void lockup_detector_soft_poweroff(void) { }
static inline void lockup_detector_cleanup(void) { }
static inline void watchdog_enable(unsigned int cpu)
{
}
static inline void watchdog_disable(unsigned int cpu)
{
}
static inline bool watchdog_configured(unsigned int cpu)
{
/*
* Pretend the watchdog is always configured.
* We will be waiting for the watchdog to be enabled in core isolation
*/
return true;
}
#endif /* !CONFIG_LOCKUP_DETECTOR */

#ifdef CONFIG_SOFTLOCKUP_DETECTOR

@@ -239,6 +239,27 @@ enum migrate_types {
RQ_TO_GROUP,
};

#ifdef CONFIG_HOTPLUG_CPU
extern int __weak sched_isolate_cpu(int cpu);
extern int __weak sched_unisolate_cpu(int cpu);
extern int __weak sched_unisolate_cpu_unlocked(int cpu);
#else
static inline int sched_isolate_cpu(int cpu)
{
return 0;
}

static inline int sched_unisolate_cpu(int cpu)
{
return 0;
}

static inline int sched_unisolate_cpu_unlocked(int cpu)
{
return 0;
}
#endif

extern void scheduler_tick(void);

#define MAX_SCHEDULE_TIMEOUT LONG_MAX

@@ -28,7 +28,15 @@ extern void __init housekeeping_init(void);

static inline int housekeeping_any_cpu(enum hk_flags flags)
{
return smp_processor_id();
cpumask_t available;
int cpu;

cpumask_andnot(&available, cpu_online_mask, cpu_isolated_mask);
cpu = cpumask_any(&available);
if (cpu >= nr_cpu_ids)
cpu = smp_processor_id();

return cpu;
}

static inline const struct cpumask *housekeeping_cpumask(enum hk_flags flags)
@@ -52,7 +60,7 @@ static inline bool housekeeping_cpu(int cpu, enum hk_flags flags)
if (static_branch_unlikely(&housekeeping_overridden))
return housekeeping_test_cpu(cpu, flags);
#endif
return true;
return !cpu_isolated(cpu);
}

#endif /* _LINUX_SCHED_ISOLATION_H */
kernel/cpu.c
@@ -990,6 +990,9 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
if (!cpu_present(cpu))
return -EINVAL;

if (!tasks_frozen && !cpu_isolated(cpu) && num_online_uniso_cpus() == 1)
return -EBUSY;

cpus_write_lock();
if (trace_cpuhp_latency_enabled())
start_time = sched_clock();
@@ -2377,6 +2380,9 @@ EXPORT_SYMBOL(__cpu_present_mask);
struct cpumask __cpu_active_mask __read_mostly;
EXPORT_SYMBOL(__cpu_active_mask);

struct cpumask __cpu_isolated_mask __read_mostly;
EXPORT_SYMBOL(__cpu_isolated_mask);

atomic_t __num_online_cpus __read_mostly;
EXPORT_SYMBOL(__num_online_cpus);

@@ -2395,6 +2401,11 @@ void init_cpu_online(const struct cpumask *src)
cpumask_copy(&__cpu_online_mask, src);
}

void init_cpu_isolated(const struct cpumask *src)
{
cpumask_copy(&__cpu_isolated_mask, src);
}

void set_cpu_online(unsigned int cpu, bool online)
{
/*

@@ -12,6 +12,7 @@
#include <linux/interrupt.h>
#include <linux/ratelimit.h>
#include <linux/irq.h>
#include <linux/cpumask.h>

#include "internals.h"

@@ -57,6 +58,7 @@ static bool migrate_one_irq(struct irq_desc *desc)
const struct cpumask *affinity;
bool brokeaff = false;
int err;
struct cpumask available_cpus;

/*
* IRQ chip might be already torn down, but the irq descriptor is
@@ -109,6 +111,10 @@ static bool migrate_one_irq(struct irq_desc *desc)
if (maskchip && chip->irq_mask)
chip->irq_mask(d);

cpumask_copy(&available_cpus, affinity);
cpumask_andnot(&available_cpus, &available_cpus, cpu_isolated_mask);
affinity = &available_cpus;

if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
/*
* If the interrupt is managed, then shut it down and leave
@@ -119,16 +125,41 @@ static bool migrate_one_irq(struct irq_desc *desc)
irq_shutdown_and_deactivate(desc);
return false;
}
affinity = cpu_online_mask;
/*
* The order of preference for selecting a fallback CPU is
*
* (1) online and un-isolated CPU from default affinity
* (2) online and un-isolated CPU
* (3) online CPU
*/
cpumask_andnot(&available_cpus, cpu_online_mask,
cpu_isolated_mask);
if (cpumask_intersects(&available_cpus, irq_default_affinity))
cpumask_and(&available_cpus, &available_cpus,
irq_default_affinity);
else if (cpumask_empty(&available_cpus))
affinity = cpu_online_mask;

/*
* We are overriding the affinity with all online and
* un-isolated cpus. irq_set_affinity_locked() call
* below notify this mask to PM QOS affinity listener.
* That results in applying the CPU_DMA_LATENCY QOS
* to all the CPUs specified in the mask. But the low
* level irqchip driver sets the affinity of an irq
* to only one CPU. So pick only one CPU from the
* prepared mask while overriding the user affinity.
*/
affinity = cpumask_of(cpumask_any(affinity));
brokeaff = true;
}
/*
* Do not set the force argument of irq_do_set_affinity() as this
* Do not set the force argument of irq_set_affinity_locked() as this
* disables the masking of offline CPUs from the supplied affinity
* mask and therefore might keep/reassign the irq to the outgoing
* CPU.
*/
err = irq_do_set_affinity(d, affinity, false);
err = irq_set_affinity_locked(d, affinity, false);
if (err) {
pr_warn_ratelimited("IRQ%u: set affinity failed(%d).\n",
d->irq, err);

@@ -132,6 +132,11 @@ static ssize_t write_irq_affinity(int type, struct file *file,
if (err)
goto free_cpumask;

if (cpumask_subset(new_value, cpu_isolated_mask)) {
err = -EINVAL;
goto free_cpumask;
}

/*
* Do not allow disabling IRQs completely - it's a too easy
* way to make the system unusable accidentally :-) At least

@@ -11,6 +11,7 @@
#include <linux/nospec.h>

#include <linux/kcov.h>
#include <linux/irq.h>

#include <asm/switch_to.h>
#include <asm/tlb.h>
@@ -1649,6 +1650,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
struct rq_flags rf;
struct rq *rq;
int ret = 0;
cpumask_t allowed_mask;

rq = task_rq_lock(p, &rf);
update_rq_clock(rq);
@@ -1672,10 +1674,17 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
if (cpumask_equal(p->cpus_ptr, new_mask))
goto out;

dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask);
cpumask_andnot(&allowed_mask, new_mask, cpu_isolated_mask);
cpumask_and(&allowed_mask, &allowed_mask, cpu_valid_mask);

dest_cpu = cpumask_any(&allowed_mask);
if (dest_cpu >= nr_cpu_ids) {
ret = -EINVAL;
goto out;
cpumask_and(&allowed_mask, cpu_valid_mask, new_mask);
dest_cpu = cpumask_any(&allowed_mask);
if (!cpumask_intersects(new_mask, cpu_valid_mask)) {
ret = -EINVAL;
goto out;
}
}

do_set_cpus_allowed(p, new_mask);
@@ -1691,7 +1700,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
}

/* Can the task run on the task's current CPU? If so, we're done */
if (cpumask_test_cpu(task_cpu(p), new_mask))
if (cpumask_test_cpu(task_cpu(p), &allowed_mask))
goto out;

if (task_running(rq, p) || p->state == TASK_WAKING) {
@@ -2043,12 +2052,13 @@ EXPORT_SYMBOL_GPL(kick_process);
* select_task_rq() below may allow selection of !active CPUs in order
* to satisfy the above rules.
*/
static int select_fallback_rq(int cpu, struct task_struct *p)
static int select_fallback_rq(int cpu, struct task_struct *p, bool allow_iso)
{
int nid = cpu_to_node(cpu);
const struct cpumask *nodemask = NULL;
enum { cpuset, possible, fail } state = cpuset;
enum { cpuset, possible, fail, bug } state = cpuset;
int dest_cpu;
int isolated_candidate = -1;

/*
* If the node that the CPU is on has been offlined, cpu_to_node()
@@ -2062,6 +2072,8 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
for_each_cpu(dest_cpu, nodemask) {
if (!cpu_active(dest_cpu))
continue;
if (cpu_isolated(dest_cpu))
continue;
if (cpumask_test_cpu(dest_cpu, p->cpus_ptr))
return dest_cpu;
}
@@ -2072,7 +2084,16 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
for_each_cpu(dest_cpu, p->cpus_ptr) {
if (!is_cpu_allowed(p, dest_cpu))
continue;
if (cpu_isolated(dest_cpu)) {
if (allow_iso)
isolated_candidate = dest_cpu;
continue;
}
goto out;
}

if (isolated_candidate != -1) {
dest_cpu = isolated_candidate;
goto out;
}

@@ -2091,6 +2112,11 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
break;

case fail:
allow_iso = true;
state = bug;
break;

case bug:
BUG();
break;
}
@@ -2118,6 +2144,8 @@ out:
static inline
int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
{
bool allow_isolated = (p->flags & PF_KTHREAD);

lockdep_assert_held(&p->pi_lock);

if (p->nr_cpus_allowed > 1)
@@ -2135,8 +2163,9 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
* [ this allows ->select_task() to simply return task_cpu(p) and
* not worry about this generic constraint ]
*/
if (unlikely(!is_cpu_allowed(p, cpu)))
cpu = select_fallback_rq(task_cpu(p), p);
if (unlikely(!is_cpu_allowed(p, cpu)) ||
(cpu_isolated(cpu) && !allow_isolated))
cpu = select_fallback_rq(task_cpu(p), p, allow_isolated);

return cpu;
}
@@ -2327,6 +2356,7 @@ void sched_ttwu_pending(void)

void scheduler_ipi(void)
{
int cpu = smp_processor_id();
/*
* Fold TIF_NEED_RESCHED into the preempt_count; anybody setting
* TIF_NEED_RESCHED remotely (for the first time) will also send
@@ -2356,7 +2386,7 @@ void scheduler_ipi(void)
/*
* Check if someone kicked us for doing the nohz idle load balance.
*/
if (unlikely(got_nohz_idle_kick())) {
if (unlikely(got_nohz_idle_kick()) && !cpu_isolated(cpu)) {
this_rq()->idle_balance = 1;
raise_softirq_irqoff(SCHED_SOFTIRQ);
}
@@ -3542,7 +3572,7 @@ void sched_exec(void)
if (dest_cpu == smp_processor_id())
goto unlock;

if (likely(cpu_active(dest_cpu))) {
if (likely(cpu_active(dest_cpu) && likely(!cpu_isolated(dest_cpu)))) {
struct migration_arg arg = { p, dest_cpu };

raw_spin_unlock_irqrestore(&p->pi_lock, flags);
@@ -5463,6 +5493,8 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
cpumask_var_t cpus_allowed, new_mask;
struct task_struct *p;
int retval;
int dest_cpu;
cpumask_t allowed_mask;

rcu_read_lock();

@@ -5524,20 +5556,26 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
}
#endif
again:
retval = __set_cpus_allowed_ptr(p, new_mask, true);

if (!retval) {
cpuset_cpus_allowed(p, cpus_allowed);
if (!cpumask_subset(new_mask, cpus_allowed)) {
/*
* We must have raced with a concurrent cpuset
* update. Just reset the cpus_allowed to the
* cpuset's cpus_allowed
*/
cpumask_copy(new_mask, cpus_allowed);
goto again;
cpumask_andnot(&allowed_mask, new_mask, cpu_isolated_mask);
dest_cpu = cpumask_any_and(cpu_active_mask, &allowed_mask);
if (dest_cpu < nr_cpu_ids) {
retval = __set_cpus_allowed_ptr(p, new_mask, true);
if (!retval) {
cpuset_cpus_allowed(p, cpus_allowed);
if (!cpumask_subset(new_mask, cpus_allowed)) {
/*
* We must have raced with a concurrent cpuset
* update. Just reset the cpus_allowed to the
* cpuset's cpus_allowed
*/
cpumask_copy(new_mask, cpus_allowed);
goto again;
}
}
} else {
retval = -EINVAL;
}

out_free_new_mask:
free_cpumask_var(new_mask);
out_free_cpus_allowed:
@@ -5655,6 +5693,14 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)

raw_spin_lock_irqsave(&p->pi_lock, flags);
cpumask_and(mask, &p->cpus_mask, cpu_active_mask);

/* The userspace tasks are forbidden to run on
* isolated CPUs. So exclude isolated CPUs from
* the getaffinity.
*/
if (!(p->flags & PF_KTHREAD))
cpumask_andnot(mask, mask, cpu_isolated_mask);

raw_spin_unlock_irqrestore(&p->pi_lock, flags);

out_unlock:
@@ -6351,19 +6397,25 @@ static struct task_struct *__pick_migrate_task(struct rq *rq)
}

/*
* Migrate all tasks from the rq, sleeping tasks will be migrated by
* try_to_wake_up()->select_task_rq().
* Migrate all tasks (not pinned if pinned argument say so) from the rq,
* sleeping tasks will be migrated by try_to_wake_up()->select_task_rq().
*
* Called with rq->lock held even though we'er in stop_machine() and
* there's no concurrency possible, we hold the required locks anyway
* because of lock validation efforts.
*/
void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf)
void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf,
bool migrate_pinned_tasks)
{
struct rq *rq = dead_rq;
struct task_struct *next, *stop = rq->stop;
struct rq_flags orf = *rf;
int dest_cpu;
unsigned int num_pinned_kthreads = 1; /* this thread */
LIST_HEAD(tasks);
cpumask_t avail_cpus;

cpumask_andnot(&avail_cpus, cpu_online_mask, cpu_isolated_mask);

/*
* Fudge the rq selection such that the below task selection loop
@@ -6386,13 +6438,20 @@ void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf)
for (;;) {
/*
* There's this thread running, bail when that's the only
* remaining thread:
* remaining thread.
*/
if (rq->nr_running == 1)
break;

next = __pick_migrate_task(rq);

if (!migrate_pinned_tasks && next->flags & PF_KTHREAD &&
!cpumask_intersects(&avail_cpus, &next->cpus_mask)) {
detach_one_task_core(next, rq, &tasks);
num_pinned_kthreads += 1;
continue;
}

/*
* Rules for changing task_struct::cpus_mask are holding
* both pi_lock and rq->lock, such that holding either
@@ -6405,31 +6464,43 @@ void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf)
rq_unlock(rq, rf);
raw_spin_lock(&next->pi_lock);
rq_relock(rq, rf);
if (!(rq->clock_update_flags & RQCF_UPDATED))
update_rq_clock(rq);

/*
* Since we're inside stop-machine, _nothing_ should have
* changed the task, WARN if weird stuff happened, because in
* that case the above rq->lock drop is a fail too.
* However, during cpu isolation the load balancer might have
* interferred since we don't stop all CPUs. Ignore warning for
* this case.
*/
if (WARN_ON(task_rq(next) != rq || !task_on_rq_queued(next))) {
if (task_rq(next) != rq || !task_on_rq_queued(next)) {
WARN_ON(migrate_pinned_tasks);
raw_spin_unlock(&next->pi_lock);
continue;
}

/* Find suitable destination for @next, with force if needed. */
dest_cpu = select_fallback_rq(dead_rq->cpu, next);
dest_cpu = select_fallback_rq(dead_rq->cpu, next, false);
rq = __migrate_task(rq, rf, next, dest_cpu);
if (rq != dead_rq) {
rq_unlock(rq, rf);
rq = dead_rq;
*rf = orf;
rq_relock(rq, rf);
if (!(rq->clock_update_flags & RQCF_UPDATED))
update_rq_clock(rq);
}
raw_spin_unlock(&next->pi_lock);
}

rq->stop = stop;

if (num_pinned_kthreads > 1)
attach_tasks_core(&tasks, rq);
}

#endif /* CONFIG_HOTPLUG_CPU */

void set_rq_online(struct rq *rq)
@@ -6619,7 +6690,7 @@ int sched_cpu_dying(unsigned int cpu)
BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
set_rq_offline(rq);
}
migrate_tasks(rq, &rf);
migrate_tasks(rq, &rf, true);
BUG_ON(rq->nr_running != 1);
rq_unlock_irqrestore(rq, &rf);


@@ -5983,6 +5983,8 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
return si_cpu;
if (!cpumask_test_cpu(cpu, p->cpus_ptr))
continue;
if (cpu_isolated(cpu))
continue;
if (available_idle_cpu(cpu))
break;
if (si_cpu == -1 && sched_idle_cpu(cpu))
@@ -6005,14 +6007,16 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
struct sched_domain *sd;
int i, recent_used_cpu;

if (available_idle_cpu(target) || sched_idle_cpu(target))
if ((available_idle_cpu(target) || sched_idle_cpu(target)) &&
!cpu_isolated(target))
return target;

/*
* If the previous CPU is cache affine and idle, don't be stupid:
*/
if (prev != target && cpus_share_cache(prev, target) &&
(available_idle_cpu(prev) || sched_idle_cpu(prev)))
((available_idle_cpu(prev) || sched_idle_cpu(prev)) &&
!cpu_isolated(prev)))
return prev;

/* Check a recently used CPU as a potential idle candidate: */
@@ -7892,6 +7896,8 @@ void update_group_capacity(struct sched_domain *sd, int cpu)
struct sched_group_capacity *sgc;
struct rq *rq = cpu_rq(cpu);

if (cpumask_test_cpu(cpu, cpu_isolated_mask))
continue;
/*
* build_sched_domains() -> init_sched_groups_capacity()
* gets here before we've attached the domains to the
@@ -7922,10 +7928,15 @@ void update_group_capacity(struct sched_domain *sd, int cpu)
group = child->groups;
do {
struct sched_group_capacity *sgc = group->sgc;
cpumask_t *cpus = sched_group_span(group);

capacity += sgc->capacity;
min_capacity = min(sgc->min_capacity, min_capacity);
max_capacity = max(sgc->max_capacity, max_capacity);
if (!cpu_isolated(cpumask_first(cpus))) {
capacity += sgc->capacity;
min_capacity = min(sgc->min_capacity,
min_capacity);
max_capacity = max(sgc->max_capacity,
max_capacity);
}
group = group->next;
} while (group != child->groups);
}
@@ -8129,6 +8140,9 @@ static inline void update_sg_lb_stats(struct lb_env *env,
for_each_cpu_and(i, sched_group_span(group), env->cpus) {
struct rq *rq = cpu_rq(i);

if (cpu_isolated(i))
continue;

if ((env->flags & LBF_NOHZ_STATS) && update_nohz_stats(rq, false))
env->flags |= LBF_NOHZ_AGAIN;

@@ -8160,17 +8174,27 @@ static inline void update_sg_lb_stats(struct lb_env *env,
}
}

/* Adjust by relative CPU capacity of the group */
sgs->group_capacity = group->sgc->capacity;
sgs->avg_load = (sgs->group_load*SCHED_CAPACITY_SCALE) / sgs->group_capacity;
/* Isolated CPU has no weight */
if (!group->group_weight) {
sgs->group_capacity = 0;
sgs->avg_load = 0;
sgs->group_no_capacity = 1;
sgs->group_type = group_other;
sgs->group_weight = group->group_weight;
} else {
/* Adjust by relative CPU capacity of the group */
sgs->group_capacity = group->sgc->capacity;
sgs->avg_load = (sgs->group_load*SCHED_CAPACITY_SCALE) /
sgs->group_capacity;

sgs->group_weight = group->group_weight;

sgs->group_no_capacity = group_is_overloaded(env, sgs);
sgs->group_type = group_classify(group, sgs);
}

if (sgs->sum_nr_running)
sgs->load_per_task = sgs->group_load / sgs->sum_nr_running;

sgs->group_weight = group->group_weight;

sgs->group_no_capacity = group_is_overloaded(env, sgs);
sgs->group_type = group_classify(group, sgs);
}

/**
@@ -8910,7 +8934,7 @@ static int should_we_balance(struct lb_env *env)

/* Try to find first idle CPU */
for_each_cpu_and(cpu, group_balance_mask(sg), env->cpus) {
if (!idle_cpu(cpu))
if (!idle_cpu(cpu) || cpu_isolated(cpu))
continue;

balance_cpu = cpu;
@@ -8918,7 +8942,7 @@ static int should_we_balance(struct lb_env *env)
}

if (balance_cpu == -1)
balance_cpu = group_balance_cpu(sg);
balance_cpu = group_balance_cpu_not_isolated(sg);

/*
* First idle CPU or the first CPU(busiest) in this sched group
@@ -9127,7 +9151,8 @@ more_balance:
* ->active_balance_work. Once set, it's cleared
* only after active load balance is finished.
*/
if (!busiest->active_balance) {
if (!busiest->active_balance &&
!cpu_isolated(cpu_of(busiest))) {
busiest->active_balance = 1;
busiest->push_cpu = this_cpu;
active_balance = 1;
@@ -9333,7 +9358,13 @@ static DEFINE_SPINLOCK(balancing);
*/
void update_max_interval(void)
{
max_load_balance_interval = HZ*num_online_cpus()/10;
cpumask_t avail_mask;
unsigned int available_cpus;

cpumask_andnot(&avail_mask, cpu_online_mask, cpu_isolated_mask);
available_cpus = cpumask_weight(&avail_mask);

max_load_balance_interval = HZ*available_cpus/10;
}

/*
@@ -9510,6 +9541,7 @@ static void nohz_balancer_kick(struct rq *rq)
struct sched_domain *sd;
int nr_busy, i, cpu = rq->cpu;
unsigned int flags = 0;
cpumask_t cpumask;

if (unlikely(rq->idle_balance))
return;
@@ -9524,7 +9556,8 @@ static void nohz_balancer_kick(struct rq *rq)
* None are in tickless mode and hence no need for NOHZ idle load
* balancing.
*/
if (likely(!atomic_read(&nohz.nr_cpus)))
cpumask_andnot(&cpumask, nohz.idle_cpus_mask, cpu_isolated_mask);
if (cpumask_empty(&cpumask))
return;

if (READ_ONCE(nohz.has_blocked) &&
@@ -9561,7 +9594,7 @@ static void nohz_balancer_kick(struct rq *rq)
* currently idle; in which case, kick the ILB to move tasks
* around.
*/
for_each_cpu_and(i, sched_domain_span(sd), nohz.idle_cpus_mask) {
for_each_cpu_and(i, sched_domain_span(sd), &cpumask) {
if (sched_asym_prefer(i, cpu)) {
flags = NOHZ_KICK_MASK;
goto unlock;
@@ -9739,6 +9772,7 @@ static bool _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
int balance_cpu;
int ret = false;
struct rq *rq;
cpumask_t cpus;

SCHED_WARN_ON((flags & NOHZ_KICK_MASK) == NOHZ_BALANCE_KICK);

@@ -9758,7 +9792,9 @@ static bool _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
*/
smp_mb();

for_each_cpu(balance_cpu, nohz.idle_cpus_mask) {
cpumask_andnot(&cpus, nohz.idle_cpus_mask, cpu_isolated_mask);

for_each_cpu(balance_cpu, &cpus) {
if (balance_cpu == this_cpu || !idle_cpu(balance_cpu))
continue;

@@ -9910,6 +9946,9 @@ int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
int pulled_task = 0;
u64 curr_cost = 0;

if (cpu_isolated(this_cpu))
return 0;

update_misfit_status(NULL, this_rq);
/*
* We must set idle_stamp _before_ calling idle_balance(), such that we
@@ -10026,6 +10065,14 @@ static __latent_entropy void run_rebalance_domains(struct softirq_action *h)
enum cpu_idle_type idle = this_rq->idle_balance ?
CPU_IDLE : CPU_NOT_IDLE;

/*
* Since core isolation doesn't update nohz.idle_cpus_mask, there
* is a possibility this nohz kicked cpu could be isolated. Hence
* return if the cpu is isolated.
*/
if (cpu_isolated(this_rq->cpu))
return;

/*
* If this CPU has a pending nohz_balance_kick, then do the
* balancing on behalf of the other idle CPUs whose ticks are
@@ -10047,8 +10094,10 @@ static __latent_entropy void run_rebalance_domains(struct softirq_action *h)
*/
void trigger_load_balance(struct rq *rq)
{
/* Don't need to rebalance while attached to NULL domain */
if (unlikely(on_null_domain(rq)))
/* Don't need to rebalance while attached to NULL domain or
* cpu is isolated.
*/
if (unlikely(on_null_domain(rq)) || cpu_isolated(cpu_of(rq)))
return;

if (time_after_eq(jiffies, rq->next_balance))

@@ -263,8 +263,12 @@ static void pull_rt_task(struct rq *this_rq);

static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
{
/* Try to pull RT tasks here if we lower this rq's prio */
return rq->rt.highest_prio.curr > prev->prio;
/*
* Try to pull RT tasks here if we lower this rq's prio and cpu is not
* isolated
*/
return rq->rt.highest_prio.curr > prev->prio &&
!cpu_isolated(cpu_of(rq));
}

static inline int rt_overloaded(struct rq *rq)
@@ -2192,7 +2196,8 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p)
* we may need to handle the pulling of RT tasks
* now.
*/
if (!task_on_rq_queued(p) || rq->rt.rt_nr_running)
if (!task_on_rq_queued(p) || rq->rt.rt_nr_running ||
cpu_isolated(cpu_of(rq)))
return;

rt_queue_pull_task(rq);

@@ -163,6 +163,10 @@ extern atomic_long_t calc_load_tasks;
extern void calc_global_load_tick(struct rq *this_rq);
extern long calc_load_fold_active(struct rq *this_rq, long adjust);

#ifdef CONFIG_SMP
extern void init_sched_groups_capacity(int cpu, struct sched_domain *sd);
#endif

/*
* Helpers for converting nanosecond timing to jiffy resolution
*/
@@ -3307,7 +3311,8 @@ extern int active_load_balance_cpu_stop(void *data);
#ifdef CONFIG_HOTPLUG_CPU
extern void set_rq_online(struct rq *rq);
extern void set_rq_offline(struct rq *rq);
extern void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf);
extern void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf,
bool migrate_pinned_tasks);
extern void calc_load_migrate(struct rq *rq);
#ifdef CONFIG_SCHED_WALT
extern void __weak

@@ -1163,16 +1163,19 @@ build_sched_groups(struct sched_domain *sd, int cpu)
* group having more cpu_capacity will pickup more load compared to the
* group having less cpu_capacity.
*/
static void init_sched_groups_capacity(int cpu, struct sched_domain *sd)
void init_sched_groups_capacity(int cpu, struct sched_domain *sd)
{
struct sched_group *sg = sd->groups;
cpumask_t avail_mask;

WARN_ON(!sg);

do {
int cpu, max_cpu = -1;

sg->group_weight = cpumask_weight(sched_group_span(sg));
cpumask_andnot(&avail_mask, sched_group_span(sg),
cpu_isolated_mask);
sg->group_weight = cpumask_weight(&avail_mask);

if (!(sd->flags & SD_ASYM_PACKING))
goto next;

@@ -792,7 +792,8 @@ void wake_up_all_idle_cpus(void)
if (cpu == smp_processor_id())
continue;

wake_up_if_idle(cpu);
if (!cpu_isolated(cpu))
wake_up_if_idle(cpu);
}
preempt_enable();
}

@@ -2014,7 +2014,7 @@ int hrtimers_prepare_cpu(unsigned int cpu)
return 0;
}

#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_CPUSETS)
#ifdef CONFIG_HOTPLUG_CPU
static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
struct hrtimer_clock_base *new_base,
bool remove_pinned)
@@ -2023,12 +2023,14 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
struct timerqueue_node *node;
struct timerqueue_head pinned;
int is_pinned;
bool is_hotplug = !cpu_online(old_base->cpu_base->cpu);

timerqueue_init_head(&pinned);

while ((node = timerqueue_getnext(&old_base->active))) {
timer = container_of(node, struct hrtimer, node);
BUG_ON(hrtimer_callback_running(timer));
if (is_hotplug)
BUG_ON(hrtimer_callback_running(timer));
debug_deactivate(timer);

/*
@@ -2106,9 +2108,7 @@ static void __migrate_hrtimers(unsigned int scpu, bool remove_pinned)
local_irq_restore(flags);
local_bh_enable();
}
#endif /* CONFIG_HOTPLUG_CPU || CONFIG_CPUSETS */

#ifdef CONFIG_HOTPLUG_CPU
int hrtimers_dead_cpu(unsigned int scpu)
{
BUG_ON(cpu_online(scpu));
@@ -2117,14 +2117,13 @@ int hrtimers_dead_cpu(unsigned int scpu)
__migrate_hrtimers(scpu, true);
return 0;
}
#endif /* CONFIG_HOTPLUG_CPU */

#ifdef CONFIG_CPUSETS
void hrtimer_quiesce_cpu(void *cpup)
{
__migrate_hrtimers(*(int *)cpup, false);
}
#endif /* CONFIG_CPUSETS */

#endif /* CONFIG_HOTPLUG_CPU */

void __init hrtimers_init(void)
{

@@ -2038,7 +2038,8 @@ static void __migrate_timers(unsigned int cpu, bool remove_pinned)
*/
forward_timer_base(new_base);

BUG_ON(old_base->running_timer);
if (!cpu_online(cpu))
BUG_ON(old_base->running_timer);

for (i = 0; i < WHEEL_SIZE; i++)
migrate_timer_list(new_base, old_base->vectors + i,
@@ -2057,12 +2058,10 @@ int timers_dead_cpu(unsigned int cpu)
return 0;
}

#ifdef CONFIG_CPUSETS
void timer_quiesce_cpu(void *cpup)
{
__migrate_timers(*(unsigned int *)cpup, false);
}
#endif /* CONFIG_CPUSETS */

#endif /* CONFIG_HOTPLUG_CPU */


@@ -14,6 +14,7 @@

#include <linux/mm.h>
#include <linux/cpu.h>
#include <linux/device.h>
#include <linux/nmi.h>
#include <linux/init.h>
#include <linux/module.h>
@@ -170,6 +171,7 @@ static u64 __read_mostly sample_period;

static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
static DEFINE_PER_CPU(unsigned int, watchdog_en);
static DEFINE_PER_CPU(bool, softlockup_touch_sync);
static DEFINE_PER_CPU(bool, soft_watchdog_warn);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
@@ -476,16 +478,20 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
return HRTIMER_RESTART;
}

static void watchdog_enable(unsigned int cpu)
void watchdog_enable(unsigned int cpu)
{
struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);
struct completion *done = this_cpu_ptr(&softlockup_completion);
unsigned int *enabled = this_cpu_ptr(&watchdog_en);

WARN_ON_ONCE(cpu != smp_processor_id());

init_completion(done);
complete(done);

if (*enabled)
return;

/*
* Start the timer first to prevent the NMI watchdog triggering
* before the timer has a chance to fire.
@@ -500,11 +506,24 @@ static void watchdog_enable(unsigned int cpu)
/* Enable the perf event */
if (watchdog_enabled & NMI_WATCHDOG_ENABLED)
watchdog_nmi_enable(cpu);

/*
* Need to ensure above operations are observed by other CPUs before
* indicating that timer is enabled. This is to synchronize core
* isolation and hotplug. Core isolation will wait for this flag to be
* set.
*/
mb();
*enabled = 1;
}

static void watchdog_disable(unsigned int cpu)
void watchdog_disable(unsigned int cpu)
{
struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);
unsigned int *enabled = this_cpu_ptr(&watchdog_en);

if (!*enabled)
return;

WARN_ON_ONCE(cpu != smp_processor_id());

@@ -516,6 +535,17 @@ static void watchdog_disable(unsigned int cpu)
watchdog_nmi_disable(cpu);
hrtimer_cancel(hrtimer);
wait_for_completion(this_cpu_ptr(&softlockup_completion));

/*
* No need for barrier here since disabling the watchdog is
* synchronized with hotplug lock
*/
*enabled = 0;
}

bool watchdog_configured(unsigned int cpu)
{
return *per_cpu_ptr(&watchdog_en, cpu);
}

static int softlockup_stop_fn(void *data)

@@ -1807,7 +1807,7 @@ int vmstat_refresh(struct ctl_table *table, int write,

static void vmstat_update(struct work_struct *w)
{
if (refresh_cpu_vm_stats(true)) {
if (refresh_cpu_vm_stats(true) && !cpu_isolated(smp_processor_id())) {
/*
* Counters were updated so we expect more updates
* to occur in the future. Keep on running the
@@ -1899,7 +1899,8 @@ static void vmstat_shepherd(struct work_struct *w)
for_each_online_cpu(cpu) {
struct delayed_work *dw = &per_cpu(vmstat_work, cpu);

if (!delayed_work_pending(dw) && need_update(cpu))
if (!delayed_work_pending(dw) && need_update(cpu) &&
!cpu_isolated(cpu))
queue_delayed_work_on(cpu, mm_percpu_wq, dw, 0);
}
put_online_cpus();