sched: adjust when cpu_active and cpuset configurations are updated during cpu on/offlining
Currently, when a cpu goes down, cpu_active is cleared before CPU_DOWN_PREPARE starts and cpuset configuration is updated from a default priority cpu notifier. When a cpu is coming up, it's set before CPU_ONLINE but cpuset configuration is again updated from the same cpu notifier. For cpu notifiers, this presents an inconsistent state. Threads which a CPU_DOWN_PREPARE notifier expects to be bound to the CPU can be migrated to other cpus because the cpu is no longer active. Fix it by updating cpu_active in the highest priority cpu notifier and cpuset configuration in the second highest when a cpu is coming up. The down path is updated similarly. This guarantees that all other cpu notifiers see consistent cpu_active and cpuset configuration. The cpuset_track_online_cpus() notifier is converted to cpuset_update_active_cpus(), which just updates the configuration and is now called from the cpuset_cpu_[in]active() notifiers registered from sched_init_smp(). If cpuset is disabled, cpuset_update_active_cpus() degenerates into partition_sched_domains(), making a separate notifier for !CONFIG_CPUSETS unnecessary. This problem is triggered by cmwq. During CPU_DOWN_PREPARE, the hotplug callback creates a kthread and kthread_bind()s it to the target cpu, and the thread is expected to run on that cpu. * Ingo's test discovered __cpuinit/exit markups were incorrect. Fixed. Signed-off-by: Tejun Heo <tj@kernel.org> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Ingo Molnar <mingo@elte.hu> Cc: Paul Menage <menage@google.com>
This commit is contained in:
@@ -5804,17 +5804,46 @@ static struct notifier_block __cpuinitdata migration_notifier = {
|
||||
.priority = CPU_PRI_MIGRATION,
|
||||
};
|
||||
|
||||
/*
 * CPU notifier callback that marks a cpu active.
 *
 * @nfb:    notifier block this callback was invoked through (unused here)
 * @action: hotplug event; the CPU_TASKS_FROZEN bit is masked off before
 *          the event is compared
 * @hcpu:   cpu number carried in a pointer-sized value
 *
 * On CPU_ONLINE or CPU_DOWN_FAILED the cpu is set active and NOTIFY_OK is
 * returned.  Every other event returns NOTIFY_DONE without side effects.
 */
static int __cpuinit sched_cpu_active(struct notifier_block *nfb,
				      unsigned long action, void *hcpu)
{
	unsigned long event = action & ~CPU_TASKS_FROZEN;

	if (event != CPU_ONLINE && event != CPU_DOWN_FAILED)
		return NOTIFY_DONE;

	set_cpu_active((long)hcpu, true);
	return NOTIFY_OK;
}
|
||||
|
||||
/*
 * CPU notifier callback that marks a cpu inactive.
 *
 * @nfb:    notifier block this callback was invoked through (unused here)
 * @action: hotplug event; the CPU_TASKS_FROZEN bit is masked off before
 *          the event is compared
 * @hcpu:   cpu number carried in a pointer-sized value
 *
 * On CPU_DOWN_PREPARE the cpu is cleared from the active state and
 * NOTIFY_OK is returned.  Every other event returns NOTIFY_DONE.
 */
static int __cpuinit sched_cpu_inactive(struct notifier_block *nfb,
					unsigned long action, void *hcpu)
{
	if ((action & ~CPU_TASKS_FROZEN) == CPU_DOWN_PREPARE) {
		set_cpu_active((long)hcpu, false);
		return NOTIFY_OK;
	}

	return NOTIFY_DONE;
}
|
||||
|
||||
/*
 * Early-initcall setup for the migration notifier and the cpu_active
 * notifiers.
 *
 * For the cpu this runs on (smp_processor_id()), migration_call() is
 * invoked by hand with CPU_UP_PREPARE and then CPU_ONLINE before
 * migration_notifier is registered.  Afterwards sched_cpu_active() and
 * sched_cpu_inactive() are registered as cpu notifiers with priorities
 * CPU_PRI_SCHED_ACTIVE and CPU_PRI_SCHED_INACTIVE.
 *
 * Always returns 0.
 *
 * NOTE(review): this listing looks like a diff whose +/- markers were
 * lost; the two adjacent "boot CPU" comments below are presumably the
 * removed and the added wording of the same comment -- confirm against
 * the tree.
 */
static int __init migration_init(void)
|
||||
{
|
||||
void *cpu = (void *)(long)smp_processor_id();
|
||||
int err;
|
||||
|
||||
/* Start one for the boot CPU: */
|
||||
/* Initialize migration for the boot CPU */
|
||||
err = migration_call(&migration_notifier, CPU_UP_PREPARE, cpu);
|
||||
/* A NOTIFY_BAD result from the CPU_UP_PREPARE step is treated as fatal. */
BUG_ON(err == NOTIFY_BAD);
|
||||
migration_call(&migration_notifier, CPU_ONLINE, cpu);
|
||||
register_cpu_notifier(&migration_notifier);
|
||||
|
||||
/* Register cpu active notifiers */
|
||||
cpu_notifier(sched_cpu_active, CPU_PRI_SCHED_ACTIVE);
|
||||
cpu_notifier(sched_cpu_inactive, CPU_PRI_SCHED_INACTIVE);
|
||||
|
||||
return 0;
|
||||
}
|
||||
early_initcall(migration_init);
|
||||
@@ -7273,29 +7302,35 @@ int __init sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
|
||||
}
|
||||
#endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
|
||||
|
||||
#ifndef CONFIG_CPUSETS
|
||||
/*
|
||||
* Add online and remove offline CPUs from the scheduler domains.
|
||||
* When cpusets are enabled they take over this function.
|
||||
* Update cpusets according to cpu_active mask. If cpusets are
|
||||
* disabled, cpuset_update_active_cpus() becomes a simple wrapper
|
||||
* around partition_sched_domains().
|
||||
*/
|
||||
/*
 * NOTE(review): this span does not read as a single function.  It shows
 * two signatures (update_sched_domains() and cpuset_cpu_active()), two
 * switch heads, and both partition_sched_domains() and
 * cpuset_update_active_cpus() calls back to back, which looks like the
 * removed and the added lines of a diff interleaved with their +/-
 * markers missing.
 *
 * Presumably the resulting function is cpuset_cpu_active(): it switches
 * on (action & ~CPU_TASKS_FROZEN), calls cpuset_update_active_cpus() and
 * returns NOTIFY_OK for CPU_ONLINE and CPU_DOWN_FAILED, and returns
 * NOTIFY_DONE otherwise -- TODO confirm against the tree.
 */
static int update_sched_domains(struct notifier_block *nfb,
|
||||
unsigned long action, void *hcpu)
|
||||
static int __cpuexit cpuset_cpu_active(struct notifier_block *nfb,
|
||||
unsigned long action, void *hcpu)
|
||||
{
|
||||
switch (action) {
|
||||
switch (action & ~CPU_TASKS_FROZEN) {
|
||||
case CPU_ONLINE:
|
||||
case CPU_ONLINE_FROZEN:
|
||||
case CPU_DOWN_PREPARE:
|
||||
case CPU_DOWN_PREPARE_FROZEN:
|
||||
case CPU_DOWN_FAILED:
|
||||
case CPU_DOWN_FAILED_FROZEN:
|
||||
partition_sched_domains(1, NULL, NULL);
|
||||
cpuset_update_active_cpus();
|
||||
return NOTIFY_OK;
|
||||
default:
|
||||
return NOTIFY_DONE;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * CPU notifier callback for the down path of cpuset updates.
 *
 * @nfb:    notifier block this callback was invoked through (unused here)
 * @action: hotplug event; the CPU_TASKS_FROZEN bit is masked off before
 *          the event is compared
 * @hcpu:   unused here
 *
 * On CPU_DOWN_PREPARE, cpuset_update_active_cpus() is called and
 * NOTIFY_OK is returned.  Every other event returns NOTIFY_DONE.
 */
static int __cpuexit cpuset_cpu_inactive(struct notifier_block *nfb,
					 unsigned long action, void *hcpu)
{
	if ((action & ~CPU_TASKS_FROZEN) != CPU_DOWN_PREPARE)
		return NOTIFY_DONE;

	cpuset_update_active_cpus();
	return NOTIFY_OK;
}
|
||||
#endif
|
||||
|
||||
static int update_runtime(struct notifier_block *nfb,
|
||||
unsigned long action, void *hcpu)
|
||||
@@ -7341,10 +7376,8 @@ void __init sched_init_smp(void)
|
||||
mutex_unlock(&sched_domains_mutex);
|
||||
put_online_cpus();
|
||||
|
||||
#ifndef CONFIG_CPUSETS
|
||||
/* XXX: Theoretical race here - CPU may be hotplugged now */
|
||||
hotcpu_notifier(update_sched_domains, 0);
|
||||
#endif
|
||||
hotcpu_notifier(cpuset_cpu_active, CPU_PRI_CPUSET_ACTIVE);
|
||||
hotcpu_notifier(cpuset_cpu_inactive, CPU_PRI_CPUSET_INACTIVE);
|
||||
|
||||
/* RT runtime code needs to handle some hotplug events */
|
||||
hotcpu_notifier(update_runtime, 0);
|
||||
|
Reference in New Issue
Block a user