Merge branch 'smp/hotplug' into sched/core, to resolve conflicts

Conflicts:
	kernel/sched/core.c

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Ingo Molnar
2016-05-12 09:51:36 +02:00
10 changed files with 210 additions and 279 deletions

kernel/sched/core.c

@@ -314,29 +314,6 @@ void hrtick_start(struct rq *rq, u64 delay)
}
}
static int
hotplug_hrtick(struct notifier_block *nfb, unsigned long action, void *hcpu)
{
int cpu = (int)(long)hcpu;
switch (action) {
case CPU_UP_CANCELED:
case CPU_UP_CANCELED_FROZEN:
case CPU_DOWN_PREPARE:
case CPU_DOWN_PREPARE_FROZEN:
case CPU_DEAD:
case CPU_DEAD_FROZEN:
hrtick_clear(cpu_rq(cpu));
return NOTIFY_OK;
}
return NOTIFY_DONE;
}
static __init void init_hrtick(void)
{
hotcpu_notifier(hotplug_hrtick, 0);
}
#else
/*
* Called to set the hrtick timer state.
@@ -353,10 +330,6 @@ void hrtick_start(struct rq *rq, u64 delay)
hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay),
HRTIMER_MODE_REL_PINNED);
}
static inline void init_hrtick(void)
{
}
#endif /* CONFIG_SMP */
static void init_rq_hrtick(struct rq *rq)
@@ -380,10 +353,6 @@ static inline void hrtick_clear(struct rq *rq)
static inline void init_rq_hrtick(struct rq *rq)
{
}
static inline void init_hrtick(void)
{
}
#endif /* CONFIG_SCHED_HRTICK */
/*
@@ -1150,6 +1119,7 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
static int __set_cpus_allowed_ptr(struct task_struct *p,
const struct cpumask *new_mask, bool check)
{
const struct cpumask *cpu_valid_mask = cpu_active_mask;
unsigned int dest_cpu;
struct rq_flags rf;
struct rq *rq;
@@ -1157,6 +1127,13 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
rq = task_rq_lock(p, &rf);
if (p->flags & PF_KTHREAD) {
/*
* Kernel threads are allowed on online && !active CPUs
*/
cpu_valid_mask = cpu_online_mask;
}
/*
* Must re-check here, to close a race against __kthread_bind(),
* sched_setaffinity() is not guaranteed to observe the flag.
@@ -1169,18 +1146,28 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
if (cpumask_equal(&p->cpus_allowed, new_mask))
goto out;
if (!cpumask_intersects(new_mask, cpu_active_mask)) {
if (!cpumask_intersects(new_mask, cpu_valid_mask)) {
ret = -EINVAL;
goto out;
}
do_set_cpus_allowed(p, new_mask);
if (p->flags & PF_KTHREAD) {
/*
* For kernel threads that do indeed end up on online &&
* !active we want to ensure they are strict per-cpu threads.
*/
WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) &&
!cpumask_intersects(new_mask, cpu_active_mask) &&
p->nr_cpus_allowed != 1);
}
/* Can the task run on the task's current CPU? If so, we're done */
if (cpumask_test_cpu(task_cpu(p), new_mask))
goto out;
dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask);
if (task_running(rq, p) || p->state == TASK_WAKING) {
struct migration_arg arg = { p, dest_cpu };
/* Need help from migration thread: drop lock and wait. */
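
The hunk above changes which mask __set_cpus_allowed_ptr() validates a new affinity against: kernel threads (PF_KTHREAD) are checked against cpu_online_mask, everything else against cpu_active_mask. A minimal userspace sketch of that rule follows; plain bitmasks stand in for cpumasks and IS_KTHREAD for PF_KTHREAD, so this is illustrative only, not kernel code:

/*
 * Illustrative only -- userspace C, not kernel code.
 */
#include <stdbool.h>
#include <stdio.h>

#define IS_KTHREAD	0x1

static bool masks_intersect(unsigned long a, unsigned long b)
{
	return (a & b) != 0;
}

static bool new_mask_valid(unsigned int flags, unsigned long new_mask,
			   unsigned long online_mask, unsigned long active_mask)
{
	/* kthreads are validated against online, everything else against active */
	unsigned long valid = (flags & IS_KTHREAD) ? online_mask : active_mask;

	return masks_intersect(new_mask, valid);
}

int main(void)
{
	unsigned long online = 0xf;	/* CPUs 0-3 online		*/
	unsigned long active = 0x7;	/* CPU 3 online but not active	*/

	/* user task asking only for CPU 3: rejected (-EINVAL in the kernel) */
	printf("user task on cpu3: %d\n", new_mask_valid(0, 0x8, online, active));
	/* per-CPU kthread asking only for CPU 3: allowed */
	printf("kthread   on cpu3: %d\n", new_mask_valid(IS_KTHREAD, 0x8, online, active));
	return 0;
}

On a CPU that is online but not yet active only the kthread case succeeds, which matches the WARN_ON() added above for strict per-CPU threads.
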
@@ -1499,6 +1486,25 @@ EXPORT_SYMBOL_GPL(kick_process);
/*
* ->cpus_allowed is protected by both rq->lock and p->pi_lock
*
* A few notes on cpu_active vs cpu_online:
*
* - cpu_active must be a subset of cpu_online
*
* - on cpu-up we allow per-cpu kthreads on the online && !active cpu,
* see __set_cpus_allowed_ptr(). At this point the newly online
* cpu isn't yet part of the sched domains, and balancing will not
* see it.
*
* - on cpu-down we clear cpu_active() to mask the sched domains and
* prevent the load balancer from placing new tasks on the CPU being
* removed. Existing tasks will remain running there and will be taken
* off.
*
* This means that fallback selection must not select !active CPUs, and
* it can assume that any active CPU must be online. Conversely,
* select_task_rq() below may allow selection of !active CPUs in order
* to satisfy the above rules.
*/
static int select_fallback_rq(int cpu, struct task_struct *p)
{
@@ -1517,8 +1523,6 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
/* Look for allowed, online CPU in same node. */
for_each_cpu(dest_cpu, nodemask) {
if (!cpu_online(dest_cpu))
continue;
if (!cpu_active(dest_cpu))
continue;
if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
@@ -1529,8 +1533,6 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
for (;;) {
/* Any allowed, online CPU? */
for_each_cpu(dest_cpu, tsk_cpus_allowed(p)) {
if (!cpu_online(dest_cpu))
continue;
if (!cpu_active(dest_cpu))
continue;
goto out;
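
Because cpu_active is a subset of cpu_online, the explicit cpu_online() tests removed in the two hunks above were redundant; fallback selection only needs to skip !active CPUs. A small self-contained sketch of that rule (userspace C, bitmask cpumasks; pick_fallback_cpu() is a hypothetical stand-in for the loops in select_fallback_rq()):

/*
 * Illustrative only -- userspace C, not kernel code.
 */
#include <stdio.h>

#define NR_CPUS	8

static int pick_fallback_cpu(unsigned long allowed, unsigned long active)
{
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		if (!(active & (1UL << cpu)))
			continue;	/* never place tasks on !active CPUs */
		if (allowed & (1UL << cpu))
			return cpu;	/* active implies online, no extra test */
	}
	return -1;			/* caller would widen the allowed mask */
}

int main(void)
{
	/* CPU 2 is being removed (cleared from active); task allows CPUs 2-3 */
	printf("fallback CPU: %d\n", pick_fallback_cpu(0xc, 0xb));
	return 0;
}
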
@@ -1582,6 +1584,8 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
if (p->nr_cpus_allowed > 1)
cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
else
cpu = cpumask_any(tsk_cpus_allowed(p));
/*
* In order not to call set_task_cpu() on a blocking task we need
@@ -5288,6 +5292,8 @@ out:
#ifdef CONFIG_SMP
static bool sched_smp_initialized __read_mostly;
#ifdef CONFIG_NUMA_BALANCING
/* Migrate current task p to target_cpu */
int migrate_task_to(struct task_struct *p, int target_cpu)
@@ -5503,127 +5509,13 @@ static void set_rq_offline(struct rq *rq)
}
}
/*
* migration_call - callback that gets triggered when a CPU is added.
* Here we can start up the necessary migration thread for the new CPU.
*/
static int
migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
static void set_cpu_rq_start_time(unsigned int cpu)
{
int cpu = (long)hcpu;
unsigned long flags;
struct rq *rq = cpu_rq(cpu);
switch (action & ~CPU_TASKS_FROZEN) {
case CPU_UP_PREPARE:
rq->calc_load_update = calc_load_update;
account_reset_rq(rq);
break;
case CPU_ONLINE:
/* Update our root-domain */
raw_spin_lock_irqsave(&rq->lock, flags);
if (rq->rd) {
BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
set_rq_online(rq);
}
raw_spin_unlock_irqrestore(&rq->lock, flags);
break;
#ifdef CONFIG_HOTPLUG_CPU
case CPU_DYING:
sched_ttwu_pending();
/* Update our root-domain */
raw_spin_lock_irqsave(&rq->lock, flags);
if (rq->rd) {
BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
set_rq_offline(rq);
}
migrate_tasks(rq);
BUG_ON(rq->nr_running != 1); /* the migration thread */
raw_spin_unlock_irqrestore(&rq->lock, flags);
break;
case CPU_DEAD:
calc_load_migrate(rq);
break;
#endif
}
update_max_interval();
return NOTIFY_OK;
}
/*
* Register at high priority so that task migration (migrate_all_tasks)
* happens before everything else. This has to be lower priority than
* the notifier in the perf_event subsystem, though.
*/
static struct notifier_block migration_notifier = {
.notifier_call = migration_call,
.priority = CPU_PRI_MIGRATION,
};
static void set_cpu_rq_start_time(void)
{
int cpu = smp_processor_id();
struct rq *rq = cpu_rq(cpu);
rq->age_stamp = sched_clock_cpu(cpu);
}
static int sched_cpu_active(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
int cpu = (long)hcpu;
switch (action & ~CPU_TASKS_FROZEN) {
case CPU_STARTING:
set_cpu_rq_start_time();
return NOTIFY_OK;
case CPU_DOWN_FAILED:
set_cpu_active(cpu, true);
return NOTIFY_OK;
default:
return NOTIFY_DONE;
}
}
static int sched_cpu_inactive(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
switch (action & ~CPU_TASKS_FROZEN) {
case CPU_DOWN_PREPARE:
set_cpu_active((long)hcpu, false);
return NOTIFY_OK;
default:
return NOTIFY_DONE;
}
}
static int __init migration_init(void)
{
void *cpu = (void *)(long)smp_processor_id();
int err;
/* Initialize migration for the boot CPU */
err = migration_call(&migration_notifier, CPU_UP_PREPARE, cpu);
BUG_ON(err == NOTIFY_BAD);
migration_call(&migration_notifier, CPU_ONLINE, cpu);
register_cpu_notifier(&migration_notifier);
/* Register cpu active notifiers */
cpu_notifier(sched_cpu_active, CPU_PRI_SCHED_ACTIVE);
cpu_notifier(sched_cpu_inactive, CPU_PRI_SCHED_INACTIVE);
return 0;
}
early_initcall(migration_init);
static cpumask_var_t sched_domains_tmpmask; /* sched_domains_mutex */
#ifdef CONFIG_SCHED_DEBUG
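
The hunk above deletes migration_call(), the migration_notifier registration and the sched_cpu_active()/sched_cpu_inactive() notifiers: in the hotplug state machine every phase gets its own callback instead of one notifier switching on the action code. For comparison, driver-style code registers such per-phase callbacks through cpuhp_setup_state(); the sketch below is a generic illustration of that API (my_online/my_offline and the "sample:online" name are made up, and this is not code from this commit):

#include <linux/cpu.h>
#include <linux/cpuhotplug.h>
#include <linux/module.h>

/* hypothetical per-CPU bring-up/tear-down callbacks; return 0 or -errno */
static int my_online(unsigned int cpu)
{
	pr_info("sample: cpu %u coming online\n", cpu);
	return 0;
}

static int my_offline(unsigned int cpu)
{
	pr_info("sample: cpu %u going offline\n", cpu);
	return 0;
}

static int __init my_sample_init(void)
{
	int ret;

	/*
	 * Dynamic state in the online range: my_online() is invoked for every
	 * CPU that is already online and on each later bring-up, my_offline()
	 * on each CPU-down -- roughly what the CPU_ONLINE/CPU_DOWN_PREPARE
	 * notifier cases used to cover.
	 */
	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "sample:online",
				my_online, my_offline);
	return ret < 0 ? ret : 0;
}
module_init(my_sample_init);
MODULE_LICENSE("GPL");
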
@@ -6771,10 +6663,10 @@ static void sched_init_numa(void)
init_numa_topology_type();
}
static void sched_domains_numa_masks_set(int cpu)
static void sched_domains_numa_masks_set(unsigned int cpu)
{
int i, j;
int node = cpu_to_node(cpu);
int i, j;
for (i = 0; i < sched_domains_numa_levels; i++) {
for (j = 0; j < nr_node_ids; j++) {
@@ -6784,51 +6676,20 @@ static void sched_domains_numa_masks_set(int cpu)
}
}
static void sched_domains_numa_masks_clear(int cpu)
static void sched_domains_numa_masks_clear(unsigned int cpu)
{
int i, j;
for (i = 0; i < sched_domains_numa_levels; i++) {
for (j = 0; j < nr_node_ids; j++)
cpumask_clear_cpu(cpu, sched_domains_numa_masks[i][j]);
}
}
/*
* Update sched_domains_numa_masks[level][node] array when new cpus
* are onlined.
*/
static int sched_domains_numa_masks_update(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
{
int cpu = (long)hcpu;
switch (action & ~CPU_TASKS_FROZEN) {
case CPU_ONLINE:
sched_domains_numa_masks_set(cpu);
break;
case CPU_DEAD:
sched_domains_numa_masks_clear(cpu);
break;
default:
return NOTIFY_DONE;
}
return NOTIFY_OK;
}
#else
static inline void sched_init_numa(void)
{
}
static int sched_domains_numa_masks_update(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
{
return 0;
}
static inline void sched_init_numa(void) { }
static void sched_domains_numa_masks_set(unsigned int cpu) { }
static void sched_domains_numa_masks_clear(unsigned int cpu) { }
#endif /* CONFIG_NUMA */
static int __sdt_alloc(const struct cpumask *cpu_map)
@@ -7218,13 +7079,9 @@ static int num_cpus_frozen; /* used to mark begin/end of suspend/resume */
* If we come here as part of a suspend/resume, don't touch cpusets because we
* want to restore it back to its original state upon resume anyway.
*/
static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action,
void *hcpu)
static void cpuset_cpu_active(void)
{
switch (action) {
case CPU_ONLINE_FROZEN:
case CPU_DOWN_FAILED_FROZEN:
if (cpuhp_tasks_frozen) {
/*
* num_cpus_frozen tracks how many CPUs are involved in suspend
* resume sequence. As long as this is not the last online
@@ -7234,35 +7091,25 @@ static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action,
num_cpus_frozen--;
if (likely(num_cpus_frozen)) {
partition_sched_domains(1, NULL, NULL);
break;
return;
}
/*
* This is the last CPU online operation. So fall through and
* restore the original sched domains by considering the
* cpuset configurations.
*/
case CPU_ONLINE:
cpuset_update_active_cpus(true);
break;
default:
return NOTIFY_DONE;
}
return NOTIFY_OK;
cpuset_update_active_cpus(true);
}
static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action,
void *hcpu)
static int cpuset_cpu_inactive(unsigned int cpu)
{
unsigned long flags;
long cpu = (long)hcpu;
struct dl_bw *dl_b;
bool overflow;
int cpus;
switch (action) {
case CPU_DOWN_PREPARE:
if (!cpuhp_tasks_frozen) {
rcu_read_lock_sched();
dl_b = dl_bw_of(cpu);
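
The suspend/resume handling above relies on num_cpus_frozen: every frozen inactivation keeps a single scheduler domain, and only the final frozen activation rebuilds domains from the cpuset configuration. A toy model of that bookkeeping (userspace C, illustrative only):

/*
 * Toy model -- userspace C, not kernel code.
 */
#include <stdio.h>

static int num_cpus_frozen;

static void frozen_cpu_inactive(void)
{
	num_cpus_frozen++;
	printf("suspend: frozen=%d -> keep a single sched domain\n",
	       num_cpus_frozen);
}

static void frozen_cpu_active(void)
{
	num_cpus_frozen--;
	if (num_cpus_frozen)
		printf("resume:  frozen=%d -> still a single domain\n",
		       num_cpus_frozen);
	else
		printf("resume:  last CPU -> rebuild domains from cpusets\n");
}

int main(void)
{
	int i;

	/* suspend takes three non-boot CPUs down, resume brings them back */
	for (i = 0; i < 3; i++)
		frozen_cpu_inactive();
	for (i = 0; i < 3; i++)
		frozen_cpu_active();
	return 0;
}
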
@@ -7274,19 +7121,120 @@ static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action,
rcu_read_unlock_sched();
if (overflow)
return notifier_from_errno(-EBUSY);
return -EBUSY;
cpuset_update_active_cpus(false);
break;
case CPU_DOWN_PREPARE_FROZEN:
} else {
num_cpus_frozen++;
partition_sched_domains(1, NULL, NULL);
break;
default:
return NOTIFY_DONE;
}
return NOTIFY_OK;
return 0;
}
int sched_cpu_activate(unsigned int cpu)
{
struct rq *rq = cpu_rq(cpu);
unsigned long flags;
set_cpu_active(cpu, true);
if (sched_smp_initialized) {
sched_domains_numa_masks_set(cpu);
cpuset_cpu_active();
}
/*
* Put the rq online, if not already. This happens:
*
* 1) In the early boot process, because we build the real domains
* after all cpus have been brought up.
*
* 2) At runtime, if cpuset_cpu_active() fails to rebuild the
* domains.
*/
raw_spin_lock_irqsave(&rq->lock, flags);
if (rq->rd) {
BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
set_rq_online(rq);
}
raw_spin_unlock_irqrestore(&rq->lock, flags);
update_max_interval();
return 0;
}
int sched_cpu_deactivate(unsigned int cpu)
{
int ret;
set_cpu_active(cpu, false);
/*
* We've cleared cpu_active_mask, wait for all preempt-disabled and RCU
* users of this state to go away such that all new such users will
* observe it.
*
* For CONFIG_PREEMPT we have preemptible RCU and its sync_rcu() might
* not imply sync_sched(), so wait for both.
*
* Do the sync before parking the smpboot threads, to take care of the RCU boost case.
*/
if (IS_ENABLED(CONFIG_PREEMPT))
synchronize_rcu_mult(call_rcu, call_rcu_sched);
else
synchronize_rcu();
if (!sched_smp_initialized)
return 0;
ret = cpuset_cpu_inactive(cpu);
if (ret) {
set_cpu_active(cpu, true);
return ret;
}
sched_domains_numa_masks_clear(cpu);
return 0;
}
static void sched_rq_cpu_starting(unsigned int cpu)
{
struct rq *rq = cpu_rq(cpu);
rq->calc_load_update = calc_load_update;
account_reset_rq(rq);
update_max_interval();
}
int sched_cpu_starting(unsigned int cpu)
{
set_cpu_rq_start_time(cpu);
sched_rq_cpu_starting(cpu);
return 0;
}
#ifdef CONFIG_HOTPLUG_CPU
int sched_cpu_dying(unsigned int cpu)
{
struct rq *rq = cpu_rq(cpu);
unsigned long flags;
/* Handle pending wakeups and then migrate everything off */
sched_ttwu_pending();
raw_spin_lock_irqsave(&rq->lock, flags);
if (rq->rd) {
BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
set_rq_offline(rq);
}
migrate_tasks(rq);
BUG_ON(rq->nr_running != 1);
raw_spin_unlock_irqrestore(&rq->lock, flags);
calc_load_migrate(rq);
update_max_interval();
nohz_balance_exit_idle(cpu);
hrtick_clear(rq);
return 0;
}
#endif
void __init sched_init_smp(void)
{
cpumask_var_t non_isolated_cpus;
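
sched_cpu_activate()/sched_cpu_deactivate() and sched_cpu_starting()/sched_cpu_dying() pair up as startup/teardown callbacks of hotplug states; on CPU-down the teardown callbacks run in the reverse order of bring-up, which is why deactivation comes first and dying last. The table itself lives in kernel/cpu.c and is not part of this diff; below is only a simplified userspace model of that pairing and ordering (the state names are assumptions):

/*
 * Simplified model -- userspace C, illustrative only; the real table and
 * state constants live in kernel/cpu.c and include/linux/cpuhotplug.h.
 */
#include <stdio.h>

struct step {
	const char *name;
	int (*startup)(unsigned int cpu);	/* runs on CPU bring-up  */
	int (*teardown)(unsigned int cpu);	/* runs on CPU tear-down */
};

static int starting(unsigned int cpu)   { printf("cpu%u: starting\n", cpu);   return 0; }
static int dying(unsigned int cpu)      { printf("cpu%u: dying\n", cpu);      return 0; }
static int activate(unsigned int cpu)   { printf("cpu%u: activate\n", cpu);   return 0; }
static int deactivate(unsigned int cpu) { printf("cpu%u: deactivate\n", cpu); return 0; }

static const struct step steps[] = {
	{ "sched:starting", starting, dying      },	/* early, on the new CPU */
	{ "sched:active",   activate, deactivate },	/* last step of bring-up */
};

int main(void)
{
	unsigned int cpu = 1;
	int i, n = sizeof(steps) / sizeof(steps[0]);

	/* cpu-up: startup callbacks in table order, activate() is last */
	for (i = 0; i < n; i++)
		steps[i].startup(cpu);

	/* cpu-down: teardown in reverse order, deactivate() first, dying() last */
	for (i = n - 1; i >= 0; i--)
		steps[i].teardown(cpu);

	return 0;
}
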
@@ -7308,12 +7256,6 @@ void __init sched_init_smp(void)
cpumask_set_cpu(smp_processor_id(), non_isolated_cpus);
mutex_unlock(&sched_domains_mutex);
hotcpu_notifier(sched_domains_numa_masks_update, CPU_PRI_SCHED_ACTIVE);
hotcpu_notifier(cpuset_cpu_active, CPU_PRI_CPUSET_ACTIVE);
hotcpu_notifier(cpuset_cpu_inactive, CPU_PRI_CPUSET_INACTIVE);
init_hrtick();
/* Move init over to a non-isolated CPU */
if (set_cpus_allowed_ptr(current, non_isolated_cpus) < 0)
BUG();
@@ -7322,7 +7264,16 @@ void __init sched_init_smp(void)
init_sched_rt_class();
init_sched_dl_class();
sched_smp_initialized = true;
}
static int __init migration_init(void)
{
sched_rq_cpu_starting(smp_processor_id());
return 0;
}
early_initcall(migration_init);
#else
void __init sched_init_smp(void)
{
@@ -7519,7 +7470,7 @@ void __init sched_init(void)
if (cpu_isolated_map == NULL)
zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
idle_thread_set_boot_cpu();
set_cpu_rq_start_time();
set_cpu_rq_start_time(smp_processor_id());
#endif
init_sched_fair_class();