sched: Fix pick_next_task() vs 'change' pattern race
Commit 67692435c4
("sched: Rework pick_next_task() slow-path") inadvertently introduced a race because it changed a previously unexplored dependency between dropping the rq->lock and sched_class::put_prev_task(). The comments about dropping rq->lock, in for example newidle_balance(), only mention the task being current and ->on_cpu being set. But when we look at the 'change' pattern (in, for example, sched_setnuma()): queued = task_on_rq_queued(p); /* p->on_rq == TASK_ON_RQ_QUEUED */ running = task_current(rq, p); /* rq->curr == p */ if (queued) dequeue_task(...); if (running) put_prev_task(...); /* change task properties */ if (queued) enqueue_task(...); if (running) set_next_task(...); It becomes obvious that if we do this after put_prev_task() has already been called on @p, things go sideways. This is exactly what the commit in question allows to happen when it does: prev->sched_class->put_prev_task(rq, prev, rf); if (!rq->nr_running) newidle_balance(rq, rf); The newidle_balance() call will drop rq->lock after we've called put_prev_task() and that allows the above 'change' pattern to interleave and mess up the state. Furthermore, it turns out we lost the RT-pull when we put the last DL task. Fix both problems by extracting the balancing from put_prev_task() and doing a multi-class balance() pass before put_prev_task(). Fixes: 67692435c4
("sched: Rework pick_next_task() slow-path") Reported-by: Quentin Perret <qperret@google.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Tested-by: Quentin Perret <qperret@google.com> Tested-by: Valentin Schneider <valentin.schneider@arm.com>
This commit is contained in:
@@ -1727,10 +1727,11 @@ struct sched_class {
|
||||
struct task_struct * (*pick_next_task)(struct rq *rq,
|
||||
struct task_struct *prev,
|
||||
struct rq_flags *rf);
|
||||
void (*put_prev_task)(struct rq *rq, struct task_struct *p, struct rq_flags *rf);
|
||||
void (*put_prev_task)(struct rq *rq, struct task_struct *p);
|
||||
void (*set_next_task)(struct rq *rq, struct task_struct *p);
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
int (*balance)(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
|
||||
int (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);
|
||||
void (*migrate_task_rq)(struct task_struct *p, int new_cpu);
|
||||
|
||||
@@ -1773,7 +1774,7 @@ struct sched_class {
|
||||
/*
 * put_prev_task - hand @prev back to its scheduling class.
 *
 * Warns once if @prev is not rq->curr, then invokes the put_prev_task hook
 * of @prev's sched_class.
 *
 * NOTE(review): this is a rendered diff, so two call lines appear below.
 * Presumably the three-argument call (passing NULL) is the removed line and
 * the two-argument call is its replacement -- confirm against the tree.
 */
static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
|
||||
{
|
||||
WARN_ON_ONCE(rq->curr != prev);
|
||||
prev->sched_class->put_prev_task(rq, prev, NULL);
|
||||
prev->sched_class->put_prev_task(rq, prev);
|
||||
}
|
||||
|
||||
static inline void set_next_task(struct rq *rq, struct task_struct *next)
|
||||
@@ -1787,8 +1788,12 @@ static inline void set_next_task(struct rq *rq, struct task_struct *next)
|
||||
#else
|
||||
#define sched_class_highest (&dl_sched_class)
|
||||
#endif
|
||||
|
||||
/*
 * for_class_range - walk scheduling classes via their ->next links.
 *
 * Starts with @class set to @_from and stops as soon as @class equals @_to,
 * so @_to itself is not visited.
 */
#define for_class_range(class, _from, _to) \
|
||||
for (class = (_from); class != (_to); class = class->next)
|
||||
|
||||
/*
 * for_each_class - visit every scheduling class, starting at
 * sched_class_highest and following ->next until it is NULL.
 *
 * NOTE(review): this is a rendered diff, so two bodies appear below.
 * Presumably the open-coded for loop is the removed line and the
 * for_class_range() form is its replacement -- confirm against the tree.
 */
#define for_each_class(class) \
|
||||
for (class = sched_class_highest; class; class = class->next)
|
||||
for_class_range(class, sched_class_highest, NULL)
|
||||
|
||||
extern const struct sched_class stop_sched_class;
|
||||
extern const struct sched_class dl_sched_class;
|
||||
@@ -1796,6 +1801,25 @@ extern const struct sched_class rt_sched_class;
|
||||
extern const struct sched_class fair_sched_class;
|
||||
extern const struct sched_class idle_sched_class;
|
||||
|
||||
/*
 * sched_stop_runnable - true when @rq has a stop task (rq->stop is non-NULL)
 * and task_on_rq_queued() reports that task as queued.
 */
static inline bool sched_stop_runnable(struct rq *rq)
|
||||
{
|
||||
return rq->stop && task_on_rq_queued(rq->stop);
|
||||
}
|
||||
|
||||
/*
 * sched_dl_runnable - true when @rq's rq->dl.dl_nr_running counter is
 * greater than zero.
 */
static inline bool sched_dl_runnable(struct rq *rq)
|
||||
{
|
||||
return rq->dl.dl_nr_running > 0;
|
||||
}
|
||||
|
||||
/*
 * sched_rt_runnable - true when @rq's rq->rt.rt_queued value is greater
 * than zero.
 */
static inline bool sched_rt_runnable(struct rq *rq)
|
||||
{
|
||||
return rq->rt.rt_queued > 0;
|
||||
}
|
||||
|
||||
/*
 * sched_fair_runnable - true when @rq's rq->cfs.nr_running counter is
 * greater than zero.
 */
static inline bool sched_fair_runnable(struct rq *rq)
|
||||
{
|
||||
return rq->cfs.nr_running > 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
|
||||
|
Reference in New Issue
Block a user