Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler updates from Ingo Molnar:
 "The main changes in this cycle were:

   - refcount conversions

   - Solve the rq->leaf_cfs_rq_list can of worms for real.

   - improve power-aware scheduling

   - add sysctl knob for Energy Aware Scheduling

   - documentation updates

   - misc other changes"

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (34 commits)
  kthread: Do not use TIMER_IRQSAFE
  kthread: Convert worker lock to raw spinlock
  sched/fair: Use non-atomic cpumask_{set,clear}_cpu()
  sched/fair: Remove unused 'sd' parameter from select_idle_smt()
  sched/wait: Use freezable_schedule() when possible
  sched/fair: Prune, fix and simplify the nohz_balancer_kick() comment block
  sched/fair: Explain LLC nohz kick condition
  sched/fair: Simplify nohz_balancer_kick()
  sched/topology: Fix percpu data types in struct sd_data & struct s_data
  sched/fair: Simplify post_init_entity_util_avg() by calling it with a task_struct pointer argument
  sched/fair: Fix O(nr_cgroups) in the load balancing path
  sched/fair: Optimize update_blocked_averages()
  sched/fair: Fix insertion in rq->leaf_cfs_rq_list
  sched/fair: Add tmp_alone_branch assertion
  sched/core: Use READ_ONCE()/WRITE_ONCE() in move_queued_task()/task_rq_lock()
  sched/debug: Initialize sd_sysctl_cpus if !CONFIG_CPUMASK_OFFSTACK
  sched/pelt: Skip updating util_est when utilization is higher than CPU's capacity
  sched/fair: Update scale invariance of PELT
  sched/fair: Move the rq_of() helper function
  sched/core: Convert task_struct.stack_refcount to refcount_t
  ...
This commit is contained in:
Linus Torvalds
2019-03-06 08:14:05 -08:00
29 changed files with 1169 additions and 328 deletions

View File

@@ -13,6 +13,7 @@
#include <linux/securebits.h>
#include <linux/seqlock.h>
#include <linux/rbtree.h>
#include <linux/refcount.h>
#include <linux/sched/autogroup.h>
#include <net/net_namespace.h>
#include <linux/sched/rt.h>

View File

@@ -86,7 +86,7 @@ enum {
struct kthread_worker {
unsigned int flags;
spinlock_t lock;
raw_spinlock_t lock;
struct list_head work_list;
struct list_head delayed_work_list;
struct task_struct *task;
@@ -107,7 +107,7 @@ struct kthread_delayed_work {
};
#define KTHREAD_WORKER_INIT(worker) { \
.lock = __SPIN_LOCK_UNLOCKED((worker).lock), \
.lock = __RAW_SPIN_LOCK_UNLOCKED((worker).lock), \
.work_list = LIST_HEAD_INIT((worker).work_list), \
.delayed_work_list = LIST_HEAD_INIT((worker).delayed_work_list),\
}
@@ -165,9 +165,8 @@ extern void __kthread_init_worker(struct kthread_worker *worker,
#define kthread_init_delayed_work(dwork, fn) \
do { \
kthread_init_work(&(dwork)->work, (fn)); \
__init_timer(&(dwork)->timer, \
kthread_delayed_work_timer_fn, \
TIMER_IRQSAFE); \
timer_setup(&(dwork)->timer, \
kthread_delayed_work_timer_fn, 0); \
} while (0)
int kthread_worker_fn(void *worker_ptr);

View File

@@ -21,6 +21,7 @@
#include <linux/seccomp.h>
#include <linux/nodemask.h>
#include <linux/rcupdate.h>
#include <linux/refcount.h>
#include <linux/resource.h>
#include <linux/latencytop.h>
#include <linux/sched/prio.h>
@@ -356,12 +357,6 @@ struct util_est {
* For cfs_rq, it is the aggregated load_avg of all runnable and
* blocked sched_entities.
*
* load_avg may also take frequency scaling into account:
*
* load_avg = runnable% * scale_load_down(load) * freq%
*
* where freq% is the CPU frequency normalized to the highest frequency.
*
* [util_avg definition]
*
* util_avg = running% * SCHED_CAPACITY_SCALE
@@ -370,17 +365,14 @@ struct util_est {
* a CPU. For cfs_rq, it is the aggregated util_avg of all runnable
* and blocked sched_entities.
*
* util_avg may also factor frequency scaling and CPU capacity scaling:
* load_avg and util_avg don't direcly factor frequency scaling and CPU
* capacity scaling. The scaling is done through the rq_clock_pelt that
* is used for computing those signals (see update_rq_clock_pelt())
*
* util_avg = running% * SCHED_CAPACITY_SCALE * freq% * capacity%
*
* where freq% is the same as above, and capacity% is the CPU capacity
* normalized to the greatest capacity (due to uarch differences, etc).
*
* N.B., the above ratios (runnable%, running%, freq%, and capacity%)
* themselves are in the range of [0, 1]. To do fixed point arithmetics,
* we therefore scale them to as large a range as necessary. This is for
* example reflected by util_avg's SCHED_CAPACITY_SCALE.
* N.B., the above ratios (runnable% and running%) themselves are in the
* range of [0, 1]. To do fixed point arithmetics, we therefore scale them
* to as large a range as necessary. This is for example reflected by
* util_avg's SCHED_CAPACITY_SCALE.
*
* [Overflow issue]
*
@@ -607,7 +599,7 @@ struct task_struct {
randomized_struct_fields_start
void *stack;
atomic_t usage;
refcount_t usage;
/* Per task flags (PF_*), defined further below: */
unsigned int flags;
unsigned int ptrace;
@@ -1187,7 +1179,7 @@ struct task_struct {
#endif
#ifdef CONFIG_THREAD_INFO_IN_TASK
/* A live task holds one reference: */
atomic_t stack_refcount;
refcount_t stack_refcount;
#endif
#ifdef CONFIG_LIVEPATCH
int patch_state;
@@ -1403,7 +1395,6 @@ extern struct pid *cad_pid;
#define PF_UMH 0x02000000 /* I'm an Usermodehelper process */
#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */
#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */
#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */
#define PF_SUSPEND_TASK 0x80000000 /* This thread called freeze_processes() and should not be frozen */
@@ -1753,9 +1744,9 @@ static __always_inline bool need_resched(void)
static inline unsigned int task_cpu(const struct task_struct *p)
{
#ifdef CONFIG_THREAD_INFO_IN_TASK
return p->cpu;
return READ_ONCE(p->cpu);
#else
return task_thread_info(p)->cpu;
return READ_ONCE(task_thread_info(p)->cpu);
#endif
}

View File

@@ -8,13 +8,14 @@
#include <linux/sched/jobctl.h>
#include <linux/sched/task.h>
#include <linux/cred.h>
#include <linux/refcount.h>
/*
* Types defining task->signal and task->sighand and APIs using them:
*/
struct sighand_struct {
atomic_t count;
refcount_t count;
struct k_sigaction action[_NSIG];
spinlock_t siglock;
wait_queue_head_t signalfd_wqh;
@@ -82,7 +83,7 @@ struct multiprocess_signals {
* the locking of signal_struct.
*/
struct signal_struct {
atomic_t sigcnt;
refcount_t sigcnt;
atomic_t live;
int nr_threads;
struct list_head thread_head;

View File

@@ -83,4 +83,11 @@ extern int sysctl_schedstats(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos);
#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
extern unsigned int sysctl_sched_energy_aware;
extern int sched_energy_aware_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos);
#endif
#endif /* _LINUX_SCHED_SYSCTL_H */

View File

@@ -88,13 +88,13 @@ extern void sched_exec(void);
#define sched_exec() {}
#endif
#define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0)
#define get_task_struct(tsk) do { refcount_inc(&(tsk)->usage); } while(0)
extern void __put_task_struct(struct task_struct *t);
static inline void put_task_struct(struct task_struct *t)
{
if (atomic_dec_and_test(&t->usage))
if (refcount_dec_and_test(&t->usage))
__put_task_struct(t);
}

View File

@@ -61,7 +61,7 @@ static inline unsigned long *end_of_stack(struct task_struct *p)
#ifdef CONFIG_THREAD_INFO_IN_TASK
static inline void *try_get_task_stack(struct task_struct *tsk)
{
return atomic_inc_not_zero(&tsk->stack_refcount) ?
return refcount_inc_not_zero(&tsk->stack_refcount) ?
task_stack_page(tsk) : NULL;
}

View File

@@ -176,10 +176,10 @@ typedef int (*sched_domain_flags_f)(void);
#define SDTL_OVERLAP 0x01
struct sd_data {
struct sched_domain **__percpu sd;
struct sched_domain_shared **__percpu sds;
struct sched_group **__percpu sg;
struct sched_group_capacity **__percpu sgc;
struct sched_domain *__percpu *sd;
struct sched_domain_shared *__percpu *sds;
struct sched_group *__percpu *sg;
struct sched_group_capacity *__percpu *sgc;
};
struct sched_domain_topology_level {

View File

@@ -308,7 +308,7 @@ do { \
#define __wait_event_freezable(wq_head, condition) \
___wait_event(wq_head, condition, TASK_INTERRUPTIBLE, 0, 0, \
schedule(); try_to_freeze())
freezable_schedule())
/**
* wait_event_freezable - sleep (or freeze) until a condition gets true
@@ -367,7 +367,7 @@ do { \
#define __wait_event_freezable_timeout(wq_head, condition, timeout) \
___wait_event(wq_head, ___wait_cond_timeout(condition), \
TASK_INTERRUPTIBLE, 0, timeout, \
__ret = schedule_timeout(__ret); try_to_freeze())
__ret = freezable_schedule_timeout(__ret))
/*
* like wait_event_timeout() -- except it uses TASK_INTERRUPTIBLE to avoid
@@ -588,7 +588,7 @@ do { \
#define __wait_event_freezable_exclusive(wq, condition) \
___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, 0, \
schedule(); try_to_freeze())
freezable_schedule())
#define wait_event_freezable_exclusive(wq, condition) \
({ \