Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:
"The main changes are:
- lockless wakeup support for futexes and IPC message queues
(Davidlohr Bueso, Peter Zijlstra)
- Replace spinlocks with atomics in thread_group_cputimer(), to
improve scalability (Jason Low)
- NUMA balancing improvements (Rik van Riel)
- SCHED_DEADLINE improvements (Wanpeng Li)
- clean up and reorganize preemption helpers (Frederic Weisbecker)
- decouple page fault disabling machinery from the preemption
counter, to improve debuggability and robustness (David
Hildenbrand)
- SCHED_DEADLINE documentation updates (Luca Abeni)
- topology CPU masks cleanups (Bartosz Golaszewski)
- /proc/sched_debug improvements (Srikar Dronamraju)"
* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (79 commits)
sched/deadline: Remove needless parameter in dl_runtime_exceeded()
sched: Remove superfluous resetting of the p->dl_throttled flag
sched/deadline: Drop duplicate init_sched_dl_class() declaration
sched/deadline: Reduce rq lock contention by eliminating locking of non-feasible target
sched/deadline: Make init_sched_dl_class() __init
sched/deadline: Optimize pull_dl_task()
sched/preempt: Add static_key() to preempt_notifiers
sched/preempt: Fix preempt notifiers documentation about hlist_del() within unsafe iteration
sched/stop_machine: Fix deadlock between multiple stop_two_cpus()
sched/debug: Add sum_sleep_runtime to /proc/<pid>/sched
sched/debug: Replace vruntime with wait_sum in /proc/sched_debug
sched/debug: Properly format runnable tasks in /proc/sched_debug
sched/numa: Only consider less busy nodes as numa balancing destinations
Revert 095bebf61a ("sched/numa: Do not move past the balance point if unbalanced")
sched/fair: Prevent throttling in early pick_next_task_fair()
preempt: Reorganize the notrace definitions a bit
preempt: Use preempt_schedule_context() as the official tracing preemption point
sched: Make preempt_schedule_context() function-tracing safe
x86: Remove cpu_sibling_mask() and cpu_core_mask()
x86: Replace cpu_**_mask() with topology_**_cpumask()
...
@@ -25,7 +25,7 @@ struct sched_param {
 #include <linux/errno.h>
 #include <linux/nodemask.h>
 #include <linux/mm_types.h>
-#include <linux/preempt_mask.h>
+#include <linux/preempt.h>
 
 #include <asm/page.h>
 #include <asm/ptrace.h>
@@ -174,7 +174,12 @@ extern unsigned long nr_iowait_cpu(int cpu);
 extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load);
 
 extern void calc_global_load(unsigned long ticks);
 
+#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
+extern void update_cpu_load_nohz(void);
+#else
+static inline void update_cpu_load_nohz(void) { }
+#endif
 
 extern unsigned long get_parent_ip(unsigned long addr);
 
@@ -214,9 +219,10 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
 #define TASK_WAKEKILL		128
 #define TASK_WAKING		256
 #define TASK_PARKED		512
-#define TASK_STATE_MAX		1024
+#define TASK_NOLOAD		1024
+#define TASK_STATE_MAX		2048
 
-#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWP"
+#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPN"
 
 extern char ___assert_task_state[1 - 2*!!(
 		sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)];
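The ___assert_task_state declaration above is a build-time check: a negative array size fails compilation, so the header only builds when the state string names every state bit. Working the arithmetic for the new values: ilog2(2048) + 1 = 12 state bits, and "RSDTtXZxKWPN" is exactly 12 characters, so the array size evaluates to 1 - 2*!!(0) = 1. For illustration (my example, not code from the patch):

char ok[1 - 2*!!(12 != 12)];	/* size  1: compiles */
/* char bad[1 - 2*!!(11 != 12)];   size -1: build error if 'N' were missing */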
@@ -226,6 +232,8 @@ extern char ___assert_task_state[1 - 2*!!(
 #define TASK_STOPPED		(TASK_WAKEKILL | __TASK_STOPPED)
 #define TASK_TRACED		(TASK_WAKEKILL | __TASK_TRACED)
 
+#define TASK_IDLE		(TASK_UNINTERRUPTIBLE | TASK_NOLOAD)
+
 /* Convenience macros for the sake of wake_up */
 #define TASK_NORMAL		(TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)
 #define TASK_ALL		(TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED)
@@ -241,7 +249,8 @@ extern char ___assert_task_state[1 - 2*!!(
 			((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0)
 #define task_contributes_to_load(task)	\
 				((task->state & TASK_UNINTERRUPTIBLE) != 0 && \
-				 (task->flags & PF_FROZEN) == 0)
+				 (task->flags & PF_FROZEN) == 0 && \
+				 (task->state & TASK_NOLOAD) == 0)
 
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
 
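The extra TASK_NOLOAD test is the payoff of the new state bit: an uninterruptible sleeper carrying it no longer counts toward the load average. A hedged sketch of a kernel thread using the combined TASK_IDLE state from the previous hunk (the loop shape is illustrative, not from this patch):

	for (;;) {
		/* TASK_UNINTERRUPTIBLE | TASK_NOLOAD: sleep without
		 * handling signals and without inflating loadavg */
		set_current_state(TASK_IDLE);
		if (kthread_should_stop())
			break;
		schedule();
	}
	__set_current_state(TASK_RUNNING);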
@@ -568,6 +577,23 @@ struct task_cputime {
 		.sum_exec_runtime = 0,				\
 	}
 
+/*
+ * This is the atomic variant of task_cputime, which can be used for
+ * storing and updating task_cputime statistics without locking.
+ */
+struct task_cputime_atomic {
+	atomic64_t utime;
+	atomic64_t stime;
+	atomic64_t sum_exec_runtime;
+};
+
+#define INIT_CPUTIME_ATOMIC \
+	(struct task_cputime_atomic) {				\
+		.utime = ATOMIC64_INIT(0),			\
+		.stime = ATOMIC64_INIT(0),			\
+		.sum_exec_runtime = ATOMIC64_INIT(0),		\
+	}
+
 #ifdef CONFIG_PREEMPT_COUNT
 #define PREEMPT_DISABLED	(1 + PREEMPT_ENABLED)
 #else
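What the atomic variant buys (the Jason Low scalability item in the summary above): each field can be advanced without taking the thread-group lock. A minimal writer sketch with an illustrative helper name; note the three counters are only individually atomic, so a reader may observe them at slightly different instants:

static void account_group_times(struct task_cputime_atomic *ct,
				u64 utime, u64 stime, u64 runtime)
{
	atomic64_add(utime,   &ct->utime);	/* no spinlock needed */
	atomic64_add(stime,   &ct->stime);
	atomic64_add(runtime, &ct->sum_exec_runtime);
}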
@@ -585,18 +611,16 @@ struct task_cputime {
 
 /**
  * struct thread_group_cputimer - thread group interval timer counts
- * @cputime:		thread group interval timers.
+ * @cputime_atomic:	atomic thread group interval timers.
  * @running:		non-zero when there are timers running and
  *			@cputime receives updates.
- * @lock:		lock for fields in this struct.
  *
  * This structure contains the version of task_cputime, above, that is
  * used for thread group CPU timer calculations.
  */
 struct thread_group_cputimer {
-	struct task_cputime cputime;
+	struct task_cputime_atomic cputime_atomic;
 	int running;
-	raw_spinlock_t lock;
 };
 
 #include <linux/rwsem.h>
@@ -900,6 +924,50 @@ enum cpu_idle_type {
 #define SCHED_CAPACITY_SHIFT	10
 #define SCHED_CAPACITY_SCALE	(1L << SCHED_CAPACITY_SHIFT)
 
+/*
+ * Wake-queues are lists of tasks with a pending wakeup, whose
+ * callers have already marked the task as woken internally,
+ * and can thus carry on. A common use case is being able to
+ * do the wakeups once the corresponding user lock has been
+ * released.
+ *
+ * We hold reference to each task in the list across the wakeup,
+ * thus guaranteeing that the memory is still valid by the time
+ * the actual wakeups are performed in wake_up_q().
+ *
+ * One per task suffices, because there's never a need for a task to be
+ * in two wake queues simultaneously; it is forbidden to abandon a task
+ * in a wake queue (a call to wake_up_q() _must_ follow), so if a task is
+ * already in a wake queue, the wakeup will happen soon and the second
+ * waker can just skip it.
+ *
+ * The WAKE_Q macro declares and initializes the list head.
+ * wake_up_q() does NOT reinitialize the list; it's expected to be
+ * called near the end of a function, where the fact that the queue is
+ * not used again will be easy to see by inspection.
+ *
+ * Note that this can cause spurious wakeups. schedule() callers
+ * must ensure the call is done inside a loop, confirming that the
+ * wakeup condition has in fact occurred.
+ */
+struct wake_q_node {
+	struct wake_q_node *next;
+};
+
+struct wake_q_head {
+	struct wake_q_node *first;
+	struct wake_q_node **lastp;
+};
+
+#define WAKE_Q_TAIL ((struct wake_q_node *) 0x01)
+
+#define WAKE_Q(name)					\
+	struct wake_q_head name = { WAKE_Q_TAIL, &name.first }
+
+extern void wake_q_add(struct wake_q_head *head,
+		       struct task_struct *task);
+extern void wake_up_q(struct wake_q_head *head);
+
 /*
  * sched-domains (multiprocessor balancing) declarations:
  */
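A minimal usage sketch of the interface declared above, following the protocol the comment spells out (this is the lockless-wakeup pattern futexes and IPC message queues adopt in this pull); the lock and waiter list are illustrative names, not from this patch:

	struct my_waiter *w;
	WAKE_Q(wake_q);			/* declares + initializes the head */

	spin_lock(&my_lock);
	list_for_each_entry(w, &my_waiters, list)
		wake_q_add(&wake_q, w->task);	/* holds a task reference */
	spin_unlock(&my_lock);

	wake_up_q(&wake_q);		/* wakeups happen with the lock dropped */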
@@ -1335,8 +1403,6 @@ struct task_struct {
 	int rcu_read_lock_nesting;
 	union rcu_special rcu_read_unlock_special;
 	struct list_head rcu_node_entry;
-#endif /* #ifdef CONFIG_PREEMPT_RCU */
-#ifdef CONFIG_PREEMPT_RCU
 	struct rcu_node *rcu_blocked_node;
 #endif /* #ifdef CONFIG_PREEMPT_RCU */
 #ifdef CONFIG_TASKS_RCU
@@ -1367,7 +1433,7 @@ struct task_struct {
 	int exit_state;
 	int exit_code, exit_signal;
 	int pdeath_signal;	/* The signal sent when the parent dies */
-	unsigned int jobctl;	/* JOBCTL_*, siglock protected */
+	unsigned long jobctl;	/* JOBCTL_*, siglock protected */
 
 	/* Used for emulating ABI behavior of previous Linux versions */
 	unsigned int personality;
@@ -1513,6 +1579,8 @@ struct task_struct {
 	/* Protection of the PI data structures: */
 	raw_spinlock_t pi_lock;
 
+	struct wake_q_node wake_q;
+
 #ifdef CONFIG_RT_MUTEXES
 	/* PI waiters blocked on a rt_mutex held by this task */
 	struct rb_root pi_waiters;
@@ -1726,6 +1794,7 @@ struct task_struct {
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
 	unsigned long	task_state_change;
 #endif
+	int pagefault_disabled;
 };
 
 /* Future-safe accessor for struct task_struct's cpus_allowed. */
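This new per-task counter is what lets page-fault disabling stop piggybacking on the preempt count (the David Hildenbrand item in the summary): whether faults are disabled becomes a per-task property, debuggable independently of preemption state. A simplified sketch of the idea, assuming the shape of the helpers rather than quoting the series:

static inline void pagefault_disable(void)
{
	current->pagefault_disabled++;
	barrier();	/* keep fault-sensitive accesses after the increment */
}

static inline void pagefault_enable(void)
{
	barrier();	/* ...and before the decrement */
	current->pagefault_disabled--;
}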
@@ -2079,22 +2148,22 @@ TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab)
 #define JOBCTL_TRAPPING_BIT	21	/* switching to TRACED */
 #define JOBCTL_LISTENING_BIT	22	/* ptracer is listening for events */
 
-#define JOBCTL_STOP_DEQUEUED	(1 << JOBCTL_STOP_DEQUEUED_BIT)
-#define JOBCTL_STOP_PENDING	(1 << JOBCTL_STOP_PENDING_BIT)
-#define JOBCTL_STOP_CONSUME	(1 << JOBCTL_STOP_CONSUME_BIT)
-#define JOBCTL_TRAP_STOP	(1 << JOBCTL_TRAP_STOP_BIT)
-#define JOBCTL_TRAP_NOTIFY	(1 << JOBCTL_TRAP_NOTIFY_BIT)
-#define JOBCTL_TRAPPING		(1 << JOBCTL_TRAPPING_BIT)
-#define JOBCTL_LISTENING	(1 << JOBCTL_LISTENING_BIT)
+#define JOBCTL_STOP_DEQUEUED	(1UL << JOBCTL_STOP_DEQUEUED_BIT)
+#define JOBCTL_STOP_PENDING	(1UL << JOBCTL_STOP_PENDING_BIT)
+#define JOBCTL_STOP_CONSUME	(1UL << JOBCTL_STOP_CONSUME_BIT)
+#define JOBCTL_TRAP_STOP	(1UL << JOBCTL_TRAP_STOP_BIT)
+#define JOBCTL_TRAP_NOTIFY	(1UL << JOBCTL_TRAP_NOTIFY_BIT)
+#define JOBCTL_TRAPPING		(1UL << JOBCTL_TRAPPING_BIT)
+#define JOBCTL_LISTENING	(1UL << JOBCTL_LISTENING_BIT)
 
 #define JOBCTL_TRAP_MASK	(JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY)
 #define JOBCTL_PENDING_MASK	(JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK)
 
 extern bool task_set_jobctl_pending(struct task_struct *task,
-				    unsigned int mask);
+				    unsigned long mask);
 extern void task_clear_jobctl_trapping(struct task_struct *task);
 extern void task_clear_jobctl_pending(struct task_struct *task,
-				      unsigned int mask);
+				      unsigned long mask);
 
 static inline void rcu_copy_process(struct task_struct *p)
 {
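With jobctl widened to unsigned long, the 1UL spelling keeps the masks consistent with the field's type and avoids a classic trap should a bit index ever reach 31 (today they stop at 22): an int-typed (1 << 31) sign-extends when converted to a 64-bit unsigned long. An illustration of my own, not from the patch:

unsigned long bad  = 1   << 31;	/* 0xffffffff80000000 on 64-bit: sign-extended */
unsigned long good = 1UL << 31;	/* 0x0000000080000000: the intended mask bit */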
@@ -2964,11 +3033,6 @@ static __always_inline bool need_resched(void)
 void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times);
 void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times);
 
-static inline void thread_group_cputime_init(struct signal_struct *sig)
-{
-	raw_spin_lock_init(&sig->cputimer.lock);
-}
-
 /*
  * Reevaluate whether the task has signals pending delivery.
  * Wake the task if so.
@@ -3082,13 +3146,13 @@ static inline void mm_update_next_owner(struct mm_struct *mm)
 static inline unsigned long task_rlimit(const struct task_struct *tsk,
 		unsigned int limit)
 {
-	return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_cur);
+	return READ_ONCE(tsk->signal->rlim[limit].rlim_cur);
 }
 
 static inline unsigned long task_rlimit_max(const struct task_struct *tsk,
 		unsigned int limit)
 {
-	return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_max);
+	return READ_ONCE(tsk->signal->rlim[limit].rlim_max);
 }
 
 static inline unsigned long rlimit(unsigned int limit)
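READ_ONCE() gives the same single, untorn load that ACCESS_ONCE() was used for here, and unlike ACCESS_ONCE() it remains well-defined on non-scalar types, which is what motivated the tree-wide migration. The property callers rely on, in an illustrative fragment of my own:

	unsigned long soft = task_rlimit(current, RLIMIT_NOFILE);

	/* 'soft' is one stable snapshot: the compiler may not reload
	 * rlim_cur and compare against two different values, even if
	 * another thread calls setrlimit() concurrently */
	if (fd >= soft)
		return -EMFILE;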