Merge branch 'WIP.sched/core' into sched/core

Conflicts:
	kernel/sched/Makefile

Pick up the waitqueue related renames - it didn't get much feedback,
so it appears to be uncontroversial. Famous last words? ;-)

Signed-off-by: Ingo Molnar <mingo@kernel.org>
这个提交包含在:
Ingo Molnar
2017-06-20 12:28:21 +02:00
当前提交 902b319413
修改 1212 个文件,包含 12894 行新增8218 行删除

查看文件

@@ -86,6 +86,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
array->map.key_size = attr->key_size;
array->map.value_size = attr->value_size;
array->map.max_entries = attr->max_entries;
array->map.map_flags = attr->map_flags;
array->elem_size = elem_size;
if (!percpu)

查看文件

@@ -432,6 +432,7 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
trie->map.key_size = attr->key_size;
trie->map.value_size = attr->value_size;
trie->map.max_entries = attr->max_entries;
trie->map.map_flags = attr->map_flags;
trie->data_size = attr->key_size -
offsetof(struct bpf_lpm_trie_key, data);
trie->max_prefixlen = trie->data_size * 8;

查看文件

@@ -88,6 +88,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
smap->map.key_size = attr->key_size;
smap->map.value_size = value_size;
smap->map.max_entries = attr->max_entries;
smap->map.map_flags = attr->map_flags;
smap->n_buckets = n_buckets;
smap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;

查看文件

@@ -463,19 +463,22 @@ static const int caller_saved[CALLER_SAVED_REGS] = {
BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
};
static void mark_reg_not_init(struct bpf_reg_state *regs, u32 regno)
{
BUG_ON(regno >= MAX_BPF_REG);
memset(&regs[regno], 0, sizeof(regs[regno]));
regs[regno].type = NOT_INIT;
regs[regno].min_value = BPF_REGISTER_MIN_RANGE;
regs[regno].max_value = BPF_REGISTER_MAX_RANGE;
}
static void init_reg_state(struct bpf_reg_state *regs)
{
int i;
for (i = 0; i < MAX_BPF_REG; i++) {
regs[i].type = NOT_INIT;
regs[i].imm = 0;
regs[i].min_value = BPF_REGISTER_MIN_RANGE;
regs[i].max_value = BPF_REGISTER_MAX_RANGE;
regs[i].min_align = 0;
regs[i].aux_off = 0;
regs[i].aux_off_align = 0;
}
for (i = 0; i < MAX_BPF_REG; i++)
mark_reg_not_init(regs, i);
/* frame pointer */
regs[BPF_REG_FP].type = FRAME_PTR;
@@ -843,9 +846,6 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
{
bool strict = env->strict_alignment;
if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
strict = true;
switch (reg->type) {
case PTR_TO_PACKET:
return check_pkt_ptr_alignment(reg, off, size, strict);
@@ -1349,7 +1349,6 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
struct bpf_verifier_state *state = &env->cur_state;
const struct bpf_func_proto *fn = NULL;
struct bpf_reg_state *regs = state->regs;
struct bpf_reg_state *reg;
struct bpf_call_arg_meta meta;
bool changes_data;
int i, err;
@@ -1416,11 +1415,8 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
}
/* reset caller saved regs */
for (i = 0; i < CALLER_SAVED_REGS; i++) {
reg = regs + caller_saved[i];
reg->type = NOT_INIT;
reg->imm = 0;
}
for (i = 0; i < CALLER_SAVED_REGS; i++)
mark_reg_not_init(regs, caller_saved[i]);
/* update return register */
if (fn->ret_type == RET_INTEGER) {
@@ -2448,7 +2444,6 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
struct bpf_reg_state *regs = env->cur_state.regs;
u8 mode = BPF_MODE(insn->code);
struct bpf_reg_state *reg;
int i, err;
if (!may_access_skb(env->prog->type)) {
@@ -2481,11 +2476,8 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
}
/* reset caller saved regs to unreadable */
for (i = 0; i < CALLER_SAVED_REGS; i++) {
reg = regs + caller_saved[i];
reg->type = NOT_INIT;
reg->imm = 0;
}
for (i = 0; i < CALLER_SAVED_REGS; i++)
mark_reg_not_init(regs, caller_saved[i]);
/* mark destination R0 register as readable, since it contains
* the value fetched from the packet
@@ -2696,7 +2688,8 @@ err_free:
/* the following conditions reduce the number of explored insns
* from ~140k to ~80k for ultra large programs that use a lot of ptr_to_packet
*/
static bool compare_ptrs_to_packet(struct bpf_reg_state *old,
static bool compare_ptrs_to_packet(struct bpf_verifier_env *env,
struct bpf_reg_state *old,
struct bpf_reg_state *cur)
{
if (old->id != cur->id)
@@ -2739,7 +2732,7 @@ static bool compare_ptrs_to_packet(struct bpf_reg_state *old,
* 'if (R4 > data_end)' and all further insn were already good with r=20,
* so they will be good with r=30 and we can prune the search.
*/
if (old->off <= cur->off &&
if (!env->strict_alignment && old->off <= cur->off &&
old->off >= old->range && cur->off >= cur->range)
return true;
@@ -2810,7 +2803,7 @@ static bool states_equal(struct bpf_verifier_env *env,
continue;
if (rold->type == PTR_TO_PACKET && rcur->type == PTR_TO_PACKET &&
compare_ptrs_to_packet(rold, rcur))
compare_ptrs_to_packet(env, rold, rcur))
continue;
return false;
@@ -3588,10 +3581,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
} else {
log_level = 0;
}
if (attr->prog_flags & BPF_F_STRICT_ALIGNMENT)
env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
env->strict_alignment = true;
else
env->strict_alignment = false;
ret = replace_map_fd_with_map_ptr(env);
if (ret < 0)
@@ -3697,7 +3690,10 @@ int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops,
mutex_lock(&bpf_verifier_lock);
log_level = 0;
env->strict_alignment = false;
if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
env->strict_alignment = true;
env->explored_states = kcalloc(env->prog->len,
sizeof(struct bpf_verifier_state_list *),

查看文件

@@ -4265,6 +4265,11 @@ static void kill_css(struct cgroup_subsys_state *css)
{
lockdep_assert_held(&cgroup_mutex);
if (css->flags & CSS_DYING)
return;
css->flags |= CSS_DYING;
/*
* This must happen before css is disassociated with its cgroup.
* See seq_css() for details.

查看文件

@@ -176,9 +176,9 @@ typedef enum {
} cpuset_flagbits_t;
/* convenient tests for these bits */
static inline bool is_cpuset_online(const struct cpuset *cs)
static inline bool is_cpuset_online(struct cpuset *cs)
{
return test_bit(CS_ONLINE, &cs->flags);
return test_bit(CS_ONLINE, &cs->flags) && !css_is_dying(&cs->css);
}
static inline int is_cpu_exclusive(const struct cpuset *cs)

查看文件

@@ -1658,13 +1658,13 @@ static ssize_t write_cpuhp_target(struct device *dev,
ret = !sp->name || sp->cant_stop ? -EINVAL : 0;
mutex_unlock(&cpuhp_state_mutex);
if (ret)
return ret;
goto out;
if (st->state < target)
ret = do_cpu_up(dev->id, target);
else
ret = do_cpu_down(dev->id, target);
out:
unlock_device_hotplug();
return ret ? ret : count;
}

查看文件

@@ -7316,6 +7316,21 @@ int perf_event_account_interrupt(struct perf_event *event)
return __perf_event_account_interrupt(event, 1);
}
static bool sample_is_allowed(struct perf_event *event, struct pt_regs *regs)
{
/*
* Due to interrupt latency (AKA "skid"), we may enter the
* kernel before taking an overflow, even if the PMU is only
* counting user events.
* To avoid leaking information to userspace, we must always
* reject kernel samples when exclude_kernel is set.
*/
if (event->attr.exclude_kernel && !user_mode(regs))
return false;
return true;
}
/*
* Generic event overflow handling, sampling.
*/
@@ -7336,6 +7351,12 @@ static int __perf_event_overflow(struct perf_event *event,
ret = __perf_event_account_interrupt(event, throttle);
/*
* For security, drop the skid kernel samples if necessary.
*/
if (!sample_is_allowed(event, regs))
return ret;
/*
* XXX event_limit might not quite work as expected on inherited
* events

查看文件

@@ -1004,7 +1004,7 @@ struct wait_opts {
int __user *wo_stat;
struct rusage __user *wo_rusage;
wait_queue_t child_wait;
wait_queue_entry_t child_wait;
int notask_error;
};
@@ -1541,7 +1541,7 @@ static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk)
return 0;
}
static int child_wait_callback(wait_queue_t *wait, unsigned mode,
static int child_wait_callback(wait_queue_entry_t *wait, unsigned mode,
int sync, void *key)
{
struct wait_opts *wo = container_of(wait, struct wait_opts,

查看文件

@@ -1577,6 +1577,18 @@ static __latent_entropy struct task_struct *copy_process(
if (!p)
goto fork_out;
/*
* This _must_ happen before we call free_task(), i.e. before we jump
* to any of the bad_fork_* labels. This is to avoid freeing
* p->set_child_tid which is (ab)used as a kthread's data pointer for
* kernel threads (PF_KTHREAD).
*/
p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
/*
* Clear TID on mm_release()?
*/
p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL;
ftrace_graph_init_task(p);
rt_mutex_init_task(p);
@@ -1743,11 +1755,6 @@ static __latent_entropy struct task_struct *copy_process(
}
}
p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
/*
* Clear TID on mm_release()?
*/
p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL;
#ifdef CONFIG_BLOCK
p->plug = NULL;
#endif

查看文件

@@ -225,7 +225,7 @@ struct futex_pi_state {
* @requeue_pi_key: the requeue_pi target futex key
* @bitset: bitset for the optional bitmasked wakeup
*
* We use this hashed waitqueue, instead of a normal wait_queue_t, so
* We use this hashed waitqueue, instead of a normal wait_queue_entry_t, so
* we can wake only the relevant ones (hashed queues may be shared).
*
* A futex_q has a woken state, just like tasks have TASK_RUNNING.

查看文件

@@ -1312,8 +1312,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
ret = __irq_set_trigger(desc,
new->flags & IRQF_TRIGGER_MASK);
if (ret)
if (ret) {
irq_release_resources(desc);
goto out_mask;
}
}
desc->istate &= ~(IRQS_AUTODETECT | IRQS_SPURIOUS_DISABLED | \

查看文件

@@ -122,7 +122,7 @@ static void *alloc_insn_page(void)
return module_alloc(PAGE_SIZE);
}
static void free_insn_page(void *page)
void __weak free_insn_page(void *page)
{
module_memfree(page);
}

查看文件

@@ -10,6 +10,7 @@ config LIVEPATCH
depends on SYSFS
depends on KALLSYMS_ALL
depends on HAVE_LIVEPATCH
depends on !TRIM_UNUSED_KSYMS
help
Say Y here if you want to support kernel live patching.
This option has no runtime impact until a kernel "patch"

查看文件

@@ -1785,12 +1785,14 @@ int rt_mutex_wait_proxy_lock(struct rt_mutex *lock,
int ret;
raw_spin_lock_irq(&lock->wait_lock);
set_current_state(TASK_INTERRUPTIBLE);
/* sleep on the mutex */
set_current_state(TASK_INTERRUPTIBLE);
ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
/*
* try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
* have to fix that up.
*/
fixup_rt_mutex_waiters(lock);
raw_spin_unlock_irq(&lock->wait_lock);
return ret;
@@ -1821,16 +1823,26 @@ bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock,
bool cleanup = false;
raw_spin_lock_irq(&lock->wait_lock);
/*
* Do an unconditional try-lock, this deals with the lock stealing
* state where __rt_mutex_futex_unlock() -> mark_wakeup_next_waiter()
* sets a NULL owner.
*
* We're not interested in the return value, because the subsequent
* test on rt_mutex_owner() will infer that. If the trylock succeeded,
* we will own the lock and it will have removed the waiter. If we
* failed the trylock, we're still not owner and we need to remove
* ourselves.
*/
try_to_take_rt_mutex(lock, current, waiter);
/*
* Unless we're the owner; we're still enqueued on the wait_list.
* So check if we became owner, if not, take us off the wait_list.
*/
if (rt_mutex_owner(lock) != current) {
remove_waiter(lock, waiter);
fixup_rt_mutex_waiters(lock);
cleanup = true;
}
/*
* try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
* have to fix that up.

查看文件

@@ -132,7 +132,7 @@ int freeze_processes(void)
if (!pm_freezing)
atomic_inc(&system_freezing_cnt);
pm_wakeup_clear(true);
pm_wakeup_clear();
pr_info("Freezing user space processes ... ");
pm_freezing = true;
error = try_to_freeze_tasks(true);

查看文件

@@ -72,8 +72,6 @@ static void freeze_begin(void)
static void freeze_enter(void)
{
trace_suspend_resume(TPS("machine_suspend"), PM_SUSPEND_FREEZE, true);
spin_lock_irq(&suspend_freeze_lock);
if (pm_wakeup_pending())
goto out;
@@ -100,27 +98,6 @@ static void freeze_enter(void)
out:
suspend_freeze_state = FREEZE_STATE_NONE;
spin_unlock_irq(&suspend_freeze_lock);
trace_suspend_resume(TPS("machine_suspend"), PM_SUSPEND_FREEZE, false);
}
static void s2idle_loop(void)
{
do {
freeze_enter();
if (freeze_ops && freeze_ops->wake)
freeze_ops->wake();
dpm_resume_noirq(PMSG_RESUME);
if (freeze_ops && freeze_ops->sync)
freeze_ops->sync();
if (pm_wakeup_pending())
break;
pm_wakeup_clear(false);
} while (!dpm_suspend_noirq(PMSG_SUSPEND));
}
void freeze_wake(void)
@@ -394,8 +371,10 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
* all the devices are suspended.
*/
if (state == PM_SUSPEND_FREEZE) {
s2idle_loop();
goto Platform_early_resume;
trace_suspend_resume(TPS("machine_suspend"), state, true);
freeze_enter();
trace_suspend_resume(TPS("machine_suspend"), state, false);
goto Platform_wake;
}
error = disable_nonboot_cpus();

查看文件

@@ -269,7 +269,6 @@ static struct console *exclusive_console;
#define MAX_CMDLINECONSOLES 8
static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES];
static int console_cmdline_cnt;
static int preferred_console = -1;
int console_set_on_cmdline;
@@ -1906,25 +1905,12 @@ static int __add_preferred_console(char *name, int idx, char *options,
* See if this tty is not yet registered, and
* if we have a slot free.
*/
for (i = 0, c = console_cmdline; i < console_cmdline_cnt; i++, c++) {
for (i = 0, c = console_cmdline;
i < MAX_CMDLINECONSOLES && c->name[0];
i++, c++) {
if (strcmp(c->name, name) == 0 && c->index == idx) {
if (brl_options)
return 0;
/*
* Maintain an invariant that will help to find if
* the matching console is preferred, see
* register_console():
*
* The last non-braille console is always
* the preferred one.
*/
if (i != console_cmdline_cnt - 1)
swap(console_cmdline[i],
console_cmdline[console_cmdline_cnt - 1]);
preferred_console = console_cmdline_cnt - 1;
if (!brl_options)
preferred_console = i;
return 0;
}
}
@@ -1937,7 +1923,6 @@ static int __add_preferred_console(char *name, int idx, char *options,
braille_set_options(c, brl_options);
c->index = idx;
console_cmdline_cnt++;
return 0;
}
/*
@@ -2477,23 +2462,12 @@ void register_console(struct console *newcon)
}
/*
* See if this console matches one we selected on the command line.
*
* There may be several entries in the console_cmdline array matching
* with the same console, one with newcon->match(), another by
* name/index:
*
* pl011,mmio,0x87e024000000,115200 -- added from SPCR
* ttyAMA0 -- added from command line
*
* Traverse the console_cmdline array in reverse order to be
* sure that if this console is preferred then it will be the first
* matching entry. We use the invariant that is maintained in
* __add_preferred_console().
* See if this console matches one we selected on
* the command line.
*/
for (i = console_cmdline_cnt - 1; i >= 0; i--) {
c = console_cmdline + i;
for (i = 0, c = console_cmdline;
i < MAX_CMDLINECONSOLES && c->name[0];
i++, c++) {
if (!newcon->match ||
newcon->match(newcon, c->name, c->index, c->options) != 0) {
/* default matching */

查看文件

@@ -60,19 +60,25 @@ int ptrace_access_vm(struct task_struct *tsk, unsigned long addr,
}
void __ptrace_link(struct task_struct *child, struct task_struct *new_parent,
const struct cred *ptracer_cred)
{
BUG_ON(!list_empty(&child->ptrace_entry));
list_add(&child->ptrace_entry, &new_parent->ptraced);
child->parent = new_parent;
child->ptracer_cred = get_cred(ptracer_cred);
}
/*
* ptrace a task: make the debugger its new parent and
* move it to the ptrace list.
*
* Must be called with the tasklist lock write-held.
*/
void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
static void ptrace_link(struct task_struct *child, struct task_struct *new_parent)
{
BUG_ON(!list_empty(&child->ptrace_entry));
list_add(&child->ptrace_entry, &new_parent->ptraced);
child->parent = new_parent;
rcu_read_lock();
child->ptracer_cred = get_cred(__task_cred(new_parent));
__ptrace_link(child, new_parent, __task_cred(new_parent));
rcu_read_unlock();
}
@@ -386,7 +392,7 @@ static int ptrace_attach(struct task_struct *task, long request,
flags |= PT_SEIZED;
task->ptrace = flags;
__ptrace_link(task, current);
ptrace_link(task, current);
/* SEIZE doesn't trap tracee on attach */
if (!seize)
@@ -459,7 +465,7 @@ static int ptrace_traceme(void)
*/
if (!ret && !(current->real_parent->flags & PF_EXITING)) {
current->ptrace = PT_PTRACED;
__ptrace_link(current, current->real_parent);
ptrace_link(current, current->real_parent);
}
}
write_unlock_irq(&tasklist_lock);

查看文件

@@ -263,7 +263,7 @@ EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
/*
* Counts the new reader in the appropriate per-CPU element of the
* srcu_struct. Must be called from process context.
* srcu_struct.
* Returns an index that must be passed to the matching srcu_read_unlock().
*/
int __srcu_read_lock(struct srcu_struct *sp)
@@ -271,7 +271,7 @@ int __srcu_read_lock(struct srcu_struct *sp)
int idx;
idx = READ_ONCE(sp->completed) & 0x1;
__this_cpu_inc(sp->per_cpu_ref->lock_count[idx]);
this_cpu_inc(sp->per_cpu_ref->lock_count[idx]);
smp_mb(); /* B */ /* Avoid leaking the critical section. */
return idx;
}
@@ -281,7 +281,6 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock);
* Removes the count for the old reader from the appropriate per-CPU
* element of the srcu_struct. Note that this may well be a different
* CPU than that which was incremented by the corresponding srcu_read_lock().
* Must be called from process context.
*/
void __srcu_read_unlock(struct srcu_struct *sp, int idx)
{

查看文件

@@ -97,8 +97,9 @@ EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
/*
* Counts the new reader in the appropriate per-CPU element of the
* srcu_struct. Must be called from process context.
* Returns an index that must be passed to the matching srcu_read_unlock().
* srcu_struct. Can be invoked from irq/bh handlers, but the matching
* __srcu_read_unlock() must be in the same handler instance. Returns an
* index that must be passed to the matching srcu_read_unlock().
*/
int __srcu_read_lock(struct srcu_struct *sp)
{
@@ -112,7 +113,7 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock);
/*
* Removes the count for the old reader from the appropriate element of
* the srcu_struct. Must be called from process context.
* the srcu_struct.
*/
void __srcu_read_unlock(struct srcu_struct *sp, int idx)
{

查看文件

@@ -357,7 +357,7 @@ EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
/*
* Counts the new reader in the appropriate per-CPU element of the
* srcu_struct. Must be called from process context.
* srcu_struct.
* Returns an index that must be passed to the matching srcu_read_unlock().
*/
int __srcu_read_lock(struct srcu_struct *sp)
@@ -365,7 +365,7 @@ int __srcu_read_lock(struct srcu_struct *sp)
int idx;
idx = READ_ONCE(sp->srcu_idx) & 0x1;
__this_cpu_inc(sp->sda->srcu_lock_count[idx]);
this_cpu_inc(sp->sda->srcu_lock_count[idx]);
smp_mb(); /* B */ /* Avoid leaking the critical section. */
return idx;
}
@@ -375,7 +375,6 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock);
* Removes the count for the old reader from the appropriate per-CPU
* element of the srcu_struct. Note that this may well be a different
* CPU than that which was incremented by the corresponding srcu_read_lock().
* Must be called from process context.
*/
void __srcu_read_unlock(struct srcu_struct *sp, int idx)
{

查看文件

@@ -17,7 +17,7 @@ endif
obj-y += core.o loadavg.o clock.o cputime.o
obj-y += idle_task.o fair.o rt.o deadline.o
obj-y += wait.o swait.o completion.o idle.o
obj-y += wait.o wait_bit.o swait.o completion.o idle.o
obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o
obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
obj-$(CONFIG_SCHEDSTATS) += stats.o

查看文件

@@ -66,7 +66,7 @@ do_wait_for_common(struct completion *x,
if (!x->done) {
DECLARE_WAITQUEUE(wait, current);
__add_wait_queue_tail_exclusive(&x->wait, &wait);
__add_wait_queue_entry_tail_exclusive(&x->wait, &wait);
do {
if (signal_pending_state(state, current)) {
timeout = -ERESTARTSYS;

查看文件

@@ -10,6 +10,7 @@
#include <uapi/linux/sched/types.h>
#include <linux/sched/loadavg.h>
#include <linux/sched/hotplug.h>
#include <linux/wait_bit.h>
#include <linux/cpuset.h>
#include <linux/delayacct.h>
#include <linux/init_task.h>
@@ -3692,7 +3693,7 @@ asmlinkage __visible void __sched preempt_schedule_irq(void)
exception_exit(prev_state);
}
int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags,
int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags,
void *key)
{
return try_to_wake_up(curr->private, mode, wake_flags);
@@ -5592,7 +5593,7 @@ void idle_task_exit(void)
BUG_ON(cpu_online(smp_processor_id()));
if (mm != &init_mm) {
switch_mm_irqs_off(mm, &init_mm, current);
switch_mm(mm, &init_mm, current);
finish_arch_post_lock_switch();
}
mmdrop(mm);
@@ -6010,28 +6011,13 @@ static struct kmem_cache *task_group_cache __read_mostly;
DECLARE_PER_CPU(cpumask_var_t, load_balance_mask);
DECLARE_PER_CPU(cpumask_var_t, select_idle_mask);
#define WAIT_TABLE_BITS 8
#define WAIT_TABLE_SIZE (1 << WAIT_TABLE_BITS)
static wait_queue_head_t bit_wait_table[WAIT_TABLE_SIZE] __cacheline_aligned;
wait_queue_head_t *bit_waitqueue(void *word, int bit)
{
const int shift = BITS_PER_LONG == 32 ? 5 : 6;
unsigned long val = (unsigned long)word << shift | bit;
return bit_wait_table + hash_long(val, WAIT_TABLE_BITS);
}
EXPORT_SYMBOL(bit_waitqueue);
void __init sched_init(void)
{
int i, j;
unsigned long alloc_size = 0, ptr;
sched_clock_init();
for (i = 0; i < WAIT_TABLE_SIZE; i++)
init_waitqueue_head(bit_wait_table + i);
wait_bit_init();
#ifdef CONFIG_FAIR_GROUP_SCHED
alloc_size += 2 * nr_cpu_ids * sizeof(void **);

查看文件

@@ -101,9 +101,6 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
if (sg_policy->next_freq == next_freq)
return;
if (sg_policy->next_freq > next_freq)
next_freq = (sg_policy->next_freq + next_freq) >> 1;
sg_policy->next_freq = next_freq;
sg_policy->last_freq_update_time = time;

查看文件

@@ -3565,7 +3565,7 @@ static inline void check_schedstat_required(void)
trace_sched_stat_runtime_enabled()) {
printk_deferred_once("Scheduler tracepoints stat_sleep, stat_iowait, "
"stat_blocked and stat_runtime require the "
"kernel parameter schedstats=enabled or "
"kernel parameter schedstats=enable or "
"kernel.sched_schedstats=1\n");
}
#endif

查看文件

@@ -12,44 +12,44 @@
#include <linux/hash.h>
#include <linux/kthread.h>
void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_class_key *key)
void __init_waitqueue_head(struct wait_queue_head *wq_head, const char *name, struct lock_class_key *key)
{
spin_lock_init(&q->lock);
lockdep_set_class_and_name(&q->lock, key, name);
INIT_LIST_HEAD(&q->task_list);
spin_lock_init(&wq_head->lock);
lockdep_set_class_and_name(&wq_head->lock, key, name);
INIT_LIST_HEAD(&wq_head->head);
}
EXPORT_SYMBOL(__init_waitqueue_head);
void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
{
unsigned long flags;
wait->flags &= ~WQ_FLAG_EXCLUSIVE;
spin_lock_irqsave(&q->lock, flags);
__add_wait_queue(q, wait);
spin_unlock_irqrestore(&q->lock, flags);
wq_entry->flags &= ~WQ_FLAG_EXCLUSIVE;
spin_lock_irqsave(&wq_head->lock, flags);
__add_wait_queue_entry_tail(wq_head, wq_entry);
spin_unlock_irqrestore(&wq_head->lock, flags);
}
EXPORT_SYMBOL(add_wait_queue);
void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
{
unsigned long flags;
wait->flags |= WQ_FLAG_EXCLUSIVE;
spin_lock_irqsave(&q->lock, flags);
__add_wait_queue_tail(q, wait);
spin_unlock_irqrestore(&q->lock, flags);
wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
spin_lock_irqsave(&wq_head->lock, flags);
__add_wait_queue_entry_tail(wq_head, wq_entry);
spin_unlock_irqrestore(&wq_head->lock, flags);
}
EXPORT_SYMBOL(add_wait_queue_exclusive);
void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
{
unsigned long flags;
spin_lock_irqsave(&q->lock, flags);
__remove_wait_queue(q, wait);
spin_unlock_irqrestore(&q->lock, flags);
spin_lock_irqsave(&wq_head->lock, flags);
__remove_wait_queue(wq_head, wq_entry);
spin_unlock_irqrestore(&wq_head->lock, flags);
}
EXPORT_SYMBOL(remove_wait_queue);
@@ -63,12 +63,12 @@ EXPORT_SYMBOL(remove_wait_queue);
* started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
* zero in this (rare) case, and we handle it by continuing to scan the queue.
*/
static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
static void __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode,
int nr_exclusive, int wake_flags, void *key)
{
wait_queue_t *curr, *next;
wait_queue_entry_t *curr, *next;
list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
list_for_each_entry_safe(curr, next, &wq_head->head, entry) {
unsigned flags = curr->flags;
if (curr->func(curr, mode, wake_flags, key) &&
@@ -79,7 +79,7 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
/**
* __wake_up - wake up threads blocked on a waitqueue.
* @q: the waitqueue
* @wq_head: the waitqueue
* @mode: which threads
* @nr_exclusive: how many wake-one or wake-many threads to wake up
* @key: is directly passed to the wakeup function
@@ -87,35 +87,35 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
* It may be assumed that this function implies a write memory barrier before
* changing the task state if and only if any tasks are woken up.
*/
void __wake_up(wait_queue_head_t *q, unsigned int mode,
void __wake_up(struct wait_queue_head *wq_head, unsigned int mode,
int nr_exclusive, void *key)
{
unsigned long flags;
spin_lock_irqsave(&q->lock, flags);
__wake_up_common(q, mode, nr_exclusive, 0, key);
spin_unlock_irqrestore(&q->lock, flags);
spin_lock_irqsave(&wq_head->lock, flags);
__wake_up_common(wq_head, mode, nr_exclusive, 0, key);
spin_unlock_irqrestore(&wq_head->lock, flags);
}
EXPORT_SYMBOL(__wake_up);
/*
* Same as __wake_up but called with the spinlock in wait_queue_head_t held.
*/
void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr)
void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr)
{
__wake_up_common(q, mode, nr, 0, NULL);
__wake_up_common(wq_head, mode, nr, 0, NULL);
}
EXPORT_SYMBOL_GPL(__wake_up_locked);
void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key)
{
__wake_up_common(q, mode, 1, 0, key);
__wake_up_common(wq_head, mode, 1, 0, key);
}
EXPORT_SYMBOL_GPL(__wake_up_locked_key);
/**
* __wake_up_sync_key - wake up threads blocked on a waitqueue.
* @q: the waitqueue
* @wq_head: the waitqueue
* @mode: which threads
* @nr_exclusive: how many wake-one or wake-many threads to wake up
* @key: opaque value to be passed to wakeup targets
@@ -130,30 +130,30 @@ EXPORT_SYMBOL_GPL(__wake_up_locked_key);
* It may be assumed that this function implies a write memory barrier before
* changing the task state if and only if any tasks are woken up.
*/
void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode,
void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode,
int nr_exclusive, void *key)
{
unsigned long flags;
int wake_flags = 1; /* XXX WF_SYNC */
if (unlikely(!q))
if (unlikely(!wq_head))
return;
if (unlikely(nr_exclusive != 1))
wake_flags = 0;
spin_lock_irqsave(&q->lock, flags);
__wake_up_common(q, mode, nr_exclusive, wake_flags, key);
spin_unlock_irqrestore(&q->lock, flags);
spin_lock_irqsave(&wq_head->lock, flags);
__wake_up_common(wq_head, mode, nr_exclusive, wake_flags, key);
spin_unlock_irqrestore(&wq_head->lock, flags);
}
EXPORT_SYMBOL_GPL(__wake_up_sync_key);
/*
* __wake_up_sync - see __wake_up_sync_key()
*/
void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr_exclusive)
{
__wake_up_sync_key(q, mode, nr_exclusive, NULL);
__wake_up_sync_key(wq_head, mode, nr_exclusive, NULL);
}
EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */
@@ -170,48 +170,48 @@ EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */
* loads to move into the critical region).
*/
void
prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
{
unsigned long flags;
wait->flags &= ~WQ_FLAG_EXCLUSIVE;
spin_lock_irqsave(&q->lock, flags);
if (list_empty(&wait->task_list))
__add_wait_queue(q, wait);
wq_entry->flags &= ~WQ_FLAG_EXCLUSIVE;
spin_lock_irqsave(&wq_head->lock, flags);
if (list_empty(&wq_entry->entry))
__add_wait_queue(wq_head, wq_entry);
set_current_state(state);
spin_unlock_irqrestore(&q->lock, flags);
spin_unlock_irqrestore(&wq_head->lock, flags);
}
EXPORT_SYMBOL(prepare_to_wait);
void
prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
{
unsigned long flags;
wait->flags |= WQ_FLAG_EXCLUSIVE;
spin_lock_irqsave(&q->lock, flags);
if (list_empty(&wait->task_list))
__add_wait_queue_tail(q, wait);
wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
spin_lock_irqsave(&wq_head->lock, flags);
if (list_empty(&wq_entry->entry))
__add_wait_queue_entry_tail(wq_head, wq_entry);
set_current_state(state);
spin_unlock_irqrestore(&q->lock, flags);
spin_unlock_irqrestore(&wq_head->lock, flags);
}
EXPORT_SYMBOL(prepare_to_wait_exclusive);
void init_wait_entry(wait_queue_t *wait, int flags)
void init_wait_entry(struct wait_queue_entry *wq_entry, int flags)
{
wait->flags = flags;
wait->private = current;
wait->func = autoremove_wake_function;
INIT_LIST_HEAD(&wait->task_list);
wq_entry->flags = flags;
wq_entry->private = current;
wq_entry->func = autoremove_wake_function;
INIT_LIST_HEAD(&wq_entry->entry);
}
EXPORT_SYMBOL(init_wait_entry);
long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state)
long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
{
unsigned long flags;
long ret = 0;
spin_lock_irqsave(&q->lock, flags);
spin_lock_irqsave(&wq_head->lock, flags);
if (unlikely(signal_pending_state(state, current))) {
/*
* Exclusive waiter must not fail if it was selected by wakeup,
@@ -219,24 +219,24 @@ long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state)
*
* The caller will recheck the condition and return success if
* we were already woken up, we can not miss the event because
* wakeup locks/unlocks the same q->lock.
* wakeup locks/unlocks the same wq_head->lock.
*
* But we need to ensure that set-condition + wakeup after that
* can't see us, it should wake up another exclusive waiter if
* we fail.
*/
list_del_init(&wait->task_list);
list_del_init(&wq_entry->entry);
ret = -ERESTARTSYS;
} else {
if (list_empty(&wait->task_list)) {
if (wait->flags & WQ_FLAG_EXCLUSIVE)
__add_wait_queue_tail(q, wait);
if (list_empty(&wq_entry->entry)) {
if (wq_entry->flags & WQ_FLAG_EXCLUSIVE)
__add_wait_queue_entry_tail(wq_head, wq_entry);
else
__add_wait_queue(q, wait);
__add_wait_queue(wq_head, wq_entry);
}
set_current_state(state);
}
spin_unlock_irqrestore(&q->lock, flags);
spin_unlock_irqrestore(&wq_head->lock, flags);
return ret;
}
@@ -249,10 +249,10 @@ EXPORT_SYMBOL(prepare_to_wait_event);
* condition in the caller before they add the wait
* entry to the wake queue.
*/
int do_wait_intr(wait_queue_head_t *wq, wait_queue_t *wait)
int do_wait_intr(wait_queue_head_t *wq, wait_queue_entry_t *wait)
{
if (likely(list_empty(&wait->task_list)))
__add_wait_queue_tail(wq, wait);
if (likely(list_empty(&wait->entry)))
__add_wait_queue_entry_tail(wq, wait);
set_current_state(TASK_INTERRUPTIBLE);
if (signal_pending(current))
@@ -265,10 +265,10 @@ int do_wait_intr(wait_queue_head_t *wq, wait_queue_t *wait)
}
EXPORT_SYMBOL(do_wait_intr);
int do_wait_intr_irq(wait_queue_head_t *wq, wait_queue_t *wait)
int do_wait_intr_irq(wait_queue_head_t *wq, wait_queue_entry_t *wait)
{
if (likely(list_empty(&wait->task_list)))
__add_wait_queue_tail(wq, wait);
if (likely(list_empty(&wait->entry)))
__add_wait_queue_entry_tail(wq, wait);
set_current_state(TASK_INTERRUPTIBLE);
if (signal_pending(current))
@@ -283,14 +283,14 @@ EXPORT_SYMBOL(do_wait_intr_irq);
/**
* finish_wait - clean up after waiting in a queue
* @q: waitqueue waited on
* @wait: wait descriptor
* @wq_head: waitqueue waited on
* @wq_entry: wait descriptor
*
* Sets current thread back to running state and removes
* the wait descriptor from the given waitqueue if still
* queued.
*/
void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
{
unsigned long flags;
@@ -308,20 +308,20 @@ void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
* have _one_ other CPU that looks at or modifies
* the list).
*/
if (!list_empty_careful(&wait->task_list)) {
spin_lock_irqsave(&q->lock, flags);
list_del_init(&wait->task_list);
spin_unlock_irqrestore(&q->lock, flags);
if (!list_empty_careful(&wq_entry->entry)) {
spin_lock_irqsave(&wq_head->lock, flags);
list_del_init(&wq_entry->entry);
spin_unlock_irqrestore(&wq_head->lock, flags);
}
}
EXPORT_SYMBOL(finish_wait);
int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key)
{
int ret = default_wake_function(wait, mode, sync, key);
int ret = default_wake_function(wq_entry, mode, sync, key);
if (ret)
list_del_init(&wait->task_list);
list_del_init(&wq_entry->entry);
return ret;
}
EXPORT_SYMBOL(autoremove_wake_function);
@@ -334,24 +334,24 @@ static inline bool is_kthread_should_stop(void)
/*
* DEFINE_WAIT_FUNC(wait, woken_wake_func);
*
* add_wait_queue(&wq, &wait);
* add_wait_queue(&wq_head, &wait);
* for (;;) {
* if (condition)
* break;
*
* p->state = mode; condition = true;
* smp_mb(); // A smp_wmb(); // C
* if (!wait->flags & WQ_FLAG_WOKEN) wait->flags |= WQ_FLAG_WOKEN;
* if (!wq_entry->flags & WQ_FLAG_WOKEN) wq_entry->flags |= WQ_FLAG_WOKEN;
* schedule() try_to_wake_up();
* p->state = TASK_RUNNING; ~~~~~~~~~~~~~~~~~~
* wait->flags &= ~WQ_FLAG_WOKEN; condition = true;
* wq_entry->flags &= ~WQ_FLAG_WOKEN; condition = true;
* smp_mb() // B smp_wmb(); // C
* wait->flags |= WQ_FLAG_WOKEN;
* wq_entry->flags |= WQ_FLAG_WOKEN;
* }
* remove_wait_queue(&wq, &wait);
* remove_wait_queue(&wq_head, &wait);
*
*/
long wait_woken(wait_queue_t *wait, unsigned mode, long timeout)
long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout)
{
set_current_state(mode); /* A */
/*
@@ -359,7 +359,7 @@ long wait_woken(wait_queue_t *wait, unsigned mode, long timeout)
* woken_wake_function() such that if we observe WQ_FLAG_WOKEN we must
* also observe all state before the wakeup.
*/
if (!(wait->flags & WQ_FLAG_WOKEN) && !is_kthread_should_stop())
if (!(wq_entry->flags & WQ_FLAG_WOKEN) && !is_kthread_should_stop())
timeout = schedule_timeout(timeout);
__set_current_state(TASK_RUNNING);
@@ -369,13 +369,13 @@ long wait_woken(wait_queue_t *wait, unsigned mode, long timeout)
* condition being true _OR_ WQ_FLAG_WOKEN such that we will not miss
* an event.
*/
smp_store_mb(wait->flags, wait->flags & ~WQ_FLAG_WOKEN); /* B */
smp_store_mb(wq_entry->flags, wq_entry->flags & ~WQ_FLAG_WOKEN); /* B */
return timeout;
}
EXPORT_SYMBOL(wait_woken);
int woken_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
int woken_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key)
{
/*
* Although this function is called under waitqueue lock, LOCK
@@ -385,267 +385,8 @@ int woken_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
* and is paired with smp_store_mb() in wait_woken().
*/
smp_wmb(); /* C */
wait->flags |= WQ_FLAG_WOKEN;
wq_entry->flags |= WQ_FLAG_WOKEN;
return default_wake_function(wait, mode, sync, key);
return default_wake_function(wq_entry, mode, sync, key);
}
EXPORT_SYMBOL(woken_wake_function);
int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *arg)
{
struct wait_bit_key *key = arg;
struct wait_bit_queue *wait_bit
= container_of(wait, struct wait_bit_queue, wait);
if (wait_bit->key.flags != key->flags ||
wait_bit->key.bit_nr != key->bit_nr ||
test_bit(key->bit_nr, key->flags))
return 0;
else
return autoremove_wake_function(wait, mode, sync, key);
}
EXPORT_SYMBOL(wake_bit_function);
/*
* To allow interruptible waiting and asynchronous (i.e. nonblocking)
* waiting, the actions of __wait_on_bit() and __wait_on_bit_lock() are
* permitted return codes. Nonzero return codes halt waiting and return.
*/
int __sched
__wait_on_bit(wait_queue_head_t *wq, struct wait_bit_queue *q,
wait_bit_action_f *action, unsigned mode)
{
int ret = 0;
do {
prepare_to_wait(wq, &q->wait, mode);
if (test_bit(q->key.bit_nr, q->key.flags))
ret = (*action)(&q->key, mode);
} while (test_bit(q->key.bit_nr, q->key.flags) && !ret);
finish_wait(wq, &q->wait);
return ret;
}
EXPORT_SYMBOL(__wait_on_bit);
int __sched out_of_line_wait_on_bit(void *word, int bit,
wait_bit_action_f *action, unsigned mode)
{
wait_queue_head_t *wq = bit_waitqueue(word, bit);
DEFINE_WAIT_BIT(wait, word, bit);
return __wait_on_bit(wq, &wait, action, mode);
}
EXPORT_SYMBOL(out_of_line_wait_on_bit);
int __sched out_of_line_wait_on_bit_timeout(
void *word, int bit, wait_bit_action_f *action,
unsigned mode, unsigned long timeout)
{
wait_queue_head_t *wq = bit_waitqueue(word, bit);
DEFINE_WAIT_BIT(wait, word, bit);
wait.key.timeout = jiffies + timeout;
return __wait_on_bit(wq, &wait, action, mode);
}
EXPORT_SYMBOL_GPL(out_of_line_wait_on_bit_timeout);
int __sched
__wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q,
wait_bit_action_f *action, unsigned mode)
{
int ret = 0;
for (;;) {
prepare_to_wait_exclusive(wq, &q->wait, mode);
if (test_bit(q->key.bit_nr, q->key.flags)) {
ret = action(&q->key, mode);
/*
* See the comment in prepare_to_wait_event().
* finish_wait() does not necessarily takes wq->lock,
* but test_and_set_bit() implies mb() which pairs with
* smp_mb__after_atomic() before wake_up_page().
*/
if (ret)
finish_wait(wq, &q->wait);
}
if (!test_and_set_bit(q->key.bit_nr, q->key.flags)) {
if (!ret)
finish_wait(wq, &q->wait);
return 0;
} else if (ret) {
return ret;
}
}
}
EXPORT_SYMBOL(__wait_on_bit_lock);
int __sched out_of_line_wait_on_bit_lock(void *word, int bit,
wait_bit_action_f *action, unsigned mode)
{
wait_queue_head_t *wq = bit_waitqueue(word, bit);
DEFINE_WAIT_BIT(wait, word, bit);
return __wait_on_bit_lock(wq, &wait, action, mode);
}
EXPORT_SYMBOL(out_of_line_wait_on_bit_lock);
void __wake_up_bit(wait_queue_head_t *wq, void *word, int bit)
{
struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit);
if (waitqueue_active(wq))
__wake_up(wq, TASK_NORMAL, 1, &key);
}
EXPORT_SYMBOL(__wake_up_bit);
/**
* wake_up_bit - wake up a waiter on a bit
* @word: the word being waited on, a kernel virtual address
* @bit: the bit of the word being waited on
*
* There is a standard hashed waitqueue table for generic use. This
* is the part of the hashtable's accessor API that wakes up waiters
* on a bit. For instance, if one were to have waiters on a bitflag,
* one would call wake_up_bit() after clearing the bit.
*
* In order for this to function properly, as it uses waitqueue_active()
* internally, some kind of memory barrier must be done prior to calling
* this. Typically, this will be smp_mb__after_atomic(), but in some
* cases where bitflags are manipulated non-atomically under a lock, one
* may need to use a less regular barrier, such fs/inode.c's smp_mb(),
* because spin_unlock() does not guarantee a memory barrier.
*/
void wake_up_bit(void *word, int bit)
{
__wake_up_bit(bit_waitqueue(word, bit), word, bit);
}
EXPORT_SYMBOL(wake_up_bit);
/*
* Manipulate the atomic_t address to produce a better bit waitqueue table hash
* index (we're keying off bit -1, but that would produce a horrible hash
* value).
*/
static inline wait_queue_head_t *atomic_t_waitqueue(atomic_t *p)
{
if (BITS_PER_LONG == 64) {
unsigned long q = (unsigned long)p;
return bit_waitqueue((void *)(q & ~1), q & 1);
}
return bit_waitqueue(p, 0);
}
static int wake_atomic_t_function(wait_queue_t *wait, unsigned mode, int sync,
void *arg)
{
struct wait_bit_key *key = arg;
struct wait_bit_queue *wait_bit
= container_of(wait, struct wait_bit_queue, wait);
atomic_t *val = key->flags;
if (wait_bit->key.flags != key->flags ||
wait_bit->key.bit_nr != key->bit_nr ||
atomic_read(val) != 0)
return 0;
return autoremove_wake_function(wait, mode, sync, key);
}
/*
* To allow interruptible waiting and asynchronous (i.e. nonblocking) waiting,
* the actions of __wait_on_atomic_t() are permitted return codes. Nonzero
* return codes halt waiting and return.
*/
static __sched
int __wait_on_atomic_t(wait_queue_head_t *wq, struct wait_bit_queue *q,
int (*action)(atomic_t *), unsigned mode)
{
atomic_t *val;
int ret = 0;
do {
prepare_to_wait(wq, &q->wait, mode);
val = q->key.flags;
if (atomic_read(val) == 0)
break;
ret = (*action)(val);
} while (!ret && atomic_read(val) != 0);
finish_wait(wq, &q->wait);
return ret;
}
#define DEFINE_WAIT_ATOMIC_T(name, p) \
struct wait_bit_queue name = { \
.key = __WAIT_ATOMIC_T_KEY_INITIALIZER(p), \
.wait = { \
.private = current, \
.func = wake_atomic_t_function, \
.task_list = \
LIST_HEAD_INIT((name).wait.task_list), \
}, \
}
__sched int out_of_line_wait_on_atomic_t(atomic_t *p, int (*action)(atomic_t *),
unsigned mode)
{
wait_queue_head_t *wq = atomic_t_waitqueue(p);
DEFINE_WAIT_ATOMIC_T(wait, p);
return __wait_on_atomic_t(wq, &wait, action, mode);
}
EXPORT_SYMBOL(out_of_line_wait_on_atomic_t);
/**
* wake_up_atomic_t - Wake up a waiter on a atomic_t
* @p: The atomic_t being waited on, a kernel virtual address
*
* Wake up anyone waiting for the atomic_t to go to zero.
*
* Abuse the bit-waker function and its waitqueue hash table set (the atomic_t
* check is done by the waiter's wake function, not the by the waker itself).
*/
void wake_up_atomic_t(atomic_t *p)
{
__wake_up_bit(atomic_t_waitqueue(p), p, WAIT_ATOMIC_T_BIT_NR);
}
EXPORT_SYMBOL(wake_up_atomic_t);
__sched int bit_wait(struct wait_bit_key *word, int mode)
{
schedule();
if (signal_pending_state(mode, current))
return -EINTR;
return 0;
}
EXPORT_SYMBOL(bit_wait);
__sched int bit_wait_io(struct wait_bit_key *word, int mode)
{
io_schedule();
if (signal_pending_state(mode, current))
return -EINTR;
return 0;
}
EXPORT_SYMBOL(bit_wait_io);
__sched int bit_wait_timeout(struct wait_bit_key *word, int mode)
{
unsigned long now = READ_ONCE(jiffies);
if (time_after_eq(now, word->timeout))
return -EAGAIN;
schedule_timeout(word->timeout - now);
if (signal_pending_state(mode, current))
return -EINTR;
return 0;
}
EXPORT_SYMBOL_GPL(bit_wait_timeout);
__sched int bit_wait_io_timeout(struct wait_bit_key *word, int mode)
{
unsigned long now = READ_ONCE(jiffies);
if (time_after_eq(now, word->timeout))
return -EAGAIN;
io_schedule_timeout(word->timeout - now);
if (signal_pending_state(mode, current))
return -EINTR;
return 0;
}
EXPORT_SYMBOL_GPL(bit_wait_io_timeout);

286
kernel/sched/wait_bit.c 普通文件
查看文件

@@ -0,0 +1,286 @@
/*
* The implementation of the wait_bit*() and related waiting APIs:
*/
#include <linux/wait_bit.h>
#include <linux/sched/signal.h>
#include <linux/sched/debug.h>
#include <linux/hash.h>
#define WAIT_TABLE_BITS 8
#define WAIT_TABLE_SIZE (1 << WAIT_TABLE_BITS)
static wait_queue_head_t bit_wait_table[WAIT_TABLE_SIZE] __cacheline_aligned;
wait_queue_head_t *bit_waitqueue(void *word, int bit)
{
const int shift = BITS_PER_LONG == 32 ? 5 : 6;
unsigned long val = (unsigned long)word << shift | bit;
return bit_wait_table + hash_long(val, WAIT_TABLE_BITS);
}
EXPORT_SYMBOL(bit_waitqueue);
int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *arg)
{
struct wait_bit_key *key = arg;
struct wait_bit_queue_entry *wait_bit = container_of(wq_entry, struct wait_bit_queue_entry, wq_entry);
if (wait_bit->key.flags != key->flags ||
wait_bit->key.bit_nr != key->bit_nr ||
test_bit(key->bit_nr, key->flags))
return 0;
else
return autoremove_wake_function(wq_entry, mode, sync, key);
}
EXPORT_SYMBOL(wake_bit_function);
/*
* To allow interruptible waiting and asynchronous (i.e. nonblocking)
* waiting, the actions of __wait_on_bit() and __wait_on_bit_lock() are
* permitted return codes. Nonzero return codes halt waiting and return.
*/
int __sched
__wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry,
wait_bit_action_f *action, unsigned mode)
{
int ret = 0;
do {
prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode);
if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags))
ret = (*action)(&wbq_entry->key, mode);
} while (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret);
finish_wait(wq_head, &wbq_entry->wq_entry);
return ret;
}
EXPORT_SYMBOL(__wait_on_bit);
int __sched out_of_line_wait_on_bit(void *word, int bit,
wait_bit_action_f *action, unsigned mode)
{
struct wait_queue_head *wq_head = bit_waitqueue(word, bit);
DEFINE_WAIT_BIT(wq_entry, word, bit);
return __wait_on_bit(wq_head, &wq_entry, action, mode);
}
EXPORT_SYMBOL(out_of_line_wait_on_bit);
int __sched out_of_line_wait_on_bit_timeout(
void *word, int bit, wait_bit_action_f *action,
unsigned mode, unsigned long timeout)
{
struct wait_queue_head *wq_head = bit_waitqueue(word, bit);
DEFINE_WAIT_BIT(wq_entry, word, bit);
wq_entry.key.timeout = jiffies + timeout;
return __wait_on_bit(wq_head, &wq_entry, action, mode);
}
EXPORT_SYMBOL_GPL(out_of_line_wait_on_bit_timeout);
int __sched
__wait_on_bit_lock(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry,
wait_bit_action_f *action, unsigned mode)
{
int ret = 0;
for (;;) {
prepare_to_wait_exclusive(wq_head, &wbq_entry->wq_entry, mode);
if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) {
ret = action(&wbq_entry->key, mode);
/*
* See the comment in prepare_to_wait_event().
* finish_wait() does not necessarily takes wwq_head->lock,
* but test_and_set_bit() implies mb() which pairs with
* smp_mb__after_atomic() before wake_up_page().
*/
if (ret)
finish_wait(wq_head, &wbq_entry->wq_entry);
}
if (!test_and_set_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) {
if (!ret)
finish_wait(wq_head, &wbq_entry->wq_entry);
return 0;
} else if (ret) {
return ret;
}
}
}
EXPORT_SYMBOL(__wait_on_bit_lock);
int __sched out_of_line_wait_on_bit_lock(void *word, int bit,
wait_bit_action_f *action, unsigned mode)
{
struct wait_queue_head *wq_head = bit_waitqueue(word, bit);
DEFINE_WAIT_BIT(wq_entry, word, bit);
return __wait_on_bit_lock(wq_head, &wq_entry, action, mode);
}
EXPORT_SYMBOL(out_of_line_wait_on_bit_lock);
void __wake_up_bit(struct wait_queue_head *wq_head, void *word, int bit)
{
struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit);
if (waitqueue_active(wq_head))
__wake_up(wq_head, TASK_NORMAL, 1, &key);
}
EXPORT_SYMBOL(__wake_up_bit);
/**
* wake_up_bit - wake up a waiter on a bit
* @word: the word being waited on, a kernel virtual address
* @bit: the bit of the word being waited on
*
* There is a standard hashed waitqueue table for generic use. This
* is the part of the hashtable's accessor API that wakes up waiters
* on a bit. For instance, if one were to have waiters on a bitflag,
* one would call wake_up_bit() after clearing the bit.
*
* In order for this to function properly, as it uses waitqueue_active()
* internally, some kind of memory barrier must be done prior to calling
* this. Typically, this will be smp_mb__after_atomic(), but in some
* cases where bitflags are manipulated non-atomically under a lock, one
* may need to use a less regular barrier, such fs/inode.c's smp_mb(),
* because spin_unlock() does not guarantee a memory barrier.
*/
void wake_up_bit(void *word, int bit)
{
__wake_up_bit(bit_waitqueue(word, bit), word, bit);
}
EXPORT_SYMBOL(wake_up_bit);
/*
* Manipulate the atomic_t address to produce a better bit waitqueue table hash
* index (we're keying off bit -1, but that would produce a horrible hash
* value).
*/
static inline wait_queue_head_t *atomic_t_waitqueue(atomic_t *p)
{
if (BITS_PER_LONG == 64) {
unsigned long q = (unsigned long)p;
return bit_waitqueue((void *)(q & ~1), q & 1);
}
return bit_waitqueue(p, 0);
}
static int wake_atomic_t_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync,
void *arg)
{
struct wait_bit_key *key = arg;
struct wait_bit_queue_entry *wait_bit = container_of(wq_entry, struct wait_bit_queue_entry, wq_entry);
atomic_t *val = key->flags;
if (wait_bit->key.flags != key->flags ||
wait_bit->key.bit_nr != key->bit_nr ||
atomic_read(val) != 0)
return 0;
return autoremove_wake_function(wq_entry, mode, sync, key);
}
/*
* To allow interruptible waiting and asynchronous (i.e. nonblocking) waiting,
* the actions of __wait_on_atomic_t() are permitted return codes. Nonzero
* return codes halt waiting and return.
*/
static __sched
int __wait_on_atomic_t(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry,
int (*action)(atomic_t *), unsigned mode)
{
atomic_t *val;
int ret = 0;
do {
prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode);
val = wbq_entry->key.flags;
if (atomic_read(val) == 0)
break;
ret = (*action)(val);
} while (!ret && atomic_read(val) != 0);
finish_wait(wq_head, &wbq_entry->wq_entry);
return ret;
}
#define DEFINE_WAIT_ATOMIC_T(name, p) \
struct wait_bit_queue_entry name = { \
.key = __WAIT_ATOMIC_T_KEY_INITIALIZER(p), \
.wq_entry = { \
.private = current, \
.func = wake_atomic_t_function, \
.entry = \
LIST_HEAD_INIT((name).wq_entry.entry), \
}, \
}
__sched int out_of_line_wait_on_atomic_t(atomic_t *p, int (*action)(atomic_t *),
unsigned mode)
{
struct wait_queue_head *wq_head = atomic_t_waitqueue(p);
DEFINE_WAIT_ATOMIC_T(wq_entry, p);
return __wait_on_atomic_t(wq_head, &wq_entry, action, mode);
}
EXPORT_SYMBOL(out_of_line_wait_on_atomic_t);
/**
* wake_up_atomic_t - Wake up a waiter on a atomic_t
* @p: The atomic_t being waited on, a kernel virtual address
*
* Wake up anyone waiting for the atomic_t to go to zero.
*
* Abuse the bit-waker function and its waitqueue hash table set (the atomic_t
* check is done by the waiter's wake function, not the by the waker itself).
*/
void wake_up_atomic_t(atomic_t *p)
{
__wake_up_bit(atomic_t_waitqueue(p), p, WAIT_ATOMIC_T_BIT_NR);
}
EXPORT_SYMBOL(wake_up_atomic_t);
__sched int bit_wait(struct wait_bit_key *word, int mode)
{
schedule();
if (signal_pending_state(mode, current))
return -EINTR;
return 0;
}
EXPORT_SYMBOL(bit_wait);
__sched int bit_wait_io(struct wait_bit_key *word, int mode)
{
io_schedule();
if (signal_pending_state(mode, current))
return -EINTR;
return 0;
}
EXPORT_SYMBOL(bit_wait_io);
__sched int bit_wait_timeout(struct wait_bit_key *word, int mode)
{
unsigned long now = READ_ONCE(jiffies);
if (time_after_eq(now, word->timeout))
return -EAGAIN;
schedule_timeout(word->timeout - now);
if (signal_pending_state(mode, current))
return -EINTR;
return 0;
}
EXPORT_SYMBOL_GPL(bit_wait_timeout);
__sched int bit_wait_io_timeout(struct wait_bit_key *word, int mode)
{
unsigned long now = READ_ONCE(jiffies);
if (time_after_eq(now, word->timeout))
return -EAGAIN;
io_schedule_timeout(word->timeout - now);
if (signal_pending_state(mode, current))
return -EINTR;
return 0;
}
EXPORT_SYMBOL_GPL(bit_wait_io_timeout);
void __init wait_bit_init(void)
{
int i;
for (i = 0; i < WAIT_TABLE_SIZE; i++)
init_waitqueue_head(bit_wait_table + i);
}

查看文件

@@ -387,7 +387,7 @@ void alarm_start_relative(struct alarm *alarm, ktime_t start)
{
struct alarm_base *base = &alarm_bases[alarm->type];
start = ktime_add(start, base->gettime());
start = ktime_add_safe(start, base->gettime());
alarm_start(alarm, start);
}
EXPORT_SYMBOL_GPL(alarm_start_relative);
@@ -475,7 +475,7 @@ u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval)
overrun++;
}
alarm->node.expires = ktime_add(alarm->node.expires, interval);
alarm->node.expires = ktime_add_safe(alarm->node.expires, interval);
return overrun;
}
EXPORT_SYMBOL_GPL(alarm_forward);
@@ -660,13 +660,21 @@ static int alarm_timer_set(struct k_itimer *timr, int flags,
/* start the timer */
timr->it.alarm.interval = timespec64_to_ktime(new_setting->it_interval);
/*
* Rate limit to the tick as a hot fix to prevent DOS. Will be
* mopped up later.
*/
if (timr->it.alarm.interval < TICK_NSEC)
timr->it.alarm.interval = TICK_NSEC;
exp = timespec64_to_ktime(new_setting->it_value);
/* Convert (if necessary) to absolute time */
if (flags != TIMER_ABSTIME) {
ktime_t now;
now = alarm_bases[timr->it.alarm.alarmtimer.type].gettime();
exp = ktime_add(now, exp);
exp = ktime_add_safe(now, exp);
}
alarm_start(&timr->it.alarm.alarmtimer, exp);

查看文件

@@ -825,8 +825,10 @@ static void check_thread_timers(struct task_struct *tsk,
* At the hard limit, we just die.
* No need to calculate anything else now.
*/
pr_info("CPU Watchdog Timeout (hard): %s[%d]\n",
tsk->comm, task_pid_nr(tsk));
if (print_fatal_signals) {
pr_info("CPU Watchdog Timeout (hard): %s[%d]\n",
tsk->comm, task_pid_nr(tsk));
}
__group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
return;
}
@@ -838,8 +840,10 @@ static void check_thread_timers(struct task_struct *tsk,
soft += USEC_PER_SEC;
sig->rlim[RLIMIT_RTTIME].rlim_cur = soft;
}
pr_info("RT Watchdog Timeout (soft): %s[%d]\n",
tsk->comm, task_pid_nr(tsk));
if (print_fatal_signals) {
pr_info("RT Watchdog Timeout (soft): %s[%d]\n",
tsk->comm, task_pid_nr(tsk));
}
__group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
}
}
@@ -936,8 +940,10 @@ static void check_process_timers(struct task_struct *tsk,
* At the hard limit, we just die.
* No need to calculate anything else now.
*/
pr_info("RT Watchdog Timeout (hard): %s[%d]\n",
tsk->comm, task_pid_nr(tsk));
if (print_fatal_signals) {
pr_info("RT Watchdog Timeout (hard): %s[%d]\n",
tsk->comm, task_pid_nr(tsk));
}
__group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
return;
}
@@ -945,8 +951,10 @@ static void check_process_timers(struct task_struct *tsk,
/*
* At the soft limit, send a SIGXCPU every second.
*/
pr_info("CPU Watchdog Timeout (soft): %s[%d]\n",
tsk->comm, task_pid_nr(tsk));
if (print_fatal_signals) {
pr_info("CPU Watchdog Timeout (soft): %s[%d]\n",
tsk->comm, task_pid_nr(tsk));
}
__group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
if (soft < hard) {
soft++;

查看文件

@@ -37,9 +37,11 @@ static int tick_broadcast_forced;
static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
#ifdef CONFIG_TICK_ONESHOT
static void tick_broadcast_setup_oneshot(struct clock_event_device *bc);
static void tick_broadcast_clear_oneshot(int cpu);
static void tick_resume_broadcast_oneshot(struct clock_event_device *bc);
#else
static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); }
static inline void tick_broadcast_clear_oneshot(int cpu) { }
static inline void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { }
#endif
@@ -867,7 +869,7 @@ static void tick_broadcast_init_next_event(struct cpumask *mask,
/**
* tick_broadcast_setup_oneshot - setup the broadcast device
*/
void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
static void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
{
int cpu = smp_processor_id();

查看文件

@@ -126,7 +126,6 @@ static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
/* Functions related to oneshot broadcasting */
#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT)
extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc);
extern void tick_broadcast_switch_to_oneshot(void);
extern void tick_shutdown_broadcast_oneshot(unsigned int cpu);
extern int tick_broadcast_oneshot_active(void);
@@ -134,7 +133,6 @@ extern void tick_check_oneshot_broadcast_this_cpu(void);
bool tick_broadcast_oneshot_available(void);
extern struct cpumask *tick_get_broadcast_oneshot_mask(void);
#else /* !(BROADCAST && ONESHOT): */
static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); }
static inline void tick_broadcast_switch_to_oneshot(void) { }
static inline void tick_shutdown_broadcast_oneshot(unsigned int cpu) { }
static inline int tick_broadcast_oneshot_active(void) { return 0; }

查看文件

@@ -5063,7 +5063,7 @@ ftrace_graph_release(struct inode *inode, struct file *file)
}
out:
kfree(fgd->new_hash);
free_ftrace_hash(fgd->new_hash);
kfree(fgd);
return ret;

查看文件

@@ -2864,11 +2864,11 @@ bool flush_work(struct work_struct *work)
EXPORT_SYMBOL_GPL(flush_work);
struct cwt_wait {
wait_queue_t wait;
wait_queue_entry_t wait;
struct work_struct *work;
};
static int cwt_wakefn(wait_queue_t *wait, unsigned mode, int sync, void *key)
static int cwt_wakefn(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
{
struct cwt_wait *cwait = container_of(wait, struct cwt_wait, wait);