Merge branch 'linus' into timers/core

Pick up upstream fixes for pending changes.
This commit is contained in:
Thomas Gleixner
2019-06-22 12:07:35 +02:00
209 changed files with 2009 additions and 1165 deletions

View File

@@ -215,7 +215,8 @@ static struct cftype cgroup_base_files[];
static int cgroup_apply_control(struct cgroup *cgrp);
static void cgroup_finalize_control(struct cgroup *cgrp, int ret);
static void css_task_iter_advance(struct css_task_iter *it);
static void css_task_iter_skip(struct css_task_iter *it,
struct task_struct *task);
static int cgroup_destroy_locked(struct cgroup *cgrp);
static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
struct cgroup_subsys *ss);
@@ -738,6 +739,7 @@ struct css_set init_css_set = {
.dom_cset = &init_css_set,
.tasks = LIST_HEAD_INIT(init_css_set.tasks),
.mg_tasks = LIST_HEAD_INIT(init_css_set.mg_tasks),
.dying_tasks = LIST_HEAD_INIT(init_css_set.dying_tasks),
.task_iters = LIST_HEAD_INIT(init_css_set.task_iters),
.threaded_csets = LIST_HEAD_INIT(init_css_set.threaded_csets),
.cgrp_links = LIST_HEAD_INIT(init_css_set.cgrp_links),
@@ -843,6 +845,21 @@ static void css_set_update_populated(struct css_set *cset, bool populated)
cgroup_update_populated(link->cgrp, populated);
}
/*
* @task is leaving, advance task iterators which are pointing to it so
* that they can resume at the next position. Advancing an iterator might
* remove it from the list, use safe walk. See css_task_iter_skip() for
* details.
*/
static void css_set_skip_task_iters(struct css_set *cset,
struct task_struct *task)
{
struct css_task_iter *it, *pos;
list_for_each_entry_safe(it, pos, &cset->task_iters, iters_node)
css_task_iter_skip(it, task);
}
/**
* css_set_move_task - move a task from one css_set to another
* @task: task being moved
@@ -868,22 +885,9 @@ static void css_set_move_task(struct task_struct *task,
css_set_update_populated(to_cset, true);
if (from_cset) {
struct css_task_iter *it, *pos;
WARN_ON_ONCE(list_empty(&task->cg_list));
/*
* @task is leaving, advance task iterators which are
* pointing to it so that they can resume at the next
* position. Advancing an iterator might remove it from
* the list, use safe walk. See css_task_iter_advance*()
* for details.
*/
list_for_each_entry_safe(it, pos, &from_cset->task_iters,
iters_node)
if (it->task_pos == &task->cg_list)
css_task_iter_advance(it);
css_set_skip_task_iters(from_cset, task);
list_del_init(&task->cg_list);
if (!css_set_populated(from_cset))
css_set_update_populated(from_cset, false);
@@ -1210,6 +1214,7 @@ static struct css_set *find_css_set(struct css_set *old_cset,
cset->dom_cset = cset;
INIT_LIST_HEAD(&cset->tasks);
INIT_LIST_HEAD(&cset->mg_tasks);
INIT_LIST_HEAD(&cset->dying_tasks);
INIT_LIST_HEAD(&cset->task_iters);
INIT_LIST_HEAD(&cset->threaded_csets);
INIT_HLIST_NODE(&cset->hlist);
@@ -1460,8 +1465,8 @@ struct cgroup *task_cgroup_from_root(struct task_struct *task,
static struct kernfs_syscall_ops cgroup_kf_syscall_ops;
static char *cgroup_fill_name(struct cgroup *cgrp, const struct cftype *cft,
char *buf, bool write_link_name)
static char *cgroup_file_name(struct cgroup *cgrp, const struct cftype *cft,
char *buf)
{
struct cgroup_subsys *ss = cft->ss;
@@ -1471,26 +1476,13 @@ static char *cgroup_fill_name(struct cgroup *cgrp, const struct cftype *cft,
snprintf(buf, CGROUP_FILE_NAME_MAX, "%s%s.%s",
dbg, cgroup_on_dfl(cgrp) ? ss->name : ss->legacy_name,
write_link_name ? cft->link_name : cft->name);
cft->name);
} else {
strscpy(buf, write_link_name ? cft->link_name : cft->name,
CGROUP_FILE_NAME_MAX);
strscpy(buf, cft->name, CGROUP_FILE_NAME_MAX);
}
return buf;
}
/*
 * cgroup_file_name - build the name of @cft's control file in @cgrp
 * @cgrp: cgroup the file belongs to
 * @cft: control file description
 * @buf: destination buffer; cgroup_fill_name() bounds its writes by
 *       CGROUP_FILE_NAME_MAX
 *
 * Thin wrapper around cgroup_fill_name() with write_link_name == false,
 * i.e. the name is derived from cft->name rather than cft->link_name.
 * Returns @buf.
 */
static char *cgroup_file_name(struct cgroup *cgrp, const struct cftype *cft,
char *buf)
{
return cgroup_fill_name(cgrp, cft, buf, false);
}
/*
 * cgroup_link_name - build the symlink name of @cft's control file in @cgrp
 * @cgrp: cgroup the file belongs to
 * @cft: control file description
 * @buf: destination buffer; cgroup_fill_name() bounds its writes by
 *       CGROUP_FILE_NAME_MAX
 *
 * Thin wrapper around cgroup_fill_name() with write_link_name == true,
 * i.e. the name is derived from cft->link_name rather than cft->name.
 * Returns @buf.
 */
static char *cgroup_link_name(struct cgroup *cgrp, const struct cftype *cft,
char *buf)
{
return cgroup_fill_name(cgrp, cft, buf, true);
}
/**
* cgroup_file_mode - deduce file mode of a control file
* @cft: the control file in question
@@ -1649,9 +1641,6 @@ static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
}
kernfs_remove_by_name(cgrp->kn, cgroup_file_name(cgrp, cft, name));
if (cft->flags & CFTYPE_SYMLINKED)
kernfs_remove_by_name(cgrp->kn,
cgroup_link_name(cgrp, cft, name));
}
/**
@@ -3837,7 +3826,6 @@ static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
{
char name[CGROUP_FILE_NAME_MAX];
struct kernfs_node *kn;
struct kernfs_node *kn_link;
struct lock_class_key *key = NULL;
int ret;
@@ -3868,14 +3856,6 @@ static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
spin_unlock_irq(&cgroup_file_kn_lock);
}
if (cft->flags & CFTYPE_SYMLINKED) {
kn_link = kernfs_create_link(cgrp->kn,
cgroup_link_name(cgrp, cft, name),
kn);
if (IS_ERR(kn_link))
return PTR_ERR(kn_link);
}
return 0;
}
@@ -4433,15 +4413,18 @@ static void css_task_iter_advance_css_set(struct css_task_iter *it)
it->task_pos = NULL;
return;
}
} while (!css_set_populated(cset));
} while (!css_set_populated(cset) && list_empty(&cset->dying_tasks));
if (!list_empty(&cset->tasks))
it->task_pos = cset->tasks.next;
else
else if (!list_empty(&cset->mg_tasks))
it->task_pos = cset->mg_tasks.next;
else
it->task_pos = cset->dying_tasks.next;
it->tasks_head = &cset->tasks;
it->mg_tasks_head = &cset->mg_tasks;
it->dying_tasks_head = &cset->dying_tasks;
/*
* We don't keep css_sets locked across iteration steps and thus
@@ -4467,9 +4450,20 @@ static void css_task_iter_advance_css_set(struct css_task_iter *it)
list_add(&it->iters_node, &cset->task_iters);
}
/*
 * css_task_iter_skip - step @it past @task if it is currently parked on it
 * @it: task iterator to adjust
 * @task: task whose cg_list node is about to be unlinked
 *
 * Must be called with css_set_lock held.  Iterators that are not pointing
 * at @task are left untouched.  Otherwise the position is moved to the
 * following list node and CSS_TASK_ITER_SKIPPED is set, so that the next
 * css_task_iter_advance() consumes the flag instead of stepping forward a
 * second time.
 */
static void css_task_iter_skip(struct css_task_iter *it,
			       struct task_struct *task)
{
	lockdep_assert_held(&css_set_lock);

	/* Nothing to do unless @it is sitting exactly on @task. */
	if (it->task_pos != &task->cg_list)
		return;

	it->task_pos = it->task_pos->next;
	it->flags |= CSS_TASK_ITER_SKIPPED;
}
static void css_task_iter_advance(struct css_task_iter *it)
{
struct list_head *next;
struct task_struct *task;
lockdep_assert_held(&css_set_lock);
repeat:
@@ -4479,25 +4473,40 @@ repeat:
* consumed first and then ->mg_tasks. After ->mg_tasks,
* we move onto the next cset.
*/
next = it->task_pos->next;
if (next == it->tasks_head)
next = it->mg_tasks_head->next;
if (next == it->mg_tasks_head)
css_task_iter_advance_css_set(it);
if (it->flags & CSS_TASK_ITER_SKIPPED)
it->flags &= ~CSS_TASK_ITER_SKIPPED;
else
it->task_pos = next;
it->task_pos = it->task_pos->next;
if (it->task_pos == it->tasks_head)
it->task_pos = it->mg_tasks_head->next;
if (it->task_pos == it->mg_tasks_head)
it->task_pos = it->dying_tasks_head->next;
if (it->task_pos == it->dying_tasks_head)
css_task_iter_advance_css_set(it);
} else {
/* called from start, proceed to the first cset */
css_task_iter_advance_css_set(it);
}
/* if PROCS, skip over tasks which aren't group leaders */
if ((it->flags & CSS_TASK_ITER_PROCS) && it->task_pos &&
!thread_group_leader(list_entry(it->task_pos, struct task_struct,
cg_list)))
goto repeat;
if (!it->task_pos)
return;
task = list_entry(it->task_pos, struct task_struct, cg_list);
if (it->flags & CSS_TASK_ITER_PROCS) {
/* if PROCS, skip over tasks which aren't group leaders */
if (!thread_group_leader(task))
goto repeat;
/* and dying leaders w/o live member threads */
if (!atomic_read(&task->signal->live))
goto repeat;
} else {
/* skip all dying ones */
if (task->flags & PF_EXITING)
goto repeat;
}
}
/**
@@ -4553,6 +4562,10 @@ struct task_struct *css_task_iter_next(struct css_task_iter *it)
spin_lock_irq(&css_set_lock);
/* @it may be half-advanced by skips, finish advancing */
if (it->flags & CSS_TASK_ITER_SKIPPED)
css_task_iter_advance(it);
if (it->task_pos) {
it->cur_task = list_entry(it->task_pos, struct task_struct,
cg_list);
@@ -6034,6 +6047,7 @@ void cgroup_exit(struct task_struct *tsk)
if (!list_empty(&tsk->cg_list)) {
spin_lock_irq(&css_set_lock);
css_set_move_task(tsk, cset, NULL, false);
list_add_tail(&tsk->cg_list, &cset->dying_tasks);
cset->nr_tasks--;
WARN_ON_ONCE(cgroup_task_frozen(tsk));
@@ -6059,6 +6073,13 @@ void cgroup_release(struct task_struct *task)
do_each_subsys_mask(ss, ssid, have_release_callback) {
ss->release(task);
} while_each_subsys_mask();
if (use_task_css_set_links) {
spin_lock_irq(&css_set_lock);
css_set_skip_task_iters(task_css_set(task), task);
list_del_init(&task->cg_list);
spin_unlock_irq(&css_set_lock);
}
}
void cgroup_free(struct task_struct *task)

View File

@@ -3254,10 +3254,23 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
spin_unlock_irqrestore(&callback_lock, flags);
}
/**
* cpuset_cpus_allowed_fallback - final fallback before complete catastrophe.
* @tsk: pointer to task_struct with which the scheduler is struggling
*
* Description: In the case that the scheduler cannot find an allowed cpu in
* tsk->cpus_allowed, we fall back to task_cs(tsk)->cpus_allowed. In legacy
* mode however, this value is the same as task_cs(tsk)->effective_cpus,
* which will not contain a sane cpumask during cases such as cpu hotplugging.
* This is the absolute last resort for the scheduler and it is only used if
* _every_ other avenue has been traveled.
**/
void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
{
rcu_read_lock();
do_set_cpus_allowed(tsk, task_cs(tsk)->effective_cpus);
do_set_cpus_allowed(tsk, is_in_v2_mode() ?
task_cs(tsk)->cpus_allowed : cpu_possible_mask);
rcu_read_unlock();
/*

View File

@@ -446,6 +446,15 @@ int commit_creds(struct cred *new)
if (task->mm)
set_dumpable(task->mm, suid_dumpable);
task->pdeath_signal = 0;
/*
* If a task drops privileges and becomes nondumpable,
* the dumpability change must become visible before
* the credential change; otherwise, a __ptrace_may_access()
* racing with this change may be able to attach to a task it
* shouldn't be able to attach to (as if the task had dropped
* privileges without becoming nondumpable).
* Pairs with a read barrier in __ptrace_may_access().
*/
smp_wmb();
}

View File

@@ -195,6 +195,7 @@ repeat:
rcu_read_unlock();
proc_flush_task(p);
cgroup_release(p);
write_lock_irq(&tasklist_lock);
ptrace_release_task(p);
@@ -220,7 +221,6 @@ repeat:
}
write_unlock_irq(&tasklist_lock);
cgroup_release(p);
release_thread(p);
call_rcu(&p->rcu, delayed_put_task_struct);

View File

@@ -18,6 +18,7 @@
#include <linux/elf.h>
#include <linux/moduleloader.h>
#include <linux/completion.h>
#include <linux/memory.h>
#include <asm/cacheflush.h>
#include "core.h"
#include "patch.h"
@@ -718,16 +719,21 @@ static int klp_init_object_loaded(struct klp_patch *patch,
struct klp_func *func;
int ret;
mutex_lock(&text_mutex);
module_disable_ro(patch->mod);
ret = klp_write_object_relocations(patch->mod, obj);
if (ret) {
module_enable_ro(patch->mod, true);
mutex_unlock(&text_mutex);
return ret;
}
arch_klp_init_object_loaded(patch, obj);
module_enable_ro(patch->mod, true);
mutex_unlock(&text_mutex);
klp_for_each_func(obj, func) {
ret = klp_find_object_symbol(obj->name, func->old_name,
func->old_sympos,

View File

@@ -95,6 +95,7 @@ static void devm_memremap_pages_release(void *data)
pgmap->kill(pgmap->ref);
for_each_device_pfn(pfn, pgmap)
put_page(pfn_to_page(pfn));
pgmap->cleanup(pgmap->ref);
/* pages are dead and unused, undo the arch mapping */
align_start = res->start & ~(SECTION_SIZE - 1);
@@ -133,8 +134,8 @@ static void devm_memremap_pages_release(void *data)
* 2/ The altmap field may optionally be initialized, in which case altmap_valid
* must be set to true
*
* 3/ pgmap->ref must be 'live' on entry and will be killed at
* devm_memremap_pages_release() time, or if this routine fails.
* 3/ pgmap->ref must be 'live' on entry and will be killed and reaped
* at devm_memremap_pages_release() time, or if this routine fails.
*
* 4/ res is expected to be a host memory range that could feasibly be
* treated as a "System RAM" range, i.e. not a device mmio range, but
@@ -156,8 +157,10 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
pgprot_t pgprot = PAGE_KERNEL;
int error, nid, is_ram;
if (!pgmap->ref || !pgmap->kill)
if (!pgmap->ref || !pgmap->kill || !pgmap->cleanup) {
WARN(1, "Missing reference count teardown definition\n");
return ERR_PTR(-EINVAL);
}
align_start = res->start & ~(SECTION_SIZE - 1);
align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE)
@@ -168,14 +171,16 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
if (conflict_pgmap) {
dev_WARN(dev, "Conflicting mapping in same section\n");
put_dev_pagemap(conflict_pgmap);
return ERR_PTR(-ENOMEM);
error = -ENOMEM;
goto err_array;
}
conflict_pgmap = get_dev_pagemap(PHYS_PFN(align_end), NULL);
if (conflict_pgmap) {
dev_WARN(dev, "Conflicting mapping in same section\n");
put_dev_pagemap(conflict_pgmap);
return ERR_PTR(-ENOMEM);
error = -ENOMEM;
goto err_array;
}
is_ram = region_intersects(align_start, align_size,
@@ -267,10 +272,18 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
pgmap_array_delete(res);
err_array:
pgmap->kill(pgmap->ref);
pgmap->cleanup(pgmap->ref);
return ERR_PTR(error);
}
EXPORT_SYMBOL_GPL(devm_memremap_pages);
/**
 * devm_memunmap_pages - release a mapping set up by devm_memremap_pages()
 * @dev: device the release action is looked up on
 * @pgmap: dev_pagemap handed to devm_memremap_pages_release() as its data
 *
 * Passes the (devm_memremap_pages_release, @pgmap) action for @dev to
 * devm_release_action().
 * NOTE(review): presumably this runs the release right away instead of
 * waiting for driver detach - confirm against the devres API.
 */
void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap)
{
devm_release_action(dev, devm_memremap_pages_release, pgmap);
}
EXPORT_SYMBOL_GPL(devm_memunmap_pages);
unsigned long vmem_altmap_offset(struct vmem_altmap *altmap)
{
/* number of pfns from base where pfn_to_page() is valid */

View File

@@ -324,6 +324,16 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
return -EPERM;
ok:
rcu_read_unlock();
/*
* If a task drops privileges and becomes nondumpable (through a syscall
* like setresuid()) while we are trying to access it, we must ensure
* that the dumpability is read after the credentials; otherwise,
* we may be able to attach to a task that we shouldn't be able to
* attach to (as if the task had dropped privileges without becoming
* nondumpable).
* Pairs with a write barrier in commit_creds().
*/
smp_rmb();
mm = task->mm;
if (mm &&
((get_dumpable(mm) != SUID_DUMP_USER) &&
@@ -705,6 +715,10 @@ static int ptrace_peek_siginfo(struct task_struct *child,
if (arg.nr < 0)
return -EINVAL;
/* Ensure arg.off fits in an unsigned long */
if (arg.off > ULONG_MAX)
return 0;
if (arg.flags & PTRACE_PEEKSIGINFO_SHARED)
pending = &child->signal->shared_pending;
else
@@ -712,18 +726,20 @@ static int ptrace_peek_siginfo(struct task_struct *child,
for (i = 0; i < arg.nr; ) {
kernel_siginfo_t info;
s32 off = arg.off + i;
unsigned long off = arg.off + i;
bool found = false;
spin_lock_irq(&child->sighand->siglock);
list_for_each_entry(q, &pending->list, list) {
if (!off--) {
found = true;
copy_siginfo(&info, &q->info);
break;
}
}
spin_unlock_irq(&child->sighand->siglock);
if (off >= 0) /* beyond the end of the list */
if (!found) /* beyond the end of the list */
break;
#ifdef CONFIG_COMPAT

View File

@@ -808,17 +808,18 @@ ktime_t ktime_get_coarse_with_offset(enum tk_offsets offs)
struct timekeeper *tk = &tk_core.timekeeper;
unsigned int seq;
ktime_t base, *offset = offsets[offs];
u64 nsecs;
WARN_ON(timekeeping_suspended);
do {
seq = read_seqcount_begin(&tk_core.seq);
base = ktime_add(tk->tkr_mono.base, *offset);
nsecs = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
} while (read_seqcount_retry(&tk_core.seq, seq));
return base;
return base + nsecs;
}
EXPORT_SYMBOL_GPL(ktime_get_coarse_with_offset);

View File

@@ -34,6 +34,7 @@
#include <linux/hash.h>
#include <linux/rcupdate.h>
#include <linux/kprobes.h>
#include <linux/memory.h>
#include <trace/events/sched.h>
@@ -2610,10 +2611,12 @@ static void ftrace_run_update_code(int command)
{
int ret;
mutex_lock(&text_mutex);
ret = ftrace_arch_code_modify_prepare();
FTRACE_WARN_ON(ret);
if (ret)
return;
goto out_unlock;
/*
* By default we use stop_machine() to modify the code.
@@ -2625,6 +2628,9 @@ static void ftrace_run_update_code(int command)
ret = ftrace_arch_code_modify_post_process();
FTRACE_WARN_ON(ret);
out_unlock:
mutex_unlock(&text_mutex);
}
static void ftrace_run_modify_code(struct ftrace_ops *ops, int command,
@@ -2935,14 +2941,13 @@ static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs)
p = &pg->records[i];
p->flags = rec_flags;
#ifndef CC_USING_NOP_MCOUNT
/*
* Do the initial record conversion from mcount jump
* to the NOP instructions.
*/
if (!ftrace_code_disable(mod, p))
if (!__is_defined(CC_USING_NOP_MCOUNT) &&
!ftrace_code_disable(mod, p))
break;
#endif
update_cnt++;
}
@@ -4221,10 +4226,13 @@ void free_ftrace_func_mapper(struct ftrace_func_mapper *mapper,
struct ftrace_func_entry *entry;
struct ftrace_func_map *map;
struct hlist_head *hhd;
int size = 1 << mapper->hash.size_bits;
int i;
int size, i;
if (!mapper)
return;
if (free_func && mapper->hash.count) {
size = 1 << mapper->hash.size_bits;
for (i = 0; i < size; i++) {
hhd = &mapper->hash.buckets[i];
hlist_for_each_entry(entry, hhd, hlist) {
@@ -5776,6 +5784,7 @@ void ftrace_module_enable(struct module *mod)
struct ftrace_page *pg;
mutex_lock(&ftrace_lock);
mutex_lock(&text_mutex);
if (ftrace_disabled)
goto out_unlock;
@@ -5837,6 +5846,7 @@ void ftrace_module_enable(struct module *mod)
ftrace_arch_code_modify_post_process();
out_unlock:
mutex_unlock(&text_mutex);
mutex_unlock(&ftrace_lock);
process_cached_mods(mod->name);

View File

@@ -6923,7 +6923,7 @@ struct tracing_log_err {
static DEFINE_MUTEX(tracing_err_log_lock);
struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
{
struct tracing_log_err *err;
@@ -8192,7 +8192,7 @@ static const struct file_operations buffer_percent_fops = {
.llseek = default_llseek,
};
struct dentry *trace_instance_dir;
static struct dentry *trace_instance_dir;
static void
init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);

View File

@@ -1057,7 +1057,7 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter,
trace_seq_puts(s, "<stack trace>\n");
for (p = field->caller; p && *p != ULONG_MAX && p < end; p++) {
for (p = field->caller; p && p < end && *p != ULONG_MAX; p++) {
if (trace_seq_has_overflowed(s))
break;

View File

@@ -426,8 +426,6 @@ end:
/*
* Argument syntax:
* - Add uprobe: p|r[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS]
*
* - Remove uprobe: -:[GRP/]EVENT
*/
static int trace_uprobe_create(int argc, const char **argv)
{
@@ -443,10 +441,17 @@ static int trace_uprobe_create(int argc, const char **argv)
ret = 0;
ref_ctr_offset = 0;
/* argc must be >= 1 */
if (argv[0][0] == 'r')
switch (argv[0][0]) {
case 'r':
is_return = true;
else if (argv[0][0] != 'p' || argc < 2)
break;
case 'p':
break;
default:
return -ECANCELED;
}
if (argc < 2)
return -ECANCELED;
if (argv[0][1] == ':')