Merge tag 'noinstr-x86-kvm-2020-05-16' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip into HEAD

This commit is contained in:
Paolo Bonzini
2020-05-20 03:40:09 -04:00
868 changed files with 8787 additions and 4525 deletions

View File

@@ -1326,6 +1326,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
case AUDIT_FIRST_USER_MSG2 ... AUDIT_LAST_USER_MSG2:
if (!audit_enabled && msg_type != AUDIT_USER_AVC)
return 0;
/* exit early if there isn't at least one character to print */
if (data_len < 2)
return -EINVAL;
err = audit_filter(msg_type, AUDIT_FILTER_USER);
if (err == 1) { /* match or error */

View File

@@ -469,7 +469,7 @@ static int cpu_map_update_elem(struct bpf_map *map, void *key, void *value,
return -EOVERFLOW;
/* Make sure CPU is a valid possible cpu */
if (!cpu_possible(key_cpu))
if (key_cpu >= nr_cpumask_bits || !cpu_possible(key_cpu))
return -ENODEV;
if (qsize == 0) {

View File

@@ -2283,7 +2283,7 @@ static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp)
}
#endif
const struct file_operations bpf_link_fops = {
static const struct file_operations bpf_link_fops = {
#ifdef CONFIG_PROC_FS
.show_fdinfo = bpf_link_show_fdinfo,
#endif
@@ -3628,8 +3628,10 @@ static int link_update(union bpf_attr *attr)
return PTR_ERR(link);
new_prog = bpf_prog_get(attr->link_update.new_prog_fd);
if (IS_ERR(new_prog))
return PTR_ERR(new_prog);
if (IS_ERR(new_prog)) {
ret = PTR_ERR(new_prog);
goto out_put_link;
}
if (flags & BPF_F_REPLACE) {
old_prog = bpf_prog_get(attr->link_update.old_prog_fd);
@@ -3638,6 +3640,9 @@ static int link_update(union bpf_attr *attr)
old_prog = NULL;
goto out_put_progs;
}
} else if (attr->link_update.old_prog_fd) {
ret = -EINVAL;
goto out_put_progs;
}
#ifdef CONFIG_CGROUP_BPF
@@ -3653,6 +3658,8 @@ out_put_progs:
bpf_prog_put(old_prog);
if (ret)
bpf_prog_put(new_prog);
out_put_link:
bpf_link_put(link);
return ret;
}

View File

@@ -2118,6 +2118,15 @@ static bool register_is_const(struct bpf_reg_state *reg)
return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
}
static bool __is_pointer_value(bool allow_ptr_leaks,
const struct bpf_reg_state *reg)
{
if (allow_ptr_leaks)
return false;
return reg->type != SCALAR_VALUE;
}
static void save_register_state(struct bpf_func_state *state,
int spi, struct bpf_reg_state *reg)
{
@@ -2308,6 +2317,16 @@ static int check_stack_read(struct bpf_verifier_env *env,
* which resets stack/reg liveness for state transitions
*/
state->regs[value_regno].live |= REG_LIVE_WRITTEN;
} else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
/* If value_regno==-1, the caller is asking us whether
* it is acceptable to use this value as a SCALAR_VALUE
* (e.g. for XADD).
* We must not allow unprivileged callers to do that
* with spilled pointers.
*/
verbose(env, "leaking pointer from stack off %d\n",
off);
return -EACCES;
}
mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
} else {
@@ -2673,15 +2692,6 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
return -EACCES;
}
static bool __is_pointer_value(bool allow_ptr_leaks,
const struct bpf_reg_state *reg)
{
if (allow_ptr_leaks)
return false;
return reg->type != SCALAR_VALUE;
}
static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
{
return cur_regs(env) + regno;
@@ -3089,7 +3099,7 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
if (ret < 0)
return ret;
if (atype == BPF_READ) {
if (atype == BPF_READ && value_regno >= 0) {
if (ret == SCALAR_VALUE) {
mark_reg_unknown(env, regs, value_regno);
return 0;
@@ -10487,6 +10497,7 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
return -EINVAL;
}
env->ops = bpf_verifier_ops[tgt_prog->type];
prog->expected_attach_type = tgt_prog->expected_attach_type;
}
if (!tgt_prog->jited) {
verbose(env, "Can attach to only JITed progs\n");
@@ -10831,6 +10842,13 @@ err_release_maps:
* them now. Otherwise free_used_maps() will release them.
*/
release_maps(env);
/* extension progs temporarily inherit the attach_type of their targets
for verification purposes, so set it back to zero before returning
*/
if (env->prog->type == BPF_PROG_TYPE_EXT)
env->prog->expected_attach_type = 0;
*prog = env->prog;
err_unlock:
if (!is_priv)

View File

@@ -7491,10 +7491,17 @@ static void perf_event_task_output(struct perf_event *event,
goto out;
task_event->event_id.pid = perf_event_pid(event, task);
task_event->event_id.ppid = perf_event_pid(event, current);
task_event->event_id.tid = perf_event_tid(event, task);
task_event->event_id.ptid = perf_event_tid(event, current);
if (task_event->event_id.header.type == PERF_RECORD_EXIT) {
task_event->event_id.ppid = perf_event_pid(event,
task->real_parent);
task_event->event_id.ptid = perf_event_pid(event,
task->real_parent);
} else { /* PERF_RECORD_FORK */
task_event->event_id.ppid = perf_event_pid(event, current);
task_event->event_id.ptid = perf_event_tid(event, current);
}
task_event->event_id.time = perf_event_clock(event);

View File

@@ -219,6 +219,7 @@ repeat:
write_unlock_irq(&tasklist_lock);
proc_flush_pid(thread_pid);
put_pid(thread_pid);
release_thread(p);
put_task_struct_rcu_user(p);

View File

@@ -740,8 +740,8 @@ static const struct file_operations kcov_fops = {
* kcov_remote_handle() with KCOV_SUBSYSTEM_COMMON as the subsystem id and an
* arbitrary 4-byte non-zero number as the instance id). This common handle
* then gets saved into the task_struct of the process that issued the
* KCOV_REMOTE_ENABLE ioctl. When this proccess issues system calls that spawn
* kernel threads, the common handle must be retrived via kcov_common_handle()
* KCOV_REMOTE_ENABLE ioctl. When this process issues system calls that spawn
* kernel threads, the common handle must be retrieved via kcov_common_handle()
* and passed to the spawned threads via custom annotations. Those kernel
* threads must in turn be annotated with kcov_remote_start(common_handle) and
* kcov_remote_stop(). All of the threads that are spawned by the same process

View File

@@ -3635,13 +3635,10 @@ mark_held_locks(struct task_struct *curr, enum lock_usage_bit base_bit)
/*
* Hardirqs will be enabled:
*/
static void __trace_hardirqs_on_caller(unsigned long ip)
static void __trace_hardirqs_on_caller(void)
{
struct task_struct *curr = current;
/* we'll do an OFF -> ON transition: */
curr->hardirqs_enabled = 1;
/*
* We are going to turn hardirqs on, so set the
* usage bit for all held locks:
@@ -3654,15 +3651,19 @@ static void __trace_hardirqs_on_caller(unsigned long ip)
* this bit from being set before)
*/
if (curr->softirqs_enabled)
if (!mark_held_locks(curr, LOCK_ENABLED_SOFTIRQ))
return;
curr->hardirq_enable_ip = ip;
curr->hardirq_enable_event = ++curr->irq_events;
debug_atomic_inc(hardirqs_on_events);
mark_held_locks(curr, LOCK_ENABLED_SOFTIRQ);
}
void lockdep_hardirqs_on(unsigned long ip)
/**
* lockdep_hardirqs_on_prepare - Prepare for enabling interrupts
* @ip: Caller address
*
* Invoked before a possible transition to RCU idle from exit to user or
* guest mode. This ensures that all RCU operations are done before RCU
* stops watching. After the RCU transition lockdep_hardirqs_on() has to be
* invoked to set the final state.
*/
void lockdep_hardirqs_on_prepare(unsigned long ip)
{
if (unlikely(!debug_locks || current->lockdep_recursion))
return;
@@ -3698,20 +3699,62 @@ void lockdep_hardirqs_on(unsigned long ip)
if (DEBUG_LOCKS_WARN_ON(current->hardirq_context))
return;
current->hardirq_chain_key = current->curr_chain_key;
current->lockdep_recursion++;
__trace_hardirqs_on_caller(ip);
__trace_hardirqs_on_caller();
lockdep_recursion_finish();
}
NOKPROBE_SYMBOL(lockdep_hardirqs_on);
EXPORT_SYMBOL_GPL(lockdep_hardirqs_on_prepare);
void noinstr lockdep_hardirqs_on(unsigned long ip)
{
struct task_struct *curr = current;
if (unlikely(!debug_locks || curr->lockdep_recursion))
return;
if (curr->hardirqs_enabled) {
/*
* Neither irq nor preemption are disabled here
* so this is racy by nature but losing one hit
* in a stat is not a big deal.
*/
__debug_atomic_inc(redundant_hardirqs_on);
return;
}
/*
* We're enabling irqs and according to our state above irqs weren't
* already enabled, yet we find the hardware thinks they are in fact
* enabled.. someone messed up their IRQ state tracing.
*/
if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
return;
/*
* Ensure the lock stack remained unchanged between
* lockdep_hardirqs_on_prepare() and lockdep_hardirqs_on().
*/
DEBUG_LOCKS_WARN_ON(current->hardirq_chain_key !=
current->curr_chain_key);
/* we'll do an OFF -> ON transition: */
curr->hardirqs_enabled = 1;
curr->hardirq_enable_ip = ip;
curr->hardirq_enable_event = ++curr->irq_events;
debug_atomic_inc(hardirqs_on_events);
}
EXPORT_SYMBOL_GPL(lockdep_hardirqs_on);
/*
* Hardirqs were disabled:
*/
void lockdep_hardirqs_off(unsigned long ip)
void noinstr lockdep_hardirqs_off(unsigned long ip)
{
struct task_struct *curr = current;
if (unlikely(!debug_locks || current->lockdep_recursion))
if (unlikely(!debug_locks || curr->lockdep_recursion))
return;
/*
@@ -3729,10 +3772,11 @@ void lockdep_hardirqs_off(unsigned long ip)
curr->hardirq_disable_ip = ip;
curr->hardirq_disable_event = ++curr->irq_events;
debug_atomic_inc(hardirqs_off_events);
} else
} else {
debug_atomic_inc(redundant_hardirqs_off);
}
}
NOKPROBE_SYMBOL(lockdep_hardirqs_off);
EXPORT_SYMBOL_GPL(lockdep_hardirqs_off);
/*
* Softirqs will be enabled:
@@ -4408,8 +4452,8 @@ static void print_unlock_imbalance_bug(struct task_struct *curr,
dump_stack();
}
static int match_held_lock(const struct held_lock *hlock,
const struct lockdep_map *lock)
static noinstr int match_held_lock(const struct held_lock *hlock,
const struct lockdep_map *lock)
{
if (hlock->instance == lock)
return 1;
@@ -4696,7 +4740,7 @@ __lock_release(struct lockdep_map *lock, unsigned long ip)
return 0;
}
static nokprobe_inline
static __always_inline
int __lock_is_held(const struct lockdep_map *lock, int read)
{
struct task_struct *curr = current;
@@ -4956,7 +5000,7 @@ void lock_release(struct lockdep_map *lock, unsigned long ip)
}
EXPORT_SYMBOL_GPL(lock_release);
int lock_is_held_type(const struct lockdep_map *lock, int read)
noinstr int lock_is_held_type(const struct lockdep_map *lock, int read)
{
unsigned long flags;
int ret = 0;

View File

@@ -898,6 +898,13 @@ static int software_resume(void)
error = freeze_processes();
if (error)
goto Close_Finish;
error = freeze_kernel_threads();
if (error) {
thaw_processes();
goto Close_Finish;
}
error = load_image_and_restore();
thaw_processes();
Finish:

View File

@@ -1232,13 +1232,8 @@ static void uclamp_fork(struct task_struct *p)
return;
for_each_clamp_id(clamp_id) {
unsigned int clamp_value = uclamp_none(clamp_id);
/* By default, RT tasks always get 100% boost */
if (unlikely(rt_task(p) && clamp_id == UCLAMP_MIN))
clamp_value = uclamp_none(UCLAMP_MAX);
uclamp_se_set(&p->uclamp_req[clamp_id], clamp_value, false);
uclamp_se_set(&p->uclamp_req[clamp_id],
uclamp_none(clamp_id), false);
}
}

View File

@@ -1989,8 +1989,12 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
if (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN)
sig = 0;
}
/*
* Send with __send_signal as si_pid and si_uid are in the
* parent's namespaces.
*/
if (valid_signal(sig) && sig)
__group_send_sig_info(sig, &info, tsk->parent);
__send_signal(sig, &info, tsk->parent, PIDTYPE_TGID, false);
__wake_up_parent(tsk, tsk->parent);
spin_unlock_irqrestore(&psig->siglock, flags);

View File

@@ -466,7 +466,6 @@ config PROFILE_ANNOTATED_BRANCHES
config PROFILE_ALL_BRANCHES
bool "Profile all if conditionals" if !FORTIFY_SOURCE
select TRACE_BRANCH_PROFILING
imply CC_DISABLE_WARN_MAYBE_UNINITIALIZED # avoid false positives
help
This tracer profiles all branch conditions. Every if ()
taken in the kernel is recorded whether it hit or miss.

View File

@@ -5165,6 +5165,7 @@ int unregister_ftrace_direct(unsigned long ip, unsigned long addr)
list_del_rcu(&direct->next);
synchronize_rcu_tasks();
kfree(direct);
kfree(entry);
ftrace_direct_func_count--;
}
}

View File

@@ -113,22 +113,42 @@ static int preemptirq_delay_run(void *data)
for (i = 0; i < s; i++)
(testfuncs[i])(i);
set_current_state(TASK_INTERRUPTIBLE);
while (!kthread_should_stop()) {
schedule();
set_current_state(TASK_INTERRUPTIBLE);
}
__set_current_state(TASK_RUNNING);
return 0;
}
static struct task_struct *preemptirq_start_test(void)
static int preemptirq_run_test(void)
{
struct task_struct *task;
char task_name[50];
snprintf(task_name, sizeof(task_name), "%s_test", test_mode);
return kthread_run(preemptirq_delay_run, NULL, task_name);
task = kthread_run(preemptirq_delay_run, NULL, task_name);
if (IS_ERR(task))
return PTR_ERR(task);
if (task)
kthread_stop(task);
return 0;
}
static ssize_t trigger_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t count)
{
preemptirq_start_test();
ssize_t ret;
ret = preemptirq_run_test();
if (ret)
return ret;
return count;
}
@@ -148,11 +168,9 @@ static struct kobject *preemptirq_delay_kobj;
static int __init preemptirq_delay_init(void)
{
struct task_struct *test_task;
int retval;
test_task = preemptirq_start_test();
retval = PTR_ERR_OR_ZERO(test_task);
retval = preemptirq_run_test();
if (retval != 0)
return retval;

View File

@@ -947,7 +947,8 @@ int __trace_bputs(unsigned long ip, const char *str)
EXPORT_SYMBOL_GPL(__trace_bputs);
#ifdef CONFIG_TRACER_SNAPSHOT
void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data)
static void tracing_snapshot_instance_cond(struct trace_array *tr,
void *cond_data)
{
struct tracer *tracer = tr->current_trace;
unsigned long flags;
@@ -8525,6 +8526,19 @@ static int allocate_trace_buffers(struct trace_array *tr, int size)
*/
allocate_snapshot = false;
#endif
/*
* Because of some magic with the way alloc_percpu() works on
* x86_64, we need to synchronize the pgd of all the tables,
* otherwise the trace events that happen in x86_64 page fault
* handlers can't cope with accessing the chance that a
* alloc_percpu()'d memory might be touched in the page fault trace
* event. Oh, and we need to audit all other alloc_percpu() and vmalloc()
* calls in tracing, because something might get triggered within a
* page fault trace event!
*/
vmalloc_sync_mappings();
return 0;
}

View File

@@ -95,23 +95,19 @@ trace_boot_add_kprobe_event(struct xbc_node *node, const char *event)
struct xbc_node *anode;
char buf[MAX_BUF_LEN];
const char *val;
int ret;
kprobe_event_cmd_init(&cmd, buf, MAX_BUF_LEN);
ret = kprobe_event_gen_cmd_start(&cmd, event, NULL);
if (ret)
return ret;
int ret = 0;
xbc_node_for_each_array_value(node, "probes", anode, val) {
ret = kprobe_event_add_field(&cmd, val);
if (ret)
return ret;
}
kprobe_event_cmd_init(&cmd, buf, MAX_BUF_LEN);
ret = kprobe_event_gen_cmd_end(&cmd);
if (ret)
pr_err("Failed to add probe: %s\n", buf);
ret = kprobe_event_gen_cmd_start(&cmd, event, val);
if (ret)
break;
ret = kprobe_event_gen_cmd_end(&cmd);
if (ret)
pr_err("Failed to add probe: %s\n", buf);
}
return ret;
}

View File

@@ -3320,6 +3320,9 @@ static void __destroy_hist_field(struct hist_field *hist_field)
kfree(hist_field->name);
kfree(hist_field->type);
kfree(hist_field->system);
kfree(hist_field->event_name);
kfree(hist_field);
}
@@ -4382,6 +4385,7 @@ static struct hist_field *create_var(struct hist_trigger_data *hist_data,
goto out;
}
var->ref = 1;
var->flags = HIST_FIELD_FL_VAR;
var->var.idx = idx;
var->var.hist_data = var->hist_data = hist_data;
@@ -5011,6 +5015,9 @@ static void destroy_field_vars(struct hist_trigger_data *hist_data)
for (i = 0; i < hist_data->n_field_vars; i++)
destroy_field_var(hist_data->field_vars[i]);
for (i = 0; i < hist_data->n_save_vars; i++)
destroy_field_var(hist_data->save_vars[i]);
}
static void save_field_var(struct hist_trigger_data *hist_data,

View File

@@ -453,7 +453,7 @@ static bool __within_notrace_func(unsigned long addr)
static bool within_notrace_func(struct trace_kprobe *tk)
{
unsigned long addr = addr = trace_kprobe_address(tk);
unsigned long addr = trace_kprobe_address(tk);
char symname[KSYM_NAME_LEN], *p;
if (!__within_notrace_func(addr))
@@ -940,6 +940,9 @@ EXPORT_SYMBOL_GPL(kprobe_event_cmd_init);
* complete command or only the first part of it; in the latter case,
* kprobe_event_add_fields() can be used to add more fields following this.
*
* Unlikely the synth_event_gen_cmd_start(), @loc must be specified. This
* returns -EINVAL if @loc == NULL.
*
* Return: 0 if successful, error otherwise.
*/
int __kprobe_event_gen_cmd_start(struct dynevent_cmd *cmd, bool kretprobe,
@@ -953,6 +956,9 @@ int __kprobe_event_gen_cmd_start(struct dynevent_cmd *cmd, bool kretprobe,
if (cmd->type != DYNEVENT_TYPE_KPROBE)
return -EINVAL;
if (!loc)
return -EINVAL;
if (kretprobe)
snprintf(buf, MAX_EVENT_NAME_LEN, "r:kprobes/%s", name);
else

View File

@@ -19,6 +19,24 @@
/* Per-cpu variable to prevent redundant calls when IRQs already off */
static DEFINE_PER_CPU(int, tracing_irq_cpu);
/*
* Like trace_hardirqs_on() but without the lockdep invocation. This is
* used in the low level entry code where the ordering vs. RCU is important
* and lockdep uses a staged approach which splits the lockdep hardirq
* tracking into a RCU on and a RCU off section.
*/
void trace_hardirqs_on_prepare(void)
{
if (this_cpu_read(tracing_irq_cpu)) {
if (!in_nmi())
trace_irq_enable(CALLER_ADDR0, CALLER_ADDR1);
tracer_hardirqs_on(CALLER_ADDR0, CALLER_ADDR1);
this_cpu_write(tracing_irq_cpu, 0);
}
}
EXPORT_SYMBOL(trace_hardirqs_on_prepare);
NOKPROBE_SYMBOL(trace_hardirqs_on_prepare);
void trace_hardirqs_on(void)
{
if (this_cpu_read(tracing_irq_cpu)) {
@@ -28,11 +46,31 @@ void trace_hardirqs_on(void)
this_cpu_write(tracing_irq_cpu, 0);
}
lockdep_hardirqs_on_prepare(CALLER_ADDR0);
lockdep_hardirqs_on(CALLER_ADDR0);
}
EXPORT_SYMBOL(trace_hardirqs_on);
NOKPROBE_SYMBOL(trace_hardirqs_on);
/*
* Like trace_hardirqs_off() but without the lockdep invocation. This is
* used in the low level entry code where the ordering vs. RCU is important
* and lockdep uses a staged approach which splits the lockdep hardirq
* tracking into a RCU on and a RCU off section.
*/
void trace_hardirqs_off_prepare(void)
{
if (!this_cpu_read(tracing_irq_cpu)) {
this_cpu_write(tracing_irq_cpu, 1);
tracer_hardirqs_off(CALLER_ADDR0, CALLER_ADDR1);
if (!in_nmi())
trace_irq_disable(CALLER_ADDR0, CALLER_ADDR1);
}
}
EXPORT_SYMBOL(trace_hardirqs_off_prepare);
NOKPROBE_SYMBOL(trace_hardirqs_off_prepare);
void trace_hardirqs_off(void)
{
if (!this_cpu_read(tracing_irq_cpu)) {
@@ -56,6 +94,7 @@ __visible void trace_hardirqs_on_caller(unsigned long caller_addr)
this_cpu_write(tracing_irq_cpu, 0);
}
lockdep_hardirqs_on_prepare(CALLER_ADDR0);
lockdep_hardirqs_on(CALLER_ADDR0);
}
EXPORT_SYMBOL(trace_hardirqs_on_caller);

View File

@@ -283,7 +283,7 @@ int tracing_map_add_key_field(struct tracing_map *map,
return idx;
}
void tracing_map_array_clear(struct tracing_map_array *a)
static void tracing_map_array_clear(struct tracing_map_array *a)
{
unsigned int i;
@@ -294,7 +294,7 @@ void tracing_map_array_clear(struct tracing_map_array *a)
memset(a->pages[i], 0, PAGE_SIZE);
}
void tracing_map_array_free(struct tracing_map_array *a)
static void tracing_map_array_free(struct tracing_map_array *a)
{
unsigned int i;
@@ -316,7 +316,7 @@ void tracing_map_array_free(struct tracing_map_array *a)
kfree(a);
}
struct tracing_map_array *tracing_map_array_alloc(unsigned int n_elts,
static struct tracing_map_array *tracing_map_array_alloc(unsigned int n_elts,
unsigned int entry_size)
{
struct tracing_map_array *a;

View File

@@ -544,6 +544,11 @@ EXPORT_SYMBOL_GPL(fork_usermode_blob);
* Runs a user-space application. The application is started
* asynchronously if wait is not set, and runs as a child of system workqueues.
* (ie. it runs with full root capabilities and optimized affinity).
*
* Note: successful return value does not guarantee the helper was called at
* all. You can't rely on sub_info->{init,cleanup} being called even for
* UMH_WAIT_* wait modes as STATIC_USERMODEHELPER_PATH="" turns all helpers
* into a successful no-op.
*/
int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
{