Merge branch 'linus' into locking/core, to fix up conflicts
Conflicts:
        mm/page_alloc.c

Signed-off-by: Ingo Molnar <mingo@kernel.org>
@@ -652,12 +652,27 @@ static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
         }
 }
 
+static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab)
+{
+        return htab->map.map_type == BPF_MAP_TYPE_HASH_OF_MAPS &&
+               BITS_PER_LONG == 64;
+}
+
+static u32 htab_size_value(const struct bpf_htab *htab, bool percpu)
+{
+        u32 size = htab->map.value_size;
+
+        if (percpu || fd_htab_map_needs_adjust(htab))
+                size = round_up(size, 8);
+        return size;
+}
+
 static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
                                          void *value, u32 key_size, u32 hash,
                                          bool percpu, bool onallcpus,
                                          struct htab_elem *old_elem)
 {
-        u32 size = htab->map.value_size;
+        u32 size = htab_size_value(htab, percpu);
         bool prealloc = htab_is_prealloc(htab);
         struct htab_elem *l_new, **pl_new;
         void __percpu *pptr;
@@ -696,9 +711,6 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 
         memcpy(l_new->key, key, key_size);
         if (percpu) {
-                /* round up value_size to 8 bytes */
-                size = round_up(size, 8);
-
                 if (prealloc) {
                         pptr = htab_elem_get_ptr(l_new, key_size);
                 } else {
@@ -1209,17 +1221,9 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = {
 
 static struct bpf_map *fd_htab_map_alloc(union bpf_attr *attr)
 {
-        struct bpf_map *map;
-
         if (attr->value_size != sizeof(u32))
                 return ERR_PTR(-EINVAL);
-
-        /* pointer is stored internally */
-        attr->value_size = sizeof(void *);
-        map = htab_map_alloc(attr);
-        attr->value_size = sizeof(u32);
-
-        return map;
+        return htab_map_alloc(attr);
 }
 
 static void fd_htab_map_free(struct bpf_map *map)
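Note: the new htab_size_value() helper centralizes the 8-byte rounding that the per-CPU path used to do inline, and applies the same rounding to BPF_MAP_TYPE_HASH_OF_MAPS values on 64-bit, where the 4-byte value exposed to userspace is stored internally as a map pointer. A minimal user-space sketch of the size arithmetic (not part of the patch; round_up_pow2() is a stand-in for the kernel's round_up() macro):

        #include <assert.h>
        #include <stdio.h>

        /* Stand-in for the kernel's round_up(): round x up to a multiple of a
         * power-of-two alignment. */
        static unsigned int round_up_pow2(unsigned int x, unsigned int align)
        {
                return (x + align - 1) & ~(align - 1);
        }

        int main(void)
        {
                /* A 4-byte value (e.g. the u32 a hash-of-maps map exposes) is
                 * padded to 8 bytes so a 64-bit pointer fits in the same slot. */
                assert(round_up_pow2(sizeof(unsigned int), 8) == 8);

                /* Values that are already 8-byte multiples are unchanged. */
                assert(round_up_pow2(16, 8) == 16);

                printf("round_up(4, 8) = %u\n", round_up_pow2(4, 8));
                return 0;
        }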
@@ -1899,6 +1899,7 @@ static struct cftype files[] = {
         {
                 .name = "memory_pressure",
                 .read_u64 = cpuset_read_u64,
+                .private = FILE_MEMORY_PRESSURE,
         },
 
         {
@@ -7906,16 +7906,15 @@ void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx,
                 }
         }
         perf_tp_event(call->event.type, count, raw_data, size, regs, head,
-                      rctx, task);
+                      rctx, task, NULL);
 }
 EXPORT_SYMBOL_GPL(perf_trace_run_bpf_submit);
 
 void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size,
                    struct pt_regs *regs, struct hlist_head *head, int rctx,
-                   struct task_struct *task)
+                   struct task_struct *task, struct perf_event *event)
 {
         struct perf_sample_data data;
-        struct perf_event *event;
 
         struct perf_raw_record raw = {
                 .frag = {
@@ -7929,9 +7928,15 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size,
 
         perf_trace_buf_update(record, event_type);
 
-        hlist_for_each_entry_rcu(event, head, hlist_entry) {
+        /* Use the given event instead of the hlist */
+        if (event) {
                 if (perf_tp_event_match(event, &data, regs))
                         perf_swevent_event(event, count, &data, regs);
+        } else {
+                hlist_for_each_entry_rcu(event, head, hlist_entry) {
+                        if (perf_tp_event_match(event, &data, regs))
+                                perf_swevent_event(event, count, &data, regs);
+                }
         }
 
         /*
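Note: the extra struct perf_event * argument lets perf_tp_event() deliver a record to one specific event rather than walking the whole per-tracepoint hlist; call sites that want the old broadcast behaviour simply pass NULL. A rough, self-contained sketch of that dispatch shape (hypothetical subscriber/deliver names, not the kernel API):

        #include <stdio.h>

        struct subscriber {
                const char *name;
                struct subscriber *next;
        };

        /* Deliver to one explicit target if given, otherwise to every entry on
         * the list -- the same shape as the perf_tp_event() change. */
        static void deliver(struct subscriber *list, struct subscriber *target,
                            const char *record)
        {
                if (target) {
                        printf("%s <- %s\n", target->name, record);
                        return;
                }
                for (struct subscriber *s = list; s; s = s->next)
                        printf("%s <- %s\n", s->name, record);
        }

        int main(void)
        {
                struct subscriber b = { "event-b", NULL };
                struct subscriber a = { "event-a", &b };

                deliver(&a, NULL, "broadcast record");  /* old behaviour */
                deliver(&a, &b, "targeted record");     /* new single-event path */
                return 0;
        }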
@@ -9611,6 +9616,8 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
         if (ret)
                 return -EFAULT;
 
+        attr->size = size;
+
         if (attr->__reserved_1)
                 return -EINVAL;
 
@@ -10032,28 +10039,27 @@ SYSCALL_DEFINE5(perf_event_open,
                         goto err_context;
 
                 /*
-                 * Do not allow to attach to a group in a different
-                 * task or CPU context:
+                 * Make sure we're both events for the same CPU;
+                 * grouping events for different CPUs is broken; since
+                 * you can never concurrently schedule them anyhow.
                  */
-                if (move_group) {
-                        /*
-                         * Make sure we're both on the same task, or both
-                         * per-cpu events.
-                         */
-                        if (group_leader->ctx->task != ctx->task)
-                                goto err_context;
+                if (group_leader->cpu != event->cpu)
+                        goto err_context;
 
-                        /*
-                         * Make sure we're both events for the same CPU;
-                         * grouping events for different CPUs is broken; since
-                         * you can never concurrently schedule them anyhow.
-                         */
-                        if (group_leader->cpu != event->cpu)
-                                goto err_context;
-                } else {
-                        if (group_leader->ctx != ctx)
-                                goto err_context;
-                }
+                /*
+                 * Make sure we're both on the same task, or both
+                 * per-CPU events.
+                 */
+                if (group_leader->ctx->task != ctx->task)
+                        goto err_context;
 
+                /*
+                 * Do not allow to attach to a group in a different task
+                 * or CPU context. If we're moving SW events, we'll fix
+                 * this up later, so allow that.
+                 */
+                if (!move_group && group_leader->ctx != ctx)
+                        goto err_context;
+
                 /*
                  * Only a group leader can be exclusive or pinned
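Note: after the reordering, the CPU and task checks run for every group attach, and only the context-identity check is relaxed when move_group is set (the context gets fixed up later for software events). A condensed sketch of the resulting validation order, with simplified stand-in types (illustrative only):

        #include <stdbool.h>
        #include <stdio.h>

        struct ctx { int task, id; };
        struct evt { int cpu; struct ctx *ctx; };

        /* Mirrors the validation order after the change: CPU first, then task,
         * then (unless we are moving the group) the exact context. */
        static bool group_attach_ok(struct evt *leader, struct evt *event,
                                    struct ctx *ctx, bool move_group)
        {
                if (leader->cpu != event->cpu)
                        return false;   /* cross-CPU groups are broken */
                if (leader->ctx->task != ctx->task)
                        return false;   /* same task, or both per-CPU */
                if (!move_group && leader->ctx != ctx)
                        return false;   /* same context unless fixed up later */
                return true;
        }

        int main(void)
        {
                struct ctx c = { .task = 1, .id = 7 };
                struct evt leader = { .cpu = 0, .ctx = &c };
                struct evt event  = { .cpu = 0, .ctx = &c };

                printf("attach ok: %d\n", group_attach_ok(&leader, &event, &c, false));
                return 0;
        }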
@@ -1262,8 +1262,6 @@ void uprobe_end_dup_mmap(void)
 
 void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm)
 {
-        newmm->uprobes_state.xol_area = NULL;
-
         if (test_bit(MMF_HAS_UPROBES, &oldmm->flags)) {
                 set_bit(MMF_HAS_UPROBES, &newmm->flags);
                 /* unconditionally, dup_mmap() skips VM_DONTCOPY vmas */
@@ -787,6 +787,13 @@ static void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
 #endif
 }
 
+static void mm_init_uprobes_state(struct mm_struct *mm)
+{
+#ifdef CONFIG_UPROBES
+        mm->uprobes_state.xol_area = NULL;
+#endif
+}
+
 static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
                                  struct user_namespace *user_ns)
 {
@@ -808,11 +815,13 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
         mm_init_cpumask(mm);
         mm_init_aio(mm);
         mm_init_owner(mm, p);
+        RCU_INIT_POINTER(mm->exe_file, NULL);
         mmu_notifier_mm_init(mm);
         init_tlb_flush_pending(mm);
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
         mm->pmd_huge_pte = NULL;
 #endif
+        mm_init_uprobes_state(mm);
 
         if (current->mm) {
                 mm->flags = current->mm->flags & MMF_INIT_MASK;
@@ -637,6 +637,7 @@ repeat:
         schedule();
 
         try_to_freeze();
+        cond_resched();
         goto repeat;
 }
 EXPORT_SYMBOL_GPL(kthread_worker_fn);
@@ -70,9 +70,10 @@ static void __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode,
 
         list_for_each_entry_safe(curr, next, &wq_head->head, entry) {
                 unsigned flags = curr->flags;
-
-                if (curr->func(curr, mode, wake_flags, key) &&
-                                (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
+                int ret = curr->func(curr, mode, wake_flags, key);
+                if (ret < 0)
+                        break;
+                if (ret && (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
                         break;
         }
 }
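Note: __wake_up_common() now lets a wake function abort the scan by returning a negative value, while a positive return still consumes an exclusive slot as before. A toy model of that loop under those assumptions (hypothetical waiter/wake_all names, not the kernel wait API):

        #include <stdio.h>

        #define WQ_FLAG_EXCLUSIVE 0x01

        struct waiter {
                int flags;
                /* Return <0 to stop the walk, >0 if the waiter was woken. */
                int (*func)(struct waiter *w);
        };

        static int wake_all(struct waiter *w, int n, int nr_exclusive)
        {
                int woken = 0;

                for (int i = 0; i < n; i++) {
                        int ret = w[i].func(&w[i]);

                        if (ret < 0)
                                break;  /* wake function asked us to stop */
                        woken += (ret > 0);
                        if (ret && (w[i].flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
                                break;  /* enough exclusive waiters woken */
                }
                return woken;
        }

        static int always_wake(struct waiter *w) { (void)w; return 1; }

        int main(void)
        {
                struct waiter w[3] = {
                        { WQ_FLAG_EXCLUSIVE, always_wake },
                        { WQ_FLAG_EXCLUSIVE, always_wake },
                        { 0, always_wake },
                };

                /* With nr_exclusive == 1 only the first exclusive waiter is woken. */
                printf("woken: %d\n", wake_all(w, 3, 1));
                return 0;
        }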
@@ -637,9 +637,7 @@ static inline void tk_update_ktime_data(struct timekeeper *tk)
         tk->ktime_sec = seconds;
 
         /* Update the monotonic raw base */
-        seconds = tk->raw_sec;
-        nsec = (u32)(tk->tkr_raw.xtime_nsec >> tk->tkr_raw.shift);
-        tk->tkr_raw.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);
+        tk->tkr_raw.base = ns_to_ktime(tk->raw_sec * NSEC_PER_SEC);
 }
 
 /* must hold timekeeper_lock */
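Note: the read side of the raw clock adds tkr_raw.xtime_nsec on top of tkr_raw.base, so folding the sub-second part into the base as the removed lines did counts it twice; keeping only whole seconds in the base avoids the skew. A small sketch of the arithmetic with made-up values (illustrative, not kernel code):

        #include <stdio.h>

        #define NSEC_PER_SEC 1000000000ULL

        int main(void)
        {
                unsigned long long raw_sec = 100;       /* whole seconds of the raw clock */
                unsigned long long xtime_nsec = 500000; /* sub-second part, already shifted */

                /* Old scheme: base already contained the sub-second part ... */
                unsigned long long old_base = raw_sec * NSEC_PER_SEC + xtime_nsec;
                /* ... and the readout adds xtime_nsec again, double-counting it. */
                unsigned long long old_read = old_base + xtime_nsec;

                /* New scheme: base holds whole seconds only. */
                unsigned long long new_base = raw_sec * NSEC_PER_SEC;
                unsigned long long new_read = new_base + xtime_nsec;

                printf("old readout: %llu ns (off by %llu)\n", old_read, old_read - new_read);
                printf("new readout: %llu ns\n", new_read);
                return 0;
        }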
@@ -203,6 +203,7 @@ struct timer_base {
         bool                    migration_enabled;
         bool                    nohz_active;
         bool                    is_idle;
+        bool                    must_forward_clk;
         DECLARE_BITMAP(pending_map, WHEEL_SIZE);
         struct hlist_head       vectors[WHEEL_SIZE];
 } ____cacheline_aligned;
@@ -856,13 +857,19 @@ get_target_base(struct timer_base *base, unsigned tflags)
 
 static inline void forward_timer_base(struct timer_base *base)
 {
-        unsigned long jnow = READ_ONCE(jiffies);
+        unsigned long jnow;
 
         /*
-         * We only forward the base when it's idle and we have a delta between
-         * base clock and jiffies.
+         * We only forward the base when we are idle or have just come out of
+         * idle (must_forward_clk logic), and have a delta between base clock
+         * and jiffies. In the common case, run_timers will take care of it.
          */
-        if (!base->is_idle || (long) (jnow - base->clk) < 2)
+        if (likely(!base->must_forward_clk))
                 return;
 
+        jnow = READ_ONCE(jiffies);
+        base->must_forward_clk = base->is_idle;
+        if ((long)(jnow - base->clk) < 2)
+                return;
+
         /*
@@ -938,6 +945,11 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
          * same array bucket then just return:
          */
         if (timer_pending(timer)) {
+                /*
+                 * The downside of this optimization is that it can result in
+                 * larger granularity than you would get from adding a new
+                 * timer with this expiry.
+                 */
                 if (timer->expires == expires)
                         return 1;
 
@@ -948,6 +960,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
                  * dequeue/enqueue dance.
                  */
                 base = lock_timer_base(timer, &flags);
+                forward_timer_base(base);
 
                 clk = base->clk;
                 idx = calc_wheel_index(expires, clk);
@@ -964,6 +977,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
                 }
         } else {
                 base = lock_timer_base(timer, &flags);
+                forward_timer_base(base);
         }
 
         ret = detach_if_pending(timer, base, false);
@@ -991,12 +1005,10 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
                         raw_spin_lock(&base->lock);
                         WRITE_ONCE(timer->flags,
                                    (timer->flags & ~TIMER_BASEMASK) | base->cpu);
+                        forward_timer_base(base);
                 }
         }
 
-        /* Try to forward a stale timer base clock */
-        forward_timer_base(base);
-
         timer->expires = expires;
         /*
          * If 'idx' was calculated above and the base time did not advance
@@ -1112,6 +1124,7 @@ void add_timer_on(struct timer_list *timer, int cpu)
                 WRITE_ONCE(timer->flags,
                            (timer->flags & ~TIMER_BASEMASK) | cpu);
         }
+        forward_timer_base(base);
 
         debug_activate(timer, timer->expires);
         internal_add_timer(base, timer);
@@ -1497,10 +1510,16 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
                 if (!is_max_delta)
                         expires = basem + (u64)(nextevt - basej) * TICK_NSEC;
                 /*
-                 * If we expect to sleep more than a tick, mark the base idle:
+                 * If we expect to sleep more than a tick, mark the base idle.
+                 * Also the tick is stopped so any added timer must forward
+                 * the base clk itself to keep granularity small. This idle
+                 * logic is only maintained for the BASE_STD base, deferrable
+                 * timers may still see large granularity skew (by design).
                  */
-                if ((expires - basem) > TICK_NSEC)
+                if ((expires - basem) > TICK_NSEC) {
+                        base->must_forward_clk = true;
                         base->is_idle = true;
+                }
         }
         raw_spin_unlock(&base->lock);
 
@@ -1611,6 +1630,19 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h)
 {
         struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
 
+        /*
+         * must_forward_clk must be cleared before running timers so that any
+         * timer functions that call mod_timer will not try to forward the
+         * base. idle trcking / clock forwarding logic is only used with
+         * BASE_STD timers.
+         *
+         * The deferrable base does not do idle tracking at all, so we do
+         * not forward it. This can result in very large variations in
+         * granularity for deferrable timers, but they can be deferred for
+         * long periods due to idle.
+         */
+        base->must_forward_clk = false;
+
         __run_timers(base);
         if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active)
                 __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]));
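Note: taken together, these timer hunks make base->clk forwarding lazy: get_next_timer_interrupt() arms must_forward_clk when the base goes idle, the enqueue paths call forward_timer_base() to consume the flag and catch the clock up to jiffies, and run_timer_softirq() clears it before expiring timers. A stripped-down model of that lifecycle (illustrative only, far simpler than the real timer wheel):

        #include <stdbool.h>
        #include <stdio.h>

        struct toy_base {
                unsigned long clk;      /* last jiffy the wheel was advanced to */
                bool is_idle;
                bool must_forward_clk;
        };

        static unsigned long jiffies;

        /* Consume the flag and catch the base clock up, as forward_timer_base()
         * now does on every enqueue path. */
        static void forward(struct toy_base *b)
        {
                if (!b->must_forward_clk)
                        return;
                b->must_forward_clk = b->is_idle;       /* stay armed while still idle */
                if ((long)(jiffies - b->clk) < 2)
                        return;
                b->clk = jiffies;
        }

        int main(void)
        {
                struct toy_base b = { .clk = 0 };

                /* Going idle: mark the base so the next enqueue forwards it. */
                b.is_idle = true;
                b.must_forward_clk = true;

                jiffies = 1000;         /* long nohz sleep */

                /* New timer added while idle: the clock is forwarded first, so
                 * the timer is queued relative to now, not to the stale clk. */
                forward(&b);
                printf("clk after enqueue: %lu\n", b.clk);

                /* Softirq runs: stop forwarding while timers are being expired. */
                b.must_forward_clk = false;
                b.is_idle = false;
                return 0;
        }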
@@ -306,6 +306,7 @@ static void
 perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip,
                           struct ftrace_ops *ops, struct pt_regs *pt_regs)
 {
+        struct perf_event *event;
         struct ftrace_entry *entry;
         struct hlist_head *head;
         struct pt_regs regs;
@@ -329,8 +330,9 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip,
 
         entry->ip = ip;
         entry->parent_ip = parent_ip;
+        event = container_of(ops, struct perf_event, ftrace_ops);
         perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, TRACE_FN,
-                              1, &regs, head, NULL);
+                              1, &regs, head, NULL, event);
 
 #undef ENTRY_SIZE
 }
@@ -1200,7 +1200,7 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
         memset(&entry[1], 0, dsize);
         store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
         perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
-                              head, NULL);
+                              head, NULL, NULL);
 }
 NOKPROBE_SYMBOL(kprobe_perf_func);
 
@@ -1236,7 +1236,7 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
         entry->ret_ip = (unsigned long)ri->ret_addr;
         store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
         perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
-                              head, NULL);
+                              head, NULL, NULL);
 }
 NOKPROBE_SYMBOL(kretprobe_perf_func);
 #endif  /* CONFIG_PERF_EVENTS */
@@ -596,7 +596,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
                                (unsigned long *)&rec->args);
         perf_trace_buf_submit(rec, size, rctx,
                               sys_data->enter_event->event.type, 1, regs,
-                              head, NULL);
+                              head, NULL, NULL);
 }
 
 static int perf_sysenter_enable(struct trace_event_call *call)
@@ -667,7 +667,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
         rec->nr = syscall_nr;
         rec->ret = syscall_get_return_value(current, regs);
         perf_trace_buf_submit(rec, size, rctx, sys_data->exit_event->event.type,
-                              1, regs, head, NULL);
+                              1, regs, head, NULL, NULL);
 }
 
 static int perf_sysexit_enable(struct trace_event_call *call)
@@ -1156,7 +1156,7 @@ static void __uprobe_perf_func(struct trace_uprobe *tu,
         }
 
         perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
-                              head, NULL);
+                              head, NULL, NULL);
 out:
         preempt_enable();
 }