Merge branch 'linus' into locking/core, to fix up conflicts

Conflicts:
	mm/page_alloc.c

Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Ingo Molnar
2017-09-04 11:01:18 +02:00
1536 changed files with 48718 additions and 53809 deletions

View File

@@ -652,12 +652,27 @@ static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
}
}
static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab)
{
return htab->map.map_type == BPF_MAP_TYPE_HASH_OF_MAPS &&
BITS_PER_LONG == 64;
}
static u32 htab_size_value(const struct bpf_htab *htab, bool percpu)
{
u32 size = htab->map.value_size;
if (percpu || fd_htab_map_needs_adjust(htab))
size = round_up(size, 8);
return size;
}
static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
void *value, u32 key_size, u32 hash,
bool percpu, bool onallcpus,
struct htab_elem *old_elem)
{
u32 size = htab->map.value_size;
u32 size = htab_size_value(htab, percpu);
bool prealloc = htab_is_prealloc(htab);
struct htab_elem *l_new, **pl_new;
void __percpu *pptr;
@@ -696,9 +711,6 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
memcpy(l_new->key, key, key_size);
if (percpu) {
/* round up value_size to 8 bytes */
size = round_up(size, 8);
if (prealloc) {
pptr = htab_elem_get_ptr(l_new, key_size);
} else {
@@ -1209,17 +1221,9 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = {
static struct bpf_map *fd_htab_map_alloc(union bpf_attr *attr)
{
struct bpf_map *map;
if (attr->value_size != sizeof(u32))
return ERR_PTR(-EINVAL);
/* pointer is stored internally */
attr->value_size = sizeof(void *);
map = htab_map_alloc(attr);
attr->value_size = sizeof(u32);
return map;
return htab_map_alloc(attr);
}
static void fd_htab_map_free(struct bpf_map *map)

View File

@@ -1899,6 +1899,7 @@ static struct cftype files[] = {
{
.name = "memory_pressure",
.read_u64 = cpuset_read_u64,
.private = FILE_MEMORY_PRESSURE,
},
{

View File

@@ -7906,16 +7906,15 @@ void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx,
}
}
perf_tp_event(call->event.type, count, raw_data, size, regs, head,
rctx, task);
rctx, task, NULL);
}
EXPORT_SYMBOL_GPL(perf_trace_run_bpf_submit);
void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size,
struct pt_regs *regs, struct hlist_head *head, int rctx,
struct task_struct *task)
struct task_struct *task, struct perf_event *event)
{
struct perf_sample_data data;
struct perf_event *event;
struct perf_raw_record raw = {
.frag = {
@@ -7929,9 +7928,15 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size,
perf_trace_buf_update(record, event_type);
hlist_for_each_entry_rcu(event, head, hlist_entry) {
/* Use the given event instead of the hlist */
if (event) {
if (perf_tp_event_match(event, &data, regs))
perf_swevent_event(event, count, &data, regs);
} else {
hlist_for_each_entry_rcu(event, head, hlist_entry) {
if (perf_tp_event_match(event, &data, regs))
perf_swevent_event(event, count, &data, regs);
}
}
/*
@@ -9611,6 +9616,8 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
if (ret)
return -EFAULT;
attr->size = size;
if (attr->__reserved_1)
return -EINVAL;
@@ -10032,28 +10039,27 @@ SYSCALL_DEFINE5(perf_event_open,
goto err_context;
/*
* Do not allow to attach to a group in a different
* task or CPU context:
* Make sure we're both events for the same CPU;
* grouping events for different CPUs is broken; since
* you can never concurrently schedule them anyhow.
*/
if (move_group) {
/*
* Make sure we're both on the same task, or both
* per-cpu events.
*/
if (group_leader->ctx->task != ctx->task)
goto err_context;
if (group_leader->cpu != event->cpu)
goto err_context;
/*
* Make sure we're both events for the same CPU;
* grouping events for different CPUs is broken; since
* you can never concurrently schedule them anyhow.
*/
if (group_leader->cpu != event->cpu)
goto err_context;
} else {
if (group_leader->ctx != ctx)
goto err_context;
}
/*
* Make sure we're both on the same task, or both
* per-CPU events.
*/
if (group_leader->ctx->task != ctx->task)
goto err_context;
/*
* Do not allow to attach to a group in a different task
* or CPU context. If we're moving SW events, we'll fix
* this up later, so allow that.
*/
if (!move_group && group_leader->ctx != ctx)
goto err_context;
/*
* Only a group leader can be exclusive or pinned

View File

@@ -1262,8 +1262,6 @@ void uprobe_end_dup_mmap(void)
void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm)
{
newmm->uprobes_state.xol_area = NULL;
if (test_bit(MMF_HAS_UPROBES, &oldmm->flags)) {
set_bit(MMF_HAS_UPROBES, &newmm->flags);
/* unconditionally, dup_mmap() skips VM_DONTCOPY vmas */

View File

@@ -787,6 +787,13 @@ static void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
#endif
}
static void mm_init_uprobes_state(struct mm_struct *mm)
{
#ifdef CONFIG_UPROBES
mm->uprobes_state.xol_area = NULL;
#endif
}
static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
struct user_namespace *user_ns)
{
@@ -808,11 +815,13 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
mm_init_cpumask(mm);
mm_init_aio(mm);
mm_init_owner(mm, p);
RCU_INIT_POINTER(mm->exe_file, NULL);
mmu_notifier_mm_init(mm);
init_tlb_flush_pending(mm);
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
mm->pmd_huge_pte = NULL;
#endif
mm_init_uprobes_state(mm);
if (current->mm) {
mm->flags = current->mm->flags & MMF_INIT_MASK;

View File

@@ -637,6 +637,7 @@ repeat:
schedule();
try_to_freeze();
cond_resched();
goto repeat;
}
EXPORT_SYMBOL_GPL(kthread_worker_fn);

View File

@@ -70,9 +70,10 @@ static void __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode,
list_for_each_entry_safe(curr, next, &wq_head->head, entry) {
unsigned flags = curr->flags;
if (curr->func(curr, mode, wake_flags, key) &&
(flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
int ret = curr->func(curr, mode, wake_flags, key);
if (ret < 0)
break;
if (ret && (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
break;
}
}

View File

@@ -637,9 +637,7 @@ static inline void tk_update_ktime_data(struct timekeeper *tk)
tk->ktime_sec = seconds;
/* Update the monotonic raw base */
seconds = tk->raw_sec;
nsec = (u32)(tk->tkr_raw.xtime_nsec >> tk->tkr_raw.shift);
tk->tkr_raw.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);
tk->tkr_raw.base = ns_to_ktime(tk->raw_sec * NSEC_PER_SEC);
}
/* must hold timekeeper_lock */

View File

@@ -203,6 +203,7 @@ struct timer_base {
bool migration_enabled;
bool nohz_active;
bool is_idle;
bool must_forward_clk;
DECLARE_BITMAP(pending_map, WHEEL_SIZE);
struct hlist_head vectors[WHEEL_SIZE];
} ____cacheline_aligned;
@@ -856,13 +857,19 @@ get_target_base(struct timer_base *base, unsigned tflags)
static inline void forward_timer_base(struct timer_base *base)
{
unsigned long jnow = READ_ONCE(jiffies);
unsigned long jnow;
/*
* We only forward the base when it's idle and we have a delta between
* base clock and jiffies.
* We only forward the base when we are idle or have just come out of
* idle (must_forward_clk logic), and have a delta between base clock
* and jiffies. In the common case, run_timers will take care of it.
*/
if (!base->is_idle || (long) (jnow - base->clk) < 2)
if (likely(!base->must_forward_clk))
return;
jnow = READ_ONCE(jiffies);
base->must_forward_clk = base->is_idle;
if ((long)(jnow - base->clk) < 2)
return;
/*
@@ -938,6 +945,11 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
* same array bucket then just return:
*/
if (timer_pending(timer)) {
/*
* The downside of this optimization is that it can result in
* larger granularity than you would get from adding a new
* timer with this expiry.
*/
if (timer->expires == expires)
return 1;
@@ -948,6 +960,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
* dequeue/enqueue dance.
*/
base = lock_timer_base(timer, &flags);
forward_timer_base(base);
clk = base->clk;
idx = calc_wheel_index(expires, clk);
@@ -964,6 +977,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
}
} else {
base = lock_timer_base(timer, &flags);
forward_timer_base(base);
}
ret = detach_if_pending(timer, base, false);
@@ -991,12 +1005,10 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
raw_spin_lock(&base->lock);
WRITE_ONCE(timer->flags,
(timer->flags & ~TIMER_BASEMASK) | base->cpu);
forward_timer_base(base);
}
}
/* Try to forward a stale timer base clock */
forward_timer_base(base);
timer->expires = expires;
/*
* If 'idx' was calculated above and the base time did not advance
@@ -1112,6 +1124,7 @@ void add_timer_on(struct timer_list *timer, int cpu)
WRITE_ONCE(timer->flags,
(timer->flags & ~TIMER_BASEMASK) | cpu);
}
forward_timer_base(base);
debug_activate(timer, timer->expires);
internal_add_timer(base, timer);
@@ -1497,10 +1510,16 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
if (!is_max_delta)
expires = basem + (u64)(nextevt - basej) * TICK_NSEC;
/*
* If we expect to sleep more than a tick, mark the base idle:
* If we expect to sleep more than a tick, mark the base idle.
* Also the tick is stopped so any added timer must forward
* the base clk itself to keep granularity small. This idle
* logic is only maintained for the BASE_STD base, deferrable
* timers may still see large granularity skew (by design).
*/
if ((expires - basem) > TICK_NSEC)
if ((expires - basem) > TICK_NSEC) {
base->must_forward_clk = true;
base->is_idle = true;
}
}
raw_spin_unlock(&base->lock);
@@ -1611,6 +1630,19 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h)
{
struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
/*
* must_forward_clk must be cleared before running timers so that any
* timer functions that call mod_timer will not try to forward the
* base. idle trcking / clock forwarding logic is only used with
* BASE_STD timers.
*
* The deferrable base does not do idle tracking at all, so we do
* not forward it. This can result in very large variations in
* granularity for deferrable timers, but they can be deferred for
* long periods due to idle.
*/
base->must_forward_clk = false;
__run_timers(base);
if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active)
__run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]));

View File

@@ -306,6 +306,7 @@ static void
perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *ops, struct pt_regs *pt_regs)
{
struct perf_event *event;
struct ftrace_entry *entry;
struct hlist_head *head;
struct pt_regs regs;
@@ -329,8 +330,9 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip,
entry->ip = ip;
entry->parent_ip = parent_ip;
event = container_of(ops, struct perf_event, ftrace_ops);
perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, TRACE_FN,
1, &regs, head, NULL);
1, &regs, head, NULL, event);
#undef ENTRY_SIZE
}

View File

@@ -1200,7 +1200,7 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
memset(&entry[1], 0, dsize);
store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
head, NULL);
head, NULL, NULL);
}
NOKPROBE_SYMBOL(kprobe_perf_func);
@@ -1236,7 +1236,7 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
entry->ret_ip = (unsigned long)ri->ret_addr;
store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
head, NULL);
head, NULL, NULL);
}
NOKPROBE_SYMBOL(kretprobe_perf_func);
#endif /* CONFIG_PERF_EVENTS */

View File

@@ -596,7 +596,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
(unsigned long *)&rec->args);
perf_trace_buf_submit(rec, size, rctx,
sys_data->enter_event->event.type, 1, regs,
head, NULL);
head, NULL, NULL);
}
static int perf_sysenter_enable(struct trace_event_call *call)
@@ -667,7 +667,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
rec->nr = syscall_nr;
rec->ret = syscall_get_return_value(current, regs);
perf_trace_buf_submit(rec, size, rctx, sys_data->exit_event->event.type,
1, regs, head, NULL);
1, regs, head, NULL, NULL);
}
static int perf_sysexit_enable(struct trace_event_call *call)

View File

@@ -1156,7 +1156,7 @@ static void __uprobe_perf_func(struct trace_uprobe *tu,
}
perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
head, NULL);
head, NULL, NULL);
out:
preempt_enable();
}