Merge branch 'master' into for-4.4-fixes

The following commit which went into mainline through networking tree

  3b13758f51 ("cgroups: Allow dynamically changing net_classid")

conflicts in net/core/netclassid_cgroup.c with the following pending
fix in cgroup/for-4.4-fixes.

  1f7dd3e5a6 ("cgroup: fix handling of multi-destination migration from subtree_control enabling")

The former separates out update_classid() from cgrp_attach() and
updates it to walk all fds of all tasks in the target css so that it
can be used from both migration and config change paths.  The latter
drops @css from cgrp_attach().

Resolve the conflict by making cgrp_attach() call update_classid()
with the css from the first task.  We can revive @tset walking in
cgrp_attach() but given that net_cls is v1 only where there always is
only one target css during migration, this is fine.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Nina Schiff <ninasc@fb.com>
This commit is contained in:
Tejun Heo
2015-12-07 10:09:03 -05:00
885 changed files with 12494 additions and 9541 deletions

View File

@@ -28,11 +28,17 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
attr->value_size == 0)
return ERR_PTR(-EINVAL);
if (attr->value_size >= 1 << (KMALLOC_SHIFT_MAX - 1))
/* if value_size is bigger, the user space won't be able to
* access the elements.
*/
return ERR_PTR(-E2BIG);
elem_size = round_up(attr->value_size, 8);
/* check round_up into zero and u32 overflow */
if (elem_size == 0 ||
attr->max_entries > (U32_MAX - sizeof(*array)) / elem_size)
attr->max_entries > (U32_MAX - PAGE_SIZE - sizeof(*array)) / elem_size)
return ERR_PTR(-ENOMEM);
array_size = sizeof(*array) + attr->max_entries * elem_size;
@@ -105,7 +111,7 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
/* all elements already exist */
return -EEXIST;
memcpy(array->value + array->elem_size * index, value, array->elem_size);
memcpy(array->value + array->elem_size * index, value, map->value_size);
return 0;
}

View File

@@ -64,12 +64,35 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
*/
goto free_htab;
err = -ENOMEM;
if (htab->map.value_size >= (1 << (KMALLOC_SHIFT_MAX - 1)) -
MAX_BPF_STACK - sizeof(struct htab_elem))
/* if value_size is bigger, the user space won't be able to
* access the elements via bpf syscall. This check also makes
* sure that the elem_size doesn't overflow and it's
* kmalloc-able later in htab_map_update_elem()
*/
goto free_htab;
htab->elem_size = sizeof(struct htab_elem) +
round_up(htab->map.key_size, 8) +
htab->map.value_size;
/* prevent zero size kmalloc and check for u32 overflow */
if (htab->n_buckets == 0 ||
htab->n_buckets > U32_MAX / sizeof(struct hlist_head))
goto free_htab;
if ((u64) htab->n_buckets * sizeof(struct hlist_head) +
(u64) htab->elem_size * htab->map.max_entries >=
U32_MAX - PAGE_SIZE)
/* make sure page count doesn't overflow */
goto free_htab;
htab->map.pages = round_up(htab->n_buckets * sizeof(struct hlist_head) +
htab->elem_size * htab->map.max_entries,
PAGE_SIZE) >> PAGE_SHIFT;
err = -ENOMEM;
htab->buckets = kmalloc_array(htab->n_buckets, sizeof(struct hlist_head),
GFP_USER | __GFP_NOWARN);
@@ -85,13 +108,6 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
raw_spin_lock_init(&htab->lock);
htab->count = 0;
htab->elem_size = sizeof(struct htab_elem) +
round_up(htab->map.key_size, 8) +
htab->map.value_size;
htab->map.pages = round_up(htab->n_buckets * sizeof(struct hlist_head) +
htab->elem_size * htab->map.max_entries,
PAGE_SIZE) >> PAGE_SHIFT;
return &htab->map;
free_htab:
@@ -222,7 +238,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
WARN_ON_ONCE(!rcu_read_lock_held());
/* allocate new element outside of lock */
l_new = kmalloc(htab->elem_size, GFP_ATOMIC);
l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN);
if (!l_new)
return -ENOMEM;

View File

@@ -34,7 +34,7 @@ static void *bpf_any_get(void *raw, enum bpf_type type)
atomic_inc(&((struct bpf_prog *)raw)->aux->refcnt);
break;
case BPF_TYPE_MAP:
atomic_inc(&((struct bpf_map *)raw)->refcnt);
bpf_map_inc(raw, true);
break;
default:
WARN_ON_ONCE(1);
@@ -51,7 +51,7 @@ static void bpf_any_put(void *raw, enum bpf_type type)
bpf_prog_put(raw);
break;
case BPF_TYPE_MAP:
bpf_map_put(raw);
bpf_map_put_with_uref(raw);
break;
default:
WARN_ON_ONCE(1);
@@ -64,7 +64,7 @@ static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type)
void *raw;
*type = BPF_TYPE_MAP;
raw = bpf_map_get(ufd);
raw = bpf_map_get_with_uref(ufd);
if (IS_ERR(raw)) {
*type = BPF_TYPE_PROG;
raw = bpf_prog_get(ufd);

View File

@@ -82,6 +82,14 @@ static void bpf_map_free_deferred(struct work_struct *work)
map->ops->map_free(map);
}
static void bpf_map_put_uref(struct bpf_map *map)
{
if (atomic_dec_and_test(&map->usercnt)) {
if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
bpf_fd_array_map_clear(map);
}
}
/* decrement map refcnt and schedule it for freeing via workqueue
* (unrelying map implementation ops->map_free() might sleep)
*/
@@ -93,17 +101,15 @@ void bpf_map_put(struct bpf_map *map)
}
}
void bpf_map_put_with_uref(struct bpf_map *map)
{
bpf_map_put_uref(map);
bpf_map_put(map);
}
static int bpf_map_release(struct inode *inode, struct file *filp)
{
struct bpf_map *map = filp->private_data;
if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
/* prog_array stores refcnt-ed bpf_prog pointers
* release them all when user space closes prog_array_fd
*/
bpf_fd_array_map_clear(map);
bpf_map_put(map);
bpf_map_put_with_uref(filp->private_data);
return 0;
}
@@ -142,6 +148,7 @@ static int map_create(union bpf_attr *attr)
return PTR_ERR(map);
atomic_set(&map->refcnt, 1);
atomic_set(&map->usercnt, 1);
err = bpf_map_charge_memlock(map);
if (err)
@@ -174,7 +181,14 @@ struct bpf_map *__bpf_map_get(struct fd f)
return f.file->private_data;
}
struct bpf_map *bpf_map_get(u32 ufd)
void bpf_map_inc(struct bpf_map *map, bool uref)
{
atomic_inc(&map->refcnt);
if (uref)
atomic_inc(&map->usercnt);
}
struct bpf_map *bpf_map_get_with_uref(u32 ufd)
{
struct fd f = fdget(ufd);
struct bpf_map *map;
@@ -183,7 +197,7 @@ struct bpf_map *bpf_map_get(u32 ufd)
if (IS_ERR(map))
return map;
atomic_inc(&map->refcnt);
bpf_map_inc(map, true);
fdput(f);
return map;
@@ -226,7 +240,7 @@ static int map_lookup_elem(union bpf_attr *attr)
goto free_key;
err = -ENOMEM;
value = kmalloc(map->value_size, GFP_USER);
value = kmalloc(map->value_size, GFP_USER | __GFP_NOWARN);
if (!value)
goto free_key;
@@ -285,7 +299,7 @@ static int map_update_elem(union bpf_attr *attr)
goto free_key;
err = -ENOMEM;
value = kmalloc(map->value_size, GFP_USER);
value = kmalloc(map->value_size, GFP_USER | __GFP_NOWARN);
if (!value)
goto free_key;

View File

@@ -2021,8 +2021,7 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env)
* will be used by the valid program until it's unloaded
* and all maps are released in free_bpf_prog_info()
*/
atomic_inc(&map->refcnt);
bpf_map_inc(map, false);
fdput(f);
next_insn:
insn++;

View File

@@ -294,6 +294,12 @@ static int klp_write_object_relocations(struct module *pmod,
for (reloc = obj->relocs; reloc->name; reloc++) {
if (!klp_is_module(obj)) {
#if defined(CONFIG_RANDOMIZE_BASE)
/* If KASLR has been enabled, adjust old value accordingly */
if (kaslr_enabled())
reloc->val += kaslr_offset();
#endif
ret = klp_verify_vmlinux_symbol(reloc->name,
reloc->val);
if (ret)

View File

@@ -152,8 +152,11 @@ void panic(const char *fmt, ...)
* We may have ended up stopping the CPU holding the lock (in
* smp_send_stop()) while still having some valuable data in the console
* buffer. Try to acquire the lock then release it regardless of the
* result. The release will also print the buffers out.
* result. The release will also print the buffers out. Locks debug
* should be disabled to avoid reporting bad unlock balance when
* panic() is not being callled from OOPS.
*/
debug_locks_off();
console_trylock();
console_unlock();

View File

@@ -467,7 +467,7 @@ struct pid *get_task_pid(struct task_struct *task, enum pid_type type)
rcu_read_lock();
if (type != PIDTYPE_PID)
task = task->group_leader;
pid = get_pid(task->pids[type].pid);
pid = get_pid(rcu_dereference(task->pids[type].pid));
rcu_read_unlock();
return pid;
}
@@ -528,7 +528,7 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
if (likely(pid_alive(task))) {
if (type != PIDTYPE_PID)
task = task->group_leader;
nr = pid_nr_ns(task->pids[type].pid, ns);
nr = pid_nr_ns(rcu_dereference(task->pids[type].pid), ns);
}
rcu_read_unlock();

View File

@@ -1946,6 +1946,25 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
goto stat;
#ifdef CONFIG_SMP
/*
* Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be
* possible to, falsely, observe p->on_cpu == 0.
*
* One must be running (->on_cpu == 1) in order to remove oneself
* from the runqueue.
*
* [S] ->on_cpu = 1; [L] ->on_rq
* UNLOCK rq->lock
* RMB
* LOCK rq->lock
* [S] ->on_rq = 0; [L] ->on_cpu
*
* Pairs with the full barrier implied in the UNLOCK+LOCK on rq->lock
* from the consecutive calls to schedule(); the first switching to our
* task, the second putting it to sleep.
*/
smp_rmb();
/*
* If the owning (remote) cpu is still in the middle of schedule() with
* this task as prev, wait until its done referencing the task.
@@ -1953,7 +1972,13 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
while (p->on_cpu)
cpu_relax();
/*
* Pairs with the smp_wmb() in finish_lock_switch().
* Combined with the control dependency above, we have an effective
* smp_load_acquire() without the need for full barriers.
*
* Pairs with the smp_store_release() in finish_lock_switch().
*
* This ensures that tasks getting woken will be fully ordered against
* their previous state and preserve Program Order.
*/
smp_rmb();
@@ -2039,7 +2064,6 @@ out:
*/
int wake_up_process(struct task_struct *p)
{
WARN_ON(task_is_stopped_or_traced(p));
return try_to_wake_up(p, TASK_NORMAL, 0);
}
EXPORT_SYMBOL(wake_up_process);
@@ -5847,13 +5871,13 @@ static int init_rootdomain(struct root_domain *rd)
{
memset(rd, 0, sizeof(*rd));
if (!alloc_cpumask_var(&rd->span, GFP_KERNEL))
if (!zalloc_cpumask_var(&rd->span, GFP_KERNEL))
goto out;
if (!alloc_cpumask_var(&rd->online, GFP_KERNEL))
if (!zalloc_cpumask_var(&rd->online, GFP_KERNEL))
goto free_span;
if (!alloc_cpumask_var(&rd->dlo_mask, GFP_KERNEL))
if (!zalloc_cpumask_var(&rd->dlo_mask, GFP_KERNEL))
goto free_online;
if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL))
if (!zalloc_cpumask_var(&rd->rto_mask, GFP_KERNEL))
goto free_dlo_mask;
init_dl_bw(&rd->dl_bw);

View File

@@ -788,6 +788,9 @@ cputime_t task_gtime(struct task_struct *t)
unsigned int seq;
cputime_t gtime;
if (!context_tracking_is_enabled())
return t->gtime;
do {
seq = read_seqbegin(&t->vtime_seqlock);

View File

@@ -64,7 +64,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
raw_spin_unlock(&rt_b->rt_runtime_lock);
}
#ifdef CONFIG_SMP
#if defined(CONFIG_SMP) && defined(HAVE_RT_PUSH_IPI)
static void push_irq_work_func(struct irq_work *work);
#endif

View File

@@ -1073,6 +1073,9 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
* We must ensure this doesn't happen until the switch is completely
* finished.
*
* In particular, the load of prev->state in finish_task_switch() must
* happen before this.
*
* Pairs with the control dependency and rmb in try_to_wake_up().
*/
smp_store_release(&prev->on_cpu, 0);

View File

@@ -583,18 +583,18 @@ EXPORT_SYMBOL(wake_up_atomic_t);
__sched int bit_wait(struct wait_bit_key *word)
{
if (signal_pending_state(current->state, current))
return 1;
schedule();
if (signal_pending(current))
return -EINTR;
return 0;
}
EXPORT_SYMBOL(bit_wait);
__sched int bit_wait_io(struct wait_bit_key *word)
{
if (signal_pending_state(current->state, current))
return 1;
io_schedule();
if (signal_pending(current))
return -EINTR;
return 0;
}
EXPORT_SYMBOL(bit_wait_io);
@@ -602,11 +602,11 @@ EXPORT_SYMBOL(bit_wait_io);
__sched int bit_wait_timeout(struct wait_bit_key *word)
{
unsigned long now = READ_ONCE(jiffies);
if (signal_pending_state(current->state, current))
return 1;
if (time_after_eq(now, word->timeout))
return -EAGAIN;
schedule_timeout(word->timeout - now);
if (signal_pending(current))
return -EINTR;
return 0;
}
EXPORT_SYMBOL_GPL(bit_wait_timeout);
@@ -614,11 +614,11 @@ EXPORT_SYMBOL_GPL(bit_wait_timeout);
__sched int bit_wait_io_timeout(struct wait_bit_key *word)
{
unsigned long now = READ_ONCE(jiffies);
if (signal_pending_state(current->state, current))
return 1;
if (time_after_eq(now, word->timeout))
return -EAGAIN;
io_schedule_timeout(word->timeout - now);
if (signal_pending(current))
return -EINTR;
return 0;
}
EXPORT_SYMBOL_GPL(bit_wait_io_timeout);

View File

@@ -3503,7 +3503,7 @@ SYSCALL_DEFINE0(pause)
#endif
int sigsuspend(sigset_t *set)
static int sigsuspend(sigset_t *set)
{
current->saved_sigmask = current->blocked;
set_current_blocked(set);

View File

@@ -1887,12 +1887,6 @@ rb_event_index(struct ring_buffer_event *event)
return (addr & ~PAGE_MASK) - BUF_PAGE_HDR_SIZE;
}
static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
{
cpu_buffer->read_stamp = cpu_buffer->reader_page->page->time_stamp;
cpu_buffer->reader_page->read = 0;
}
static void rb_inc_iter(struct ring_buffer_iter *iter)
{
struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
@@ -2803,8 +2797,11 @@ rb_reserve_next_event(struct ring_buffer *buffer,
event = __rb_reserve_next(cpu_buffer, &info);
if (unlikely(PTR_ERR(event) == -EAGAIN))
if (unlikely(PTR_ERR(event) == -EAGAIN)) {
if (info.add_timestamp)
info.length -= RB_LEN_TIME_EXTEND;
goto again;
}
if (!event)
goto out_fail;
@@ -3626,7 +3623,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
/* Finally update the reader page to the new head */
cpu_buffer->reader_page = reader;
rb_reset_reader_page(cpu_buffer);
cpu_buffer->reader_page->read = 0;
if (overwrite != cpu_buffer->last_overrun) {
cpu_buffer->lost_events = overwrite - cpu_buffer->last_overrun;
@@ -3636,6 +3633,10 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
goto again;
out:
/* Update the read_stamp on the first event */
if (reader && reader->read == 0)
cpu_buffer->read_stamp = reader->page->time_stamp;
arch_spin_unlock(&cpu_buffer->lock);
local_irq_restore(flags);

View File

@@ -582,6 +582,12 @@ static void __ftrace_clear_event_pids(struct trace_array *tr)
unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre, tr);
unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_post, tr);
unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre, tr);
unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post, tr);
unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_pre, tr);
unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_post, tr);
list_for_each_entry(file, &tr->events, list) {
clear_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
}
@@ -1729,6 +1735,16 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
tr, INT_MAX);
register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_post,
tr, 0);
register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre,
tr, INT_MAX);
register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post,
tr, 0);
register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_pre,
tr, INT_MAX);
register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_post,
tr, 0);
}
/*