Merge tag 'v4.18-rc6' into for-4.19/block2

Pull in 4.18-rc6 to get the NVMe core AEN change to avoid a
merge conflict down the line.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Jens Axboe
2018-08-05 19:32:09 -06:00
487 changed files with 4170 additions and 2412 deletions

View File

@@ -991,16 +991,13 @@ static void btf_int_bits_seq_show(const struct btf *btf,
void *data, u8 bits_offset,
struct seq_file *m)
{
u16 left_shift_bits, right_shift_bits;
u32 int_data = btf_type_int(t);
u16 nr_bits = BTF_INT_BITS(int_data);
u16 total_bits_offset;
u16 nr_copy_bytes;
u16 nr_copy_bits;
u8 nr_upper_bits;
union {
u64 u64_num;
u8 u8_nums[8];
} print_num;
u64 print_num;
total_bits_offset = bits_offset + BTF_INT_OFFSET(int_data);
data += BITS_ROUNDDOWN_BYTES(total_bits_offset);
@@ -1008,21 +1005,20 @@ static void btf_int_bits_seq_show(const struct btf *btf,
nr_copy_bits = nr_bits + bits_offset;
nr_copy_bytes = BITS_ROUNDUP_BYTES(nr_copy_bits);
print_num.u64_num = 0;
memcpy(&print_num.u64_num, data, nr_copy_bytes);
print_num = 0;
memcpy(&print_num, data, nr_copy_bytes);
/* Ditch the higher order bits */
nr_upper_bits = BITS_PER_BYTE_MASKED(nr_copy_bits);
if (nr_upper_bits) {
/* We need to mask out some bits of the upper byte. */
u8 mask = (1 << nr_upper_bits) - 1;
#ifdef __BIG_ENDIAN_BITFIELD
left_shift_bits = bits_offset;
#else
left_shift_bits = BITS_PER_U64 - nr_copy_bits;
#endif
right_shift_bits = BITS_PER_U64 - nr_bits;
print_num.u8_nums[nr_copy_bytes - 1] &= mask;
}
print_num <<= left_shift_bits;
print_num >>= right_shift_bits;
print_num.u64_num >>= bits_offset;
seq_printf(m, "0x%llx", print_num.u64_num);
seq_printf(m, "0x%llx", print_num);
}
static void btf_int_seq_show(const struct btf *btf, const struct btf_type *t,

View File

@@ -334,10 +334,15 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
{
struct net_device *dev = dst->dev;
struct xdp_frame *xdpf;
int err;
if (!dev->netdev_ops->ndo_xdp_xmit)
return -EOPNOTSUPP;
err = xdp_ok_fwd_dev(dev, xdp->data_end - xdp->data);
if (unlikely(err))
return err;
xdpf = convert_to_xdp_frame(xdp);
if (unlikely(!xdpf))
return -EOVERFLOW;
@@ -350,7 +355,7 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
{
int err;
err = __xdp_generic_ok_fwd_dev(skb, dst->dev);
err = xdp_ok_fwd_dev(dst->dev, skb->len);
if (unlikely(err))
return err;
skb->dev = dst->dev;

View File

@@ -747,13 +747,15 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
* old element will be freed immediately.
* Otherwise return an error
*/
atomic_dec(&htab->count);
return ERR_PTR(-E2BIG);
l_new = ERR_PTR(-E2BIG);
goto dec_count;
}
l_new = kmalloc_node(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN,
htab->map.numa_node);
if (!l_new)
return ERR_PTR(-ENOMEM);
if (!l_new) {
l_new = ERR_PTR(-ENOMEM);
goto dec_count;
}
}
memcpy(l_new->key, key, key_size);
@@ -766,7 +768,8 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
GFP_ATOMIC | __GFP_NOWARN);
if (!pptr) {
kfree(l_new);
return ERR_PTR(-ENOMEM);
l_new = ERR_PTR(-ENOMEM);
goto dec_count;
}
}
@@ -780,6 +783,9 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
l_new->hash = hash;
return l_new;
dec_count:
atomic_dec(&htab->count);
return l_new;
}
static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old,

View File

@@ -312,10 +312,12 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
struct smap_psock *psock;
struct sock *osk;
lock_sock(sk);
rcu_read_lock();
psock = smap_psock_sk(sk);
if (unlikely(!psock)) {
rcu_read_unlock();
release_sock(sk);
return sk->sk_prot->close(sk, timeout);
}
@@ -371,6 +373,7 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
e = psock_map_pop(sk, psock);
}
rcu_read_unlock();
release_sock(sk);
close_fun(sk, timeout);
}
@@ -568,7 +571,8 @@ static int free_sg(struct sock *sk, int start, struct sk_msg_buff *md)
while (sg[i].length) {
free += sg[i].length;
sk_mem_uncharge(sk, sg[i].length);
put_page(sg_page(&sg[i]));
if (!md->skb)
put_page(sg_page(&sg[i]));
sg[i].length = 0;
sg[i].page_link = 0;
sg[i].offset = 0;
@@ -577,6 +581,8 @@ static int free_sg(struct sock *sk, int start, struct sk_msg_buff *md)
if (i == MAX_SKB_FRAGS)
i = 0;
}
if (md->skb)
consume_skb(md->skb);
return free;
}
@@ -1230,7 +1236,7 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
*/
TCP_SKB_CB(skb)->bpf.sk_redir = NULL;
skb->sk = psock->sock;
bpf_compute_data_pointers(skb);
bpf_compute_data_end_sk_skb(skb);
preempt_disable();
rc = (*prog->bpf_func)(skb, prog->insnsi);
preempt_enable();
@@ -1485,7 +1491,7 @@ static int smap_parse_func_strparser(struct strparser *strp,
* any socket yet.
*/
skb->sk = psock->sock;
bpf_compute_data_pointers(skb);
bpf_compute_data_end_sk_skb(skb);
rc = (*prog->bpf_func)(skb, prog->insnsi);
skb->sk = NULL;
rcu_read_unlock();
@@ -1896,7 +1902,7 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
e = kzalloc(sizeof(*e), GFP_ATOMIC | __GFP_NOWARN);
if (!e) {
err = -ENOMEM;
goto out_progs;
goto out_free;
}
}
@@ -2069,7 +2075,13 @@ static int sock_map_update_elem(struct bpf_map *map,
return -EOPNOTSUPP;
}
lock_sock(skops.sk);
preempt_disable();
rcu_read_lock();
err = sock_map_ctx_update_elem(&skops, map, key, flags);
rcu_read_unlock();
preempt_enable();
release_sock(skops.sk);
fput(socket->file);
return err;
}
@@ -2342,7 +2354,10 @@ static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops,
if (err)
goto err;
/* bpf_map_update_elem() can be called in_irq() */
/* psock is valid here because otherwise above *ctx_update_elem would
* have thrown an error. It is safe to skip error check.
*/
psock = smap_psock_sk(sock);
raw_spin_lock_bh(&b->lock);
l_old = lookup_elem_raw(head, hash, key, key_size);
if (l_old && map_flags == BPF_NOEXIST) {
@@ -2360,12 +2375,6 @@ static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops,
goto bucket_err;
}
psock = smap_psock_sk(sock);
if (unlikely(!psock)) {
err = -EINVAL;
goto bucket_err;
}
rcu_assign_pointer(e->hash_link, l_new);
rcu_assign_pointer(e->htab,
container_of(map, struct bpf_htab, map));
@@ -2388,12 +2397,10 @@ static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops,
raw_spin_unlock_bh(&b->lock);
return 0;
bucket_err:
smap_release_sock(psock, sock);
raw_spin_unlock_bh(&b->lock);
err:
kfree(e);
psock = smap_psock_sk(sock);
if (psock)
smap_release_sock(psock, sock);
return err;
}
@@ -2415,7 +2422,13 @@ static int sock_hash_update_elem(struct bpf_map *map,
return -EINVAL;
}
lock_sock(skops.sk);
preempt_disable();
rcu_read_lock();
err = sock_hash_ctx_update_elem(&skops, map, key, flags);
rcu_read_unlock();
preempt_enable();
release_sock(skops.sk);
fput(socket->file);
return err;
}
@@ -2472,10 +2485,8 @@ struct sock *__sock_hash_lookup_elem(struct bpf_map *map, void *key)
b = __select_bucket(htab, hash);
head = &b->head;
raw_spin_lock_bh(&b->lock);
l = lookup_elem_raw(head, hash, key, key_size);
sk = l ? l->sk : NULL;
raw_spin_unlock_bh(&b->lock);
return sk;
}

View File

@@ -735,7 +735,9 @@ static int map_update_elem(union bpf_attr *attr)
if (bpf_map_is_dev_bound(map)) {
err = bpf_map_offload_update_elem(map, key, value, attr->flags);
goto out;
} else if (map->map_type == BPF_MAP_TYPE_CPUMAP) {
} else if (map->map_type == BPF_MAP_TYPE_CPUMAP ||
map->map_type == BPF_MAP_TYPE_SOCKHASH ||
map->map_type == BPF_MAP_TYPE_SOCKMAP) {
err = map->ops->map_update_elem(map, key, value, attr->flags);
goto out;
}

View File

@@ -5430,6 +5430,10 @@ static int jit_subprogs(struct bpf_verifier_env *env)
if (insn->code != (BPF_JMP | BPF_CALL) ||
insn->src_reg != BPF_PSEUDO_CALL)
continue;
/* Upon error here we cannot fall back to interpreter but
* need a hard reject of the program. Thus -EFAULT is
* propagated in any case.
*/
subprog = find_subprog(env, i + insn->imm + 1);
if (subprog < 0) {
WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
@@ -5450,7 +5454,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL);
if (!func)
return -ENOMEM;
goto out_undo_insn;
for (i = 0; i < env->subprog_cnt; i++) {
subprog_start = subprog_end;
@@ -5515,7 +5519,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
tmp = bpf_int_jit_compile(func[i]);
if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
err = -EFAULT;
err = -ENOTSUPP;
goto out_free;
}
cond_resched();
@@ -5552,6 +5556,7 @@ out_free:
if (func[i])
bpf_jit_free(func[i]);
kfree(func);
out_undo_insn:
/* cleanup main prog to be interpreted */
prog->jit_requested = 0;
for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
@@ -5578,6 +5583,8 @@ static int fixup_call_args(struct bpf_verifier_env *env)
err = jit_subprogs(env);
if (err == 0)
return 0;
if (err == -EFAULT)
return err;
}
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
for (i = 0; i < prog->len; i++, insn++) {

View File

@@ -303,11 +303,38 @@ struct kmem_cache *files_cachep;
struct kmem_cache *fs_cachep;
/* SLAB cache for vm_area_struct structures */
struct kmem_cache *vm_area_cachep;
static struct kmem_cache *vm_area_cachep;
/* SLAB cache for mm_struct structures (tsk->mm) */
static struct kmem_cache *mm_cachep;
/*
 * vm_area_alloc - allocate a new vm_area_struct owned by @mm.
 * @mm: address space recorded in the new vma's vm_mm field.
 *
 * The object comes from vm_area_cachep via kmem_cache_zalloc() with
 * GFP_KERNEL. On success vm_mm is set to @mm and anon_vma_chain is
 * initialised as an empty list.
 *
 * Returns the new vma, or NULL if the allocation failed.
 */
struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
{
struct vm_area_struct *vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
/* Only touch the object when the allocation succeeded. */
if (vma) {
vma->vm_mm = mm;
INIT_LIST_HEAD(&vma->anon_vma_chain);
}
return vma;
}
/*
 * vm_area_dup - allocate a vm_area_struct and copy @orig into it.
 * @orig: vma to copy; it is read but not modified.
 *
 * The object comes from vm_area_cachep via kmem_cache_alloc() with
 * GFP_KERNEL and is filled by a whole-struct assignment from @orig.
 *
 * Returns the copy, or NULL if the allocation failed.
 */
struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
{
struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
if (new) {
*new = *orig;
/*
 * The struct copy also copied orig's anon_vma_chain list pointers;
 * reset the head so the copy starts with its own empty list.
 */
INIT_LIST_HEAD(&new->anon_vma_chain);
}
return new;
}
/*
 * vm_area_free - hand @vma back to vm_area_cachep.
 * @vma: object to free; only the kmem_cache_free() call is made here,
 *       nothing else is torn down.
 */
void vm_area_free(struct vm_area_struct *vma)
{
kmem_cache_free(vm_area_cachep, vma);
}
static void account_kernel_stack(struct task_struct *tsk, int account)
{
void *stack = task_stack_page(tsk);
@@ -455,11 +482,9 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
goto fail_nomem;
charge = len;
}
tmp = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
tmp = vm_area_dup(mpnt);
if (!tmp)
goto fail_nomem;
*tmp = *mpnt;
INIT_LIST_HEAD(&tmp->anon_vma_chain);
retval = vma_dup_policy(mpnt, tmp);
if (retval)
goto fail_nomem_policy;
@@ -539,7 +564,7 @@ fail_uprobe_end:
fail_nomem_anon_vma_fork:
mpol_put(vma_policy(tmp));
fail_nomem_policy:
kmem_cache_free(vm_area_cachep, tmp);
vm_area_free(tmp);
fail_nomem:
retval = -ENOMEM;
vm_unacct_memory(charge);

View File

@@ -85,9 +85,9 @@ static int rseq_update_cpu_id(struct task_struct *t)
{
u32 cpu_id = raw_smp_processor_id();
if (__put_user(cpu_id, &t->rseq->cpu_id_start))
if (put_user(cpu_id, &t->rseq->cpu_id_start))
return -EFAULT;
if (__put_user(cpu_id, &t->rseq->cpu_id))
if (put_user(cpu_id, &t->rseq->cpu_id))
return -EFAULT;
trace_rseq_update(t);
return 0;
@@ -100,14 +100,14 @@ static int rseq_reset_rseq_cpu_id(struct task_struct *t)
/*
* Reset cpu_id_start to its initial state (0).
*/
if (__put_user(cpu_id_start, &t->rseq->cpu_id_start))
if (put_user(cpu_id_start, &t->rseq->cpu_id_start))
return -EFAULT;
/*
* Reset cpu_id to RSEQ_CPU_ID_UNINITIALIZED, so any user coming
* in after unregistration can figure out that rseq needs to be
* registered again.
*/
if (__put_user(cpu_id, &t->rseq->cpu_id))
if (put_user(cpu_id, &t->rseq->cpu_id))
return -EFAULT;
return 0;
}
@@ -115,29 +115,36 @@ static int rseq_reset_rseq_cpu_id(struct task_struct *t)
static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs)
{
struct rseq_cs __user *urseq_cs;
unsigned long ptr;
u64 ptr;
u32 __user *usig;
u32 sig;
int ret;
ret = __get_user(ptr, &t->rseq->rseq_cs);
if (ret)
return ret;
if (copy_from_user(&ptr, &t->rseq->rseq_cs.ptr64, sizeof(ptr)))
return -EFAULT;
if (!ptr) {
memset(rseq_cs, 0, sizeof(*rseq_cs));
return 0;
}
urseq_cs = (struct rseq_cs __user *)ptr;
if (ptr >= TASK_SIZE)
return -EINVAL;
urseq_cs = (struct rseq_cs __user *)(unsigned long)ptr;
if (copy_from_user(rseq_cs, urseq_cs, sizeof(*rseq_cs)))
return -EFAULT;
if (rseq_cs->version > 0)
return -EINVAL;
if (rseq_cs->start_ip >= TASK_SIZE ||
rseq_cs->start_ip + rseq_cs->post_commit_offset >= TASK_SIZE ||
rseq_cs->abort_ip >= TASK_SIZE ||
rseq_cs->version > 0)
return -EINVAL;
/* Check for overflow. */
if (rseq_cs->start_ip + rseq_cs->post_commit_offset < rseq_cs->start_ip)
return -EINVAL;
/* Ensure that abort_ip is not in the critical section. */
if (rseq_cs->abort_ip - rseq_cs->start_ip < rseq_cs->post_commit_offset)
return -EINVAL;
usig = (u32 __user *)(rseq_cs->abort_ip - sizeof(u32));
usig = (u32 __user *)(unsigned long)(rseq_cs->abort_ip - sizeof(u32));
ret = get_user(sig, usig);
if (ret)
return ret;
@@ -146,7 +153,7 @@ static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs)
printk_ratelimited(KERN_WARNING
"Possible attack attempt. Unexpected rseq signature 0x%x, expecting 0x%x (pid=%d, addr=%p).\n",
sig, current->rseq_sig, current->pid, usig);
return -EPERM;
return -EINVAL;
}
return 0;
}
@@ -157,7 +164,7 @@ static int rseq_need_restart(struct task_struct *t, u32 cs_flags)
int ret;
/* Get thread flags. */
ret = __get_user(flags, &t->rseq->flags);
ret = get_user(flags, &t->rseq->flags);
if (ret)
return ret;
@@ -195,9 +202,11 @@ static int clear_rseq_cs(struct task_struct *t)
* of code outside of the rseq assembly block. This performs
* a lazy clear of the rseq_cs field.
*
* Set rseq_cs to NULL with single-copy atomicity.
* Set rseq_cs to NULL.
*/
return __put_user(0UL, &t->rseq->rseq_cs);
if (clear_user(&t->rseq->rseq_cs.ptr64, sizeof(t->rseq->rseq_cs.ptr64)))
return -EFAULT;
return 0;
}
/*

View File

@@ -2290,8 +2290,17 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p)
if (task_on_rq_queued(p) && p->dl.dl_runtime)
task_non_contending(p);
if (!task_on_rq_queued(p))
if (!task_on_rq_queued(p)) {
/*
* Inactive timer is armed. However, p is leaving DEADLINE and
* might migrate away from this rq while continuing to run on
* some other class. We need to remove its contribution from
* this rq running_bw now, or sub_rq_bw (below) will complain.
*/
if (p->dl.dl_non_contending)
sub_running_bw(&p->dl, &rq->dl);
sub_rq_bw(&p->dl, &rq->dl);
}
/*
* We cannot use inactive_task_timer() to invoke sub_running_bw()

View File

@@ -79,12 +79,16 @@ static void wakeup_softirqd(void)
/*
* If ksoftirqd is scheduled, we do not want to process pending softirqs
* right now. Let ksoftirqd handle this at its own rate, to get fairness.
* right now. Let ksoftirqd handle this at its own rate, to get fairness,
* unless we're doing some of the synchronous softirqs.
*/
static bool ksoftirqd_running(void)
#define SOFTIRQ_NOW_MASK ((1 << HI_SOFTIRQ) | (1 << TASKLET_SOFTIRQ))
static bool ksoftirqd_running(unsigned long pending)
{
struct task_struct *tsk = __this_cpu_read(ksoftirqd);
if (pending & SOFTIRQ_NOW_MASK)
return false;
return tsk && (tsk->state == TASK_RUNNING);
}
@@ -328,7 +332,7 @@ asmlinkage __visible void do_softirq(void)
pending = local_softirq_pending();
if (pending && !ksoftirqd_running())
if (pending && !ksoftirqd_running(pending))
do_softirq_own_stack();
local_irq_restore(flags);
@@ -355,7 +359,7 @@ void irq_enter(void)
static inline void invoke_softirq(void)
{
if (ksoftirqd_running())
if (ksoftirqd_running(local_softirq_pending()))
return;
if (!force_irqthreads) {

View File

@@ -270,7 +270,11 @@ unlock:
goto retry;
}
wake_up_q(&wakeq);
if (!err) {
preempt_disable();
wake_up_q(&wakeq);
preempt_enable();
}
return err;
}

View File

@@ -277,8 +277,7 @@ static bool tick_check_preferred(struct clock_event_device *curdev,
*/
return !curdev ||
newdev->rating > curdev->rating ||
(!cpumask_equal(curdev->cpumask, newdev->cpumask) &&
!tick_check_percpu(curdev, newdev, smp_processor_id()));
!cpumask_equal(curdev->cpumask, newdev->cpumask);
}
/*

View File

@@ -3365,8 +3365,8 @@ static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
print_event_info(buf, m);
seq_printf(m, "# TASK-PID CPU# %s TIMESTAMP FUNCTION\n", tgid ? "TGID " : "");
seq_printf(m, "# | | | %s | |\n", tgid ? " | " : "");
seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? "TGID " : "");
seq_printf(m, "# | | %s | | |\n", tgid ? " | " : "");
}
static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
@@ -3386,9 +3386,9 @@ static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file
tgid ? tgid_space : space);
seq_printf(m, "# %s||| / delay\n",
tgid ? tgid_space : space);
seq_printf(m, "# TASK-PID CPU#%s|||| TIMESTAMP FUNCTION\n",
seq_printf(m, "# TASK-PID %sCPU# |||| TIMESTAMP FUNCTION\n",
tgid ? " TGID " : space);
seq_printf(m, "# | | | %s|||| | |\n",
seq_printf(m, "# | | %s | |||| | |\n",
tgid ? " | " : space);
}

View File

@@ -1480,8 +1480,10 @@ create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
}
ret = __register_trace_kprobe(tk);
if (ret < 0)
if (ret < 0) {
kfree(tk->tp.call.print_fmt);
goto error;
}
return &tk->tp.call;
error:
@@ -1501,6 +1503,8 @@ void destroy_local_trace_kprobe(struct trace_event_call *event_call)
}
__unregister_trace_kprobe(tk);
kfree(tk->tp.call.print_fmt);
free_trace_kprobe(tk);
}
#endif /* CONFIG_PERF_EVENTS */

View File

@@ -594,8 +594,7 @@ int trace_print_context(struct trace_iterator *iter)
trace_find_cmdline(entry->pid, comm);
trace_seq_printf(s, "%16s-%-5d [%03d] ",
comm, entry->pid, iter->cpu);
trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
if (tr->trace_flags & TRACE_ITER_RECORD_TGID) {
unsigned int tgid = trace_find_tgid(entry->pid);
@@ -606,6 +605,8 @@ int trace_print_context(struct trace_iterator *iter)
trace_seq_printf(s, "(%5d) ", tgid);
}
trace_seq_printf(s, "[%03d] ", iter->cpu);
if (tr->trace_flags & TRACE_ITER_IRQ_INFO)
trace_print_lat_fmt(s, entry);