Merge branch 'perf/urgent' into perf/core, to pick up fixes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
@@ -98,7 +98,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 	array_size += (u64) attr->max_entries * elem_size * num_possible_cpus();
 
 	if (array_size >= U32_MAX - PAGE_SIZE ||
-	    elem_size > PCPU_MIN_UNIT_SIZE || bpf_array_alloc_percpu(array)) {
+	    bpf_array_alloc_percpu(array)) {
 		bpf_map_area_free(array);
 		return ERR_PTR(-ENOMEM);
 	}
@@ -69,7 +69,7 @@ static LIST_HEAD(dev_map_list);
 
 static u64 dev_map_bitmap_size(const union bpf_attr *attr)
 {
-	return BITS_TO_LONGS(attr->max_entries) * sizeof(unsigned long);
+	return BITS_TO_LONGS((u64) attr->max_entries) * sizeof(unsigned long);
 }
 
 static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
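
Why the (u64) cast above matters: BITS_TO_LONGS() rounds up before it divides, so with a 32-bit max_entries the "+ BITS_PER_LONG - 1" step can wrap and the computed bitmap size collapses. A minimal userspace sketch, using a simplified stand-in for the kernel macro (assumed equivalent on a 64-bit build; not part of the commit):

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for the kernel's BITS_TO_LONGS(): DIV_ROUND_UP(nr, 64). */
#define BITS_TO_LONGS(nr) (((nr) + 63) / 64)

int main(void)
{
        uint32_t max_entries = UINT32_MAX;

        /* 32-bit arithmetic: UINT32_MAX + 63 wraps to 62, so the size becomes 0. */
        printf("no cast:   %llu bytes\n",
               (unsigned long long)(BITS_TO_LONGS(max_entries) * sizeof(unsigned long)));

        /* 64-bit arithmetic: ~67M longs, i.e. the ~512 MB bitmap actually requested. */
        printf("with cast: %llu bytes\n",
               (unsigned long long)(BITS_TO_LONGS((uint64_t)max_entries) * sizeof(unsigned long)));
        return 0;
}
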
@@ -78,6 +78,9 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
 	int err = -EINVAL;
 	u64 cost;
 
+	if (!capable(CAP_NET_ADMIN))
+		return ERR_PTR(-EPERM);
+
 	/* check sanity of attributes */
 	if (attr->max_entries == 0 || attr->key_size != 4 ||
 	    attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE)
@@ -111,8 +114,9 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
 	err = -ENOMEM;
 
 	/* A per cpu bitfield with a bit per possible net device */
-	dtab->flush_needed = __alloc_percpu(dev_map_bitmap_size(attr),
-					    __alignof__(unsigned long));
+	dtab->flush_needed = __alloc_percpu_gfp(dev_map_bitmap_size(attr),
+						__alignof__(unsigned long),
+						GFP_KERNEL | __GFP_NOWARN);
 	if (!dtab->flush_needed)
 		goto free_dtab;
 
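
For reference, the two per-cpu allocator entry points differ only in the gfp argument, so the switch above is purely about being able to pass __GFP_NOWARN for an allocation whose size the user controls. Declarations paraphrased from include/linux/percpu.h (a sketch, not a quote):

void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp);
void __percpu *__alloc_percpu(size_t size, size_t align);   /* the GFP_KERNEL case of the above */
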
@@ -317,10 +317,6 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 		 */
 		goto free_htab;
 
-	if (percpu && round_up(htab->map.value_size, 8) > PCPU_MIN_UNIT_SIZE)
-		/* make sure the size for pcpu_alloc() is reasonable */
-		goto free_htab;
-
 	htab->elem_size = sizeof(struct htab_elem) +
 			  round_up(htab->map.key_size, 8);
 	if (percpu)
@@ -39,6 +39,7 @@
 #include <linux/workqueue.h>
 #include <linux/list.h>
 #include <net/strparser.h>
+#include <net/tcp.h>
 
 struct bpf_stab {
 	struct bpf_map map;
@@ -101,9 +102,16 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
 		return SK_DROP;
 
 	skb_orphan(skb);
+	/* We need to ensure that BPF metadata for maps is also cleared
+	 * when we orphan the skb so that we don't have the possibility
+	 * to reference a stale map.
+	 */
+	TCP_SKB_CB(skb)->bpf.map = NULL;
 	skb->sk = psock->sock;
 	bpf_compute_data_end(skb);
+	preempt_disable();
 	rc = (*prog->bpf_func)(skb, prog->insnsi);
+	preempt_enable();
 	skb->sk = NULL;
 
 	return rc;
@@ -114,17 +122,10 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
 	struct sock *sk;
 	int rc;
 
-	/* Because we use per cpu values to feed input from sock redirect
-	 * in BPF program to do_sk_redirect_map() call we need to ensure we
-	 * are not preempted. RCU read lock is not sufficient in this case
-	 * with CONFIG_PREEMPT_RCU enabled so we must be explicit here.
-	 */
-	preempt_disable();
 	rc = smap_verdict_func(psock, skb);
 	switch (rc) {
 	case SK_REDIRECT:
-		sk = do_sk_redirect_map();
-		preempt_enable();
+		sk = do_sk_redirect_map(skb);
 		if (likely(sk)) {
 			struct smap_psock *peer = smap_psock_sk(sk);
 
@@ -141,8 +142,6 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
 		/* Fall through and free skb otherwise */
 	case SK_DROP:
 	default:
-		if (rc != SK_REDIRECT)
-			preempt_enable();
 		kfree_skb(skb);
 	}
 }
@@ -487,6 +486,9 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
 	int err = -EINVAL;
 	u64 cost;
 
+	if (!capable(CAP_NET_ADMIN))
+		return ERR_PTR(-EPERM);
+
 	/* check sanity of attributes */
 	if (attr->max_entries == 0 || attr->key_size != 4 ||
 	    attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE)
@@ -840,6 +842,12 @@ static int sock_map_update_elem(struct bpf_map *map,
 		return -EINVAL;
 	}
 
+	if (skops.sk->sk_type != SOCK_STREAM ||
+	    skops.sk->sk_protocol != IPPROTO_TCP) {
+		fput(socket->file);
+		return -EOPNOTSUPP;
+	}
+
 	err = sock_map_ctx_update_elem(&skops, map, key, flags);
 	fput(socket->file);
 	return err;
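
Seen from user space, a sockmap update passes a socket fd as the map value, and after the hunk above only TCP stream sockets are accepted. A hedged sketch using libbpf's bpf_map_update_elem(); map_fd is assumed to be an already created BPF_MAP_TYPE_SOCKMAP and error handling is omitted:

#include <bpf/bpf.h>
#include <sys/socket.h>
#include <netinet/in.h>

static void sockmap_update_example(int map_fd)
{
        int key = 0;
        int tcp_fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
        int udp_fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);

        bpf_map_update_elem(map_fd, &key, &tcp_fd, BPF_ANY); /* accepted */
        bpf_map_update_elem(map_fd, &key, &udp_fd, BPF_ANY); /* now fails with EOPNOTSUPP */
}
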
@@ -1116,7 +1116,12 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 		/* ctx accesses must be at a fixed offset, so that we can
 		 * determine what type of data were returned.
 		 */
-		if (!tnum_is_const(reg->var_off)) {
+		if (reg->off) {
+			verbose("dereference of modified ctx ptr R%d off=%d+%d, ctx+const is allowed, ctx+const+const is not\n",
+				regno, reg->off, off - reg->off);
+			return -EACCES;
+		}
+		if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
 			char tn_buf[48];
 
 			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
@@ -1124,7 +1129,6 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 				tn_buf, off, size);
 			return -EACCES;
 		}
-		off += reg->var_off.value;
 		err = check_ctx_access(env, insn_idx, off, size, t, &reg_type);
 		if (!err && t == BPF_READ && value_regno >= 0) {
 			/* ctx access returns either a scalar, or a
@@ -2426,12 +2430,15 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
 }
 
 static void find_good_pkt_pointers(struct bpf_verifier_state *state,
-				   struct bpf_reg_state *dst_reg)
+				   struct bpf_reg_state *dst_reg,
+				   bool range_right_open)
 {
 	struct bpf_reg_state *regs = state->regs, *reg;
+	u16 new_range;
 	int i;
 
-	if (dst_reg->off < 0)
+	if (dst_reg->off < 0 ||
+	    (dst_reg->off == 0 && range_right_open))
 		/* This doesn't give us any range */
 		return;
 
@@ -2442,9 +2449,13 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state,
 	 */
 	return;
 
-	/* LLVM can generate four kind of checks:
+	new_range = dst_reg->off;
+	if (range_right_open)
+		new_range--;
+
+	/* Examples for register markings:
 	 *
-	 * Type 1/2:
+	 * pkt_data in dst register:
 	 *
 	 *   r2 = r3;
 	 *   r2 += 8;
@@ -2461,7 +2472,7 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state,
 	 *     r2=pkt(id=n,off=8,r=0)
 	 *     r3=pkt(id=n,off=0,r=0)
 	 *
-	 * Type 3/4:
+	 * pkt_data in src register:
 	 *
 	 *   r2 = r3;
 	 *   r2 += 8;
@@ -2479,7 +2490,9 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state,
 	 *     r3=pkt(id=n,off=0,r=0)
 	 *
 	 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
-	 * so that range of bytes [r3, r3 + 8) is safe to access.
+	 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
+	 * and [r3, r3 + 8-1) respectively is safe to access depending on
+	 * the check.
 	 */
 
 	/* If our ids match, then we must have the same max_value. And we
@@ -2490,14 +2503,14 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state,
 	for (i = 0; i < MAX_BPF_REG; i++)
 		if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id)
 			/* keep the maximum range already checked */
-			regs[i].range = max_t(u16, regs[i].range, dst_reg->off);
+			regs[i].range = max(regs[i].range, new_range);
 
 	for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
 		if (state->stack_slot_type[i] != STACK_SPILL)
 			continue;
 		reg = &state->spilled_regs[i / BPF_REG_SIZE];
 		if (reg->type == PTR_TO_PACKET && reg->id == dst_reg->id)
-			reg->range = max_t(u16, reg->range, dst_reg->off);
+			reg->range = max(reg->range, new_range);
 	}
 }
 
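
The comment blocks above describe the compiler-generated bounds checks in eBPF assembly; in C they are the usual packet-pointer test. A hypothetical XDP program showing the pattern (assumes the standard libbpf helper header; not taken from the commit):

#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <bpf/bpf_helpers.h>

SEC("xdp")
int xdp_bounds_example(struct xdp_md *ctx)
{
        void *data     = (void *)(long)ctx->data;      /* pkt_data, r3 above */
        void *data_end = (void *)(long)ctx->data_end;  /* pkt_end */
        struct ethhdr *eth = data;

        /* "r2 = r3; r2 += 14; if (r2 > pkt_end) goto drop": after this test the
         * verifier marks [data, data + sizeof(*eth)) as safe to dereference.
         */
        if (data + sizeof(*eth) > data_end)
                return XDP_DROP;

        return eth->h_proto ? XDP_PASS : XDP_DROP;
}

char _license[] SEC("license") = "GPL";
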
@@ -2861,19 +2874,43 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
 	} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT &&
 		   dst_reg->type == PTR_TO_PACKET &&
 		   regs[insn->src_reg].type == PTR_TO_PACKET_END) {
-		find_good_pkt_pointers(this_branch, dst_reg);
+		/* pkt_data' > pkt_end */
+		find_good_pkt_pointers(this_branch, dst_reg, false);
+	} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT &&
+		   dst_reg->type == PTR_TO_PACKET_END &&
+		   regs[insn->src_reg].type == PTR_TO_PACKET) {
+		/* pkt_end > pkt_data' */
+		find_good_pkt_pointers(other_branch, &regs[insn->src_reg], true);
 	} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLT &&
 		   dst_reg->type == PTR_TO_PACKET &&
 		   regs[insn->src_reg].type == PTR_TO_PACKET_END) {
-		find_good_pkt_pointers(other_branch, dst_reg);
+		/* pkt_data' < pkt_end */
+		find_good_pkt_pointers(other_branch, dst_reg, true);
+	} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLT &&
+		   dst_reg->type == PTR_TO_PACKET_END &&
+		   regs[insn->src_reg].type == PTR_TO_PACKET) {
+		/* pkt_end < pkt_data' */
+		find_good_pkt_pointers(this_branch, &regs[insn->src_reg], false);
+	} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE &&
+		   dst_reg->type == PTR_TO_PACKET &&
+		   regs[insn->src_reg].type == PTR_TO_PACKET_END) {
+		/* pkt_data' >= pkt_end */
+		find_good_pkt_pointers(this_branch, dst_reg, true);
 	} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE &&
 		   dst_reg->type == PTR_TO_PACKET_END &&
 		   regs[insn->src_reg].type == PTR_TO_PACKET) {
-		find_good_pkt_pointers(other_branch, &regs[insn->src_reg]);
+		/* pkt_end >= pkt_data' */
+		find_good_pkt_pointers(other_branch, &regs[insn->src_reg], false);
+	} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLE &&
+		   dst_reg->type == PTR_TO_PACKET &&
+		   regs[insn->src_reg].type == PTR_TO_PACKET_END) {
+		/* pkt_data' <= pkt_end */
+		find_good_pkt_pointers(other_branch, dst_reg, false);
+	} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLE &&
+		   dst_reg->type == PTR_TO_PACKET_END &&
+		   regs[insn->src_reg].type == PTR_TO_PACKET) {
+		/* pkt_end <= pkt_data' */
+		find_good_pkt_pointers(this_branch, &regs[insn->src_reg], true);
 	} else if (is_pointer_value(env, insn->dst_reg)) {
 		verbose("R%d pointer comparison prohibited\n", insn->dst_reg);
 		return -EACCES;
@@ -632,6 +632,11 @@ cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
 		__cpuhp_kick_ap(st);
 	}
 
+	/*
+	 * Clean up the leftovers so the next hotplug operation wont use stale
+	 * data.
+	 */
+	st->node = st->last = NULL;
 	return ret;
 }
 
@@ -1611,7 +1611,7 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
 		return err;
 
 	if (!access_ok(VERIFY_WRITE, infop, sizeof(*infop)))
-		goto Efault;
+		return -EFAULT;
 
 	user_access_begin();
 	unsafe_put_user(signo, &infop->si_signo, Efault);
@@ -1739,7 +1739,7 @@ COMPAT_SYSCALL_DEFINE5(waitid,
 		return err;
 
 	if (!access_ok(VERIFY_WRITE, infop, sizeof(*infop)))
-		goto Efault;
+		return -EFAULT;
 
 	user_access_begin();
 	unsafe_put_user(signo, &infop->si_signo, Efault);
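
In both hunks the early failure path stops jumping to the Efault label so that user_access_begin()/user_access_end() stay balanced: the label ends a user-access section that has not been opened yet when access_ok() fails. A generic sketch of the pattern (4.14-era API with the VERIFY_WRITE argument; not the exact kernel/exit.c code):

static int copyout_example(int __user *up, int value)
{
        if (!access_ok(VERIFY_WRITE, up, sizeof(*up)))
                return -EFAULT;                 /* nothing to undo yet: plain return */

        user_access_begin();
        unsafe_put_user(value, up, Efault);     /* jumps to Efault on a fault */
        user_access_end();
        return 0;
Efault:
        user_access_end();                      /* balances user_access_begin() above */
        return -EFAULT;
}
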
@@ -135,17 +135,26 @@ void irq_gc_ack_clr_bit(struct irq_data *d)
 }
 
 /**
- * irq_gc_mask_disable_reg_and_ack - Mask and ack pending interrupt
+ * irq_gc_mask_disable_and_ack_set - Mask and ack pending interrupt
  * @d: irq_data
+ *
+ * This generic implementation of the irq_mask_ack method is for chips
+ * with separate enable/disable registers instead of a single mask
+ * register and where a pending interrupt is acknowledged by setting a
+ * bit.
+ *
+ * Note: This is the only permutation currently used. Similar generic
+ * functions should be added here if other permutations are required.
  */
-void irq_gc_mask_disable_reg_and_ack(struct irq_data *d)
+void irq_gc_mask_disable_and_ack_set(struct irq_data *d)
 {
 	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
 	struct irq_chip_type *ct = irq_data_get_chip_type(d);
 	u32 mask = d->mask;
 
 	irq_gc_lock(gc);
-	irq_reg_writel(gc, mask, ct->regs.mask);
+	irq_reg_writel(gc, mask, ct->regs.disable);
+	*ct->mask_cache &= ~mask;
 	irq_reg_writel(gc, mask, ct->regs.ack);
 	irq_gc_unlock(gc);
 }
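
The renamed helper is meant to be wired into a generic irq chip's irq_mask_ack callback by drivers whose hardware matches the description above. A hypothetical driver fragment (register offsets and the setup function are invented for illustration):

#include <linux/irq.h>

static void example_gc_setup(struct irq_chip_generic *gc)
{
        struct irq_chip_type *ct = gc->chip_types;

        /* hypothetical layout: separate disable register, ack by setting a bit */
        ct->regs.disable = 0x04;
        ct->regs.ack     = 0x08;
        ct->chip.irq_mask_ack = irq_gc_mask_disable_and_ack_set;
}
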
@@ -68,6 +68,7 @@ enum {
 	 * attach_mutex to avoid changing binding state while
 	 * worker_attach_to_pool() is in progress.
 	 */
+	POOL_MANAGER_ACTIVE	= 1 << 0,	/* being managed */
 	POOL_DISASSOCIATED	= 1 << 2,	/* cpu can't serve workers */
 
 	/* worker flags */
@@ -165,7 +166,6 @@ struct worker_pool {
 						/* L: hash of busy workers */
 
 	/* see manage_workers() for details on the two manager mutexes */
-	struct mutex		manager_arb;	/* manager arbitration */
 	struct worker		*manager;	/* L: purely informational */
 	struct mutex		attach_mutex;	/* attach/detach exclusion */
 	struct list_head	workers;	/* A: attached workers */
@@ -299,6 +299,7 @@ static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf;
 
 static DEFINE_MUTEX(wq_pool_mutex);	/* protects pools and workqueues list */
 static DEFINE_SPINLOCK(wq_mayday_lock);	/* protects wq->maydays list */
+static DECLARE_WAIT_QUEUE_HEAD(wq_manager_wait); /* wait for manager to go away */
 
 static LIST_HEAD(workqueues);		/* PR: list of all workqueues */
 static bool workqueue_freezing;		/* PL: have wqs started freezing? */
@@ -801,7 +802,7 @@ static bool need_to_create_worker(struct worker_pool *pool)
 /* Do we have too many workers and should some go away? */
 static bool too_many_workers(struct worker_pool *pool)
 {
-	bool managing = mutex_is_locked(&pool->manager_arb);
+	bool managing = pool->flags & POOL_MANAGER_ACTIVE;
 	int nr_idle = pool->nr_idle + managing; /* manager is considered idle */
 	int nr_busy = pool->nr_workers - nr_idle;
 
@@ -1980,24 +1981,17 @@ static bool manage_workers(struct worker *worker)
 {
 	struct worker_pool *pool = worker->pool;
 
-	/*
-	 * Anyone who successfully grabs manager_arb wins the arbitration
-	 * and becomes the manager. mutex_trylock() on pool->manager_arb
-	 * failure while holding pool->lock reliably indicates that someone
-	 * else is managing the pool and the worker which failed trylock
-	 * can proceed to executing work items. This means that anyone
-	 * grabbing manager_arb is responsible for actually performing
-	 * manager duties. If manager_arb is grabbed and released without
-	 * actual management, the pool may stall indefinitely.
-	 */
-	if (!mutex_trylock(&pool->manager_arb))
+	if (pool->flags & POOL_MANAGER_ACTIVE)
 		return false;
 
+	pool->flags |= POOL_MANAGER_ACTIVE;
 	pool->manager = worker;
 
 	maybe_create_worker(pool);
 
 	pool->manager = NULL;
-	mutex_unlock(&pool->manager_arb);
+	pool->flags &= ~POOL_MANAGER_ACTIVE;
+	wake_up(&wq_manager_wait);
 	return true;
 }
 
@@ -3248,7 +3242,6 @@ static int init_worker_pool(struct worker_pool *pool)
 	setup_timer(&pool->mayday_timer, pool_mayday_timeout,
 		    (unsigned long)pool);
 
-	mutex_init(&pool->manager_arb);
 	mutex_init(&pool->attach_mutex);
 	INIT_LIST_HEAD(&pool->workers);
 
@@ -3318,13 +3311,15 @@ static void put_unbound_pool(struct worker_pool *pool)
 	hash_del(&pool->hash_node);
 
 	/*
-	 * Become the manager and destroy all workers. Grabbing
-	 * manager_arb prevents @pool's workers from blocking on
-	 * attach_mutex.
+	 * Become the manager and destroy all workers. This prevents
+	 * @pool's workers from blocking on attach_mutex. We're the last
+	 * manager and @pool gets freed with the flag set.
 	 */
-	mutex_lock(&pool->manager_arb);
-
 	spin_lock_irq(&pool->lock);
+	wait_event_lock_irq(wq_manager_wait,
+			    !(pool->flags & POOL_MANAGER_ACTIVE), pool->lock);
+	pool->flags |= POOL_MANAGER_ACTIVE;
+
 	while ((worker = first_idle_worker(pool)))
 		destroy_worker(worker);
 	WARN_ON(pool->nr_workers || pool->nr_idle);
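
One detail worth noting about the new wait above: wait_event_lock_irq() expects the caller to already hold the spinlock, drops it while sleeping, and re-acquires it before re-checking the condition, so both the flag test and the flag set happen under pool->lock and a concurrent wake-up cannot be lost. The same calls as in the hunk, annotated as a sketch:

        spin_lock_irq(&pool->lock);
        wait_event_lock_irq(wq_manager_wait,                      /* may sleep here ...           */
                            !(pool->flags & POOL_MANAGER_ACTIVE), /* ... until no manager is left */
                            pool->lock);                          /* dropped while waiting,
                                                                     re-taken before returning    */
        pool->flags |= POOL_MANAGER_ACTIVE;                       /* still under pool->lock       */
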
@@ -3338,8 +3333,6 @@ static void put_unbound_pool(struct worker_pool *pool)
 	if (pool->detach_completion)
 		wait_for_completion(pool->detach_completion);
 
-	mutex_unlock(&pool->manager_arb);
-
 	/* shut down the timers */
 	del_timer_sync(&pool->idle_timer);
 	del_timer_sync(&pool->mayday_timer);