Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
Daniel Borkmann says:

====================
pull-request: bpf 2018-01-09

The following pull-request contains BPF updates for your *net* tree.

The main changes are:

1) Prevent out-of-bounds speculation in BPF maps by masking the index
   after bounds checks in order to fix spectre v1, and add an option
   BPF_JIT_ALWAYS_ON into Kconfig that allows for removing the BPF
   interpreter from the kernel in favor of JIT-only mode to make
   spectre v2 harder, from Alexei.

2) Remove false sharing of map refcount with max_entries which was
   used in spectre v1, from Daniel.

3) Add a missing NULL psock check in sockmap in order to fix a race,
   from John.

4) Fix test_align BPF selftest case since a recent change in verifier
   rejects the bit-wise arithmetic on pointers earlier but test_align
   update was missing, from Alexei.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
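[Editor's note on point 1: the mitigation works because the AND with a
power-of-two-minus-one mask bounds the index even when the CPU speculates
past the bounds check. A minimal standalone sketch of the idea, assuming
nothing beyond standard C; the toy_* names are illustrative, not kernel API:

#include <stdint.h>
#include <stddef.h>

/* Toy model of an array map: the backing store is padded to
 * index_mask + 1 slots, so the masked index always lands in
 * allocated memory, even under speculative execution.
 */
struct toy_array {
	uint32_t max_entries;	/* bound visible to the program */
	uint32_t index_mask;	/* roundup_pow_of_two(max_entries) - 1 */
	uint64_t values[8];	/* index_mask + 1 slots */
};

static uint64_t *toy_lookup(struct toy_array *a, uint32_t index)
{
	if (index >= a->max_entries)
		return NULL;
	/* Even if the branch above is mispredicted, the AND keeps the
	 * load inside values[].
	 */
	return &a->values[index & a->index_mask];
}

The kernel diff below applies exactly this pattern to the array map
lookup paths and to the tail-call rewrite in the verifier.]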
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -53,9 +53,10 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 {
 	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
 	int numa_node = bpf_map_attr_numa_node(attr);
+	u32 elem_size, index_mask, max_entries;
+	bool unpriv = !capable(CAP_SYS_ADMIN);
 	struct bpf_array *array;
 	u64 array_size;
-	u32 elem_size;
 
 	/* check sanity of attributes */
 	if (attr->max_entries == 0 || attr->key_size != 4 ||
@@ -72,11 +73,20 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 
 	elem_size = round_up(attr->value_size, 8);
 
+	max_entries = attr->max_entries;
+	index_mask = roundup_pow_of_two(max_entries) - 1;
+
+	if (unpriv)
+		/* round up array size to nearest power of 2,
+		 * since cpu will speculate within index_mask limits
+		 */
+		max_entries = index_mask + 1;
+
 	array_size = sizeof(*array);
 	if (percpu)
-		array_size += (u64) attr->max_entries * sizeof(void *);
+		array_size += (u64) max_entries * sizeof(void *);
 	else
-		array_size += (u64) attr->max_entries * elem_size;
+		array_size += (u64) max_entries * elem_size;
 
 	/* make sure there is no u32 overflow later in round_up() */
 	if (array_size >= U32_MAX - PAGE_SIZE)
@@ -86,6 +96,8 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 	array = bpf_map_area_alloc(array_size, numa_node);
 	if (!array)
 		return ERR_PTR(-ENOMEM);
+	array->index_mask = index_mask;
+	array->map.unpriv_array = unpriv;
 
 	/* copy mandatory map attributes */
 	array->map.map_type = attr->map_type;
@@ -121,12 +133,13 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key)
 	if (unlikely(index >= array->map.max_entries))
 		return NULL;
 
-	return array->value + array->elem_size * index;
+	return array->value + array->elem_size * (index & array->index_mask);
 }
 
 /* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
 static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
 {
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	struct bpf_insn *insn = insn_buf;
 	u32 elem_size = round_up(map->value_size, 8);
 	const int ret = BPF_REG_0;
@@ -135,7 +148,12 @@ static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
 
 	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
 	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
-	*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
+	if (map->unpriv_array) {
+		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
+		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
+	} else {
+		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
+	}
 
 	if (is_power_of_2(elem_size)) {
 		*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
@@ -157,7 +175,7 @@ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
 	if (unlikely(index >= array->map.max_entries))
 		return NULL;
 
-	return this_cpu_ptr(array->pptrs[index]);
+	return this_cpu_ptr(array->pptrs[index & array->index_mask]);
 }
 
 int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
@@ -177,7 +195,7 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
 	 */
 	size = round_up(map->value_size, 8);
 	rcu_read_lock();
-	pptr = array->pptrs[index];
+	pptr = array->pptrs[index & array->index_mask];
 	for_each_possible_cpu(cpu) {
 		bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size);
 		off += size;
@@ -225,10 +243,11 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
 		return -EEXIST;
 
 	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
-		memcpy(this_cpu_ptr(array->pptrs[index]),
+		memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]),
 		       value, map->value_size);
 	else
-		memcpy(array->value + array->elem_size * index,
+		memcpy(array->value +
+		       array->elem_size * (index & array->index_mask),
 		       value, map->value_size);
 	return 0;
 }
@@ -262,7 +281,7 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
 	 */
 	size = round_up(map->value_size, 8);
 	rcu_read_lock();
-	pptr = array->pptrs[index];
+	pptr = array->pptrs[index & array->index_mask];
 	for_each_possible_cpu(cpu) {
 		bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size);
 		off += size;
@@ -613,6 +632,7 @@ static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
 static u32 array_of_map_gen_lookup(struct bpf_map *map,
 				   struct bpf_insn *insn_buf)
 {
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	u32 elem_size = round_up(map->value_size, 8);
 	struct bpf_insn *insn = insn_buf;
 	const int ret = BPF_REG_0;
@@ -621,7 +641,12 @@ static u32 array_of_map_gen_lookup(struct bpf_map *map,
 
 	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
 	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
-	*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
+	if (map->unpriv_array) {
+		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6);
+		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
+	} else {
+		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
+	}
 	if (is_power_of_2(elem_size))
 		*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
 	else
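[Editor's note: to make the sizing logic in array_map_alloc() concrete,
a worked example; toy_roundup_pow_of_two() is an illustrative stand-in
for the kernel's roundup_pow_of_two(), not its implementation:

#include <stdio.h>
#include <stdint.h>

/* Illustrative stand-in for roundup_pow_of_two(). */
static uint32_t toy_roundup_pow_of_two(uint32_t n)
{
	uint32_t p = 1;

	while (p < n)
		p <<= 1;
	return p;
}

int main(void)
{
	uint32_t max_entries = 5;	/* user-requested attr->max_entries */
	uint32_t index_mask = toy_roundup_pow_of_two(max_entries) - 1;

	/* For an unprivileged map the allocation is padded to mask + 1
	 * slots, so "index & index_mask" can never leave the backing
	 * memory, even speculatively.
	 */
	printf("index_mask=%u, padded max_entries=%u\n",
	       index_mask, index_mask + 1);	/* prints 7 and 8 */
	return 0;
}
]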
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -767,6 +767,7 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 }
 EXPORT_SYMBOL_GPL(__bpf_call_base);
 
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
 /**
  *	__bpf_prog_run - run eBPF program on a given context
  *	@ctx: is the data we are operating on
@@ -1317,6 +1318,14 @@ EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384)
 EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
 };
 
+#else
+static unsigned int __bpf_prog_ret0(const void *ctx,
+				    const struct bpf_insn *insn)
+{
+	return 0;
+}
+#endif
+
 bool bpf_prog_array_compatible(struct bpf_array *array,
 			       const struct bpf_prog *fp)
 {
@@ -1364,9 +1373,13 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
  */
 struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 {
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
 	u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1);
 
 	fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1];
+#else
+	fp->bpf_func = __bpf_prog_ret0;
+#endif
 
 	/* eBPF JITs can rewrite the program in case constant
 	 * blinding is active. However, in case of error during
@@ -1376,6 +1389,12 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 	 */
 	if (!bpf_prog_is_dev_bound(fp->aux)) {
 		fp = bpf_int_jit_compile(fp);
+#ifdef CONFIG_BPF_JIT_ALWAYS_ON
+		if (!fp->jited) {
+			*err = -ENOTSUPP;
+			return fp;
+		}
+#endif
 	} else {
 		*err = bpf_prog_offload_compile(fp);
 		if (*err)
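[Editor's note on the core.c hunks above, hedged to what the diff itself
shows: with CONFIG_BPF_JIT_ALWAYS_ON set, the interpreter table is compiled
out, bpf_func falls back to the stub __bpf_prog_ret0, and any program the
JIT cannot compile is rejected with -ENOTSUPP rather than interpreted. A
config fragment enabling JIT-only mode might look like:

CONFIG_BPF_SYSCALL=y
CONFIG_BPF_JIT=y
CONFIG_BPF_JIT_ALWAYS_ON=y
]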
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -591,8 +591,15 @@ static void sock_map_free(struct bpf_map *map)
 
 		write_lock_bh(&sock->sk_callback_lock);
 		psock = smap_psock_sk(sock);
-		smap_list_remove(psock, &stab->sock_map[i]);
-		smap_release_sock(psock, sock);
+		/* This check handles a racing sock event that can get the
+		 * sk_callback_lock before this case but after xchg happens
+		 * causing the refcnt to hit zero and sock user data (psock)
+		 * to be null and queued for garbage collection.
+		 */
+		if (likely(psock)) {
+			smap_list_remove(psock, &stab->sock_map[i]);
+			smap_release_sock(psock, sock);
+		}
 		write_unlock_bh(&sock->sk_callback_lock);
 	}
 	rcu_read_unlock();
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1729,6 +1729,13 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
 	err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta);
 	if (err)
 		return err;
+	if (func_id == BPF_FUNC_tail_call) {
+		if (meta.map_ptr == NULL) {
+			verbose(env, "verifier bug\n");
+			return -EINVAL;
+		}
+		env->insn_aux_data[insn_idx].map_ptr = meta.map_ptr;
+	}
 	err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta);
 	if (err)
 		return err;
@@ -4456,6 +4463,35 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
 			 */
 			insn->imm = 0;
 			insn->code = BPF_JMP | BPF_TAIL_CALL;
+
+			/* instead of changing every JIT dealing with tail_call
+			 * emit two extra insns:
+			 * if (index >= max_entries) goto out;
+			 * index &= array->index_mask;
+			 * to avoid out-of-bounds cpu speculation
+			 */
+			map_ptr = env->insn_aux_data[i + delta].map_ptr;
+			if (map_ptr == BPF_MAP_PTR_POISON) {
+				verbose(env, "tail_call abusing map_ptr\n");
+				return -EINVAL;
+			}
+			if (!map_ptr->unpriv_array)
+				continue;
+			insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
+						  map_ptr->max_entries, 2);
+			insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
+						    container_of(map_ptr,
+								 struct bpf_array,
+								 map)->index_mask);
+			insn_buf[2] = *insn;
+			cnt = 3;
+			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta += cnt - 1;
+			env->prog = prog = new_prog;
+			insn = new_prog->insnsi + i + delta;
 			continue;
 		}
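[Editor's note: summarizing the verifier rewrite above. For unprivileged
programs every tail-call site is patched so the index register is clamped
before the call. An illustrative view of the patched instruction sequence
(not actual verifier output; R3 holds the index per the BPF calling
convention for bpf_tail_call(ctx, map, index)):

/* insn_buf[0]: if (R3 >= map->max_entries) goto out;  bounds check   */
/* insn_buf[1]: R3 &= array->index_mask;               clamp the index */
/* insn_buf[2]: the original tail_call instruction                     */
]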