Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Alexei Starovoitov says:

====================
pull-request: bpf-next 2020-09-23

The following pull-request contains BPF updates for your *net-next* tree.

We've added 95 non-merge commits during the last 22 day(s) which contain
a total of 124 files changed, 4211 insertions(+), 2040 deletions(-).

The main changes are:

1) Full multi function support in libbpf, from Andrii.

2) Refactoring of function argument checks, from Lorenz.

3) Make bpf_tail_call compatible with functions (subprograms), from Maciej.

4) Program metadata support, from YiFei.

5) bpf iterator optimizations, from Yonghong.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller
2020-09-23 13:11:11 -07:00
124 changed files with 4217 additions and 2046 deletions

View File

@@ -238,7 +238,6 @@ struct bpf_call_arg_meta {
u64 msize_max_value;
int ref_obj_id;
int func_id;
u32 btf_id;
};
struct btf *btf_vmlinux;
@@ -436,6 +435,15 @@ static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
return type == ARG_PTR_TO_SOCK_COMMON;
}
static bool arg_type_may_be_null(enum bpf_arg_type type)
{
return type == ARG_PTR_TO_MAP_VALUE_OR_NULL ||
type == ARG_PTR_TO_MEM_OR_NULL ||
type == ARG_PTR_TO_CTX_OR_NULL ||
type == ARG_PTR_TO_SOCKET_OR_NULL ||
type == ARG_PTR_TO_ALLOC_MEM_OR_NULL;
}
/* Determine whether the function releases some resources allocated by another
* function call. The first reference type argument will be assumed to be
* released by release_reference().
@@ -1490,6 +1498,13 @@ static int check_subprogs(struct bpf_verifier_env *env)
for (i = 0; i < insn_cnt; i++) {
u8 code = insn[i].code;
if (code == (BPF_JMP | BPF_CALL) &&
insn[i].imm == BPF_FUNC_tail_call &&
insn[i].src_reg != BPF_PSEUDO_CALL)
subprog[cur_subprog].has_tail_call = true;
if (BPF_CLASS(code) == BPF_LD &&
(BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND))
subprog[cur_subprog].has_ld_abs = true;
if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
goto next;
if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
@@ -2979,10 +2994,37 @@ static int check_max_stack_depth(struct bpf_verifier_env *env)
int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
struct bpf_subprog_info *subprog = env->subprog_info;
struct bpf_insn *insn = env->prog->insnsi;
bool tail_call_reachable = false;
int ret_insn[MAX_CALL_FRAMES];
int ret_prog[MAX_CALL_FRAMES];
int j;
process_func:
/* protect against potential stack overflow that might happen when
* bpf2bpf calls get combined with tailcalls. Limit the caller's stack
* depth for such case down to 256 so that the worst case scenario
* would result in 8k stack size (32 which is tailcall limit * 256 =
* 8k).
*
* To get the idea what might happen, see an example:
* func1 -> sub rsp, 128
* subfunc1 -> sub rsp, 256
* tailcall1 -> add rsp, 256
* func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320)
* subfunc2 -> sub rsp, 64
* subfunc22 -> sub rsp, 128
* tailcall2 -> add rsp, 128
* func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416)
*
* tailcall will unwind the current stack frame but it will not get rid
* of caller's stack as shown on the example above.
*/
if (idx && subprog[idx].has_tail_call && depth >= 256) {
verbose(env,
"tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n",
depth);
return -EACCES;
}
/* round up to 32-bytes, since this is granularity
* of interpreter stack size
*/
@@ -3011,6 +3053,10 @@ continue_func:
i);
return -EFAULT;
}
if (subprog[idx].has_tail_call)
tail_call_reachable = true;
frame++;
if (frame >= MAX_CALL_FRAMES) {
verbose(env, "the call stack of %d frames is too deep !\n",
@@ -3019,6 +3065,15 @@ continue_func:
}
goto process_func;
}
/* if tail call got detected across bpf2bpf calls then mark each of the
* currently present subprog frames as tail call reachable subprogs;
* this info will be utilized by JIT so that we will be preserving the
* tail call counter throughout bpf2bpf calls combined with tailcalls
*/
if (tail_call_reachable)
for (j = 0; j < frame; j++)
subprog[ret_prog[j]].tail_call_reachable = true;
/* end of for() loop means the last insn of the 'subprog'
* was reached. Doesn't matter whether it was JA or EXIT
*/
@@ -3594,18 +3649,6 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
struct bpf_func_state *state = func(env, reg);
int err, min_off, max_off, i, j, slot, spi;
if (reg->type != PTR_TO_STACK) {
/* Allow zero-byte read from NULL, regardless of pointer type */
if (zero_size_allowed && access_size == 0 &&
register_is_null(reg))
return 0;
verbose(env, "R%d type=%s expected=%s\n", regno,
reg_type_str[reg->type],
reg_type_str[PTR_TO_STACK]);
return -EACCES;
}
if (tnum_is_const(reg->var_off)) {
min_off = max_off = reg->var_off.value + reg->off;
err = __check_stack_boundary(env, regno, min_off, access_size,
@@ -3750,9 +3793,19 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
access_size, zero_size_allowed,
"rdwr",
&env->prog->aux->max_rdwr_access);
default: /* scalar_value|ptr_to_stack or invalid ptr */
case PTR_TO_STACK:
return check_stack_boundary(env, regno, access_size,
zero_size_allowed, meta);
default: /* scalar_value or invalid ptr */
/* Allow zero-byte read from NULL, regardless of pointer type */
if (zero_size_allowed && access_size == 0 &&
register_is_null(reg))
return 0;
verbose(env, "R%d type=%s expected=%s\n", regno,
reg_type_str[reg->type],
reg_type_str[PTR_TO_STACK]);
return -EACCES;
}
}
@@ -3784,10 +3837,6 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno,
struct bpf_map *map = reg->map_ptr;
u64 val = reg->var_off.value;
if (reg->type != PTR_TO_MAP_VALUE) {
verbose(env, "R%d is not a pointer to map_value\n", regno);
return -EINVAL;
}
if (!is_const) {
verbose(env,
"R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n",
@@ -3854,12 +3903,6 @@ static bool arg_type_is_mem_size(enum bpf_arg_type type)
type == ARG_CONST_SIZE_OR_ZERO;
}
static bool arg_type_is_alloc_mem_ptr(enum bpf_arg_type type)
{
return type == ARG_PTR_TO_ALLOC_MEM ||
type == ARG_PTR_TO_ALLOC_MEM_OR_NULL;
}
static bool arg_type_is_alloc_size(enum bpf_arg_type type)
{
return type == ARG_CONST_ALLOC_SIZE_OR_ZERO;
@@ -3908,14 +3951,114 @@ static int resolve_map_arg_type(struct bpf_verifier_env *env,
return 0;
}
struct bpf_reg_types {
const enum bpf_reg_type types[10];
};
static const struct bpf_reg_types map_key_value_types = {
.types = {
PTR_TO_STACK,
PTR_TO_PACKET,
PTR_TO_PACKET_META,
PTR_TO_MAP_VALUE,
},
};
static const struct bpf_reg_types sock_types = {
.types = {
PTR_TO_SOCK_COMMON,
PTR_TO_SOCKET,
PTR_TO_TCP_SOCK,
PTR_TO_XDP_SOCK,
},
};
static const struct bpf_reg_types mem_types = {
.types = {
PTR_TO_STACK,
PTR_TO_PACKET,
PTR_TO_PACKET_META,
PTR_TO_MAP_VALUE,
PTR_TO_MEM,
PTR_TO_RDONLY_BUF,
PTR_TO_RDWR_BUF,
},
};
static const struct bpf_reg_types int_ptr_types = {
.types = {
PTR_TO_STACK,
PTR_TO_PACKET,
PTR_TO_PACKET_META,
PTR_TO_MAP_VALUE,
},
};
static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM } };
static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
[ARG_PTR_TO_MAP_KEY] = &map_key_value_types,
[ARG_PTR_TO_MAP_VALUE] = &map_key_value_types,
[ARG_PTR_TO_UNINIT_MAP_VALUE] = &map_key_value_types,
[ARG_PTR_TO_MAP_VALUE_OR_NULL] = &map_key_value_types,
[ARG_CONST_SIZE] = &scalar_types,
[ARG_CONST_SIZE_OR_ZERO] = &scalar_types,
[ARG_CONST_ALLOC_SIZE_OR_ZERO] = &scalar_types,
[ARG_CONST_MAP_PTR] = &const_map_ptr_types,
[ARG_PTR_TO_CTX] = &context_types,
[ARG_PTR_TO_CTX_OR_NULL] = &context_types,
[ARG_PTR_TO_SOCK_COMMON] = &sock_types,
[ARG_PTR_TO_SOCKET] = &fullsock_types,
[ARG_PTR_TO_SOCKET_OR_NULL] = &fullsock_types,
[ARG_PTR_TO_BTF_ID] = &btf_ptr_types,
[ARG_PTR_TO_SPIN_LOCK] = &spin_lock_types,
[ARG_PTR_TO_MEM] = &mem_types,
[ARG_PTR_TO_MEM_OR_NULL] = &mem_types,
[ARG_PTR_TO_UNINIT_MEM] = &mem_types,
[ARG_PTR_TO_ALLOC_MEM] = &alloc_mem_types,
[ARG_PTR_TO_ALLOC_MEM_OR_NULL] = &alloc_mem_types,
[ARG_PTR_TO_INT] = &int_ptr_types,
[ARG_PTR_TO_LONG] = &int_ptr_types,
};
static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
const struct bpf_reg_types *compatible)
{
struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
enum bpf_reg_type expected, type = reg->type;
int i, j;
for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
expected = compatible->types[i];
if (expected == NOT_INIT)
break;
if (type == expected)
return 0;
}
verbose(env, "R%d type=%s expected=", regno, reg_type_str[type]);
for (j = 0; j + 1 < i; j++)
verbose(env, "%s, ", reg_type_str[compatible->types[j]]);
verbose(env, "%s\n", reg_type_str[compatible->types[j]]);
return -EACCES;
}
static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
struct bpf_call_arg_meta *meta,
const struct bpf_func_proto *fn)
{
u32 regno = BPF_REG_1 + arg;
struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
enum bpf_reg_type expected_type, type = reg->type;
enum bpf_arg_type arg_type = fn->arg_type[arg];
const struct bpf_reg_types *compatible;
enum bpf_reg_type type = reg->type;
int err = 0;
if (arg_type == ARG_DONTCARE)
@@ -3948,125 +4091,48 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
return err;
}
if (arg_type == ARG_PTR_TO_MAP_KEY ||
arg_type == ARG_PTR_TO_MAP_VALUE ||
arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE ||
arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) {
expected_type = PTR_TO_STACK;
if (register_is_null(reg) &&
arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL)
/* final test in check_stack_boundary() */;
else if (!type_is_pkt_pointer(type) &&
type != PTR_TO_MAP_VALUE &&
type != expected_type)
goto err_type;
} else if (arg_type == ARG_CONST_SIZE ||
arg_type == ARG_CONST_SIZE_OR_ZERO ||
arg_type == ARG_CONST_ALLOC_SIZE_OR_ZERO) {
expected_type = SCALAR_VALUE;
if (type != expected_type)
goto err_type;
} else if (arg_type == ARG_CONST_MAP_PTR) {
expected_type = CONST_PTR_TO_MAP;
if (type != expected_type)
goto err_type;
} else if (arg_type == ARG_PTR_TO_CTX ||
arg_type == ARG_PTR_TO_CTX_OR_NULL) {
expected_type = PTR_TO_CTX;
if (!(register_is_null(reg) &&
arg_type == ARG_PTR_TO_CTX_OR_NULL)) {
if (type != expected_type)
goto err_type;
err = check_ctx_reg(env, reg, regno);
if (err < 0)
return err;
}
} else if (arg_type == ARG_PTR_TO_SOCK_COMMON) {
expected_type = PTR_TO_SOCK_COMMON;
/* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */
if (!type_is_sk_pointer(type))
goto err_type;
if (reg->ref_obj_id) {
if (meta->ref_obj_id) {
verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
regno, reg->ref_obj_id,
meta->ref_obj_id);
return -EFAULT;
}
meta->ref_obj_id = reg->ref_obj_id;
}
} else if (arg_type == ARG_PTR_TO_SOCKET ||
arg_type == ARG_PTR_TO_SOCKET_OR_NULL) {
expected_type = PTR_TO_SOCKET;
if (!(register_is_null(reg) &&
arg_type == ARG_PTR_TO_SOCKET_OR_NULL)) {
if (type != expected_type)
goto err_type;
}
} else if (arg_type == ARG_PTR_TO_BTF_ID) {
bool ids_match = false;
if (register_is_null(reg) && arg_type_may_be_null(arg_type))
/* A NULL register has a SCALAR_VALUE type, so skip
* type checking.
*/
goto skip_type_check;
expected_type = PTR_TO_BTF_ID;
if (type != expected_type)
goto err_type;
if (!fn->check_btf_id) {
if (reg->btf_id != meta->btf_id) {
ids_match = btf_struct_ids_match(&env->log, reg->off, reg->btf_id,
meta->btf_id);
if (!ids_match) {
verbose(env, "Helper has type %s got %s in R%d\n",
kernel_type_name(meta->btf_id),
kernel_type_name(reg->btf_id), regno);
return -EACCES;
}
}
} else if (!fn->check_btf_id(reg->btf_id, arg)) {
verbose(env, "Helper does not support %s in R%d\n",
kernel_type_name(reg->btf_id), regno);
compatible = compatible_reg_types[arg_type];
if (!compatible) {
verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type);
return -EFAULT;
}
err = check_reg_type(env, regno, compatible);
if (err)
return err;
if (type == PTR_TO_BTF_ID) {
const u32 *btf_id = fn->arg_btf_id[arg];
if (!btf_id) {
verbose(env, "verifier internal error: missing BTF ID\n");
return -EFAULT;
}
if (!btf_struct_ids_match(&env->log, reg->off, reg->btf_id, *btf_id)) {
verbose(env, "R%d is of type %s but %s is expected\n",
regno, kernel_type_name(reg->btf_id), kernel_type_name(*btf_id));
return -EACCES;
}
if ((reg->off && !ids_match) || !tnum_is_const(reg->var_off) || reg->var_off.value) {
if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
verbose(env, "R%d is a pointer to in-kernel struct with non-zero offset\n",
regno);
return -EACCES;
}
} else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
if (meta->func_id == BPF_FUNC_spin_lock) {
if (process_spin_lock(env, regno, true))
return -EACCES;
} else if (meta->func_id == BPF_FUNC_spin_unlock) {
if (process_spin_lock(env, regno, false))
return -EACCES;
} else {
verbose(env, "verifier internal error\n");
return -EFAULT;
}
} else if (arg_type_is_mem_ptr(arg_type)) {
expected_type = PTR_TO_STACK;
/* One exception here. In case function allows for NULL to be
* passed in as argument, it's a SCALAR_VALUE type. Final test
* happens during stack boundary checking.
*/
if (register_is_null(reg) &&
(arg_type == ARG_PTR_TO_MEM_OR_NULL ||
arg_type == ARG_PTR_TO_ALLOC_MEM_OR_NULL))
/* final test in check_stack_boundary() */;
else if (!type_is_pkt_pointer(type) &&
type != PTR_TO_MAP_VALUE &&
type != PTR_TO_MEM &&
type != PTR_TO_RDONLY_BUF &&
type != PTR_TO_RDWR_BUF &&
type != expected_type)
goto err_type;
meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM;
} else if (arg_type_is_alloc_mem_ptr(arg_type)) {
expected_type = PTR_TO_MEM;
if (register_is_null(reg) &&
arg_type == ARG_PTR_TO_ALLOC_MEM_OR_NULL)
/* final test in check_stack_boundary() */;
else if (type != expected_type)
goto err_type;
} else if (type == PTR_TO_CTX) {
err = check_ctx_reg(env, reg, regno);
if (err < 0)
return err;
}
skip_type_check:
if (reg->ref_obj_id) {
if (meta->ref_obj_id) {
verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
regno, reg->ref_obj_id,
@@ -4074,15 +4140,6 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
return -EFAULT;
}
meta->ref_obj_id = reg->ref_obj_id;
} else if (arg_type_is_int_ptr(arg_type)) {
expected_type = PTR_TO_STACK;
if (!type_is_pkt_pointer(type) &&
type != PTR_TO_MAP_VALUE &&
type != expected_type)
goto err_type;
} else {
verbose(env, "unsupported arg_type %d\n", arg_type);
return -EFAULT;
}
if (arg_type == ARG_CONST_MAP_PTR) {
@@ -4121,6 +4178,22 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
err = check_helper_mem_access(env, regno,
meta->map_ptr->value_size, false,
meta);
} else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
if (meta->func_id == BPF_FUNC_spin_lock) {
if (process_spin_lock(env, regno, true))
return -EACCES;
} else if (meta->func_id == BPF_FUNC_spin_unlock) {
if (process_spin_lock(env, regno, false))
return -EACCES;
} else {
verbose(env, "verifier internal error\n");
return -EFAULT;
}
} else if (arg_type_is_mem_ptr(arg_type)) {
/* The access to this pointer is only checked when we hit the
* next is_mem_size argument below.
*/
meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MEM);
} else if (arg_type_is_mem_size(arg_type)) {
bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
@@ -4186,10 +4259,6 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
}
return err;
err_type:
verbose(env, "R%d type=%s expected=%s\n", regno,
reg_type_str[type], reg_type_str[expected_type]);
return -EACCES;
}
static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
@@ -4224,6 +4293,11 @@ static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
return false;
}
static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
{
return env->prog->jit_requested && IS_ENABLED(CONFIG_X86_64);
}
static int check_map_func_compatibility(struct bpf_verifier_env *env,
struct bpf_map *map, int func_id)
{
@@ -4339,8 +4413,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
case BPF_FUNC_tail_call:
if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
goto error;
if (env->subprog_cnt > 1) {
verbose(env, "tail_calls are not allowed in programs with bpf-to-bpf calls\n");
if (env->subprog_cnt > 1 && !allow_tail_call_in_subprogs(env)) {
verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
return -EINVAL;
}
break;
@@ -4495,10 +4569,22 @@ static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id)
return count <= 1;
}
static bool check_btf_id_ok(const struct bpf_func_proto *fn)
{
int i;
for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++)
if (fn->arg_type[i] == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i])
return false;
return true;
}
static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
{
return check_raw_mode_ok(fn) &&
check_arg_pair_ok(fn) &&
check_btf_id_ok(fn) &&
check_refcount_ok(fn, func_id) ? 0 : -EINVAL;
}
@@ -4894,11 +4980,6 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
meta.func_id = func_id;
/* check args */
for (i = 0; i < 5; i++) {
if (!fn->check_btf_id) {
err = btf_resolve_helper_id(&env->log, fn, i);
if (err > 0)
meta.btf_id = err;
}
err = check_func_arg(env, i, &meta, fn);
if (err)
return err;
@@ -5317,6 +5398,10 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
dst, reg_type_str[ptr_reg->type]);
return -EACCES;
case CONST_PTR_TO_MAP:
/* smin_val represents the known value */
if (known && smin_val == 0 && opcode == BPF_ADD)
break;
/* fall-through */
case PTR_TO_PACKET_END:
case PTR_TO_SOCKET:
case PTR_TO_SOCKET_OR_NULL:
@@ -7461,18 +7546,6 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
return -EINVAL;
}
if (env->subprog_cnt > 1) {
/* when program has LD_ABS insn JITs and interpreter assume
* that r1 == ctx == skb which is not the case for callees
* that can have arbitrary arguments. It's problematic
* for main prog as well since JITs would need to analyze
* all functions in order to make proper register save/restore
* decisions in the main prog. Hence disallow LD_ABS with calls
*/
verbose(env, "BPF_LD_[ABS|IND] instructions cannot be mixed with bpf-to-bpf calls\n");
return -EINVAL;
}
if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
BPF_SIZE(insn->code) == BPF_DW ||
(mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
@@ -7883,6 +7956,23 @@ err_free:
return ret;
}
static int check_abnormal_return(struct bpf_verifier_env *env)
{
int i;
for (i = 1; i < env->subprog_cnt; i++) {
if (env->subprog_info[i].has_ld_abs) {
verbose(env, "LD_ABS is not allowed in subprogs without BTF\n");
return -EINVAL;
}
if (env->subprog_info[i].has_tail_call) {
verbose(env, "tail_call is not allowed in subprogs without BTF\n");
return -EINVAL;
}
}
return 0;
}
/* The minimum supported BTF func info size */
#define MIN_BPF_FUNCINFO_SIZE 8
#define MAX_FUNCINFO_REC_SIZE 252
@@ -7891,20 +7981,24 @@ static int check_btf_func(struct bpf_verifier_env *env,
const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
const struct btf_type *type, *func_proto, *ret_type;
u32 i, nfuncs, urec_size, min_size;
u32 krec_size = sizeof(struct bpf_func_info);
struct bpf_func_info *krecord;
struct bpf_func_info_aux *info_aux = NULL;
const struct btf_type *type;
struct bpf_prog *prog;
const struct btf *btf;
void __user *urecord;
u32 prev_offset = 0;
bool scalar_return;
int ret = -ENOMEM;
nfuncs = attr->func_info_cnt;
if (!nfuncs)
if (!nfuncs) {
if (check_abnormal_return(env))
return -EINVAL;
return 0;
}
if (nfuncs != env->subprog_cnt) {
verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
@@ -7952,25 +8046,23 @@ static int check_btf_func(struct bpf_verifier_env *env,
}
/* check insn_off */
ret = -EINVAL;
if (i == 0) {
if (krecord[i].insn_off) {
verbose(env,
"nonzero insn_off %u for the first func info record",
krecord[i].insn_off);
ret = -EINVAL;
goto err_free;
}
} else if (krecord[i].insn_off <= prev_offset) {
verbose(env,
"same or smaller insn offset (%u) than previous func info record (%u)",
krecord[i].insn_off, prev_offset);
ret = -EINVAL;
goto err_free;
}
if (env->subprog_info[i].start != krecord[i].insn_off) {
verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
ret = -EINVAL;
goto err_free;
}
@@ -7979,10 +8071,26 @@ static int check_btf_func(struct bpf_verifier_env *env,
if (!type || !btf_type_is_func(type)) {
verbose(env, "invalid type id %d in func info",
krecord[i].type_id);
ret = -EINVAL;
goto err_free;
}
info_aux[i].linkage = BTF_INFO_VLEN(type->info);
func_proto = btf_type_by_id(btf, type->type);
if (unlikely(!func_proto || !btf_type_is_func_proto(func_proto)))
/* btf_func_check() already verified it during BTF load */
goto err_free;
ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
scalar_return =
btf_type_is_small_int(ret_type) || btf_type_is_enum(ret_type);
if (i && !scalar_return && env->subprog_info[i].has_ld_abs) {
verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n");
goto err_free;
}
if (i && !scalar_return && env->subprog_info[i].has_tail_call) {
verbose(env, "tail_call is only allowed in functions that return 'int'.\n");
goto err_free;
}
prev_offset = krecord[i].insn_off;
urecord += urec_size;
}
@@ -8143,8 +8251,11 @@ static int check_btf_info(struct bpf_verifier_env *env,
struct btf *btf;
int err;
if (!attr->func_info_cnt && !attr->line_info_cnt)
if (!attr->func_info_cnt && !attr->line_info_cnt) {
if (check_abnormal_return(env))
return -EINVAL;
return 0;
}
btf = btf_get_by_fd(attr->prog_btf_fd);
if (IS_ERR(btf))
@@ -9619,6 +9730,18 @@ static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len
}
}
static void adjust_poke_descs(struct bpf_prog *prog, u32 len)
{
struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
int i, sz = prog->aux->size_poke_tab;
struct bpf_jit_poke_descriptor *desc;
for (i = 0; i < sz; i++) {
desc = &tab[i];
desc->insn_idx += len - 1;
}
}
static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
const struct bpf_insn *patch, u32 len)
{
@@ -9635,6 +9758,7 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
if (adjust_insn_aux_data(env, new_prog, off, len))
return NULL;
adjust_subprog_starts(env, off, len);
adjust_poke_descs(new_prog, len);
return new_prog;
}
@@ -10165,6 +10289,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
{
struct bpf_prog *prog = env->prog, **func, *tmp;
int i, j, subprog_start, subprog_end = 0, len, subprog;
struct bpf_map *map_ptr;
struct bpf_insn *insn;
void *old_bpf_func;
int err, num_exentries;
@@ -10232,6 +10357,31 @@ static int jit_subprogs(struct bpf_verifier_env *env)
func[i]->aux->btf = prog->aux->btf;
func[i]->aux->func_info = prog->aux->func_info;
for (j = 0; j < prog->aux->size_poke_tab; j++) {
u32 insn_idx = prog->aux->poke_tab[j].insn_idx;
int ret;
if (!(insn_idx >= subprog_start &&
insn_idx <= subprog_end))
continue;
ret = bpf_jit_add_poke_descriptor(func[i],
&prog->aux->poke_tab[j]);
if (ret < 0) {
verbose(env, "adding tail call poke descriptor failed\n");
goto out_free;
}
func[i]->insnsi[insn_idx - subprog_start].imm = ret + 1;
map_ptr = func[i]->aux->poke_tab[ret].tail_call.map;
ret = map_ptr->ops->map_poke_track(map_ptr, func[i]->aux);
if (ret < 0) {
verbose(env, "tracking tail call prog failed\n");
goto out_free;
}
}
/* Use bpf_prog_F_tag to indicate functions in stack traces.
* Long term would need debug info to populate names
*/
@@ -10250,6 +10400,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
num_exentries++;
}
func[i]->aux->num_exentries = num_exentries;
func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
func[i] = bpf_int_jit_compile(func[i]);
if (!func[i]->jited) {
err = -ENOTSUPP;
@@ -10257,6 +10408,19 @@ static int jit_subprogs(struct bpf_verifier_env *env)
}
cond_resched();
}
/* Untrack main program's aux structs so that during map_poke_run()
* we will not stumble upon the unfilled poke descriptors; each
* of the main program's poke descs got distributed across subprogs
* and got tracked onto map, so we are sure that none of them will
* be missed after the operation below
*/
for (i = 0; i < prog->aux->size_poke_tab; i++) {
map_ptr = prog->aux->poke_tab[i].tail_call.map;
map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
}
/* at this point all bpf functions were successfully JITed
* now populate all bpf_calls with correct addresses and
* run last pass of JIT
@@ -10325,9 +10489,16 @@ static int jit_subprogs(struct bpf_verifier_env *env)
bpf_prog_free_unused_jited_linfo(prog);
return 0;
out_free:
for (i = 0; i < env->subprog_cnt; i++)
if (func[i])
bpf_jit_free(func[i]);
for (i = 0; i < env->subprog_cnt; i++) {
if (!func[i])
continue;
for (j = 0; j < func[i]->aux->size_poke_tab; j++) {
map_ptr = func[i]->aux->poke_tab[j].tail_call.map;
map_ptr->ops->map_poke_untrack(map_ptr, func[i]->aux);
}
bpf_jit_free(func[i]);
}
kfree(func);
out_undo_insn:
/* cleanup main prog to be interpreted */
@@ -10361,6 +10532,13 @@ static int fixup_call_args(struct bpf_verifier_env *env)
return err;
}
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) {
/* When JIT fails the progs with bpf2bpf calls and tail_calls
* have to be rejected, since interpreter doesn't support them yet.
*/
verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
return -EINVAL;
}
for (i = 0; i < prog->len; i++, insn++) {
if (insn->code != (BPF_JMP | BPF_CALL) ||
insn->src_reg != BPF_PSEUDO_CALL)
@@ -10524,8 +10702,9 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
* the program array.
*/
prog->cb_access = 1;
env->prog->aux->stack_depth = MAX_BPF_STACK;
env->prog->aux->max_pkt_offset = MAX_PACKET_OFF;
if (!allow_tail_call_in_subprogs(env))
prog->aux->stack_depth = MAX_BPF_STACK;
prog->aux->max_pkt_offset = MAX_PACKET_OFF;
/* mark bpf_tail_call as different opcode to avoid
* conditional branch in the interpeter for every normal
@@ -10545,6 +10724,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
.reason = BPF_POKE_REASON_TAIL_CALL,
.tail_call.map = BPF_MAP_PTR(aux->map_ptr_state),
.tail_call.key = bpf_map_key_immediate(aux),
.insn_idx = i + delta,
};
ret = bpf_jit_add_poke_descriptor(prog, &desc);