Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says: ==================== pull-request: bpf-next 2020-03-13 The following pull-request contains BPF updates for your *net-next* tree. We've added 86 non-merge commits during the last 12 day(s) which contain a total of 107 files changed, 5771 insertions(+), 1700 deletions(-). The main changes are: 1) Add modify_return attach type which allows to attach to a function via BPF trampoline and is run after the fentry and before the fexit programs and can pass a return code to the original caller, from KP Singh. 2) Generalize BPF's kallsyms handling and add BPF trampoline and dispatcher objects to be visible in /proc/kallsyms so they can be annotated in stack traces, from Jiri Olsa. 3) Extend BPF sockmap to allow for UDP next to existing TCP support in order to enable this for BPF based socket dispatch, from Lorenz Bauer. 4) Introduce a new bpftool 'prog profile' command which attaches to existing BPF programs via fentry and fexit hooks and reads out hardware counters during that period, from Song Liu. Example usage: bpftool prog profile id 337 duration 3 cycles instructions llc_misses 4228 run_cnt 3403698 cycles (84.08%) 3525294 instructions # 1.04 insn per cycle (84.05%) 13 llc_misses # 3.69 LLC misses per million isns (83.50%) 5) Batch of improvements to libbpf, bpftool and BPF selftests. Also addition of a new bpf_link abstraction to keep in particular BPF tracing programs attached even when the application owning them exits, from Andrii Nakryiko. 6) New bpf_get_ns_current_pid_tgid() helper for tracing to perform PID filtering and which returns the PID as seen by the init namespace, from Carlos Neira. 7) Refactor of RISC-V JIT code to move out common pieces and addition of a new RV32G BPF JIT compiler, from Luke Nelson. 8) Add gso_size context member to __sk_buff in order to be able to know whether a given skb is GSO or not, from Willem de Bruijn. 
9) Add a new bpf_xdp_output() helper which reuses XDP's existing perf RB output implementation but can be called from tracepoint programs, from Eelco Chaudron. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
@@ -320,6 +320,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
|
||||
struct bpf_struct_ops_value *uvalue, *kvalue;
|
||||
const struct btf_member *member;
|
||||
const struct btf_type *t = st_ops->type;
|
||||
struct bpf_tramp_progs *tprogs = NULL;
|
||||
void *udata, *kdata;
|
||||
int prog_fd, err = 0;
|
||||
void *image;
|
||||
@@ -343,6 +344,10 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
|
||||
if (uvalue->state || refcount_read(&uvalue->refcnt))
|
||||
return -EINVAL;
|
||||
|
||||
tprogs = kcalloc(BPF_TRAMP_MAX, sizeof(*tprogs), GFP_KERNEL);
|
||||
if (!tprogs)
|
||||
return -ENOMEM;
|
||||
|
||||
uvalue = (struct bpf_struct_ops_value *)st_map->uvalue;
|
||||
kvalue = (struct bpf_struct_ops_value *)&st_map->kvalue;
|
||||
|
||||
@@ -425,10 +430,12 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
|
||||
goto reset_unlock;
|
||||
}
|
||||
|
||||
tprogs[BPF_TRAMP_FENTRY].progs[0] = prog;
|
||||
tprogs[BPF_TRAMP_FENTRY].nr_progs = 1;
|
||||
err = arch_prepare_bpf_trampoline(image,
|
||||
st_map->image + PAGE_SIZE,
|
||||
&st_ops->func_models[i], 0,
|
||||
&prog, 1, NULL, 0, NULL);
|
||||
tprogs, NULL);
|
||||
if (err < 0)
|
||||
goto reset_unlock;
|
||||
|
||||
@@ -469,6 +476,7 @@ reset_unlock:
|
||||
memset(uvalue, 0, map->value_size);
|
||||
memset(kvalue, 0, map->value_size);
|
||||
unlock:
|
||||
kfree(tprogs);
|
||||
mutex_unlock(&st_map->lock);
|
||||
return err;
|
||||
}
|
||||
|
@@ -3710,13 +3710,26 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
|
||||
nr_args--;
|
||||
}
|
||||
|
||||
if (prog->expected_attach_type == BPF_TRACE_FEXIT &&
|
||||
arg == nr_args) {
|
||||
if (!t)
|
||||
/* Default prog with 5 args. 6th arg is retval. */
|
||||
return true;
|
||||
/* function return type */
|
||||
t = btf_type_by_id(btf, t->type);
|
||||
if (arg == nr_args) {
|
||||
if (prog->expected_attach_type == BPF_TRACE_FEXIT) {
|
||||
if (!t)
|
||||
return true;
|
||||
t = btf_type_by_id(btf, t->type);
|
||||
} else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
|
||||
/* For now the BPF_MODIFY_RETURN can only be attached to
|
||||
* functions that return an int.
|
||||
*/
|
||||
if (!t)
|
||||
return false;
|
||||
|
||||
t = btf_type_skip_modifiers(btf, t->type, NULL);
|
||||
if (!btf_type_is_int(t)) {
|
||||
bpf_log(log,
|
||||
"ret type %s not allowed for fmod_ret\n",
|
||||
btf_kind_str[BTF_INFO_KIND(t->info)]);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
} else if (arg >= nr_args) {
|
||||
bpf_log(log, "func '%s' doesn't have %d-th argument\n",
|
||||
tname, arg + 1);
|
||||
|
@@ -97,7 +97,7 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
|
||||
fp->aux->prog = fp;
|
||||
fp->jit_requested = ebpf_jit_enabled();
|
||||
|
||||
INIT_LIST_HEAD_RCU(&fp->aux->ksym_lnode);
|
||||
INIT_LIST_HEAD_RCU(&fp->aux->ksym.lnode);
|
||||
|
||||
return fp;
|
||||
}
|
||||
@@ -523,22 +523,22 @@ int bpf_jit_kallsyms __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON);
|
||||
int bpf_jit_harden __read_mostly;
|
||||
long bpf_jit_limit __read_mostly;
|
||||
|
||||
static __always_inline void
|
||||
bpf_get_prog_addr_region(const struct bpf_prog *prog,
|
||||
unsigned long *symbol_start,
|
||||
unsigned long *symbol_end)
|
||||
static void
|
||||
bpf_prog_ksym_set_addr(struct bpf_prog *prog)
|
||||
{
|
||||
const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(prog);
|
||||
unsigned long addr = (unsigned long)hdr;
|
||||
|
||||
WARN_ON_ONCE(!bpf_prog_ebpf_jited(prog));
|
||||
|
||||
*symbol_start = addr;
|
||||
*symbol_end = addr + hdr->pages * PAGE_SIZE;
|
||||
prog->aux->ksym.start = (unsigned long) prog->bpf_func;
|
||||
prog->aux->ksym.end = addr + hdr->pages * PAGE_SIZE;
|
||||
}
|
||||
|
||||
void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
|
||||
static void
|
||||
bpf_prog_ksym_set_name(struct bpf_prog *prog)
|
||||
{
|
||||
char *sym = prog->aux->ksym.name;
|
||||
const char *end = sym + KSYM_NAME_LEN;
|
||||
const struct btf_type *type;
|
||||
const char *func_name;
|
||||
@@ -572,36 +572,27 @@ void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
|
||||
*sym = 0;
|
||||
}
|
||||
|
||||
static __always_inline unsigned long
|
||||
bpf_get_prog_addr_start(struct latch_tree_node *n)
|
||||
static unsigned long bpf_get_ksym_start(struct latch_tree_node *n)
|
||||
{
|
||||
unsigned long symbol_start, symbol_end;
|
||||
const struct bpf_prog_aux *aux;
|
||||
|
||||
aux = container_of(n, struct bpf_prog_aux, ksym_tnode);
|
||||
bpf_get_prog_addr_region(aux->prog, &symbol_start, &symbol_end);
|
||||
|
||||
return symbol_start;
|
||||
return container_of(n, struct bpf_ksym, tnode)->start;
|
||||
}
|
||||
|
||||
static __always_inline bool bpf_tree_less(struct latch_tree_node *a,
|
||||
struct latch_tree_node *b)
|
||||
{
|
||||
return bpf_get_prog_addr_start(a) < bpf_get_prog_addr_start(b);
|
||||
return bpf_get_ksym_start(a) < bpf_get_ksym_start(b);
|
||||
}
|
||||
|
||||
static __always_inline int bpf_tree_comp(void *key, struct latch_tree_node *n)
|
||||
{
|
||||
unsigned long val = (unsigned long)key;
|
||||
unsigned long symbol_start, symbol_end;
|
||||
const struct bpf_prog_aux *aux;
|
||||
const struct bpf_ksym *ksym;
|
||||
|
||||
aux = container_of(n, struct bpf_prog_aux, ksym_tnode);
|
||||
bpf_get_prog_addr_region(aux->prog, &symbol_start, &symbol_end);
|
||||
ksym = container_of(n, struct bpf_ksym, tnode);
|
||||
|
||||
if (val < symbol_start)
|
||||
if (val < ksym->start)
|
||||
return -1;
|
||||
if (val >= symbol_end)
|
||||
if (val >= ksym->end)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
@@ -616,20 +607,29 @@ static DEFINE_SPINLOCK(bpf_lock);
|
||||
static LIST_HEAD(bpf_kallsyms);
|
||||
static struct latch_tree_root bpf_tree __cacheline_aligned;
|
||||
|
||||
static void bpf_prog_ksym_node_add(struct bpf_prog_aux *aux)
|
||||
void bpf_ksym_add(struct bpf_ksym *ksym)
|
||||
{
|
||||
WARN_ON_ONCE(!list_empty(&aux->ksym_lnode));
|
||||
list_add_tail_rcu(&aux->ksym_lnode, &bpf_kallsyms);
|
||||
latch_tree_insert(&aux->ksym_tnode, &bpf_tree, &bpf_tree_ops);
|
||||
spin_lock_bh(&bpf_lock);
|
||||
WARN_ON_ONCE(!list_empty(&ksym->lnode));
|
||||
list_add_tail_rcu(&ksym->lnode, &bpf_kallsyms);
|
||||
latch_tree_insert(&ksym->tnode, &bpf_tree, &bpf_tree_ops);
|
||||
spin_unlock_bh(&bpf_lock);
|
||||
}
|
||||
|
||||
static void bpf_prog_ksym_node_del(struct bpf_prog_aux *aux)
|
||||
static void __bpf_ksym_del(struct bpf_ksym *ksym)
|
||||
{
|
||||
if (list_empty(&aux->ksym_lnode))
|
||||
if (list_empty(&ksym->lnode))
|
||||
return;
|
||||
|
||||
latch_tree_erase(&aux->ksym_tnode, &bpf_tree, &bpf_tree_ops);
|
||||
list_del_rcu(&aux->ksym_lnode);
|
||||
latch_tree_erase(&ksym->tnode, &bpf_tree, &bpf_tree_ops);
|
||||
list_del_rcu(&ksym->lnode);
|
||||
}
|
||||
|
||||
void bpf_ksym_del(struct bpf_ksym *ksym)
|
||||
{
|
||||
spin_lock_bh(&bpf_lock);
|
||||
__bpf_ksym_del(ksym);
|
||||
spin_unlock_bh(&bpf_lock);
|
||||
}
|
||||
|
||||
static bool bpf_prog_kallsyms_candidate(const struct bpf_prog *fp)
|
||||
@@ -639,8 +639,8 @@ static bool bpf_prog_kallsyms_candidate(const struct bpf_prog *fp)
|
||||
|
||||
static bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp)
|
||||
{
|
||||
return list_empty(&fp->aux->ksym_lnode) ||
|
||||
fp->aux->ksym_lnode.prev == LIST_POISON2;
|
||||
return list_empty(&fp->aux->ksym.lnode) ||
|
||||
fp->aux->ksym.lnode.prev == LIST_POISON2;
|
||||
}
|
||||
|
||||
void bpf_prog_kallsyms_add(struct bpf_prog *fp)
|
||||
@@ -649,9 +649,11 @@ void bpf_prog_kallsyms_add(struct bpf_prog *fp)
|
||||
!capable(CAP_SYS_ADMIN))
|
||||
return;
|
||||
|
||||
spin_lock_bh(&bpf_lock);
|
||||
bpf_prog_ksym_node_add(fp->aux);
|
||||
spin_unlock_bh(&bpf_lock);
|
||||
bpf_prog_ksym_set_addr(fp);
|
||||
bpf_prog_ksym_set_name(fp);
|
||||
fp->aux->ksym.prog = true;
|
||||
|
||||
bpf_ksym_add(&fp->aux->ksym);
|
||||
}
|
||||
|
||||
void bpf_prog_kallsyms_del(struct bpf_prog *fp)
|
||||
@@ -659,33 +661,30 @@ void bpf_prog_kallsyms_del(struct bpf_prog *fp)
|
||||
if (!bpf_prog_kallsyms_candidate(fp))
|
||||
return;
|
||||
|
||||
spin_lock_bh(&bpf_lock);
|
||||
bpf_prog_ksym_node_del(fp->aux);
|
||||
spin_unlock_bh(&bpf_lock);
|
||||
bpf_ksym_del(&fp->aux->ksym);
|
||||
}
|
||||
|
||||
static struct bpf_prog *bpf_prog_kallsyms_find(unsigned long addr)
|
||||
static struct bpf_ksym *bpf_ksym_find(unsigned long addr)
|
||||
{
|
||||
struct latch_tree_node *n;
|
||||
|
||||
n = latch_tree_find((void *)addr, &bpf_tree, &bpf_tree_ops);
|
||||
return n ?
|
||||
container_of(n, struct bpf_prog_aux, ksym_tnode)->prog :
|
||||
NULL;
|
||||
return n ? container_of(n, struct bpf_ksym, tnode) : NULL;
|
||||
}
|
||||
|
||||
const char *__bpf_address_lookup(unsigned long addr, unsigned long *size,
|
||||
unsigned long *off, char *sym)
|
||||
{
|
||||
unsigned long symbol_start, symbol_end;
|
||||
struct bpf_prog *prog;
|
||||
struct bpf_ksym *ksym;
|
||||
char *ret = NULL;
|
||||
|
||||
rcu_read_lock();
|
||||
prog = bpf_prog_kallsyms_find(addr);
|
||||
if (prog) {
|
||||
bpf_get_prog_addr_region(prog, &symbol_start, &symbol_end);
|
||||
bpf_get_prog_name(prog, sym);
|
||||
ksym = bpf_ksym_find(addr);
|
||||
if (ksym) {
|
||||
unsigned long symbol_start = ksym->start;
|
||||
unsigned long symbol_end = ksym->end;
|
||||
|
||||
strncpy(sym, ksym->name, KSYM_NAME_LEN);
|
||||
|
||||
ret = sym;
|
||||
if (size)
|
||||
@@ -703,19 +702,28 @@ bool is_bpf_text_address(unsigned long addr)
|
||||
bool ret;
|
||||
|
||||
rcu_read_lock();
|
||||
ret = bpf_prog_kallsyms_find(addr) != NULL;
|
||||
ret = bpf_ksym_find(addr) != NULL;
|
||||
rcu_read_unlock();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct bpf_prog *bpf_prog_ksym_find(unsigned long addr)
|
||||
{
|
||||
struct bpf_ksym *ksym = bpf_ksym_find(addr);
|
||||
|
||||
return ksym && ksym->prog ?
|
||||
container_of(ksym, struct bpf_prog_aux, ksym)->prog :
|
||||
NULL;
|
||||
}
|
||||
|
||||
const struct exception_table_entry *search_bpf_extables(unsigned long addr)
|
||||
{
|
||||
const struct exception_table_entry *e = NULL;
|
||||
struct bpf_prog *prog;
|
||||
|
||||
rcu_read_lock();
|
||||
prog = bpf_prog_kallsyms_find(addr);
|
||||
prog = bpf_prog_ksym_find(addr);
|
||||
if (!prog)
|
||||
goto out;
|
||||
if (!prog->aux->num_exentries)
|
||||
@@ -730,7 +738,7 @@ out:
|
||||
int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
|
||||
char *sym)
|
||||
{
|
||||
struct bpf_prog_aux *aux;
|
||||
struct bpf_ksym *ksym;
|
||||
unsigned int it = 0;
|
||||
int ret = -ERANGE;
|
||||
|
||||
@@ -738,13 +746,13 @@ int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
|
||||
return ret;
|
||||
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(aux, &bpf_kallsyms, ksym_lnode) {
|
||||
list_for_each_entry_rcu(ksym, &bpf_kallsyms, lnode) {
|
||||
if (it++ != symnum)
|
||||
continue;
|
||||
|
||||
bpf_get_prog_name(aux->prog, sym);
|
||||
strncpy(sym, ksym->name, KSYM_NAME_LEN);
|
||||
|
||||
*value = (unsigned long)aux->prog->bpf_func;
|
||||
*value = ksym->start;
|
||||
*type = BPF_SYM_ELF_TYPE;
|
||||
|
||||
ret = 0;
|
||||
@@ -2149,6 +2157,7 @@ const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
|
||||
const struct bpf_func_proto bpf_get_current_comm_proto __weak;
|
||||
const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
|
||||
const struct bpf_func_proto bpf_get_local_storage_proto __weak;
|
||||
const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto __weak;
|
||||
|
||||
const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
|
||||
{
|
||||
|
@@ -113,7 +113,7 @@ static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs)
|
||||
noff = 0;
|
||||
} else {
|
||||
old = d->image + d->image_off;
|
||||
noff = d->image_off ^ (BPF_IMAGE_SIZE / 2);
|
||||
noff = d->image_off ^ (PAGE_SIZE / 2);
|
||||
}
|
||||
|
||||
new = d->num_progs ? d->image + noff : NULL;
|
||||
@@ -140,9 +140,10 @@ void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
|
||||
|
||||
mutex_lock(&d->mutex);
|
||||
if (!d->image) {
|
||||
d->image = bpf_image_alloc();
|
||||
d->image = bpf_jit_alloc_exec_page();
|
||||
if (!d->image)
|
||||
goto out;
|
||||
bpf_image_ksym_add(d->image, &d->ksym);
|
||||
}
|
||||
|
||||
prev_num_progs = d->num_progs;
|
||||
|
@@ -12,6 +12,8 @@
|
||||
#include <linux/filter.h>
|
||||
#include <linux/ctype.h>
|
||||
#include <linux/jiffies.h>
|
||||
#include <linux/pid_namespace.h>
|
||||
#include <linux/proc_ns.h>
|
||||
|
||||
#include "../../lib/kstrtox.h"
|
||||
|
||||
@@ -499,3 +501,46 @@ const struct bpf_func_proto bpf_strtoul_proto = {
|
||||
.arg4_type = ARG_PTR_TO_LONG,
|
||||
};
|
||||
#endif
|
||||
|
||||
BPF_CALL_4(bpf_get_ns_current_pid_tgid, u64, dev, u64, ino,
|
||||
struct bpf_pidns_info *, nsdata, u32, size)
|
||||
{
|
||||
struct task_struct *task = current;
|
||||
struct pid_namespace *pidns;
|
||||
int err = -EINVAL;
|
||||
|
||||
if (unlikely(size != sizeof(struct bpf_pidns_info)))
|
||||
goto clear;
|
||||
|
||||
if (unlikely((u64)(dev_t)dev != dev))
|
||||
goto clear;
|
||||
|
||||
if (unlikely(!task))
|
||||
goto clear;
|
||||
|
||||
pidns = task_active_pid_ns(task);
|
||||
if (unlikely(!pidns)) {
|
||||
err = -ENOENT;
|
||||
goto clear;
|
||||
}
|
||||
|
||||
if (!ns_match(&pidns->ns, (dev_t)dev, ino))
|
||||
goto clear;
|
||||
|
||||
nsdata->pid = task_pid_nr_ns(task, pidns);
|
||||
nsdata->tgid = task_tgid_nr_ns(task, pidns);
|
||||
return 0;
|
||||
clear:
|
||||
memset((void *)nsdata, 0, (size_t) size);
|
||||
return err;
|
||||
}
|
||||
|
||||
const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto = {
|
||||
.func = bpf_get_ns_current_pid_tgid,
|
||||
.gpl_only = false,
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_ANYTHING,
|
||||
.arg2_type = ARG_ANYTHING,
|
||||
.arg3_type = ARG_PTR_TO_UNINIT_MEM,
|
||||
.arg4_type = ARG_CONST_SIZE,
|
||||
};
|
||||
|
@@ -25,6 +25,7 @@ enum bpf_type {
|
||||
BPF_TYPE_UNSPEC = 0,
|
||||
BPF_TYPE_PROG,
|
||||
BPF_TYPE_MAP,
|
||||
BPF_TYPE_LINK,
|
||||
};
|
||||
|
||||
static void *bpf_any_get(void *raw, enum bpf_type type)
|
||||
@@ -36,6 +37,9 @@ static void *bpf_any_get(void *raw, enum bpf_type type)
|
||||
case BPF_TYPE_MAP:
|
||||
bpf_map_inc_with_uref(raw);
|
||||
break;
|
||||
case BPF_TYPE_LINK:
|
||||
bpf_link_inc(raw);
|
||||
break;
|
||||
default:
|
||||
WARN_ON_ONCE(1);
|
||||
break;
|
||||
@@ -53,6 +57,9 @@ static void bpf_any_put(void *raw, enum bpf_type type)
|
||||
case BPF_TYPE_MAP:
|
||||
bpf_map_put_with_uref(raw);
|
||||
break;
|
||||
case BPF_TYPE_LINK:
|
||||
bpf_link_put(raw);
|
||||
break;
|
||||
default:
|
||||
WARN_ON_ONCE(1);
|
||||
break;
|
||||
@@ -63,20 +70,32 @@ static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type)
|
||||
{
|
||||
void *raw;
|
||||
|
||||
*type = BPF_TYPE_MAP;
|
||||
raw = bpf_map_get_with_uref(ufd);
|
||||
if (IS_ERR(raw)) {
|
||||
*type = BPF_TYPE_PROG;
|
||||
raw = bpf_prog_get(ufd);
|
||||
if (!IS_ERR(raw)) {
|
||||
*type = BPF_TYPE_MAP;
|
||||
return raw;
|
||||
}
|
||||
|
||||
return raw;
|
||||
raw = bpf_prog_get(ufd);
|
||||
if (!IS_ERR(raw)) {
|
||||
*type = BPF_TYPE_PROG;
|
||||
return raw;
|
||||
}
|
||||
|
||||
raw = bpf_link_get_from_fd(ufd);
|
||||
if (!IS_ERR(raw)) {
|
||||
*type = BPF_TYPE_LINK;
|
||||
return raw;
|
||||
}
|
||||
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
static const struct inode_operations bpf_dir_iops;
|
||||
|
||||
static const struct inode_operations bpf_prog_iops = { };
|
||||
static const struct inode_operations bpf_map_iops = { };
|
||||
static const struct inode_operations bpf_link_iops = { };
|
||||
|
||||
static struct inode *bpf_get_inode(struct super_block *sb,
|
||||
const struct inode *dir,
|
||||
@@ -114,6 +133,8 @@ static int bpf_inode_type(const struct inode *inode, enum bpf_type *type)
|
||||
*type = BPF_TYPE_PROG;
|
||||
else if (inode->i_op == &bpf_map_iops)
|
||||
*type = BPF_TYPE_MAP;
|
||||
else if (inode->i_op == &bpf_link_iops)
|
||||
*type = BPF_TYPE_LINK;
|
||||
else
|
||||
return -EACCES;
|
||||
|
||||
@@ -335,6 +356,12 @@ static int bpf_mkmap(struct dentry *dentry, umode_t mode, void *arg)
|
||||
&bpffs_map_fops : &bpffs_obj_fops);
|
||||
}
|
||||
|
||||
static int bpf_mklink(struct dentry *dentry, umode_t mode, void *arg)
|
||||
{
|
||||
return bpf_mkobj_ops(dentry, mode, arg, &bpf_link_iops,
|
||||
&bpffs_obj_fops);
|
||||
}
|
||||
|
||||
static struct dentry *
|
||||
bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags)
|
||||
{
|
||||
@@ -411,6 +438,9 @@ static int bpf_obj_do_pin(const char __user *pathname, void *raw,
|
||||
case BPF_TYPE_MAP:
|
||||
ret = vfs_mkobj(dentry, mode, bpf_mkmap, raw);
|
||||
break;
|
||||
case BPF_TYPE_LINK:
|
||||
ret = vfs_mkobj(dentry, mode, bpf_mklink, raw);
|
||||
break;
|
||||
default:
|
||||
ret = -EPERM;
|
||||
}
|
||||
@@ -487,6 +517,8 @@ int bpf_obj_get_user(const char __user *pathname, int flags)
|
||||
ret = bpf_prog_new_fd(raw);
|
||||
else if (type == BPF_TYPE_MAP)
|
||||
ret = bpf_map_new_fd(raw, f_flags);
|
||||
else if (type == BPF_TYPE_LINK)
|
||||
ret = bpf_link_new_fd(raw);
|
||||
else
|
||||
return -ENOENT;
|
||||
|
||||
@@ -504,6 +536,8 @@ static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type
|
||||
|
||||
if (inode->i_op == &bpf_map_iops)
|
||||
return ERR_PTR(-EINVAL);
|
||||
if (inode->i_op == &bpf_link_iops)
|
||||
return ERR_PTR(-EINVAL);
|
||||
if (inode->i_op != &bpf_prog_iops)
|
||||
return ERR_PTR(-EACCES);
|
||||
|
||||
|
@@ -2173,84 +2173,274 @@ static int bpf_obj_get(const union bpf_attr *attr)
|
||||
attr->file_flags);
|
||||
}
|
||||
|
||||
static int bpf_tracing_prog_release(struct inode *inode, struct file *filp)
|
||||
{
|
||||
struct bpf_prog *prog = filp->private_data;
|
||||
struct bpf_link {
|
||||
atomic64_t refcnt;
|
||||
const struct bpf_link_ops *ops;
|
||||
struct bpf_prog *prog;
|
||||
struct work_struct work;
|
||||
};
|
||||
|
||||
WARN_ON_ONCE(bpf_trampoline_unlink_prog(prog));
|
||||
bpf_prog_put(prog);
|
||||
void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops,
|
||||
struct bpf_prog *prog)
|
||||
{
|
||||
atomic64_set(&link->refcnt, 1);
|
||||
link->ops = ops;
|
||||
link->prog = prog;
|
||||
}
|
||||
|
||||
/* Clean up bpf_link and corresponding anon_inode file and FD. After
|
||||
* anon_inode is created, bpf_link can't be just kfree()'d due to deferred
|
||||
* anon_inode's release() call. This helper manages marking bpf_link as
|
||||
* defunct, releases anon_inode file and puts reserved FD.
|
||||
*/
|
||||
static void bpf_link_cleanup(struct bpf_link *link, struct file *link_file,
|
||||
int link_fd)
|
||||
{
|
||||
link->prog = NULL;
|
||||
fput(link_file);
|
||||
put_unused_fd(link_fd);
|
||||
}
|
||||
|
||||
void bpf_link_inc(struct bpf_link *link)
|
||||
{
|
||||
atomic64_inc(&link->refcnt);
|
||||
}
|
||||
|
||||
/* bpf_link_free is guaranteed to be called from process context */
|
||||
static void bpf_link_free(struct bpf_link *link)
|
||||
{
|
||||
if (link->prog) {
|
||||
/* detach BPF program, clean up used resources */
|
||||
link->ops->release(link);
|
||||
bpf_prog_put(link->prog);
|
||||
}
|
||||
/* free bpf_link and its containing memory */
|
||||
link->ops->dealloc(link);
|
||||
}
|
||||
|
||||
static void bpf_link_put_deferred(struct work_struct *work)
|
||||
{
|
||||
struct bpf_link *link = container_of(work, struct bpf_link, work);
|
||||
|
||||
bpf_link_free(link);
|
||||
}
|
||||
|
||||
/* bpf_link_put can be called from atomic context, but ensures that resources
|
||||
* are freed from process context
|
||||
*/
|
||||
void bpf_link_put(struct bpf_link *link)
|
||||
{
|
||||
if (!atomic64_dec_and_test(&link->refcnt))
|
||||
return;
|
||||
|
||||
if (in_atomic()) {
|
||||
INIT_WORK(&link->work, bpf_link_put_deferred);
|
||||
schedule_work(&link->work);
|
||||
} else {
|
||||
bpf_link_free(link);
|
||||
}
|
||||
}
|
||||
|
||||
static int bpf_link_release(struct inode *inode, struct file *filp)
|
||||
{
|
||||
struct bpf_link *link = filp->private_data;
|
||||
|
||||
bpf_link_put(link);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct file_operations bpf_tracing_prog_fops = {
|
||||
.release = bpf_tracing_prog_release,
|
||||
#ifdef CONFIG_PROC_FS
|
||||
static const struct bpf_link_ops bpf_raw_tp_lops;
|
||||
static const struct bpf_link_ops bpf_tracing_link_lops;
|
||||
static const struct bpf_link_ops bpf_xdp_link_lops;
|
||||
|
||||
static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp)
|
||||
{
|
||||
const struct bpf_link *link = filp->private_data;
|
||||
const struct bpf_prog *prog = link->prog;
|
||||
char prog_tag[sizeof(prog->tag) * 2 + 1] = { };
|
||||
const char *link_type;
|
||||
|
||||
if (link->ops == &bpf_raw_tp_lops)
|
||||
link_type = "raw_tracepoint";
|
||||
else if (link->ops == &bpf_tracing_link_lops)
|
||||
link_type = "tracing";
|
||||
else
|
||||
link_type = "unknown";
|
||||
|
||||
bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
|
||||
seq_printf(m,
|
||||
"link_type:\t%s\n"
|
||||
"prog_tag:\t%s\n"
|
||||
"prog_id:\t%u\n",
|
||||
link_type,
|
||||
prog_tag,
|
||||
prog->aux->id);
|
||||
}
|
||||
#endif
|
||||
|
||||
const struct file_operations bpf_link_fops = {
|
||||
#ifdef CONFIG_PROC_FS
|
||||
.show_fdinfo = bpf_link_show_fdinfo,
|
||||
#endif
|
||||
.release = bpf_link_release,
|
||||
.read = bpf_dummy_read,
|
||||
.write = bpf_dummy_write,
|
||||
};
|
||||
|
||||
int bpf_link_new_fd(struct bpf_link *link)
|
||||
{
|
||||
return anon_inode_getfd("bpf-link", &bpf_link_fops, link, O_CLOEXEC);
|
||||
}
|
||||
|
||||
/* Similar to bpf_link_new_fd, create anon_inode for given bpf_link, but
|
||||
* instead of immediately installing fd in fdtable, just reserve it and
|
||||
* return. Caller then needs to either install it with fd_install(fd, file) or
|
||||
* release with put_unused_fd(fd).
|
||||
* This is useful for cases when bpf_link attachment/detachment are
|
||||
* complicated and expensive operations and should be delayed until all the fd
|
||||
* reservation and anon_inode creation succeeds.
|
||||
*/
|
||||
struct file *bpf_link_new_file(struct bpf_link *link, int *reserved_fd)
|
||||
{
|
||||
struct file *file;
|
||||
int fd;
|
||||
|
||||
fd = get_unused_fd_flags(O_CLOEXEC);
|
||||
if (fd < 0)
|
||||
return ERR_PTR(fd);
|
||||
|
||||
file = anon_inode_getfile("bpf_link", &bpf_link_fops, link, O_CLOEXEC);
|
||||
if (IS_ERR(file)) {
|
||||
put_unused_fd(fd);
|
||||
return file;
|
||||
}
|
||||
|
||||
*reserved_fd = fd;
|
||||
return file;
|
||||
}
|
||||
|
||||
struct bpf_link *bpf_link_get_from_fd(u32 ufd)
|
||||
{
|
||||
struct fd f = fdget(ufd);
|
||||
struct bpf_link *link;
|
||||
|
||||
if (!f.file)
|
||||
return ERR_PTR(-EBADF);
|
||||
if (f.file->f_op != &bpf_link_fops) {
|
||||
fdput(f);
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
link = f.file->private_data;
|
||||
bpf_link_inc(link);
|
||||
fdput(f);
|
||||
|
||||
return link;
|
||||
}
|
||||
|
||||
struct bpf_tracing_link {
|
||||
struct bpf_link link;
|
||||
};
|
||||
|
||||
static void bpf_tracing_link_release(struct bpf_link *link)
|
||||
{
|
||||
WARN_ON_ONCE(bpf_trampoline_unlink_prog(link->prog));
|
||||
}
|
||||
|
||||
static void bpf_tracing_link_dealloc(struct bpf_link *link)
|
||||
{
|
||||
struct bpf_tracing_link *tr_link =
|
||||
container_of(link, struct bpf_tracing_link, link);
|
||||
|
||||
kfree(tr_link);
|
||||
}
|
||||
|
||||
static const struct bpf_link_ops bpf_tracing_link_lops = {
|
||||
.release = bpf_tracing_link_release,
|
||||
.dealloc = bpf_tracing_link_dealloc,
|
||||
};
|
||||
|
||||
static int bpf_tracing_prog_attach(struct bpf_prog *prog)
|
||||
{
|
||||
int tr_fd, err;
|
||||
struct bpf_tracing_link *link;
|
||||
struct file *link_file;
|
||||
int link_fd, err;
|
||||
|
||||
if (prog->expected_attach_type != BPF_TRACE_FENTRY &&
|
||||
prog->expected_attach_type != BPF_TRACE_FEXIT &&
|
||||
prog->expected_attach_type != BPF_MODIFY_RETURN &&
|
||||
prog->type != BPF_PROG_TYPE_EXT) {
|
||||
err = -EINVAL;
|
||||
goto out_put_prog;
|
||||
}
|
||||
|
||||
err = bpf_trampoline_link_prog(prog);
|
||||
if (err)
|
||||
goto out_put_prog;
|
||||
|
||||
tr_fd = anon_inode_getfd("bpf-tracing-prog", &bpf_tracing_prog_fops,
|
||||
prog, O_CLOEXEC);
|
||||
if (tr_fd < 0) {
|
||||
WARN_ON_ONCE(bpf_trampoline_unlink_prog(prog));
|
||||
err = tr_fd;
|
||||
link = kzalloc(sizeof(*link), GFP_USER);
|
||||
if (!link) {
|
||||
err = -ENOMEM;
|
||||
goto out_put_prog;
|
||||
}
|
||||
return tr_fd;
|
||||
bpf_link_init(&link->link, &bpf_tracing_link_lops, prog);
|
||||
|
||||
link_file = bpf_link_new_file(&link->link, &link_fd);
|
||||
if (IS_ERR(link_file)) {
|
||||
kfree(link);
|
||||
err = PTR_ERR(link_file);
|
||||
goto out_put_prog;
|
||||
}
|
||||
|
||||
err = bpf_trampoline_link_prog(prog);
|
||||
if (err) {
|
||||
bpf_link_cleanup(&link->link, link_file, link_fd);
|
||||
goto out_put_prog;
|
||||
}
|
||||
|
||||
fd_install(link_fd, link_file);
|
||||
return link_fd;
|
||||
|
||||
out_put_prog:
|
||||
bpf_prog_put(prog);
|
||||
return err;
|
||||
}
|
||||
|
||||
struct bpf_raw_tracepoint {
|
||||
struct bpf_raw_tp_link {
|
||||
struct bpf_link link;
|
||||
struct bpf_raw_event_map *btp;
|
||||
struct bpf_prog *prog;
|
||||
};
|
||||
|
||||
static int bpf_raw_tracepoint_release(struct inode *inode, struct file *filp)
|
||||
static void bpf_raw_tp_link_release(struct bpf_link *link)
|
||||
{
|
||||
struct bpf_raw_tracepoint *raw_tp = filp->private_data;
|
||||
struct bpf_raw_tp_link *raw_tp =
|
||||
container_of(link, struct bpf_raw_tp_link, link);
|
||||
|
||||
if (raw_tp->prog) {
|
||||
bpf_probe_unregister(raw_tp->btp, raw_tp->prog);
|
||||
bpf_prog_put(raw_tp->prog);
|
||||
}
|
||||
bpf_probe_unregister(raw_tp->btp, raw_tp->link.prog);
|
||||
bpf_put_raw_tracepoint(raw_tp->btp);
|
||||
kfree(raw_tp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct file_operations bpf_raw_tp_fops = {
|
||||
.release = bpf_raw_tracepoint_release,
|
||||
.read = bpf_dummy_read,
|
||||
.write = bpf_dummy_write,
|
||||
static void bpf_raw_tp_link_dealloc(struct bpf_link *link)
|
||||
{
|
||||
struct bpf_raw_tp_link *raw_tp =
|
||||
container_of(link, struct bpf_raw_tp_link, link);
|
||||
|
||||
kfree(raw_tp);
|
||||
}
|
||||
|
||||
static const struct bpf_link_ops bpf_raw_tp_lops = {
|
||||
.release = bpf_raw_tp_link_release,
|
||||
.dealloc = bpf_raw_tp_link_dealloc,
|
||||
};
|
||||
|
||||
#define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd
|
||||
|
||||
static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
|
||||
{
|
||||
struct bpf_raw_tracepoint *raw_tp;
|
||||
struct bpf_raw_tp_link *link;
|
||||
struct bpf_raw_event_map *btp;
|
||||
struct file *link_file;
|
||||
struct bpf_prog *prog;
|
||||
const char *tp_name;
|
||||
char buf[128];
|
||||
int tp_fd, err;
|
||||
int link_fd, err;
|
||||
|
||||
if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN))
|
||||
return -EINVAL;
|
||||
@@ -2297,29 +2487,30 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
|
||||
goto out_put_prog;
|
||||
}
|
||||
|
||||
raw_tp = kzalloc(sizeof(*raw_tp), GFP_USER);
|
||||
if (!raw_tp) {
|
||||
link = kzalloc(sizeof(*link), GFP_USER);
|
||||
if (!link) {
|
||||
err = -ENOMEM;
|
||||
goto out_put_btp;
|
||||
}
|
||||
raw_tp->btp = btp;
|
||||
raw_tp->prog = prog;
|
||||
bpf_link_init(&link->link, &bpf_raw_tp_lops, prog);
|
||||
link->btp = btp;
|
||||
|
||||
err = bpf_probe_register(raw_tp->btp, prog);
|
||||
if (err)
|
||||
goto out_free_tp;
|
||||
|
||||
tp_fd = anon_inode_getfd("bpf-raw-tracepoint", &bpf_raw_tp_fops, raw_tp,
|
||||
O_CLOEXEC);
|
||||
if (tp_fd < 0) {
|
||||
bpf_probe_unregister(raw_tp->btp, prog);
|
||||
err = tp_fd;
|
||||
goto out_free_tp;
|
||||
link_file = bpf_link_new_file(&link->link, &link_fd);
|
||||
if (IS_ERR(link_file)) {
|
||||
kfree(link);
|
||||
err = PTR_ERR(link_file);
|
||||
goto out_put_btp;
|
||||
}
|
||||
return tp_fd;
|
||||
|
||||
out_free_tp:
|
||||
kfree(raw_tp);
|
||||
err = bpf_probe_register(link->btp, prog);
|
||||
if (err) {
|
||||
bpf_link_cleanup(&link->link, link_file, link_fd);
|
||||
goto out_put_btp;
|
||||
}
|
||||
|
||||
fd_install(link_fd, link_file);
|
||||
return link_fd;
|
||||
|
||||
out_put_btp:
|
||||
bpf_put_raw_tracepoint(btp);
|
||||
out_put_prog:
|
||||
@@ -3266,15 +3457,21 @@ static int bpf_task_fd_query(const union bpf_attr *attr,
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
if (file->f_op == &bpf_raw_tp_fops) {
|
||||
struct bpf_raw_tracepoint *raw_tp = file->private_data;
|
||||
struct bpf_raw_event_map *btp = raw_tp->btp;
|
||||
if (file->f_op == &bpf_link_fops) {
|
||||
struct bpf_link *link = file->private_data;
|
||||
|
||||
err = bpf_task_fd_query_copy(attr, uattr,
|
||||
raw_tp->prog->aux->id,
|
||||
BPF_FD_TYPE_RAW_TRACEPOINT,
|
||||
btp->tp->name, 0, 0);
|
||||
goto put_file;
|
||||
if (link->ops == &bpf_raw_tp_lops) {
|
||||
struct bpf_raw_tp_link *raw_tp =
|
||||
container_of(link, struct bpf_raw_tp_link, link);
|
||||
struct bpf_raw_event_map *btp = raw_tp->btp;
|
||||
|
||||
err = bpf_task_fd_query_copy(attr, uattr,
|
||||
raw_tp->link.prog->aux->id,
|
||||
BPF_FD_TYPE_RAW_TRACEPOINT,
|
||||
btp->tp->name, 0, 0);
|
||||
goto put_file;
|
||||
}
|
||||
goto out_not_supp;
|
||||
}
|
||||
|
||||
event = perf_get_event(file);
|
||||
@@ -3294,6 +3491,7 @@ static int bpf_task_fd_query(const union bpf_attr *attr,
|
||||
goto put_file;
|
||||
}
|
||||
|
||||
out_not_supp:
|
||||
err = -ENOTSUPP;
|
||||
put_file:
|
||||
fput(file);
|
||||
|
@@ -5,6 +5,7 @@
|
||||
#include <linux/filter.h>
|
||||
#include <linux/ftrace.h>
|
||||
#include <linux/rbtree_latch.h>
|
||||
#include <linux/perf_event.h>
|
||||
|
||||
/* dummy _ops. The verifier will operate on target program's ops. */
|
||||
const struct bpf_verifier_ops bpf_extension_verifier_ops = {
|
||||
@@ -17,12 +18,11 @@ const struct bpf_prog_ops bpf_extension_prog_ops = {
|
||||
#define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS)
|
||||
|
||||
static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];
|
||||
static struct latch_tree_root image_tree __cacheline_aligned;
|
||||
|
||||
/* serializes access to trampoline_table and image_tree */
|
||||
/* serializes access to trampoline_table */
|
||||
static DEFINE_MUTEX(trampoline_mutex);
|
||||
|
||||
static void *bpf_jit_alloc_exec_page(void)
|
||||
void *bpf_jit_alloc_exec_page(void)
|
||||
{
|
||||
void *image;
|
||||
|
||||
@@ -38,62 +38,28 @@ static void *bpf_jit_alloc_exec_page(void)
|
||||
return image;
|
||||
}
|
||||
|
||||
static __always_inline bool image_tree_less(struct latch_tree_node *a,
|
||||
struct latch_tree_node *b)
|
||||
void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym)
|
||||
{
|
||||
struct bpf_image *ia = container_of(a, struct bpf_image, tnode);
|
||||
struct bpf_image *ib = container_of(b, struct bpf_image, tnode);
|
||||
|
||||
return ia < ib;
|
||||
ksym->start = (unsigned long) data;
|
||||
ksym->end = ksym->start + PAGE_SIZE;
|
||||
bpf_ksym_add(ksym);
|
||||
perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
|
||||
PAGE_SIZE, false, ksym->name);
|
||||
}
|
||||
|
||||
static __always_inline int image_tree_comp(void *addr, struct latch_tree_node *n)
|
||||
void bpf_image_ksym_del(struct bpf_ksym *ksym)
|
||||
{
|
||||
void *image = container_of(n, struct bpf_image, tnode);
|
||||
|
||||
if (addr < image)
|
||||
return -1;
|
||||
if (addr >= image + PAGE_SIZE)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
bpf_ksym_del(ksym);
|
||||
perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
|
||||
PAGE_SIZE, true, ksym->name);
|
||||
}
|
||||
|
||||
static const struct latch_tree_ops image_tree_ops = {
|
||||
.less = image_tree_less,
|
||||
.comp = image_tree_comp,
|
||||
};
|
||||
|
||||
static void *__bpf_image_alloc(bool lock)
|
||||
static void bpf_trampoline_ksym_add(struct bpf_trampoline *tr)
|
||||
{
|
||||
struct bpf_image *image;
|
||||
struct bpf_ksym *ksym = &tr->ksym;
|
||||
|
||||
image = bpf_jit_alloc_exec_page();
|
||||
if (!image)
|
||||
return NULL;
|
||||
|
||||
if (lock)
|
||||
mutex_lock(&trampoline_mutex);
|
||||
latch_tree_insert(&image->tnode, &image_tree, &image_tree_ops);
|
||||
if (lock)
|
||||
mutex_unlock(&trampoline_mutex);
|
||||
return image->data;
|
||||
}
|
||||
|
||||
void *bpf_image_alloc(void)
|
||||
{
|
||||
return __bpf_image_alloc(true);
|
||||
}
|
||||
|
||||
bool is_bpf_image_address(unsigned long addr)
|
||||
{
|
||||
bool ret;
|
||||
|
||||
rcu_read_lock();
|
||||
ret = latch_tree_find((void *) addr, &image_tree, &image_tree_ops) != NULL;
|
||||
rcu_read_unlock();
|
||||
|
||||
return ret;
|
||||
snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu", tr->key);
|
||||
bpf_image_ksym_add(tr->image, ksym);
|
||||
}
|
||||
|
||||
struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
|
||||
@@ -116,7 +82,7 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
|
||||
goto out;
|
||||
|
||||
/* is_root was checked earlier. No need for bpf_jit_charge_modmem() */
|
||||
image = __bpf_image_alloc(false);
|
||||
image = bpf_jit_alloc_exec_page();
|
||||
if (!image) {
|
||||
kfree(tr);
|
||||
tr = NULL;
|
||||
@@ -131,6 +97,8 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
|
||||
for (i = 0; i < BPF_TRAMP_MAX; i++)
|
||||
INIT_HLIST_HEAD(&tr->progs_hlist[i]);
|
||||
tr->image = image;
|
||||
INIT_LIST_HEAD_RCU(&tr->ksym.lnode);
|
||||
bpf_trampoline_ksym_add(tr);
|
||||
out:
|
||||
mutex_unlock(&trampoline_mutex);
|
||||
return tr;
|
||||
@@ -190,40 +158,50 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
|
||||
* bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2
|
||||
*/
|
||||
#define BPF_MAX_TRAMP_PROGS 40
|
||||
static struct bpf_tramp_progs *
|
||||
bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total)
|
||||
{
|
||||
const struct bpf_prog_aux *aux;
|
||||
struct bpf_tramp_progs *tprogs;
|
||||
struct bpf_prog **progs;
|
||||
int kind;
|
||||
|
||||
*total = 0;
|
||||
tprogs = kcalloc(BPF_TRAMP_MAX, sizeof(*tprogs), GFP_KERNEL);
|
||||
if (!tprogs)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
|
||||
tprogs[kind].nr_progs = tr->progs_cnt[kind];
|
||||
*total += tr->progs_cnt[kind];
|
||||
progs = tprogs[kind].progs;
|
||||
|
||||
hlist_for_each_entry(aux, &tr->progs_hlist[kind], tramp_hlist)
|
||||
*progs++ = aux->prog;
|
||||
}
|
||||
return tprogs;
|
||||
}
|
||||
|
||||
static int bpf_trampoline_update(struct bpf_trampoline *tr)
|
||||
{
|
||||
void *old_image = tr->image + ((tr->selector + 1) & 1) * BPF_IMAGE_SIZE/2;
|
||||
void *new_image = tr->image + (tr->selector & 1) * BPF_IMAGE_SIZE/2;
|
||||
struct bpf_prog *progs_to_run[BPF_MAX_TRAMP_PROGS];
|
||||
int fentry_cnt = tr->progs_cnt[BPF_TRAMP_FENTRY];
|
||||
int fexit_cnt = tr->progs_cnt[BPF_TRAMP_FEXIT];
|
||||
struct bpf_prog **progs, **fentry, **fexit;
|
||||
void *old_image = tr->image + ((tr->selector + 1) & 1) * PAGE_SIZE/2;
|
||||
void *new_image = tr->image + (tr->selector & 1) * PAGE_SIZE/2;
|
||||
struct bpf_tramp_progs *tprogs;
|
||||
u32 flags = BPF_TRAMP_F_RESTORE_REGS;
|
||||
struct bpf_prog_aux *aux;
|
||||
int err;
|
||||
int err, total;
|
||||
|
||||
if (fentry_cnt + fexit_cnt == 0) {
|
||||
tprogs = bpf_trampoline_get_progs(tr, &total);
|
||||
if (IS_ERR(tprogs))
|
||||
return PTR_ERR(tprogs);
|
||||
|
||||
if (total == 0) {
|
||||
err = unregister_fentry(tr, old_image);
|
||||
tr->selector = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* populate fentry progs */
|
||||
fentry = progs = progs_to_run;
|
||||
hlist_for_each_entry(aux, &tr->progs_hlist[BPF_TRAMP_FENTRY], tramp_hlist)
|
||||
*progs++ = aux->prog;
|
||||
|
||||
/* populate fexit progs */
|
||||
fexit = progs;
|
||||
hlist_for_each_entry(aux, &tr->progs_hlist[BPF_TRAMP_FEXIT], tramp_hlist)
|
||||
*progs++ = aux->prog;
|
||||
|
||||
if (fexit_cnt)
|
||||
if (tprogs[BPF_TRAMP_FEXIT].nr_progs ||
|
||||
tprogs[BPF_TRAMP_MODIFY_RETURN].nr_progs)
|
||||
flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME;
|
||||
|
||||
/* Though the second half of trampoline page is unused a task could be
|
||||
@@ -232,12 +210,11 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
|
||||
* preempted task. Hence wait for tasks to voluntarily schedule or go
|
||||
* to userspace.
|
||||
*/
|
||||
|
||||
synchronize_rcu_tasks();
|
||||
|
||||
err = arch_prepare_bpf_trampoline(new_image, new_image + BPF_IMAGE_SIZE / 2,
|
||||
&tr->func.model, flags,
|
||||
fentry, fentry_cnt,
|
||||
fexit, fexit_cnt,
|
||||
err = arch_prepare_bpf_trampoline(new_image, new_image + PAGE_SIZE / 2,
|
||||
&tr->func.model, flags, tprogs,
|
||||
tr->func.addr);
|
||||
if (err < 0)
|
||||
goto out;
|
||||
@@ -252,6 +229,7 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
|
||||
goto out;
|
||||
tr->selector++;
|
||||
out:
|
||||
kfree(tprogs);
|
||||
return err;
|
||||
}
|
||||
|
||||
@@ -260,6 +238,8 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(enum bpf_attach_type t)
|
||||
switch (t) {
|
||||
case BPF_TRACE_FENTRY:
|
||||
return BPF_TRAMP_FENTRY;
|
||||
case BPF_MODIFY_RETURN:
|
||||
return BPF_TRAMP_MODIFY_RETURN;
|
||||
case BPF_TRACE_FEXIT:
|
||||
return BPF_TRAMP_FEXIT;
|
||||
default:
|
||||
@@ -344,8 +324,6 @@ out:
|
||||
|
||||
void bpf_trampoline_put(struct bpf_trampoline *tr)
|
||||
{
|
||||
struct bpf_image *image;
|
||||
|
||||
if (!tr)
|
||||
return;
|
||||
mutex_lock(&trampoline_mutex);
|
||||
@@ -356,11 +334,10 @@ void bpf_trampoline_put(struct bpf_trampoline *tr)
|
||||
goto out;
|
||||
if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT])))
|
||||
goto out;
|
||||
image = container_of(tr->image, struct bpf_image, data);
|
||||
latch_tree_erase(&image->tnode, &image_tree, &image_tree_ops);
|
||||
bpf_image_ksym_del(&tr->ksym);
|
||||
/* wait for tasks to get out of trampoline before freeing it */
|
||||
synchronize_rcu_tasks();
|
||||
bpf_jit_free_exec(image);
|
||||
bpf_jit_free_exec(tr->image);
|
||||
hlist_del(&tr->hlist);
|
||||
kfree(tr);
|
||||
out:
|
||||
@@ -375,6 +352,7 @@ out:
|
||||
* call __bpf_prog_exit
|
||||
*/
|
||||
u64 notrace __bpf_prog_enter(void)
|
||||
__acquires(RCU)
|
||||
{
|
||||
u64 start = 0;
|
||||
|
||||
@@ -386,6 +364,7 @@ u64 notrace __bpf_prog_enter(void)
|
||||
}
|
||||
|
||||
void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
|
||||
__releases(RCU)
|
||||
{
|
||||
struct bpf_prog_stats *stats;
|
||||
|
||||
@@ -409,8 +388,7 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
|
||||
int __weak
|
||||
arch_prepare_bpf_trampoline(void *image, void *image_end,
|
||||
const struct btf_func_model *m, u32 flags,
|
||||
struct bpf_prog **fentry_progs, int fentry_cnt,
|
||||
struct bpf_prog **fexit_progs, int fexit_cnt,
|
||||
struct bpf_tramp_progs *tprogs,
|
||||
void *orig_call)
|
||||
{
|
||||
return -ENOTSUPP;
|
||||
|
@@ -19,6 +19,7 @@
|
||||
#include <linux/sort.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <linux/ctype.h>
|
||||
#include <linux/error-injection.h>
|
||||
|
||||
#include "disasm.h"
|
||||
|
||||
@@ -3649,7 +3650,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
|
||||
if (func_id != BPF_FUNC_perf_event_read &&
|
||||
func_id != BPF_FUNC_perf_event_output &&
|
||||
func_id != BPF_FUNC_skb_output &&
|
||||
func_id != BPF_FUNC_perf_event_read_value)
|
||||
func_id != BPF_FUNC_perf_event_read_value &&
|
||||
func_id != BPF_FUNC_xdp_output)
|
||||
goto error;
|
||||
break;
|
||||
case BPF_MAP_TYPE_STACK_TRACE:
|
||||
@@ -3739,6 +3741,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
|
||||
case BPF_FUNC_perf_event_output:
|
||||
case BPF_FUNC_perf_event_read_value:
|
||||
case BPF_FUNC_skb_output:
|
||||
case BPF_FUNC_xdp_output:
|
||||
if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
|
||||
goto error;
|
||||
break;
|
||||
@@ -9800,6 +9803,26 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
|
||||
|
||||
return 0;
|
||||
}
|
||||
#define SECURITY_PREFIX "security_"
|
||||
|
||||
static int check_attach_modify_return(struct bpf_verifier_env *env)
|
||||
{
|
||||
struct bpf_prog *prog = env->prog;
|
||||
unsigned long addr = (unsigned long) prog->aux->trampoline->func.addr;
|
||||
|
||||
/* This is expected to be cleaned up in the future with the KRSI effort
|
||||
* introducing the LSM_HOOK macro for cleaning up lsm_hooks.h.
|
||||
*/
|
||||
if (within_error_injection_list(addr) ||
|
||||
!strncmp(SECURITY_PREFIX, prog->aux->attach_func_name,
|
||||
sizeof(SECURITY_PREFIX) - 1))
|
||||
return 0;
|
||||
|
||||
verbose(env, "fmod_ret attach_btf_id %u (%s) is not modifiable\n",
|
||||
prog->aux->attach_btf_id, prog->aux->attach_func_name);
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static int check_attach_btf_id(struct bpf_verifier_env *env)
|
||||
{
|
||||
@@ -9950,6 +9973,7 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
|
||||
if (!prog_extension)
|
||||
return -EINVAL;
|
||||
/* fallthrough */
|
||||
case BPF_MODIFY_RETURN:
|
||||
case BPF_TRACE_FENTRY:
|
||||
case BPF_TRACE_FEXIT:
|
||||
if (!btf_type_is_func(t)) {
|
||||
@@ -9999,6 +10023,9 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
|
||||
}
|
||||
tr->func.addr = (void *)addr;
|
||||
prog->aux->trampoline = tr;
|
||||
|
||||
if (prog->expected_attach_type == BPF_MODIFY_RETURN)
|
||||
ret = check_attach_modify_return(env);
|
||||
out:
|
||||
mutex_unlock(&tr->mutex);
|
||||
if (ret)
|
||||
|
@@ -8255,23 +8255,22 @@ static void perf_event_bpf_emit_ksymbols(struct bpf_prog *prog,
|
||||
enum perf_bpf_event_type type)
|
||||
{
|
||||
bool unregister = type == PERF_BPF_EVENT_PROG_UNLOAD;
|
||||
char sym[KSYM_NAME_LEN];
|
||||
int i;
|
||||
|
||||
if (prog->aux->func_cnt == 0) {
|
||||
bpf_get_prog_name(prog, sym);
|
||||
perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF,
|
||||
(u64)(unsigned long)prog->bpf_func,
|
||||
prog->jited_len, unregister, sym);
|
||||
prog->jited_len, unregister,
|
||||
prog->aux->ksym.name);
|
||||
} else {
|
||||
for (i = 0; i < prog->aux->func_cnt; i++) {
|
||||
struct bpf_prog *subprog = prog->aux->func[i];
|
||||
|
||||
bpf_get_prog_name(subprog, sym);
|
||||
perf_event_ksymbol(
|
||||
PERF_RECORD_KSYMBOL_TYPE_BPF,
|
||||
(u64)(unsigned long)subprog->bpf_func,
|
||||
subprog->jited_len, unregister, sym);
|
||||
subprog->jited_len, unregister,
|
||||
prog->aux->ksym.name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -149,8 +149,6 @@ int kernel_text_address(unsigned long addr)
|
||||
goto out;
|
||||
if (is_bpf_text_address(addr))
|
||||
goto out;
|
||||
if (is_bpf_image_address(addr))
|
||||
goto out;
|
||||
ret = 0;
|
||||
out:
|
||||
if (no_rcu)
|
||||
|
@@ -843,6 +843,8 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
return &bpf_send_signal_thread_proto;
|
||||
case BPF_FUNC_perf_event_read_value:
|
||||
return &bpf_perf_event_read_value_proto;
|
||||
case BPF_FUNC_get_ns_current_pid_tgid:
|
||||
return &bpf_get_ns_current_pid_tgid_proto;
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
@@ -1143,6 +1145,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
|
||||
};
|
||||
|
||||
extern const struct bpf_func_proto bpf_skb_output_proto;
|
||||
extern const struct bpf_func_proto bpf_xdp_output_proto;
|
||||
|
||||
BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
|
||||
struct bpf_map *, map, u64, flags)
|
||||
@@ -1218,6 +1221,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
#ifdef CONFIG_NET
|
||||
case BPF_FUNC_skb_output:
|
||||
return &bpf_skb_output_proto;
|
||||
case BPF_FUNC_xdp_output:
|
||||
return &bpf_xdp_output_proto;
|
||||
#endif
|
||||
default:
|
||||
return raw_tp_prog_func_proto(func_id, prog);
|
||||
@@ -1252,6 +1257,13 @@ static bool tracing_prog_is_valid_access(int off, int size,
|
||||
return btf_ctx_access(off, size, type, prog, info);
|
||||
}
|
||||
|
||||
int __weak bpf_prog_test_run_tracing(struct bpf_prog *prog,
|
||||
const union bpf_attr *kattr,
|
||||
union bpf_attr __user *uattr)
|
||||
{
|
||||
return -ENOTSUPP;
|
||||
}
|
||||
|
||||
const struct bpf_verifier_ops raw_tracepoint_verifier_ops = {
|
||||
.get_func_proto = raw_tp_prog_func_proto,
|
||||
.is_valid_access = raw_tp_prog_is_valid_access,
|
||||
@@ -1266,6 +1278,7 @@ const struct bpf_verifier_ops tracing_verifier_ops = {
|
||||
};
|
||||
|
||||
const struct bpf_prog_ops tracing_prog_ops = {
|
||||
.test_run = bpf_prog_test_run_tracing,
|
||||
};
|
||||
|
||||
static bool raw_tp_writable_prog_is_valid_access(int off, int size,
|
||||
|
Reference in New Issue
Block a user