bpf: Add bpf_xdp_output() helper
Introduce new helper that reuses existing xdp perf_event output implementation, but can be called from raw_tracepoint programs that receive 'struct xdp_buff *' as a tracepoint argument. Signed-off-by: Eelco Chaudron <echaudro@redhat.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: John Fastabend <john.fastabend@gmail.com> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/158348514556.2239.11050972434793741444.stgit@xdp-tutorial
This commit is contained in:

committed by
Alexei Starovoitov

parent
4823b7210b
commit
d831ee84bf
@@ -2927,6 +2927,29 @@ union bpf_attr {
|
|||||||
*
|
*
|
||||||
* **-ENOENT** if pidns does not exist for the current task.
|
* **-ENOENT** if pidns does not exist for the current task.
|
||||||
*
|
*
|
||||||
|
* int bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
|
||||||
|
* Description
|
||||||
|
* Write raw *data* blob into a special BPF perf event held by
|
||||||
|
* *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
|
||||||
|
* event must have the following attributes: **PERF_SAMPLE_RAW**
|
||||||
|
* as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
|
||||||
|
* **PERF_COUNT_SW_BPF_OUTPUT** as **config**.
|
||||||
|
*
|
||||||
|
* The *flags* are used to indicate the index in *map* for which
|
||||||
|
* the value must be put, masked with **BPF_F_INDEX_MASK**.
|
||||||
|
* Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
|
||||||
|
* to indicate that the index of the current CPU core should be
|
||||||
|
* used.
|
||||||
|
*
|
||||||
|
* The value to write, of *size*, is passed through eBPF stack and
|
||||||
|
* pointed to by *data*.
|
||||||
|
*
|
||||||
|
* *ctx* is a pointer to in-kernel struct xdp_buff.
|
||||||
|
*
|
||||||
|
* This helper is similar to **bpf_perf_event_output**\ () but
|
||||||
|
* restricted to raw_tracepoint bpf programs.
|
||||||
|
* Return
|
||||||
|
* 0 on success, or a negative error in case of failure.
|
||||||
*/
|
*/
|
||||||
#define __BPF_FUNC_MAPPER(FN) \
|
#define __BPF_FUNC_MAPPER(FN) \
|
||||||
FN(unspec), \
|
FN(unspec), \
|
||||||
@@ -3049,7 +3072,8 @@ union bpf_attr {
|
|||||||
FN(send_signal_thread), \
|
FN(send_signal_thread), \
|
||||||
FN(jiffies64), \
|
FN(jiffies64), \
|
||||||
FN(read_branch_records), \
|
FN(read_branch_records), \
|
||||||
FN(get_ns_current_pid_tgid),
|
FN(get_ns_current_pid_tgid), \
|
||||||
|
FN(xdp_output),
|
||||||
|
|
||||||
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
|
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
|
||||||
* function eBPF program intends to call
|
* function eBPF program intends to call
|
||||||
|
@@ -3650,7 +3650,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
|
|||||||
if (func_id != BPF_FUNC_perf_event_read &&
|
if (func_id != BPF_FUNC_perf_event_read &&
|
||||||
func_id != BPF_FUNC_perf_event_output &&
|
func_id != BPF_FUNC_perf_event_output &&
|
||||||
func_id != BPF_FUNC_skb_output &&
|
func_id != BPF_FUNC_skb_output &&
|
||||||
func_id != BPF_FUNC_perf_event_read_value)
|
func_id != BPF_FUNC_perf_event_read_value &&
|
||||||
|
func_id != BPF_FUNC_xdp_output)
|
||||||
goto error;
|
goto error;
|
||||||
break;
|
break;
|
||||||
case BPF_MAP_TYPE_STACK_TRACE:
|
case BPF_MAP_TYPE_STACK_TRACE:
|
||||||
@@ -3740,6 +3741,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
|
|||||||
case BPF_FUNC_perf_event_output:
|
case BPF_FUNC_perf_event_output:
|
||||||
case BPF_FUNC_perf_event_read_value:
|
case BPF_FUNC_perf_event_read_value:
|
||||||
case BPF_FUNC_skb_output:
|
case BPF_FUNC_skb_output:
|
||||||
|
case BPF_FUNC_xdp_output:
|
||||||
if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
|
if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
|
||||||
goto error;
|
goto error;
|
||||||
break;
|
break;
|
||||||
|
@@ -1145,6 +1145,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
extern const struct bpf_func_proto bpf_skb_output_proto;
|
extern const struct bpf_func_proto bpf_skb_output_proto;
|
||||||
|
extern const struct bpf_func_proto bpf_xdp_output_proto;
|
||||||
|
|
||||||
BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
|
BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
|
||||||
struct bpf_map *, map, u64, flags)
|
struct bpf_map *, map, u64, flags)
|
||||||
@@ -1220,6 +1221,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
|||||||
#ifdef CONFIG_NET
|
#ifdef CONFIG_NET
|
||||||
case BPF_FUNC_skb_output:
|
case BPF_FUNC_skb_output:
|
||||||
return &bpf_skb_output_proto;
|
return &bpf_skb_output_proto;
|
||||||
|
case BPF_FUNC_xdp_output:
|
||||||
|
return &bpf_xdp_output_proto;
|
||||||
#endif
|
#endif
|
||||||
default:
|
default:
|
||||||
return raw_tp_prog_func_proto(func_id, prog);
|
return raw_tp_prog_func_proto(func_id, prog);
|
||||||
|
@@ -4061,7 +4061,8 @@ BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map,
|
|||||||
|
|
||||||
if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
|
if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
if (unlikely(xdp_size > (unsigned long)(xdp->data_end - xdp->data)))
|
if (unlikely(!xdp ||
|
||||||
|
xdp_size > (unsigned long)(xdp->data_end - xdp->data)))
|
||||||
return -EFAULT;
|
return -EFAULT;
|
||||||
|
|
||||||
return bpf_event_output(map, flags, meta, meta_size, xdp->data,
|
return bpf_event_output(map, flags, meta, meta_size, xdp->data,
|
||||||
@@ -4079,6 +4080,19 @@ static const struct bpf_func_proto bpf_xdp_event_output_proto = {
|
|||||||
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
|
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static int bpf_xdp_output_btf_ids[5];
|
||||||
|
const struct bpf_func_proto bpf_xdp_output_proto = {
|
||||||
|
.func = bpf_xdp_event_output,
|
||||||
|
.gpl_only = true,
|
||||||
|
.ret_type = RET_INTEGER,
|
||||||
|
.arg1_type = ARG_PTR_TO_BTF_ID,
|
||||||
|
.arg2_type = ARG_CONST_MAP_PTR,
|
||||||
|
.arg3_type = ARG_ANYTHING,
|
||||||
|
.arg4_type = ARG_PTR_TO_MEM,
|
||||||
|
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
|
||||||
|
.btf_id = bpf_xdp_output_btf_ids,
|
||||||
|
};
|
||||||
|
|
||||||
BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb)
|
BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb)
|
||||||
{
|
{
|
||||||
return skb->sk ? sock_gen_cookie(skb->sk) : 0;
|
return skb->sk ? sock_gen_cookie(skb->sk) : 0;
|
||||||
|
@@ -2927,6 +2927,29 @@ union bpf_attr {
|
|||||||
*
|
*
|
||||||
* **-ENOENT** if pidns does not exist for the current task.
|
* **-ENOENT** if pidns does not exist for the current task.
|
||||||
*
|
*
|
||||||
|
* int bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
|
||||||
|
* Description
|
||||||
|
* Write raw *data* blob into a special BPF perf event held by
|
||||||
|
* *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
|
||||||
|
* event must have the following attributes: **PERF_SAMPLE_RAW**
|
||||||
|
* as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
|
||||||
|
* **PERF_COUNT_SW_BPF_OUTPUT** as **config**.
|
||||||
|
*
|
||||||
|
* The *flags* are used to indicate the index in *map* for which
|
||||||
|
* the value must be put, masked with **BPF_F_INDEX_MASK**.
|
||||||
|
* Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
|
||||||
|
* to indicate that the index of the current CPU core should be
|
||||||
|
* used.
|
||||||
|
*
|
||||||
|
* The value to write, of *size*, is passed through eBPF stack and
|
||||||
|
* pointed to by *data*.
|
||||||
|
*
|
||||||
|
* *ctx* is a pointer to in-kernel struct xdp_buff.
|
||||||
|
*
|
||||||
|
* This helper is similar to **bpf_perf_event_output**\ () but
|
||||||
|
* restricted to raw_tracepoint bpf programs.
|
||||||
|
* Return
|
||||||
|
* 0 on success, or a negative error in case of failure.
|
||||||
*/
|
*/
|
||||||
#define __BPF_FUNC_MAPPER(FN) \
|
#define __BPF_FUNC_MAPPER(FN) \
|
||||||
FN(unspec), \
|
FN(unspec), \
|
||||||
@@ -3049,7 +3072,8 @@ union bpf_attr {
|
|||||||
FN(send_signal_thread), \
|
FN(send_signal_thread), \
|
||||||
FN(jiffies64), \
|
FN(jiffies64), \
|
||||||
FN(read_branch_records), \
|
FN(read_branch_records), \
|
||||||
FN(get_ns_current_pid_tgid),
|
FN(get_ns_current_pid_tgid), \
|
||||||
|
FN(xdp_output),
|
||||||
|
|
||||||
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
|
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
|
||||||
* function eBPF program intends to call
|
* function eBPF program intends to call
|
||||||
|
@@ -4,17 +4,51 @@
|
|||||||
#include "test_xdp.skel.h"
|
#include "test_xdp.skel.h"
|
||||||
#include "test_xdp_bpf2bpf.skel.h"
|
#include "test_xdp_bpf2bpf.skel.h"
|
||||||
|
|
||||||
|
struct meta {
|
||||||
|
int ifindex;
|
||||||
|
int pkt_len;
|
||||||
|
};
|
||||||
|
|
||||||
|
static void on_sample(void *ctx, int cpu, void *data, __u32 size)
|
||||||
|
{
|
||||||
|
int duration = 0;
|
||||||
|
struct meta *meta = (struct meta *)data;
|
||||||
|
struct ipv4_packet *trace_pkt_v4 = data + sizeof(*meta);
|
||||||
|
|
||||||
|
if (CHECK(size < sizeof(pkt_v4) + sizeof(*meta),
|
||||||
|
"check_size", "size %u < %zu\n",
|
||||||
|
size, sizeof(pkt_v4) + sizeof(*meta)))
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (CHECK(meta->ifindex != if_nametoindex("lo"), "check_meta_ifindex",
|
||||||
|
"meta->ifindex = %d\n", meta->ifindex))
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (CHECK(meta->pkt_len != sizeof(pkt_v4), "check_meta_pkt_len",
|
||||||
|
"meta->pkt_len = %zd\n", sizeof(pkt_v4)))
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (CHECK(memcmp(trace_pkt_v4, &pkt_v4, sizeof(pkt_v4)),
|
||||||
|
"check_packet_content", "content not the same\n"))
|
||||||
|
return;
|
||||||
|
|
||||||
|
*(bool *)ctx = true;
|
||||||
|
}
|
||||||
|
|
||||||
void test_xdp_bpf2bpf(void)
|
void test_xdp_bpf2bpf(void)
|
||||||
{
|
{
|
||||||
__u32 duration = 0, retval, size;
|
__u32 duration = 0, retval, size;
|
||||||
char buf[128];
|
char buf[128];
|
||||||
int err, pkt_fd, map_fd;
|
int err, pkt_fd, map_fd;
|
||||||
|
bool passed = false;
|
||||||
struct iphdr *iph = (void *)buf + sizeof(struct ethhdr);
|
struct iphdr *iph = (void *)buf + sizeof(struct ethhdr);
|
||||||
struct iptnl_info value4 = {.family = AF_INET};
|
struct iptnl_info value4 = {.family = AF_INET};
|
||||||
struct test_xdp *pkt_skel = NULL;
|
struct test_xdp *pkt_skel = NULL;
|
||||||
struct test_xdp_bpf2bpf *ftrace_skel = NULL;
|
struct test_xdp_bpf2bpf *ftrace_skel = NULL;
|
||||||
struct vip key4 = {.protocol = 6, .family = AF_INET};
|
struct vip key4 = {.protocol = 6, .family = AF_INET};
|
||||||
struct bpf_program *prog;
|
struct bpf_program *prog;
|
||||||
|
struct perf_buffer *pb = NULL;
|
||||||
|
struct perf_buffer_opts pb_opts = {};
|
||||||
|
|
||||||
/* Load XDP program to introspect */
|
/* Load XDP program to introspect */
|
||||||
pkt_skel = test_xdp__open_and_load();
|
pkt_skel = test_xdp__open_and_load();
|
||||||
@@ -50,6 +84,14 @@ void test_xdp_bpf2bpf(void)
|
|||||||
if (CHECK(err, "ftrace_attach", "ftrace attach failed: %d\n", err))
|
if (CHECK(err, "ftrace_attach", "ftrace attach failed: %d\n", err))
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
|
/* Set up perf buffer */
|
||||||
|
pb_opts.sample_cb = on_sample;
|
||||||
|
pb_opts.ctx = &passed;
|
||||||
|
pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map),
|
||||||
|
1, &pb_opts);
|
||||||
|
if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
|
||||||
|
goto out;
|
||||||
|
|
||||||
/* Run test program */
|
/* Run test program */
|
||||||
err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4),
|
err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4),
|
||||||
buf, &size, &retval, &duration);
|
buf, &size, &retval, &duration);
|
||||||
@@ -60,6 +102,15 @@ void test_xdp_bpf2bpf(void)
|
|||||||
err, errno, retval, size))
|
err, errno, retval, size))
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
|
/* Make sure bpf_xdp_output() was triggered and it sent the expected
|
||||||
|
* data to the perf ring buffer.
|
||||||
|
*/
|
||||||
|
err = perf_buffer__poll(pb, 100);
|
||||||
|
if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err))
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
CHECK_FAIL(!passed);
|
||||||
|
|
||||||
/* Verify test results */
|
/* Verify test results */
|
||||||
if (CHECK(ftrace_skel->bss->test_result_fentry != if_nametoindex("lo"),
|
if (CHECK(ftrace_skel->bss->test_result_fentry != if_nametoindex("lo"),
|
||||||
"result", "fentry failed err %llu\n",
|
"result", "fentry failed err %llu\n",
|
||||||
@@ -70,6 +121,8 @@ void test_xdp_bpf2bpf(void)
|
|||||||
"fexit failed err %llu\n", ftrace_skel->bss->test_result_fexit);
|
"fexit failed err %llu\n", ftrace_skel->bss->test_result_fexit);
|
||||||
|
|
||||||
out:
|
out:
|
||||||
|
if (pb)
|
||||||
|
perf_buffer__free(pb);
|
||||||
test_xdp__destroy(pkt_skel);
|
test_xdp__destroy(pkt_skel);
|
||||||
test_xdp_bpf2bpf__destroy(ftrace_skel);
|
test_xdp_bpf2bpf__destroy(ftrace_skel);
|
||||||
}
|
}
|
||||||
|
@@ -3,6 +3,8 @@
|
|||||||
#include <bpf/bpf_tracing.h>
|
#include <bpf/bpf_tracing.h>
|
||||||
#include <bpf/bpf_helpers.h>
|
#include <bpf/bpf_helpers.h>
|
||||||
|
|
||||||
|
char _license[] SEC("license") = "GPL";
|
||||||
|
|
||||||
struct net_device {
|
struct net_device {
|
||||||
/* Structure does not need to contain all entries,
|
/* Structure does not need to contain all entries,
|
||||||
* as "preserve_access_index" will use BTF to fix this...
|
* as "preserve_access_index" will use BTF to fix this...
|
||||||
@@ -27,10 +29,32 @@ struct xdp_buff {
|
|||||||
struct xdp_rxq_info *rxq;
|
struct xdp_rxq_info *rxq;
|
||||||
} __attribute__((preserve_access_index));
|
} __attribute__((preserve_access_index));
|
||||||
|
|
||||||
|
struct meta {
|
||||||
|
int ifindex;
|
||||||
|
int pkt_len;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct {
|
||||||
|
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
|
||||||
|
__uint(key_size, sizeof(int));
|
||||||
|
__uint(value_size, sizeof(int));
|
||||||
|
} perf_buf_map SEC(".maps");
|
||||||
|
|
||||||
__u64 test_result_fentry = 0;
|
__u64 test_result_fentry = 0;
|
||||||
SEC("fentry/FUNC")
|
SEC("fentry/FUNC")
|
||||||
int BPF_PROG(trace_on_entry, struct xdp_buff *xdp)
|
int BPF_PROG(trace_on_entry, struct xdp_buff *xdp)
|
||||||
{
|
{
|
||||||
|
struct meta meta;
|
||||||
|
void *data_end = (void *)(long)xdp->data_end;
|
||||||
|
void *data = (void *)(long)xdp->data;
|
||||||
|
|
||||||
|
meta.ifindex = xdp->rxq->dev->ifindex;
|
||||||
|
meta.pkt_len = data_end - data;
|
||||||
|
bpf_xdp_output(xdp, &perf_buf_map,
|
||||||
|
((__u64) meta.pkt_len << 32) |
|
||||||
|
BPF_F_CURRENT_CPU,
|
||||||
|
&meta, sizeof(meta));
|
||||||
|
|
||||||
test_result_fentry = xdp->rxq->dev->ifindex;
|
test_result_fentry = xdp->rxq->dev->ifindex;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user