Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Daniel Borkmann says:

====================
pull-request: bpf-next 2020-09-01

The following pull-request contains BPF updates for your *net-next* tree.

There are two small conflicts when pulling, resolve as follows:

1) Merge conflict in tools/lib/bpf/libbpf.c between 88a8212028 ("libbpf: Factor
   out common ELF operations and improve logging") in bpf-next and 1e891e513e
   ("libbpf: Fix map index used in error message") in net-next. Resolve by taking
   the hunk in bpf-next:

        [...]
        scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx);
        data = elf_sec_data(obj, scn);
        if (!scn || !data) {
                pr_warn("elf: failed to get %s map definitions for %s\n",
                        MAPS_ELF_SEC, obj->path);
                return -EINVAL;
        }
        [...]

2) Merge conflict in drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c between
   9647c57b11 ("xsk: i40e: ice: ixgbe: mlx5: Test for dma_need_sync earlier for
   better performance") in bpf-next and e20f0dbf20 ("net/mlx5e: RX, Add a prefetch
   command for small L1_CACHE_BYTES") in net-next. Resolve the two locations by retaining
   net_prefetch() and taking xsk_buff_dma_sync_for_cpu() from bpf-next. Should look like:

        [...]
        xdp_set_data_meta_invalid(xdp);
        xsk_buff_dma_sync_for_cpu(xdp, rq->xsk_pool);
        net_prefetch(xdp->data);
        [...]

We've added 133 non-merge commits during the last 14 day(s) which contain
a total of 246 files changed, 13832 insertions(+), 3105 deletions(-).

The main changes are:

1) Initial support for sleepable BPF programs along with bpf_copy_from_user() helper
   for tracing to reliably access user memory, from Alexei Starovoitov.

2) Add BPF infra for writing and parsing TCP header options, from Martin KaFai Lau.

3) bpf_d_path() helper for returning full path for given 'struct path', from Jiri Olsa.

4) AF_XDP support for shared umems between devices and queues, from Magnus Karlsson.

5) Initial prep work for full BPF-to-BPF call support in libbpf, from Andrii Nakryiko.

6) Generalize bpf_sk_storage map & add local storage for inodes, from KP Singh.

7) Implement sockmap/hash updates from BPF context, from Lorenz Bauer.

8) BPF xor verification for scalar types & add BPF link iterator, from Yonghong Song.

9) Use target's prog type for BPF_PROG_TYPE_EXT prog verification, from Udip Pant.

10) Rework BPF tracing samples to use libbpf loader, from Daniel T. Lee.

11) Fix xdpsock sample to really cycle through all buffers, from Weqaar Janjua.

12) Improve type safety for tun/veth XDP frame handling, from Maciej Żenczykowski.

13) Various smaller cleanups and improvements all over the place.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller
2020-09-01 13:05:08 -07:00
246 changed files with 13815 additions and 3088 deletions

View File

@@ -1,6 +1,9 @@
# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
# Most of this file is copied from tools/lib/traceevent/Makefile
RM ?= rm
srctree = $(abs_srctree)
LIBBPF_VERSION := $(shell \
grep -oE '^LIBBPF_([0-9.]+)' libbpf.map | \
sort -rV | head -n1 | cut -d'_' -f2)
@@ -56,7 +59,7 @@ ifndef VERBOSE
endif
FEATURE_USER = .libbpf
FEATURE_TESTS = libelf libelf-mmap zlib bpf reallocarray
FEATURE_TESTS = libelf zlib bpf
FEATURE_DISPLAY = libelf zlib bpf
INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi
@@ -98,16 +101,8 @@ else
CFLAGS := -g -Wall
endif
ifeq ($(feature-libelf-mmap), 1)
override CFLAGS += -DHAVE_LIBELF_MMAP_SUPPORT
endif
ifeq ($(feature-reallocarray), 0)
override CFLAGS += -DCOMPAT_NEED_REALLOCARRAY
endif
# Append required CFLAGS
override CFLAGS += $(EXTRA_WARNINGS)
override CFLAGS += $(EXTRA_WARNINGS) -Wno-switch-enum
override CFLAGS += -Werror -Wall
override CFLAGS += -fPIC
override CFLAGS += $(INCLUDES)
@@ -196,7 +191,7 @@ $(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN_SHARED)
@ln -sf $(@F) $(OUTPUT)libbpf.so.$(LIBBPF_MAJOR_VERSION)
$(OUTPUT)libbpf.a: $(BPF_IN_STATIC)
$(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^
$(QUIET_LINK)$(RM) -f $@; $(AR) rcs $@ $^
$(OUTPUT)libbpf.pc:
$(QUIET_GEN)sed -e "s|@PREFIX@|$(prefix)|" \
@@ -269,10 +264,10 @@ install: install_lib install_pkgconfig install_headers
### Cleaning rules
config-clean:
$(call QUIET_CLEAN, config)
$(call QUIET_CLEAN, feature-detect)
$(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null
clean:
clean: config-clean
$(call QUIET_CLEAN, libbpf) $(RM) -rf $(CMD_TARGETS) \
*~ .*.d .*.cmd LIBBPF-CFLAGS $(BPF_HELPER_DEFS) \
$(SHARED_OBJDIR) $(STATIC_OBJDIR) \
@@ -299,7 +294,7 @@ cscope:
cscope -b -q -I $(srctree)/include -f cscope.out
tags:
rm -f TAGS tags
$(RM) -f TAGS tags
ls *.c *.h | xargs $(TAGS_PROG) -a
# Declare the contents of the .PHONY variable as phony. We keep that

View File

@@ -32,9 +32,6 @@
#include "libbpf.h"
#include "libbpf_internal.h"
/* make sure libbpf doesn't use kernel-only integer typedefs */
#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
/*
* When building perf, unistd.h is overridden. __NR_bpf is
* required to be defined explicitly.

View File

@@ -19,32 +19,52 @@ enum bpf_field_info_kind {
BPF_FIELD_RSHIFT_U64 = 5,
};
/* second argument to __builtin_btf_type_id() built-in */
enum bpf_type_id_kind {
BPF_TYPE_ID_LOCAL = 0, /* BTF type ID in local program */
BPF_TYPE_ID_TARGET = 1, /* BTF type ID in target kernel */
};
/* second argument to __builtin_preserve_type_info() built-in */
enum bpf_type_info_kind {
BPF_TYPE_EXISTS = 0, /* type existence in target kernel */
BPF_TYPE_SIZE = 1, /* type size in target kernel */
};
/* second argument to __builtin_preserve_enum_value() built-in */
enum bpf_enum_value_kind {
BPF_ENUMVAL_EXISTS = 0, /* enum value existence in kernel */
BPF_ENUMVAL_VALUE = 1, /* enum value value relocation */
};
#define __CORE_RELO(src, field, info) \
__builtin_preserve_field_info((src)->field, BPF_FIELD_##info)
#if __BYTE_ORDER == __LITTLE_ENDIAN
#define __CORE_BITFIELD_PROBE_READ(dst, src, fld) \
bpf_probe_read((void *)dst, \
__CORE_RELO(src, fld, BYTE_SIZE), \
(const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET))
bpf_probe_read_kernel( \
(void *)dst, \
__CORE_RELO(src, fld, BYTE_SIZE), \
(const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET))
#else
/* semantics of LSHIFT_64 assumes loading values into low-ordered bytes, so
* for big-endian we need to adjust destination pointer accordingly, based on
* field byte size
*/
#define __CORE_BITFIELD_PROBE_READ(dst, src, fld) \
bpf_probe_read((void *)dst + (8 - __CORE_RELO(src, fld, BYTE_SIZE)), \
__CORE_RELO(src, fld, BYTE_SIZE), \
(const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET))
bpf_probe_read_kernel( \
(void *)dst + (8 - __CORE_RELO(src, fld, BYTE_SIZE)), \
__CORE_RELO(src, fld, BYTE_SIZE), \
(const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET))
#endif
/*
* Extract bitfield, identified by s->field, and return its value as u64.
* All this is done in relocatable manner, so bitfield changes such as
* signedness, bit size, offset changes, this will be handled automatically.
* This version of macro is using bpf_probe_read() to read underlying integer
* storage. Macro functions as an expression and its return type is
* bpf_probe_read()'s return value: 0, on success, <0 on error.
* This version of macro is using bpf_probe_read_kernel() to read underlying
* integer storage. Macro functions as an expression and its return type is
* bpf_probe_read_kernel()'s return value: 0, on success, <0 on error.
*/
#define BPF_CORE_READ_BITFIELD_PROBED(s, field) ({ \
unsigned long long val = 0; \
@@ -92,15 +112,75 @@ enum bpf_field_info_kind {
__builtin_preserve_field_info(field, BPF_FIELD_EXISTS)
/*
* Convenience macro to get byte size of a field. Works for integers,
* Convenience macro to get the byte size of a field. Works for integers,
* struct/unions, pointers, arrays, and enums.
*/
#define bpf_core_field_size(field) \
__builtin_preserve_field_info(field, BPF_FIELD_BYTE_SIZE)
/*
* bpf_core_read() abstracts away bpf_probe_read() call and captures offset
* relocation for source address using __builtin_preserve_access_index()
* Convenience macro to get BTF type ID of a specified type, using a local BTF
* information. Return 32-bit unsigned integer with type ID from program's own
* BTF. Always succeeds.
*/
#define bpf_core_type_id_local(type) \
__builtin_btf_type_id(*(typeof(type) *)0, BPF_TYPE_ID_LOCAL)
/*
* Convenience macro to get BTF type ID of a target kernel's type that matches
* specified local type.
* Returns:
* - valid 32-bit unsigned type ID in kernel BTF;
* - 0, if no matching type was found in a target kernel BTF.
*/
#define bpf_core_type_id_kernel(type) \
__builtin_btf_type_id(*(typeof(type) *)0, BPF_TYPE_ID_TARGET)
/*
* Convenience macro to check that provided named type
* (struct/union/enum/typedef) exists in a target kernel.
* Returns:
* 1, if such type is present in target kernel's BTF;
* 0, if no matching type is found.
*/
#define bpf_core_type_exists(type) \
__builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_EXISTS)
/*
* Convenience macro to get the byte size of a provided named type
* (struct/union/enum/typedef) in a target kernel.
* Returns:
* >= 0 size (in bytes), if type is present in target kernel's BTF;
* 0, if no matching type is found.
*/
#define bpf_core_type_size(type) \
__builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_SIZE)
/*
* Convenience macro to check that provided enumerator value is defined in
* a target kernel.
* Returns:
* 1, if specified enum type and its enumerator value are present in target
* kernel's BTF;
* 0, if no matching enum and/or enum value within that enum is found.
*/
#define bpf_core_enum_value_exists(enum_type, enum_value) \
__builtin_preserve_enum_value(*(typeof(enum_type) *)enum_value, BPF_ENUMVAL_EXISTS)
/*
* Convenience macro to get the integer value of an enumerator value in
* a target kernel.
* Returns:
* 64-bit value, if specified enum type and its enumerator value are
* present in target kernel's BTF;
* 0, if no matching enum and/or enum value within that enum is found.
*/
#define bpf_core_enum_value(enum_type, enum_value) \
__builtin_preserve_enum_value(*(typeof(enum_type) *)enum_value, BPF_ENUMVAL_VALUE)
/*
* bpf_core_read() abstracts away bpf_probe_read_kernel() call and captures
* offset relocation for source address using __builtin_preserve_access_index()
* built-in, provided by Clang.
*
* __builtin_preserve_access_index() takes as an argument an expression of
@@ -115,8 +195,8 @@ enum bpf_field_info_kind {
* (local) BTF, used to record relocation.
*/
#define bpf_core_read(dst, sz, src) \
bpf_probe_read(dst, sz, \
(const void *)__builtin_preserve_access_index(src))
bpf_probe_read_kernel(dst, sz, \
(const void *)__builtin_preserve_access_index(src))
/*
* bpf_core_read_str() is a thin wrapper around bpf_probe_read_str()
@@ -124,8 +204,8 @@ enum bpf_field_info_kind {
* argument.
*/
#define bpf_core_read_str(dst, sz, src) \
bpf_probe_read_str(dst, sz, \
(const void *)__builtin_preserve_access_index(src))
bpf_probe_read_kernel_str(dst, sz, \
(const void *)__builtin_preserve_access_index(src))
#define ___concat(a, b) a ## b
#define ___apply(fn, n) ___concat(fn, n)
@@ -239,15 +319,17 @@ enum bpf_field_info_kind {
* int x = BPF_CORE_READ(s, a.b.c, d.e, f, g);
*
* BPF_CORE_READ will decompose above statement into 4 bpf_core_read (BPF
* CO-RE relocatable bpf_probe_read() wrapper) calls, logically equivalent to:
* CO-RE relocatable bpf_probe_read_kernel() wrapper) calls, logically
* equivalent to:
* 1. const void *__t = s->a.b.c;
* 2. __t = __t->d.e;
* 3. __t = __t->f;
* 4. return __t->g;
*
* Equivalence is logical, because there is a heavy type casting/preservation
* involved, as well as all the reads are happening through bpf_probe_read()
* calls using __builtin_preserve_access_index() to emit CO-RE relocations.
* involved, as well as all the reads are happening through
* bpf_probe_read_kernel() calls using __builtin_preserve_access_index() to
* emit CO-RE relocations.
*
* N.B. Only up to 9 "field accessors" are supported, which should be more
* than enough for any practical purpose.

View File

@@ -32,6 +32,9 @@
#ifndef __always_inline
#define __always_inline __attribute__((always_inline))
#endif
#ifndef __noinline
#define __noinline __attribute__((noinline))
#endif
#ifndef __weak
#define __weak __attribute__((weak))
#endif

View File

@@ -8,9 +8,6 @@
#include "libbpf.h"
#include "libbpf_internal.h"
/* make sure libbpf doesn't use kernel-only integer typedefs */
#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
struct bpf_prog_linfo {
void *raw_linfo;
void *raw_jited_linfo;

View File

@@ -289,9 +289,9 @@ struct pt_regs;
#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP
#else
#define BPF_KPROBE_READ_RET_IP(ip, ctx) \
({ bpf_probe_read(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); })
({ bpf_probe_read_kernel(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); })
#define BPF_KRETPROBE_READ_RET_IP(ip, ctx) \
({ bpf_probe_read(&(ip), sizeof(ip), \
({ bpf_probe_read_kernel(&(ip), sizeof(ip), \
(void *)(PT_REGS_FP(ctx) + sizeof(ip))); })
#endif

View File

@@ -21,9 +21,6 @@
#include "libbpf_internal.h"
#include "hashmap.h"
/* make sure libbpf doesn't use kernel-only integer typedefs */
#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
#define BTF_MAX_NR_TYPES 0x7fffffffU
#define BTF_MAX_STR_OFFSET 0x7fffffffU
@@ -61,7 +58,7 @@ static int btf_add_type(struct btf *btf, struct btf_type *t)
expand_by = max(btf->types_size >> 2, 16U);
new_size = min(BTF_MAX_NR_TYPES, btf->types_size + expand_by);
new_types = realloc(btf->types, sizeof(*new_types) * new_size);
new_types = libbpf_reallocarray(btf->types, new_size, sizeof(*new_types));
if (!new_types)
return -ENOMEM;
@@ -1131,14 +1128,14 @@ static int btf_ext_setup_line_info(struct btf_ext *btf_ext)
return btf_ext_setup_info(btf_ext, &param);
}
static int btf_ext_setup_field_reloc(struct btf_ext *btf_ext)
static int btf_ext_setup_core_relos(struct btf_ext *btf_ext)
{
struct btf_ext_sec_setup_param param = {
.off = btf_ext->hdr->field_reloc_off,
.len = btf_ext->hdr->field_reloc_len,
.min_rec_size = sizeof(struct bpf_field_reloc),
.ext_info = &btf_ext->field_reloc_info,
.desc = "field_reloc",
.off = btf_ext->hdr->core_relo_off,
.len = btf_ext->hdr->core_relo_len,
.min_rec_size = sizeof(struct bpf_core_relo),
.ext_info = &btf_ext->core_relo_info,
.desc = "core_relo",
};
return btf_ext_setup_info(btf_ext, &param);
@@ -1217,10 +1214,9 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
if (err)
goto done;
if (btf_ext->hdr->hdr_len <
offsetofend(struct btf_ext_header, field_reloc_len))
if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len))
goto done;
err = btf_ext_setup_field_reloc(btf_ext);
err = btf_ext_setup_core_relos(btf_ext);
if (err)
goto done;
@@ -1575,7 +1571,7 @@ static int btf_dedup_hypot_map_add(struct btf_dedup *d,
__u32 *new_list;
d->hypot_cap += max((size_t)16, d->hypot_cap / 2);
new_list = realloc(d->hypot_list, sizeof(__u32) * d->hypot_cap);
new_list = libbpf_reallocarray(d->hypot_list, d->hypot_cap, sizeof(__u32));
if (!new_list)
return -ENOMEM;
d->hypot_list = new_list;
@@ -1871,8 +1867,7 @@ static int btf_dedup_strings(struct btf_dedup *d)
struct btf_str_ptr *new_ptrs;
strs.cap += max(strs.cnt / 2, 16U);
new_ptrs = realloc(strs.ptrs,
sizeof(strs.ptrs[0]) * strs.cap);
new_ptrs = libbpf_reallocarray(strs.ptrs, strs.cap, sizeof(strs.ptrs[0]));
if (!new_ptrs) {
err = -ENOMEM;
goto done;
@@ -2957,8 +2952,8 @@ static int btf_dedup_compact_types(struct btf_dedup *d)
d->btf->nr_types = next_type_id - 1;
d->btf->types_size = d->btf->nr_types;
d->btf->hdr->type_len = p - types_start;
new_types = realloc(d->btf->types,
(1 + d->btf->nr_types) * sizeof(struct btf_type *));
new_types = libbpf_reallocarray(d->btf->types, (1 + d->btf->nr_types),
sizeof(struct btf_type *));
if (!new_types)
return -ENOMEM;
d->btf->types = new_types;

View File

@@ -24,44 +24,6 @@ struct btf_type;
struct bpf_object;
/*
* The .BTF.ext ELF section layout defined as
* struct btf_ext_header
* func_info subsection
*
* The func_info subsection layout:
* record size for struct bpf_func_info in the func_info subsection
* struct btf_sec_func_info for section #1
* a list of bpf_func_info records for section #1
* where struct bpf_func_info mimics one in include/uapi/linux/bpf.h
* but may not be identical
* struct btf_sec_func_info for section #2
* a list of bpf_func_info records for section #2
* ......
*
* Note that the bpf_func_info record size in .BTF.ext may not
* be the same as the one defined in include/uapi/linux/bpf.h.
* The loader should ensure that record_size meets minimum
* requirement and pass the record as is to the kernel. The
* kernel will handle the func_info properly based on its contents.
*/
struct btf_ext_header {
__u16 magic;
__u8 version;
__u8 flags;
__u32 hdr_len;
/* All offsets are in bytes relative to the end of this header */
__u32 func_info_off;
__u32 func_info_len;
__u32 line_info_off;
__u32 line_info_len;
/* optional part of .BTF.ext header */
__u32 field_reloc_off;
__u32 field_reloc_len;
};
LIBBPF_API void btf__free(struct btf *btf);
LIBBPF_API struct btf *btf__new(const void *data, __u32 size);
LIBBPF_API struct btf *btf__parse(const char *path, struct btf_ext **btf_ext);

View File

@@ -19,9 +19,6 @@
#include "libbpf.h"
#include "libbpf_internal.h"
/* make sure libbpf doesn't use kernel-only integer typedefs */
#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
static const char PREFIXES[] = "\t\t\t\t\t\t\t\t\t\t\t\t\t";
static const size_t PREFIX_CNT = sizeof(PREFIXES) - 1;
@@ -323,8 +320,7 @@ static int btf_dump_add_emit_queue_id(struct btf_dump *d, __u32 id)
if (d->emit_queue_cnt >= d->emit_queue_cap) {
new_cap = max(16, d->emit_queue_cap * 3 / 2);
new_queue = realloc(d->emit_queue,
new_cap * sizeof(new_queue[0]));
new_queue = libbpf_reallocarray(d->emit_queue, new_cap, sizeof(new_queue[0]));
if (!new_queue)
return -ENOMEM;
d->emit_queue = new_queue;
@@ -1003,8 +999,7 @@ static int btf_dump_push_decl_stack_id(struct btf_dump *d, __u32 id)
if (d->decl_stack_cnt >= d->decl_stack_cap) {
new_cap = max(16, d->decl_stack_cap * 3 / 2);
new_stack = realloc(d->decl_stack,
new_cap * sizeof(new_stack[0]));
new_stack = libbpf_reallocarray(d->decl_stack, new_cap, sizeof(new_stack[0]));
if (!new_stack)
return -ENOMEM;
d->decl_stack = new_stack;

View File

@@ -15,6 +15,9 @@
/* make sure libbpf doesn't use kernel-only integer typedefs */
#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
/* prevent accidental re-addition of reallocarray() */
#pragma GCC poison reallocarray
/* start with 4 buckets */
#define HASHMAP_MIN_CAP_BITS 2

File diff suppressed because it is too large Load Diff

View File

@@ -588,8 +588,12 @@ perf_buffer__new_raw(int map_fd, size_t page_cnt,
const struct perf_buffer_raw_opts *opts);
LIBBPF_API void perf_buffer__free(struct perf_buffer *pb);
LIBBPF_API int perf_buffer__epoll_fd(const struct perf_buffer *pb);
LIBBPF_API int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms);
LIBBPF_API int perf_buffer__consume(struct perf_buffer *pb);
LIBBPF_API int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx);
LIBBPF_API size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb);
LIBBPF_API int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx);
typedef enum bpf_perf_event_ret
(*bpf_perf_event_print_t)(struct perf_event_header *hdr,

View File

@@ -299,3 +299,12 @@ LIBBPF_0.1.0 {
btf__set_fd;
btf__set_pointer_size;
} LIBBPF_0.0.9;
LIBBPF_0.2.0 {
global:
perf_buffer__buffer_cnt;
perf_buffer__buffer_fd;
perf_buffer__epoll_fd;
perf_buffer__consume_buffer;
xsk_socket__create_shared;
} LIBBPF_0.1.0;

View File

@@ -9,6 +9,15 @@
#ifndef __LIBBPF_LIBBPF_INTERNAL_H
#define __LIBBPF_LIBBPF_INTERNAL_H
#include <stdlib.h>
#include <limits.h>
/* make sure libbpf doesn't use kernel-only integer typedefs */
#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
/* prevent accidental re-addition of reallocarray() */
#pragma GCC poison reallocarray
#include "libbpf.h"
#define BTF_INFO_ENC(kind, kind_flag, vlen) \
@@ -23,6 +32,12 @@
#define BTF_PARAM_ENC(name, type) (name), (type)
#define BTF_VAR_SECINFO_ENC(type, offset, size) (type), (offset), (size)
#ifndef likely
#define likely(x) __builtin_expect(!!(x), 1)
#endif
#ifndef unlikely
#define unlikely(x) __builtin_expect(!!(x), 0)
#endif
#ifndef min
# define min(x, y) ((x) < (y) ? (x) : (y))
#endif
@@ -63,6 +78,33 @@ do { \
#define pr_info(fmt, ...) __pr(LIBBPF_INFO, fmt, ##__VA_ARGS__)
#define pr_debug(fmt, ...) __pr(LIBBPF_DEBUG, fmt, ##__VA_ARGS__)
#ifndef __has_builtin
#define __has_builtin(x) 0
#endif
/*
* Re-implement glibc's reallocarray() for libbpf internal-only use.
* reallocarray(), unfortunately, is not available in all versions of glibc,
* so requires extra feature detection and using reallocarray() stub from
* <tools/libc_compat.h> and COMPAT_NEED_REALLOCARRAY. All this complicates
* build of libbpf unnecessarily and is just a maintenance burden. Instead,
* it's trivial to implement libbpf-specific internal version and use it
* throughout libbpf.
*/
static inline void *libbpf_reallocarray(void *ptr, size_t nmemb, size_t size)
{
size_t total;
#if __has_builtin(__builtin_mul_overflow)
if (unlikely(__builtin_mul_overflow(nmemb, size, &total)))
return NULL;
#else
if (size == 0 || nmemb > ULONG_MAX / size)
return NULL;
total = nmemb * size;
#endif
return realloc(ptr, total);
}
static inline bool libbpf_validate_opts(const char *opts,
size_t opts_sz, size_t user_sz,
const char *type_name)
@@ -105,18 +147,6 @@ int bpf_object__section_size(const struct bpf_object *obj, const char *name,
int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
__u32 *off);
struct nlattr;
typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb);
int libbpf_netlink_open(unsigned int *nl_pid);
int libbpf_nl_get_link(int sock, unsigned int nl_pid,
libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie);
int libbpf_nl_get_class(int sock, unsigned int nl_pid, int ifindex,
libbpf_dump_nlmsg_t dump_class_nlmsg, void *cookie);
int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex,
libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie);
int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle,
libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie);
struct btf_ext_info {
/*
* info points to the individual info section (e.g. func_info and
@@ -138,6 +168,44 @@ struct btf_ext_info {
i < (sec)->num_info; \
i++, rec = (void *)rec + (seg)->rec_size)
/*
* The .BTF.ext ELF section layout defined as
* struct btf_ext_header
* func_info subsection
*
* The func_info subsection layout:
* record size for struct bpf_func_info in the func_info subsection
* struct btf_sec_func_info for section #1
* a list of bpf_func_info records for section #1
* where struct bpf_func_info mimics one in include/uapi/linux/bpf.h
* but may not be identical
* struct btf_sec_func_info for section #2
* a list of bpf_func_info records for section #2
* ......
*
* Note that the bpf_func_info record size in .BTF.ext may not
* be the same as the one defined in include/uapi/linux/bpf.h.
* The loader should ensure that record_size meets minimum
* requirement and pass the record as is to the kernel. The
* kernel will handle the func_info properly based on its contents.
*/
struct btf_ext_header {
__u16 magic;
__u8 version;
__u8 flags;
__u32 hdr_len;
/* All offsets are in bytes relative to the end of this header */
__u32 func_info_off;
__u32 func_info_len;
__u32 line_info_off;
__u32 line_info_len;
/* optional part of .BTF.ext header */
__u32 core_relo_off;
__u32 core_relo_len;
};
struct btf_ext {
union {
struct btf_ext_header *hdr;
@@ -145,7 +213,7 @@ struct btf_ext {
};
struct btf_ext_info func_info;
struct btf_ext_info line_info;
struct btf_ext_info field_reloc_info;
struct btf_ext_info core_relo_info;
__u32 data_size;
};
@@ -170,32 +238,40 @@ struct bpf_line_info_min {
__u32 line_col;
};
/* bpf_field_info_kind encodes which aspect of captured field has to be
* adjusted by relocations. Currently supported values are:
* - BPF_FIELD_BYTE_OFFSET: field offset (in bytes);
* - BPF_FIELD_EXISTS: field existence (1, if field exists; 0, otherwise);
/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value
* has to be adjusted by relocations.
*/
enum bpf_field_info_kind {
enum bpf_core_relo_kind {
BPF_FIELD_BYTE_OFFSET = 0, /* field byte offset */
BPF_FIELD_BYTE_SIZE = 1,
BPF_FIELD_BYTE_SIZE = 1, /* field size in bytes */
BPF_FIELD_EXISTS = 2, /* field existence in target kernel */
BPF_FIELD_SIGNED = 3,
BPF_FIELD_LSHIFT_U64 = 4,
BPF_FIELD_RSHIFT_U64 = 5,
BPF_FIELD_SIGNED = 3, /* field signedness (0 - unsigned, 1 - signed) */
BPF_FIELD_LSHIFT_U64 = 4, /* bitfield-specific left bitshift */
BPF_FIELD_RSHIFT_U64 = 5, /* bitfield-specific right bitshift */
BPF_TYPE_ID_LOCAL = 6, /* type ID in local BPF object */
BPF_TYPE_ID_TARGET = 7, /* type ID in target kernel */
BPF_TYPE_EXISTS = 8, /* type existence in target kernel */
BPF_TYPE_SIZE = 9, /* type size in bytes */
BPF_ENUMVAL_EXISTS = 10, /* enum value existence in target kernel */
BPF_ENUMVAL_VALUE = 11, /* enum value integer value */
};
/* The minimum bpf_field_reloc checked by the loader
/* The minimum bpf_core_relo checked by the loader
*
* Field relocation captures the following data:
* CO-RE relocation captures the following data:
* - insn_off - instruction offset (in bytes) within a BPF program that needs
* its insn->imm field to be relocated with actual field info;
* - type_id - BTF type ID of the "root" (containing) entity of a relocatable
* field;
* type or field;
* - access_str_off - offset into corresponding .BTF string section. String
* itself encodes an accessed field using a sequence of field and array
* indicies, separated by colon (:). It's conceptually very close to LLVM's
* getelementptr ([0]) instruction's arguments for identifying offset to
* a field.
* interpretation depends on specific relocation kind:
* - for field-based relocations, string encodes an accessed field using
* a sequence of field and array indices, separated by colon (:). It's
* conceptually very close to LLVM's getelementptr ([0]) instruction's
* arguments for identifying offset to a field.
* - for type-based relocations, strings is expected to be just "0";
* - for enum value-based relocations, string contains an index of enum
* value within its enum type;
*
* Example to provide a better feel.
*
@@ -226,11 +302,11 @@ enum bpf_field_info_kind {
*
* [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction
*/
struct bpf_field_reloc {
struct bpf_core_relo {
__u32 insn_off;
__u32 type_id;
__u32 access_str_off;
enum bpf_field_info_kind kind;
enum bpf_core_relo_kind kind;
};
#endif /* __LIBBPF_LIBBPF_INTERNAL_H */

View File

@@ -17,9 +17,6 @@
#include "libbpf.h"
#include "libbpf_internal.h"
/* make sure libbpf doesn't use kernel-only integer typedefs */
#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
static bool grep(const char *buffer, const char *pattern)
{
return !!strstr(buffer, pattern);
@@ -173,7 +170,7 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
return btf_fd;
}
static int load_sk_storage_btf(void)
static int load_local_storage_btf(void)
{
const char strs[] = "\0bpf_spin_lock\0val\0cnt\0l";
/* struct bpf_spin_lock {
@@ -232,12 +229,13 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)
key_size = 0;
break;
case BPF_MAP_TYPE_SK_STORAGE:
case BPF_MAP_TYPE_INODE_STORAGE:
btf_key_type_id = 1;
btf_value_type_id = 3;
value_size = 8;
max_entries = 0;
map_flags = BPF_F_NO_PREALLOC;
btf_fd = load_sk_storage_btf();
btf_fd = load_local_storage_btf();
if (btf_fd < 0)
return false;
break;

View File

@@ -15,13 +15,12 @@
#include "libbpf_internal.h"
#include "nlattr.h"
/* make sure libbpf doesn't use kernel-only integer typedefs */
#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
#ifndef SOL_NETLINK
#define SOL_NETLINK 270
#endif
typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb);
typedef int (*__dump_nlmsg_t)(struct nlmsghdr *nlmsg, libbpf_dump_nlmsg_t,
void *cookie);
@@ -31,7 +30,7 @@ struct xdp_id_md {
struct xdp_link_info info;
};
int libbpf_netlink_open(__u32 *nl_pid)
static int libbpf_netlink_open(__u32 *nl_pid)
{
struct sockaddr_nl sa;
socklen_t addrlen;
@@ -283,6 +282,9 @@ static int get_xdp_info(void *cookie, void *msg, struct nlattr **tb)
return 0;
}
static int libbpf_nl_get_link(int sock, unsigned int nl_pid,
libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie);
int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
size_t info_size, __u32 flags)
{
@@ -368,121 +370,3 @@ int libbpf_nl_get_link(int sock, unsigned int nl_pid,
return bpf_netlink_recv(sock, nl_pid, seq, __dump_link_nlmsg,
dump_link_nlmsg, cookie);
}
static int __dump_class_nlmsg(struct nlmsghdr *nlh,
libbpf_dump_nlmsg_t dump_class_nlmsg,
void *cookie)
{
struct nlattr *tb[TCA_MAX + 1], *attr;
struct tcmsg *t = NLMSG_DATA(nlh);
int len;
len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t));
attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t)));
if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0)
return -LIBBPF_ERRNO__NLPARSE;
return dump_class_nlmsg(cookie, t, tb);
}
int libbpf_nl_get_class(int sock, unsigned int nl_pid, int ifindex,
libbpf_dump_nlmsg_t dump_class_nlmsg, void *cookie)
{
struct {
struct nlmsghdr nlh;
struct tcmsg t;
} req = {
.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)),
.nlh.nlmsg_type = RTM_GETTCLASS,
.nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
.t.tcm_family = AF_UNSPEC,
.t.tcm_ifindex = ifindex,
};
int seq = time(NULL);
req.nlh.nlmsg_seq = seq;
if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0)
return -errno;
return bpf_netlink_recv(sock, nl_pid, seq, __dump_class_nlmsg,
dump_class_nlmsg, cookie);
}
static int __dump_qdisc_nlmsg(struct nlmsghdr *nlh,
libbpf_dump_nlmsg_t dump_qdisc_nlmsg,
void *cookie)
{
struct nlattr *tb[TCA_MAX + 1], *attr;
struct tcmsg *t = NLMSG_DATA(nlh);
int len;
len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t));
attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t)));
if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0)
return -LIBBPF_ERRNO__NLPARSE;
return dump_qdisc_nlmsg(cookie, t, tb);
}
int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex,
libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie)
{
struct {
struct nlmsghdr nlh;
struct tcmsg t;
} req = {
.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)),
.nlh.nlmsg_type = RTM_GETQDISC,
.nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
.t.tcm_family = AF_UNSPEC,
.t.tcm_ifindex = ifindex,
};
int seq = time(NULL);
req.nlh.nlmsg_seq = seq;
if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0)
return -errno;
return bpf_netlink_recv(sock, nl_pid, seq, __dump_qdisc_nlmsg,
dump_qdisc_nlmsg, cookie);
}
static int __dump_filter_nlmsg(struct nlmsghdr *nlh,
libbpf_dump_nlmsg_t dump_filter_nlmsg,
void *cookie)
{
struct nlattr *tb[TCA_MAX + 1], *attr;
struct tcmsg *t = NLMSG_DATA(nlh);
int len;
len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t));
attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t)));
if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0)
return -LIBBPF_ERRNO__NLPARSE;
return dump_filter_nlmsg(cookie, t, tb);
}
int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle,
libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie)
{
struct {
struct nlmsghdr nlh;
struct tcmsg t;
} req = {
.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)),
.nlh.nlmsg_type = RTM_GETTFILTER,
.nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
.t.tcm_family = AF_UNSPEC,
.t.tcm_ifindex = ifindex,
.t.tcm_parent = handle,
};
int seq = time(NULL);
req.nlh.nlmsg_seq = seq;
if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0)
return -errno;
return bpf_netlink_recv(sock, nl_pid, seq, __dump_filter_nlmsg,
dump_filter_nlmsg, cookie);
}

View File

@@ -7,14 +7,11 @@
*/
#include <errno.h>
#include "nlattr.h"
#include "libbpf_internal.h"
#include <linux/rtnetlink.h>
#include <string.h>
#include <stdio.h>
/* make sure libbpf doesn't use kernel-only integer typedefs */
#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
#include <linux/rtnetlink.h>
#include "nlattr.h"
#include "libbpf_internal.h"
static uint16_t nla_attr_minlen[LIBBPF_NLA_TYPE_MAX+1] = {
[LIBBPF_NLA_U8] = sizeof(uint8_t),

View File

@@ -16,15 +16,11 @@
#include <asm/barrier.h>
#include <sys/mman.h>
#include <sys/epoll.h>
#include <tools/libc_compat.h>
#include "libbpf.h"
#include "libbpf_internal.h"
#include "bpf.h"
/* make sure libbpf doesn't use kernel-only integer typedefs */
#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
struct ring {
ring_buffer_sample_fn sample_cb;
void *ctx;
@@ -82,12 +78,12 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
return -EINVAL;
}
tmp = reallocarray(rb->rings, rb->ring_cnt + 1, sizeof(*rb->rings));
tmp = libbpf_reallocarray(rb->rings, rb->ring_cnt + 1, sizeof(*rb->rings));
if (!tmp)
return -ENOMEM;
rb->rings = tmp;
tmp = reallocarray(rb->events, rb->ring_cnt + 1, sizeof(*rb->events));
tmp = libbpf_reallocarray(rb->events, rb->ring_cnt + 1, sizeof(*rb->events));
if (!tmp)
return -ENOMEM;
rb->events = tmp;

View File

@@ -20,6 +20,7 @@
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/if_xdp.h>
#include <linux/list.h>
#include <linux/sockios.h>
#include <net/if.h>
#include <sys/ioctl.h>
@@ -32,9 +33,6 @@
#include "libbpf_internal.h"
#include "xsk.h"
/* make sure libbpf doesn't use kernel-only integer typedefs */
#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
#ifndef SOL_XDP
#define SOL_XDP 283
#endif
@@ -48,26 +46,35 @@
#endif
struct xsk_umem {
struct xsk_ring_prod *fill;
struct xsk_ring_cons *comp;
struct xsk_ring_prod *fill_save;
struct xsk_ring_cons *comp_save;
char *umem_area;
struct xsk_umem_config config;
int fd;
int refcount;
struct list_head ctx_list;
};
struct xsk_ctx {
struct xsk_ring_prod *fill;
struct xsk_ring_cons *comp;
__u32 queue_id;
struct xsk_umem *umem;
int refcount;
int ifindex;
struct list_head list;
int prog_fd;
int xsks_map_fd;
char ifname[IFNAMSIZ];
};
struct xsk_socket {
struct xsk_ring_cons *rx;
struct xsk_ring_prod *tx;
__u64 outstanding_tx;
struct xsk_umem *umem;
struct xsk_ctx *ctx;
struct xsk_socket_config config;
int fd;
int ifindex;
int prog_fd;
int xsks_map_fd;
__u32 queue_id;
char ifname[IFNAMSIZ];
};
struct xsk_nl_info {
@@ -203,15 +210,73 @@ static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off)
return -EINVAL;
}
static int xsk_create_umem_rings(struct xsk_umem *umem, int fd,
struct xsk_ring_prod *fill,
struct xsk_ring_cons *comp)
{
struct xdp_mmap_offsets off;
void *map;
int err;
err = setsockopt(fd, SOL_XDP, XDP_UMEM_FILL_RING,
&umem->config.fill_size,
sizeof(umem->config.fill_size));
if (err)
return -errno;
err = setsockopt(fd, SOL_XDP, XDP_UMEM_COMPLETION_RING,
&umem->config.comp_size,
sizeof(umem->config.comp_size));
if (err)
return -errno;
err = xsk_get_mmap_offsets(fd, &off);
if (err)
return -errno;
map = mmap(NULL, off.fr.desc + umem->config.fill_size * sizeof(__u64),
PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
XDP_UMEM_PGOFF_FILL_RING);
if (map == MAP_FAILED)
return -errno;
fill->mask = umem->config.fill_size - 1;
fill->size = umem->config.fill_size;
fill->producer = map + off.fr.producer;
fill->consumer = map + off.fr.consumer;
fill->flags = map + off.fr.flags;
fill->ring = map + off.fr.desc;
fill->cached_cons = umem->config.fill_size;
map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64),
PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
XDP_UMEM_PGOFF_COMPLETION_RING);
if (map == MAP_FAILED) {
err = -errno;
goto out_mmap;
}
comp->mask = umem->config.comp_size - 1;
comp->size = umem->config.comp_size;
comp->producer = map + off.cr.producer;
comp->consumer = map + off.cr.consumer;
comp->flags = map + off.cr.flags;
comp->ring = map + off.cr.desc;
return 0;
out_mmap:
munmap(map, off.fr.desc + umem->config.fill_size * sizeof(__u64));
return err;
}
int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,
__u64 size, struct xsk_ring_prod *fill,
struct xsk_ring_cons *comp,
const struct xsk_umem_config *usr_config)
{
struct xdp_mmap_offsets off;
struct xdp_umem_reg mr;
struct xsk_umem *umem;
void *map;
int err;
if (!umem_area || !umem_ptr || !fill || !comp)
@@ -230,6 +295,7 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,
}
umem->umem_area = umem_area;
INIT_LIST_HEAD(&umem->ctx_list);
xsk_set_umem_config(&umem->config, usr_config);
memset(&mr, 0, sizeof(mr));
@@ -244,71 +310,16 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,
err = -errno;
goto out_socket;
}
err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_FILL_RING,
&umem->config.fill_size,
sizeof(umem->config.fill_size));
if (err) {
err = -errno;
err = xsk_create_umem_rings(umem, umem->fd, fill, comp);
if (err)
goto out_socket;
}
err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_COMPLETION_RING,
&umem->config.comp_size,
sizeof(umem->config.comp_size));
if (err) {
err = -errno;
goto out_socket;
}
err = xsk_get_mmap_offsets(umem->fd, &off);
if (err) {
err = -errno;
goto out_socket;
}
map = mmap(NULL, off.fr.desc + umem->config.fill_size * sizeof(__u64),
PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, umem->fd,
XDP_UMEM_PGOFF_FILL_RING);
if (map == MAP_FAILED) {
err = -errno;
goto out_socket;
}
umem->fill = fill;
fill->mask = umem->config.fill_size - 1;
fill->size = umem->config.fill_size;
fill->producer = map + off.fr.producer;
fill->consumer = map + off.fr.consumer;
fill->flags = map + off.fr.flags;
fill->ring = map + off.fr.desc;
fill->cached_prod = *fill->producer;
/* cached_cons is "size" bigger than the real consumer pointer
* See xsk_prod_nb_free
*/
fill->cached_cons = *fill->consumer + umem->config.fill_size;
map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64),
PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, umem->fd,
XDP_UMEM_PGOFF_COMPLETION_RING);
if (map == MAP_FAILED) {
err = -errno;
goto out_mmap;
}
umem->comp = comp;
comp->mask = umem->config.comp_size - 1;
comp->size = umem->config.comp_size;
comp->producer = map + off.cr.producer;
comp->consumer = map + off.cr.consumer;
comp->flags = map + off.cr.flags;
comp->ring = map + off.cr.desc;
comp->cached_prod = *comp->producer;
comp->cached_cons = *comp->consumer;
umem->fill_save = fill;
umem->comp_save = comp;
*umem_ptr = umem;
return 0;
out_mmap:
munmap(map, off.fr.desc + umem->config.fill_size * sizeof(__u64));
out_socket:
close(umem->fd);
out_umem_alloc:
@@ -342,6 +353,7 @@ DEFAULT_VERSION(xsk_umem__create_v0_0_4, xsk_umem__create, LIBBPF_0.0.4)
static int xsk_load_xdp_prog(struct xsk_socket *xsk)
{
static const int log_buf_size = 16 * 1024;
struct xsk_ctx *ctx = xsk->ctx;
char log_buf[log_buf_size];
int err, prog_fd;
@@ -369,7 +381,7 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
/* *(u32 *)(r10 - 4) = r2 */
BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -4),
/* r1 = xskmap[] */
BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd),
BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
/* r3 = XDP_PASS */
BPF_MOV64_IMM(BPF_REG_3, 2),
/* call bpf_redirect_map */
@@ -381,7 +393,7 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
/* r2 += -4 */
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
/* r1 = xskmap[] */
BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd),
BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
/* call bpf_map_lookup_elem */
BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
/* r1 = r0 */
@@ -393,7 +405,7 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
/* r2 = *(u32 *)(r10 - 4) */
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_10, -4),
/* r1 = xskmap[] */
BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd),
BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
/* r3 = 0 */
BPF_MOV64_IMM(BPF_REG_3, 0),
/* call bpf_redirect_map */
@@ -411,19 +423,21 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
return prog_fd;
}
err = bpf_set_link_xdp_fd(xsk->ifindex, prog_fd, xsk->config.xdp_flags);
err = bpf_set_link_xdp_fd(xsk->ctx->ifindex, prog_fd,
xsk->config.xdp_flags);
if (err) {
close(prog_fd);
return err;
}
xsk->prog_fd = prog_fd;
ctx->prog_fd = prog_fd;
return 0;
}
static int xsk_get_max_queues(struct xsk_socket *xsk)
{
struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS };
struct xsk_ctx *ctx = xsk->ctx;
struct ifreq ifr = {};
int fd, err, ret;
@@ -432,7 +446,7 @@ static int xsk_get_max_queues(struct xsk_socket *xsk)
return -errno;
ifr.ifr_data = (void *)&channels;
memcpy(ifr.ifr_name, xsk->ifname, IFNAMSIZ - 1);
memcpy(ifr.ifr_name, ctx->ifname, IFNAMSIZ - 1);
ifr.ifr_name[IFNAMSIZ - 1] = '\0';
err = ioctl(fd, SIOCETHTOOL, &ifr);
if (err && errno != EOPNOTSUPP) {
@@ -460,6 +474,7 @@ out:
static int xsk_create_bpf_maps(struct xsk_socket *xsk)
{
struct xsk_ctx *ctx = xsk->ctx;
int max_queues;
int fd;
@@ -472,15 +487,17 @@ static int xsk_create_bpf_maps(struct xsk_socket *xsk)
if (fd < 0)
return fd;
xsk->xsks_map_fd = fd;
ctx->xsks_map_fd = fd;
return 0;
}
static void xsk_delete_bpf_maps(struct xsk_socket *xsk)
{
bpf_map_delete_elem(xsk->xsks_map_fd, &xsk->queue_id);
close(xsk->xsks_map_fd);
struct xsk_ctx *ctx = xsk->ctx;
bpf_map_delete_elem(ctx->xsks_map_fd, &ctx->queue_id);
close(ctx->xsks_map_fd);
}
static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
@@ -488,10 +505,11 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
__u32 i, *map_ids, num_maps, prog_len = sizeof(struct bpf_prog_info);
__u32 map_len = sizeof(struct bpf_map_info);
struct bpf_prog_info prog_info = {};
struct xsk_ctx *ctx = xsk->ctx;
struct bpf_map_info map_info;
int fd, err;
err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len);
err = bpf_obj_get_info_by_fd(ctx->prog_fd, &prog_info, &prog_len);
if (err)
return err;
@@ -505,11 +523,11 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
prog_info.nr_map_ids = num_maps;
prog_info.map_ids = (__u64)(unsigned long)map_ids;
err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len);
err = bpf_obj_get_info_by_fd(ctx->prog_fd, &prog_info, &prog_len);
if (err)
goto out_map_ids;
xsk->xsks_map_fd = -1;
ctx->xsks_map_fd = -1;
for (i = 0; i < prog_info.nr_map_ids; i++) {
fd = bpf_map_get_fd_by_id(map_ids[i]);
@@ -523,7 +541,7 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
}
if (!strcmp(map_info.name, "xsks_map")) {
xsk->xsks_map_fd = fd;
ctx->xsks_map_fd = fd;
continue;
}
@@ -531,7 +549,7 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
}
err = 0;
if (xsk->xsks_map_fd == -1)
if (ctx->xsks_map_fd == -1)
err = -ENOENT;
out_map_ids:
@@ -541,16 +559,19 @@ out_map_ids:
static int xsk_set_bpf_maps(struct xsk_socket *xsk)
{
return bpf_map_update_elem(xsk->xsks_map_fd, &xsk->queue_id,
struct xsk_ctx *ctx = xsk->ctx;
return bpf_map_update_elem(ctx->xsks_map_fd, &ctx->queue_id,
&xsk->fd, 0);
}
static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
{
struct xsk_ctx *ctx = xsk->ctx;
__u32 prog_id = 0;
int err;
err = bpf_get_link_xdp_id(xsk->ifindex, &prog_id,
err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id,
xsk->config.xdp_flags);
if (err)
return err;
@@ -566,12 +587,12 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
return err;
}
} else {
xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id);
if (xsk->prog_fd < 0)
ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id);
if (ctx->prog_fd < 0)
return -errno;
err = xsk_lookup_bpf_maps(xsk);
if (err) {
close(xsk->prog_fd);
close(ctx->prog_fd);
return err;
}
}
@@ -580,25 +601,110 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
err = xsk_set_bpf_maps(xsk);
if (err) {
xsk_delete_bpf_maps(xsk);
close(xsk->prog_fd);
close(ctx->prog_fd);
return err;
}
return 0;
}
int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
__u32 queue_id, struct xsk_umem *umem,
struct xsk_ring_cons *rx, struct xsk_ring_prod *tx,
const struct xsk_socket_config *usr_config)
static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex,
__u32 queue_id)
{
struct xsk_ctx *ctx;
if (list_empty(&umem->ctx_list))
return NULL;
list_for_each_entry(ctx, &umem->ctx_list, list) {
if (ctx->ifindex == ifindex && ctx->queue_id == queue_id) {
ctx->refcount++;
return ctx;
}
}
return NULL;
}
static void xsk_put_ctx(struct xsk_ctx *ctx)
{
struct xsk_umem *umem = ctx->umem;
struct xdp_mmap_offsets off;
int err;
if (--ctx->refcount == 0) {
err = xsk_get_mmap_offsets(umem->fd, &off);
if (!err) {
munmap(ctx->fill->ring - off.fr.desc,
off.fr.desc + umem->config.fill_size *
sizeof(__u64));
munmap(ctx->comp->ring - off.cr.desc,
off.cr.desc + umem->config.comp_size *
sizeof(__u64));
}
list_del(&ctx->list);
free(ctx);
}
}
static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk,
struct xsk_umem *umem, int ifindex,
const char *ifname, __u32 queue_id,
struct xsk_ring_prod *fill,
struct xsk_ring_cons *comp)
{
struct xsk_ctx *ctx;
int err;
ctx = calloc(1, sizeof(*ctx));
if (!ctx)
return NULL;
if (!umem->fill_save) {
err = xsk_create_umem_rings(umem, xsk->fd, fill, comp);
if (err) {
free(ctx);
return NULL;
}
} else if (umem->fill_save != fill || umem->comp_save != comp) {
/* Copy over rings to new structs. */
memcpy(fill, umem->fill_save, sizeof(*fill));
memcpy(comp, umem->comp_save, sizeof(*comp));
}
ctx->ifindex = ifindex;
ctx->refcount = 1;
ctx->umem = umem;
ctx->queue_id = queue_id;
memcpy(ctx->ifname, ifname, IFNAMSIZ - 1);
ctx->ifname[IFNAMSIZ - 1] = '\0';
umem->fill_save = NULL;
umem->comp_save = NULL;
ctx->fill = fill;
ctx->comp = comp;
list_add(&ctx->list, &umem->ctx_list);
return ctx;
}
int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
const char *ifname,
__u32 queue_id, struct xsk_umem *umem,
struct xsk_ring_cons *rx,
struct xsk_ring_prod *tx,
struct xsk_ring_prod *fill,
struct xsk_ring_cons *comp,
const struct xsk_socket_config *usr_config)
{
void *rx_map = NULL, *tx_map = NULL;
struct sockaddr_xdp sxdp = {};
struct xdp_mmap_offsets off;
struct xsk_socket *xsk;
int err;
struct xsk_ctx *ctx;
int err, ifindex;
if (!umem || !xsk_ptr || !(rx || tx))
if (!umem || !xsk_ptr || !(rx || tx) || !fill || !comp)
return -EFAULT;
xsk = calloc(1, sizeof(*xsk));
@@ -609,10 +715,10 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
if (err)
goto out_xsk_alloc;
if (umem->refcount &&
!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
pr_warn("Error: shared umems not supported by libbpf supplied XDP program.\n");
err = -EBUSY;
xsk->outstanding_tx = 0;
ifindex = if_nametoindex(ifname);
if (!ifindex) {
err = -errno;
goto out_xsk_alloc;
}
@@ -626,16 +732,16 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
xsk->fd = umem->fd;
}
xsk->outstanding_tx = 0;
xsk->queue_id = queue_id;
xsk->umem = umem;
xsk->ifindex = if_nametoindex(ifname);
if (!xsk->ifindex) {
err = -errno;
goto out_socket;
ctx = xsk_get_ctx(umem, ifindex, queue_id);
if (!ctx) {
ctx = xsk_create_ctx(xsk, umem, ifindex, ifname, queue_id,
fill, comp);
if (!ctx) {
err = -ENOMEM;
goto out_socket;
}
}
memcpy(xsk->ifname, ifname, IFNAMSIZ - 1);
xsk->ifname[IFNAMSIZ - 1] = '\0';
xsk->ctx = ctx;
if (rx) {
err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING,
@@ -643,7 +749,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
sizeof(xsk->config.rx_size));
if (err) {
err = -errno;
goto out_socket;
goto out_put_ctx;
}
}
if (tx) {
@@ -652,14 +758,14 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
sizeof(xsk->config.tx_size));
if (err) {
err = -errno;
goto out_socket;
goto out_put_ctx;
}
}
err = xsk_get_mmap_offsets(xsk->fd, &off);
if (err) {
err = -errno;
goto out_socket;
goto out_put_ctx;
}
if (rx) {
@@ -669,7 +775,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
xsk->fd, XDP_PGOFF_RX_RING);
if (rx_map == MAP_FAILED) {
err = -errno;
goto out_socket;
goto out_put_ctx;
}
rx->mask = xsk->config.rx_size - 1;
@@ -708,10 +814,10 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
xsk->tx = tx;
sxdp.sxdp_family = PF_XDP;
sxdp.sxdp_ifindex = xsk->ifindex;
sxdp.sxdp_queue_id = xsk->queue_id;
sxdp.sxdp_ifindex = ctx->ifindex;
sxdp.sxdp_queue_id = ctx->queue_id;
if (umem->refcount > 1) {
sxdp.sxdp_flags = XDP_SHARED_UMEM;
sxdp.sxdp_flags |= XDP_SHARED_UMEM;
sxdp.sxdp_shared_umem_fd = umem->fd;
} else {
sxdp.sxdp_flags = xsk->config.bind_flags;
@@ -723,7 +829,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
goto out_mmap_tx;
}
xsk->prog_fd = -1;
ctx->prog_fd = -1;
if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
err = xsk_setup_xdp_prog(xsk);
@@ -742,6 +848,8 @@ out_mmap_rx:
if (rx)
munmap(rx_map, off.rx.desc +
xsk->config.rx_size * sizeof(struct xdp_desc));
out_put_ctx:
xsk_put_ctx(ctx);
out_socket:
if (--umem->refcount)
close(xsk->fd);
@@ -750,25 +858,24 @@ out_xsk_alloc:
return err;
}
int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
__u32 queue_id, struct xsk_umem *umem,
struct xsk_ring_cons *rx, struct xsk_ring_prod *tx,
const struct xsk_socket_config *usr_config)
{
return xsk_socket__create_shared(xsk_ptr, ifname, queue_id, umem,
rx, tx, umem->fill_save,
umem->comp_save, usr_config);
}
int xsk_umem__delete(struct xsk_umem *umem)
{
struct xdp_mmap_offsets off;
int err;
if (!umem)
return 0;
if (umem->refcount)
return -EBUSY;
err = xsk_get_mmap_offsets(umem->fd, &off);
if (!err) {
munmap(umem->fill->ring - off.fr.desc,
off.fr.desc + umem->config.fill_size * sizeof(__u64));
munmap(umem->comp->ring - off.cr.desc,
off.cr.desc + umem->config.comp_size * sizeof(__u64));
}
close(umem->fd);
free(umem);
@@ -778,15 +885,16 @@ int xsk_umem__delete(struct xsk_umem *umem)
void xsk_socket__delete(struct xsk_socket *xsk)
{
size_t desc_sz = sizeof(struct xdp_desc);
struct xsk_ctx *ctx = xsk->ctx;
struct xdp_mmap_offsets off;
int err;
if (!xsk)
return;
if (xsk->prog_fd != -1) {
if (ctx->prog_fd != -1) {
xsk_delete_bpf_maps(xsk);
close(xsk->prog_fd);
close(ctx->prog_fd);
}
err = xsk_get_mmap_offsets(xsk->fd, &off);
@@ -799,14 +907,15 @@ void xsk_socket__delete(struct xsk_socket *xsk)
munmap(xsk->tx->ring - off.tx.desc,
off.tx.desc + xsk->config.tx_size * desc_sz);
}
}
xsk->umem->refcount--;
xsk_put_ctx(ctx);
ctx->umem->refcount--;
/* Do not close an fd that also has an associated umem connected
* to it.
*/
if (xsk->fd != xsk->umem->fd)
if (xsk->fd != ctx->umem->fd)
close(xsk->fd);
free(xsk);
}

View File

@@ -234,6 +234,15 @@ LIBBPF_API int xsk_socket__create(struct xsk_socket **xsk,
struct xsk_ring_cons *rx,
struct xsk_ring_prod *tx,
const struct xsk_socket_config *config);
LIBBPF_API int
xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
const char *ifname,
__u32 queue_id, struct xsk_umem *umem,
struct xsk_ring_cons *rx,
struct xsk_ring_prod *tx,
struct xsk_ring_prod *fill,
struct xsk_ring_cons *comp,
const struct xsk_socket_config *config);
/* Returns 0 for success and -EBUSY if the umem is still in use. */
LIBBPF_API int xsk_umem__delete(struct xsk_umem *umem);