Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says:

====================
pull-request: bpf-next 2020-10-12

The main changes are:

1) The BPF verifier improvements to track register allocation pattern, from Alexei and Yonghong.

2) libbpf relocation support for different size load/store, from Andrii.

3) bpf_redirect_peer() helper and support for inner map array with different max_entries, from Daniel.

4) BPF support for per-cpu variables, from Hao.

5) sockmap improvements, from John.
====================

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
@@ -60,13 +60,13 @@ Q: Where can I find patches currently under discussion for BPF subsystem?
 A: All patches that are Cc'ed to netdev are queued for review under netdev
 patchwork project:
 
-  http://patchwork.ozlabs.org/project/netdev/list/
+  https://patchwork.kernel.org/project/netdevbpf/list/
 
 Those patches which target BPF, are assigned to a 'bpf' delegate for
 further processing from BPF maintainers. The current queue with
 patches under review can be found at:
 
-  https://patchwork.ozlabs.org/project/netdev/list/?delegate=77147
+  https://patchwork.kernel.org/project/netdevbpf/list/?delegate=121173
 
 Once the patches have been reviewed by the BPF community as a whole
 and approved by the BPF maintainers, their status in patchwork will be
@@ -3263,7 +3263,7 @@ M: Daniel Borkmann <daniel@iogearbox.net>
 R:	Martin KaFai Lau <kafai@fb.com>
 R:	Song Liu <songliubraving@fb.com>
 R:	Yonghong Song <yhs@fb.com>
-R:	Andrii Nakryiko <andriin@fb.com>
+R:	Andrii Nakryiko <andrii@kernel.org>
 R:	John Fastabend <john.fastabend@gmail.com>
 R:	KP Singh <kpsingh@chromium.org>
 L:	netdev@vger.kernel.org
@@ -420,6 +420,14 @@ static int veth_select_rxq(struct net_device *dev)
 	return smp_processor_id() % dev->real_num_rx_queues;
 }
 
+static struct net_device *veth_peer_dev(struct net_device *dev)
+{
+	struct veth_priv *priv = netdev_priv(dev);
+
+	/* Callers must be under RCU read side. */
+	return rcu_dereference(priv->peer);
+}
+
 static int veth_xdp_xmit(struct net_device *dev, int n,
 			 struct xdp_frame **frames,
 			 u32 flags, bool ndo_xmit)
@@ -1224,6 +1232,7 @@ static const struct net_device_ops veth_netdev_ops = {
 	.ndo_set_rx_headroom	= veth_set_rx_headroom,
 	.ndo_bpf		= veth_xdp,
 	.ndo_xdp_xmit		= veth_ndo_xdp_xmit,
+	.ndo_get_peer_dev	= veth_peer_dev,
 };
 
 #define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \
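ndo_get_peer_dev is consumed by the new bpf_redirect_peer() path in net/core/filter.c (not shown in this excerpt). A rough sketch of the expected calling convention; the wrapper name here is illustrative, not from the patch:

  /* Sketch only: resolving the peer before an ingress->ingress redirect.
   * Per the veth_peer_dev() comment above, the caller must be inside an
   * rcu_read_lock() section.
   */
  static struct net_device *peer_dev_lookup(struct net_device *dev)
  {
  	const struct net_device_ops *ops = dev->netdev_ops;

  	if (unlikely(!ops->ndo_get_peer_dev))
  		return NULL;
  	return ops->ndo_get_peer_dev(dev);
  }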
@@ -82,7 +82,7 @@ struct bpf_map_ops {
 	void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
 				int fd);
 	void (*map_fd_put_ptr)(void *ptr);
-	u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf);
+	int (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf);
 	u32 (*map_fd_sys_lookup_elem)(void *ptr);
 	void (*map_seq_show_elem)(struct bpf_map *map, void *key,
 				  struct seq_file *m);
@@ -293,6 +293,7 @@ enum bpf_arg_type {
 	ARG_PTR_TO_ALLOC_MEM_OR_NULL,	/* pointer to dynamically allocated memory or NULL */
 	ARG_CONST_ALLOC_SIZE_OR_ZERO,	/* number of allocated bytes requested */
 	ARG_PTR_TO_BTF_ID_SOCK_COMMON,	/* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */
+	ARG_PTR_TO_PERCPU_BTF_ID,	/* pointer to in-kernel percpu type */
 	__BPF_ARG_TYPE_MAX,
 };
 
@@ -307,6 +308,8 @@ enum bpf_return_type {
 	RET_PTR_TO_SOCK_COMMON_OR_NULL,	/* returns a pointer to a sock_common or NULL */
 	RET_PTR_TO_ALLOC_MEM_OR_NULL,	/* returns a pointer to dynamically allocated memory or NULL */
 	RET_PTR_TO_BTF_ID_OR_NULL,	/* returns a pointer to a btf_id or NULL */
+	RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, /* returns a pointer to a valid memory or a btf_id or NULL */
+	RET_PTR_TO_MEM_OR_BTF_ID,	/* returns a pointer to a valid memory or a btf_id */
 };
 
 /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
@@ -405,6 +408,7 @@ enum bpf_reg_type {
 	PTR_TO_RDONLY_BUF_OR_NULL, /* reg points to a readonly buffer or NULL */
 	PTR_TO_RDWR_BUF,	 /* reg points to a read/write buffer */
 	PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */
+	PTR_TO_PERCPU_BTF_ID,	 /* reg points to a percpu kernel variable */
 };
 
 /* The information passed from prog-specific *_is_valid_access
@@ -1828,6 +1832,8 @@ extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto;
 extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto;
 extern const struct bpf_func_proto bpf_copy_from_user_proto;
 extern const struct bpf_func_proto bpf_snprintf_btf_proto;
+extern const struct bpf_func_proto bpf_per_cpu_ptr_proto;
+extern const struct bpf_func_proto bpf_this_cpu_ptr_proto;
 
 const struct bpf_func_proto *bpf_tracing_func_proto(
 	enum bpf_func_id func_id, const struct bpf_prog *prog);
@@ -308,6 +308,13 @@ struct bpf_insn_aux_data {
 			u32 map_index;		/* index into used_maps[] */
 			u32 map_off;		/* offset from value base address */
 		};
+		struct {
+			enum bpf_reg_type reg_type;	/* type of pseudo_btf_id */
+			union {
+				u32 btf_id;	/* btf_id for struct typed var */
+				u32 mem_size;	/* mem_size for non-struct typed var */
+			};
+		} btf_var;
 	};
 	u64 map_key_state; /* constant (32 bit) key tracking for maps */
 	int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
@@ -110,6 +110,11 @@ btf_resolve_size(const struct btf *btf, const struct btf_type *type,
 	     i < btf_type_vlen(struct_type);			\
 	     i++, member++)
 
+#define for_each_vsi(i, datasec_type, member)			\
+	for (i = 0, member = btf_type_var_secinfo(datasec_type);	\
+	     i < btf_type_vlen(datasec_type);			\
+	     i++, member++)
+
 static inline bool btf_type_is_ptr(const struct btf_type *t)
 {
 	return BTF_INFO_KIND(t->info) == BTF_KIND_PTR;
@@ -145,6 +150,21 @@ static inline bool btf_type_is_func_proto(const struct btf_type *t)
 	return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC_PROTO;
 }
 
+static inline bool btf_type_is_var(const struct btf_type *t)
+{
+	return BTF_INFO_KIND(t->info) == BTF_KIND_VAR;
+}
+
+/* union is only a special case of struct:
+ * all its offsetof(member) == 0
+ */
+static inline bool btf_type_is_struct(const struct btf_type *t)
+{
+	u8 kind = BTF_INFO_KIND(t->info);
+
+	return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION;
+}
+
 static inline u16 btf_type_vlen(const struct btf_type *t)
 {
 	return BTF_INFO_VLEN(t->info);
@@ -179,6 +199,12 @@ static inline const struct btf_member *btf_type_member(const struct btf_type *t)
 	return (const struct btf_member *)(t + 1);
 }
 
+static inline const struct btf_var_secinfo *btf_type_var_secinfo(
+		const struct btf_type *t)
+{
+	return (const struct btf_var_secinfo *)(t + 1);
+}
+
 #ifdef CONFIG_BPF_SYSCALL
 const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
 const char *btf_name_by_offset(const struct btf *btf, u32 offset);
@@ -1276,6 +1276,9 @@ struct netdev_net_notifier {
  * int (*ndo_tunnel_ctl)(struct net_device *dev, struct ip_tunnel_parm *p,
  *			 int cmd);
  *	Add, change, delete or get information on an IPv4 tunnel.
+ * struct net_device *(*ndo_get_peer_dev)(struct net_device *dev);
+ *	If a device is paired with a peer device, return the peer instance.
+ *	The caller must be under RCU read context.
  */
 struct net_device_ops {
 	int			(*ndo_init)(struct net_device *dev);
@@ -1483,6 +1486,7 @@ struct net_device_ops {
 	struct devlink_port *	(*ndo_get_devlink_port)(struct net_device *dev);
 	int			(*ndo_tunnel_ctl)(struct net_device *dev,
 						  struct ip_tunnel_parm *p, int cmd);
+	struct net_device *	(*ndo_get_peer_dev)(struct net_device *dev);
 };
 
 /**
@@ -308,6 +308,8 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node);
 int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock);
 void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock);
 void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock);
+void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock);
+void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock);
 
 int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock,
 			 struct sk_msg *msg);
@@ -2228,34 +2228,6 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
 #endif /* CONFIG_NET_SOCK_MSG */
 
 #ifdef CONFIG_CGROUP_BPF
-/* Copy the listen sk's HDR_OPT_CB flags to its child.
- *
- * During 3-Way-HandShake, the synack is usually sent from
- * the listen sk with the HDR_OPT_CB flags set so that
- * bpf-prog will be called to write the BPF hdr option.
- *
- * In fastopen, the child sk is used to send synack instead
- * of the listen sk. Thus, inheriting the HDR_OPT_CB flags
- * from the listen sk gives the bpf-prog a chance to write
- * BPF hdr option in the synack pkt during fastopen.
- *
- * Both fastopen and non-fastopen child will inherit the
- * HDR_OPT_CB flags to keep the bpf-prog having a consistent
- * behavior when deciding to clear this cb flags (or not)
- * during the PASSIVE_ESTABLISHED_CB.
- *
- * In the future, other cb flags could be inherited here also.
- */
-static inline void bpf_skops_init_child(const struct sock *sk,
-					struct sock *child)
-{
-	tcp_sk(child)->bpf_sock_ops_cb_flags =
-		tcp_sk(sk)->bpf_sock_ops_cb_flags &
-		(BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG |
-		 BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG |
-		 BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG);
-}
-
 static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops,
 				      struct sk_buff *skb,
 				      unsigned int end_offset)
@@ -2264,11 +2236,6 @@ static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops,
 	skops->skb_data_end = skb->data + end_offset;
 }
 #else
-static inline void bpf_skops_init_child(const struct sock *sk,
-					struct sock *child)
-{
-}
-
 static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops,
 				      struct sk_buff *skb,
 				      unsigned int end_offset)
@@ -356,18 +356,36 @@ enum bpf_link_type {
 #define BPF_F_SLEEPABLE		(1U << 4)
 
 /* When BPF ldimm64's insn[0].src_reg != 0 then this can have
- * two extensions:
+ * the following extensions:
  *
- * insn[0].src_reg:  BPF_PSEUDO_MAP_FD   BPF_PSEUDO_MAP_VALUE
- * insn[0].imm:      map fd              map fd
- * insn[1].imm:      0                   offset into value
- * insn[0].off:      0                   0
- * insn[1].off:      0                   0
- * ldimm64 rewrite:  address of map      address of map[0]+offset
- * verifier type:    CONST_PTR_TO_MAP    PTR_TO_MAP_VALUE
+ * insn[0].src_reg:  BPF_PSEUDO_MAP_FD
+ * insn[0].imm:      map fd
+ * insn[1].imm:      0
+ * insn[0].off:      0
+ * insn[1].off:      0
+ * ldimm64 rewrite:  address of map
+ * verifier type:    CONST_PTR_TO_MAP
  */
 #define BPF_PSEUDO_MAP_FD	1
+/* insn[0].src_reg:  BPF_PSEUDO_MAP_VALUE
+ * insn[0].imm:      map fd
+ * insn[1].imm:      offset into value
+ * insn[0].off:      0
+ * insn[1].off:      0
+ * ldimm64 rewrite:  address of map[0]+offset
+ * verifier type:    PTR_TO_MAP_VALUE
+ */
 #define BPF_PSEUDO_MAP_VALUE	2
+/* insn[0].src_reg:  BPF_PSEUDO_BTF_ID
+ * insn[0].imm:      kernel btd id of VAR
+ * insn[1].imm:      0
+ * insn[0].off:      0
+ * insn[1].off:      0
+ * ldimm64 rewrite:  address of the kernel variable
+ * verifier type:    PTR_TO_BTF_ID or PTR_TO_MEM, depending on whether the var
+ *                   is struct/union.
+ */
+#define BPF_PSEUDO_BTF_ID	3
 
 /* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
  * offset to another bpf function
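The BPF_PSEUDO_BTF_ID encoding above is produced when a program references a kernel symbol through a __ksym extern. A minimal sketch of the source-level side, assuming bpf_prog_active (a real percpu int in the kernel) in the spirit of the selftests added with this series; the section and variable choice are illustrative:

  #include "vmlinux.h"
  #include <bpf/bpf_helpers.h>

  extern const int bpf_prog_active __ksym;	/* percpu var in the kernel */

  SEC("raw_tp/sys_enter")
  int dump_prog_active(const void *ctx)
  {
  	/* the __ksym reference compiles to a ldimm64 with
  	 * src_reg == BPF_PSEUDO_BTF_ID; the verifier rewrites it to the
  	 * variable's kernel address (PTR_TO_PERCPU_BTF_ID here) */
  	const int *active = bpf_per_cpu_ptr(&bpf_prog_active, 0);

  	if (!active)	/* bpf_per_cpu_ptr() may return NULL */
  		return 0;
  	bpf_printk("bpf_prog_active on cpu0: %d", *active);
  	return 0;
  }

  char LICENSE[] SEC("license") = "GPL";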
@@ -417,6 +435,9 @@ enum {
 
 /* Share perf_event among processes */
 	BPF_F_PRESERVE_ELEMS	= (1U << 11),
+
+/* Create a map that is suitable to be an inner map with dynamic max entries */
+	BPF_F_INNER_MAP		= (1U << 12),
 };
 
 /* Flags for BPF_PROG_QUERY. */
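BPF_F_INNER_MAP relaxes the map-in-map rule that every inner array must share the template's max_entries, at the cost of disabling inline lookups (see the -EOPNOTSUPP added to array_map_gen_lookup() further down). A hedged sketch of a BTF-defined map-in-map opting in; names are illustrative:

  #include <linux/bpf.h>
  #include <bpf/bpf_helpers.h>

  struct inner_array {
  	__uint(type, BPF_MAP_TYPE_ARRAY);
  	__uint(map_flags, BPF_F_INNER_MAP);
  	__uint(max_entries, 1);	/* template; other inners may now differ */
  	__type(key, __u32);
  	__type(value, __u64);
  } inner_tmpl SEC(".maps");

  struct {
  	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
  	__uint(max_entries, 4);
  	__type(key, __u32);
  	__array(values, struct inner_array);
  } outer SEC(".maps");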
@@ -1680,7 +1701,7 @@ union bpf_attr {
  *		  **TCP_CONGESTION**, **TCP_BPF_IW**,
  *		  **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**,
  *		  **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**,
- *		  **TCP_SYNCNT**, **TCP_USER_TIMEOUT**.
+ *		  **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**.
  *		* **IPPROTO_IP**, which supports *optname* **IP_TOS**.
  *		* **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
  *	Return
@@ -2235,7 +2256,7 @@ union bpf_attr {
  *	Description
  *		This helper is used in programs implementing policies at the
  *		skb socket level. If the sk_buff *skb* is allowed to pass (i.e.
- *		if the verdeict eBPF program returns **SK_PASS**), redirect it
+ *		if the verdict eBPF program returns **SK_PASS**), redirect it
  *		to the socket referenced by *map* (of type
  *		**BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
  *		egress interfaces can be used for redirection. The
@@ -3661,10 +3682,59 @@ union bpf_attr {
  *		Redirect the packet to another net device of index *ifindex*
  *		and fill in L2 addresses from neighboring subsystem. This helper
  *		is somewhat similar to **bpf_redirect**\ (), except that it
- *		fills in e.g. MAC addresses based on the L3 information from
- *		the packet. This helper is supported for IPv4 and IPv6 protocols.
+ *		populates L2 addresses as well, meaning, internally, the helper
+ *		performs a FIB lookup based on the skb's networking header to
+ *		get the address of the next hop and then relies on the neighbor
+ *		lookup for the L2 address of the nexthop.
+ *
  *		The *flags* argument is reserved and must be 0. The helper is
- *		currently only supported for tc BPF program types.
+ *		currently only supported for tc BPF program types, and enabled
+ *		for IPv4 and IPv6 protocols.
+ *	Return
+ *		The helper returns **TC_ACT_REDIRECT** on success or
+ *		**TC_ACT_SHOT** on error.
+ *
+ * void *bpf_per_cpu_ptr(const void *percpu_ptr, u32 cpu)
+ *	Description
+ *		Take a pointer to a percpu ksym, *percpu_ptr*, and return a
+ *		pointer to the percpu kernel variable on *cpu*. A ksym is an
+ *		extern variable decorated with '__ksym'. For ksym, there is a
+ *		global var (either static or global) defined of the same name
+ *		in the kernel. The ksym is percpu if the global var is percpu.
+ *		The returned pointer points to the global percpu var on *cpu*.
+ *
+ *		bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the
+ *		kernel, except that bpf_per_cpu_ptr() may return NULL. This
+ *		happens if *cpu* is larger than nr_cpu_ids. The caller of
+ *		bpf_per_cpu_ptr() must check the returned value.
+ *	Return
+ *		A pointer pointing to the kernel percpu variable on *cpu*, or
+ *		NULL, if *cpu* is invalid.
+ *
+ * void *bpf_this_cpu_ptr(const void *percpu_ptr)
+ *	Description
+ *		Take a pointer to a percpu ksym, *percpu_ptr*, and return a
+ *		pointer to the percpu kernel variable on this cpu. See the
+ *		description of 'ksym' in **bpf_per_cpu_ptr**\ ().
+ *
+ *		bpf_this_cpu_ptr() has the same semantic as this_cpu_ptr() in
+ *		the kernel. Different from **bpf_per_cpu_ptr**\ (), it would
+ *		never return NULL.
+ *	Return
+ *		A pointer pointing to the kernel percpu variable on this cpu.
+ *
+ * long bpf_redirect_peer(u32 ifindex, u64 flags)
+ *	Description
+ *		Redirect the packet to another net device of index *ifindex*.
+ *		This helper is somewhat similar to **bpf_redirect**\ (), except
+ *		that the redirection happens to the *ifindex*' peer device and
+ *		the netns switch takes place from ingress to ingress without
+ *		going through the CPU's backlog queue.
+ *
+ *		The *flags* argument is reserved and must be 0. The helper is
+ *		currently only supported for tc BPF program types at the ingress
+ *		hook and for veth device types. The peer device must reside in a
+ *		different network namespace.
  *	Return
  *		The helper returns **TC_ACT_REDIRECT** on success or
  *		**TC_ACT_SHOT** on error.
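A usage sketch for the new redirect helper, in the spirit of the selftests; the ifindex constant and section name are illustrative, and in practice userspace would patch in the real peer ifindex:

  #include <linux/bpf.h>
  #include <bpf/bpf_helpers.h>

  #define PEER_IFINDEX 3	/* assumed veth whose peer is in another netns */

  SEC("classifier")
  int tc_ingress_peer(struct __sk_buff *skb)
  {
  	/* ingress -> peer's ingress, skipping the per-CPU backlog queue */
  	return bpf_redirect_peer(PEER_IFINDEX, 0);
  }

  char LICENSE[] SEC("license") = "GPL";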
@@ -3823,6 +3893,9 @@ union bpf_attr {
 	FN(seq_printf_btf),		\
 	FN(skb_cgroup_classid),		\
 	FN(redirect_neigh),		\
+	FN(bpf_per_cpu_ptr),		\
+	FN(bpf_this_cpu_ptr),		\
+	FN(redirect_peer),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -16,7 +16,7 @@
 
 #define ARRAY_CREATE_FLAG_MASK \
 	(BPF_F_NUMA_NODE | BPF_F_MMAPABLE | BPF_F_ACCESS_MASK | \
-	 BPF_F_PRESERVE_ELEMS)
+	 BPF_F_PRESERVE_ELEMS | BPF_F_INNER_MAP)
 
 static void bpf_array_free_percpu(struct bpf_array *array)
 {
@@ -62,7 +62,7 @@ int array_map_alloc_check(union bpf_attr *attr)
 		return -EINVAL;
 
 	if (attr->map_type != BPF_MAP_TYPE_ARRAY &&
-	    attr->map_flags & BPF_F_MMAPABLE)
+	    attr->map_flags & (BPF_F_MMAPABLE | BPF_F_INNER_MAP))
 		return -EINVAL;
 
 	if (attr->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY &&
@@ -214,7 +214,7 @@ static int array_map_direct_value_meta(const struct bpf_map *map, u64 imm,
 }
 
 /* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
-static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
+static int array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
 {
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	struct bpf_insn *insn = insn_buf;
@@ -223,6 +223,9 @@ static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
 	const int map_ptr = BPF_REG_1;
 	const int index = BPF_REG_2;
 
+	if (map->map_flags & BPF_F_INNER_MAP)
+		return -EOPNOTSUPP;
+
 	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
 	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
 	if (!map->bypass_spec_v1) {
@@ -496,8 +499,10 @@ static int array_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
 static bool array_map_meta_equal(const struct bpf_map *meta0,
 				 const struct bpf_map *meta1)
 {
-	return meta0->max_entries == meta1->max_entries &&
-	       bpf_map_meta_equal(meta0, meta1);
+	if (!bpf_map_meta_equal(meta0, meta1))
+		return false;
+	return meta0->map_flags & BPF_F_INNER_MAP ? true :
+	       meta0->max_entries == meta1->max_entries;
 }
 
 struct bpf_iter_seq_array_map_info {
@@ -1251,7 +1256,7 @@ static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
 	return READ_ONCE(*inner_map);
 }
 
-static u32 array_of_map_gen_lookup(struct bpf_map *map,
+static int array_of_map_gen_lookup(struct bpf_map *map,
 				   struct bpf_insn *insn_buf)
 {
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
@@ -188,11 +188,6 @@
 	     i < btf_type_vlen(struct_type); \
 	     i++, member++)
 
-#define for_each_vsi(i, struct_type, member)			\
-	for (i = 0, member = btf_type_var_secinfo(struct_type);	\
-	     i < btf_type_vlen(struct_type);			\
-	     i++, member++)
-
 #define for_each_vsi_from(i, from, struct_type, member)				\
 	for (i = from, member = btf_type_var_secinfo(struct_type) + from;	\
 	     i < btf_type_vlen(struct_type);					\
@@ -440,16 +435,6 @@ static bool btf_type_nosize_or_null(const struct btf_type *t)
 	return !t || btf_type_nosize(t);
 }
 
-/* union is only a special case of struct:
- * all its offsetof(member) == 0
- */
-static bool btf_type_is_struct(const struct btf_type *t)
-{
-	u8 kind = BTF_INFO_KIND(t->info);
-
-	return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION;
-}
-
 static bool __btf_type_is_struct(const struct btf_type *t)
 {
 	return BTF_INFO_KIND(t->info) == BTF_KIND_STRUCT;
@@ -460,11 +445,6 @@ static bool btf_type_is_array(const struct btf_type *t)
 	return BTF_INFO_KIND(t->info) == BTF_KIND_ARRAY;
 }
 
-static bool btf_type_is_var(const struct btf_type *t)
-{
-	return BTF_INFO_KIND(t->info) == BTF_KIND_VAR;
-}
-
 static bool btf_type_is_datasec(const struct btf_type *t)
 {
 	return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC;
@@ -613,11 +593,6 @@ static const struct btf_var *btf_type_var(const struct btf_type *t)
 	return (const struct btf_var *)(t + 1);
 }
 
-static const struct btf_var_secinfo *btf_type_var_secinfo(const struct btf_type *t)
-{
-	return (const struct btf_var_secinfo *)(t + 1);
-}
-
 static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t)
 {
 	return kind_ops[BTF_INFO_KIND(t->info)];
@@ -612,7 +612,7 @@ static void *htab_map_lookup_elem(struct bpf_map *map, void *key)
  * bpf_prog
  *   __htab_map_lookup_elem
  */
-static u32 htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
+static int htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
 {
 	struct bpf_insn *insn = insn_buf;
 	const int ret = BPF_REG_0;
@@ -651,7 +651,7 @@ static void *htab_lru_map_lookup_elem_sys(struct bpf_map *map, void *key)
 	return __htab_lru_map_lookup_elem(map, key, false);
 }
 
-static u32 htab_lru_map_gen_lookup(struct bpf_map *map,
+static int htab_lru_map_gen_lookup(struct bpf_map *map,
 				   struct bpf_insn *insn_buf)
 {
 	struct bpf_insn *insn = insn_buf;
@@ -2070,7 +2070,7 @@ static void *htab_of_map_lookup_elem(struct bpf_map *map, void *key)
 	return READ_ONCE(*inner_map);
 }
 
-static u32 htab_of_map_gen_lookup(struct bpf_map *map,
+static int htab_of_map_gen_lookup(struct bpf_map *map,
 				  struct bpf_insn *insn_buf)
 {
 	struct bpf_insn *insn = insn_buf;
@@ -623,6 +623,34 @@ const struct bpf_func_proto bpf_copy_from_user_proto = {
 	.arg3_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
+{
+	if (cpu >= nr_cpu_ids)
+		return (unsigned long)NULL;
+
+	return (unsigned long)per_cpu_ptr((const void __percpu *)ptr, cpu);
+}
+
+const struct bpf_func_proto bpf_per_cpu_ptr_proto = {
+	.func		= bpf_per_cpu_ptr,
+	.gpl_only	= false,
+	.ret_type	= RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL,
+	.arg1_type	= ARG_PTR_TO_PERCPU_BTF_ID,
+	.arg2_type	= ARG_ANYTHING,
+};
+
+BPF_CALL_1(bpf_this_cpu_ptr, const void *, percpu_ptr)
+{
+	return (unsigned long)this_cpu_ptr((const void __percpu *)percpu_ptr);
+}
+
+const struct bpf_func_proto bpf_this_cpu_ptr_proto = {
+	.func		= bpf_this_cpu_ptr,
+	.gpl_only	= false,
+	.ret_type	= RET_PTR_TO_MEM_OR_BTF_ID,
+	.arg1_type	= ARG_PTR_TO_PERCPU_BTF_ID,
+};
+
 const struct bpf_func_proto bpf_get_current_task_proto __weak;
 const struct bpf_func_proto bpf_probe_read_user_proto __weak;
 const struct bpf_func_proto bpf_probe_read_user_str_proto __weak;
@@ -689,6 +717,10 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 		return &bpf_snprintf_btf_proto;
 	case BPF_FUNC_jiffies64:
 		return &bpf_jiffies64_proto;
+	case BPF_FUNC_bpf_per_cpu_ptr:
+		return &bpf_per_cpu_ptr_proto;
+	case BPF_FUNC_bpf_this_cpu_ptr:
+		return &bpf_this_cpu_ptr_proto;
 	default:
 		break;
 	}
@@ -17,6 +17,8 @@ int pcpu_freelist_init(struct pcpu_freelist *s)
 		raw_spin_lock_init(&head->lock);
 		head->first = NULL;
 	}
+	raw_spin_lock_init(&s->extralist.lock);
+	s->extralist.first = NULL;
 	return 0;
 }
 
@@ -40,12 +42,50 @@ static inline void ___pcpu_freelist_push(struct pcpu_freelist_head *head,
 	raw_spin_unlock(&head->lock);
 }
 
+static inline bool pcpu_freelist_try_push_extra(struct pcpu_freelist *s,
+						struct pcpu_freelist_node *node)
+{
+	if (!raw_spin_trylock(&s->extralist.lock))
+		return false;
+
+	pcpu_freelist_push_node(&s->extralist, node);
+	raw_spin_unlock(&s->extralist.lock);
+	return true;
+}
+
+static inline void ___pcpu_freelist_push_nmi(struct pcpu_freelist *s,
+					     struct pcpu_freelist_node *node)
+{
+	int cpu, orig_cpu;
+
+	orig_cpu = cpu = raw_smp_processor_id();
+	while (1) {
+		struct pcpu_freelist_head *head;
+
+		head = per_cpu_ptr(s->freelist, cpu);
+		if (raw_spin_trylock(&head->lock)) {
+			pcpu_freelist_push_node(head, node);
+			raw_spin_unlock(&head->lock);
+			return;
+		}
+		cpu = cpumask_next(cpu, cpu_possible_mask);
+		if (cpu >= nr_cpu_ids)
+			cpu = 0;
+
+		/* cannot lock any per cpu lock, try extralist */
+		if (cpu == orig_cpu &&
+		    pcpu_freelist_try_push_extra(s, node))
+			return;
+	}
+}
+
 void __pcpu_freelist_push(struct pcpu_freelist *s,
 			struct pcpu_freelist_node *node)
 {
-	struct pcpu_freelist_head *head = this_cpu_ptr(s->freelist);
-
-	___pcpu_freelist_push(head, node);
+	if (in_nmi())
+		___pcpu_freelist_push_nmi(s, node);
+	else
+		___pcpu_freelist_push(this_cpu_ptr(s->freelist), node);
 }
 
 void pcpu_freelist_push(struct pcpu_freelist *s,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *s)
|
static struct pcpu_freelist_node *___pcpu_freelist_pop(struct pcpu_freelist *s)
|
||||||
{
|
{
|
||||||
struct pcpu_freelist_head *head;
|
struct pcpu_freelist_head *head;
|
||||||
struct pcpu_freelist_node *node;
|
struct pcpu_freelist_node *node;
|
||||||
@@ -102,8 +142,59 @@ struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *s)
|
|||||||
if (cpu >= nr_cpu_ids)
|
if (cpu >= nr_cpu_ids)
|
||||||
cpu = 0;
|
cpu = 0;
|
||||||
if (cpu == orig_cpu)
|
if (cpu == orig_cpu)
|
||||||
return NULL;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* per cpu lists are all empty, try extralist */
|
||||||
|
raw_spin_lock(&s->extralist.lock);
|
||||||
|
node = s->extralist.first;
|
||||||
|
if (node)
|
||||||
|
s->extralist.first = node->next;
|
||||||
|
raw_spin_unlock(&s->extralist.lock);
|
||||||
|
return node;
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct pcpu_freelist_node *
|
||||||
|
___pcpu_freelist_pop_nmi(struct pcpu_freelist *s)
|
||||||
|
{
|
||||||
|
struct pcpu_freelist_head *head;
|
||||||
|
struct pcpu_freelist_node *node;
|
||||||
|
int orig_cpu, cpu;
|
||||||
|
|
||||||
|
orig_cpu = cpu = raw_smp_processor_id();
|
||||||
|
while (1) {
|
||||||
|
head = per_cpu_ptr(s->freelist, cpu);
|
||||||
|
if (raw_spin_trylock(&head->lock)) {
|
||||||
|
node = head->first;
|
||||||
|
if (node) {
|
||||||
|
head->first = node->next;
|
||||||
|
raw_spin_unlock(&head->lock);
|
||||||
|
return node;
|
||||||
|
}
|
||||||
|
raw_spin_unlock(&head->lock);
|
||||||
|
}
|
||||||
|
cpu = cpumask_next(cpu, cpu_possible_mask);
|
||||||
|
if (cpu >= nr_cpu_ids)
|
||||||
|
cpu = 0;
|
||||||
|
if (cpu == orig_cpu)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* cannot pop from per cpu lists, try extralist */
|
||||||
|
if (!raw_spin_trylock(&s->extralist.lock))
|
||||||
|
return NULL;
|
||||||
|
node = s->extralist.first;
|
||||||
|
if (node)
|
||||||
|
s->extralist.first = node->next;
|
||||||
|
raw_spin_unlock(&s->extralist.lock);
|
||||||
|
return node;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *s)
|
||||||
|
{
|
||||||
|
if (in_nmi())
|
||||||
|
return ___pcpu_freelist_pop_nmi(s);
|
||||||
|
return ___pcpu_freelist_pop(s);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s)
|
struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s)
|
||||||
|
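The motivation for the trylock/extralist scheme above: preallocated map elements come off this freelist, and map updates can now happen in NMI context, e.g. from a perf_event program. An illustrative program that exercises that path (map sizing and section name are assumptions):

  #include <linux/bpf.h>
  #include <bpf/bpf_helpers.h>

  struct {
  	__uint(type, BPF_MAP_TYPE_HASH);	/* preallocated by default */
  	__uint(max_entries, 1024);
  	__type(key, __u32);
  	__type(value, __u64);
  } counts SEC(".maps");

  SEC("perf_event")	/* may fire in NMI context */
  int on_sample(void *ctx)
  {
  	__u32 key = bpf_get_smp_processor_id();
  	__u64 one = 1, *val;

  	val = bpf_map_lookup_elem(&counts, &key);
  	if (val) {
  		__sync_fetch_and_add(val, 1);
  		return 0;
  	}
  	/* element allocation pops a node off the percpu freelist,
  	 * potentially in NMI -> the trylock/extralist path above */
  	bpf_map_update_elem(&counts, &key, &one, BPF_ANY);
  	return 0;
  }

  char LICENSE[] SEC("license") = "GPL";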
@@ -13,6 +13,7 @@ struct pcpu_freelist_head {
 
 struct pcpu_freelist {
 	struct pcpu_freelist_head __percpu *freelist;
+	struct pcpu_freelist_head extralist;
};
 
 struct pcpu_freelist_node {
@@ -4323,8 +4323,10 @@ static int bpf_prog_bind_map(union bpf_attr *attr)
 	used_maps_old = prog->aux->used_maps;
 
 	for (i = 0; i < prog->aux->used_map_cnt; i++)
-		if (used_maps_old[i] == map)
+		if (used_maps_old[i] == map) {
+			bpf_map_put(map);
 			goto out_unlock;
+		}
 
 	used_maps_new = kmalloc_array(prog->aux->used_map_cnt + 1,
 				      sizeof(used_maps_new[0]),
@@ -238,6 +238,8 @@ struct bpf_call_arg_meta {
 	u64 msize_max_value;
 	int ref_obj_id;
 	int func_id;
+	u32 btf_id;
+	u32 ret_btf_id;
 };
 
 struct btf *btf_vmlinux;
@@ -517,6 +519,7 @@ static const char * const reg_type_str[] = {
 	[PTR_TO_XDP_SOCK]	= "xdp_sock",
 	[PTR_TO_BTF_ID]		= "ptr_",
 	[PTR_TO_BTF_ID_OR_NULL]	= "ptr_or_null_",
+	[PTR_TO_PERCPU_BTF_ID]	= "percpu_ptr_",
 	[PTR_TO_MEM]		= "mem",
 	[PTR_TO_MEM_OR_NULL]	= "mem_or_null",
 	[PTR_TO_RDONLY_BUF]	= "rdonly_buf",
@@ -583,7 +586,9 @@ static void print_verifier_state(struct bpf_verifier_env *env,
 			/* reg->off should be 0 for SCALAR_VALUE */
 			verbose(env, "%lld", reg->var_off.value + reg->off);
 		} else {
-			if (t == PTR_TO_BTF_ID || t == PTR_TO_BTF_ID_OR_NULL)
+			if (t == PTR_TO_BTF_ID ||
+			    t == PTR_TO_BTF_ID_OR_NULL ||
+			    t == PTR_TO_PERCPU_BTF_ID)
 				verbose(env, "%s", kernel_type_name(reg->btf_id));
 			verbose(env, "(id=%d", reg->id);
 			if (reg_type_may_be_refcounted_or_null(t))
@@ -2204,6 +2209,7 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
 	case PTR_TO_RDONLY_BUF_OR_NULL:
 	case PTR_TO_RDWR_BUF:
 	case PTR_TO_RDWR_BUF_OR_NULL:
+	case PTR_TO_PERCPU_BTF_ID:
 		return true;
 	default:
 		return false;
@@ -2221,6 +2227,20 @@ static bool register_is_const(struct bpf_reg_state *reg)
 	return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
 }
 
+static bool __is_scalar_unbounded(struct bpf_reg_state *reg)
+{
+	return tnum_is_unknown(reg->var_off) &&
+	       reg->smin_value == S64_MIN && reg->smax_value == S64_MAX &&
+	       reg->umin_value == 0 && reg->umax_value == U64_MAX &&
+	       reg->s32_min_value == S32_MIN && reg->s32_max_value == S32_MAX &&
+	       reg->u32_min_value == 0 && reg->u32_max_value == U32_MAX;
+}
+
+static bool register_is_bounded(struct bpf_reg_state *reg)
+{
+	return reg->type == SCALAR_VALUE && !__is_scalar_unbounded(reg);
+}
+
 static bool __is_pointer_value(bool allow_ptr_leaks,
 			       const struct bpf_reg_state *reg)
 {
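register_is_bounded() replaces register_is_const() in check_stack_write() (next hunk), so a spilled scalar keeps its full range rather than only an exact constant. An illustrative fragment, not from the patch, of the kind of program this lets through:

  #include <linux/bpf.h>
  #include <bpf/bpf_helpers.h>

  SEC("socket")
  int bounded_spill(void *ctx)
  {
  	char buf[64] = {};
  	__u64 len = bpf_get_prandom_u32();	/* unknown scalar */

  	if (len >= sizeof(buf))
  		return 0;
  	/* if the compiler spills 'len' around here, only a constant value
  	 * used to survive the spill/fill; now the [0, 63] range does too */
  	return buf[len];
  }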
@@ -2272,7 +2292,7 @@ static int check_stack_write(struct bpf_verifier_env *env,
 	if (value_regno >= 0)
 		reg = &cur->regs[value_regno];
 
-	if (reg && size == BPF_REG_SIZE && register_is_const(reg) &&
+	if (reg && size == BPF_REG_SIZE && register_is_bounded(reg) &&
 	    !register_is_null(reg) && env->bpf_capable) {
 		if (dst_reg != BPF_REG_FP) {
 			/* The backtracking logic can only recognize explicit
@@ -2667,7 +2687,7 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
 	case BPF_PROG_TYPE_CGROUP_SKB:
 		if (t == BPF_WRITE)
 			return false;
-		/* fallthrough */
+		fallthrough;
 
 	/* Program types with direct read + write access go here! */
 	case BPF_PROG_TYPE_SCHED_CLS:
@@ -3978,6 +3998,7 @@ static const struct bpf_reg_types sock_types = {
 	},
 };
 
+#ifdef CONFIG_NET
 static const struct bpf_reg_types btf_id_sock_common_types = {
 	.types = {
 		PTR_TO_SOCK_COMMON,
@@ -3988,6 +4009,7 @@ static const struct bpf_reg_types btf_id_sock_common_types = {
 	},
 	.btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
 };
+#endif
 
 static const struct bpf_reg_types mem_types = {
 	.types = {
@@ -4017,6 +4039,7 @@ static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM } };
 static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
 static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
 static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
+static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PERCPU_BTF_ID } };
 
 static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
 	[ARG_PTR_TO_MAP_KEY]		= &map_key_value_types,
@@ -4030,7 +4053,9 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
 	[ARG_PTR_TO_CTX]		= &context_types,
 	[ARG_PTR_TO_CTX_OR_NULL]	= &context_types,
 	[ARG_PTR_TO_SOCK_COMMON]	= &sock_types,
+#ifdef CONFIG_NET
 	[ARG_PTR_TO_BTF_ID_SOCK_COMMON]	= &btf_id_sock_common_types,
+#endif
 	[ARG_PTR_TO_SOCKET]		= &fullsock_types,
 	[ARG_PTR_TO_SOCKET_OR_NULL]	= &fullsock_types,
 	[ARG_PTR_TO_BTF_ID]		= &btf_ptr_types,
@@ -4042,6 +4067,7 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
 	[ARG_PTR_TO_ALLOC_MEM_OR_NULL]	= &alloc_mem_types,
 	[ARG_PTR_TO_INT]		= &int_ptr_types,
 	[ARG_PTR_TO_LONG]		= &int_ptr_types,
+	[ARG_PTR_TO_PERCPU_BTF_ID]	= &percpu_btf_ptr_types,
 };
 
 static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
@@ -4205,6 +4231,12 @@ skip_type_check:
 		err = check_helper_mem_access(env, regno,
 					      meta->map_ptr->value_size, false,
 					      meta);
+	} else if (arg_type == ARG_PTR_TO_PERCPU_BTF_ID) {
+		if (!reg->btf_id) {
+			verbose(env, "Helper has invalid btf_id in R%d\n", regno);
+			return -EACCES;
+		}
+		meta->ret_btf_id = reg->btf_id;
 	} else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
 		if (meta->func_id == BPF_FUNC_spin_lock) {
 			if (process_spin_lock(env, regno, true))
@@ -5114,6 +5146,35 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 		regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL;
 		regs[BPF_REG_0].id = ++env->id_gen;
 		regs[BPF_REG_0].mem_size = meta.mem_size;
+	} else if (fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL ||
+		   fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID) {
+		const struct btf_type *t;
+
+		mark_reg_known_zero(env, regs, BPF_REG_0);
+		t = btf_type_skip_modifiers(btf_vmlinux, meta.ret_btf_id, NULL);
+		if (!btf_type_is_struct(t)) {
+			u32 tsize;
+			const struct btf_type *ret;
+			const char *tname;
+
+			/* resolve the type size of ksym. */
+			ret = btf_resolve_size(btf_vmlinux, t, &tsize);
+			if (IS_ERR(ret)) {
+				tname = btf_name_by_offset(btf_vmlinux, t->name_off);
+				verbose(env, "unable to resolve the size of type '%s': %ld\n",
+					tname, PTR_ERR(ret));
+				return -EINVAL;
+			}
+			regs[BPF_REG_0].type =
+				fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ?
+				PTR_TO_MEM : PTR_TO_MEM_OR_NULL;
+			regs[BPF_REG_0].mem_size = tsize;
+		} else {
+			regs[BPF_REG_0].type =
+				fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ?
+				PTR_TO_BTF_ID : PTR_TO_BTF_ID_OR_NULL;
+			regs[BPF_REG_0].btf_id = meta.ret_btf_id;
+		}
 	} else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL) {
 		int ret_btf_id;
 
@@ -5432,7 +5493,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 		/* smin_val represents the known value */
 		if (known && smin_val == 0 && opcode == BPF_ADD)
 			break;
-		/* fall-through */
+		fallthrough;
 	case PTR_TO_PACKET_END:
 	case PTR_TO_SOCKET:
 	case PTR_TO_SOCKET_OR_NULL:
@@ -6389,6 +6450,11 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
 	src_reg = NULL;
 	if (dst_reg->type != SCALAR_VALUE)
 		ptr_reg = dst_reg;
+	else
+		/* Make sure ID is cleared otherwise dst_reg min/max could be
+		 * incorrectly propagated into other registers by find_equal_scalars()
+		 */
+		dst_reg->id = 0;
 	if (BPF_SRC(insn->code) == BPF_X) {
 		src_reg = &regs[insn->src_reg];
 		if (src_reg->type != SCALAR_VALUE) {
@@ -6522,6 +6588,12 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
 			/* case: R1 = R2
 			 * copy register state to dest reg
 			 */
+			if (src_reg->type == SCALAR_VALUE && !src_reg->id)
+				/* Assign src and dst registers the same ID
+				 * that will be used by find_equal_scalars()
+				 * to propagate min/max range.
+				 */
+				src_reg->id = ++env->id_gen;
 			*dst_reg = *src_reg;
 			dst_reg->live |= REG_LIVE_WRITTEN;
 			dst_reg->subreg_def = DEF_NOT_SUBREG;
@@ -6534,6 +6606,11 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
 				return -EACCES;
 			} else if (src_reg->type == SCALAR_VALUE) {
 				*dst_reg = *src_reg;
+				/* Make sure ID is cleared otherwise
+				 * dst_reg min/max could be incorrectly
+				 * propagated into src_reg by find_equal_scalars()
+				 */
+				dst_reg->id = 0;
 				dst_reg->live |= REG_LIVE_WRITTEN;
 				dst_reg->subreg_def = env->insn_idx + 1;
 			} else {
@@ -7322,6 +7399,30 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn,
 	return true;
 }
 
+static void find_equal_scalars(struct bpf_verifier_state *vstate,
+			       struct bpf_reg_state *known_reg)
+{
+	struct bpf_func_state *state;
+	struct bpf_reg_state *reg;
+	int i, j;
+
+	for (i = 0; i <= vstate->curframe; i++) {
+		state = vstate->frame[i];
+		for (j = 0; j < MAX_BPF_REG; j++) {
+			reg = &state->regs[j];
+			if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
+				*reg = *known_reg;
+		}
+
+		bpf_for_each_spilled_reg(j, state, reg) {
+			if (!reg)
+				continue;
+			if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
+				*reg = *known_reg;
+		}
+	}
+}
+
 static int check_cond_jmp_op(struct bpf_verifier_env *env,
 			     struct bpf_insn *insn, int *insn_idx)
 {
@@ -7450,6 +7551,11 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
 			reg_combine_min_max(&other_branch_regs[insn->src_reg],
 					    &other_branch_regs[insn->dst_reg],
 					    src_reg, dst_reg, opcode);
+			if (src_reg->id) {
+				find_equal_scalars(this_branch, src_reg);
+				find_equal_scalars(other_branch, &other_branch_regs[insn->src_reg]);
+			}
+
 		}
 	} else if (dst_reg->type == SCALAR_VALUE) {
 		reg_set_min_max(&other_branch_regs[insn->dst_reg],
@@ -7457,6 +7563,11 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
 				opcode, is_jmp32);
 	}
 
+	if (dst_reg->type == SCALAR_VALUE && dst_reg->id) {
+		find_equal_scalars(this_branch, dst_reg);
+		find_equal_scalars(other_branch, &other_branch_regs[insn->dst_reg]);
+	}
+
 	/* detect if R == 0 where R is returned from bpf_map_lookup_elem().
 	 * NOTE: these optimizations below are related with pointer comparison
 	 *       which will never be JMP32.
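find_equal_scalars() is what lets a bounds check on one register carry over to its copies. An illustrative fragment, mirroring the spirit of the selftests added with this change:

  #include <linux/bpf.h>
  #include <bpf/bpf_helpers.h>

  SEC("socket")
  int equal_scalars(void *ctx)
  {
  	__u64 vals[16] = {};
  	__u64 i = bpf_get_prandom_u32();	/* unknown scalar */
  	__u64 j = i;	/* 64-bit mov: both registers now share one ID */

  	if (j < 16)		/* range learned on j in this branch ... */
  		return vals[i];	/* ... propagated to i, so this verifies */
  	return 0;
  }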
@@ -7488,6 +7599,7 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
 {
 	struct bpf_insn_aux_data *aux = cur_aux(env);
 	struct bpf_reg_state *regs = cur_regs(env);
+	struct bpf_reg_state *dst_reg;
 	struct bpf_map *map;
 	int err;
 
@@ -7504,25 +7616,45 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
 	if (err)
 		return err;
 
+	dst_reg = &regs[insn->dst_reg];
 	if (insn->src_reg == 0) {
 		u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
 
-		regs[insn->dst_reg].type = SCALAR_VALUE;
+		dst_reg->type = SCALAR_VALUE;
 		__mark_reg_known(&regs[insn->dst_reg], imm);
 		return 0;
 	}
 
+	if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
+		mark_reg_known_zero(env, regs, insn->dst_reg);
+
+		dst_reg->type = aux->btf_var.reg_type;
+		switch (dst_reg->type) {
+		case PTR_TO_MEM:
+			dst_reg->mem_size = aux->btf_var.mem_size;
+			break;
+		case PTR_TO_BTF_ID:
+		case PTR_TO_PERCPU_BTF_ID:
+			dst_reg->btf_id = aux->btf_var.btf_id;
+			break;
+		default:
+			verbose(env, "bpf verifier is misconfigured\n");
+			return -EFAULT;
+		}
+		return 0;
+	}
+
 	map = env->used_maps[aux->map_index];
 	mark_reg_known_zero(env, regs, insn->dst_reg);
-	regs[insn->dst_reg].map_ptr = map;
+	dst_reg->map_ptr = map;
 
 	if (insn->src_reg == BPF_PSEUDO_MAP_VALUE) {
-		regs[insn->dst_reg].type = PTR_TO_MAP_VALUE;
-		regs[insn->dst_reg].off = aux->map_off;
+		dst_reg->type = PTR_TO_MAP_VALUE;
+		dst_reg->off = aux->map_off;
 		if (map_value_has_spin_lock(map))
-			regs[insn->dst_reg].id = ++env->id_gen;
+			dst_reg->id = ++env->id_gen;
 	} else if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
-		regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
+		dst_reg->type = CONST_PTR_TO_MAP;
 	} else {
 		verbose(env, "bpf verifier is misconfigured\n");
 		return -EINVAL;
@@ -9424,6 +9556,92 @@ process_bpf_exit:
 	return 0;
 }
 
+/* replace pseudo btf_id with kernel symbol address */
+static int check_pseudo_btf_id(struct bpf_verifier_env *env,
+			       struct bpf_insn *insn,
+			       struct bpf_insn_aux_data *aux)
+{
+	u32 datasec_id, type, id = insn->imm;
+	const struct btf_var_secinfo *vsi;
+	const struct btf_type *datasec;
+	const struct btf_type *t;
+	const char *sym_name;
+	bool percpu = false;
+	u64 addr;
+	int i;
+
+	if (!btf_vmlinux) {
+		verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
+		return -EINVAL;
+	}
+
+	if (insn[1].imm != 0) {
+		verbose(env, "reserved field (insn[1].imm) is used in pseudo_btf_id ldimm64 insn.\n");
+		return -EINVAL;
+	}
+
+	t = btf_type_by_id(btf_vmlinux, id);
+	if (!t) {
+		verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id);
+		return -ENOENT;
+	}
+
+	if (!btf_type_is_var(t)) {
+		verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR.\n",
+			id);
+		return -EINVAL;
+	}
+
+	sym_name = btf_name_by_offset(btf_vmlinux, t->name_off);
+	addr = kallsyms_lookup_name(sym_name);
+	if (!addr) {
+		verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n",
+			sym_name);
+		return -ENOENT;
+	}
+
+	datasec_id = btf_find_by_name_kind(btf_vmlinux, ".data..percpu",
+					   BTF_KIND_DATASEC);
+	if (datasec_id > 0) {
+		datasec = btf_type_by_id(btf_vmlinux, datasec_id);
+		for_each_vsi(i, datasec, vsi) {
+			if (vsi->type == id) {
+				percpu = true;
+				break;
+			}
+		}
+	}
+
+	insn[0].imm = (u32)addr;
+	insn[1].imm = addr >> 32;
+
+	type = t->type;
+	t = btf_type_skip_modifiers(btf_vmlinux, type, NULL);
+	if (percpu) {
+		aux->btf_var.reg_type = PTR_TO_PERCPU_BTF_ID;
+		aux->btf_var.btf_id = type;
+	} else if (!btf_type_is_struct(t)) {
+		const struct btf_type *ret;
+		const char *tname;
+		u32 tsize;
+
+		/* resolve the type size of ksym. */
+		ret = btf_resolve_size(btf_vmlinux, t, &tsize);
+		if (IS_ERR(ret)) {
+			tname = btf_name_by_offset(btf_vmlinux, t->name_off);
+			verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n",
+				tname, PTR_ERR(ret));
+			return -EINVAL;
+		}
+		aux->btf_var.reg_type = PTR_TO_MEM;
+		aux->btf_var.mem_size = tsize;
+	} else {
+		aux->btf_var.reg_type = PTR_TO_BTF_ID;
+		aux->btf_var.btf_id = type;
+	}
+	return 0;
+}
+
 static int check_map_prealloc(struct bpf_map *map)
 {
 	return (map->map_type != BPF_MAP_TYPE_HASH &&
@@ -9534,10 +9752,14 @@ static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
 		map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
 }
 
-/* look for pseudo eBPF instructions that access map FDs and
- * replace them with actual map pointers
+/* find and rewrite pseudo imm in ld_imm64 instructions:
+ *
+ * 1. if it accesses map FD, replace it with actual map pointer.
+ * 2. if it accesses btf_id of a VAR, replace it with pointer to the var.
+ *
+ * NOTE: btf_vmlinux is required for converting pseudo btf_id.
  */
-static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
+static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
 {
 	struct bpf_insn *insn = env->prog->insnsi;
 	int insn_cnt = env->prog->len;
@@ -9578,6 +9800,14 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
 			/* valid generic load 64-bit imm */
 			goto next_insn;
 
+		if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) {
+			aux = &env->insn_aux_data[i];
+			err = check_pseudo_btf_id(env, insn, aux);
+			if (err)
+				return err;
+			goto next_insn;
+		}
+
 		/* In final convert_pseudo_ld_imm64() step, this is
 		 * converted into regular 64-bit imm load insn.
 		 */
@@ -10819,7 +11049,9 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
 		if (insn->imm == BPF_FUNC_map_lookup_elem &&
 		    ops->map_gen_lookup) {
 			cnt = ops->map_gen_lookup(map_ptr, insn_buf);
-			if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
+			if (cnt == -EOPNOTSUPP)
+				goto patch_map_ops_generic;
+			if (cnt <= 0 || cnt >= ARRAY_SIZE(insn_buf)) {
 				verbose(env, "bpf verifier is misconfigured\n");
 				return -EINVAL;
 			}
@@ -10849,7 +11081,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
 				     (int (*)(struct bpf_map *map, void *value))NULL));
 			BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
 				     (int (*)(struct bpf_map *map, void *value))NULL));
-
+patch_map_ops_generic:
 			switch (insn->imm) {
 			case BPF_FUNC_map_lookup_elem:
 				insn->imm = BPF_CAST_CALL(ops->map_lookup_elem) -
@@ -11633,10 +11865,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
 	if (is_priv)
 		env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
 
-	ret = replace_map_fd_with_map_ptr(env);
-	if (ret < 0)
-		goto skip_full_check;
-
 	if (bpf_prog_is_dev_bound(env->prog->aux)) {
 		ret = bpf_prog_offload_verifier_prep(env->prog);
 		if (ret)
@@ -11662,6 +11890,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
 	if (ret)
 		goto skip_full_check;
 
+	ret = resolve_pseudo_ldimm64(env);
+	if (ret < 0)
+		goto skip_full_check;
+
 	ret = check_cfg(env);
 	if (ret < 0)
 		goto skip_full_check;
@@ -1327,6 +1327,10 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return prog->aux->sleepable ? &bpf_copy_from_user_proto : NULL;
 	case BPF_FUNC_snprintf_btf:
 		return &bpf_snprintf_btf_proto;
+	case BPF_FUNC_bpf_per_cpu_ptr:
+		return &bpf_per_cpu_ptr_proto;
+	case BPF_FUNC_bpf_this_cpu_ptr:
+		return &bpf_this_cpu_ptr_proto;
 	default:
 		return NULL;
 	}
@@ -1776,7 +1780,9 @@ const struct bpf_verifier_ops raw_tracepoint_verifier_ops = {
 };
 
 const struct bpf_prog_ops raw_tracepoint_prog_ops = {
+#ifdef CONFIG_NET
 	.test_run = bpf_prog_test_run_raw_tp,
+#endif
 };
 
 const struct bpf_verifier_ops tracing_verifier_ops = {
@@ -4930,7 +4930,7 @@ EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
 
 static inline struct sk_buff *
 sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
-		   struct net_device *orig_dev)
+		   struct net_device *orig_dev, bool *another)
 {
 #ifdef CONFIG_NET_CLS_ACT
 	struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_ingress);
@@ -4974,7 +4974,11 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
 		 * redirecting to another netdev
 		 */
 		__skb_push(skb, skb->mac_len);
-		skb_do_redirect(skb);
+		if (skb_do_redirect(skb) == -EAGAIN) {
+			__skb_pull(skb, skb->mac_len);
+			*another = true;
+			break;
+		}
 		return NULL;
 	case TC_ACT_CONSUMED:
 		return NULL;
@@ -5163,7 +5167,12 @@ another_round:
 skip_taps:
 #ifdef CONFIG_NET_INGRESS
 	if (static_branch_unlikely(&ingress_needed_key)) {
-		skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev);
+		bool another = false;
+
+		skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev,
+					 &another);
+		if (another)
+			goto another_round;
 		if (!skb)
 			goto out;
 
@@ -76,6 +76,7 @@
 #include <net/bpf_sk_storage.h>
 #include <net/transp_v6.h>
 #include <linux/btf_ids.h>
+#include <net/tls.h>
 
 static const struct bpf_func_proto *
 bpf_sk_base_func_proto(enum bpf_func_id func_id);
@@ -2379,8 +2380,9 @@ out:
 
 /* Internal, non-exposed redirect flags. */
 enum {
 	BPF_F_NEIGH	= (1ULL << 1),
-#define BPF_F_REDIRECT_INTERNAL	(BPF_F_NEIGH)
+	BPF_F_PEER	= (1ULL << 2),
+#define BPF_F_REDIRECT_INTERNAL	(BPF_F_NEIGH | BPF_F_PEER)
 };
 
 BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
@@ -2429,19 +2431,35 @@ EXPORT_PER_CPU_SYMBOL_GPL(bpf_redirect_info);
 int skb_do_redirect(struct sk_buff *skb)
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+	struct net *net = dev_net(skb->dev);
 	struct net_device *dev;
 	u32 flags = ri->flags;
 
-	dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->tgt_index);
+	dev = dev_get_by_index_rcu(net, ri->tgt_index);
 	ri->tgt_index = 0;
-	if (unlikely(!dev)) {
-		kfree_skb(skb);
-		return -EINVAL;
-	}
+	ri->flags = 0;
+	if (unlikely(!dev))
+		goto out_drop;
+	if (flags & BPF_F_PEER) {
+		const struct net_device_ops *ops = dev->netdev_ops;
+
+		if (unlikely(!ops->ndo_get_peer_dev ||
+			     !skb_at_tc_ingress(skb)))
+			goto out_drop;
+		dev = ops->ndo_get_peer_dev(dev);
+		if (unlikely(!dev ||
+			     !is_skb_forwardable(dev, skb) ||
+			     net_eq(net, dev_net(dev))))
+			goto out_drop;
+		skb->dev = dev;
+		return -EAGAIN;
+	}
 	return flags & BPF_F_NEIGH ?
 	       __bpf_redirect_neigh(skb, dev) :
 	       __bpf_redirect(skb, dev, flags);
+out_drop:
+	kfree_skb(skb);
+	return -EINVAL;
 }
 
 BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
@@ -2465,6 +2483,27 @@ static const struct bpf_func_proto bpf_redirect_proto = {
 	.arg2_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_2(bpf_redirect_peer, u32, ifindex, u64, flags)
+{
+	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+
+	if (unlikely(flags))
+		return TC_ACT_SHOT;
+
+	ri->flags = BPF_F_PEER;
+	ri->tgt_index = ifindex;
+
+	return TC_ACT_REDIRECT;
+}
+
+static const struct bpf_func_proto bpf_redirect_peer_proto = {
+	.func		= bpf_redirect_peer,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_ANYTHING,
+	.arg2_type	= ARG_ANYTHING,
+};
+
 BPF_CALL_2(bpf_redirect_neigh, u32, ifindex, u64, flags)
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
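An illustrative tc classifier using the new helper (a sketch; the ifindex plumbing is an assumption): on the host-side veth, ingress traffic can be switched directly into the container's peer device in the other netns, skipping the per-CPU backlog queue that a regular bpf_redirect() to the veth would traverse:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

volatile const __u32 peer_ifindex = 0;	/* filled in by the loader */

SEC("classifier")
int tc_peer_ingress(struct __sk_buff *skb)
{
	/* flags must be 0 for now, see bpf_redirect_peer() above */
	return bpf_redirect_peer(peer_ifindex, 0);
}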
@@ -3479,6 +3518,48 @@ static u32 __bpf_skb_max_len(const struct sk_buff *skb)
 			  SKB_MAX_ALLOC;
 }
 
+BPF_CALL_4(sk_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
+	   u32, mode, u64, flags)
+{
+	u32 len_diff_abs = abs(len_diff);
+	bool shrink = len_diff < 0;
+	int ret = 0;
+
+	if (unlikely(flags || mode))
+		return -EINVAL;
+	if (unlikely(len_diff_abs > 0xfffU))
+		return -EFAULT;
+
+	if (!shrink) {
+		ret = skb_cow(skb, len_diff);
+		if (unlikely(ret < 0))
+			return ret;
+		__skb_push(skb, len_diff_abs);
+		memset(skb->data, 0, len_diff_abs);
+	} else {
+		if (unlikely(!pskb_may_pull(skb, len_diff_abs)))
+			return -ENOMEM;
+		__skb_pull(skb, len_diff_abs);
+	}
+	bpf_compute_data_end_sk_skb(skb);
+	if (tls_sw_has_ctx_rx(skb->sk)) {
+		struct strp_msg *rxm = strp_msg(skb);
+
+		rxm->full_len += len_diff;
+	}
+	return ret;
+}
+
+static const struct bpf_func_proto sk_skb_adjust_room_proto = {
+	.func		= sk_skb_adjust_room,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_ANYTHING,
+	.arg4_type	= ARG_ANYTHING,
+};
+
 BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
 	   u32, mode, u64, flags)
 {
@@ -4784,6 +4865,10 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
 			else
 				icsk->icsk_user_timeout = val;
 			break;
+		case TCP_NOTSENT_LOWAT:
+			tp->notsent_lowat = val;
+			sk->sk_write_space(sk);
+			break;
 		default:
 			ret = -EINVAL;
 		}
@@ -5149,7 +5234,6 @@ static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params,
 	memcpy(params->smac, dev->dev_addr, ETH_ALEN);
 	params->h_vlan_TCI = 0;
 	params->h_vlan_proto = 0;
-	params->ifindex = dev->ifindex;
 
 	return 0;
 }
@@ -5246,6 +5330,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 	dev = nhc->nhc_dev;
 
 	params->rt_metric = res.fi->fib_priority;
+	params->ifindex = dev->ifindex;
 
 	/* xdp and cls_bpf programs are run in RCU-bh so
 	 * rcu_read_lock_bh is not needed here
@@ -5371,6 +5456,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 
 	dev = res.nh->fib_nh_dev;
 	params->rt_metric = res.f6i->fib6_metric;
+	params->ifindex = dev->ifindex;
 
 	/* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is
 	 * not needed here.
@@ -6745,6 +6831,7 @@ bool bpf_helper_changes_pkt_data(void *func)
 	    func == bpf_skb_change_tail ||
 	    func == sk_skb_change_tail ||
 	    func == bpf_skb_adjust_room ||
+	    func == sk_skb_adjust_room ||
 	    func == bpf_skb_pull_data ||
 	    func == sk_skb_pull_data ||
 	    func == bpf_clone_redirect ||
@@ -7005,6 +7092,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_redirect_proto;
 	case BPF_FUNC_redirect_neigh:
 		return &bpf_redirect_neigh_proto;
+	case BPF_FUNC_redirect_peer:
+		return &bpf_redirect_peer_proto;
 	case BPF_FUNC_get_route_realm:
 		return &bpf_get_route_realm_proto;
 	case BPF_FUNC_get_hash_recalc:
@@ -7218,6 +7307,8 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &sk_skb_change_tail_proto;
 	case BPF_FUNC_skb_change_head:
 		return &sk_skb_change_head_proto;
+	case BPF_FUNC_skb_adjust_room:
+		return &sk_skb_adjust_room_proto;
 	case BPF_FUNC_get_socket_cookie:
 		return &bpf_get_socket_cookie_proto;
 	case BPF_FUNC_get_socket_uid:

net/core/skmsg.c
@@ -433,10 +433,12 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
 static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
 			       u32 off, u32 len, bool ingress)
 {
-	if (ingress)
-		return sk_psock_skb_ingress(psock, skb);
-	else
+	if (!ingress) {
+		if (!sock_writeable(psock->sk))
+			return -EAGAIN;
 		return skb_send_sock_locked(psock->sk, skb, off, len);
+	}
+	return sk_psock_skb_ingress(psock, skb);
 }
 
 static void sk_psock_backlog(struct work_struct *work)
@@ -625,6 +627,8 @@ void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
 	rcu_assign_sk_user_data(sk, NULL);
 	if (psock->progs.skb_parser)
 		sk_psock_stop_strp(sk, psock);
+	else if (psock->progs.skb_verdict)
+		sk_psock_stop_verdict(sk, psock);
 	write_unlock_bh(&sk->sk_callback_lock);
 	sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
 
@@ -682,19 +686,8 @@ EXPORT_SYMBOL_GPL(sk_psock_msg_verdict);
 static int sk_psock_bpf_run(struct sk_psock *psock, struct bpf_prog *prog,
 			    struct sk_buff *skb)
 {
-	int ret;
-
-	skb->sk = psock->sk;
 	bpf_compute_data_end_sk_skb(skb);
-	ret = bpf_prog_run_pin_on_cpu(prog, skb);
-	/* strparser clones the skb before handing it to a upper layer,
-	 * meaning skb_orphan has been called. We NULL sk on the way out
-	 * to ensure we don't trigger a BUG_ON() in skb/sk operations
-	 * later and because we are not charging the memory of this skb
-	 * to any socket yet.
-	 */
-	skb->sk = NULL;
-	return ret;
+	return bpf_prog_run_pin_on_cpu(prog, skb);
 }
 
 static struct sk_psock *sk_psock_from_strp(struct strparser *strp)
@@ -709,38 +702,35 @@ static void sk_psock_skb_redirect(struct sk_buff *skb)
 {
 	struct sk_psock *psock_other;
 	struct sock *sk_other;
-	bool ingress;
 
 	sk_other = tcp_skb_bpf_redirect_fetch(skb);
+	/* This error is a buggy BPF program, it returned a redirect
+	 * return code, but then didn't set a redirect interface.
+	 */
 	if (unlikely(!sk_other)) {
 		kfree_skb(skb);
 		return;
 	}
 	psock_other = sk_psock(sk_other);
+	/* This error indicates the socket is being torn down or had another
+	 * error that caused the pipe to break. We can't send a packet on
+	 * a socket that is in this state so we drop the skb.
+	 */
 	if (!psock_other || sock_flag(sk_other, SOCK_DEAD) ||
 	    !sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED)) {
 		kfree_skb(skb);
 		return;
 	}
 
-	ingress = tcp_skb_bpf_ingress(skb);
-	if ((!ingress && sock_writeable(sk_other)) ||
-	    (ingress &&
-	     atomic_read(&sk_other->sk_rmem_alloc) <=
-	     sk_other->sk_rcvbuf)) {
-		if (!ingress)
-			skb_set_owner_w(skb, sk_other);
-		skb_queue_tail(&psock_other->ingress_skb, skb);
-		schedule_work(&psock_other->work);
-	} else {
-		kfree_skb(skb);
-	}
+	skb_queue_tail(&psock_other->ingress_skb, skb);
+	schedule_work(&psock_other->work);
 }
 
-static void sk_psock_tls_verdict_apply(struct sk_buff *skb, int verdict)
+static void sk_psock_tls_verdict_apply(struct sk_buff *skb, struct sock *sk, int verdict)
 {
 	switch (verdict) {
 	case __SK_REDIRECT:
+		skb_set_owner_r(skb, sk);
 		sk_psock_skb_redirect(skb);
 		break;
 	case __SK_PASS:
@@ -758,11 +748,17 @@ int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb)
 	rcu_read_lock();
 	prog = READ_ONCE(psock->progs.skb_verdict);
 	if (likely(prog)) {
+		/* We skip full set_owner_r here because if we do a SK_PASS
+		 * or SK_DROP we can skip skb memory accounting and use the
+		 * TLS context.
+		 */
+		skb->sk = psock->sk;
 		tcp_skb_bpf_redirect_clear(skb);
 		ret = sk_psock_bpf_run(psock, prog, skb);
 		ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb));
+		skb->sk = NULL;
 	}
-	sk_psock_tls_verdict_apply(skb, ret);
+	sk_psock_tls_verdict_apply(skb, psock->sk, ret);
 	rcu_read_unlock();
 	return ret;
 }
@@ -771,7 +767,9 @@ EXPORT_SYMBOL_GPL(sk_psock_tls_strp_read);
 static void sk_psock_verdict_apply(struct sk_psock *psock,
 				   struct sk_buff *skb, int verdict)
 {
+	struct tcp_skb_cb *tcp;
 	struct sock *sk_other;
+	int err = -EIO;
 
 	switch (verdict) {
 	case __SK_PASS:
@@ -780,16 +778,24 @@ static void sk_psock_verdict_apply(struct sk_psock *psock,
 		    !sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
 			goto out_free;
 		}
-		if (atomic_read(&sk_other->sk_rmem_alloc) <=
-		    sk_other->sk_rcvbuf) {
-			struct tcp_skb_cb *tcp = TCP_SKB_CB(skb);
 
-			tcp->bpf.flags |= BPF_F_INGRESS;
+		tcp = TCP_SKB_CB(skb);
+		tcp->bpf.flags |= BPF_F_INGRESS;
+
+		/* If the queue is empty then we can submit directly
+		 * into the msg queue. If its not empty we have to
+		 * queue work otherwise we may get OOO data. Otherwise,
+		 * if sk_psock_skb_ingress errors will be handled by
+		 * retrying later from workqueue.
+		 */
+		if (skb_queue_empty(&psock->ingress_skb)) {
+			err = sk_psock_skb_ingress(psock, skb);
+		}
+		if (err < 0) {
 			skb_queue_tail(&psock->ingress_skb, skb);
 			schedule_work(&psock->work);
-			break;
 		}
-		goto out_free;
+		break;
 	case __SK_REDIRECT:
 		sk_psock_skb_redirect(skb);
 		break;
@@ -814,9 +820,9 @@ static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb)
 		kfree_skb(skb);
 		goto out;
 	}
+	skb_set_owner_r(skb, sk);
 	prog = READ_ONCE(psock->progs.skb_verdict);
 	if (likely(prog)) {
-		skb_orphan(skb);
 		tcp_skb_bpf_redirect_clear(skb);
 		ret = sk_psock_bpf_run(psock, prog, skb);
 		ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb));
@@ -839,8 +845,11 @@ static int sk_psock_strp_parse(struct strparser *strp, struct sk_buff *skb)
 
 	rcu_read_lock();
 	prog = READ_ONCE(psock->progs.skb_parser);
-	if (likely(prog))
+	if (likely(prog)) {
+		skb->sk = psock->sk;
 		ret = sk_psock_bpf_run(psock, prog, skb);
+		skb->sk = NULL;
+	}
 	rcu_read_unlock();
 	return ret;
 }
@@ -864,6 +873,57 @@ static void sk_psock_strp_data_ready(struct sock *sk)
 	rcu_read_unlock();
 }
 
+static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb,
+				 unsigned int offset, size_t orig_len)
+{
+	struct sock *sk = (struct sock *)desc->arg.data;
+	struct sk_psock *psock;
+	struct bpf_prog *prog;
+	int ret = __SK_DROP;
+	int len = skb->len;
+
+	/* clone here so sk_eat_skb() in tcp_read_sock does not drop our data */
+	skb = skb_clone(skb, GFP_ATOMIC);
+	if (!skb) {
+		desc->error = -ENOMEM;
+		return 0;
+	}
+
+	rcu_read_lock();
+	psock = sk_psock(sk);
+	if (unlikely(!psock)) {
+		len = 0;
+		kfree_skb(skb);
+		goto out;
+	}
+	skb_set_owner_r(skb, sk);
+	prog = READ_ONCE(psock->progs.skb_verdict);
+	if (likely(prog)) {
+		tcp_skb_bpf_redirect_clear(skb);
+		ret = sk_psock_bpf_run(psock, prog, skb);
+		ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb));
+	}
+	sk_psock_verdict_apply(psock, skb, ret);
+out:
+	rcu_read_unlock();
+	return len;
+}
+
+static void sk_psock_verdict_data_ready(struct sock *sk)
+{
+	struct socket *sock = sk->sk_socket;
+	read_descriptor_t desc;
+
+	if (unlikely(!sock || !sock->ops || !sock->ops->read_sock))
+		return;
+
+	desc.arg.data = sk;
+	desc.error = 0;
+	desc.count = 1;
+
+	sock->ops->read_sock(sk, &desc, sk_psock_verdict_recv);
+}
+
 static void sk_psock_write_space(struct sock *sk)
 {
 	struct sk_psock *psock;
@@ -893,6 +953,19 @@ int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
 	return strp_init(&psock->parser.strp, sk, &cb);
 }
 
+void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock)
+{
+	struct sk_psock_parser *parser = &psock->parser;
+
+	if (parser->enabled)
+		return;
+
+	parser->saved_data_ready = sk->sk_data_ready;
+	sk->sk_data_ready = sk_psock_verdict_data_ready;
+	sk->sk_write_space = sk_psock_write_space;
+	parser->enabled = true;
+}
+
 void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
 {
 	struct sk_psock_parser *parser = &psock->parser;
@@ -918,3 +991,15 @@ void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
 	strp_stop(&parser->strp);
 	parser->enabled = false;
 }
+
+void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock)
+{
+	struct sk_psock_parser *parser = &psock->parser;
+
+	if (!parser->enabled)
+		return;
+
+	sk->sk_data_ready = parser->saved_data_ready;
+	parser->saved_data_ready = NULL;
+	parser->enabled = false;
+}
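With the pieces above in place, a verdict program can be attached to a sockmap without a companion parser; user space then looks roughly like this (a sketch; the fds are assumed to come from an already loaded object):

#include <bpf/bpf.h>

int attach_verdict_only(int sockmap_fd, int verdict_prog_fd)
{
	/* With no BPF_SK_SKB_STREAM_PARSER attached, the psock runs the
	 * verdict program from ->read_sock() via sk_psock_verdict_recv()
	 * instead of going through strparser.
	 */
	return bpf_prog_attach(verdict_prog_fd, sockmap_fd,
			       BPF_SK_SKB_STREAM_VERDICT, 0);
}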
@@ -148,8 +148,8 @@ static void sock_map_add_link(struct sk_psock *psock,
 static void sock_map_del_link(struct sock *sk,
 			      struct sk_psock *psock, void *link_raw)
 {
+	bool strp_stop = false, verdict_stop = false;
 	struct sk_psock_link *link, *tmp;
-	bool strp_stop = false;
 
 	spin_lock_bh(&psock->link_lock);
 	list_for_each_entry_safe(link, tmp, &psock->link, list) {
@@ -159,14 +159,19 @@ static void sock_map_del_link(struct sock *sk,
 						     map);
 			if (psock->parser.enabled && stab->progs.skb_parser)
 				strp_stop = true;
+			if (psock->parser.enabled && stab->progs.skb_verdict)
+				verdict_stop = true;
 			list_del(&link->list);
 			sk_psock_free_link(link);
 		}
 	}
 	spin_unlock_bh(&psock->link_lock);
-	if (strp_stop) {
+	if (strp_stop || verdict_stop) {
 		write_lock_bh(&sk->sk_callback_lock);
-		sk_psock_stop_strp(sk, psock);
+		if (strp_stop)
+			sk_psock_stop_strp(sk, psock);
+		else
+			sk_psock_stop_verdict(sk, psock);
 		write_unlock_bh(&sk->sk_callback_lock);
 	}
 }
@@ -230,16 +235,16 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
 {
 	struct bpf_prog *msg_parser, *skb_parser, *skb_verdict;
 	struct sk_psock *psock;
-	bool skb_progs;
 	int ret;
 
 	skb_verdict = READ_ONCE(progs->skb_verdict);
 	skb_parser = READ_ONCE(progs->skb_parser);
-	skb_progs = skb_parser && skb_verdict;
-	if (skb_progs) {
+	if (skb_verdict) {
 		skb_verdict = bpf_prog_inc_not_zero(skb_verdict);
 		if (IS_ERR(skb_verdict))
 			return PTR_ERR(skb_verdict);
+	}
+	if (skb_parser) {
 		skb_parser = bpf_prog_inc_not_zero(skb_parser);
 		if (IS_ERR(skb_parser)) {
 			bpf_prog_put(skb_verdict);
@@ -264,7 +269,8 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
 
 	if (psock) {
 		if ((msg_parser && READ_ONCE(psock->progs.msg_parser)) ||
-		    (skb_progs && READ_ONCE(psock->progs.skb_parser))) {
+		    (skb_parser && READ_ONCE(psock->progs.skb_parser)) ||
+		    (skb_verdict && READ_ONCE(psock->progs.skb_verdict))) {
 			sk_psock_put(sk, psock);
 			ret = -EBUSY;
 			goto out_progs;
@@ -285,28 +291,31 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
 		goto out_drop;
 
 	write_lock_bh(&sk->sk_callback_lock);
-	if (skb_progs && !psock->parser.enabled) {
+	if (skb_parser && skb_verdict && !psock->parser.enabled) {
 		ret = sk_psock_init_strp(sk, psock);
-		if (ret) {
-			write_unlock_bh(&sk->sk_callback_lock);
-			goto out_drop;
-		}
+		if (ret)
+			goto out_unlock_drop;
 		psock_set_prog(&psock->progs.skb_verdict, skb_verdict);
 		psock_set_prog(&psock->progs.skb_parser, skb_parser);
 		sk_psock_start_strp(sk, psock);
+	} else if (!skb_parser && skb_verdict && !psock->parser.enabled) {
+		psock_set_prog(&psock->progs.skb_verdict, skb_verdict);
+		sk_psock_start_verdict(sk, psock);
 	}
 	write_unlock_bh(&sk->sk_callback_lock);
 	return 0;
+out_unlock_drop:
+	write_unlock_bh(&sk->sk_callback_lock);
 out_drop:
 	sk_psock_put(sk, psock);
 out_progs:
 	if (msg_parser)
 		bpf_prog_put(msg_parser);
 out:
-	if (skb_progs) {
+	if (skb_verdict)
 		bpf_prog_put(skb_verdict);
+	if (skb_parser)
 		bpf_prog_put(skb_parser);
-	}
 	return ret;
 }
 
@@ -548,7 +548,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
 	newtp->fastopen_req = NULL;
 	RCU_INIT_POINTER(newtp->fastopen_rsk, NULL);
 
-	bpf_skops_init_child(sk, newsk);
 	tcp_bpf_clone(sk, newsk);
 
 	__TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS);
@@ -3,9 +3,6 @@
 #include <net/xsk_buff_pool.h>
 #include <net/xdp_sock.h>
 #include <net/xdp_sock_drv.h>
-#include <linux/dma-direct.h>
-#include <linux/dma-noncoherent.h>
-#include <linux/swiotlb.h>
 
 #include "xsk_queue.h"
 #include "xdp_umem.h"
@@ -15,6 +15,10 @@
 
 struct xdp_ring {
 	u32 producer ____cacheline_aligned_in_smp;
+	/* Hinder the adjacent cache prefetcher to prefetch the consumer
+	 * pointer if the producer pointer is touched and vice versa.
+	 */
+	u32 pad ____cacheline_aligned_in_smp;
 	u32 consumer ____cacheline_aligned_in_smp;
 	u32 flags;
 };
@@ -132,7 +132,7 @@ static int xsk_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 	return 0;
 }
 
-static u32 xsk_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
+static int xsk_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
 {
 	const int ret = BPF_REG_0, mp = BPF_REG_1, index = BPF_REG_2;
 	struct bpf_insn *insn = insn_buf;
@@ -98,8 +98,8 @@ test_map_in_map-objs := test_map_in_map_user.o
 per_socket_stats_example-objs := cookie_uid_helper_example.o
 xdp_redirect-objs := xdp_redirect_user.o
 xdp_redirect_map-objs := xdp_redirect_map_user.o
-xdp_redirect_cpu-objs := bpf_load.o xdp_redirect_cpu_user.o
-xdp_monitor-objs := bpf_load.o xdp_monitor_user.o
+xdp_redirect_cpu-objs := xdp_redirect_cpu_user.o
+xdp_monitor-objs := xdp_monitor_user.o
 xdp_rxq_info-objs := xdp_rxq_info_user.o
 syscall_tp-objs := syscall_tp_user.o
 cpustat-objs := cpustat_user.o
@@ -211,6 +211,8 @@ TPROGLDLIBS_xsk_fwd += -pthread
 # make M=samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
 LLC ?= llc
 CLANG ?= clang
+OPT ?= opt
+LLVM_DIS ?= llvm-dis
 LLVM_OBJCOPY ?= llvm-objcopy
 BTF_PAHOLE ?= pahole
 
@@ -303,6 +305,11 @@ $(obj)/hbm_edt_kern.o: $(src)/hbm.h $(src)/hbm_kern.h
 # asm/sysreg.h - inline assembly used by it is incompatible with llvm.
 # But, there is no easy way to fix it, so just exclude it since it is
 # useless for BPF samples.
+# below we use long chain of commands, clang | opt | llvm-dis | llc,
+# to generate final object file. 'clang' compiles the source into IR
+# with native target, e.g., x64, arm64, etc. 'opt' does bpf CORE IR builtin
+# processing (llvm12) and IR optimizations. 'llvm-dis' converts
+# 'opt' output to IR, and finally 'llc' generates bpf byte code.
 $(obj)/%.o: $(src)/%.c
 	@echo "  CLANG-bpf " $@
 	$(Q)$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(BPF_EXTRA_CFLAGS) \
@@ -314,7 +321,9 @@ $(obj)/%.o: $(src)/%.c
 		-Wno-address-of-packed-member -Wno-tautological-compare \
 		-Wno-unknown-warning-option $(CLANG_ARCH_ARGS) \
 		-I$(srctree)/samples/bpf/ -include asm_goto_workaround.h \
-		-O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf $(LLC_FLAGS) -filetype=obj -o $@
+		-O2 -emit-llvm -Xclang -disable-llvm-passes -c $< -o - | \
+		$(OPT) -O2 -mtriple=bpf-pc-linux | $(LLVM_DIS) | \
+		$(LLC) -march=bpf $(LLC_FLAGS) -filetype=obj -o $@
 ifeq ($(DWARF2BTF),y)
 	$(BTF_PAHOLE) -J $@
 endif
@@ -40,6 +40,7 @@
 #include <errno.h>
 #include <fcntl.h>
 #include <linux/unistd.h>
+#include <linux/compiler.h>
 
 #include <linux/bpf.h>
 #include <bpf/bpf.h>
@@ -483,7 +484,7 @@ int main(int argc, char **argv)
 				"Option -%c requires an argument.\n\n",
 				optopt);
 		case 'h':
-			fallthrough;
+			__fallthrough;
 		default:
 			Usage();
 			return 0;
@@ -6,21 +6,21 @@
 #include <uapi/linux/bpf.h>
 #include <bpf/bpf_helpers.h>
 
-struct bpf_map_def SEC("maps") redirect_err_cnt = {
-	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
-	.key_size = sizeof(u32),
-	.value_size = sizeof(u64),
-	.max_entries = 2,
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__type(key, u32);
+	__type(value, u64);
+	__uint(max_entries, 2);
 	/* TODO: have entries for all possible errno's */
-};
+} redirect_err_cnt SEC(".maps");
 
 #define XDP_UNKNOWN	XDP_REDIRECT + 1
-struct bpf_map_def SEC("maps") exception_cnt = {
-	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
-	.key_size = sizeof(u32),
-	.value_size = sizeof(u64),
-	.max_entries = XDP_UNKNOWN + 1,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__type(key, u32);
+	__type(value, u64);
+	__uint(max_entries, XDP_UNKNOWN + 1);
+} exception_cnt SEC(".maps");
 
 /* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_redirect/format
  * Code in:                kernel/include/trace/events/xdp.h
@@ -129,19 +129,19 @@ struct datarec {
 };
 #define MAX_CPUS 64
 
-struct bpf_map_def SEC("maps") cpumap_enqueue_cnt = {
-	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
-	.key_size = sizeof(u32),
-	.value_size = sizeof(struct datarec),
-	.max_entries = MAX_CPUS,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__type(key, u32);
+	__type(value, struct datarec);
+	__uint(max_entries, MAX_CPUS);
+} cpumap_enqueue_cnt SEC(".maps");
 
-struct bpf_map_def SEC("maps") cpumap_kthread_cnt = {
-	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
-	.key_size = sizeof(u32),
-	.value_size = sizeof(struct datarec),
-	.max_entries = 1,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__type(key, u32);
+	__type(value, struct datarec);
+	__uint(max_entries, 1);
+} cpumap_kthread_cnt SEC(".maps");
 
 /* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format
  * Code in:         kernel/include/trace/events/xdp.h
@@ -210,12 +210,12 @@ int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx)
 	return 0;
 }
 
-struct bpf_map_def SEC("maps") devmap_xmit_cnt = {
-	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
-	.key_size = sizeof(u32),
-	.value_size = sizeof(struct datarec),
-	.max_entries = 1,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__type(key, u32);
+	__type(value, struct datarec);
+	__uint(max_entries, 1);
+} devmap_xmit_cnt SEC(".maps");
 
 /* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_devmap_xmit/format
  * Code in:         kernel/include/trace/events/xdp.h
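For reference, a BPF_MAP_TYPE_PERCPU_ARRAY hands user space one value per possible CPU on lookup; summing a u64 counter generically looks like this (a sketch, not code from this sample; the CPU cap is an assumption):

#include <bpf/bpf.h>
#include <bpf/libbpf.h>

#define MAX_POSSIBLE_CPUS 1024	/* assumption, large enough in practice */

static unsigned long long sum_percpu_u64(int map_fd, unsigned int key)
{
	unsigned long long values[MAX_POSSIBLE_CPUS];
	int nr_cpus = libbpf_num_possible_cpus();
	unsigned long long sum = 0;
	int i;

	if (nr_cpus < 0 || nr_cpus > MAX_POSSIBLE_CPUS)
		return 0;
	if (bpf_map_lookup_elem(map_fd, &key, values))
		return 0;
	for (i = 0; i < nr_cpus; i++)
		sum += values[i];
	return sum;
}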
@@ -26,12 +26,37 @@ static const char *__doc_err_only__=
 #include <net/if.h>
 #include <time.h>
 
+#include <signal.h>
 #include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
 #include "bpf_util.h"
 
+enum map_type {
+	REDIRECT_ERR_CNT,
+	EXCEPTION_CNT,
+	CPUMAP_ENQUEUE_CNT,
+	CPUMAP_KTHREAD_CNT,
+	DEVMAP_XMIT_CNT,
+};
+
+static const char *const map_type_strings[] = {
+	[REDIRECT_ERR_CNT] = "redirect_err_cnt",
+	[EXCEPTION_CNT] = "exception_cnt",
+	[CPUMAP_ENQUEUE_CNT] = "cpumap_enqueue_cnt",
+	[CPUMAP_KTHREAD_CNT] = "cpumap_kthread_cnt",
+	[DEVMAP_XMIT_CNT] = "devmap_xmit_cnt",
+};
+
+#define NUM_MAP 5
+#define NUM_TP 8
+
+static int tp_cnt;
+static int map_cnt;
 static int verbose = 1;
 static bool debug = false;
+struct bpf_map *map_data[NUM_MAP] = {};
+struct bpf_link *tp_links[NUM_TP] = {};
+struct bpf_object *obj;
 
 static const struct option long_options[] = {
 	{"help",	no_argument,		NULL, 'h' },
@@ -41,6 +66,16 @@ static const struct option long_options[] = {
 	{0, 0, NULL, 0 }
 };
 
+static void int_exit(int sig)
+{
+	/* Detach tracepoints */
+	while (tp_cnt)
+		bpf_link__destroy(tp_links[--tp_cnt]);
+
+	bpf_object__close(obj);
+	exit(0);
+}
+
 /* C standard specifies two constants, EXIT_SUCCESS(0) and EXIT_FAILURE(1) */
 #define EXIT_FAIL_MEM	5
 
@@ -483,23 +518,23 @@ static bool stats_collect(struct stats_record *rec)
 	 * this can happen by someone running perf-record -e
 	 */
 
-	fd = map_data[0].fd; /* map0: redirect_err_cnt */
+	fd = bpf_map__fd(map_data[REDIRECT_ERR_CNT]);
 	for (i = 0; i < REDIR_RES_MAX; i++)
 		map_collect_record_u64(fd, i, &rec->xdp_redirect[i]);
 
-	fd = map_data[1].fd; /* map1: exception_cnt */
+	fd = bpf_map__fd(map_data[EXCEPTION_CNT]);
 	for (i = 0; i < XDP_ACTION_MAX; i++) {
 		map_collect_record_u64(fd, i, &rec->xdp_exception[i]);
 	}
 
-	fd = map_data[2].fd; /* map2: cpumap_enqueue_cnt */
+	fd = bpf_map__fd(map_data[CPUMAP_ENQUEUE_CNT]);
 	for (i = 0; i < MAX_CPUS; i++)
 		map_collect_record(fd, i, &rec->xdp_cpumap_enqueue[i]);
 
-	fd = map_data[3].fd; /* map3: cpumap_kthread_cnt */
+	fd = bpf_map__fd(map_data[CPUMAP_KTHREAD_CNT]);
 	map_collect_record(fd, 0, &rec->xdp_cpumap_kthread);
 
-	fd = map_data[4].fd; /* map4: devmap_xmit_cnt */
+	fd = bpf_map__fd(map_data[DEVMAP_XMIT_CNT]);
 	map_collect_record(fd, 0, &rec->xdp_devmap_xmit);
 
 	return true;
|
|||||||
|
|
||||||
/* TODO Need more advanced stats on error types */
|
/* TODO Need more advanced stats on error types */
|
||||||
if (verbose) {
|
if (verbose) {
|
||||||
printf(" - Stats map0: %s\n", map_data[0].name);
|
printf(" - Stats map0: %s\n", bpf_map__name(map_data[0]));
|
||||||
printf(" - Stats map1: %s\n", map_data[1].name);
|
printf(" - Stats map1: %s\n", bpf_map__name(map_data[1]));
|
||||||
printf("\n");
|
printf("\n");
|
||||||
}
|
}
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
@@ -618,44 +653,51 @@ static void stats_poll(int interval, bool err_only)
 
 static void print_bpf_prog_info(void)
 {
-	int i;
+	struct bpf_program *prog;
+	struct bpf_map *map;
+	int i = 0;
 
 	/* Prog info */
-	printf("Loaded BPF prog have %d bpf program(s)\n", prog_cnt);
-	for (i = 0; i < prog_cnt; i++) {
-		printf(" - prog_fd[%d] = fd(%d)\n", i, prog_fd[i]);
+	printf("Loaded BPF prog have %d bpf program(s)\n", tp_cnt);
+	bpf_object__for_each_program(prog, obj) {
+		printf(" - prog_fd[%d] = fd(%d)\n", i, bpf_program__fd(prog));
+		i++;
 	}
 
+	i = 0;
 	/* Maps info */
-	printf("Loaded BPF prog have %d map(s)\n", map_data_count);
-	for (i = 0; i < map_data_count; i++) {
-		char *name = map_data[i].name;
-		int fd      = map_data[i].fd;
+	printf("Loaded BPF prog have %d map(s)\n", map_cnt);
+	bpf_object__for_each_map(map, obj) {
+		const char *name = bpf_map__name(map);
+		int fd = bpf_map__fd(map);
 
 		printf(" - map_data[%d] = fd(%d) name:%s\n", i, fd, name);
+		i++;
 	}
 
 	/* Event info */
-	printf("Searching for (max:%d) event file descriptor(s)\n", prog_cnt);
-	for (i = 0; i < prog_cnt; i++) {
-		if (event_fd[i] != -1)
-			printf(" - event_fd[%d] = fd(%d)\n", i, event_fd[i]);
+	printf("Searching for (max:%d) event file descriptor(s)\n", tp_cnt);
+	for (i = 0; i < tp_cnt; i++) {
+		int fd = bpf_link__fd(tp_links[i]);
+
+		if (fd != -1)
+			printf(" - event_fd[%d] = fd(%d)\n", i, fd);
 	}
 }
 
 int main(int argc, char **argv)
 {
 	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+	struct bpf_program *prog;
 	int longindex = 0, opt;
-	int ret = EXIT_SUCCESS;
-	char bpf_obj_file[256];
+	int ret = EXIT_FAILURE;
+	enum map_type type;
+	char filename[256];
 
 	/* Default settings: */
 	bool errors_only = true;
 	int interval = 2;
 
-	snprintf(bpf_obj_file, sizeof(bpf_obj_file), "%s_kern.o", argv[0]);
-
 	/* Parse commands line args */
 	while ((opt = getopt_long(argc, argv, "hDSs:",
 				  long_options, &longindex)) != -1) {
@@ -672,40 +714,79 @@ int main(int argc, char **argv)
 		case 'h':
 		default:
 			usage(argv);
-			return EXIT_FAILURE;
+			return ret;
 		}
 	}
 
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
 	if (setrlimit(RLIMIT_MEMLOCK, &r)) {
 		perror("setrlimit(RLIMIT_MEMLOCK)");
-		return EXIT_FAILURE;
+		return ret;
 	}
 
-	if (load_bpf_file(bpf_obj_file)) {
-		printf("ERROR - bpf_log_buf: %s", bpf_log_buf);
-		return EXIT_FAILURE;
+	/* Remove tracepoint program when program is interrupted or killed */
+	signal(SIGINT, int_exit);
+	signal(SIGTERM, int_exit);
+
+	obj = bpf_object__open_file(filename, NULL);
+	if (libbpf_get_error(obj)) {
+		printf("ERROR: opening BPF object file failed\n");
+		obj = NULL;
+		goto cleanup;
 	}
-	if (!prog_fd[0]) {
-		printf("ERROR - load_bpf_file: %s\n", strerror(errno));
-		return EXIT_FAILURE;
+
+	/* load BPF program */
+	if (bpf_object__load(obj)) {
+		printf("ERROR: loading BPF object file failed\n");
+		goto cleanup;
+	}
+
+	for (type = 0; type < NUM_MAP; type++) {
+		map_data[type] =
+			bpf_object__find_map_by_name(obj, map_type_strings[type]);
+
+		if (libbpf_get_error(map_data[type])) {
+			printf("ERROR: finding a map in obj file failed\n");
+			goto cleanup;
+		}
+		map_cnt++;
+	}
+
+	bpf_object__for_each_program(prog, obj) {
+		tp_links[tp_cnt] = bpf_program__attach(prog);
+		if (libbpf_get_error(tp_links[tp_cnt])) {
+			printf("ERROR: bpf_program__attach failed\n");
+			tp_links[tp_cnt] = NULL;
+			goto cleanup;
+		}
+		tp_cnt++;
 	}
 
 	if (debug) {
 		print_bpf_prog_info();
 	}
 
-	/* Unload/stop tracepoint event by closing fd's */
+	/* Unload/stop tracepoint event by closing bpf_link's */
 	if (errors_only) {
-		/* The prog_fd[i] and event_fd[i] depend on the
-		 * order the functions was defined in _kern.c
+		/* The bpf_link[i] depend on the order of
+		 * the functions was defined in _kern.c
 		 */
-		close(event_fd[2]); /* tracepoint/xdp/xdp_redirect */
-		close(prog_fd[2]);  /* func: trace_xdp_redirect */
-		close(event_fd[3]); /* tracepoint/xdp/xdp_redirect_map */
-		close(prog_fd[3]);  /* func: trace_xdp_redirect_map */
+		bpf_link__destroy(tp_links[2]);	/* tracepoint/xdp/xdp_redirect */
+		tp_links[2] = NULL;
+		bpf_link__destroy(tp_links[3]);	/* tracepoint/xdp/xdp_redirect_map */
+		tp_links[3] = NULL;
 	}
 
 	stats_poll(interval, errors_only);
 
+	ret = EXIT_SUCCESS;
+
+cleanup:
+	/* Detach tracepoints */
+	while (tp_cnt)
+		bpf_link__destroy(tp_links[--tp_cnt]);
+
+	bpf_object__close(obj);
 	return ret;
}
|
}
|
||||||
|
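The samples converted in this series all land on the same libbpf object lifecycle shown above: open the object, load it, attach each program, then tear the links down in reverse order on exit. For reference, a minimal standalone sketch of that pattern; the object path parameter, the link array bound, and the error handling are illustrative, not taken from the patches:

#include <bpf/libbpf.h>

static int run(const char *obj_path)
{
	struct bpf_link *links[16] = {};	/* illustrative bound */
	struct bpf_program *prog;
	struct bpf_object *obj;
	int n = 0;

	obj = bpf_object__open_file(obj_path, NULL);
	if (libbpf_get_error(obj))
		return -1;

	if (bpf_object__load(obj))
		goto out;

	bpf_object__for_each_program(prog, obj) {
		/* bpf_program__attach() picks the attach type from SEC() */
		links[n] = bpf_program__attach(prog);
		if (libbpf_get_error(links[n])) {
			links[n] = NULL;
			goto out;
		}
		n++;
	}

	/* ... poll maps, print stats ... */

out:
	while (n)
		bpf_link__destroy(links[--n]);	/* detach in reverse order */
	bpf_object__close(obj);
	return 0;
}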
@@ -37,18 +37,35 @@ static __u32 prog_id;

 static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
 static int n_cpus;
-static int cpu_map_fd;
-static int rx_cnt_map_fd;
-static int redirect_err_cnt_map_fd;
-static int cpumap_enqueue_cnt_map_fd;
-static int cpumap_kthread_cnt_map_fd;
-static int cpus_available_map_fd;
-static int cpus_count_map_fd;
-static int cpus_iterator_map_fd;
-static int exception_cnt_map_fd;
+
+enum map_type {
+	CPU_MAP,
+	RX_CNT,
+	REDIRECT_ERR_CNT,
+	CPUMAP_ENQUEUE_CNT,
+	CPUMAP_KTHREAD_CNT,
+	CPUS_AVAILABLE,
+	CPUS_COUNT,
+	CPUS_ITERATOR,
+	EXCEPTION_CNT,
+};
+
+static const char *const map_type_strings[] = {
+	[CPU_MAP] = "cpu_map",
+	[RX_CNT] = "rx_cnt",
+	[REDIRECT_ERR_CNT] = "redirect_err_cnt",
+	[CPUMAP_ENQUEUE_CNT] = "cpumap_enqueue_cnt",
+	[CPUMAP_KTHREAD_CNT] = "cpumap_kthread_cnt",
+	[CPUS_AVAILABLE] = "cpus_available",
+	[CPUS_COUNT] = "cpus_count",
+	[CPUS_ITERATOR] = "cpus_iterator",
+	[EXCEPTION_CNT] = "exception_cnt",
+};

 #define NUM_TP 5
-struct bpf_link *tp_links[NUM_TP] = { 0 };
+#define NUM_MAP 9
+
+struct bpf_link *tp_links[NUM_TP] = {};
+static int map_fds[NUM_MAP];
 static int tp_cnt = 0;

 /* Exit return codes */
@@ -527,20 +544,20 @@ static void stats_collect(struct stats_record *rec)
 {
 	int fd, i;

-	fd = rx_cnt_map_fd;
+	fd = map_fds[RX_CNT];
 	map_collect_percpu(fd, 0, &rec->rx_cnt);

-	fd = redirect_err_cnt_map_fd;
+	fd = map_fds[REDIRECT_ERR_CNT];
 	map_collect_percpu(fd, 1, &rec->redir_err);

-	fd = cpumap_enqueue_cnt_map_fd;
+	fd = map_fds[CPUMAP_ENQUEUE_CNT];
 	for (i = 0; i < n_cpus; i++)
 		map_collect_percpu(fd, i, &rec->enq[i]);

-	fd = cpumap_kthread_cnt_map_fd;
+	fd = map_fds[CPUMAP_KTHREAD_CNT];
 	map_collect_percpu(fd, 0, &rec->kthread);

-	fd = exception_cnt_map_fd;
+	fd = map_fds[EXCEPTION_CNT];
 	map_collect_percpu(fd, 0, &rec->exception);
 }

@@ -565,7 +582,7 @@ static int create_cpu_entry(__u32 cpu, struct bpf_cpumap_val *value,
 	/* Add a CPU entry to cpumap, as this allocate a cpu entry in
 	 * the kernel for the cpu.
 	 */
-	ret = bpf_map_update_elem(cpu_map_fd, &cpu, value, 0);
+	ret = bpf_map_update_elem(map_fds[CPU_MAP], &cpu, value, 0);
 	if (ret) {
 		fprintf(stderr, "Create CPU entry failed (err:%d)\n", ret);
 		exit(EXIT_FAIL_BPF);
@@ -574,21 +591,21 @@ static int create_cpu_entry(__u32 cpu, struct bpf_cpumap_val *value,
 	/* Inform bpf_prog's that a new CPU is available to select
 	 * from via some control maps.
 	 */
-	ret = bpf_map_update_elem(cpus_available_map_fd, &avail_idx, &cpu, 0);
+	ret = bpf_map_update_elem(map_fds[CPUS_AVAILABLE], &avail_idx, &cpu, 0);
 	if (ret) {
 		fprintf(stderr, "Add to avail CPUs failed\n");
 		exit(EXIT_FAIL_BPF);
 	}

 	/* When not replacing/updating existing entry, bump the count */
-	ret = bpf_map_lookup_elem(cpus_count_map_fd, &key, &curr_cpus_count);
+	ret = bpf_map_lookup_elem(map_fds[CPUS_COUNT], &key, &curr_cpus_count);
 	if (ret) {
 		fprintf(stderr, "Failed reading curr cpus_count\n");
 		exit(EXIT_FAIL_BPF);
 	}
 	if (new) {
 		curr_cpus_count++;
-		ret = bpf_map_update_elem(cpus_count_map_fd, &key,
+		ret = bpf_map_update_elem(map_fds[CPUS_COUNT], &key,
 					  &curr_cpus_count, 0);
 		if (ret) {
 			fprintf(stderr, "Failed write curr cpus_count\n");
@@ -612,7 +629,7 @@ static void mark_cpus_unavailable(void)
 	int ret, i;

 	for (i = 0; i < n_cpus; i++) {
-		ret = bpf_map_update_elem(cpus_available_map_fd, &i,
+		ret = bpf_map_update_elem(map_fds[CPUS_AVAILABLE], &i,
 					  &invalid_cpu, 0);
 		if (ret) {
 			fprintf(stderr, "Failed marking CPU unavailable\n");
@@ -665,68 +682,37 @@ static void stats_poll(int interval, bool use_separators, char *prog_name,
 		free_stats_record(prev);
 }

-static struct bpf_link * attach_tp(struct bpf_object *obj,
-				   const char *tp_category,
-				   const char* tp_name)
+static int init_tracepoints(struct bpf_object *obj)
 {
 	struct bpf_program *prog;
-	struct bpf_link *link;
-	char sec_name[PATH_MAX];
-	int len;

-	len = snprintf(sec_name, PATH_MAX, "tracepoint/%s/%s",
-		       tp_category, tp_name);
-	if (len < 0)
-		exit(EXIT_FAIL);
+	bpf_object__for_each_program(prog, obj) {
+		if (bpf_program__is_tracepoint(prog) != true)
+			continue;

-	prog = bpf_object__find_program_by_title(obj, sec_name);
-	if (!prog) {
-		fprintf(stderr, "ERR: finding progsec: %s\n", sec_name);
-		exit(EXIT_FAIL_BPF);
+		tp_links[tp_cnt] = bpf_program__attach(prog);
+		if (libbpf_get_error(tp_links[tp_cnt])) {
+			tp_links[tp_cnt] = NULL;
+			return -EINVAL;
+		}
+		tp_cnt++;
 	}

-	link = bpf_program__attach_tracepoint(prog, tp_category, tp_name);
-	if (libbpf_get_error(link))
-		exit(EXIT_FAIL_BPF);
-
-	return link;
-}
-
-static void init_tracepoints(struct bpf_object *obj) {
-	tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_redirect_err");
-	tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_redirect_map_err");
-	tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_exception");
-	tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_cpumap_enqueue");
-	tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_cpumap_kthread");
+	return 0;
 }

 static int init_map_fds(struct bpf_object *obj)
 {
-	/* Maps updated by tracepoints */
-	redirect_err_cnt_map_fd =
-		bpf_object__find_map_fd_by_name(obj, "redirect_err_cnt");
-	exception_cnt_map_fd =
-		bpf_object__find_map_fd_by_name(obj, "exception_cnt");
-	cpumap_enqueue_cnt_map_fd =
-		bpf_object__find_map_fd_by_name(obj, "cpumap_enqueue_cnt");
-	cpumap_kthread_cnt_map_fd =
-		bpf_object__find_map_fd_by_name(obj, "cpumap_kthread_cnt");
-
-	/* Maps used by XDP */
-	rx_cnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rx_cnt");
-	cpu_map_fd = bpf_object__find_map_fd_by_name(obj, "cpu_map");
-	cpus_available_map_fd =
-		bpf_object__find_map_fd_by_name(obj, "cpus_available");
-	cpus_count_map_fd = bpf_object__find_map_fd_by_name(obj, "cpus_count");
-	cpus_iterator_map_fd =
-		bpf_object__find_map_fd_by_name(obj, "cpus_iterator");
+	enum map_type type;

-	if (cpu_map_fd < 0 || rx_cnt_map_fd < 0 ||
-	    redirect_err_cnt_map_fd < 0 || cpumap_enqueue_cnt_map_fd < 0 ||
-	    cpumap_kthread_cnt_map_fd < 0 || cpus_available_map_fd < 0 ||
-	    cpus_count_map_fd < 0 || cpus_iterator_map_fd < 0 ||
-	    exception_cnt_map_fd < 0)
-		return -ENOENT;
+	for (type = 0; type < NUM_MAP; type++) {
+		map_fds[type] =
+			bpf_object__find_map_fd_by_name(obj,
+							map_type_strings[type]);
+
+		if (map_fds[type] < 0)
+			return -ENOENT;
+	}

 	return 0;
 }
@@ -795,13 +781,13 @@ int main(int argc, char **argv)
 	bool stress_mode = false;
 	struct bpf_program *prog;
 	struct bpf_object *obj;
+	int err = EXIT_FAIL;
 	char filename[256];
 	int added_cpus = 0;
 	int longindex = 0;
 	int interval = 2;
 	int add_cpu = -1;
-	int opt, err;
-	int prog_fd;
+	int opt, prog_fd;
 	int *cpu, i;
 	__u32 qsize;

@@ -824,24 +810,29 @@ int main(int argc, char **argv)
 	}

 	if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
-		return EXIT_FAIL;
+		return err;

 	if (prog_fd < 0) {
 		fprintf(stderr, "ERR: bpf_prog_load_xattr: %s\n",
 			strerror(errno));
-		return EXIT_FAIL;
+		return err;
 	}
-	init_tracepoints(obj);
+
+	if (init_tracepoints(obj) < 0) {
+		fprintf(stderr, "ERR: bpf_program__attach failed\n");
+		return err;
+	}
+
 	if (init_map_fds(obj) < 0) {
 		fprintf(stderr, "bpf_object__find_map_fd_by_name failed\n");
-		return EXIT_FAIL;
+		return err;
 	}
 	mark_cpus_unavailable();

 	cpu = malloc(n_cpus * sizeof(int));
 	if (!cpu) {
 		fprintf(stderr, "failed to allocate cpu array\n");
-		return EXIT_FAIL;
+		return err;
 	}
 	memset(cpu, 0, n_cpus * sizeof(int));

@@ -960,14 +951,12 @@ int main(int argc, char **argv)
 	prog = bpf_object__find_program_by_title(obj, prog_name);
 	if (!prog) {
 		fprintf(stderr, "bpf_object__find_program_by_title failed\n");
-		err = EXIT_FAIL;
 		goto out;
 	}

 	prog_fd = bpf_program__fd(prog);
 	if (prog_fd < 0) {
 		fprintf(stderr, "bpf_program__fd failed\n");
-		err = EXIT_FAIL;
 		goto out;
 	}

@@ -986,6 +975,8 @@ int main(int argc, char **argv)

 	stats_poll(interval, use_separators, prog_name, mprog_name,
 		   &value, stress_mode);

+	err = EXIT_OK;
+
 out:
 	free(cpu);
 	return err;
@@ -5,14 +5,12 @@
 #include <bpf/bpf_helpers.h>

 #define SAMPLE_SIZE 64ul
-#define MAX_CPUS 128

-struct bpf_map_def SEC("maps") my_map = {
-	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
-	.key_size = sizeof(int),
-	.value_size = sizeof(u32),
-	.max_entries = MAX_CPUS,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(u32));
+} my_map SEC(".maps");

 SEC("xdp_sample")
 int xdp_sample_prog(struct xdp_md *ctx)
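Dropping MAX_CPUS falls out of the BTF-defined map: when a BPF_MAP_TYPE_PERF_EVENT_ARRAY is declared without max_entries, libbpf (as far as I can tell) sizes it to the number of available CPUs at load time. For context, a hypothetical user-space consumer of a perf event array like my_map, using the perf_buffer API of this libbpf era; the callback, page count, and poll timeout are illustrative:

#include <linux/types.h>
#include <bpf/libbpf.h>

static void on_sample(void *ctx, int cpu, void *data, __u32 size)
{
	/* data holds the first SAMPLE_SIZE bytes of the sampled packet */
}

static int consume(int map_fd)
{
	struct perf_buffer_opts pb_opts = { .sample_cb = on_sample };
	struct perf_buffer *pb;

	pb = perf_buffer__new(map_fd, 8 /* pages per CPU */, &pb_opts);
	if (libbpf_get_error(pb))
		return -1;

	while (perf_buffer__poll(pb, 100 /* ms */) >= 0)
		;

	perf_buffer__free(pb);
	return 0;
}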
@@ -18,7 +18,6 @@

 #include "perf-sys.h"

-#define MAX_CPUS 128
 static int if_idx;
 static char *if_name;
 static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
@@ -11,6 +11,7 @@
 #include <linux/if_xdp.h>
 #include <linux/if_ether.h>
 #include <linux/ip.h>
+#include <linux/limits.h>
 #include <linux/udp.h>
 #include <arpa/inet.h>
 #include <locale.h>
@@ -79,6 +80,10 @@ static u16 opt_pkt_size = MIN_PKT_SIZE;
 static u32 opt_pkt_fill_pattern = 0x12345678;
 static bool opt_extra_stats;
 static bool opt_quiet;
+static bool opt_app_stats;
+static const char *opt_irq_str = "";
+static u32 irq_no;
+static int irqs_at_init = -1;
 static int opt_poll;
 static int opt_interval = 1;
 static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP;
@@ -91,18 +96,7 @@ static bool opt_need_wakeup = true;
 static u32 opt_num_xsks = 1;
 static u32 prog_id;

-struct xsk_umem_info {
-	struct xsk_ring_prod fq;
-	struct xsk_ring_cons cq;
-	struct xsk_umem *umem;
-	void *buffer;
-};
-
-struct xsk_socket_info {
-	struct xsk_ring_cons rx;
-	struct xsk_ring_prod tx;
-	struct xsk_umem_info *umem;
-	struct xsk_socket *xsk;
+struct xsk_ring_stats {
 	unsigned long rx_npkts;
 	unsigned long tx_npkts;
 	unsigned long rx_dropped_npkts;
@@ -119,6 +113,41 @@ struct xsk_socket_info {
 	unsigned long prev_rx_full_npkts;
 	unsigned long prev_rx_fill_empty_npkts;
 	unsigned long prev_tx_empty_npkts;
+};
+
+struct xsk_driver_stats {
+	unsigned long intrs;
+	unsigned long prev_intrs;
+};
+
+struct xsk_app_stats {
+	unsigned long rx_empty_polls;
+	unsigned long fill_fail_polls;
+	unsigned long copy_tx_sendtos;
+	unsigned long tx_wakeup_sendtos;
+	unsigned long opt_polls;
+	unsigned long prev_rx_empty_polls;
+	unsigned long prev_fill_fail_polls;
+	unsigned long prev_copy_tx_sendtos;
+	unsigned long prev_tx_wakeup_sendtos;
+	unsigned long prev_opt_polls;
+};
+
+struct xsk_umem_info {
+	struct xsk_ring_prod fq;
+	struct xsk_ring_cons cq;
+	struct xsk_umem *umem;
+	void *buffer;
+};
+
+struct xsk_socket_info {
+	struct xsk_ring_cons rx;
+	struct xsk_ring_prod tx;
+	struct xsk_umem_info *umem;
+	struct xsk_socket *xsk;
+	struct xsk_ring_stats ring_stats;
+	struct xsk_app_stats app_stats;
+	struct xsk_driver_stats drv_stats;
 	u32 outstanding_tx;
 };

@@ -173,18 +202,151 @@ static int xsk_get_xdp_stats(int fd, struct xsk_socket_info *xsk)
 		return err;

 	if (optlen == sizeof(struct xdp_statistics)) {
-		xsk->rx_dropped_npkts = stats.rx_dropped;
-		xsk->rx_invalid_npkts = stats.rx_invalid_descs;
-		xsk->tx_invalid_npkts = stats.tx_invalid_descs;
-		xsk->rx_full_npkts = stats.rx_ring_full;
-		xsk->rx_fill_empty_npkts = stats.rx_fill_ring_empty_descs;
-		xsk->tx_empty_npkts = stats.tx_ring_empty_descs;
+		xsk->ring_stats.rx_dropped_npkts = stats.rx_dropped;
+		xsk->ring_stats.rx_invalid_npkts = stats.rx_invalid_descs;
+		xsk->ring_stats.tx_invalid_npkts = stats.tx_invalid_descs;
+		xsk->ring_stats.rx_full_npkts = stats.rx_ring_full;
+		xsk->ring_stats.rx_fill_empty_npkts = stats.rx_fill_ring_empty_descs;
+		xsk->ring_stats.tx_empty_npkts = stats.tx_ring_empty_descs;
 		return 0;
 	}

 	return -EINVAL;
 }

+static void dump_app_stats(long dt)
+{
+	int i;
+
+	for (i = 0; i < num_socks && xsks[i]; i++) {
+		char *fmt = "%-18s %'-14.0f %'-14lu\n";
+		double rx_empty_polls_ps, fill_fail_polls_ps, copy_tx_sendtos_ps,
+				tx_wakeup_sendtos_ps, opt_polls_ps;
+
+		rx_empty_polls_ps = (xsks[i]->app_stats.rx_empty_polls -
+					xsks[i]->app_stats.prev_rx_empty_polls) * 1000000000. / dt;
+		fill_fail_polls_ps = (xsks[i]->app_stats.fill_fail_polls -
+					xsks[i]->app_stats.prev_fill_fail_polls) * 1000000000. / dt;
+		copy_tx_sendtos_ps = (xsks[i]->app_stats.copy_tx_sendtos -
+					xsks[i]->app_stats.prev_copy_tx_sendtos) * 1000000000. / dt;
+		tx_wakeup_sendtos_ps = (xsks[i]->app_stats.tx_wakeup_sendtos -
+					xsks[i]->app_stats.prev_tx_wakeup_sendtos)
+										* 1000000000. / dt;
+		opt_polls_ps = (xsks[i]->app_stats.opt_polls -
+					xsks[i]->app_stats.prev_opt_polls) * 1000000000. / dt;
+
+		printf("\n%-18s %-14s %-14s\n", "", "calls/s", "count");
+		printf(fmt, "rx empty polls", rx_empty_polls_ps, xsks[i]->app_stats.rx_empty_polls);
+		printf(fmt, "fill fail polls", fill_fail_polls_ps,
+							xsks[i]->app_stats.fill_fail_polls);
+		printf(fmt, "copy tx sendtos", copy_tx_sendtos_ps,
+							xsks[i]->app_stats.copy_tx_sendtos);
+		printf(fmt, "tx wakeup sendtos", tx_wakeup_sendtos_ps,
+							xsks[i]->app_stats.tx_wakeup_sendtos);
+		printf(fmt, "opt polls", opt_polls_ps, xsks[i]->app_stats.opt_polls);
+
+		xsks[i]->app_stats.prev_rx_empty_polls = xsks[i]->app_stats.rx_empty_polls;
+		xsks[i]->app_stats.prev_fill_fail_polls = xsks[i]->app_stats.fill_fail_polls;
+		xsks[i]->app_stats.prev_copy_tx_sendtos = xsks[i]->app_stats.copy_tx_sendtos;
+		xsks[i]->app_stats.prev_tx_wakeup_sendtos = xsks[i]->app_stats.tx_wakeup_sendtos;
+		xsks[i]->app_stats.prev_opt_polls = xsks[i]->app_stats.opt_polls;
+	}
+}
+
+static bool get_interrupt_number(void)
+{
+	FILE *f_int_proc;
+	char line[4096];
+	bool found = false;
+
+	f_int_proc = fopen("/proc/interrupts", "r");
+	if (f_int_proc == NULL) {
+		printf("Failed to open /proc/interrupts.\n");
+		return found;
+	}
+
+	while (!feof(f_int_proc) && !found) {
+		/* Make sure to read a full line at a time */
+		if (fgets(line, sizeof(line), f_int_proc) == NULL ||
+				line[strlen(line) - 1] != '\n') {
+			printf("Error reading from interrupts file\n");
+			break;
+		}
+
+		/* Extract interrupt number from line */
+		if (strstr(line, opt_irq_str) != NULL) {
+			irq_no = atoi(line);
+			found = true;
+			break;
+		}
+	}
+
+	fclose(f_int_proc);
+
+	return found;
+}
+
+static int get_irqs(void)
+{
+	char count_path[PATH_MAX];
+	int total_intrs = -1;
+	FILE *f_count_proc;
+	char line[4096];
+
+	snprintf(count_path, sizeof(count_path),
+		"/sys/kernel/irq/%i/per_cpu_count", irq_no);
+	f_count_proc = fopen(count_path, "r");
+	if (f_count_proc == NULL) {
+		printf("Failed to open %s\n", count_path);
+		return total_intrs;
+	}
+
+	if (fgets(line, sizeof(line), f_count_proc) == NULL ||
+			line[strlen(line) - 1] != '\n') {
+		printf("Error reading from %s\n", count_path);
+	} else {
+		static const char com[2] = ",";
+		char *token;
+
+		total_intrs = 0;
+		token = strtok(line, com);
+		while (token != NULL) {
+			/* sum up interrupts across all cores */
+			total_intrs += atoi(token);
+			token = strtok(NULL, com);
+		}
+	}
+
+	fclose(f_count_proc);
+
+	return total_intrs;
+}
+
+static void dump_driver_stats(long dt)
+{
+	int i;
+
+	for (i = 0; i < num_socks && xsks[i]; i++) {
+		char *fmt = "%-18s %'-14.0f %'-14lu\n";
+		double intrs_ps;
+		int n_ints = get_irqs();
+
+		if (n_ints < 0) {
+			printf("error getting intr info for intr %i\n", irq_no);
+			return;
+		}
+		xsks[i]->drv_stats.intrs = n_ints - irqs_at_init;
+
+		intrs_ps = (xsks[i]->drv_stats.intrs - xsks[i]->drv_stats.prev_intrs) *
+			 1000000000. / dt;
+
+		printf("\n%-18s %-14s %-14s\n", "", "intrs/s", "count");
+		printf(fmt, "irqs", intrs_ps, xsks[i]->drv_stats.intrs);
+
+		xsks[i]->drv_stats.prev_intrs = xsks[i]->drv_stats.intrs;
+	}
+}
+
 static void dump_stats(void)
 {
 	unsigned long now = get_nsecs();
@@ -194,67 +356,83 @@ static void dump_stats(void)
 	prev_time = now;

 	for (i = 0; i < num_socks && xsks[i]; i++) {
-		char *fmt = "%-15s %'-11.0f %'-11lu\n";
+		char *fmt = "%-18s %'-14.0f %'-14lu\n";
 		double rx_pps, tx_pps, dropped_pps, rx_invalid_pps, full_pps, fill_empty_pps,
 			tx_invalid_pps, tx_empty_pps;

-		rx_pps = (xsks[i]->rx_npkts - xsks[i]->prev_rx_npkts) *
+		rx_pps = (xsks[i]->ring_stats.rx_npkts - xsks[i]->ring_stats.prev_rx_npkts) *
 			 1000000000. / dt;
-		tx_pps = (xsks[i]->tx_npkts - xsks[i]->prev_tx_npkts) *
+		tx_pps = (xsks[i]->ring_stats.tx_npkts - xsks[i]->ring_stats.prev_tx_npkts) *
 			 1000000000. / dt;

 		printf("\n sock%d@", i);
 		print_benchmark(false);
 		printf("\n");

-		printf("%-15s %-11s %-11s %-11.2f\n", "", "pps", "pkts",
+		printf("%-18s %-14s %-14s %-14.2f\n", "", "pps", "pkts",
 		       dt / 1000000000.);
-		printf(fmt, "rx", rx_pps, xsks[i]->rx_npkts);
-		printf(fmt, "tx", tx_pps, xsks[i]->tx_npkts);
+		printf(fmt, "rx", rx_pps, xsks[i]->ring_stats.rx_npkts);
+		printf(fmt, "tx", tx_pps, xsks[i]->ring_stats.tx_npkts);

-		xsks[i]->prev_rx_npkts = xsks[i]->rx_npkts;
-		xsks[i]->prev_tx_npkts = xsks[i]->tx_npkts;
+		xsks[i]->ring_stats.prev_rx_npkts = xsks[i]->ring_stats.rx_npkts;
+		xsks[i]->ring_stats.prev_tx_npkts = xsks[i]->ring_stats.tx_npkts;

 		if (opt_extra_stats) {
 			if (!xsk_get_xdp_stats(xsk_socket__fd(xsks[i]->xsk), xsks[i])) {
-				dropped_pps = (xsks[i]->rx_dropped_npkts -
-						xsks[i]->prev_rx_dropped_npkts) * 1000000000. / dt;
-				rx_invalid_pps = (xsks[i]->rx_invalid_npkts -
-						xsks[i]->prev_rx_invalid_npkts) * 1000000000. / dt;
-				tx_invalid_pps = (xsks[i]->tx_invalid_npkts -
-						xsks[i]->prev_tx_invalid_npkts) * 1000000000. / dt;
-				full_pps = (xsks[i]->rx_full_npkts -
-						xsks[i]->prev_rx_full_npkts) * 1000000000. / dt;
-				fill_empty_pps = (xsks[i]->rx_fill_empty_npkts -
-						xsks[i]->prev_rx_fill_empty_npkts)
-						* 1000000000. / dt;
-				tx_empty_pps = (xsks[i]->tx_empty_npkts -
-						xsks[i]->prev_tx_empty_npkts) * 1000000000. / dt;
+				dropped_pps = (xsks[i]->ring_stats.rx_dropped_npkts -
+						xsks[i]->ring_stats.prev_rx_dropped_npkts) *
+							1000000000. / dt;
+				rx_invalid_pps = (xsks[i]->ring_stats.rx_invalid_npkts -
+						xsks[i]->ring_stats.prev_rx_invalid_npkts) *
+							1000000000. / dt;
+				tx_invalid_pps = (xsks[i]->ring_stats.tx_invalid_npkts -
+						xsks[i]->ring_stats.prev_tx_invalid_npkts) *
+							1000000000. / dt;
+				full_pps = (xsks[i]->ring_stats.rx_full_npkts -
+						xsks[i]->ring_stats.prev_rx_full_npkts) *
+							1000000000. / dt;
+				fill_empty_pps = (xsks[i]->ring_stats.rx_fill_empty_npkts -
+						xsks[i]->ring_stats.prev_rx_fill_empty_npkts) *
+							1000000000. / dt;
+				tx_empty_pps = (xsks[i]->ring_stats.tx_empty_npkts -
+						xsks[i]->ring_stats.prev_tx_empty_npkts) *
+							1000000000. / dt;

 				printf(fmt, "rx dropped", dropped_pps,
-				       xsks[i]->rx_dropped_npkts);
+				       xsks[i]->ring_stats.rx_dropped_npkts);
 				printf(fmt, "rx invalid", rx_invalid_pps,
-				       xsks[i]->rx_invalid_npkts);
+				       xsks[i]->ring_stats.rx_invalid_npkts);
 				printf(fmt, "tx invalid", tx_invalid_pps,
-				       xsks[i]->tx_invalid_npkts);
+				       xsks[i]->ring_stats.tx_invalid_npkts);
 				printf(fmt, "rx queue full", full_pps,
-				       xsks[i]->rx_full_npkts);
+				       xsks[i]->ring_stats.rx_full_npkts);
 				printf(fmt, "fill ring empty", fill_empty_pps,
-				       xsks[i]->rx_fill_empty_npkts);
+				       xsks[i]->ring_stats.rx_fill_empty_npkts);
 				printf(fmt, "tx ring empty", tx_empty_pps,
-				       xsks[i]->tx_empty_npkts);
+				       xsks[i]->ring_stats.tx_empty_npkts);

-				xsks[i]->prev_rx_dropped_npkts = xsks[i]->rx_dropped_npkts;
-				xsks[i]->prev_rx_invalid_npkts = xsks[i]->rx_invalid_npkts;
-				xsks[i]->prev_tx_invalid_npkts = xsks[i]->tx_invalid_npkts;
-				xsks[i]->prev_rx_full_npkts = xsks[i]->rx_full_npkts;
-				xsks[i]->prev_rx_fill_empty_npkts = xsks[i]->rx_fill_empty_npkts;
-				xsks[i]->prev_tx_empty_npkts = xsks[i]->tx_empty_npkts;
+				xsks[i]->ring_stats.prev_rx_dropped_npkts =
+					xsks[i]->ring_stats.rx_dropped_npkts;
+				xsks[i]->ring_stats.prev_rx_invalid_npkts =
+					xsks[i]->ring_stats.rx_invalid_npkts;
+				xsks[i]->ring_stats.prev_tx_invalid_npkts =
+					xsks[i]->ring_stats.tx_invalid_npkts;
+				xsks[i]->ring_stats.prev_rx_full_npkts =
+					xsks[i]->ring_stats.rx_full_npkts;
+				xsks[i]->ring_stats.prev_rx_fill_empty_npkts =
+					xsks[i]->ring_stats.rx_fill_empty_npkts;
+				xsks[i]->ring_stats.prev_tx_empty_npkts =
+					xsks[i]->ring_stats.tx_empty_npkts;
 			} else {
 				printf("%-15s\n", "Error retrieving extra stats");
 			}
 		}
 	}

+	if (opt_app_stats)
+		dump_app_stats(dt);
+	if (irq_no)
+		dump_driver_stats(dt);
 }

 static bool is_benchmark_done(void)
@@ -693,6 +871,17 @@ static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem,
 	if (ret)
 		exit_with_error(-ret);

+	xsk->app_stats.rx_empty_polls = 0;
+	xsk->app_stats.fill_fail_polls = 0;
+	xsk->app_stats.copy_tx_sendtos = 0;
+	xsk->app_stats.tx_wakeup_sendtos = 0;
+	xsk->app_stats.opt_polls = 0;
+	xsk->app_stats.prev_rx_empty_polls = 0;
+	xsk->app_stats.prev_fill_fail_polls = 0;
+	xsk->app_stats.prev_copy_tx_sendtos = 0;
+	xsk->app_stats.prev_tx_wakeup_sendtos = 0;
+	xsk->app_stats.prev_opt_polls = 0;
+
 	return xsk;
 }

@@ -720,6 +909,8 @@ static struct option long_options[] = {
 	{"tx-pkt-pattern", required_argument, 0, 'P'},
 	{"extra-stats", no_argument, 0, 'x'},
 	{"quiet", no_argument, 0, 'Q'},
+	{"app-stats", no_argument, 0, 'a'},
+	{"irq-string", no_argument, 0, 'I'},
 	{0, 0, 0, 0}
 };

@@ -756,6 +947,8 @@ static void usage(const char *prog)
 		"  -P, --tx-pkt-pattern=n	Packet fill pattern. Default: 0x%x\n"
 		"  -x, --extra-stats	Display extra statistics.\n"
 		"  -Q, --quiet		Do not display any stats.\n"
+		"  -a, --app-stats	Display application (syscall) statistics.\n"
+		"  -I, --irq-string	Display driver interrupt statistics for interface associated with irq-string.\n"
 		"\n";
 	fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE,
 		opt_batch_size, MIN_PKT_SIZE, MIN_PKT_SIZE,
@@ -771,7 +964,7 @@ static void parse_command_line(int argc, char **argv)
 	opterr = 0;

 	for (;;) {
-		c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:xQ",
+		c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:xQaI:",
 				long_options, &option_index);
 		if (c == -1)
 			break;
@@ -857,6 +1050,19 @@ static void parse_command_line(int argc, char **argv)
 			break;
 		case 'Q':
 			opt_quiet = 1;
+			break;
+		case 'a':
+			opt_app_stats = 1;
+			break;
+		case 'I':
+			opt_irq_str = optarg;
+			if (get_interrupt_number())
+				irqs_at_init = get_irqs();
+			if (irqs_at_init < 0) {
+				fprintf(stderr, "ERROR: Failed to get irqs for %s\n", opt_irq_str);
+				usage(basename(argv[0]));
+			}
+
 			break;
 		default:
 			usage(basename(argv[0]));
@@ -908,8 +1114,10 @@ static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk,
 	 * is driven by the NAPI loop. So as an optimization, we do not have to call
 	 * sendto() all the time in zero-copy mode for l2fwd.
 	 */
-	if (opt_xdp_bind_flags & XDP_COPY)
+	if (opt_xdp_bind_flags & XDP_COPY) {
+		xsk->app_stats.copy_tx_sendtos++;
 		kick_tx(xsk);
+	}

 	ndescs = (xsk->outstanding_tx > opt_batch_size) ? opt_batch_size :
 		xsk->outstanding_tx;
@@ -924,8 +1132,10 @@ static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk,
 	while (ret != rcvd) {
 		if (ret < 0)
 			exit_with_error(-ret);
-		if (xsk_ring_prod__needs_wakeup(&umem->fq))
+		if (xsk_ring_prod__needs_wakeup(&umem->fq)) {
+			xsk->app_stats.fill_fail_polls++;
 			ret = poll(fds, num_socks, opt_timeout);
+		}
 		ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
 	}

@@ -936,7 +1146,7 @@ static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk,
 		xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
 		xsk_ring_cons__release(&xsk->umem->cq, rcvd);
 		xsk->outstanding_tx -= rcvd;
-		xsk->tx_npkts += rcvd;
+		xsk->ring_stats.tx_npkts += rcvd;
 	}
 }

@@ -949,14 +1159,16 @@ static inline void complete_tx_only(struct xsk_socket_info *xsk,
 	if (!xsk->outstanding_tx)
 		return;

-	if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx))
+	if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx)) {
+		xsk->app_stats.tx_wakeup_sendtos++;
 		kick_tx(xsk);
+	}

 	rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx);
 	if (rcvd > 0) {
 		xsk_ring_cons__release(&xsk->umem->cq, rcvd);
 		xsk->outstanding_tx -= rcvd;
-		xsk->tx_npkts += rcvd;
+		xsk->ring_stats.tx_npkts += rcvd;
 	}
 }

@@ -968,8 +1180,10 @@ static void rx_drop(struct xsk_socket_info *xsk, struct pollfd *fds)

 	rcvd = xsk_ring_cons__peek(&xsk->rx, opt_batch_size, &idx_rx);
 	if (!rcvd) {
-		if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq))
+		if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
+			xsk->app_stats.rx_empty_polls++;
 			ret = poll(fds, num_socks, opt_timeout);
+		}
 		return;
 	}

@@ -977,8 +1191,10 @@ static void rx_drop(struct xsk_socket_info *xsk, struct pollfd *fds)
 	while (ret != rcvd) {
 		if (ret < 0)
 			exit_with_error(-ret);
-		if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq))
+		if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
+			xsk->app_stats.fill_fail_polls++;
 			ret = poll(fds, num_socks, opt_timeout);
+		}
 		ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
 	}

@@ -996,7 +1212,7 @@ static void rx_drop(struct xsk_socket_info *xsk, struct pollfd *fds)

 	xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
 	xsk_ring_cons__release(&xsk->rx, rcvd);
-	xsk->rx_npkts += rcvd;
+	xsk->ring_stats.rx_npkts += rcvd;
 }

 static void rx_drop_all(void)
@@ -1011,6 +1227,8 @@ static void rx_drop_all(void)

 	for (;;) {
 		if (opt_poll) {
+			for (i = 0; i < num_socks; i++)
+				xsks[i]->app_stats.opt_polls++;
 			ret = poll(fds, num_socks, opt_timeout);
 			if (ret <= 0)
 				continue;
@@ -1091,6 +1309,8 @@ static void tx_only_all(void)
 		int batch_size = get_batch_size(pkt_cnt);

 		if (opt_poll) {
+			for (i = 0; i < num_socks; i++)
+				xsks[i]->app_stats.opt_polls++;
 			ret = poll(fds, num_socks, opt_timeout);
 			if (ret <= 0)
 				continue;
@@ -1122,8 +1342,10 @@ static void l2fwd(struct xsk_socket_info *xsk, struct pollfd *fds)

 	rcvd = xsk_ring_cons__peek(&xsk->rx, opt_batch_size, &idx_rx);
 	if (!rcvd) {
-		if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq))
+		if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
+			xsk->app_stats.rx_empty_polls++;
 			ret = poll(fds, num_socks, opt_timeout);
+		}
 		return;
 	}

@@ -1132,8 +1354,10 @@ static void l2fwd(struct xsk_socket_info *xsk, struct pollfd *fds)
 		if (ret < 0)
 			exit_with_error(-ret);
 		complete_tx_l2fwd(xsk, fds);
-		if (xsk_ring_prod__needs_wakeup(&xsk->tx))
+		if (xsk_ring_prod__needs_wakeup(&xsk->tx)) {
+			xsk->app_stats.tx_wakeup_sendtos++;
 			kick_tx(xsk);
+		}
 		ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx);
 	}

@@ -1155,7 +1379,7 @@ static void l2fwd(struct xsk_socket_info *xsk, struct pollfd *fds)
 	xsk_ring_prod__submit(&xsk->tx, rcvd);
 	xsk_ring_cons__release(&xsk->rx, rcvd);

-	xsk->rx_npkts += rcvd;
+	xsk->ring_stats.rx_npkts += rcvd;
 	xsk->outstanding_tx += rcvd;
 }

@@ -1171,6 +1395,8 @@ static void l2fwd_all(void)

 	for (;;) {
 		if (opt_poll) {
+			for (i = 0; i < num_socks; i++)
+				xsks[i]->app_stats.opt_polls++;
 			ret = poll(fds, num_socks, opt_timeout);
 			if (ret <= 0)
 				continue;
@@ -356,18 +356,36 @@ enum bpf_link_type {
 #define BPF_F_SLEEPABLE		(1U << 4)

 /* When BPF ldimm64's insn[0].src_reg != 0 then this can have
- * two extensions:
- *
- * insn[0].src_reg:  BPF_PSEUDO_MAP_FD   BPF_PSEUDO_MAP_VALUE
- * insn[0].imm:      map fd              map fd
- * insn[1].imm:      0                   offset into value
- * insn[0].off:      0                   0
- * insn[1].off:      0                   0
- * ldimm64 rewrite:  address of map      address of map[0]+offset
- * verifier type:    CONST_PTR_TO_MAP    PTR_TO_MAP_VALUE
+ * the following extensions:
+ *
+ * insn[0].src_reg:  BPF_PSEUDO_MAP_FD
+ * insn[0].imm:      map fd
+ * insn[1].imm:      0
+ * insn[0].off:      0
+ * insn[1].off:      0
+ * ldimm64 rewrite:  address of map
+ * verifier type:    CONST_PTR_TO_MAP
 */
 #define BPF_PSEUDO_MAP_FD	1
+/* insn[0].src_reg:  BPF_PSEUDO_MAP_VALUE
+ * insn[0].imm:      map fd
+ * insn[1].imm:      offset into value
+ * insn[0].off:      0
+ * insn[1].off:      0
+ * ldimm64 rewrite:  address of map[0]+offset
+ * verifier type:    PTR_TO_MAP_VALUE
+ */
 #define BPF_PSEUDO_MAP_VALUE	2
+/* insn[0].src_reg:  BPF_PSEUDO_BTF_ID
+ * insn[0].imm:      kernel btf id of VAR
+ * insn[1].imm:      0
+ * insn[0].off:      0
+ * insn[1].off:      0
+ * ldimm64 rewrite:  address of the kernel variable
+ * verifier type:    PTR_TO_BTF_ID or PTR_TO_MEM, depending on whether the var
+ *                   is struct/union.
+ */
+#define BPF_PSEUDO_BTF_ID	3

 /* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
  * offset to another bpf function
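The new BPF_PSEUDO_BTF_ID encoding is what typed ksyms compile down to. A sketch of the BPF C side that produces it, modeled on the selftests from this series; it assumes a vmlinux.h generated from a kernel built with BTF, and struct rq is just one convenient percpu kernel variable:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

extern const struct rq runqueues __ksym;	/* percpu kernel variable */

SEC("raw_tp/sys_enter")
int dump_rq_cpu(const void *ctx)
{
	/* &runqueues is emitted as a BPF_PSEUDO_BTF_ID ldimm64 */
	struct rq *rq = bpf_per_cpu_ptr(&runqueues, 0);

	if (!rq)	/* bpf_per_cpu_ptr() may return NULL, must be checked */
		return 0;

	bpf_printk("cpu0 rq: cpu=%d", rq->cpu);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";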
@@ -417,6 +435,9 @@ enum {

 /* Share perf_event among processes */
 	BPF_F_PRESERVE_ELEMS	= (1U << 11),
+
+/* Create a map that is suitable to be an inner map with dynamic max entries */
+	BPF_F_INNER_MAP		= (1U << 12),
 };

 /* Flags for BPF_PROG_QUERY. */
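A hedged user-space sketch of what BPF_F_INNER_MAP permits: two inner arrays with different max_entries feeding one outer array-of-maps. Key and value sizes must still match the prototype; only max_entries may vary. Map names and sizes here are arbitrary, and error handling is elided:

#include <linux/bpf.h>
#include <bpf/bpf.h>

int make_outer_map(void)
{
	int inner_small, inner_big, outer;

	/* inner maps must carry BPF_F_INNER_MAP to allow differing sizes */
	inner_small = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(int),
				     sizeof(long), 4, BPF_F_INNER_MAP);
	inner_big = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(int),
				   sizeof(long), 128, BPF_F_INNER_MAP);

	/* the first inner map serves as the prototype for the outer map */
	outer = bpf_create_map_in_map(BPF_MAP_TYPE_ARRAY_OF_MAPS, "outer",
				      sizeof(int), inner_small, 2, 0);

	/* both inner maps may now be installed via bpf_map_update_elem() */
	return outer;
}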
@@ -1680,7 +1701,7 @@ union bpf_attr {
 *		  **TCP_CONGESTION**, **TCP_BPF_IW**,
 *		  **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**,
 *		  **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**,
-*		  **TCP_SYNCNT**, **TCP_USER_TIMEOUT**.
+*		  **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**.
 *		* **IPPROTO_IP**, which supports *optname* **IP_TOS**.
 *		* **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
 *	Return
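A sketch of the newly documented option in use from a sockops program; the callback hook and the 128 KiB threshold are arbitrary illustrations, and the SOL_TCP/TCP_NOTSENT_LOWAT defines are only fallbacks for BPF builds without the userspace TCP headers:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

#ifndef SOL_TCP
#define SOL_TCP 6
#endif
#ifndef TCP_NOTSENT_LOWAT
#define TCP_NOTSENT_LOWAT 25
#endif

SEC("sockops")
int set_notsent_lowat(struct bpf_sock_ops *skops)
{
	int lowat = 128 * 1024;	/* arbitrary example threshold */

	if (skops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB)
		bpf_setsockopt(skops, SOL_TCP, TCP_NOTSENT_LOWAT,
			       &lowat, sizeof(lowat));
	return 1;
}

char LICENSE[] SEC("license") = "GPL";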
@@ -2235,7 +2256,7 @@ union bpf_attr {
 *	Description
 *		This helper is used in programs implementing policies at the
 *		skb socket level. If the sk_buff *skb* is allowed to pass (i.e.
-*		if the verdeict eBPF program returns **SK_PASS**), redirect it
+*		if the verdict eBPF program returns **SK_PASS**), redirect it
 *		to the socket referenced by *map* (of type
 *		**BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
 *		egress interfaces can be used for redirection. The
@@ -3661,10 +3682,59 @@ union bpf_attr {
 *		Redirect the packet to another net device of index *ifindex*
 *		and fill in L2 addresses from neighboring subsystem. This helper
 *		is somewhat similar to **bpf_redirect**\ (), except that it
-*		fills in e.g. MAC addresses based on the L3 information from
-*		the packet. This helper is supported for IPv4 and IPv6 protocols.
+*		populates L2 addresses as well, meaning, internally, the helper
+*		performs a FIB lookup based on the skb's networking header to
+*		get the address of the next hop and then relies on the neighbor
+*		lookup for the L2 address of the nexthop.
+*
 *		The *flags* argument is reserved and must be 0. The helper is
-*		currently only supported for tc BPF program types.
+*		currently only supported for tc BPF program types, and enabled
+*		for IPv4 and IPv6 protocols.
+*	Return
+*		The helper returns **TC_ACT_REDIRECT** on success or
+*		**TC_ACT_SHOT** on error.
+*
+* void *bpf_per_cpu_ptr(const void *percpu_ptr, u32 cpu)
+*	Description
+*		Take a pointer to a percpu ksym, *percpu_ptr*, and return a
+*		pointer to the percpu kernel variable on *cpu*. A ksym is an
+*		extern variable decorated with '__ksym'. For ksym, there is a
+*		global var (either static or global) defined of the same name
+*		in the kernel. The ksym is percpu if the global var is percpu.
+*		The returned pointer points to the global percpu var on *cpu*.
+*
+*		bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the
+*		kernel, except that bpf_per_cpu_ptr() may return NULL. This
+*		happens if *cpu* is larger than nr_cpu_ids. The caller of
+*		bpf_per_cpu_ptr() must check the returned value.
+*	Return
+*		A pointer pointing to the kernel percpu variable on *cpu*, or
+*		NULL, if *cpu* is invalid.
+*
+* void *bpf_this_cpu_ptr(const void *percpu_ptr)
+*	Description
+*		Take a pointer to a percpu ksym, *percpu_ptr*, and return a
+*		pointer to the percpu kernel variable on this cpu. See the
+*		description of 'ksym' in **bpf_per_cpu_ptr**\ ().
+*
+*		bpf_this_cpu_ptr() has the same semantic as this_cpu_ptr() in
+*		the kernel. Different from **bpf_per_cpu_ptr**\ (), it would
+*		never return NULL.
+*	Return
+*		A pointer pointing to the kernel percpu variable on this cpu.
+*
+* long bpf_redirect_peer(u32 ifindex, u64 flags)
+*	Description
+*		Redirect the packet to another net device of index *ifindex*.
+*		This helper is somewhat similar to **bpf_redirect**\ (), except
+*		that the redirection happens to the *ifindex*' peer device and
+*		the netns switch takes place from ingress to ingress without
+*		going through the CPU's backlog queue.
+*
+*		The *flags* argument is reserved and must be 0. The helper is
+*		currently only supported for tc BPF program types at the ingress
+*		hook and for veth device types. The peer device must reside in a
+*		different network namespace.
 *	Return
 *		The helper returns **TC_ACT_REDIRECT** on success or
 *		**TC_ACT_SHOT** on error.
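A minimal tc-ingress sketch of bpf_redirect_peer(); the peer ifindex is an assumption about the deployment (the container-side veth's index), not anything specified by this commit. Attach it with tc, e.g. as a "bpf da" filter on the host veth's ingress:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

#define PEER_IFINDEX 5	/* assumption: host-side veth ifindex */

SEC("classifier")
int tc_ingress_peer(struct __sk_buff *skb)
{
	/* ingress -> peer's ingress in the other netns, no backlog queue */
	return bpf_redirect_peer(PEER_IFINDEX, 0);
}

char LICENSE[] SEC("license") = "GPL";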
@@ -3823,6 +3893,9 @@ union bpf_attr {
 	FN(seq_printf_btf),		\
 	FN(skb_cgroup_classid),		\
 	FN(redirect_neigh),		\
+	FN(bpf_per_cpu_ptr),		\
+	FN(bpf_this_cpu_ptr),		\
+	FN(redirect_peer),		\
 	/* */

 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -390,6 +390,12 @@ struct extern_desc {
 		} kcfg;
 		struct {
 			unsigned long long addr;
+
+			/* target btf_id of the corresponding kernel var. */
+			int vmlinux_btf_id;
+
+			/* local btf_id of the ksym extern's type. */
+			__u32 type_id;
 		} ksym;
 	};
 };
@@ -2522,12 +2528,23 @@ static int bpf_object__load_vmlinux_btf(struct bpf_object *obj)
 {
 	bool need_vmlinux_btf = false;
 	struct bpf_program *prog;
-	int err;
+	int i, err;

 	/* CO-RE relocations need kernel BTF */
 	if (obj->btf_ext && obj->btf_ext->core_relo_info.len)
 		need_vmlinux_btf = true;

+	/* Support for typed ksyms needs kernel BTF */
+	for (i = 0; i < obj->nr_extern; i++) {
+		const struct extern_desc *ext;
+
+		ext = &obj->externs[i];
+		if (ext->type == EXT_KSYM && ext->ksym.type_id) {
+			need_vmlinux_btf = true;
+			break;
+		}
+	}
+
 	bpf_object__for_each_program(prog, obj) {
 		if (!prog->load)
 			continue;
@@ -3156,16 +3173,10 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
 				return -ENOTSUP;
 			}
 		} else if (strcmp(sec_name, KSYMS_SEC) == 0) {
-			const struct btf_type *vt;
-
 			ksym_sec = sec;
 			ext->type = EXT_KSYM;
-
-			vt = skip_mods_and_typedefs(obj->btf, t->type, NULL);
-			if (!btf_is_void(vt)) {
-				pr_warn("extern (ksym) '%s' is not typeless (void)\n", ext_name);
-				return -ENOTSUP;
-			}
+			skip_mods_and_typedefs(obj->btf, t->type,
+					       &ext->ksym.type_id);
 		} else {
 			pr_warn("unrecognized extern section '%s'\n", sec_name);
 			return -ENOTSUP;
@@ -4192,6 +4203,36 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map)
 	return 0;
 }

+static int init_map_slots(struct bpf_map *map)
+{
+	const struct bpf_map *targ_map;
+	unsigned int i;
+	int fd, err;
+
+	for (i = 0; i < map->init_slots_sz; i++) {
+		if (!map->init_slots[i])
+			continue;
+
+		targ_map = map->init_slots[i];
+		fd = bpf_map__fd(targ_map);
+		err = bpf_map_update_elem(map->fd, &i, &fd, 0);
+		if (err) {
+			err = -errno;
+			pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n",
+				map->name, i, targ_map->name,
+				fd, err);
+			return err;
+		}
+		pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n",
+			 map->name, i, targ_map->name, fd);
+	}
+
+	zfree(&map->init_slots);
+	map->init_slots_sz = 0;
+
+	return 0;
+}
+
 static int
 bpf_object__create_maps(struct bpf_object *obj)
 {
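init_map_slots() above services the declarative map-in-map form libbpf accepts in BPF C: inner maps named in an outer map's initializer become init_slots[] entries that libbpf resolves to fds at load time. A sketch of the producing side, modeled on the map-in-map selftests; names and sizes are illustrative:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct inner_map {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, int);
} inner_map1 SEC(".maps"), inner_map2 SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
	__uint(max_entries, 3);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(int));
	__array(values, struct inner_map);
} outer_arr SEC(".maps") = {
	/* these pointers become the init_slots[] that init_map_slots() fills */
	.values = { [0] = &inner_map1, [2] = &inner_map2 },
};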
@@ -4215,47 +4256,29 @@ bpf_object__create_maps(struct bpf_object *obj)
 		if (map->fd >= 0) {
 			pr_debug("map '%s': skipping creation (preset fd=%d)\n",
 				 map->name, map->fd);
-			continue;
-		}
-
-		err = bpf_object__create_map(obj, map);
-		if (err)
-			goto err_out;
-
-		pr_debug("map '%s': created successfully, fd=%d\n", map->name,
-			 map->fd);
-
-		if (bpf_map__is_internal(map)) {
-			err = bpf_object__populate_internal_map(obj, map);
-			if (err < 0) {
-				zclose(map->fd);
-				goto err_out;
-			}
-		}
-
-		if (map->init_slots_sz) {
-			for (j = 0; j < map->init_slots_sz; j++) {
-				const struct bpf_map *targ_map;
-				int fd;
-
-				if (!map->init_slots[j])
-					continue;
-
-				targ_map = map->init_slots[j];
-				fd = bpf_map__fd(targ_map);
-				err = bpf_map_update_elem(map->fd, &j, &fd, 0);
-				if (err) {
-					err = -errno;
-					pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n",
-						map->name, j, targ_map->name,
-						fd, err);
-					goto err_out;
-				}
-				pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n",
-					 map->name, j, targ_map->name, fd);
-			}
-			zfree(&map->init_slots);
-			map->init_slots_sz = 0;
-		}
+		} else {
+			err = bpf_object__create_map(obj, map);
+			if (err)
+				goto err_out;
+
+			pr_debug("map '%s': created successfully, fd=%d\n",
+				 map->name, map->fd);
+
+			if (bpf_map__is_internal(map)) {
+				err = bpf_object__populate_internal_map(obj, map);
+				if (err < 0) {
+					zclose(map->fd);
+					goto err_out;
+				}
+			}
+
+			if (map->init_slots_sz) {
+				err = init_map_slots(map);
+				if (err < 0) {
+					zclose(map->fd);
+					goto err_out;
+				}
+			}
+		}
 
 		if (map->pin_path && !map->pinned) {
@@ -5017,16 +5040,19 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
 static int bpf_core_calc_field_relo(const struct bpf_program *prog,
 				    const struct bpf_core_relo *relo,
 				    const struct bpf_core_spec *spec,
-				    __u32 *val, bool *validate)
+				    __u32 *val, __u32 *field_sz, __u32 *type_id,
+				    bool *validate)
 {
 	const struct bpf_core_accessor *acc;
 	const struct btf_type *t;
-	__u32 byte_off, byte_sz, bit_off, bit_sz;
+	__u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id;
 	const struct btf_member *m;
 	const struct btf_type *mt;
 	bool bitfield;
 	__s64 sz;
 
+	*field_sz = 0;
+
 	if (relo->kind == BPF_FIELD_EXISTS) {
 		*val = spec ? 1 : 0;
 		return 0;
@@ -5042,6 +5068,12 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog,
 	if (!acc->name) {
 		if (relo->kind == BPF_FIELD_BYTE_OFFSET) {
 			*val = spec->bit_offset / 8;
+			/* remember field size for load/store mem size */
+			sz = btf__resolve_size(spec->btf, acc->type_id);
+			if (sz < 0)
+				return -EINVAL;
+			*field_sz = sz;
+			*type_id = acc->type_id;
 		} else if (relo->kind == BPF_FIELD_BYTE_SIZE) {
 			sz = btf__resolve_size(spec->btf, acc->type_id);
 			if (sz < 0)
@@ -5058,7 +5090,7 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog,
 	}
 
 	m = btf_members(t) + acc->idx;
-	mt = skip_mods_and_typedefs(spec->btf, m->type, NULL);
+	mt = skip_mods_and_typedefs(spec->btf, m->type, &field_type_id);
 	bit_off = spec->bit_offset;
 	bit_sz = btf_member_bitfield_size(t, acc->idx);
 
@@ -5078,7 +5110,7 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog,
 			byte_off = bit_off / 8 / byte_sz * byte_sz;
 		}
 	} else {
-		sz = btf__resolve_size(spec->btf, m->type);
+		sz = btf__resolve_size(spec->btf, field_type_id);
 		if (sz < 0)
 			return -EINVAL;
 		byte_sz = sz;
@@ -5096,6 +5128,10 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog,
 	switch (relo->kind) {
 	case BPF_FIELD_BYTE_OFFSET:
 		*val = byte_off;
+		if (!bitfield) {
+			*field_sz = byte_sz;
+			*type_id = field_type_id;
+		}
 		break;
 	case BPF_FIELD_BYTE_SIZE:
 		*val = byte_sz;
@@ -5196,6 +5232,19 @@ struct bpf_core_relo_res
 	bool poison;
 	/* some relocations can't be validated against orig_val */
 	bool validate;
+	/* for field byte offset relocations or the forms:
+	 *     *(T *)(rX + <off>) = rY
+	 *     rX = *(T *)(rY + <off>),
+	 * we remember original and resolved field size to adjust direct
+	 * memory loads of pointers and integers; this is necessary for 32-bit
+	 * host kernel architectures, but also allows to automatically
+	 * relocate fields that were resized from, e.g., u32 to u64, etc.
+	 */
+	bool fail_memsz_adjust;
+	__u32 orig_sz;
+	__u32 orig_type_id;
+	__u32 new_sz;
+	__u32 new_type_id;
 };
 
 /* Calculate original and target relocation values, given local and target
@@ -5217,10 +5266,56 @@ static int bpf_core_calc_relo(const struct bpf_program *prog,
 	res->new_val = 0;
 	res->poison = false;
 	res->validate = true;
+	res->fail_memsz_adjust = false;
+	res->orig_sz = res->new_sz = 0;
+	res->orig_type_id = res->new_type_id = 0;
 
 	if (core_relo_is_field_based(relo->kind)) {
-		err = bpf_core_calc_field_relo(prog, relo, local_spec, &res->orig_val, &res->validate);
-		err = err ?: bpf_core_calc_field_relo(prog, relo, targ_spec, &res->new_val, NULL);
+		err = bpf_core_calc_field_relo(prog, relo, local_spec,
+					       &res->orig_val, &res->orig_sz,
+					       &res->orig_type_id, &res->validate);
+		err = err ?: bpf_core_calc_field_relo(prog, relo, targ_spec,
+						      &res->new_val, &res->new_sz,
+						      &res->new_type_id, NULL);
+		if (err)
+			goto done;
+		/* Validate if it's safe to adjust load/store memory size.
+		 * Adjustments are performed only if original and new memory
+		 * sizes differ.
+		 */
+		res->fail_memsz_adjust = false;
+		if (res->orig_sz != res->new_sz) {
+			const struct btf_type *orig_t, *new_t;
+
+			orig_t = btf__type_by_id(local_spec->btf, res->orig_type_id);
+			new_t = btf__type_by_id(targ_spec->btf, res->new_type_id);
+
+			/* There are two use cases in which it's safe to
+			 * adjust load/store's mem size:
+			 *   - reading a 32-bit kernel pointer, while on BPF
+			 *   size pointers are always 64-bit; in this case
+			 *   it's safe to "downsize" instruction size due to
+			 *   pointer being treated as unsigned integer with
+			 *   zero-extended upper 32-bits;
+			 *   - reading unsigned integers, again due to
+			 *   zero-extension is preserving the value correctly.
+			 *
+			 * In all other cases it's incorrect to attempt to
+			 * load/store field because read value will be
+			 * incorrect, so we poison relocated instruction.
+			 */
+			if (btf_is_ptr(orig_t) && btf_is_ptr(new_t))
+				goto done;
+			if (btf_is_int(orig_t) && btf_is_int(new_t) &&
+			    btf_int_encoding(orig_t) != BTF_INT_SIGNED &&
+			    btf_int_encoding(new_t) != BTF_INT_SIGNED)
+				goto done;
+
+			/* mark as invalid mem size adjustment, but this will
+			 * only be checked for LDX/STX/ST insns
+			 */
+			res->fail_memsz_adjust = true;
+		}
 	} else if (core_relo_is_type_based(relo->kind)) {
 		err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val);
 		err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val);
@@ -5229,6 +5324,7 @@ static int bpf_core_calc_relo(const struct bpf_program *prog,
 		err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val);
 	}
 
+done:
 	if (err == -EUCLEAN) {
 		/* EUCLEAN is used to signal instruction poisoning request */
 		res->poison = true;
@@ -5268,6 +5364,28 @@ static bool is_ldimm64(struct bpf_insn *insn)
 	return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
 }
 
+static int insn_bpf_size_to_bytes(struct bpf_insn *insn)
+{
+	switch (BPF_SIZE(insn->code)) {
+	case BPF_DW: return 8;
+	case BPF_W: return 4;
+	case BPF_H: return 2;
+	case BPF_B: return 1;
+	default: return -1;
+	}
+}
+
+static int insn_bytes_to_bpf_size(__u32 sz)
+{
+	switch (sz) {
+	case 8: return BPF_DW;
+	case 4: return BPF_W;
+	case 2: return BPF_H;
+	case 1: return BPF_B;
+	default: return -1;
+	}
+}
+
 /*
  * Patch relocatable BPF instruction.
  *
@@ -5277,10 +5395,13 @@ static bool is_ldimm64(struct bpf_insn *insn)
  * spec, and is checked before patching instruction. If actual insn->imm value
  * is wrong, bail out with error.
  *
- * Currently three kinds of BPF instructions are supported:
+ * Currently supported classes of BPF instruction are:
  * 1. rX = <imm> (assignment with immediate operand);
  * 2. rX += <imm> (arithmetic operations with immediate operand);
- * 3. rX = <imm64> (load with 64-bit immediate value).
+ * 3. rX = <imm64> (load with 64-bit immediate value);
+ * 4. rX = *(T *)(rY + <off>), where T is one of {u8, u16, u32, u64};
+ * 5. *(T *)(rX + <off>) = rY, where T is one of {u8, u16, u32, u64};
+ * 6. *(T *)(rX + <off>) = <imm>, where T is one of {u8, u16, u32, u64}.
  */
 static int bpf_core_patch_insn(struct bpf_program *prog,
 			       const struct bpf_core_relo *relo,
@@ -5304,6 +5425,7 @@ static int bpf_core_patch_insn(struct bpf_program *prog,
 	class = BPF_CLASS(insn->code);
 
 	if (res->poison) {
+poison:
 		/* poison second part of ldimm64 to avoid confusing error from
 		 * verifier about "unknown opcode 00"
 		 */
@@ -5346,10 +5468,39 @@ static int bpf_core_patch_insn(struct bpf_program *prog,
 				prog->name, relo_idx, insn_idx, new_val);
 			return -ERANGE;
 		}
+		if (res->fail_memsz_adjust) {
+			pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) accesses field incorrectly. "
+				"Make sure you are accessing pointers, unsigned integers, or fields of matching type and size.\n",
+				prog->name, relo_idx, insn_idx);
+			goto poison;
+		}
+
 		orig_val = insn->off;
 		insn->off = new_val;
 		pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n",
 			 prog->name, relo_idx, insn_idx, orig_val, new_val);
+
+		if (res->new_sz != res->orig_sz) {
+			int insn_bytes_sz, insn_bpf_sz;
+
+			insn_bytes_sz = insn_bpf_size_to_bytes(insn);
+			if (insn_bytes_sz != res->orig_sz) {
+				pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) unexpected mem size: got %d, exp %u\n",
+					prog->name, relo_idx, insn_idx, insn_bytes_sz, res->orig_sz);
+				return -EINVAL;
+			}
+
+			insn_bpf_sz = insn_bytes_to_bpf_size(res->new_sz);
+			if (insn_bpf_sz < 0) {
+				pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) invalid new mem size: %u\n",
+					prog->name, relo_idx, insn_idx, res->new_sz);
+				return -EINVAL;
+			}
+
+			insn->code = BPF_MODE(insn->code) | insn_bpf_sz | BPF_CLASS(insn->code);
+			pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) mem_sz %u -> %u\n",
+				 prog->name, relo_idx, insn_idx, res->orig_sz, res->new_sz);
+		}
 		break;
 	case BPF_LD: {
 		__u64 imm;
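
A note on what the LDX/ST/STX resizing above means on the BPF program side: a direct CO-RE field access compiled at one width keeps working against a kernel whose field has a different unsigned width. A minimal sketch, assuming the usual __attribute__((preserve_access_index)) convention (struct and field names here are hypothetical, not from this patch):

	/* Locally, fld is a u32. If the running kernel's fld is a u64,
	 * libbpf patches the emitted 4-byte load into an 8-byte one;
	 * this is safe only because zero-extension preserves unsigned
	 * values, which is exactly what bpf_core_calc_relo() checks.
	 */
	struct kernel_struct___local {
		unsigned int fld;
	} __attribute__((preserve_access_index));

	static __u64 read_fld(struct kernel_struct___local *s)
	{
		return s->fld;
	}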
@@ -5691,7 +5842,7 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
 		return 0;
 
 	if (targ_btf_path)
-		targ_btf = btf__parse_elf(targ_btf_path, NULL);
+		targ_btf = btf__parse(targ_btf_path, NULL);
 	else
 		targ_btf = obj->btf_vmlinux;
 	if (IS_ERR_OR_NULL(targ_btf)) {
@@ -5742,6 +5893,11 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
 				err = -EINVAL;
 				goto out;
 			}
+			/* no need to apply CO-RE relocation if the program is
+			 * not going to be loaded
+			 */
+			if (!prog->load)
+				continue;
 
 			err = bpf_core_apply_relo(prog, rec, i, obj->btf,
 						  targ_btf, cand_cache);
@@ -5800,8 +5956,13 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
 			insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
 			insn[1].imm = ext->kcfg.data_off;
 		} else /* EXT_KSYM */ {
-			insn[0].imm = (__u32)ext->ksym.addr;
-			insn[1].imm = ext->ksym.addr >> 32;
+			if (ext->ksym.type_id) { /* typed ksyms */
+				insn[0].src_reg = BPF_PSEUDO_BTF_ID;
+				insn[0].imm = ext->ksym.vmlinux_btf_id;
+			} else { /* typeless ksyms */
+				insn[0].imm = (__u32)ext->ksym.addr;
+				insn[1].imm = ext->ksym.addr >> 32;
+			}
 		}
 		relo->processed = true;
 		break;
@@ -6933,10 +7094,72 @@ out:
 	return err;
 }
 
+static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
+{
+	struct extern_desc *ext;
+	int i, id;
+
+	for (i = 0; i < obj->nr_extern; i++) {
+		const struct btf_type *targ_var, *targ_type;
+		__u32 targ_type_id, local_type_id;
+		const char *targ_var_name;
+		int ret;
+
+		ext = &obj->externs[i];
+		if (ext->type != EXT_KSYM || !ext->ksym.type_id)
+			continue;
+
+		id = btf__find_by_name_kind(obj->btf_vmlinux, ext->name,
+					    BTF_KIND_VAR);
+		if (id <= 0) {
+			pr_warn("extern (ksym) '%s': failed to find BTF ID in vmlinux BTF.\n",
+				ext->name);
+			return -ESRCH;
+		}
+
+		/* find local type_id */
+		local_type_id = ext->ksym.type_id;
+
+		/* find target type_id */
+		targ_var = btf__type_by_id(obj->btf_vmlinux, id);
+		targ_var_name = btf__name_by_offset(obj->btf_vmlinux,
+						    targ_var->name_off);
+		targ_type = skip_mods_and_typedefs(obj->btf_vmlinux,
+						   targ_var->type,
+						   &targ_type_id);
+
+		ret = bpf_core_types_are_compat(obj->btf, local_type_id,
+						obj->btf_vmlinux, targ_type_id);
+		if (ret <= 0) {
+			const struct btf_type *local_type;
+			const char *targ_name, *local_name;
+
+			local_type = btf__type_by_id(obj->btf, local_type_id);
+			local_name = btf__name_by_offset(obj->btf,
+							 local_type->name_off);
+			targ_name = btf__name_by_offset(obj->btf_vmlinux,
+							targ_type->name_off);
+
+			pr_warn("extern (ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n",
+				ext->name, local_type_id,
+				btf_kind_str(local_type), local_name, targ_type_id,
+				btf_kind_str(targ_type), targ_name);
+			return -EINVAL;
+		}
+
+		ext->is_set = true;
+		ext->ksym.vmlinux_btf_id = id;
+		pr_debug("extern (ksym) '%s': resolved to [%d] %s %s\n",
+			 ext->name, id, btf_kind_str(targ_var), targ_var_name);
+	}
+	return 0;
+}
+
 static int bpf_object__resolve_externs(struct bpf_object *obj,
 				       const char *extra_kconfig)
 {
 	bool need_config = false, need_kallsyms = false;
+	bool need_vmlinux_btf = false;
 	struct extern_desc *ext;
 	void *kcfg_data = NULL;
 	int err, i;
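
For context, the externs that bpf_object__resolve_ksyms_btf_id() resolves are declared in BPF C with the __ksym section attribute; roughly (these two variables are the ones the ksyms_btf selftest further below targets):

	extern const struct rq runqueues __ksym;	/* typed ksym, struct-typed per-CPU var */
	extern const int bpf_prog_active __ksym;	/* typed ksym, int-typed per-CPU var */

A typeless extern (plain `extern const void foo __ksym;`) keeps taking the kallsyms-address path seen in bpf_object__relocate_data() above.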
@@ -6967,7 +7190,10 @@ static int bpf_object__resolve_externs(struct bpf_object *obj,
 		    strncmp(ext->name, "CONFIG_", 7) == 0) {
 			need_config = true;
 		} else if (ext->type == EXT_KSYM) {
-			need_kallsyms = true;
+			if (ext->ksym.type_id)
+				need_vmlinux_btf = true;
+			else
+				need_kallsyms = true;
 		} else {
 			pr_warn("unrecognized extern '%s'\n", ext->name);
 			return -EINVAL;
@@ -6996,6 +7222,11 @@ static int bpf_object__resolve_externs(struct bpf_object *obj,
 		if (err)
 			return -EINVAL;
 	}
+	if (need_vmlinux_btf) {
+		err = bpf_object__resolve_ksyms_btf_id(obj);
+		if (err)
+			return -EINVAL;
+	}
 	for (i = 0; i < obj->nr_extern; i++) {
 		ext = &obj->externs[i];
 
@@ -7028,10 +7259,10 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
 	}
 
 	err = bpf_object__probe_loading(obj);
+	err = err ? : bpf_object__load_vmlinux_btf(obj);
 	err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
 	err = err ? : bpf_object__sanitize_and_load_btf(obj);
 	err = err ? : bpf_object__sanitize_maps(obj);
-	err = err ? : bpf_object__load_vmlinux_btf(obj);
 	err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
 	err = err ? : bpf_object__create_maps(obj);
 	err = err ? : bpf_object__relocate(obj, attr->target_btf_path);
@@ -10353,9 +10584,8 @@ int bpf_program__set_attach_target(struct bpf_program *prog,
 		btf_id = libbpf_find_prog_btf_id(attach_func_name,
 						 attach_prog_fd);
 	else
-		btf_id = __find_vmlinux_btf_id(prog->obj->btf_vmlinux,
-					       attach_func_name,
-					       prog->expected_attach_type);
+		btf_id = libbpf_find_vmlinux_btf_id(attach_func_name,
+						    prog->expected_attach_type);
 
 	if (btf_id < 0)
 		return btf_id;
@@ -705,7 +705,7 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
 	struct xsk_ctx *ctx;
 	int err, ifindex;
 
-	if (!umem || !xsk_ptr || !(rx || tx) || !fill || !comp)
+	if (!umem || !xsk_ptr || !(rx || tx))
 		return -EFAULT;
 
 	xsk = calloc(1, sizeof(*xsk));
@@ -735,6 +735,11 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
 
 	ctx = xsk_get_ctx(umem, ifindex, queue_id);
 	if (!ctx) {
+		if (!fill || !comp) {
+			err = -EFAULT;
+			goto out_socket;
+		}
+
 		ctx = xsk_create_ctx(xsk, umem, ifindex, ifname, queue_id,
 				     fill, comp);
 		if (!ctx) {
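
With the check moved as above, only the socket that creates the context for a given (ifindex, queue_id) has to supply fill and completion rings; a later socket sharing that context may pass NULL for both. A sketch of such a caller (variable names illustrative, error handling elided):

	/* first socket on this (ifname, queue) creates the ctx: fill/comp required */
	err = xsk_socket__create_shared(&xsk1, ifname, queue_id, umem,
					&rx1, &tx1, &fill, &comp, &cfg);
	/* second socket attaches to the existing ctx: fill/comp may now be NULL */
	err = err ?: xsk_socket__create_shared(&xsk2, ifname, queue_id, umem,
					       &rx2, &tx2, NULL, NULL, &cfg);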
@@ -7,6 +7,44 @@ General instructions on running selftests can be found in
 Additional information about selftest failures are
 documented here.
 
+profiler[23] test failures with clang/llvm <12.0.0
+==================================================
+
+With clang/llvm <12.0.0, the profiler[23] test may fail.
+The symptom looks like
+
+.. code-block:: c
+
+  // r9 is a pointer to map_value
+  // r7 is a scalar
+  17:       bf 96 00 00 00 00 00 00 r6 = r9
+  18:       0f 76 00 00 00 00 00 00 r6 += r7
+  math between map_value pointer and register with unbounded min value is not allowed
+
+  // the instructions below will not be seen in the verifier log
+  19:       a5 07 01 00 01 01 00 00 if r7 < 257 goto +1
+  20:       bf 96 00 00 00 00 00 00 r6 = r9
+  // r6 is used here
+
+The verifier will reject such code with above error.
+At insn 18 the r7 is indeed unbounded. The later insn 19 checks the bounds and
+the insn 20 undoes map_value addition. It is currently impossible for the
+verifier to understand such speculative pointer arithmetic.
+Hence
+    https://reviews.llvm.org/D85570
+addresses it on the compiler side. It was committed on llvm 12.
+
+The corresponding C code
+.. code-block:: c
+
+  for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) {
+          filepart_length = bpf_probe_read_str(payload, ...);
+          if (filepart_length <= MAX_PATH) {
+                  barrier_var(filepart_length); // workaround
+                  payload += filepart_length;
+          }
+  }
+
 bpf_iter test failures with clang/llvm 10.0.0
 =============================================
 
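For reference, barrier_var() in the workaround above is an inline-asm compiler barrier, roughly as defined in the selftest sources; it forces the bounds-checked value through a register so the compiler cannot substitute the unchecked one:

.. code-block:: c

  #define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var))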
@@ -195,13 +195,13 @@ static struct bpf_align_test tests[] = {
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.matches = {
 			{7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
-			{8, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+			{8, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"},
 			{9, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
-			{10, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+			{10, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"},
 			{11, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
-			{12, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+			{12, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"},
 			{13, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
-			{14, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+			{14, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"},
 			{15, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
 			{16, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
 		},
@@ -518,7 +518,7 @@ static struct bpf_align_test tests[] = {
 			 * the total offset is 4-byte aligned and meets the
 			 * load's requirements.
 			 */
-			{20, "R5=pkt(id=1,off=0,r=4,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc)"},
+			{20, "R5=pkt(id=2,off=0,r=4,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc)"},
 
 		},
 	},
@@ -561,18 +561,18 @@ static struct bpf_align_test tests[] = {
 			/* Adding 14 makes R6 be (4n+2) */
 			{11, "R6_w=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"},
 			/* Subtracting from packet pointer overflows ubounds */
-			{13, "R5_w=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c)"},
+			{13, "R5_w=pkt(id=2,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c)"},
 			/* New unknown value in R7 is (4n), >= 76 */
 			{15, "R7_w=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"},
 			/* Adding it to packet pointer gives nice bounds again */
-			{16, "R5_w=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"},
+			{16, "R5_w=pkt(id=3,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"},
 			/* At the time the word size load is performed from R5,
 			 * its total fixed offset is NET_IP_ALIGN + reg->off (0)
 			 * which is 2. Then the variable offset is (4n+2), so
 			 * the total offset is 4-byte aligned and meets the
 			 * load's requirements.
 			 */
-			{20, "R5=pkt(id=2,off=0,r=4,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"},
+			{20, "R5=pkt(id=3,off=0,r=4,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"},
 		},
 	},
 };
@@ -55,10 +55,10 @@ static int kern_sync_rcu(void)
 
 static void test_lookup_update(void)
 {
-	int err, key = 0, val, i;
+	int map1_fd, map2_fd, map3_fd, map4_fd, map5_fd, map1_id, map2_id;
+	int outer_arr_fd, outer_hash_fd, outer_arr_dyn_fd;
 	struct test_btf_map_in_map *skel;
-	int outer_arr_fd, outer_hash_fd;
-	int fd, map1_fd, map2_fd, map1_id, map2_id;
+	int err, key = 0, val, i, fd;
 
 	skel = test_btf_map_in_map__open_and_load();
 	if (CHECK(!skel, "skel_open", "failed to open&load skeleton\n"))
@@ -70,32 +70,45 @@ static void test_lookup_update(void)
 
 	map1_fd = bpf_map__fd(skel->maps.inner_map1);
 	map2_fd = bpf_map__fd(skel->maps.inner_map2);
+	map3_fd = bpf_map__fd(skel->maps.inner_map3);
+	map4_fd = bpf_map__fd(skel->maps.inner_map4);
+	map5_fd = bpf_map__fd(skel->maps.inner_map5);
+	outer_arr_dyn_fd = bpf_map__fd(skel->maps.outer_arr_dyn);
 	outer_arr_fd = bpf_map__fd(skel->maps.outer_arr);
 	outer_hash_fd = bpf_map__fd(skel->maps.outer_hash);
 
-	/* inner1 = input, inner2 = input + 1 */
-	map1_fd = bpf_map__fd(skel->maps.inner_map1);
+	/* inner1 = input, inner2 = input + 1, inner3 = input + 2 */
 	bpf_map_update_elem(outer_arr_fd, &key, &map1_fd, 0);
-	map2_fd = bpf_map__fd(skel->maps.inner_map2);
 	bpf_map_update_elem(outer_hash_fd, &key, &map2_fd, 0);
+	bpf_map_update_elem(outer_arr_dyn_fd, &key, &map3_fd, 0);
 	skel->bss->input = 1;
 	usleep(1);
 
 	bpf_map_lookup_elem(map1_fd, &key, &val);
 	CHECK(val != 1, "inner1", "got %d != exp %d\n", val, 1);
 	bpf_map_lookup_elem(map2_fd, &key, &val);
 	CHECK(val != 2, "inner2", "got %d != exp %d\n", val, 2);
+	bpf_map_lookup_elem(map3_fd, &key, &val);
+	CHECK(val != 3, "inner3", "got %d != exp %d\n", val, 3);
 
-	/* inner1 = input + 1, inner2 = input */
+	/* inner2 = input, inner1 = input + 1, inner4 = input + 2 */
 	bpf_map_update_elem(outer_arr_fd, &key, &map2_fd, 0);
 	bpf_map_update_elem(outer_hash_fd, &key, &map1_fd, 0);
+	bpf_map_update_elem(outer_arr_dyn_fd, &key, &map4_fd, 0);
 	skel->bss->input = 3;
 	usleep(1);
 
 	bpf_map_lookup_elem(map1_fd, &key, &val);
 	CHECK(val != 4, "inner1", "got %d != exp %d\n", val, 4);
 	bpf_map_lookup_elem(map2_fd, &key, &val);
 	CHECK(val != 3, "inner2", "got %d != exp %d\n", val, 3);
+	bpf_map_lookup_elem(map4_fd, &key, &val);
+	CHECK(val != 5, "inner4", "got %d != exp %d\n", val, 5);
+
+	/* inner5 = input + 2 */
+	bpf_map_update_elem(outer_arr_dyn_fd, &key, &map5_fd, 0);
+	skel->bss->input = 5;
+	usleep(1);
+	bpf_map_lookup_elem(map5_fd, &key, &val);
+	CHECK(val != 7, "inner5", "got %d != exp %d\n", val, 7);
 
 	for (i = 0; i < 5; i++) {
 		val = i % 2 ? map1_fd : map2_fd;
@@ -106,7 +119,13 @@ static void test_lookup_update(void)
 		}
 		err = bpf_map_update_elem(outer_arr_fd, &key, &val, 0);
 		if (CHECK_FAIL(err)) {
-			printf("failed to update hash_of_maps on iter #%d\n", i);
+			printf("failed to update array_of_maps on iter #%d\n", i);
+			goto cleanup;
+		}
+		val = i % 2 ? map4_fd : map5_fd;
+		err = bpf_map_update_elem(outer_arr_dyn_fd, &key, &val, 0);
+		if (CHECK_FAIL(err)) {
+			printf("failed to update array_of_maps (dyn) on iter #%d\n", i);
 			goto cleanup;
 		}
 	}
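
The outer_arr_dyn map driven above relies on inner arrays being allowed to differ in max_entries, opted into with the BPF_F_INNER_MAP map flag. A rough sketch of the BPF-side declarations (shapes mirror test_btf_map_in_map.c; sizes illustrative):

	struct inner_map_dyn {
		__uint(type, BPF_MAP_TYPE_ARRAY);
		__uint(map_flags, BPF_F_INNER_MAP);	/* relaxes the fixed max_entries rule */
		__uint(max_entries, 3);
		__type(key, int);
		__type(value, int);
	} inner_map3 SEC(".maps");

	struct {
		__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
		__uint(max_entries, 3);
		__uint(key_size, sizeof(int));
		__uint(value_size, sizeof(int));
		__array(values, struct inner_map_dyn);
	} outer_arr_dyn SEC(".maps");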

tools/testing/selftests/bpf/prog_tests/core_autosize.c (new file, 225 lines)
@@ -0,0 +1,225 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+/* real layout and sizes according to test's (32-bit) BTF
+ * needs to be defined before skeleton is included */
+struct test_struct___real {
+	unsigned int ptr; /* can't use `void *`, it is always 8 byte in BPF target */
+	unsigned int val2;
+	unsigned long long val1;
+	unsigned short val3;
+	unsigned char val4;
+	unsigned char _pad;
+};
+
+#include "test_core_autosize.skel.h"
+
+static int duration = 0;
+
+static struct {
+	unsigned long long ptr_samesized;
+	unsigned long long val1_samesized;
+	unsigned long long val2_samesized;
+	unsigned long long val3_samesized;
+	unsigned long long val4_samesized;
+	struct test_struct___real output_samesized;
+
+	unsigned long long ptr_downsized;
+	unsigned long long val1_downsized;
+	unsigned long long val2_downsized;
+	unsigned long long val3_downsized;
+	unsigned long long val4_downsized;
+	struct test_struct___real output_downsized;
+
+	unsigned long long ptr_probed;
+	unsigned long long val1_probed;
+	unsigned long long val2_probed;
+	unsigned long long val3_probed;
+	unsigned long long val4_probed;
+
+	unsigned long long ptr_signed;
+	unsigned long long val1_signed;
+	unsigned long long val2_signed;
+	unsigned long long val3_signed;
+	unsigned long long val4_signed;
+	struct test_struct___real output_signed;
+} out;
+
+void test_core_autosize(void)
+{
+	char btf_file[] = "/tmp/core_autosize.btf.XXXXXX";
+	int err, fd = -1, zero = 0;
+	int char_id, short_id, int_id, long_long_id, void_ptr_id, id;
+	struct test_core_autosize* skel = NULL;
+	struct bpf_object_load_attr load_attr = {};
+	struct bpf_program *prog;
+	struct bpf_map *bss_map;
+	struct btf *btf = NULL;
+	size_t written;
+	const void *raw_data;
+	__u32 raw_sz;
+	FILE *f = NULL;
+
+	btf = btf__new_empty();
+	if (!ASSERT_OK_PTR(btf, "empty_btf"))
+		return;
+	/* Emit the following struct with 32-bit pointer size:
+	 *
+	 * struct test_struct {
+	 *	void *ptr;
+	 *	unsigned long val2;
+	 *	unsigned long long val1;
+	 *	unsigned short val3;
+	 *	unsigned char val4;
+	 *	char: 8;
+	 * };
+	 *
+	 * This struct is going to be used as the "kernel BTF" for this test.
+	 * It's equivalent memory-layout-wise to test_struct__real above.
+	 */
+
+	/* force 32-bit pointer size */
+	btf__set_pointer_size(btf, 4);
+
+	char_id = btf__add_int(btf, "unsigned char", 1, 0);
+	ASSERT_EQ(char_id, 1, "char_id");
+	short_id = btf__add_int(btf, "unsigned short", 2, 0);
+	ASSERT_EQ(short_id, 2, "short_id");
+	/* "long unsigned int" of 4 byte size tells BTF that sizeof(void *) == 4 */
+	int_id = btf__add_int(btf, "long unsigned int", 4, 0);
+	ASSERT_EQ(int_id, 3, "int_id");
+	long_long_id = btf__add_int(btf, "unsigned long long", 8, 0);
+	ASSERT_EQ(long_long_id, 4, "long_long_id");
+	void_ptr_id = btf__add_ptr(btf, 0);
+	ASSERT_EQ(void_ptr_id, 5, "void_ptr_id");
+
+	id = btf__add_struct(btf, "test_struct", 20 /* bytes */);
+	ASSERT_EQ(id, 6, "struct_id");
+	err = btf__add_field(btf, "ptr", void_ptr_id, 0, 0);
+	err = err ?: btf__add_field(btf, "val2", int_id, 32, 0);
+	err = err ?: btf__add_field(btf, "val1", long_long_id, 64, 0);
+	err = err ?: btf__add_field(btf, "val3", short_id, 128, 0);
+	err = err ?: btf__add_field(btf, "val4", char_id, 144, 0);
+	ASSERT_OK(err, "struct_fields");
+
+	fd = mkstemp(btf_file);
+	if (CHECK(fd < 0, "btf_tmp", "failed to create file: %d\n", fd))
+		goto cleanup;
+	f = fdopen(fd, "w");
+	if (!ASSERT_OK_PTR(f, "btf_fdopen"))
+		goto cleanup;
+
+	raw_data = btf__get_raw_data(btf, &raw_sz);
+	if (!ASSERT_OK_PTR(raw_data, "raw_data"))
+		goto cleanup;
+	written = fwrite(raw_data, 1, raw_sz, f);
+	if (CHECK(written != raw_sz, "btf_write", "written: %zu, errno: %d\n", written, errno))
+		goto cleanup;
+	fflush(f);
+	fclose(f);
+	f = NULL;
+	close(fd);
+	fd = -1;
+
+	/* open and load BPF program with custom BTF as the kernel BTF */
+	skel = test_core_autosize__open();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return;
+
+	/* disable handle_signed() for now */
+	prog = bpf_object__find_program_by_name(skel->obj, "handle_signed");
+	if (!ASSERT_OK_PTR(prog, "prog_find"))
+		goto cleanup;
+	bpf_program__set_autoload(prog, false);
+
+	load_attr.obj = skel->obj;
+	load_attr.target_btf_path = btf_file;
+	err = bpf_object__load_xattr(&load_attr);
+	if (!ASSERT_OK(err, "prog_load"))
+		goto cleanup;
+
+	prog = bpf_object__find_program_by_name(skel->obj, "handle_samesize");
+	if (!ASSERT_OK_PTR(prog, "prog_find"))
+		goto cleanup;
+	skel->links.handle_samesize = bpf_program__attach(prog);
+	if (!ASSERT_OK_PTR(skel->links.handle_samesize, "prog_attach"))
+		goto cleanup;
+
+	prog = bpf_object__find_program_by_name(skel->obj, "handle_downsize");
+	if (!ASSERT_OK_PTR(prog, "prog_find"))
+		goto cleanup;
+	skel->links.handle_downsize = bpf_program__attach(prog);
+	if (!ASSERT_OK_PTR(skel->links.handle_downsize, "prog_attach"))
+		goto cleanup;
+
+	prog = bpf_object__find_program_by_name(skel->obj, "handle_probed");
+	if (!ASSERT_OK_PTR(prog, "prog_find"))
+		goto cleanup;
+	skel->links.handle_probed = bpf_program__attach(prog);
+	if (!ASSERT_OK_PTR(skel->links.handle_probed, "prog_attach"))
+		goto cleanup;
+
+	usleep(1);
+
+	bss_map = bpf_object__find_map_by_name(skel->obj, "test_cor.bss");
+	if (!ASSERT_OK_PTR(bss_map, "bss_map_find"))
+		goto cleanup;
+
+	err = bpf_map_lookup_elem(bpf_map__fd(bss_map), &zero, (void *)&out);
+	if (!ASSERT_OK(err, "bss_lookup"))
+		goto cleanup;
+
+	ASSERT_EQ(out.ptr_samesized, 0x01020304, "ptr_samesized");
+	ASSERT_EQ(out.val1_samesized, 0x1020304050607080, "val1_samesized");
+	ASSERT_EQ(out.val2_samesized, 0x0a0b0c0d, "val2_samesized");
+	ASSERT_EQ(out.val3_samesized, 0xfeed, "val3_samesized");
+	ASSERT_EQ(out.val4_samesized, 0xb9, "val4_samesized");
+	ASSERT_EQ(out.output_samesized.ptr, 0x01020304, "ptr_samesized");
+	ASSERT_EQ(out.output_samesized.val1, 0x1020304050607080, "val1_samesized");
+	ASSERT_EQ(out.output_samesized.val2, 0x0a0b0c0d, "val2_samesized");
+	ASSERT_EQ(out.output_samesized.val3, 0xfeed, "val3_samesized");
+	ASSERT_EQ(out.output_samesized.val4, 0xb9, "val4_samesized");
+
+	ASSERT_EQ(out.ptr_downsized, 0x01020304, "ptr_downsized");
+	ASSERT_EQ(out.val1_downsized, 0x1020304050607080, "val1_downsized");
+	ASSERT_EQ(out.val2_downsized, 0x0a0b0c0d, "val2_downsized");
+	ASSERT_EQ(out.val3_downsized, 0xfeed, "val3_downsized");
+	ASSERT_EQ(out.val4_downsized, 0xb9, "val4_downsized");
+	ASSERT_EQ(out.output_downsized.ptr, 0x01020304, "ptr_downsized");
+	ASSERT_EQ(out.output_downsized.val1, 0x1020304050607080, "val1_downsized");
+	ASSERT_EQ(out.output_downsized.val2, 0x0a0b0c0d, "val2_downsized");
+	ASSERT_EQ(out.output_downsized.val3, 0xfeed, "val3_downsized");
+	ASSERT_EQ(out.output_downsized.val4, 0xb9, "val4_downsized");
+
+	ASSERT_EQ(out.ptr_probed, 0x01020304, "ptr_probed");
+	ASSERT_EQ(out.val1_probed, 0x1020304050607080, "val1_probed");
+	ASSERT_EQ(out.val2_probed, 0x0a0b0c0d, "val2_probed");
+	ASSERT_EQ(out.val3_probed, 0xfeed, "val3_probed");
+	ASSERT_EQ(out.val4_probed, 0xb9, "val4_probed");
+
+	test_core_autosize__destroy(skel);
+	skel = NULL;
+
+	/* now re-load with handle_signed() enabled, it should fail loading */
+	skel = test_core_autosize__open();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return;
+
+	load_attr.obj = skel->obj;
+	load_attr.target_btf_path = btf_file;
+	err = bpf_object__load_xattr(&load_attr);
+	if (!ASSERT_ERR(err, "bad_prog_load"))
+		goto cleanup;
+
+cleanup:
+	if (f)
+		fclose(f);
+	if (fd >= 0)
+		close(fd);
+	remove(btf_file);
+	btf__free(btf);
+	test_core_autosize__destroy(skel);
+}
@@ -7,40 +7,28 @@
 
 static int duration;
 
-static __u64 kallsyms_find(const char *sym)
-{
-	char type, name[500];
-	__u64 addr, res = 0;
-	FILE *f;
-
-	f = fopen("/proc/kallsyms", "r");
-	if (CHECK(!f, "kallsyms_fopen", "failed to open: %d\n", errno))
-		return 0;
-
-	while (fscanf(f, "%llx %c %499s%*[^\n]\n", &addr, &type, name) > 0) {
-		if (strcmp(name, sym) == 0) {
-			res = addr;
-			goto out;
-		}
-	}
-
-	CHECK(false, "not_found", "symbol %s not found\n", sym);
-out:
-	fclose(f);
-	return res;
-}
-
 void test_ksyms(void)
 {
-	__u64 per_cpu_start_addr = kallsyms_find("__per_cpu_start");
-	__u64 link_fops_addr = kallsyms_find("bpf_link_fops");
 	const char *btf_path = "/sys/kernel/btf/vmlinux";
 	struct test_ksyms *skel;
 	struct test_ksyms__data *data;
+	__u64 link_fops_addr, per_cpu_start_addr;
 	struct stat st;
 	__u64 btf_size;
 	int err;
 
+	err = kallsyms_find("bpf_link_fops", &link_fops_addr);
+	if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno))
+		return;
+	if (CHECK(err == -ENOENT, "ksym_find", "symbol 'bpf_link_fops' not found\n"))
+		return;
+
+	err = kallsyms_find("__per_cpu_start", &per_cpu_start_addr);
+	if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno))
+		return;
+	if (CHECK(err == -ENOENT, "ksym_find", "symbol 'per_cpu_start' not found\n"))
+		return;
+
 	if (CHECK(stat(btf_path, &st), "stat_btf", "err %d\n", errno))
 		return;
 	btf_size = st.st_size;

tools/testing/selftests/bpf/prog_tests/ksyms_btf.c (new file, 88 lines)
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Google */
+
+#include <test_progs.h>
+#include <bpf/libbpf.h>
+#include <bpf/btf.h>
+#include "test_ksyms_btf.skel.h"
+
+static int duration;
+
+void test_ksyms_btf(void)
+{
+	__u64 runqueues_addr, bpf_prog_active_addr;
+	__u32 this_rq_cpu;
+	int this_bpf_prog_active;
+	struct test_ksyms_btf *skel = NULL;
+	struct test_ksyms_btf__data *data;
+	struct btf *btf;
+	int percpu_datasec;
+	int err;
+
+	err = kallsyms_find("runqueues", &runqueues_addr);
+	if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno))
+		return;
+	if (CHECK(err == -ENOENT, "ksym_find", "symbol 'runqueues' not found\n"))
+		return;
+
+	err = kallsyms_find("bpf_prog_active", &bpf_prog_active_addr);
+	if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno))
+		return;
+	if (CHECK(err == -ENOENT, "ksym_find", "symbol 'bpf_prog_active' not found\n"))
+		return;
+
+	btf = libbpf_find_kernel_btf();
+	if (CHECK(IS_ERR(btf), "btf_exists", "failed to load kernel BTF: %ld\n",
+		  PTR_ERR(btf)))
+		return;
+
+	percpu_datasec = btf__find_by_name_kind(btf, ".data..percpu",
+						BTF_KIND_DATASEC);
+	if (percpu_datasec < 0) {
+		printf("%s:SKIP:no PERCPU DATASEC in kernel btf\n",
+		       __func__);
+		test__skip();
+		goto cleanup;
+	}
+
+	skel = test_ksyms_btf__open_and_load();
+	if (CHECK(!skel, "skel_open", "failed to open and load skeleton\n"))
+		goto cleanup;
+
+	err = test_ksyms_btf__attach(skel);
+	if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+		goto cleanup;
+
+	/* trigger tracepoint */
+	usleep(1);
+
+	data = skel->data;
+	CHECK(data->out__runqueues_addr != runqueues_addr, "runqueues_addr",
+	      "got %llu, exp %llu\n",
+	      (unsigned long long)data->out__runqueues_addr,
+	      (unsigned long long)runqueues_addr);
+	CHECK(data->out__bpf_prog_active_addr != bpf_prog_active_addr, "bpf_prog_active_addr",
+	      "got %llu, exp %llu\n",
+	      (unsigned long long)data->out__bpf_prog_active_addr,
+	      (unsigned long long)bpf_prog_active_addr);
+
+	CHECK(data->out__rq_cpu == -1, "rq_cpu",
+	      "got %u, exp != -1\n", data->out__rq_cpu);
+	CHECK(data->out__bpf_prog_active < 0, "bpf_prog_active",
+	      "got %d, exp >= 0\n", data->out__bpf_prog_active);
+	CHECK(data->out__cpu_0_rq_cpu != 0, "cpu_rq(0)->cpu",
+	      "got %u, exp 0\n", data->out__cpu_0_rq_cpu);
+
+	this_rq_cpu = data->out__this_rq_cpu;
+	CHECK(this_rq_cpu != data->out__rq_cpu, "this_rq_cpu",
+	      "got %u, exp %u\n", this_rq_cpu, data->out__rq_cpu);
+
+	this_bpf_prog_active = data->out__this_bpf_prog_active;
+	CHECK(this_bpf_prog_active != data->out__bpf_prog_active, "this_bpf_prog_active",
+	      "got %d, exp %d\n", this_bpf_prog_active,
+	      data->out__bpf_prog_active);
+
+cleanup:
+	btf__free(btf);
+	test_ksyms_btf__destroy(skel);
+}
@@ -37,7 +37,7 @@ void test_pinning(void)
 	struct stat statbuf = {};
 	struct bpf_object *obj;
 	struct bpf_map *map;
-	int err;
+	int err, map_fd;
 	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
 		.pin_root_path = custpath,
 	);
@@ -213,6 +213,53 @@ void test_pinning(void)
 	if (CHECK(err, "stat custpinpath", "err %d errno %d\n", err, errno))
 		goto out;
 
+	/* remove the custom pin path to re-test it with reuse fd below */
+	err = unlink(custpinpath);
+	if (CHECK(err, "unlink custpinpath", "err %d errno %d\n", err, errno))
+		goto out;
+
+	err = rmdir(custpath);
+	if (CHECK(err, "rmdir custpindir", "err %d errno %d\n", err, errno))
+		goto out;
+
+	bpf_object__close(obj);
+
+	/* test pinning at custom path with reuse fd */
+	obj = bpf_object__open_file(file, NULL);
+	err = libbpf_get_error(obj);
+	if (CHECK(err, "default open", "err %d errno %d\n", err, errno)) {
+		obj = NULL;
+		goto out;
+	}
+
+	map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(__u32),
+				sizeof(__u64), 1, 0);
+	if (CHECK(map_fd < 0, "create pinmap manually", "fd %d\n", map_fd))
+		goto out;
+
+	map = bpf_object__find_map_by_name(obj, "pinmap");
+	if (CHECK(!map, "find map", "NULL map"))
+		goto close_map_fd;
+
+	err = bpf_map__reuse_fd(map, map_fd);
+	if (CHECK(err, "reuse pinmap fd", "err %d errno %d\n", err, errno))
+		goto close_map_fd;
+
+	err = bpf_map__set_pin_path(map, custpinpath);
+	if (CHECK(err, "set pin path", "err %d errno %d\n", err, errno))
+		goto close_map_fd;
+
+	err = bpf_object__load(obj);
+	if (CHECK(err, "custom load", "err %d errno %d\n", err, errno))
+		goto close_map_fd;
+
+	/* check that pinmap was pinned at the custom path */
+	err = stat(custpinpath, &statbuf);
+	if (CHECK(err, "stat custpinpath", "err %d errno %d\n", err, errno))
+		goto close_map_fd;
+
+close_map_fd:
+	close(map_fd);
 out:
 	unlink(pinpath);
 	unlink(nopinpath);
@@ -198,7 +198,7 @@ static void test_sockmap_copy(enum bpf_map_type map_type)
 {
 	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
 	int err, len, src_fd, iter_fd, duration = 0;
-	union bpf_iter_link_info linfo = {0};
+	union bpf_iter_link_info linfo = {};
 	__u32 i, num_sockets, num_elems;
 	struct bpf_iter_sockmap *skel;
 	__s64 *sock_fd = NULL;
|
@@ -264,9 +264,19 @@ static int check_error_linum(const struct sk_fds *sk_fds)
|
|||||||
|
|
||||||
static void check_hdr_and_close_fds(struct sk_fds *sk_fds)
|
static void check_hdr_and_close_fds(struct sk_fds *sk_fds)
|
||||||
{
|
{
|
||||||
|
const __u32 expected_inherit_cb_flags =
|
||||||
|
BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG |
|
||||||
|
BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG |
|
||||||
|
BPF_SOCK_OPS_STATE_CB_FLAG;
|
||||||
|
|
||||||
if (sk_fds_shutdown(sk_fds))
|
if (sk_fds_shutdown(sk_fds))
|
||||||
goto check_linum;
|
goto check_linum;
|
||||||
|
|
||||||
|
if (CHECK(expected_inherit_cb_flags != skel->bss->inherit_cb_flags,
|
||||||
|
"Unexpected inherit_cb_flags", "0x%x != 0x%x\n",
|
||||||
|
skel->bss->inherit_cb_flags, expected_inherit_cb_flags))
|
||||||
|
goto check_linum;
|
||||||
|
|
||||||
if (check_hdr_stg(&exp_passive_hdr_stg, sk_fds->passive_fd,
|
if (check_hdr_stg(&exp_passive_hdr_stg, sk_fds->passive_fd,
|
||||||
"passive_hdr_stg"))
|
"passive_hdr_stg"))
|
||||||
goto check_linum;
|
goto check_linum;
|
||||||
@@ -321,6 +331,8 @@ static void reset_test(void)
 	memset(&skel->bss->active_estab_in, 0, optsize);
 	memset(&skel->bss->active_fin_in, 0, optsize);
 
+	skel->bss->inherit_cb_flags = 0;
+
 	skel->data->test_kind = TCPOPT_EXP;
 	skel->data->test_magic = 0xeB9F;
 

tools/testing/selftests/bpf/prog_tests/test_profiler.c (new file, 72 lines)
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <test_progs.h>
+#include "progs/profiler.h"
+#include "profiler1.skel.h"
+#include "profiler2.skel.h"
+#include "profiler3.skel.h"
+
+static int sanity_run(struct bpf_program *prog)
+{
+	struct bpf_prog_test_run_attr test_attr = {};
+	__u64 args[] = {1, 2, 3};
+	__u32 duration = 0;
+	int err, prog_fd;
+
+	prog_fd = bpf_program__fd(prog);
+	test_attr.prog_fd = prog_fd;
+	test_attr.ctx_in = args;
+	test_attr.ctx_size_in = sizeof(args);
+	err = bpf_prog_test_run_xattr(&test_attr);
+	if (CHECK(err || test_attr.retval, "test_run",
+		  "err %d errno %d retval %d duration %d\n",
+		  err, errno, test_attr.retval, duration))
+		return -1;
+	return 0;
+}
+
+void test_test_profiler(void)
+{
+	struct profiler1 *profiler1_skel = NULL;
+	struct profiler2 *profiler2_skel = NULL;
+	struct profiler3 *profiler3_skel = NULL;
+	__u32 duration = 0;
+	int err;
+
+	profiler1_skel = profiler1__open_and_load();
+	if (CHECK(!profiler1_skel, "profiler1_skel_load", "profiler1 skeleton failed\n"))
+		goto cleanup;
+
+	err = profiler1__attach(profiler1_skel);
+	if (CHECK(err, "profiler1_attach", "profiler1 attach failed: %d\n", err))
+		goto cleanup;
+
+	if (sanity_run(profiler1_skel->progs.raw_tracepoint__sched_process_exec))
+		goto cleanup;
+
+	profiler2_skel = profiler2__open_and_load();
+	if (CHECK(!profiler2_skel, "profiler2_skel_load", "profiler2 skeleton failed\n"))
+		goto cleanup;
+
+	err = profiler2__attach(profiler2_skel);
+	if (CHECK(err, "profiler2_attach", "profiler2 attach failed: %d\n", err))
+		goto cleanup;
+
+	if (sanity_run(profiler2_skel->progs.raw_tracepoint__sched_process_exec))
+		goto cleanup;
+
+	profiler3_skel = profiler3__open_and_load();
+	if (CHECK(!profiler3_skel, "profiler3_skel_load", "profiler3 skeleton failed\n"))
+		goto cleanup;
+
+	err = profiler3__attach(profiler3_skel);
+	if (CHECK(err, "profiler3_attach", "profiler3 attach failed: %d\n", err))
+		goto cleanup;
+
+	if (sanity_run(profiler3_skel->progs.raw_tracepoint__sched_process_exec))
+		goto cleanup;
+cleanup:
+	profiler1__destroy(profiler1_skel);
+	profiler2__destroy(profiler2_skel);
+	profiler3__destroy(profiler3_skel);
+}
@@ -25,7 +25,7 @@ void test_xdp_noinline(void)
 		__u8 flags;
 	} real_def = {.dst = MAGIC_VAL};
 	__u32 ch_key = 11, real_num = 3;
-	__u32 duration, retval, size;
+	__u32 duration = 0, retval, size;
 	int err, i;
 	__u64 bytes = 0, pkts = 0;
 	char buf[128];
@@ -23,6 +23,10 @@
 #define TCP_CA_NAME_MAX 16
 #endif
 
+#ifndef TCP_NOTSENT_LOWAT
+#define TCP_NOTSENT_LOWAT 25
+#endif
+
 #ifndef IFNAMSIZ
 #define IFNAMSIZ 16
 #endif
@@ -128,6 +132,18 @@ static __inline int set_keepalive(struct bpf_sock_addr *ctx)
 	return 0;
 }
 
+static __inline int set_notsent_lowat(struct bpf_sock_addr *ctx)
+{
+	int lowat = 65535;
+
+	if (ctx->type == SOCK_STREAM) {
+		if (bpf_setsockopt(ctx, SOL_TCP, TCP_NOTSENT_LOWAT, &lowat, sizeof(lowat)))
+			return 1;
+	}
+
+	return 0;
+}
+
 SEC("cgroup/connect4")
 int connect_v4_prog(struct bpf_sock_addr *ctx)
 {
@@ -148,6 +164,9 @@ int connect_v4_prog(struct bpf_sock_addr *ctx)
 	if (set_keepalive(ctx))
 		return 0;
 
+	if (set_notsent_lowat(ctx))
+		return 0;
+
 	if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
 		return 0;
 	else if (ctx->type == SOCK_STREAM)
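The new set_notsent_lowat() path only fires for SOCK_STREAM sockets. A minimal userspace sketch of how its effect could be verified after the cgroup/connect4 program has run, using plain getsockopt(); the helper name read_notsent_lowat and the expected value of 65535 are illustrative, not part of this patch:

#include <netinet/tcp.h>
#include <sys/socket.h>

/* Illustrative check, not in the patch: after connect() on a TCP socket
 * governed by connect_v4_prog, the kernel-side value should be the 65535
 * the BPF program wrote via bpf_setsockopt().
 */
static int read_notsent_lowat(int fd)
{
	int lowat = 0;
	socklen_t len = sizeof(lowat);

	if (getsockopt(fd, IPPROTO_TCP, TCP_NOTSENT_LOWAT, &lowat, &len))
		return -1;
	return lowat;
}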
tools/testing/selftests/bpf/progs/profiler.h (new file, 177 lines)
@@ -0,0 +1,177 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#pragma once

#define TASK_COMM_LEN 16
#define MAX_ANCESTORS 4
#define MAX_PATH 256
#define KILL_TARGET_LEN 64
#define CTL_MAXNAME 10
#define MAX_ARGS_LEN 4096
#define MAX_FILENAME_LEN 512
#define MAX_ENVIRON_LEN 8192
#define MAX_PATH_DEPTH 32
#define MAX_FILEPATH_LENGTH (MAX_PATH_DEPTH * MAX_PATH)
#define MAX_CGROUPS_PATH_DEPTH 8

#define MAX_METADATA_PAYLOAD_LEN TASK_COMM_LEN

#define MAX_CGROUP_PAYLOAD_LEN \
	(MAX_PATH * 2 + (MAX_PATH * MAX_CGROUPS_PATH_DEPTH))

#define MAX_CAP_PAYLOAD_LEN (MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN)

#define MAX_SYSCTL_PAYLOAD_LEN \
	(MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN + CTL_MAXNAME + MAX_PATH)

#define MAX_KILL_PAYLOAD_LEN \
	(MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN + TASK_COMM_LEN + \
	 KILL_TARGET_LEN)

#define MAX_EXEC_PAYLOAD_LEN \
	(MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN + MAX_FILENAME_LEN + \
	 MAX_ARGS_LEN + MAX_ENVIRON_LEN)

#define MAX_FILEMOD_PAYLOAD_LEN \
	(MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN + MAX_FILEPATH_LENGTH + \
	 MAX_FILEPATH_LENGTH)

enum data_type {
	INVALID_EVENT,
	EXEC_EVENT,
	FORK_EVENT,
	KILL_EVENT,
	SYSCTL_EVENT,
	FILEMOD_EVENT,
	MAX_DATA_TYPE_EVENT
};

enum filemod_type {
	FMOD_OPEN,
	FMOD_LINK,
	FMOD_SYMLINK,
};

struct ancestors_data_t {
	pid_t ancestor_pids[MAX_ANCESTORS];
	uint32_t ancestor_exec_ids[MAX_ANCESTORS];
	uint64_t ancestor_start_times[MAX_ANCESTORS];
	uint32_t num_ancestors;
};

struct var_metadata_t {
	enum data_type type;
	pid_t pid;
	uint32_t exec_id;
	uid_t uid;
	gid_t gid;
	uint64_t start_time;
	uint32_t cpu_id;
	uint64_t bpf_stats_num_perf_events;
	uint64_t bpf_stats_start_ktime_ns;
	uint8_t comm_length;
};

struct cgroup_data_t {
	ino_t cgroup_root_inode;
	ino_t cgroup_proc_inode;
	uint64_t cgroup_root_mtime;
	uint64_t cgroup_proc_mtime;
	uint16_t cgroup_root_length;
	uint16_t cgroup_proc_length;
	uint16_t cgroup_full_length;
	int cgroup_full_path_root_pos;
};

struct var_sysctl_data_t {
	struct var_metadata_t meta;
	struct cgroup_data_t cgroup_data;
	struct ancestors_data_t ancestors_info;
	uint8_t sysctl_val_length;
	uint16_t sysctl_path_length;
	char payload[MAX_SYSCTL_PAYLOAD_LEN];
};

struct var_kill_data_t {
	struct var_metadata_t meta;
	struct cgroup_data_t cgroup_data;
	struct ancestors_data_t ancestors_info;
	pid_t kill_target_pid;
	int kill_sig;
	uint32_t kill_count;
	uint64_t last_kill_time;
	uint8_t kill_target_name_length;
	uint8_t kill_target_cgroup_proc_length;
	char payload[MAX_KILL_PAYLOAD_LEN];
	size_t payload_length;
};

struct var_exec_data_t {
	struct var_metadata_t meta;
	struct cgroup_data_t cgroup_data;
	pid_t parent_pid;
	uint32_t parent_exec_id;
	uid_t parent_uid;
	uint64_t parent_start_time;
	uint16_t bin_path_length;
	uint16_t cmdline_length;
	uint16_t environment_length;
	char payload[MAX_EXEC_PAYLOAD_LEN];
};

struct var_fork_data_t {
	struct var_metadata_t meta;
	pid_t parent_pid;
	uint32_t parent_exec_id;
	uint64_t parent_start_time;
	char payload[MAX_METADATA_PAYLOAD_LEN];
};

struct var_filemod_data_t {
	struct var_metadata_t meta;
	struct cgroup_data_t cgroup_data;
	enum filemod_type fmod_type;
	unsigned int dst_flags;
	uint32_t src_device_id;
	uint32_t dst_device_id;
	ino_t src_inode;
	ino_t dst_inode;
	uint16_t src_filepath_length;
	uint16_t dst_filepath_length;
	char payload[MAX_FILEMOD_PAYLOAD_LEN];
};

struct profiler_config_struct {
	bool fetch_cgroups_from_bpf;
	ino_t cgroup_fs_inode;
	ino_t cgroup_login_session_inode;
	uint64_t kill_signals_mask;
	ino_t inode_filter;
	uint32_t stale_info_secs;
	bool use_variable_buffers;
	bool read_environ_from_exec;
	bool enable_cgroup_v1_resolver;
};

struct bpf_func_stats_data {
	uint64_t time_elapsed_ns;
	uint64_t num_executions;
	uint64_t num_perf_events;
};

struct bpf_func_stats_ctx {
	uint64_t start_time_ns;
	struct bpf_func_stats_data* bpf_func_stats_data_val;
};

enum bpf_function_id {
	profiler_bpf_proc_sys_write,
	profiler_bpf_sched_process_exec,
	profiler_bpf_sched_process_exit,
	profiler_bpf_sys_enter_kill,
	profiler_bpf_do_filp_open_ret,
	profiler_bpf_sched_process_fork,
	profiler_bpf_vfs_link,
	profiler_bpf_vfs_symlink,
	profiler_bpf_max_function_id
};
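Since the payload bounds above are nested sums of the path constants, it can help to see one expanded. A compile-time sketch; the assertions themselves are illustrative and not part of the header:

/* MAX_CGROUP_PAYLOAD_LEN = MAX_PATH * 2 + MAX_PATH * MAX_CGROUPS_PATH_DEPTH
 *                        = 256 * 2 + 256 * 8 = 2560 bytes, and the sysctl
 * payload adds metadata (16), CTL_MAXNAME (10) and one more MAX_PATH (256).
 */
_Static_assert(MAX_CGROUP_PAYLOAD_LEN == 2560, "cgroup payload bound");
_Static_assert(MAX_SYSCTL_PAYLOAD_LEN == 16 + 2560 + 10 + 256,
	       "sysctl payload bound");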
tools/testing/selftests/bpf/progs/profiler.inc.h (new file, 969 lines)
@@ -0,0 +1,969 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#include <vmlinux.h>
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

#include "profiler.h"

#ifndef NULL
#define NULL 0
#endif

#define O_WRONLY 00000001
#define O_RDWR 00000002
#define O_DIRECTORY 00200000
#define __O_TMPFILE 020000000
#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY)
#define MAX_ERRNO 4095
#define S_IFMT 00170000
#define S_IFSOCK 0140000
#define S_IFLNK 0120000
#define S_IFREG 0100000
#define S_IFBLK 0060000
#define S_IFDIR 0040000
#define S_IFCHR 0020000
#define S_IFIFO 0010000
#define S_ISUID 0004000
#define S_ISGID 0002000
#define S_ISVTX 0001000
#define S_ISLNK(m) (((m)&S_IFMT) == S_IFLNK)
#define S_ISDIR(m) (((m)&S_IFMT) == S_IFDIR)
#define S_ISCHR(m) (((m)&S_IFMT) == S_IFCHR)
#define S_ISBLK(m) (((m)&S_IFMT) == S_IFBLK)
#define S_ISFIFO(m) (((m)&S_IFMT) == S_IFIFO)
#define S_ISSOCK(m) (((m)&S_IFMT) == S_IFSOCK)
#define IS_ERR_VALUE(x) (unsigned long)(void*)(x) >= (unsigned long)-MAX_ERRNO

#define KILL_DATA_ARRAY_SIZE 8

struct var_kill_data_arr_t {
	struct var_kill_data_t array[KILL_DATA_ARRAY_SIZE];
};

union any_profiler_data_t {
	struct var_exec_data_t var_exec;
	struct var_kill_data_t var_kill;
	struct var_sysctl_data_t var_sysctl;
	struct var_filemod_data_t var_filemod;
	struct var_fork_data_t var_fork;
	struct var_kill_data_arr_t var_kill_data_arr;
};

volatile struct profiler_config_struct bpf_config = {};

#define FETCH_CGROUPS_FROM_BPF (bpf_config.fetch_cgroups_from_bpf)
#define CGROUP_FS_INODE (bpf_config.cgroup_fs_inode)
#define CGROUP_LOGIN_SESSION_INODE \
	(bpf_config.cgroup_login_session_inode)
#define KILL_SIGNALS (bpf_config.kill_signals_mask)
#define STALE_INFO (bpf_config.stale_info_secs)
#define INODE_FILTER (bpf_config.inode_filter)
#define READ_ENVIRON_FROM_EXEC (bpf_config.read_environ_from_exec)
#define ENABLE_CGROUP_V1_RESOLVER (bpf_config.enable_cgroup_v1_resolver)

struct kernfs_iattrs___52 {
	struct iattr ia_iattr;
};

struct kernfs_node___52 {
	union /* kernfs_node_id */ {
		struct {
			u32 ino;
			u32 generation;
		};
		u64 id;
	} id;
};

struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(max_entries, 1);
	__type(key, u32);
	__type(value, union any_profiler_data_t);
} data_heap SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(int));
} events SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, KILL_DATA_ARRAY_SIZE);
	__type(key, u32);
	__type(value, struct var_kill_data_arr_t);
} var_tpid_to_data SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(max_entries, profiler_bpf_max_function_id);
	__type(key, u32);
	__type(value, struct bpf_func_stats_data);
} bpf_func_stats SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u32);
	__type(value, bool);
	__uint(max_entries, 16);
} allowed_devices SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u64);
	__type(value, bool);
	__uint(max_entries, 1024);
} allowed_file_inodes SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u64);
	__type(value, bool);
	__uint(max_entries, 1024);
} allowed_directory_inodes SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u32);
	__type(value, bool);
	__uint(max_entries, 16);
} disallowed_exec_inodes SEC(".maps");

#ifndef ARRAY_SIZE
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
#endif

static INLINE bool IS_ERR(const void* ptr)
{
	return IS_ERR_VALUE((unsigned long)ptr);
}

static INLINE u32 get_userspace_pid()
{
	return bpf_get_current_pid_tgid() >> 32;
}

static INLINE bool is_init_process(u32 tgid)
{
	return tgid == 1 || tgid == 0;
}

static INLINE unsigned long
probe_read_lim(void* dst, void* src, unsigned long len, unsigned long max)
{
	len = len < max ? len : max;
	if (len > 1) {
		if (bpf_probe_read(dst, len, src))
			return 0;
	} else if (len == 1) {
		if (bpf_probe_read(dst, 1, src))
			return 0;
	}
	return len;
}

static INLINE int get_var_spid_index(struct var_kill_data_arr_t* arr_struct,
				     int spid)
{
#ifdef UNROLL
#pragma unroll
#endif
	for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
		if (arr_struct->array[i].meta.pid == spid)
			return i;
	return -1;
}

static INLINE void populate_ancestors(struct task_struct* task,
				      struct ancestors_data_t* ancestors_data)
{
	struct task_struct* parent = task;
	u32 num_ancestors, ppid;

	ancestors_data->num_ancestors = 0;
#ifdef UNROLL
#pragma unroll
#endif
	for (num_ancestors = 0; num_ancestors < MAX_ANCESTORS; num_ancestors++) {
		parent = BPF_CORE_READ(parent, real_parent);
		if (parent == NULL)
			break;
		ppid = BPF_CORE_READ(parent, tgid);
		if (is_init_process(ppid))
			break;
		ancestors_data->ancestor_pids[num_ancestors] = ppid;
		ancestors_data->ancestor_exec_ids[num_ancestors] =
			BPF_CORE_READ(parent, self_exec_id);
		ancestors_data->ancestor_start_times[num_ancestors] =
			BPF_CORE_READ(parent, start_time);
		ancestors_data->num_ancestors = num_ancestors;
	}
}

static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node,
					  struct kernfs_node* cgroup_root_node,
					  void* payload,
					  int* root_pos)
{
	void* payload_start = payload;
	size_t filepart_length;

#ifdef UNROLL
#pragma unroll
#endif
	for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) {
		filepart_length =
			bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(cgroup_node, name));
		if (!cgroup_node)
			return payload;
		if (cgroup_node == cgroup_root_node)
			*root_pos = payload - payload_start;
		if (filepart_length <= MAX_PATH) {
			barrier_var(filepart_length);
			payload += filepart_length;
		}
		cgroup_node = BPF_CORE_READ(cgroup_node, parent);
	}
	return payload;
}

static ino_t get_inode_from_kernfs(struct kernfs_node* node)
{
	struct kernfs_node___52* node52 = (void*)node;

	if (bpf_core_field_exists(node52->id.ino)) {
		barrier_var(node52);
		return BPF_CORE_READ(node52, id.ino);
	} else {
		barrier_var(node);
		return (u64)BPF_CORE_READ(node, id);
	}
}

int pids_cgrp_id = 1;

static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
					 struct task_struct* task,
					 void* payload)
{
	struct kernfs_node* root_kernfs =
		BPF_CORE_READ(task, nsproxy, cgroup_ns, root_cset, dfl_cgrp, kn);
	struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);

	if (ENABLE_CGROUP_V1_RESOLVER) {
#ifdef UNROLL
#pragma unroll
#endif
		for (int i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
			struct cgroup_subsys_state* subsys =
				BPF_CORE_READ(task, cgroups, subsys[i]);
			if (subsys != NULL) {
				int subsys_id = BPF_CORE_READ(subsys, ss, id);
				if (subsys_id == pids_cgrp_id) {
					proc_kernfs = BPF_CORE_READ(subsys, cgroup, kn);
					root_kernfs = BPF_CORE_READ(subsys, ss, root, kf_root, kn);
					break;
				}
			}
		}
	}

	cgroup_data->cgroup_root_inode = get_inode_from_kernfs(root_kernfs);
	cgroup_data->cgroup_proc_inode = get_inode_from_kernfs(proc_kernfs);

	if (bpf_core_field_exists(root_kernfs->iattr->ia_mtime)) {
		cgroup_data->cgroup_root_mtime =
			BPF_CORE_READ(root_kernfs, iattr, ia_mtime.tv_nsec);
		cgroup_data->cgroup_proc_mtime =
			BPF_CORE_READ(proc_kernfs, iattr, ia_mtime.tv_nsec);
	} else {
		struct kernfs_iattrs___52* root_iattr =
			(struct kernfs_iattrs___52*)BPF_CORE_READ(root_kernfs, iattr);
		cgroup_data->cgroup_root_mtime =
			BPF_CORE_READ(root_iattr, ia_iattr.ia_mtime.tv_nsec);

		struct kernfs_iattrs___52* proc_iattr =
			(struct kernfs_iattrs___52*)BPF_CORE_READ(proc_kernfs, iattr);
		cgroup_data->cgroup_proc_mtime =
			BPF_CORE_READ(proc_iattr, ia_iattr.ia_mtime.tv_nsec);
	}

	cgroup_data->cgroup_root_length = 0;
	cgroup_data->cgroup_proc_length = 0;
	cgroup_data->cgroup_full_length = 0;

	size_t cgroup_root_length =
		bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(root_kernfs, name));
	barrier_var(cgroup_root_length);
	if (cgroup_root_length <= MAX_PATH) {
		barrier_var(cgroup_root_length);
		cgroup_data->cgroup_root_length = cgroup_root_length;
		payload += cgroup_root_length;
	}

	size_t cgroup_proc_length =
		bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(proc_kernfs, name));
	barrier_var(cgroup_proc_length);
	if (cgroup_proc_length <= MAX_PATH) {
		barrier_var(cgroup_proc_length);
		cgroup_data->cgroup_proc_length = cgroup_proc_length;
		payload += cgroup_proc_length;
	}

	if (FETCH_CGROUPS_FROM_BPF) {
		cgroup_data->cgroup_full_path_root_pos = -1;
		void* payload_end_pos = read_full_cgroup_path(proc_kernfs, root_kernfs, payload,
							      &cgroup_data->cgroup_full_path_root_pos);
		cgroup_data->cgroup_full_length = payload_end_pos - payload;
		payload = payload_end_pos;
	}

	return (void*)payload;
}

static INLINE void* populate_var_metadata(struct var_metadata_t* metadata,
					  struct task_struct* task,
					  u32 pid, void* payload)
{
	u64 uid_gid = bpf_get_current_uid_gid();

	metadata->uid = (u32)uid_gid;
	metadata->gid = uid_gid >> 32;
	metadata->pid = pid;
	metadata->exec_id = BPF_CORE_READ(task, self_exec_id);
	metadata->start_time = BPF_CORE_READ(task, start_time);
	metadata->comm_length = 0;

	size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
	barrier_var(comm_length);
	if (comm_length <= TASK_COMM_LEN) {
		barrier_var(comm_length);
		metadata->comm_length = comm_length;
		payload += comm_length;
	}

	return (void*)payload;
}

static INLINE struct var_kill_data_t*
get_var_kill_data(struct pt_regs* ctx, int spid, int tpid, int sig)
{
	int zero = 0;
	struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero);

	if (kill_data == NULL)
		return NULL;
	struct task_struct* task = (struct task_struct*)bpf_get_current_task();

	void* payload = populate_var_metadata(&kill_data->meta, task, spid, kill_data->payload);
	payload = populate_cgroup_info(&kill_data->cgroup_data, task, payload);
	size_t payload_length = payload - (void*)kill_data->payload;
	kill_data->payload_length = payload_length;
	populate_ancestors(task, &kill_data->ancestors_info);
	kill_data->meta.type = KILL_EVENT;
	kill_data->kill_target_pid = tpid;
	kill_data->kill_sig = sig;
	kill_data->kill_count = 1;
	kill_data->last_kill_time = bpf_ktime_get_ns();
	return kill_data;
}

static INLINE int trace_var_sys_kill(void* ctx, int tpid, int sig)
{
	if ((KILL_SIGNALS & (1ULL << sig)) == 0)
		return 0;

	u32 spid = get_userspace_pid();
	struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid);

	if (arr_struct == NULL) {
		struct var_kill_data_t* kill_data = get_var_kill_data(ctx, spid, tpid, sig);
		int zero = 0;

		if (kill_data == NULL)
			return 0;
		arr_struct = bpf_map_lookup_elem(&data_heap, &zero);
		if (arr_struct == NULL)
			return 0;
		bpf_probe_read(&arr_struct->array[0], sizeof(arr_struct->array[0]), kill_data);
	} else {
		int index = get_var_spid_index(arr_struct, spid);

		if (index == -1) {
			struct var_kill_data_t* kill_data =
				get_var_kill_data(ctx, spid, tpid, sig);
			if (kill_data == NULL)
				return 0;
#ifdef UNROLL
#pragma unroll
#endif
			for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
				if (arr_struct->array[i].meta.pid == 0) {
					bpf_probe_read(&arr_struct->array[i],
						       sizeof(arr_struct->array[i]), kill_data);
					bpf_map_update_elem(&var_tpid_to_data, &tpid,
							    arr_struct, 0);

					return 0;
				}
			return 0;
		}

		struct var_kill_data_t* kill_data = &arr_struct->array[index];

		u64 delta_sec =
			(bpf_ktime_get_ns() - kill_data->last_kill_time) / 1000000000;

		if (delta_sec < STALE_INFO) {
			kill_data->kill_count++;
			kill_data->last_kill_time = bpf_ktime_get_ns();
			bpf_probe_read(&arr_struct->array[index],
				       sizeof(arr_struct->array[index]),
				       kill_data);
		} else {
			struct var_kill_data_t* kill_data =
				get_var_kill_data(ctx, spid, tpid, sig);
			if (kill_data == NULL)
				return 0;
			bpf_probe_read(&arr_struct->array[index],
				       sizeof(arr_struct->array[index]),
				       kill_data);
		}
	}
	bpf_map_update_elem(&var_tpid_to_data, &tpid, arr_struct, 0);
	return 0;
}

static INLINE void bpf_stats_enter(struct bpf_func_stats_ctx* bpf_stat_ctx,
				   enum bpf_function_id func_id)
{
	int func_id_key = func_id;

	bpf_stat_ctx->start_time_ns = bpf_ktime_get_ns();
	bpf_stat_ctx->bpf_func_stats_data_val =
		bpf_map_lookup_elem(&bpf_func_stats, &func_id_key);
	if (bpf_stat_ctx->bpf_func_stats_data_val)
		bpf_stat_ctx->bpf_func_stats_data_val->num_executions++;
}

static INLINE void bpf_stats_exit(struct bpf_func_stats_ctx* bpf_stat_ctx)
{
	if (bpf_stat_ctx->bpf_func_stats_data_val)
		bpf_stat_ctx->bpf_func_stats_data_val->time_elapsed_ns +=
			bpf_ktime_get_ns() - bpf_stat_ctx->start_time_ns;
}

static INLINE void
bpf_stats_pre_submit_var_perf_event(struct bpf_func_stats_ctx* bpf_stat_ctx,
				    struct var_metadata_t* meta)
{
	if (bpf_stat_ctx->bpf_func_stats_data_val) {
		bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events++;
		meta->bpf_stats_num_perf_events =
			bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events;
	}
	meta->bpf_stats_start_ktime_ns = bpf_stat_ctx->start_time_ns;
	meta->cpu_id = bpf_get_smp_processor_id();
}

static INLINE size_t
read_absolute_file_path_from_dentry(struct dentry* filp_dentry, void* payload)
{
	size_t length = 0;
	size_t filepart_length;
	struct dentry* parent_dentry;

#ifdef UNROLL
#pragma unroll
#endif
	for (int i = 0; i < MAX_PATH_DEPTH; i++) {
		filepart_length = bpf_probe_read_str(payload, MAX_PATH,
						     BPF_CORE_READ(filp_dentry, d_name.name));
		barrier_var(filepart_length);
		if (filepart_length > MAX_PATH)
			break;
		barrier_var(filepart_length);
		payload += filepart_length;
		length += filepart_length;

		parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
		if (filp_dentry == parent_dentry)
			break;
		filp_dentry = parent_dentry;
	}

	return length;
}

static INLINE bool
is_ancestor_in_allowed_inodes(struct dentry* filp_dentry)
{
	struct dentry* parent_dentry;
#ifdef UNROLL
#pragma unroll
#endif
	for (int i = 0; i < MAX_PATH_DEPTH; i++) {
		u64 dir_ino = BPF_CORE_READ(filp_dentry, d_inode, i_ino);
		bool* allowed_dir = bpf_map_lookup_elem(&allowed_directory_inodes, &dir_ino);

		if (allowed_dir != NULL)
			return true;
		parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
		if (filp_dentry == parent_dentry)
			break;
		filp_dentry = parent_dentry;
	}
	return false;
}

static INLINE bool is_dentry_allowed_for_filemod(struct dentry* file_dentry,
						 u32* device_id,
						 u64* file_ino)
{
	u32 dev_id = BPF_CORE_READ(file_dentry, d_sb, s_dev);
	*device_id = dev_id;
	bool* allowed_device = bpf_map_lookup_elem(&allowed_devices, &dev_id);

	if (allowed_device == NULL)
		return false;

	u64 ino = BPF_CORE_READ(file_dentry, d_inode, i_ino);
	*file_ino = ino;
	bool* allowed_file = bpf_map_lookup_elem(&allowed_file_inodes, &ino);

	if (allowed_file == NULL)
		if (!is_ancestor_in_allowed_inodes(BPF_CORE_READ(file_dentry, d_parent)))
			return false;
	return true;
}

SEC("kprobe/proc_sys_write")
ssize_t BPF_KPROBE(kprobe__proc_sys_write,
		   struct file* filp, const char* buf,
		   size_t count, loff_t* ppos)
{
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_proc_sys_write);

	u32 pid = get_userspace_pid();
	int zero = 0;
	struct var_sysctl_data_t* sysctl_data =
		bpf_map_lookup_elem(&data_heap, &zero);
	if (!sysctl_data)
		goto out;

	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
	sysctl_data->meta.type = SYSCTL_EVENT;
	void* payload = populate_var_metadata(&sysctl_data->meta, task, pid, sysctl_data->payload);
	payload = populate_cgroup_info(&sysctl_data->cgroup_data, task, payload);

	populate_ancestors(task, &sysctl_data->ancestors_info);

	sysctl_data->sysctl_val_length = 0;
	sysctl_data->sysctl_path_length = 0;

	size_t sysctl_val_length = bpf_probe_read_str(payload, CTL_MAXNAME, buf);
	barrier_var(sysctl_val_length);
	if (sysctl_val_length <= CTL_MAXNAME) {
		barrier_var(sysctl_val_length);
		sysctl_data->sysctl_val_length = sysctl_val_length;
		payload += sysctl_val_length;
	}

	size_t sysctl_path_length = bpf_probe_read_str(payload, MAX_PATH,
						       BPF_CORE_READ(filp, f_path.dentry, d_name.name));
	barrier_var(sysctl_path_length);
	if (sysctl_path_length <= MAX_PATH) {
		barrier_var(sysctl_path_length);
		sysctl_data->sysctl_path_length = sysctl_path_length;
		payload += sysctl_path_length;
	}

	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &sysctl_data->meta);
	unsigned long data_len = payload - (void*)sysctl_data;
	data_len = data_len > sizeof(struct var_sysctl_data_t)
		? sizeof(struct var_sysctl_data_t)
		: data_len;
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, sysctl_data, data_len);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}

SEC("tracepoint/syscalls/sys_enter_kill")
int tracepoint__syscalls__sys_enter_kill(struct trace_event_raw_sys_enter* ctx)
{
	struct bpf_func_stats_ctx stats_ctx;

	bpf_stats_enter(&stats_ctx, profiler_bpf_sys_enter_kill);
	int pid = ctx->args[0];
	int sig = ctx->args[1];
	int ret = trace_var_sys_kill(ctx, pid, sig);
	bpf_stats_exit(&stats_ctx);
	return ret;
};

SEC("raw_tracepoint/sched_process_exit")
int raw_tracepoint__sched_process_exit(void* ctx)
{
	int zero = 0;
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exit);

	u32 tpid = get_userspace_pid();

	struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid);
	struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero);

	if (arr_struct == NULL || kill_data == NULL)
		goto out;

	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
	struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);

#ifdef UNROLL
#pragma unroll
#endif
	for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) {
		struct var_kill_data_t* past_kill_data = &arr_struct->array[i];

		if (past_kill_data != NULL && past_kill_data->kill_target_pid == tpid) {
			bpf_probe_read(kill_data, sizeof(*past_kill_data), past_kill_data);
			void* payload = kill_data->payload;
			size_t offset = kill_data->payload_length;
			if (offset >= MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN)
				return 0;
			payload += offset;

			kill_data->kill_target_name_length = 0;
			kill_data->kill_target_cgroup_proc_length = 0;

			size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
			barrier_var(comm_length);
			if (comm_length <= TASK_COMM_LEN) {
				barrier_var(comm_length);
				kill_data->kill_target_name_length = comm_length;
				payload += comm_length;
			}

			size_t cgroup_proc_length = bpf_probe_read_str(payload, KILL_TARGET_LEN,
								       BPF_CORE_READ(proc_kernfs, name));
			barrier_var(cgroup_proc_length);
			if (cgroup_proc_length <= KILL_TARGET_LEN) {
				barrier_var(cgroup_proc_length);
				kill_data->kill_target_cgroup_proc_length = cgroup_proc_length;
				payload += cgroup_proc_length;
			}

			bpf_stats_pre_submit_var_perf_event(&stats_ctx, &kill_data->meta);
			unsigned long data_len = (void*)payload - (void*)kill_data;
			data_len = data_len > sizeof(struct var_kill_data_t)
				? sizeof(struct var_kill_data_t)
				: data_len;
			bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, kill_data, data_len);
		}
	}
	bpf_map_delete_elem(&var_tpid_to_data, &tpid);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}

SEC("raw_tracepoint/sched_process_exec")
int raw_tracepoint__sched_process_exec(struct bpf_raw_tracepoint_args* ctx)
{
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exec);

	struct linux_binprm* bprm = (struct linux_binprm*)ctx->args[2];
	u64 inode = BPF_CORE_READ(bprm, file, f_inode, i_ino);

	bool* should_filter_binprm = bpf_map_lookup_elem(&disallowed_exec_inodes, &inode);
	if (should_filter_binprm != NULL)
		goto out;

	int zero = 0;
	struct var_exec_data_t* proc_exec_data = bpf_map_lookup_elem(&data_heap, &zero);
	if (!proc_exec_data)
		goto out;

	if (INODE_FILTER && inode != INODE_FILTER)
		return 0;

	u32 pid = get_userspace_pid();
	struct task_struct* task = (struct task_struct*)bpf_get_current_task();

	proc_exec_data->meta.type = EXEC_EVENT;
	proc_exec_data->bin_path_length = 0;
	proc_exec_data->cmdline_length = 0;
	proc_exec_data->environment_length = 0;
	void* payload = populate_var_metadata(&proc_exec_data->meta, task, pid,
					      proc_exec_data->payload);
	payload = populate_cgroup_info(&proc_exec_data->cgroup_data, task, payload);

	struct task_struct* parent_task = BPF_CORE_READ(task, real_parent);
	proc_exec_data->parent_pid = BPF_CORE_READ(parent_task, tgid);
	proc_exec_data->parent_uid = BPF_CORE_READ(parent_task, real_cred, uid.val);
	proc_exec_data->parent_exec_id = BPF_CORE_READ(parent_task, self_exec_id);
	proc_exec_data->parent_start_time = BPF_CORE_READ(parent_task, start_time);

	const char* filename = BPF_CORE_READ(bprm, filename);
	size_t bin_path_length = bpf_probe_read_str(payload, MAX_FILENAME_LEN, filename);
	barrier_var(bin_path_length);
	if (bin_path_length <= MAX_FILENAME_LEN) {
		barrier_var(bin_path_length);
		proc_exec_data->bin_path_length = bin_path_length;
		payload += bin_path_length;
	}

	void* arg_start = (void*)BPF_CORE_READ(task, mm, arg_start);
	void* arg_end = (void*)BPF_CORE_READ(task, mm, arg_end);
	unsigned int cmdline_length = probe_read_lim(payload, arg_start,
						     arg_end - arg_start, MAX_ARGS_LEN);

	if (cmdline_length <= MAX_ARGS_LEN) {
		barrier_var(cmdline_length);
		proc_exec_data->cmdline_length = cmdline_length;
		payload += cmdline_length;
	}

	if (READ_ENVIRON_FROM_EXEC) {
		void* env_start = (void*)BPF_CORE_READ(task, mm, env_start);
		void* env_end = (void*)BPF_CORE_READ(task, mm, env_end);
		unsigned long env_len = probe_read_lim(payload, env_start,
						       env_end - env_start, MAX_ENVIRON_LEN);
		if (cmdline_length <= MAX_ENVIRON_LEN) {
			proc_exec_data->environment_length = env_len;
			payload += env_len;
		}
	}

	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &proc_exec_data->meta);
	unsigned long data_len = payload - (void*)proc_exec_data;
	data_len = data_len > sizeof(struct var_exec_data_t)
		? sizeof(struct var_exec_data_t)
		: data_len;
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, proc_exec_data, data_len);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}

SEC("kretprobe/do_filp_open")
int kprobe_ret__do_filp_open(struct pt_regs* ctx)
{
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_do_filp_open_ret);

	struct file* filp = (struct file*)PT_REGS_RC_CORE(ctx);

	if (filp == NULL || IS_ERR(filp))
		goto out;
	unsigned int flags = BPF_CORE_READ(filp, f_flags);
	if ((flags & (O_RDWR | O_WRONLY)) == 0)
		goto out;
	if ((flags & O_TMPFILE) > 0)
		goto out;
	struct inode* file_inode = BPF_CORE_READ(filp, f_inode);
	umode_t mode = BPF_CORE_READ(file_inode, i_mode);
	if (S_ISDIR(mode) || S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode) ||
	    S_ISSOCK(mode))
		goto out;

	struct dentry* filp_dentry = BPF_CORE_READ(filp, f_path.dentry);
	u32 device_id = 0;
	u64 file_ino = 0;
	if (!is_dentry_allowed_for_filemod(filp_dentry, &device_id, &file_ino))
		goto out;

	int zero = 0;
	struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
	if (!filemod_data)
		goto out;

	u32 pid = get_userspace_pid();
	struct task_struct* task = (struct task_struct*)bpf_get_current_task();

	filemod_data->meta.type = FILEMOD_EVENT;
	filemod_data->fmod_type = FMOD_OPEN;
	filemod_data->dst_flags = flags;
	filemod_data->src_inode = 0;
	filemod_data->dst_inode = file_ino;
	filemod_data->src_device_id = 0;
	filemod_data->dst_device_id = device_id;
	filemod_data->src_filepath_length = 0;
	filemod_data->dst_filepath_length = 0;

	void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
					      filemod_data->payload);
	payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);

	size_t len = read_absolute_file_path_from_dentry(filp_dentry, payload);
	barrier_var(len);
	if (len <= MAX_FILEPATH_LENGTH) {
		barrier_var(len);
		payload += len;
		filemod_data->dst_filepath_length = len;
	}
	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
	unsigned long data_len = payload - (void*)filemod_data;
	data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}

SEC("kprobe/vfs_link")
int BPF_KPROBE(kprobe__vfs_link,
	       struct dentry* old_dentry, struct inode* dir,
	       struct dentry* new_dentry, struct inode** delegated_inode)
{
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_link);

	u32 src_device_id = 0;
	u64 src_file_ino = 0;
	u32 dst_device_id = 0;
	u64 dst_file_ino = 0;
	if (!is_dentry_allowed_for_filemod(old_dentry, &src_device_id, &src_file_ino) &&
	    !is_dentry_allowed_for_filemod(new_dentry, &dst_device_id, &dst_file_ino))
		goto out;

	int zero = 0;
	struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
	if (!filemod_data)
		goto out;

	u32 pid = get_userspace_pid();
	struct task_struct* task = (struct task_struct*)bpf_get_current_task();

	filemod_data->meta.type = FILEMOD_EVENT;
	filemod_data->fmod_type = FMOD_LINK;
	filemod_data->dst_flags = 0;
	filemod_data->src_inode = src_file_ino;
	filemod_data->dst_inode = dst_file_ino;
	filemod_data->src_device_id = src_device_id;
	filemod_data->dst_device_id = dst_device_id;
	filemod_data->src_filepath_length = 0;
	filemod_data->dst_filepath_length = 0;

	void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
					      filemod_data->payload);
	payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);

	size_t len = read_absolute_file_path_from_dentry(old_dentry, payload);
	barrier_var(len);
	if (len <= MAX_FILEPATH_LENGTH) {
		barrier_var(len);
		payload += len;
		filemod_data->src_filepath_length = len;
	}

	len = read_absolute_file_path_from_dentry(new_dentry, payload);
	barrier_var(len);
	if (len <= MAX_FILEPATH_LENGTH) {
		barrier_var(len);
		payload += len;
		filemod_data->dst_filepath_length = len;
	}

	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
	unsigned long data_len = payload - (void*)filemod_data;
	data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}

SEC("kprobe/vfs_symlink")
int BPF_KPROBE(kprobe__vfs_symlink, struct inode* dir, struct dentry* dentry,
	       const char* oldname)
{
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_symlink);

	u32 dst_device_id = 0;
	u64 dst_file_ino = 0;
	if (!is_dentry_allowed_for_filemod(dentry, &dst_device_id, &dst_file_ino))
		goto out;

	int zero = 0;
	struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
	if (!filemod_data)
		goto out;

	u32 pid = get_userspace_pid();
	struct task_struct* task = (struct task_struct*)bpf_get_current_task();

	filemod_data->meta.type = FILEMOD_EVENT;
	filemod_data->fmod_type = FMOD_SYMLINK;
	filemod_data->dst_flags = 0;
	filemod_data->src_inode = 0;
	filemod_data->dst_inode = dst_file_ino;
	filemod_data->src_device_id = 0;
	filemod_data->dst_device_id = dst_device_id;
	filemod_data->src_filepath_length = 0;
	filemod_data->dst_filepath_length = 0;

	void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
					      filemod_data->payload);
	payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);

	size_t len = bpf_probe_read_str(payload, MAX_FILEPATH_LENGTH, oldname);
	barrier_var(len);
	if (len <= MAX_FILEPATH_LENGTH) {
		barrier_var(len);
		payload += len;
		filemod_data->src_filepath_length = len;
	}
	len = read_absolute_file_path_from_dentry(dentry, payload);
	barrier_var(len);
	if (len <= MAX_FILEPATH_LENGTH) {
		barrier_var(len);
		payload += len;
		filemod_data->dst_filepath_length = len;
	}
	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
	unsigned long data_len = payload - (void*)filemod_data;
	data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}

SEC("raw_tracepoint/sched_process_fork")
int raw_tracepoint__sched_process_fork(struct bpf_raw_tracepoint_args* ctx)
{
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_fork);

	int zero = 0;
	struct var_fork_data_t* fork_data = bpf_map_lookup_elem(&data_heap, &zero);
	if (!fork_data)
		goto out;

	struct task_struct* parent = (struct task_struct*)ctx->args[0];
	struct task_struct* child = (struct task_struct*)ctx->args[1];
	fork_data->meta.type = FORK_EVENT;

	void* payload = populate_var_metadata(&fork_data->meta, child,
					      BPF_CORE_READ(child, pid), fork_data->payload);
	fork_data->parent_pid = BPF_CORE_READ(parent, pid);
	fork_data->parent_exec_id = BPF_CORE_READ(parent, self_exec_id);
	fork_data->parent_start_time = BPF_CORE_READ(parent, start_time);
	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &fork_data->meta);

	unsigned long data_len = payload - (void*)fork_data;
	data_len = data_len > sizeof(*fork_data) ? sizeof(*fork_data) : data_len;
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, fork_data, data_len);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}
char _license[] SEC("license") = "GPL";
tools/testing/selftests/bpf/progs/profiler1.c (new file, 6 lines)
@@ -0,0 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var))
#define UNROLL
#define INLINE __always_inline
#include "profiler.inc.h"

tools/testing/selftests/bpf/progs/profiler2.c (new file, 6 lines)
@@ -0,0 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#define barrier_var(var) /**/
/* undef #define UNROLL */
#define INLINE /**/
#include "profiler.inc.h"

tools/testing/selftests/bpf/progs/profiler3.c (new file, 6 lines)
@@ -0,0 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#define barrier_var(var) /**/
#define UNROLL
#define INLINE __noinline
#include "profiler.inc.h"
@@ -41,6 +41,43 @@ struct outer_arr {
 	.values = { (void *)&inner_map1, 0, (void *)&inner_map2 },
 };
 
+struct inner_map_sz3 {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(map_flags, BPF_F_INNER_MAP);
+	__uint(max_entries, 3);
+	__type(key, int);
+	__type(value, int);
+} inner_map3 SEC(".maps"),
+  inner_map4 SEC(".maps");
+
+struct inner_map_sz4 {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(map_flags, BPF_F_INNER_MAP);
+	__uint(max_entries, 5);
+	__type(key, int);
+	__type(value, int);
+} inner_map5 SEC(".maps");
+
+struct outer_arr_dyn {
+	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+	__uint(max_entries, 3);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+	__array(values, struct {
+		__uint(type, BPF_MAP_TYPE_ARRAY);
+		__uint(map_flags, BPF_F_INNER_MAP);
+		__uint(max_entries, 1);
+		__type(key, int);
+		__type(value, int);
+	});
+} outer_arr_dyn SEC(".maps") = {
+	.values = {
+		[0] = (void *)&inner_map3,
+		[1] = (void *)&inner_map4,
+		[2] = (void *)&inner_map5,
+	},
+};
+
 struct outer_hash {
 	__uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
 	__uint(max_entries, 5);
@@ -101,6 +138,12 @@ int handle__sys_enter(void *ctx)
 	val = input + 1;
 	bpf_map_update_elem(inner_map, &key, &val, 0);
 
+	inner_map = bpf_map_lookup_elem(&outer_arr_dyn, &key);
+	if (!inner_map)
+		return 1;
+	val = input + 2;
+	bpf_map_update_elem(inner_map, &key, &val, 0);
+
 	return 0;
 }
 
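outer_arr_dyn relies on the BPF_F_INNER_MAP flag from this series, which relaxes the old requirement that every inner array of a map-in-map share one max_entries. A hedged userspace sketch of what that enables; swap_inner_map is a hypothetical helper, and it assumes the libbpf bpf_create_map()/bpf_map_update_elem() wrappers of this era:

#include <unistd.h>
#include <bpf/bpf.h>

/* Hypothetical helper: place a freshly created inner array of an
 * arbitrary size into slot 'slot' of an outer array-of-maps whose
 * inner maps were declared with BPF_F_INNER_MAP.
 */
static int swap_inner_map(int outer_fd, int slot, int new_max_entries)
{
	int inner_fd, err;

	inner_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(int),
				  sizeof(int), new_max_entries,
				  BPF_F_INNER_MAP);
	if (inner_fd < 0)
		return inner_fd;
	err = bpf_map_update_elem(outer_fd, &slot, &inner_fd, 0);
	close(inner_fd); /* the outer map keeps its own reference */
	return err;
}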
tools/testing/selftests/bpf/progs/test_core_autosize.c (new file, 172 lines)
@@ -0,0 +1,172 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */

#include <linux/bpf.h>
#include <stdint.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>

char _license[] SEC("license") = "GPL";

/* fields of exactly the same size */
struct test_struct___samesize {
	void *ptr;
	unsigned long long val1;
	unsigned int val2;
	unsigned short val3;
	unsigned char val4;
} __attribute((preserve_access_index));

/* unsigned fields that have to be downsized by libbpf */
struct test_struct___downsize {
	void *ptr;
	unsigned long val1;
	unsigned long val2;
	unsigned long val3;
	unsigned long val4;
	/* total sz: 40 */
} __attribute__((preserve_access_index));

/* fields with signed integers of wrong size, should be rejected */
struct test_struct___signed {
	void *ptr;
	long val1;
	long val2;
	long val3;
	long val4;
} __attribute((preserve_access_index));

/* real layout and sizes according to test's (32-bit) BTF */
struct test_struct___real {
	unsigned int ptr; /* can't use `void *`, it is always 8 byte in BPF target */
	unsigned int val2;
	unsigned long long val1;
	unsigned short val3;
	unsigned char val4;
	unsigned char _pad;
	/* total sz: 20 */
};

struct test_struct___real input = {
	.ptr = 0x01020304,
	.val1 = 0x1020304050607080,
	.val2 = 0x0a0b0c0d,
	.val3 = 0xfeed,
	.val4 = 0xb9,
	._pad = 0xff, /* make sure no accidental zeros are present */
};

unsigned long long ptr_samesized = 0;
unsigned long long val1_samesized = 0;
unsigned long long val2_samesized = 0;
unsigned long long val3_samesized = 0;
unsigned long long val4_samesized = 0;
struct test_struct___real output_samesized = {};

unsigned long long ptr_downsized = 0;
unsigned long long val1_downsized = 0;
unsigned long long val2_downsized = 0;
unsigned long long val3_downsized = 0;
unsigned long long val4_downsized = 0;
struct test_struct___real output_downsized = {};

unsigned long long ptr_probed = 0;
unsigned long long val1_probed = 0;
unsigned long long val2_probed = 0;
unsigned long long val3_probed = 0;
unsigned long long val4_probed = 0;

unsigned long long ptr_signed = 0;
unsigned long long val1_signed = 0;
unsigned long long val2_signed = 0;
unsigned long long val3_signed = 0;
unsigned long long val4_signed = 0;
struct test_struct___real output_signed = {};

SEC("raw_tp/sys_exit")
int handle_samesize(void *ctx)
{
	struct test_struct___samesize *in = (void *)&input;
	struct test_struct___samesize *out = (void *)&output_samesized;

	ptr_samesized = (unsigned long long)in->ptr;
	val1_samesized = in->val1;
	val2_samesized = in->val2;
	val3_samesized = in->val3;
	val4_samesized = in->val4;

	out->ptr = in->ptr;
	out->val1 = in->val1;
	out->val2 = in->val2;
	out->val3 = in->val3;
	out->val4 = in->val4;

	return 0;
}

SEC("raw_tp/sys_exit")
int handle_downsize(void *ctx)
{
	struct test_struct___downsize *in = (void *)&input;
	struct test_struct___downsize *out = (void *)&output_downsized;

	ptr_downsized = (unsigned long long)in->ptr;
	val1_downsized = in->val1;
	val2_downsized = in->val2;
	val3_downsized = in->val3;
	val4_downsized = in->val4;

	out->ptr = in->ptr;
	out->val1 = in->val1;
	out->val2 = in->val2;
	out->val3 = in->val3;
	out->val4 = in->val4;

	return 0;
}

SEC("raw_tp/sys_enter")
int handle_probed(void *ctx)
{
	struct test_struct___downsize *in = (void *)&input;
	__u64 tmp;

	tmp = 0;
	bpf_core_read(&tmp, bpf_core_field_size(in->ptr), &in->ptr);
	ptr_probed = tmp;

	tmp = 0;
	bpf_core_read(&tmp, bpf_core_field_size(in->val1), &in->val1);
	val1_probed = tmp;

	tmp = 0;
	bpf_core_read(&tmp, bpf_core_field_size(in->val2), &in->val2);
	val2_probed = tmp;

	tmp = 0;
	bpf_core_read(&tmp, bpf_core_field_size(in->val3), &in->val3);
	val3_probed = tmp;

	tmp = 0;
	bpf_core_read(&tmp, bpf_core_field_size(in->val4), &in->val4);
	val4_probed = tmp;

	return 0;
}

SEC("raw_tp/sys_enter")
int handle_signed(void *ctx)
{
	struct test_struct___signed *in = (void *)&input;
	struct test_struct___signed *out = (void *)&output_signed;

	val2_signed = in->val2;
	val3_signed = in->val3;
	val4_signed = in->val4;

	out->val2 = in->val2;
	out->val3 = in->val3;
	out->val4 = in->val4;

	return 0;
}
55
tools/testing/selftests/bpf/progs/test_ksyms_btf.c
Normal file
55
tools/testing/selftests/bpf/progs/test_ksyms_btf.c
Normal file
@@ -0,0 +1,55 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Google */

#include "vmlinux.h"

#include <bpf/bpf_helpers.h>

__u64 out__runqueues_addr = -1;
__u64 out__bpf_prog_active_addr = -1;

__u32 out__rq_cpu = -1; /* percpu struct fields */
int out__bpf_prog_active = -1; /* percpu int */

__u32 out__this_rq_cpu = -1;
int out__this_bpf_prog_active = -1;

__u32 out__cpu_0_rq_cpu = -1; /* cpu_rq(0)->cpu */

extern const struct rq runqueues __ksym; /* struct type global var. */
extern const int bpf_prog_active __ksym; /* int type global var. */

SEC("raw_tp/sys_enter")
int handler(const void *ctx)
{
	struct rq *rq;
	int *active;
	__u32 cpu;

	out__runqueues_addr = (__u64)&runqueues;
	out__bpf_prog_active_addr = (__u64)&bpf_prog_active;

	cpu = bpf_get_smp_processor_id();

	/* test bpf_per_cpu_ptr() */
	rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, cpu);
	if (rq)
		out__rq_cpu = rq->cpu;
	active = (int *)bpf_per_cpu_ptr(&bpf_prog_active, cpu);
	if (active)
		out__bpf_prog_active = *active;

	rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, 0);
	if (rq) /* should always be valid, but we can't spare the check. */
		out__cpu_0_rq_cpu = rq->cpu;

	/* test bpf_this_cpu_ptr */
	rq = (struct rq *)bpf_this_cpu_ptr(&runqueues);
	out__this_rq_cpu = rq->cpu;
	active = (int *)bpf_this_cpu_ptr(&bpf_prog_active);
	out__this_bpf_prog_active = *active;

	return 0;
}

char _license[] SEC("license") = "GPL";
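Editor's note: for context, a hedged sketch of how a user-space harness could read these globals back, assuming a libbpf skeleton generated with `bpftool gen skeleton` into a hypothetical test_ksyms_btf.skel.h; the actual selftest harness differs. Initialized globals such as out__runqueues_addr live in the skeleton's .data view.

#include <stdio.h>
#include <unistd.h>
#include "test_ksyms_btf.skel.h" /* assumed skeleton header name */

int main(void)
{
	struct test_ksyms_btf *skel;

	skel = test_ksyms_btf__open_and_load();
	if (!skel)
		return 1;
	if (test_ksyms_btf__attach(skel))
		goto out;

	usleep(1); /* any syscall triggers the raw_tp/sys_enter handler */

	printf("runqueues addr: 0x%llx, this-cpu rq->cpu: %u\n",
	       skel->data->out__runqueues_addr,
	       skel->data->out__this_rq_cpu);
out:
	test_ksyms_btf__destroy(skel);
	return 0;
}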
@@ -304,10 +304,10 @@ int misc_estab(struct bpf_sock_ops *skops)
 		passive_lport_n = __bpf_htons(passive_lport_h);
 		bpf_setsockopt(skops, SOL_TCP, TCP_SAVE_SYN,
 			       &true_val, sizeof(true_val));
-		set_hdr_cb_flags(skops);
+		set_hdr_cb_flags(skops, 0);
 		break;
 	case BPF_SOCK_OPS_TCP_CONNECT_CB:
-		set_hdr_cb_flags(skops);
+		set_hdr_cb_flags(skops, 0);
 		break;
 	case BPF_SOCK_OPS_PARSE_HDR_OPT_CB:
 		return handle_parse_hdr(skops);
@@ -131,39 +131,55 @@ int bpf_prog2(struct __sk_buff *skb)
 
 }
 
-SEC("sk_skb3")
-int bpf_prog3(struct __sk_buff *skb)
+static inline void bpf_write_pass(struct __sk_buff *skb, int offset)
 {
-	const int one = 1;
-	int err, *f, ret = SK_PASS;
+	int err = bpf_skb_pull_data(skb, 6 + offset);
 	void *data_end;
 	char *c;
 
-	err = bpf_skb_pull_data(skb, 19);
 	if (err)
-		goto tls_out;
+		return;
 
 	c = (char *)(long)skb->data;
 	data_end = (void *)(long)skb->data_end;
 
-	if (c + 18 < data_end)
-		memcpy(&c[13], "PASS", 4);
+	if (c + 5 + offset < data_end)
+		memcpy(c + offset, "PASS", 4);
+}
+
+SEC("sk_skb3")
+int bpf_prog3(struct __sk_buff *skb)
+{
+	int err, *f, ret = SK_PASS;
+	const int one = 1;
+
 	f = bpf_map_lookup_elem(&sock_skb_opts, &one);
 	if (f && *f) {
 		__u64 flags = 0;
 
 		ret = 0;
 		flags = *f;
 
+		err = bpf_skb_adjust_room(skb, -13, 0, 0);
+		if (err)
+			return SK_DROP;
+		err = bpf_skb_adjust_room(skb, 4, 0, 0);
+		if (err)
+			return SK_DROP;
+		bpf_write_pass(skb, 0);
 #ifdef SOCKMAP
 		return bpf_sk_redirect_map(skb, &tls_sock_map, ret, flags);
 #else
 		return bpf_sk_redirect_hash(skb, &tls_sock_map, &ret, flags);
 #endif
 	}
 
 	f = bpf_map_lookup_elem(&sock_skb_opts, &one);
 	if (f && *f)
 		ret = SK_DROP;
+	err = bpf_skb_adjust_room(skb, 4, 0, 0);
+	if (err)
+		return SK_DROP;
+	bpf_write_pass(skb, 13);
 tls_out:
 	return ret;
 }
@@ -13,17 +13,10 @@
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_endian.h>
 
-#ifndef barrier_data
-# define barrier_data(ptr) asm volatile("": :"r"(ptr) :"memory")
-#endif
-
 #ifndef ctx_ptr
 # define ctx_ptr(field) (void *)(long)(field)
 #endif
 
-#define dst_to_src_tmp 0xeeddddeeU
-#define src_to_dst_tmp 0xeeffffeeU
-
 #define ip4_src 0xac100164 /* 172.16.1.100 */
 #define ip4_dst 0xac100264 /* 172.16.2.100 */
 
@@ -39,6 +32,18 @@
 	 a.s6_addr32[3] == b.s6_addr32[3])
 #endif
 
+enum {
+	dev_src,
+	dev_dst,
+};
+
+struct bpf_map_def SEC("maps") ifindex_map = {
+	.type		= BPF_MAP_TYPE_ARRAY,
+	.key_size	= sizeof(int),
+	.value_size	= sizeof(int),
+	.max_entries	= 2,
+};
+
 static __always_inline bool is_remote_ep_v4(struct __sk_buff *skb,
 					    __be32 addr)
 {
@@ -73,7 +78,14 @@ static __always_inline bool is_remote_ep_v6(struct __sk_buff *skb,
 	return v6_equal(ip6h->daddr, addr);
 }
 
-SEC("chk_neigh") int tc_chk(struct __sk_buff *skb)
+static __always_inline int get_dev_ifindex(int which)
+{
+	int *ifindex = bpf_map_lookup_elem(&ifindex_map, &which);
+
+	return ifindex ? *ifindex : 0;
+}
+
+SEC("chk_egress") int tc_chk(struct __sk_buff *skb)
 {
 	void *data_end = ctx_ptr(skb->data_end);
 	void *data = ctx_ptr(skb->data);
@@ -87,7 +99,6 @@ SEC("chk_neigh") int tc_chk(struct __sk_buff *skb)
 
 SEC("dst_ingress") int tc_dst(struct __sk_buff *skb)
 {
-	int idx = dst_to_src_tmp;
 	__u8 zero[ETH_ALEN * 2];
 	bool redirect = false;
 
@@ -103,19 +114,15 @@ SEC("dst_ingress") int tc_dst(struct __sk_buff *skb)
 	if (!redirect)
 		return TC_ACT_OK;
 
-	barrier_data(&idx);
-	idx = bpf_ntohl(idx);
-
 	__builtin_memset(&zero, 0, sizeof(zero));
 	if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0)
 		return TC_ACT_SHOT;
 
-	return bpf_redirect_neigh(idx, 0);
+	return bpf_redirect_neigh(get_dev_ifindex(dev_src), 0);
 }
 
 SEC("src_ingress") int tc_src(struct __sk_buff *skb)
 {
-	int idx = src_to_dst_tmp;
 	__u8 zero[ETH_ALEN * 2];
 	bool redirect = false;
 
@@ -131,14 +138,11 @@ SEC("src_ingress") int tc_src(struct __sk_buff *skb)
 	if (!redirect)
 		return TC_ACT_OK;
 
-	barrier_data(&idx);
-	idx = bpf_ntohl(idx);
-
 	__builtin_memset(&zero, 0, sizeof(zero));
 	if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0)
 		return TC_ACT_SHOT;
 
-	return bpf_redirect_neigh(idx, 0);
+	return bpf_redirect_neigh(get_dev_ifindex(dev_dst), 0);
 }
 
 char __license[] SEC("license") = "GPL";
45	tools/testing/selftests/bpf/progs/test_tc_peer.c	Normal file
@@ -0,0 +1,45 @@
// SPDX-License-Identifier: GPL-2.0
#include <stdint.h>
#include <stdbool.h>

#include <linux/bpf.h>
#include <linux/stddef.h>
#include <linux/pkt_cls.h>

#include <bpf/bpf_helpers.h>

enum {
	dev_src,
	dev_dst,
};

struct bpf_map_def SEC("maps") ifindex_map = {
	.type		= BPF_MAP_TYPE_ARRAY,
	.key_size	= sizeof(int),
	.value_size	= sizeof(int),
	.max_entries	= 2,
};

static __always_inline int get_dev_ifindex(int which)
{
	int *ifindex = bpf_map_lookup_elem(&ifindex_map, &which);

	return ifindex ? *ifindex : 0;
}

SEC("chk_egress") int tc_chk(struct __sk_buff *skb)
{
	return TC_ACT_SHOT;
}

SEC("dst_ingress") int tc_dst(struct __sk_buff *skb)
{
	return bpf_redirect_peer(get_dev_ifindex(dev_src), 0);
}

SEC("src_ingress") int tc_src(struct __sk_buff *skb)
{
	return bpf_redirect_peer(get_dev_ifindex(dev_dst), 0);
}

char __license[] SEC("license") = "GPL";
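Editor's note: both the neigh and peer TC programs read their target ifindex from ifindex_map instead of the old patch-the-object-file trick. A rough user-space sketch of filling that map with libbpf follows (error handling trimmed; the object path and function name setup_ifindex_map are illustrative); the test_tc_redirect.sh script later in this series does the same thing with bpftool.

#include <bpf/libbpf.h>
#include <bpf/bpf.h>

/* sketch: load test_tc_peer.o and point dev_src/dev_dst at real ifindexes */
int setup_ifindex_map(int ifindex_src_fwd, int ifindex_dst_fwd)
{
	struct bpf_object *obj;
	int key, map_fd, err;

	obj = bpf_object__open_file("test_tc_peer.o", NULL);
	if (libbpf_get_error(obj))
		return -1;
	if (bpf_object__load(obj))
		return -1;

	map_fd = bpf_object__find_map_fd_by_name(obj, "ifindex_map");
	if (map_fd < 0)
		return -1;

	key = 0; /* dev_src */
	err = bpf_map_update_elem(map_fd, &key, &ifindex_src_fwd, BPF_ANY);
	if (err)
		return err;
	key = 1; /* dev_dst */
	return bpf_map_update_elem(map_fd, &key, &ifindex_dst_fwd, BPF_ANY);
}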
@@ -21,6 +21,7 @@
 
 __u8 test_kind = TCPOPT_EXP;
 __u16 test_magic = 0xeB9F;
+__u32 inherit_cb_flags = 0;
 
 struct bpf_test_option passive_synack_out = {};
 struct bpf_test_option passive_fin_out = {};
@@ -467,6 +468,8 @@ static int handle_passive_estab(struct bpf_sock_ops *skops)
 	struct tcphdr *th;
 	int err;
 
+	inherit_cb_flags = skops->bpf_sock_ops_cb_flags;
+
 	err = load_option(skops, &passive_estab_in, true);
 	if (err == -ENOENT) {
 		/* saved_syn is not found. It was in syncookie mode.
@@ -600,10 +603,10 @@ int estab(struct bpf_sock_ops *skops)
 	case BPF_SOCK_OPS_TCP_LISTEN_CB:
 		bpf_setsockopt(skops, SOL_TCP, TCP_SAVE_SYN,
 			       &true_val, sizeof(true_val));
-		set_hdr_cb_flags(skops);
+		set_hdr_cb_flags(skops, BPF_SOCK_OPS_STATE_CB_FLAG);
 		break;
 	case BPF_SOCK_OPS_TCP_CONNECT_CB:
-		set_hdr_cb_flags(skops);
+		set_hdr_cb_flags(skops, 0);
 		break;
 	case BPF_SOCK_OPS_PARSE_HDR_OPT_CB:
 		return handle_parse_hdr(skops);
@@ -86,6 +86,7 @@ int txmsg_ktls_skb_redir;
 int ktls;
 int peek_flag;
 int skb_use_parser;
+int txmsg_omit_skb_parser;
 
 static const struct option long_options[] = {
 	{"help", no_argument, NULL, 'h' },
@@ -111,6 +112,7 @@ static const struct option long_options[] = {
 	{"txmsg_redir_skb", no_argument, &txmsg_redir_skb, 1 },
 	{"ktls", no_argument, &ktls, 1 },
 	{"peek", no_argument, &peek_flag, 1 },
+	{"txmsg_omit_skb_parser", no_argument, &txmsg_omit_skb_parser, 1},
 	{"whitelist", required_argument, NULL, 'n' },
 	{"blacklist", required_argument, NULL, 'b' },
 	{0, 0, NULL, 0 }
@@ -175,6 +177,7 @@ static void test_reset(void)
 	txmsg_apply = txmsg_cork = 0;
 	txmsg_ingress = txmsg_redir_skb = 0;
 	txmsg_ktls_skb = txmsg_ktls_skb_drop = txmsg_ktls_skb_redir = 0;
+	txmsg_omit_skb_parser = 0;
 	skb_use_parser = 0;
 }
 
@@ -518,28 +521,13 @@ static int msg_verify_data(struct msghdr *msg, int size, int chunk_sz)
 		if (i == 0 && txmsg_ktls_skb) {
 			if (msg->msg_iov[i].iov_len < 4)
 				return -EIO;
-			if (txmsg_ktls_skb_redir) {
-				if (memcmp(&d[13], "PASS", 4) != 0) {
-					fprintf(stderr,
-						"detected redirect ktls_skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n", i, 0, d[13], d[14], d[15], d[16]);
-					return -EIO;
-				}
-				d[13] = 0;
-				d[14] = 1;
-				d[15] = 2;
-				d[16] = 3;
-				j = 13;
-			} else if (txmsg_ktls_skb) {
-				if (memcmp(d, "PASS", 4) != 0) {
-					fprintf(stderr,
-						"detected ktls_skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n", i, 0, d[0], d[1], d[2], d[3]);
-					return -EIO;
-				}
-				d[0] = 0;
-				d[1] = 1;
-				d[2] = 2;
-				d[3] = 3;
+			if (memcmp(d, "PASS", 4) != 0) {
+				fprintf(stderr,
+					"detected skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n",
+					i, 0, d[0], d[1], d[2], d[3]);
+				return -EIO;
 			}
+			j = 4; /* advance index past PASS header */
 		}
 
 		for (; j < msg->msg_iov[i].iov_len && size; j++) {
@@ -927,13 +915,15 @@ static int run_options(struct sockmap_options *options, int cg_fd, int test)
 		goto run;
 
 	/* Attach programs to sockmap */
-	err = bpf_prog_attach(prog_fd[0], map_fd[0],
-			      BPF_SK_SKB_STREAM_PARSER, 0);
-	if (err) {
-		fprintf(stderr,
-			"ERROR: bpf_prog_attach (sockmap %i->%i): %d (%s)\n",
-			prog_fd[0], map_fd[0], err, strerror(errno));
-		return err;
+	if (!txmsg_omit_skb_parser) {
+		err = bpf_prog_attach(prog_fd[0], map_fd[0],
+				      BPF_SK_SKB_STREAM_PARSER, 0);
+		if (err) {
+			fprintf(stderr,
+				"ERROR: bpf_prog_attach (sockmap %i->%i): %d (%s)\n",
+				prog_fd[0], map_fd[0], err, strerror(errno));
+			return err;
+		}
 	}
 
 	err = bpf_prog_attach(prog_fd[1], map_fd[0],
@@ -946,13 +936,15 @@ static int run_options(struct sockmap_options *options, int cg_fd, int test)
 
 	/* Attach programs to TLS sockmap */
 	if (txmsg_ktls_skb) {
-		err = bpf_prog_attach(prog_fd[0], map_fd[8],
-				      BPF_SK_SKB_STREAM_PARSER, 0);
-		if (err) {
-			fprintf(stderr,
-				"ERROR: bpf_prog_attach (TLS sockmap %i->%i): %d (%s)\n",
-				prog_fd[0], map_fd[8], err, strerror(errno));
-			return err;
+		if (!txmsg_omit_skb_parser) {
+			err = bpf_prog_attach(prog_fd[0], map_fd[8],
+					      BPF_SK_SKB_STREAM_PARSER, 0);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_prog_attach (TLS sockmap %i->%i): %d (%s)\n",
+					prog_fd[0], map_fd[8], err, strerror(errno));
+				return err;
+			}
 		}
 
 		err = bpf_prog_attach(prog_fd[2], map_fd[8],
@@ -1480,12 +1472,29 @@ static void test_txmsg_skb(int cgrp, struct sockmap_options *opt)
 	txmsg_ktls_skb_drop = 0;
 	txmsg_ktls_skb_redir = 1;
 	test_exec(cgrp, opt);
+	txmsg_ktls_skb_redir = 0;
+
+	/* Tests that omit skb_parser */
+	txmsg_omit_skb_parser = 1;
+	ktls = 0;
+	txmsg_ktls_skb = 0;
+	test_exec(cgrp, opt);
+
+	txmsg_ktls_skb_drop = 1;
+	test_exec(cgrp, opt);
+	txmsg_ktls_skb_drop = 0;
+
+	txmsg_ktls_skb_redir = 1;
+	test_exec(cgrp, opt);
+
+	ktls = 1;
+	test_exec(cgrp, opt);
+	txmsg_omit_skb_parser = 0;
 
 	opt->data_test = data;
 	ktls = k;
 }
 
 
 /* Test cork with hung data. This tests poor usage patterns where
  * cork can leave data on the ring if user program is buggy and
  * doesn't flush them somehow. They do take some time however
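Editor's note: a minimal sketch of what the new --txmsg_omit_skb_parser mode exercises, namely a sockmap with only the verdict program attached and no BPF_SK_SKB_STREAM_PARSER step; the function name and fd parameters are placeholders, bpf_prog_attach() is the standard libbpf call used throughout test_sockmap.c.

#include <bpf/bpf.h>

/* sketch: verdict-only sockmap setup; previously a STREAM_PARSER attach
 * was mandatory before the verdict program could run */
int attach_verdict_only(int verdict_fd, int sockmap_fd)
{
	return bpf_prog_attach(verdict_fd, sockmap_fd,
			       BPF_SK_SKB_STREAM_VERDICT, 0);
}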
@@ -1,168 +0,0 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link
# between src and dst. The netns fwd has veth links to each src and dst. The
# client is in src and server in dst. The test installs a TC BPF program to each
# host facing veth in fwd which calls into bpf_redirect_neigh() to perform the
# neigh addr population and redirect; it also installs a dropper prog on the
# egress side to drop skbs if neigh addrs were not populated.

if [[ $EUID -ne 0 ]]; then
	echo "This script must be run as root"
	echo "FAIL"
	exit 1
fi

# check that nc, dd, ping, ping6 and timeout are present
command -v nc >/dev/null 2>&1 || \
	{ echo >&2 "nc is not available"; exit 1; }
command -v dd >/dev/null 2>&1 || \
	{ echo >&2 "dd is not available"; exit 1; }
command -v timeout >/dev/null 2>&1 || \
	{ echo >&2 "timeout is not available"; exit 1; }
command -v ping >/dev/null 2>&1 || \
	{ echo >&2 "ping is not available"; exit 1; }
command -v ping6 >/dev/null 2>&1 || \
	{ echo >&2 "ping6 is not available"; exit 1; }

readonly GREEN='\033[0;92m'
readonly RED='\033[0;31m'
readonly NC='\033[0m' # No Color

readonly PING_ARG="-c 3 -w 10 -q"

readonly TIMEOUT=10

readonly NS_SRC="ns-src-$(mktemp -u XXXXXX)"
readonly NS_FWD="ns-fwd-$(mktemp -u XXXXXX)"
readonly NS_DST="ns-dst-$(mktemp -u XXXXXX)"

readonly IP4_SRC="172.16.1.100"
readonly IP4_DST="172.16.2.100"

readonly IP6_SRC="::1:dead:beef:cafe"
readonly IP6_DST="::2:dead:beef:cafe"

readonly IP4_SLL="169.254.0.1"
readonly IP4_DLL="169.254.0.2"
readonly IP4_NET="169.254.0.0"

cleanup()
{
	ip netns del ${NS_SRC}
	ip netns del ${NS_FWD}
	ip netns del ${NS_DST}
}

trap cleanup EXIT

set -e

ip netns add "${NS_SRC}"
ip netns add "${NS_FWD}"
ip netns add "${NS_DST}"

ip link add veth_src type veth peer name veth_src_fwd
ip link add veth_dst type veth peer name veth_dst_fwd

ip link set veth_src netns ${NS_SRC}
ip link set veth_src_fwd netns ${NS_FWD}

ip link set veth_dst netns ${NS_DST}
ip link set veth_dst_fwd netns ${NS_FWD}

ip -netns ${NS_SRC} addr add ${IP4_SRC}/32 dev veth_src
ip -netns ${NS_DST} addr add ${IP4_DST}/32 dev veth_dst

# The fwd netns automatically gets a v6 LL address / routes, but also needs a
# v4 one in order to start ARP probing. IP4_NET route is added to the endpoints
# so that the ARP processing will reply.

ip -netns ${NS_FWD} addr add ${IP4_SLL}/32 dev veth_src_fwd
ip -netns ${NS_FWD} addr add ${IP4_DLL}/32 dev veth_dst_fwd

ip -netns ${NS_SRC} addr add ${IP6_SRC}/128 dev veth_src nodad
ip -netns ${NS_DST} addr add ${IP6_DST}/128 dev veth_dst nodad

ip -netns ${NS_SRC} link set dev veth_src up
ip -netns ${NS_FWD} link set dev veth_src_fwd up

ip -netns ${NS_DST} link set dev veth_dst up
ip -netns ${NS_FWD} link set dev veth_dst_fwd up

ip -netns ${NS_SRC} route add ${IP4_DST}/32 dev veth_src scope global
ip -netns ${NS_SRC} route add ${IP4_NET}/16 dev veth_src scope global
ip -netns ${NS_FWD} route add ${IP4_SRC}/32 dev veth_src_fwd scope global

ip -netns ${NS_SRC} route add ${IP6_DST}/128 dev veth_src scope global
ip -netns ${NS_FWD} route add ${IP6_SRC}/128 dev veth_src_fwd scope global

ip -netns ${NS_DST} route add ${IP4_SRC}/32 dev veth_dst scope global
ip -netns ${NS_DST} route add ${IP4_NET}/16 dev veth_dst scope global
ip -netns ${NS_FWD} route add ${IP4_DST}/32 dev veth_dst_fwd scope global

ip -netns ${NS_DST} route add ${IP6_SRC}/128 dev veth_dst scope global
ip -netns ${NS_FWD} route add ${IP6_DST}/128 dev veth_dst_fwd scope global

fmac_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/address)
fmac_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/address)

ip -netns ${NS_SRC} neigh add ${IP4_DST} dev veth_src lladdr $fmac_src
ip -netns ${NS_DST} neigh add ${IP4_SRC} dev veth_dst lladdr $fmac_dst

ip -netns ${NS_SRC} neigh add ${IP6_DST} dev veth_src lladdr $fmac_src
ip -netns ${NS_DST} neigh add ${IP6_SRC} dev veth_dst lladdr $fmac_dst

veth_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/ifindex | awk '{printf "%08x\n", $1}')
veth_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/ifindex | awk '{printf "%08x\n", $1}')

xxd -p < test_tc_neigh.o | sed "s/eeddddee/$veth_src/g" | xxd -r -p > test_tc_neigh.x.o
xxd -p < test_tc_neigh.x.o | sed "s/eeffffee/$veth_dst/g" | xxd -r -p > test_tc_neigh.y.o

ip netns exec ${NS_FWD} tc qdisc add dev veth_src_fwd clsact
ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd ingress bpf da obj test_tc_neigh.y.o sec src_ingress
ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd egress bpf da obj test_tc_neigh.y.o sec chk_neigh

ip netns exec ${NS_FWD} tc qdisc add dev veth_dst_fwd clsact
ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd ingress bpf da obj test_tc_neigh.y.o sec dst_ingress
ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd egress bpf da obj test_tc_neigh.y.o sec chk_neigh

rm -f test_tc_neigh.x.o test_tc_neigh.y.o

ip netns exec ${NS_DST} bash -c "nc -4 -l -p 9004 &"
ip netns exec ${NS_DST} bash -c "nc -6 -l -p 9006 &"

set +e

TEST="TCPv4 connectivity test"
ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP4_DST}/9004"
if [ $? -ne 0 ]; then
	echo -e "${TEST}: ${RED}FAIL${NC}"
	exit 1
fi
echo -e "${TEST}: ${GREEN}PASS${NC}"

TEST="TCPv6 connectivity test"
ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP6_DST}/9006"
if [ $? -ne 0 ]; then
	echo -e "${TEST}: ${RED}FAIL${NC}"
	exit 1
fi
echo -e "${TEST}: ${GREEN}PASS${NC}"

TEST="ICMPv4 connectivity test"
ip netns exec ${NS_SRC} ping $PING_ARG ${IP4_DST}
if [ $? -ne 0 ]; then
	echo -e "${TEST}: ${RED}FAIL${NC}"
	exit 1
fi
echo -e "${TEST}: ${GREEN}PASS${NC}"

TEST="ICMPv6 connectivity test"
ip netns exec ${NS_SRC} ping6 $PING_ARG ${IP6_DST}
if [ $? -ne 0 ]; then
	echo -e "${TEST}: ${RED}FAIL${NC}"
	exit 1
fi
echo -e "${TEST}: ${GREEN}PASS${NC}"
204	tools/testing/selftests/bpf/test_tc_redirect.sh	Executable file
@@ -0,0 +1,204 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link
# between src and dst. The netns fwd has veth links to each src and dst. The
# client is in src and server in dst. The test installs a TC BPF program to each
# host facing veth in fwd which calls into i) bpf_redirect_neigh() to perform the
# neigh addr population and redirect or ii) bpf_redirect_peer() for namespace
# switch from ingress side; it also installs a checker prog on the egress side
# to drop unexpected traffic.

if [[ $EUID -ne 0 ]]; then
	echo "This script must be run as root"
	echo "FAIL"
	exit 1
fi

# check that needed tools are present
command -v nc >/dev/null 2>&1 || \
	{ echo >&2 "nc is not available"; exit 1; }
command -v dd >/dev/null 2>&1 || \
	{ echo >&2 "dd is not available"; exit 1; }
command -v timeout >/dev/null 2>&1 || \
	{ echo >&2 "timeout is not available"; exit 1; }
command -v ping >/dev/null 2>&1 || \
	{ echo >&2 "ping is not available"; exit 1; }
command -v ping6 >/dev/null 2>&1 || \
	{ echo >&2 "ping6 is not available"; exit 1; }
command -v perl >/dev/null 2>&1 || \
	{ echo >&2 "perl is not available"; exit 1; }
command -v jq >/dev/null 2>&1 || \
	{ echo >&2 "jq is not available"; exit 1; }
command -v bpftool >/dev/null 2>&1 || \
	{ echo >&2 "bpftool is not available"; exit 1; }

readonly GREEN='\033[0;92m'
readonly RED='\033[0;31m'
readonly NC='\033[0m' # No Color

readonly PING_ARG="-c 3 -w 10 -q"

readonly TIMEOUT=10

readonly NS_SRC="ns-src-$(mktemp -u XXXXXX)"
readonly NS_FWD="ns-fwd-$(mktemp -u XXXXXX)"
readonly NS_DST="ns-dst-$(mktemp -u XXXXXX)"

readonly IP4_SRC="172.16.1.100"
readonly IP4_DST="172.16.2.100"

readonly IP6_SRC="::1:dead:beef:cafe"
readonly IP6_DST="::2:dead:beef:cafe"

readonly IP4_SLL="169.254.0.1"
readonly IP4_DLL="169.254.0.2"
readonly IP4_NET="169.254.0.0"

netns_cleanup()
{
	ip netns del ${NS_SRC}
	ip netns del ${NS_FWD}
	ip netns del ${NS_DST}
}

netns_setup()
{
	ip netns add "${NS_SRC}"
	ip netns add "${NS_FWD}"
	ip netns add "${NS_DST}"

	ip link add veth_src type veth peer name veth_src_fwd
	ip link add veth_dst type veth peer name veth_dst_fwd

	ip link set veth_src netns ${NS_SRC}
	ip link set veth_src_fwd netns ${NS_FWD}

	ip link set veth_dst netns ${NS_DST}
	ip link set veth_dst_fwd netns ${NS_FWD}

	ip -netns ${NS_SRC} addr add ${IP4_SRC}/32 dev veth_src
	ip -netns ${NS_DST} addr add ${IP4_DST}/32 dev veth_dst

	# The fwd netns automatically gets a v6 LL address / routes, but also
	# needs a v4 one in order to start ARP probing. IP4_NET route is added
	# to the endpoints so that the ARP processing will reply.

	ip -netns ${NS_FWD} addr add ${IP4_SLL}/32 dev veth_src_fwd
	ip -netns ${NS_FWD} addr add ${IP4_DLL}/32 dev veth_dst_fwd

	ip -netns ${NS_SRC} addr add ${IP6_SRC}/128 dev veth_src nodad
	ip -netns ${NS_DST} addr add ${IP6_DST}/128 dev veth_dst nodad

	ip -netns ${NS_SRC} link set dev veth_src up
	ip -netns ${NS_FWD} link set dev veth_src_fwd up

	ip -netns ${NS_DST} link set dev veth_dst up
	ip -netns ${NS_FWD} link set dev veth_dst_fwd up

	ip -netns ${NS_SRC} route add ${IP4_DST}/32 dev veth_src scope global
	ip -netns ${NS_SRC} route add ${IP4_NET}/16 dev veth_src scope global
	ip -netns ${NS_FWD} route add ${IP4_SRC}/32 dev veth_src_fwd scope global

	ip -netns ${NS_SRC} route add ${IP6_DST}/128 dev veth_src scope global
	ip -netns ${NS_FWD} route add ${IP6_SRC}/128 dev veth_src_fwd scope global

	ip -netns ${NS_DST} route add ${IP4_SRC}/32 dev veth_dst scope global
	ip -netns ${NS_DST} route add ${IP4_NET}/16 dev veth_dst scope global
	ip -netns ${NS_FWD} route add ${IP4_DST}/32 dev veth_dst_fwd scope global

	ip -netns ${NS_DST} route add ${IP6_SRC}/128 dev veth_dst scope global
	ip -netns ${NS_FWD} route add ${IP6_DST}/128 dev veth_dst_fwd scope global

	fmac_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/address)
	fmac_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/address)

	ip -netns ${NS_SRC} neigh add ${IP4_DST} dev veth_src lladdr $fmac_src
	ip -netns ${NS_DST} neigh add ${IP4_SRC} dev veth_dst lladdr $fmac_dst

	ip -netns ${NS_SRC} neigh add ${IP6_DST} dev veth_src lladdr $fmac_src
	ip -netns ${NS_DST} neigh add ${IP6_SRC} dev veth_dst lladdr $fmac_dst
}

netns_test_connectivity()
{
	set +e

	ip netns exec ${NS_DST} bash -c "nc -4 -l -p 9004 &"
	ip netns exec ${NS_DST} bash -c "nc -6 -l -p 9006 &"

	TEST="TCPv4 connectivity test"
	ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP4_DST}/9004"
	if [ $? -ne 0 ]; then
		echo -e "${TEST}: ${RED}FAIL${NC}"
		exit 1
	fi
	echo -e "${TEST}: ${GREEN}PASS${NC}"

	TEST="TCPv6 connectivity test"
	ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP6_DST}/9006"
	if [ $? -ne 0 ]; then
		echo -e "${TEST}: ${RED}FAIL${NC}"
		exit 1
	fi
	echo -e "${TEST}: ${GREEN}PASS${NC}"

	TEST="ICMPv4 connectivity test"
	ip netns exec ${NS_SRC} ping $PING_ARG ${IP4_DST}
	if [ $? -ne 0 ]; then
		echo -e "${TEST}: ${RED}FAIL${NC}"
		exit 1
	fi
	echo -e "${TEST}: ${GREEN}PASS${NC}"

	TEST="ICMPv6 connectivity test"
	ip netns exec ${NS_SRC} ping6 $PING_ARG ${IP6_DST}
	if [ $? -ne 0 ]; then
		echo -e "${TEST}: ${RED}FAIL${NC}"
		exit 1
	fi
	echo -e "${TEST}: ${GREEN}PASS${NC}"

	set -e
}

hex_mem_str()
{
	perl -e 'print join(" ", unpack("(H2)8", pack("L", @ARGV)))' $1
}

netns_setup_bpf()
{
	local obj=$1

	ip netns exec ${NS_FWD} tc qdisc add dev veth_src_fwd clsact
	ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd ingress bpf da obj $obj sec src_ingress
	ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd egress bpf da obj $obj sec chk_egress

	ip netns exec ${NS_FWD} tc qdisc add dev veth_dst_fwd clsact
	ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd ingress bpf da obj $obj sec dst_ingress
	ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd egress bpf da obj $obj sec chk_egress

	veth_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/ifindex)
	veth_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/ifindex)

	progs=$(ip netns exec ${NS_FWD} bpftool net --json | jq -r '.[] | .tc | map(.id) | .[]')
	for prog in $progs; do
		map=$(bpftool prog show id $prog --json | jq -r '.map_ids | .? | .[]')
		if [ ! -z "$map" ]; then
			bpftool map update id $map key hex $(hex_mem_str 0) value hex $(hex_mem_str $veth_src)
			bpftool map update id $map key hex $(hex_mem_str 1) value hex $(hex_mem_str $veth_dst)
		fi
	done
}

trap netns_cleanup EXIT
set -e

netns_setup
netns_setup_bpf test_tc_neigh.o
netns_test_connectivity
netns_cleanup
netns_setup
netns_setup_bpf test_tc_peer.o
netns_test_connectivity
@@ -110,12 +110,13 @@ static inline void clear_hdr_cb_flags(struct bpf_sock_ops *skops)
 					    BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG));
 }
 
-static inline void set_hdr_cb_flags(struct bpf_sock_ops *skops)
+static inline void set_hdr_cb_flags(struct bpf_sock_ops *skops, __u32 extra)
 {
 	bpf_sock_ops_cb_flags_set(skops,
 				  skops->bpf_sock_ops_cb_flags |
 				  BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG |
-				  BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG);
+				  BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG |
+				  extra);
 }
 static inline void
 clear_parse_all_hdr_cb_flags(struct bpf_sock_ops *skops)
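Editor's note: the new `extra` argument lets a caller OR in additional callback flags in the same call; the two call shapes below simply restate the call sites already updated in the hunks above.

/* listen path also wants TCP state-change callbacks: */
set_hdr_cb_flags(skops, BPF_SOCK_OPS_STATE_CB_FLAG);
/* connect path only needs the header-option callbacks: */
set_hdr_cb_flags(skops, 0);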
@@ -90,6 +90,33 @@ long ksym_get_addr(const char *name)
 	return 0;
 }
 
+/* open kallsyms and read symbol addresses on the fly. Without caching all
+ * symbols, this is faster than load + find.
+ */
+int kallsyms_find(const char *sym, unsigned long long *addr)
+{
+	char type, name[500];
+	unsigned long long value;
+	int err = 0;
+	FILE *f;
+
+	f = fopen("/proc/kallsyms", "r");
+	if (!f)
+		return -EINVAL;
+
+	while (fscanf(f, "%llx %c %499s%*[^\n]\n", &value, &type, name) > 0) {
+		if (strcmp(name, sym) == 0) {
+			*addr = value;
+			goto out;
+		}
+	}
+	err = -ENOENT;
+
+out:
+	fclose(f);
+	return err;
+}
+
 void read_trace_pipe(void)
 {
 	int trace_fd;
@@ -12,6 +12,10 @@ struct ksym {
 int load_kallsyms(void);
 struct ksym *ksym_search(long key);
 long ksym_get_addr(const char *name);
+
+/* open kallsyms and find addresses on the fly, faster than load + search. */
+int kallsyms_find(const char *sym, unsigned long long *addr);
+
 void read_trace_pipe(void);
 
 #endif
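Editor's note: a usage sketch for the new helper, pairing it with the test_ksyms_btf expectations from earlier in this series. The "runqueues" symbol is real; the wrapper function and its `reported` parameter are illustrative.

#include <stdio.h>
#include "trace_helpers.h"

int check_runqueues_addr(unsigned long long reported)
{
	unsigned long long addr;
	int err = kallsyms_find("runqueues", &addr);

	if (err)
		return err; /* -EINVAL: no /proc/kallsyms; -ENOENT: not found */
	if (addr != reported) {
		fprintf(stderr, "ksym mismatch: 0x%llx != 0x%llx\n",
			addr, reported);
		return -1;
	}
	return 0;
}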
@@ -2,7 +2,7 @@
 	"empty prog",
 	.insns = {
 	},
-	.errstr = "unknown opcode 00",
+	.errstr = "last insn is not an exit or jmp",
 	.result = REJECT,
 },
 {
@@ -529,7 +529,7 @@
 	},
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	.result = REJECT,
-	.errstr = "invalid access to packet, off=0 size=8, R5(id=1,off=0,r=0)",
+	.errstr = "invalid access to packet, off=0 size=8, R5(id=2,off=0,r=0)",
 	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
@@ -50,14 +50,6 @@
 	.errstr = "invalid bpf_ld_imm64 insn",
 	.result = REJECT,
 },
-{
-	"test5 ld_imm64",
-	.insns = {
-	BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0),
-	},
-	.errstr = "invalid bpf_ld_imm64 insn",
-	.result = REJECT,
-},
 {
 	"test6 ld_imm64",
 	.insns = {
243	tools/testing/selftests/bpf/verifier/regalloc.c	Normal file
@@ -0,0 +1,243 @@
{
	"regalloc basic",
	.insns = {
	BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
	BPF_LD_MAP_FD(BPF_REG_1, 0),
	BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
	BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
	BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
	BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
	BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 20, 4),
	BPF_JMP_IMM(BPF_JSLT, BPF_REG_2, 0, 3),
	BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
	BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2),
	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
	BPF_EXIT_INSN(),
	},
	.fixup_map_hash_48b = { 4 },
	.result = ACCEPT,
	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
{
	"regalloc negative",
	.insns = {
	BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
	BPF_LD_MAP_FD(BPF_REG_1, 0),
	BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
	BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
	BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
	BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
	BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 24, 4),
	BPF_JMP_IMM(BPF_JSLT, BPF_REG_2, 0, 3),
	BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
	BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2),
	BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_7, 0),
	BPF_EXIT_INSN(),
	},
	.fixup_map_hash_48b = { 4 },
	.result = REJECT,
	.errstr = "invalid access to map value, value_size=48 off=48 size=1",
	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
{
	"regalloc src_reg mark",
	.insns = {
	BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
	BPF_LD_MAP_FD(BPF_REG_1, 0),
	BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
	BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
	BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
	BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
	BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 20, 5),
	BPF_MOV64_IMM(BPF_REG_3, 0),
	BPF_JMP_REG(BPF_JSGE, BPF_REG_3, BPF_REG_2, 3),
	BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
	BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2),
	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
	BPF_EXIT_INSN(),
	},
	.fixup_map_hash_48b = { 4 },
	.result = ACCEPT,
	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
{
	"regalloc src_reg negative",
	.insns = {
	BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
	BPF_LD_MAP_FD(BPF_REG_1, 0),
	BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
	BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
	BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
	BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
	BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 22, 5),
	BPF_MOV64_IMM(BPF_REG_3, 0),
	BPF_JMP_REG(BPF_JSGE, BPF_REG_3, BPF_REG_2, 3),
	BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
	BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2),
	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
	BPF_EXIT_INSN(),
	},
	.fixup_map_hash_48b = { 4 },
	.result = REJECT,
	.errstr = "invalid access to map value, value_size=48 off=44 size=8",
	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
{
	"regalloc and spill",
	.insns = {
	BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
	BPF_LD_MAP_FD(BPF_REG_1, 0),
	BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
	BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
	BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
	BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
	BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 20, 7),
	/* r0 has upper bound that should propagate into r2 */
	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -8), /* spill r2 */
	BPF_MOV64_IMM(BPF_REG_0, 0),
	BPF_MOV64_IMM(BPF_REG_2, 0), /* clear r0 and r2 */
	BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -8), /* fill r3 */
	BPF_JMP_REG(BPF_JSGE, BPF_REG_0, BPF_REG_3, 2),
	/* r3 has lower and upper bounds */
	BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_3),
	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
	BPF_EXIT_INSN(),
	},
	.fixup_map_hash_48b = { 4 },
	.result = ACCEPT,
	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
{
	"regalloc and spill negative",
	.insns = {
	BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
	BPF_LD_MAP_FD(BPF_REG_1, 0),
	BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
	BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
	BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
	BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
	BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 48, 7),
	/* r0 has upper bound that should propagate into r2 */
	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -8), /* spill r2 */
	BPF_MOV64_IMM(BPF_REG_0, 0),
	BPF_MOV64_IMM(BPF_REG_2, 0), /* clear r0 and r2 */
	BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -8), /* fill r3 */
	BPF_JMP_REG(BPF_JSGE, BPF_REG_0, BPF_REG_3, 2),
	/* r3 has lower and upper bounds */
	BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_3),
	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
	BPF_EXIT_INSN(),
	},
	.fixup_map_hash_48b = { 4 },
	.result = REJECT,
	.errstr = "invalid access to map value, value_size=48 off=48 size=8",
	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
{
	"regalloc three regs",
	.insns = {
	BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
	BPF_LD_MAP_FD(BPF_REG_1, 0),
	BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
	BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
	BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
	BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
	BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
	BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 12, 5),
	BPF_JMP_IMM(BPF_JSLT, BPF_REG_2, 0, 4),
	BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
	BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2),
	BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_4),
	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
	BPF_EXIT_INSN(),
	},
	.fixup_map_hash_48b = { 4 },
	.result = ACCEPT,
	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
{
	"regalloc after call",
	.insns = {
	BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
	BPF_LD_MAP_FD(BPF_REG_1, 0),
	BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
	BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
	BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
	BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
	BPF_MOV64_REG(BPF_REG_9, BPF_REG_0),
	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6),
	BPF_JMP_IMM(BPF_JSGT, BPF_REG_8, 20, 4),
	BPF_JMP_IMM(BPF_JSLT, BPF_REG_9, 0, 3),
	BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_8),
	BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_9),
	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
	BPF_EXIT_INSN(),
	BPF_MOV64_IMM(BPF_REG_0, 0),
	BPF_EXIT_INSN(),
	},
	.fixup_map_hash_48b = { 4 },
	.result = ACCEPT,
	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
{
	"regalloc in callee",
	.insns = {
	BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
	BPF_LD_MAP_FD(BPF_REG_1, 0),
	BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
	BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
	BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
	BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
	BPF_MOV64_REG(BPF_REG_3, BPF_REG_7),
	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
	BPF_EXIT_INSN(),
	BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 20, 5),
	BPF_JMP_IMM(BPF_JSLT, BPF_REG_2, 0, 4),
	BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_1),
	BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2),
	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
	BPF_EXIT_INSN(),
	BPF_MOV64_IMM(BPF_REG_0, 0),
	BPF_EXIT_INSN(),
	},
	.fixup_map_hash_48b = { 4 },
	.result = ACCEPT,
	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
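Editor's note: the regalloc cases above are hand-written BPF asm exercising the verifier's new tracking of bounds across register copies and spills. As a rough restricted-C analogue of the pattern under test (one scalar copied into a second register, bounded once, then both used in pointer arithmetic into a 48-byte map value, mirroring fixup_map_hash_48b), with the map name and function entirely hypothetical and no guarantee the compiler emits the exact instruction shape the asm tests pin down:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct bpf_map_def SEC("maps") map_48b = { /* hypothetical stand-in map */
	.type		= BPF_MAP_TYPE_HASH,
	.key_size	= sizeof(__u32),
	.value_size	= 48,
	.max_entries	= 1,
};

SEC("tracepoint")
int regalloc_analogue(void *ctx)
{
	__u32 key = 0;
	char *p = bpf_map_lookup_elem(&map_48b, &key);
	long a, b;
	__u64 val;

	if (!p)
		return 0;
	a = bpf_get_prandom_u32();
	b = a;		/* copy shares the scalar's id with the original */
	if (a > 20)	/* upper bound learned on 'a' applies to 'b' too */
		return 0;
	if (b < 0)	/* lower bound, mirroring the asm's JSLT check */
		return 0;
	/* max offset 20 + 20 = 40, so an 8-byte read stays inside 48 */
	val = *(__u64 *)(p + a + b);
	return val ? 1 : 0;
}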