Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Alexei Starovoitov says:

====================
pull-request: bpf-next 2019-02-16

The following pull-request contains BPF updates for your *net-next* tree.

The main changes are:

1) Numerous libbpf API improvements, from Andrii, Andrey, Yonghong.

2) Test all bpf progs in alu32 mode, from Jiong.

3) skb->sk access and the bpf_sk_fullsock(), bpf_tcp_sock() helpers, from Martin (a usage sketch follows the diffstat below).

4) Support for IP encap in lwt bpf progs, from Peter.

5) Remove XDP_QUERY_XSK_UMEM dead code, from Jan.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
Committed by David S. Miller on 2019-02-16 22:56:34 -08:00
92 changed files with 2574 additions and 483 deletions
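Item 3 above adds direct skb->sk access plus two cast helpers. Before the per-file diffs, here is a condensed, hypothetical sketch of the pattern (modeled on the new test_sock_fields_kern.c selftest further down; the program and function names are illustrative only, and bpf_helpers.h is the selftest wrapper header extended by this series):

// SPDX-License-Identifier: GPL-2.0
/* Sketch only: take skb->sk, upgrade it with bpf_sk_fullsock(), then fetch
 * TCP state with bpf_tcp_sock(). Each NULL check is required by the
 * verifier (see the new verifier test cases at the end of this commit).
 */
#include <linux/bpf.h>
#include "bpf_helpers.h"

SEC("cgroup_skb/egress")
int snd_cwnd_probe(struct __sk_buff *skb)
{
	struct bpf_sock *sk = skb->sk;
	struct bpf_tcp_sock *tp;

	if (!sk)			/* skb->sk may be NULL */
		return 1;
	sk = bpf_sk_fullsock(sk);	/* sock_common -> full sock, may fail */
	if (!sk)
		return 1;
	tp = bpf_tcp_sock(sk);		/* full sock -> TCP fields, may fail */
	if (!tp)
		return 1;
	/* tp->snd_cwnd, tp->bytes_acked, sk->state etc. are now readable */
	return 1;			/* 1 == allow the packet */
}

char _license[] SEC("license") = "GPL";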

View File

@@ -29,3 +29,4 @@ test_netcnt
test_section_names
test_tcpnotify_user
test_libbpf
alu32

View File

@@ -23,42 +23,19 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \
test_socket_cookie test_cgroup_storage test_select_reuseport test_section_names \
test_netcnt test_tcpnotify_user
test_netcnt test_tcpnotify_user test_sock_fields
BPF_OBJ_FILES = \
test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \
sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o \
test_tcpnotify_kern.o sample_map_ret0.o test_tcpbpf_kern.o \
sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o \
test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o \
test_tunnel_kern.o test_sockhash_kern.o test_lwt_seg6local.o \
sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \
get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \
test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o test_xdp_vlan.o \
xdp_dummy.o test_map_in_map.o test_spin_lock.o test_map_lock.o
BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c)))
TEST_GEN_FILES = $(BPF_OBJ_FILES)
# Objects are built with default compilation flags and with sub-register
# code-gen enabled.
BPF_OBJ_FILES_DUAL_COMPILE = \
test_pkt_access.o test_pkt_access.o test_xdp.o test_adjust_tail.o \
test_l4lb.o test_l4lb_noinline.o test_xdp_noinline.o test_tcp_estats.o \
test_obj_id.o test_pkt_md_access.o test_tracepoint.o \
test_stacktrace_map.o test_stacktrace_map.o test_stacktrace_build_id.o \
test_stacktrace_build_id.o test_get_stack_rawtp.o \
test_get_stack_rawtp.o test_tracepoint.o test_sk_lookup_kern.o \
test_queue_map.o test_stack_map.o
TEST_GEN_FILES = $(BPF_OBJ_FILES) $(BPF_OBJ_FILES_DUAL_COMPILE)
# Also test sub-register code-gen if LLVM + kernel both has eBPF v3 processor
# support which is the first version to contain both ALU32 and JMP32
# instructions.
# Also test sub-register code-gen if LLVM has eBPF v3 processor support which
# contains both ALU32 and JMP32 instructions.
SUBREG_CODEGEN := $(shell echo "int cal(int a) { return a > 0; }" | \
$(CLANG) -target bpf -O2 -emit-llvm -S -x c - -o - | \
$(LLC) -mattr=+alu32 -mcpu=probe 2>&1 | \
$(LLC) -mattr=+alu32 -mcpu=v3 2>&1 | \
grep 'if w')
ifneq ($(SUBREG_CODEGEN),)
TEST_GEN_FILES += $(patsubst %.o,alu32/%.o, $(BPF_OBJ_FILES_DUAL_COMPILE))
TEST_GEN_FILES += $(patsubst %.o,alu32/%.o, $(BPF_OBJ_FILES))
endif
# Order corresponds to 'make run_tests' order
@@ -73,7 +50,8 @@ TEST_PROGS := test_kmod.sh \
test_lirc_mode2.sh \
test_skb_cgroup_id.sh \
test_flow_dissector.sh \
test_xdp_vlan.sh
test_xdp_vlan.sh \
test_lwt_ip_encap.sh
TEST_PROGS_EXTENDED := with_addr.sh \
with_tunnels.sh \
@@ -111,6 +89,7 @@ $(OUTPUT)/test_progs: trace_helpers.c
$(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c
$(OUTPUT)/test_cgroup_storage: cgroup_helpers.c
$(OUTPUT)/test_netcnt: cgroup_helpers.c
$(OUTPUT)/test_sock_fields: cgroup_helpers.c
.PHONY: force
@@ -188,7 +167,8 @@ $(ALU32_BUILD_DIR)/test_progs_32: test_progs.c $(ALU32_BUILD_DIR) \
$(CC) $(CFLAGS) -o $(ALU32_BUILD_DIR)/test_progs_32 $< \
trace_helpers.c $(OUTPUT)/libbpf.a $(LDLIBS)
$(ALU32_BUILD_DIR)/%.o: %.c $(ALU32_BUILD_DIR) $(ALU32_BUILD_DIR)/test_progs_32
$(ALU32_BUILD_DIR)/%.o: progs/%.c $(ALU32_BUILD_DIR) \
$(ALU32_BUILD_DIR)/test_progs_32
$(CLANG) $(CLANG_FLAGS) \
-O2 -target bpf -emit-llvm -c $< -o - | \
$(LLC) -march=bpf -mattr=+alu32 -mcpu=$(CPU) $(LLC_FLAGS) \
@@ -200,7 +180,7 @@ endif
# Have one program compiled without "-target bpf" to test whether libbpf loads
# it successfully
$(OUTPUT)/test_xdp.o: test_xdp.c
$(OUTPUT)/test_xdp.o: progs/test_xdp.c
$(CLANG) $(CLANG_FLAGS) \
-O2 -emit-llvm -c $< -o - | \
$(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@
@@ -208,7 +188,7 @@ ifeq ($(DWARF2BTF),y)
$(BTF_PAHOLE) -J $@
endif
$(OUTPUT)/%.o: %.c
$(OUTPUT)/%.o: progs/%.c
$(CLANG) $(CLANG_FLAGS) \
-O2 -target bpf -emit-llvm -c $< -o - | \
$(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@

View File

@@ -176,6 +176,10 @@ static void (*bpf_spin_lock)(struct bpf_spin_lock *lock) =
(void *) BPF_FUNC_spin_lock;
static void (*bpf_spin_unlock)(struct bpf_spin_lock *lock) =
(void *) BPF_FUNC_spin_unlock;
static struct bpf_sock *(*bpf_sk_fullsock)(struct bpf_sock *sk) =
(void *) BPF_FUNC_sk_fullsock;
static struct bpf_tcp_sock *(*bpf_tcp_sock)(struct bpf_sock *sk) =
(void *) BPF_FUNC_tcp_sock;
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions

View File

@@ -58,4 +58,13 @@ static inline unsigned int bpf_num_possible_cpus(void)
# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
#ifndef sizeof_field
#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
#endif
#ifndef offsetofend
#define offsetofend(TYPE, MEMBER) \
(offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER))
#endif
#endif /* __BPF_UTIL__ */
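The sizeof_field()/offsetofend() additions above are what the new verifier test cases near the end of this commit use to probe one access unit past the last readable member of struct bpf_sock and struct bpf_tcp_sock (the "beyond last field" cases). A minimal illustration, assuming only the UAPI <linux/bpf.h> for struct bpf_sock and this bpf_util.h on the include path:

#include <stddef.h>
#include <linux/bpf.h>
#include "bpf_util.h"

/* Size of the last readable bpf_sock member, and the first offset past it.
 * The "beyond last field" verifier test loads from exactly past_last and
 * expects the verifier to reject the access.
 */
static const unsigned int state_size = sizeof_field(struct bpf_sock, state);
static const unsigned int past_last  = offsetofend(struct bpf_sock, state);
/* past_last == offsetof(struct bpf_sock, state) + state_size */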

View File

@@ -0,0 +1,85 @@
// SPDX-License-Identifier: GPL-2.0
#include <stddef.h>
#include <string.h>
#include <linux/bpf.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include "bpf_helpers.h"
#include "bpf_endian.h"
struct grehdr {
__be16 flags;
__be16 protocol;
};
SEC("encap_gre")
int bpf_lwt_encap_gre(struct __sk_buff *skb)
{
struct encap_hdr {
struct iphdr iph;
struct grehdr greh;
} hdr;
int err;
memset(&hdr, 0, sizeof(struct encap_hdr));
hdr.iph.ihl = 5;
hdr.iph.version = 4;
hdr.iph.ttl = 0x40;
hdr.iph.protocol = 47; /* IPPROTO_GRE */
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
hdr.iph.saddr = 0x640110ac; /* 172.16.1.100 */
hdr.iph.daddr = 0x641010ac; /* 172.16.16.100 */
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
hdr.iph.saddr = 0xac100164; /* 172.16.1.100 */
hdr.iph.daddr = 0xac101064; /* 172.16.16.100 */
#else
#error "Fix your compiler's __BYTE_ORDER__?!"
#endif
hdr.iph.tot_len = bpf_htons(skb->len + sizeof(struct encap_hdr));
hdr.greh.protocol = skb->protocol;
err = bpf_lwt_push_encap(skb, BPF_LWT_ENCAP_IP, &hdr,
sizeof(struct encap_hdr));
if (err)
return BPF_DROP;
return BPF_LWT_REROUTE;
}
SEC("encap_gre6")
int bpf_lwt_encap_gre6(struct __sk_buff *skb)
{
struct encap_hdr {
struct ipv6hdr ip6hdr;
struct grehdr greh;
} hdr;
int err;
memset(&hdr, 0, sizeof(struct encap_hdr));
hdr.ip6hdr.version = 6;
hdr.ip6hdr.payload_len = bpf_htons(skb->len + sizeof(struct grehdr));
hdr.ip6hdr.nexthdr = 47; /* IPPROTO_GRE */
hdr.ip6hdr.hop_limit = 0x40;
/* fb01::1 */
hdr.ip6hdr.saddr.s6_addr[0] = 0xfb;
hdr.ip6hdr.saddr.s6_addr[1] = 1;
hdr.ip6hdr.saddr.s6_addr[15] = 1;
/* fb10::1 */
hdr.ip6hdr.daddr.s6_addr[0] = 0xfb;
hdr.ip6hdr.daddr.s6_addr[1] = 0x10;
hdr.ip6hdr.daddr.s6_addr[15] = 1;
hdr.greh.protocol = skb->protocol;
err = bpf_lwt_push_encap(skb, BPF_LWT_ENCAP_IP, &hdr,
sizeof(struct encap_hdr));
if (err)
return BPF_DROP;
return BPF_LWT_REROUTE;
}
char _license[] SEC("license") = "GPL";

View File

@@ -0,0 +1,152 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <linux/bpf.h>
#include <netinet/in.h>
#include <stdbool.h>
#include "bpf_helpers.h"
#include "bpf_endian.h"
enum bpf_array_idx {
SRV_IDX,
CLI_IDX,
__NR_BPF_ARRAY_IDX,
};
struct bpf_map_def SEC("maps") addr_map = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(__u32),
.value_size = sizeof(struct sockaddr_in6),
.max_entries = __NR_BPF_ARRAY_IDX,
};
struct bpf_map_def SEC("maps") sock_result_map = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(__u32),
.value_size = sizeof(struct bpf_sock),
.max_entries = __NR_BPF_ARRAY_IDX,
};
struct bpf_map_def SEC("maps") tcp_sock_result_map = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(__u32),
.value_size = sizeof(struct bpf_tcp_sock),
.max_entries = __NR_BPF_ARRAY_IDX,
};
struct bpf_map_def SEC("maps") linum_map = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(__u32),
.value_size = sizeof(__u32),
.max_entries = 1,
};
static bool is_loopback6(__u32 *a6)
{
return !a6[0] && !a6[1] && !a6[2] && a6[3] == bpf_htonl(1);
}
static void skcpy(struct bpf_sock *dst,
const struct bpf_sock *src)
{
dst->bound_dev_if = src->bound_dev_if;
dst->family = src->family;
dst->type = src->type;
dst->protocol = src->protocol;
dst->mark = src->mark;
dst->priority = src->priority;
dst->src_ip4 = src->src_ip4;
dst->src_ip6[0] = src->src_ip6[0];
dst->src_ip6[1] = src->src_ip6[1];
dst->src_ip6[2] = src->src_ip6[2];
dst->src_ip6[3] = src->src_ip6[3];
dst->src_port = src->src_port;
dst->dst_ip4 = src->dst_ip4;
dst->dst_ip6[0] = src->dst_ip6[0];
dst->dst_ip6[1] = src->dst_ip6[1];
dst->dst_ip6[2] = src->dst_ip6[2];
dst->dst_ip6[3] = src->dst_ip6[3];
dst->dst_port = src->dst_port;
dst->state = src->state;
}
static void tpcpy(struct bpf_tcp_sock *dst,
const struct bpf_tcp_sock *src)
{
dst->snd_cwnd = src->snd_cwnd;
dst->srtt_us = src->srtt_us;
dst->rtt_min = src->rtt_min;
dst->snd_ssthresh = src->snd_ssthresh;
dst->rcv_nxt = src->rcv_nxt;
dst->snd_nxt = src->snd_nxt;
dst->snd_una = src->snd_una;
dst->mss_cache = src->mss_cache;
dst->ecn_flags = src->ecn_flags;
dst->rate_delivered = src->rate_delivered;
dst->rate_interval_us = src->rate_interval_us;
dst->packets_out = src->packets_out;
dst->retrans_out = src->retrans_out;
dst->total_retrans = src->total_retrans;
dst->segs_in = src->segs_in;
dst->data_segs_in = src->data_segs_in;
dst->segs_out = src->segs_out;
dst->data_segs_out = src->data_segs_out;
dst->lost_out = src->lost_out;
dst->sacked_out = src->sacked_out;
dst->bytes_received = src->bytes_received;
dst->bytes_acked = src->bytes_acked;
}
#define RETURN { \
linum = __LINE__; \
bpf_map_update_elem(&linum_map, &idx0, &linum, 0); \
return 1; \
}
SEC("cgroup_skb/egress")
int read_sock_fields(struct __sk_buff *skb)
{
__u32 srv_idx = SRV_IDX, cli_idx = CLI_IDX, idx;
struct sockaddr_in6 *srv_sa6, *cli_sa6;
struct bpf_tcp_sock *tp, *tp_ret;
struct bpf_sock *sk, *sk_ret;
__u32 linum, idx0 = 0;
sk = skb->sk;
if (!sk || sk->state == 10)
RETURN;
sk = bpf_sk_fullsock(sk);
if (!sk || sk->family != AF_INET6 || sk->protocol != IPPROTO_TCP ||
!is_loopback6(sk->src_ip6))
RETURN;
tp = bpf_tcp_sock(sk);
if (!tp)
RETURN;
srv_sa6 = bpf_map_lookup_elem(&addr_map, &srv_idx);
cli_sa6 = bpf_map_lookup_elem(&addr_map, &cli_idx);
if (!srv_sa6 || !cli_sa6)
RETURN;
if (sk->src_port == bpf_ntohs(srv_sa6->sin6_port))
idx = srv_idx;
else if (sk->src_port == bpf_ntohs(cli_sa6->sin6_port))
idx = cli_idx;
else
RETURN;
sk_ret = bpf_map_lookup_elem(&sock_result_map, &idx);
tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &idx);
if (!sk_ret || !tp_ret)
RETURN;
skcpy(sk_ret, sk);
tpcpy(tp_ret, tp);
RETURN;
}
char _license[] SEC("license") = "GPL";

View File

@@ -5879,15 +5879,17 @@ static void dump_btf_strings(const char *strs, __u32 len)
static int do_test_dedup(unsigned int test_num)
{
const struct btf_dedup_test *test = &dedup_tests[test_num - 1];
int err = 0, i;
__u32 test_nr_types, expect_nr_types, test_str_len, expect_str_len;
void *raw_btf;
unsigned int raw_btf_size;
__u32 test_nr_types, expect_nr_types, test_btf_size, expect_btf_size;
const struct btf_header *test_hdr, *expect_hdr;
struct btf *test_btf = NULL, *expect_btf = NULL;
const void *test_btf_data, *expect_btf_data;
const char *ret_test_next_str, *ret_expect_next_str;
const char *test_strs, *expect_strs;
const char *test_str_cur, *test_str_end;
const char *expect_str_cur, *expect_str_end;
unsigned int raw_btf_size;
void *raw_btf;
int err = 0, i;
fprintf(stderr, "BTF dedup test[%u] (%s):", test_num, test->descr);
@@ -5924,23 +5926,34 @@ static int do_test_dedup(unsigned int test_num)
goto done;
}
btf__get_strings(test_btf, &test_strs, &test_str_len);
btf__get_strings(expect_btf, &expect_strs, &expect_str_len);
if (CHECK(test_str_len != expect_str_len,
"test_str_len:%u != expect_str_len:%u",
test_str_len, expect_str_len)) {
test_btf_data = btf__get_raw_data(test_btf, &test_btf_size);
expect_btf_data = btf__get_raw_data(expect_btf, &expect_btf_size);
if (CHECK(test_btf_size != expect_btf_size,
"test_btf_size:%u != expect_btf_size:%u",
test_btf_size, expect_btf_size)) {
err = -1;
goto done;
}
test_hdr = test_btf_data;
test_strs = test_btf_data + test_hdr->str_off;
expect_hdr = expect_btf_data;
expect_strs = expect_btf_data + expect_hdr->str_off;
if (CHECK(test_hdr->str_len != expect_hdr->str_len,
"test_hdr->str_len:%u != expect_hdr->str_len:%u",
test_hdr->str_len, expect_hdr->str_len)) {
fprintf(stderr, "\ntest strings:\n");
dump_btf_strings(test_strs, test_str_len);
dump_btf_strings(test_strs, test_hdr->str_len);
fprintf(stderr, "\nexpected strings:\n");
dump_btf_strings(expect_strs, expect_str_len);
dump_btf_strings(expect_strs, expect_hdr->str_len);
err = -1;
goto done;
}
test_str_cur = test_strs;
test_str_end = test_strs + test_str_len;
test_str_end = test_strs + test_hdr->str_len;
expect_str_cur = expect_strs;
expect_str_end = expect_strs + expect_str_len;
expect_str_end = expect_strs + expect_hdr->str_len;
while (test_str_cur < test_str_end && expect_str_cur < expect_str_end) {
size_t test_len, expect_len;

View File

@@ -0,0 +1,376 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Setup/topology:
#
# NS1 NS2 NS3
# veth1 <---> veth2 veth3 <---> veth4 (the top route)
# veth5 <---> veth6 veth7 <---> veth8 (the bottom route)
#
# each vethN gets IPv[4|6]_N address
#
# IPv*_SRC = IPv*_1
# IPv*_DST = IPv*_4
#
# all tests test pings from IPv*_SRC to IPv*_DST
#
# by default, routes are configured to allow packets to go
# IP*_1 <=> IP*_2 <=> IP*_3 <=> IP*_4 (the top route)
#
# a GRE device is installed in NS3 with IPv*_GRE, and
# NS1/NS2 are configured to route packets to IPv*_GRE via IP*_8
# (the bottom route)
#
# Tests:
#
# 1. routes NS2->IPv*_DST are brought down, so the only way a ping
# from IP*_SRC to IP*_DST can work is via IPv*_GRE
#
# 2a. in an egress test, a bpf LWT_XMIT program is installed on veth1
# that encaps the packets with an IP/GRE header to route to IPv*_GRE
#
# ping: SRC->[encap at veth1:egress]->GRE:decap->DST
# ping replies go DST->SRC directly
#
# 2b. in an ingress test, a bpf LWT_IN program is installed on veth2
# that encaps the packets with an IP/GRE header to route to IPv*_GRE
#
# ping: SRC->[encap at veth2:ingress]->GRE:decap->DST
# ping replies go DST->SRC directly
if [[ $EUID -ne 0 ]]; then
echo "This script must be run as root"
echo "FAIL"
exit 1
fi
readonly NS1="ns1-$(mktemp -u XXXXXX)"
readonly NS2="ns2-$(mktemp -u XXXXXX)"
readonly NS3="ns3-$(mktemp -u XXXXXX)"
readonly IPv4_1="172.16.1.100"
readonly IPv4_2="172.16.2.100"
readonly IPv4_3="172.16.3.100"
readonly IPv4_4="172.16.4.100"
readonly IPv4_5="172.16.5.100"
readonly IPv4_6="172.16.6.100"
readonly IPv4_7="172.16.7.100"
readonly IPv4_8="172.16.8.100"
readonly IPv4_GRE="172.16.16.100"
readonly IPv4_SRC=$IPv4_1
readonly IPv4_DST=$IPv4_4
readonly IPv6_1="fb01::1"
readonly IPv6_2="fb02::1"
readonly IPv6_3="fb03::1"
readonly IPv6_4="fb04::1"
readonly IPv6_5="fb05::1"
readonly IPv6_6="fb06::1"
readonly IPv6_7="fb07::1"
readonly IPv6_8="fb08::1"
readonly IPv6_GRE="fb10::1"
readonly IPv6_SRC=$IPv6_1
readonly IPv6_DST=$IPv6_4
TEST_STATUS=0
TESTS_SUCCEEDED=0
TESTS_FAILED=0
process_test_results()
{
if [[ "${TEST_STATUS}" -eq 0 ]] ; then
echo "PASS"
TESTS_SUCCEEDED=$((TESTS_SUCCEEDED+1))
else
echo "FAIL"
TESTS_FAILED=$((TESTS_FAILED+1))
fi
}
print_test_summary_and_exit()
{
echo "passed tests: ${TESTS_SUCCEEDED}"
echo "failed tests: ${TESTS_FAILED}"
if [ "${TESTS_FAILED}" -eq "0" ] ; then
exit 0
else
exit 1
fi
}
setup()
{
set -e # exit on error
TEST_STATUS=0
# create devices and namespaces
ip netns add "${NS1}"
ip netns add "${NS2}"
ip netns add "${NS3}"
ip link add veth1 type veth peer name veth2
ip link add veth3 type veth peer name veth4
ip link add veth5 type veth peer name veth6
ip link add veth7 type veth peer name veth8
ip netns exec ${NS2} sysctl -wq net.ipv4.ip_forward=1
ip netns exec ${NS2} sysctl -wq net.ipv6.conf.all.forwarding=1
ip link set veth1 netns ${NS1}
ip link set veth2 netns ${NS2}
ip link set veth3 netns ${NS2}
ip link set veth4 netns ${NS3}
ip link set veth5 netns ${NS1}
ip link set veth6 netns ${NS2}
ip link set veth7 netns ${NS2}
ip link set veth8 netns ${NS3}
# configure addresses: the top route (1-2-3-4)
ip -netns ${NS1} addr add ${IPv4_1}/24 dev veth1
ip -netns ${NS2} addr add ${IPv4_2}/24 dev veth2
ip -netns ${NS2} addr add ${IPv4_3}/24 dev veth3
ip -netns ${NS3} addr add ${IPv4_4}/24 dev veth4
ip -netns ${NS1} -6 addr add ${IPv6_1}/128 nodad dev veth1
ip -netns ${NS2} -6 addr add ${IPv6_2}/128 nodad dev veth2
ip -netns ${NS2} -6 addr add ${IPv6_3}/128 nodad dev veth3
ip -netns ${NS3} -6 addr add ${IPv6_4}/128 nodad dev veth4
# configure addresses: the bottom route (5-6-7-8)
ip -netns ${NS1} addr add ${IPv4_5}/24 dev veth5
ip -netns ${NS2} addr add ${IPv4_6}/24 dev veth6
ip -netns ${NS2} addr add ${IPv4_7}/24 dev veth7
ip -netns ${NS3} addr add ${IPv4_8}/24 dev veth8
ip -netns ${NS1} -6 addr add ${IPv6_5}/128 nodad dev veth5
ip -netns ${NS2} -6 addr add ${IPv6_6}/128 nodad dev veth6
ip -netns ${NS2} -6 addr add ${IPv6_7}/128 nodad dev veth7
ip -netns ${NS3} -6 addr add ${IPv6_8}/128 nodad dev veth8
ip -netns ${NS1} link set dev veth1 up
ip -netns ${NS2} link set dev veth2 up
ip -netns ${NS2} link set dev veth3 up
ip -netns ${NS3} link set dev veth4 up
ip -netns ${NS1} link set dev veth5 up
ip -netns ${NS2} link set dev veth6 up
ip -netns ${NS2} link set dev veth7 up
ip -netns ${NS3} link set dev veth8 up
# configure routes: IP*_SRC -> veth1/IP*_2 (= top route) default;
# the bottom route to specific bottom addresses
# NS1
# top route
ip -netns ${NS1} route add ${IPv4_2}/32 dev veth1
ip -netns ${NS1} route add default dev veth1 via ${IPv4_2} # go top by default
ip -netns ${NS1} -6 route add ${IPv6_2}/128 dev veth1
ip -netns ${NS1} -6 route add default dev veth1 via ${IPv6_2} # go top by default
# bottom route
ip -netns ${NS1} route add ${IPv4_6}/32 dev veth5
ip -netns ${NS1} route add ${IPv4_7}/32 dev veth5 via ${IPv4_6}
ip -netns ${NS1} route add ${IPv4_8}/32 dev veth5 via ${IPv4_6}
ip -netns ${NS1} -6 route add ${IPv6_6}/128 dev veth5
ip -netns ${NS1} -6 route add ${IPv6_7}/128 dev veth5 via ${IPv6_6}
ip -netns ${NS1} -6 route add ${IPv6_8}/128 dev veth5 via ${IPv6_6}
# NS2
# top route
ip -netns ${NS2} route add ${IPv4_1}/32 dev veth2
ip -netns ${NS2} route add ${IPv4_4}/32 dev veth3
ip -netns ${NS2} -6 route add ${IPv6_1}/128 dev veth2
ip -netns ${NS2} -6 route add ${IPv6_4}/128 dev veth3
# bottom route
ip -netns ${NS2} route add ${IPv4_5}/32 dev veth6
ip -netns ${NS2} route add ${IPv4_8}/32 dev veth7
ip -netns ${NS2} -6 route add ${IPv6_5}/128 dev veth6
ip -netns ${NS2} -6 route add ${IPv6_8}/128 dev veth7
# NS3
# top route
ip -netns ${NS3} route add ${IPv4_3}/32 dev veth4
ip -netns ${NS3} route add ${IPv4_1}/32 dev veth4 via ${IPv4_3}
ip -netns ${NS3} route add ${IPv4_2}/32 dev veth4 via ${IPv4_3}
ip -netns ${NS3} -6 route add ${IPv6_3}/128 dev veth4
ip -netns ${NS3} -6 route add ${IPv6_1}/128 dev veth4 via ${IPv6_3}
ip -netns ${NS3} -6 route add ${IPv6_2}/128 dev veth4 via ${IPv6_3}
# bottom route
ip -netns ${NS3} route add ${IPv4_7}/32 dev veth8
ip -netns ${NS3} route add ${IPv4_5}/32 dev veth8 via ${IPv4_7}
ip -netns ${NS3} route add ${IPv4_6}/32 dev veth8 via ${IPv4_7}
ip -netns ${NS3} -6 route add ${IPv6_7}/128 dev veth8
ip -netns ${NS3} -6 route add ${IPv6_5}/128 dev veth8 via ${IPv6_7}
ip -netns ${NS3} -6 route add ${IPv6_6}/128 dev veth8 via ${IPv6_7}
# configure IPv4 GRE device in NS3, and a route to it via the "bottom" route
ip -netns ${NS3} tunnel add gre_dev mode gre remote ${IPv4_1} local ${IPv4_GRE} ttl 255
ip -netns ${NS3} link set gre_dev up
ip -netns ${NS3} addr add ${IPv4_GRE} nodad dev gre_dev
ip -netns ${NS1} route add ${IPv4_GRE}/32 dev veth5 via ${IPv4_6}
ip -netns ${NS2} route add ${IPv4_GRE}/32 dev veth7 via ${IPv4_8}
# configure IPv6 GRE device in NS3, and a route to it via the "bottom" route
ip -netns ${NS3} -6 tunnel add name gre6_dev mode ip6gre remote ${IPv6_1} local ${IPv6_GRE} ttl 255
ip -netns ${NS3} link set gre6_dev up
ip -netns ${NS3} -6 addr add ${IPv6_GRE} nodad dev gre6_dev
ip -netns ${NS1} -6 route add ${IPv6_GRE}/128 dev veth5 via ${IPv6_6}
ip -netns ${NS2} -6 route add ${IPv6_GRE}/128 dev veth7 via ${IPv6_8}
# rp_filter gets confused by what these tests are doing, so disable it
ip netns exec ${NS1} sysctl -wq net.ipv4.conf.all.rp_filter=0
ip netns exec ${NS2} sysctl -wq net.ipv4.conf.all.rp_filter=0
ip netns exec ${NS3} sysctl -wq net.ipv4.conf.all.rp_filter=0
sleep 1 # reduce flakiness
set +e
}
cleanup()
{
ip netns del ${NS1} 2> /dev/null
ip netns del ${NS2} 2> /dev/null
ip netns del ${NS3} 2> /dev/null
}
trap cleanup EXIT
remove_routes_to_gredev()
{
ip -netns ${NS1} route del ${IPv4_GRE} dev veth5
ip -netns ${NS2} route del ${IPv4_GRE} dev veth7
ip -netns ${NS1} -6 route del ${IPv6_GRE}/128 dev veth5
ip -netns ${NS2} -6 route del ${IPv6_GRE}/128 dev veth7
}
add_unreachable_routes_to_gredev()
{
ip -netns ${NS1} route add unreachable ${IPv4_GRE}/32
ip -netns ${NS2} route add unreachable ${IPv4_GRE}/32
ip -netns ${NS1} -6 route add unreachable ${IPv6_GRE}/128
ip -netns ${NS2} -6 route add unreachable ${IPv6_GRE}/128
}
test_ping()
{
local readonly PROTO=$1
local readonly EXPECTED=$2
local RET=0
if [ "${PROTO}" == "IPv4" ] ; then
ip netns exec ${NS1} ping -c 1 -W 1 -I ${IPv4_SRC} ${IPv4_DST} 2>&1 > /dev/null
RET=$?
elif [ "${PROTO}" == "IPv6" ] ; then
ip netns exec ${NS1} ping6 -c 1 -W 6 -I ${IPv6_SRC} ${IPv6_DST} 2>&1 > /dev/null
RET=$?
else
echo " test_ping: unknown PROTO: ${PROTO}"
TEST_STATUS=1
fi
if [ "0" != "${RET}" ]; then
RET=1
fi
if [ "${EXPECTED}" != "${RET}" ] ; then
echo " test_ping failed: expected: ${EXPECTED}; got ${RET}"
TEST_STATUS=1
fi
}
test_egress()
{
local readonly ENCAP=$1
echo "starting egress ${ENCAP} encap test"
setup
# by default, pings work
test_ping IPv4 0
test_ping IPv6 0
# remove NS2->DST routes, ping fails
ip -netns ${NS2} route del ${IPv4_DST}/32 dev veth3
ip -netns ${NS2} -6 route del ${IPv6_DST}/128 dev veth3
test_ping IPv4 1
test_ping IPv6 1
# install replacement routes (LWT/eBPF), pings succeed
if [ "${ENCAP}" == "IPv4" ] ; then
ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj test_lwt_ip_encap.o sec encap_gre dev veth1
ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj test_lwt_ip_encap.o sec encap_gre dev veth1
elif [ "${ENCAP}" == "IPv6" ] ; then
ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj test_lwt_ip_encap.o sec encap_gre6 dev veth1
ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj test_lwt_ip_encap.o sec encap_gre6 dev veth1
else
echo " unknown encap ${ENCAP}"
TEST_STATUS=1
fi
test_ping IPv4 0
test_ping IPv6 0
# a negative test: remove routes to GRE devices: ping fails
remove_routes_to_gredev
test_ping IPv4 1
test_ping IPv6 1
# another negative test
add_unreachable_routes_to_gredev
test_ping IPv4 1
test_ping IPv6 1
cleanup
process_test_results
}
test_ingress()
{
local readonly ENCAP=$1
echo "starting ingress ${ENCAP} encap test"
setup
# need to wait a bit for IPv6 to autoconf, otherwise
# ping6 sometimes fails with "unable to bind to address"
# by default, pings work
test_ping IPv4 0
test_ping IPv6 0
# remove NS2->DST routes, pings fail
ip -netns ${NS2} route del ${IPv4_DST}/32 dev veth3
ip -netns ${NS2} -6 route del ${IPv6_DST}/128 dev veth3
test_ping IPv4 1
test_ping IPv6 1
# install replacement routes (LWT/eBPF), pings succeed
if [ "${ENCAP}" == "IPv4" ] ; then
ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj test_lwt_ip_encap.o sec encap_gre dev veth2
ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj test_lwt_ip_encap.o sec encap_gre dev veth2
elif [ "${ENCAP}" == "IPv6" ] ; then
ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj test_lwt_ip_encap.o sec encap_gre6 dev veth2
ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj test_lwt_ip_encap.o sec encap_gre6 dev veth2
else
echo "FAIL: unknown encap ${ENCAP}"
fi
test_ping IPv4 0
test_ping IPv6 0
# a negative test: remove routes to GRE devices: ping fails
remove_routes_to_gredev
test_ping IPv4 1
test_ping IPv6 1
# another negative test
add_unreachable_routes_to_gredev
test_ping IPv4 1
test_ping IPv6 1
cleanup
process_test_results
}
test_egress IPv4
test_egress IPv6
test_ingress IPv4
test_ingress IPv6
print_test_summary_and_exit

View File

@@ -20,6 +20,7 @@
#define MAX_INSNS 512
char bpf_log_buf[BPF_LOG_BUF_SIZE];
static bool verbose = false;
struct sock_test {
const char *descr;
@@ -325,6 +326,7 @@ static int load_sock_prog(const struct bpf_insn *prog,
enum bpf_attach_type attach_type)
{
struct bpf_load_program_attr attr;
int ret;
memset(&attr, 0, sizeof(struct bpf_load_program_attr));
attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
@@ -332,8 +334,13 @@ static int load_sock_prog(const struct bpf_insn *prog,
attr.insns = prog;
attr.insns_cnt = probe_prog_length(attr.insns);
attr.license = "GPL";
attr.log_level = 2;
return bpf_load_program_xattr(&attr, bpf_log_buf, BPF_LOG_BUF_SIZE);
ret = bpf_load_program_xattr(&attr, bpf_log_buf, BPF_LOG_BUF_SIZE);
if (verbose && ret < 0)
fprintf(stderr, "%s\n", bpf_log_buf);
return ret;
}
static int attach_sock_prog(int cgfd, int progfd,

View File

@@ -0,0 +1,327 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <sys/socket.h>
#include <sys/epoll.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include "cgroup_helpers.h"
enum bpf_array_idx {
SRV_IDX,
CLI_IDX,
__NR_BPF_ARRAY_IDX,
};
#define CHECK(condition, tag, format...) ({ \
int __ret = !!(condition); \
if (__ret) { \
printf("%s(%d):FAIL:%s ", __func__, __LINE__, tag); \
printf(format); \
printf("\n"); \
exit(-1); \
} \
})
#define TEST_CGROUP "/test-bpf-sock-fields"
#define DATA "Hello BPF!"
#define DATA_LEN sizeof(DATA)
static struct sockaddr_in6 srv_sa6, cli_sa6;
static int linum_map_fd;
static int addr_map_fd;
static int tp_map_fd;
static int sk_map_fd;
static __u32 srv_idx = SRV_IDX;
static __u32 cli_idx = CLI_IDX;
static void init_loopback6(struct sockaddr_in6 *sa6)
{
memset(sa6, 0, sizeof(*sa6));
sa6->sin6_family = AF_INET6;
sa6->sin6_addr = in6addr_loopback;
}
static void print_sk(const struct bpf_sock *sk)
{
char src_ip4[24], dst_ip4[24];
char src_ip6[64], dst_ip6[64];
inet_ntop(AF_INET, &sk->src_ip4, src_ip4, sizeof(src_ip4));
inet_ntop(AF_INET6, &sk->src_ip6, src_ip6, sizeof(src_ip6));
inet_ntop(AF_INET, &sk->dst_ip4, dst_ip4, sizeof(dst_ip4));
inet_ntop(AF_INET6, &sk->dst_ip6, dst_ip6, sizeof(dst_ip6));
printf("state:%u bound_dev_if:%u family:%u type:%u protocol:%u mark:%u priority:%u "
"src_ip4:%x(%s) src_ip6:%x:%x:%x:%x(%s) src_port:%u "
"dst_ip4:%x(%s) dst_ip6:%x:%x:%x:%x(%s) dst_port:%u\n",
sk->state, sk->bound_dev_if, sk->family, sk->type, sk->protocol,
sk->mark, sk->priority,
sk->src_ip4, src_ip4,
sk->src_ip6[0], sk->src_ip6[1], sk->src_ip6[2], sk->src_ip6[3],
src_ip6, sk->src_port,
sk->dst_ip4, dst_ip4,
sk->dst_ip6[0], sk->dst_ip6[1], sk->dst_ip6[2], sk->dst_ip6[3],
dst_ip6, ntohs(sk->dst_port));
}
static void print_tp(const struct bpf_tcp_sock *tp)
{
printf("snd_cwnd:%u srtt_us:%u rtt_min:%u snd_ssthresh:%u rcv_nxt:%u "
"snd_nxt:%u snd:una:%u mss_cache:%u ecn_flags:%u "
"rate_delivered:%u rate_interval_us:%u packets_out:%u "
"retrans_out:%u total_retrans:%u segs_in:%u data_segs_in:%u "
"segs_out:%u data_segs_out:%u lost_out:%u sacked_out:%u "
"bytes_received:%llu bytes_acked:%llu\n",
tp->snd_cwnd, tp->srtt_us, tp->rtt_min, tp->snd_ssthresh,
tp->rcv_nxt, tp->snd_nxt, tp->snd_una, tp->mss_cache,
tp->ecn_flags, tp->rate_delivered, tp->rate_interval_us,
tp->packets_out, tp->retrans_out, tp->total_retrans,
tp->segs_in, tp->data_segs_in, tp->segs_out,
tp->data_segs_out, tp->lost_out, tp->sacked_out,
tp->bytes_received, tp->bytes_acked);
}
static void check_result(void)
{
struct bpf_tcp_sock srv_tp, cli_tp;
struct bpf_sock srv_sk, cli_sk;
__u32 linum, idx0 = 0;
int err;
err = bpf_map_lookup_elem(linum_map_fd, &idx0, &linum);
CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
"err:%d errno:%d", err, errno);
err = bpf_map_lookup_elem(sk_map_fd, &srv_idx, &srv_sk);
CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &srv_idx)",
"err:%d errno:%d", err, errno);
err = bpf_map_lookup_elem(tp_map_fd, &srv_idx, &srv_tp);
CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &srv_idx)",
"err:%d errno:%d", err, errno);
err = bpf_map_lookup_elem(sk_map_fd, &cli_idx, &cli_sk);
CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &cli_idx)",
"err:%d errno:%d", err, errno);
err = bpf_map_lookup_elem(tp_map_fd, &cli_idx, &cli_tp);
CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &cli_idx)",
"err:%d errno:%d", err, errno);
printf("srv_sk: ");
print_sk(&srv_sk);
printf("\n");
printf("cli_sk: ");
print_sk(&cli_sk);
printf("\n");
printf("srv_tp: ");
print_tp(&srv_tp);
printf("\n");
printf("cli_tp: ");
print_tp(&cli_tp);
printf("\n");
CHECK(srv_sk.state == 10 ||
!srv_sk.state ||
srv_sk.family != AF_INET6 ||
srv_sk.protocol != IPPROTO_TCP ||
memcmp(srv_sk.src_ip6, &in6addr_loopback,
sizeof(srv_sk.src_ip6)) ||
memcmp(srv_sk.dst_ip6, &in6addr_loopback,
sizeof(srv_sk.dst_ip6)) ||
srv_sk.src_port != ntohs(srv_sa6.sin6_port) ||
srv_sk.dst_port != cli_sa6.sin6_port,
"Unexpected srv_sk", "Check srv_sk output. linum:%u", linum);
CHECK(cli_sk.state == 10 ||
!cli_sk.state ||
cli_sk.family != AF_INET6 ||
cli_sk.protocol != IPPROTO_TCP ||
memcmp(cli_sk.src_ip6, &in6addr_loopback,
sizeof(cli_sk.src_ip6)) ||
memcmp(cli_sk.dst_ip6, &in6addr_loopback,
sizeof(cli_sk.dst_ip6)) ||
cli_sk.src_port != ntohs(cli_sa6.sin6_port) ||
cli_sk.dst_port != srv_sa6.sin6_port,
"Unexpected cli_sk", "Check cli_sk output. linum:%u", linum);
CHECK(srv_tp.data_segs_out != 1 ||
srv_tp.data_segs_in ||
srv_tp.snd_cwnd != 10 ||
srv_tp.total_retrans ||
srv_tp.bytes_acked != DATA_LEN,
"Unexpected srv_tp", "Check srv_tp output. linum:%u", linum);
CHECK(cli_tp.data_segs_out ||
cli_tp.data_segs_in != 1 ||
cli_tp.snd_cwnd != 10 ||
cli_tp.total_retrans ||
cli_tp.bytes_received != DATA_LEN,
"Unexpected cli_tp", "Check cli_tp output. linum:%u", linum);
}
static void test(void)
{
int listen_fd, cli_fd, accept_fd, epfd, err;
struct epoll_event ev;
socklen_t addrlen;
addrlen = sizeof(struct sockaddr_in6);
ev.events = EPOLLIN;
epfd = epoll_create(1);
CHECK(epfd == -1, "epoll_create()", "epfd:%d errno:%d", epfd, errno);
/* Prepare listen_fd */
listen_fd = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0);
CHECK(listen_fd == -1, "socket()", "listen_fd:%d errno:%d",
listen_fd, errno);
init_loopback6(&srv_sa6);
err = bind(listen_fd, (struct sockaddr *)&srv_sa6, sizeof(srv_sa6));
CHECK(err, "bind(listen_fd)", "err:%d errno:%d", err, errno);
err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen);
CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d", err, errno);
err = listen(listen_fd, 1);
CHECK(err, "listen(listen_fd)", "err:%d errno:%d", err, errno);
/* Prepare cli_fd */
cli_fd = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0);
CHECK(cli_fd == -1, "socket()", "cli_fd:%d errno:%d", cli_fd, errno);
init_loopback6(&cli_sa6);
err = bind(cli_fd, (struct sockaddr *)&cli_sa6, sizeof(cli_sa6));
CHECK(err, "bind(cli_fd)", "err:%d errno:%d", err, errno);
err = getsockname(cli_fd, (struct sockaddr *)&cli_sa6, &addrlen);
CHECK(err, "getsockname(cli_fd)", "err:%d errno:%d",
err, errno);
/* Update addr_map with srv_sa6 and cli_sa6 */
err = bpf_map_update_elem(addr_map_fd, &srv_idx, &srv_sa6, 0);
CHECK(err, "map_update", "err:%d errno:%d", err, errno);
err = bpf_map_update_elem(addr_map_fd, &cli_idx, &cli_sa6, 0);
CHECK(err, "map_update", "err:%d errno:%d", err, errno);
/* Connect from cli_sa6 to srv_sa6 */
err = connect(cli_fd, (struct sockaddr *)&srv_sa6, addrlen);
printf("srv_sa6.sin6_port:%u cli_sa6.sin6_port:%u\n\n",
ntohs(srv_sa6.sin6_port), ntohs(cli_sa6.sin6_port));
CHECK(err && errno != EINPROGRESS,
"connect(cli_fd)", "err:%d errno:%d", err, errno);
ev.data.fd = listen_fd;
err = epoll_ctl(epfd, EPOLL_CTL_ADD, listen_fd, &ev);
CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, listen_fd)", "err:%d errno:%d",
err, errno);
/* Accept the connection */
/* Have some timeout in accept(listen_fd). Just in case. */
err = epoll_wait(epfd, &ev, 1, 1000);
CHECK(err != 1 || ev.data.fd != listen_fd,
"epoll_wait(listen_fd)",
"err:%d errno:%d ev.data.fd:%d listen_fd:%d",
err, errno, ev.data.fd, listen_fd);
accept_fd = accept(listen_fd, NULL, NULL);
CHECK(accept_fd == -1, "accept(listen_fd)", "accept_fd:%d errno:%d",
accept_fd, errno);
close(listen_fd);
/* Send some data from accept_fd to cli_fd */
err = send(accept_fd, DATA, DATA_LEN, 0);
CHECK(err != DATA_LEN, "send(accept_fd)", "err:%d errno:%d",
err, errno);
/* Have some timeout in recv(cli_fd). Just in case. */
ev.data.fd = cli_fd;
err = epoll_ctl(epfd, EPOLL_CTL_ADD, cli_fd, &ev);
CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, cli_fd)", "err:%d errno:%d",
err, errno);
err = epoll_wait(epfd, &ev, 1, 1000);
CHECK(err != 1 || ev.data.fd != cli_fd,
"epoll_wait(cli_fd)", "err:%d errno:%d ev.data.fd:%d cli_fd:%d",
err, errno, ev.data.fd, cli_fd);
err = recv(cli_fd, NULL, 0, MSG_TRUNC);
CHECK(err, "recv(cli_fd)", "err:%d errno:%d", err, errno);
close(epfd);
close(accept_fd);
close(cli_fd);
check_result();
}
int main(int argc, char **argv)
{
struct bpf_prog_load_attr attr = {
.file = "test_sock_fields_kern.o",
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.expected_attach_type = BPF_CGROUP_INET_EGRESS,
};
int cgroup_fd, prog_fd, err;
struct bpf_object *obj;
struct bpf_map *map;
err = setup_cgroup_environment();
CHECK(err, "setup_cgroup_environment()", "err:%d errno:%d",
err, errno);
atexit(cleanup_cgroup_environment);
/* Create a cgroup, get fd, and join it */
cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
CHECK(cgroup_fd == -1, "create_and_get_cgroup()",
"cgroup_fd:%d errno:%d", cgroup_fd, errno);
err = join_cgroup(TEST_CGROUP);
CHECK(err, "join_cgroup", "err:%d errno:%d", err, errno);
err = bpf_prog_load_xattr(&attr, &obj, &prog_fd);
CHECK(err, "bpf_prog_load_xattr()", "err:%d", err);
err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0);
CHECK(err == -1, "bpf_prog_attach(CPF_CGROUP_INET_EGRESS)",
"err:%d errno%d", err, errno);
close(cgroup_fd);
map = bpf_object__find_map_by_name(obj, "addr_map");
CHECK(!map, "cannot find addr_map", "(null)");
addr_map_fd = bpf_map__fd(map);
map = bpf_object__find_map_by_name(obj, "sock_result_map");
CHECK(!map, "cannot find sock_result_map", "(null)");
sk_map_fd = bpf_map__fd(map);
map = bpf_object__find_map_by_name(obj, "tcp_sock_result_map");
CHECK(!map, "cannot find tcp_sock_result_map", "(null)");
tp_map_fd = bpf_map__fd(map);
map = bpf_object__find_map_by_name(obj, "linum_map");
CHECK(!map, "cannot find linum_map", "(null)");
linum_map_fd = bpf_map__fd(map);
test();
bpf_object__close(obj);
cleanup_cgroup_environment();
printf("PASS\n");
return 0;
}

View File

@@ -547,7 +547,7 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.errstr = "cannot write into socket",
.errstr = "cannot write into sock",
.result = REJECT,
},
{
@@ -562,7 +562,7 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.errstr = "invalid bpf_sock access off=0 size=8",
.errstr = "invalid sock access off=0 size=8",
.result = REJECT,
},
{

View File

@@ -0,0 +1,384 @@
{
"skb->sk: no NULL check",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.result = REJECT,
.errstr = "invalid mem access 'sock_common_or_null'",
},
{
"skb->sk: sk->family [non fullsock field]",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, offsetof(struct bpf_sock, family)),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.result = ACCEPT,
},
{
"skb->sk: sk->type [fullsock field]",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, offsetof(struct bpf_sock, type)),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.result = REJECT,
.errstr = "invalid sock_common access",
},
{
"bpf_sk_fullsock(skb->sk): no !skb->sk check",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.result = REJECT,
.errstr = "type=sock_common_or_null expected=sock_common",
},
{
"sk_fullsock(skb->sk): no NULL check on ret",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, type)),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.result = REJECT,
.errstr = "invalid mem access 'sock_or_null'",
},
{
"sk_fullsock(skb->sk): sk->type [fullsock field]",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, type)),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.result = ACCEPT,
},
{
"sk_fullsock(skb->sk): sk->family [non fullsock field]",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
BPF_EXIT_INSN(),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, family)),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.result = ACCEPT,
},
{
"sk_fullsock(skb->sk): sk->state [narrow load]",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, state)),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.result = ACCEPT,
},
{
"sk_fullsock(skb->sk): sk->dst_port [narrow load]",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port)),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.result = ACCEPT,
},
{
"sk_fullsock(skb->sk): sk->dst_port [load 2nd byte]",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 1),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.result = REJECT,
.errstr = "invalid sock access",
},
{
"sk_fullsock(skb->sk): sk->dst_ip6 [load 2nd byte]",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_ip6[0]) + 1),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.result = ACCEPT,
},
{
"sk_fullsock(skb->sk): sk->type [narrow load]",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, type)),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.result = ACCEPT,
},
{
"sk_fullsock(skb->sk): sk->protocol [narrow load]",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, protocol)),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.result = ACCEPT,
},
{
"sk_fullsock(skb->sk): beyond last field",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetofend(struct bpf_sock, state)),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.result = REJECT,
.errstr = "invalid sock access",
},
{
"bpf_tcp_sock(skb->sk): no !skb->sk check",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.result = REJECT,
.errstr = "type=sock_common_or_null expected=sock_common",
},
{
"bpf_tcp_sock(skb->sk): no NULL check on ret",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_tcp_sock, snd_cwnd)),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.result = REJECT,
.errstr = "invalid mem access 'tcp_sock_or_null'",
},
{
"bpf_tcp_sock(skb->sk): tp->snd_cwnd",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
BPF_EXIT_INSN(),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_tcp_sock, snd_cwnd)),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.result = ACCEPT,
},
{
"bpf_tcp_sock(skb->sk): tp->bytes_acked",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
BPF_EXIT_INSN(),
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_tcp_sock, bytes_acked)),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.result = ACCEPT,
},
{
"bpf_tcp_sock(skb->sk): beyond last field",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
BPF_EXIT_INSN(),
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, offsetofend(struct bpf_tcp_sock, bytes_acked)),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.result = REJECT,
.errstr = "invalid tcp_sock access",
},
{
"bpf_tcp_sock(bpf_sk_fullsock(skb->sk)): tp->snd_cwnd",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
BPF_EXIT_INSN(),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
BPF_EXIT_INSN(),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_tcp_sock, snd_cwnd)),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.result = ACCEPT,
},
{
"bpf_sk_release(skb->sk)",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
BPF_EMIT_CALL(BPF_FUNC_sk_release),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
.errstr = "type=sock_common expected=sock",
},
{
"bpf_sk_release(bpf_sk_fullsock(skb->sk))",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
BPF_EXIT_INSN(),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
BPF_EMIT_CALL(BPF_FUNC_sk_release),
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
.errstr = "reference has not been acquired before",
},
{
"bpf_sk_release(bpf_tcp_sock(skb->sk))",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
BPF_EXIT_INSN(),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
BPF_EMIT_CALL(BPF_FUNC_sk_release),
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
.errstr = "type=tcp_sock expected=sock",
},

View File

@@ -365,7 +365,7 @@
},
.result = REJECT,
//.errstr = "same insn cannot be used with different pointers",
.errstr = "cannot write into socket",
.errstr = "cannot write into sock",
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
},
{