Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Daniel Borkmann says:

====================
pull-request: bpf-next 2018-09-01

The following pull-request contains BPF updates for your *net-next* tree.

The main changes are:

1) Add AF_XDP zero-copy support for i40e driver (!), from Björn and Magnus.

2) BPF verifier improvements by giving each register its own liveness
   chain which allows to simplify and getting rid of skip_callee() logic,
   from Edward.

3) Add bpf fs pretty print support for percpu arraymap, percpu hashmap
   and percpu lru hashmap. Also add generic percpu formatted print on
   bpftool so the same can be dumped there, from Yonghong.

4) Add bpf_{set,get}sockopt() helper support for TCP_SAVE_SYN and
   TCP_SAVED_SYN options to allow reflection of tos/tclass from received
   SYN packet, from Nikita.

5) Misc improvements to the BPF sockmap test cases in terms of cgroup v2
   interaction and removal of incorrect shutdown() calls, from John.

6) Few cleanups in xdp_umem_assign_dev() and xdpsock samples, from Prashant.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller
2018-08-31 17:41:08 -07:00
33 changed files with 2066 additions and 375 deletions

View File

@@ -4,6 +4,7 @@
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <bpf/bpf.h>
#include <sys/resource.h>
#include <libelf.h>
@@ -45,7 +46,6 @@ static int count_result(int err)
return err;
}
#define min(a, b) ((a) < (b) ? (a) : (b))
#define __printf(a, b) __attribute__((format(printf, a, b)))
__printf(1, 2)
@@ -130,6 +130,7 @@ struct btf_raw_test {
bool map_create_err;
bool ordered_map;
bool lossless_map;
bool percpu_map;
int hdr_len_delta;
int type_off_delta;
int str_off_delta;
@@ -2157,6 +2158,7 @@ static struct btf_pprint_test_meta {
const char *map_name;
bool ordered_map;
bool lossless_map;
bool percpu_map;
} pprint_tests_meta[] = {
{
.descr = "BTF pretty print array",
@@ -2164,6 +2166,7 @@ static struct btf_pprint_test_meta {
.map_name = "pprint_test_array",
.ordered_map = true,
.lossless_map = true,
.percpu_map = false,
},
{
@@ -2172,6 +2175,7 @@ static struct btf_pprint_test_meta {
.map_name = "pprint_test_hash",
.ordered_map = false,
.lossless_map = true,
.percpu_map = false,
},
{
@@ -2180,30 +2184,83 @@ static struct btf_pprint_test_meta {
.map_name = "pprint_test_lru_hash",
.ordered_map = false,
.lossless_map = false,
.percpu_map = false,
},
{
.descr = "BTF pretty print percpu array",
.map_type = BPF_MAP_TYPE_PERCPU_ARRAY,
.map_name = "pprint_test_percpu_array",
.ordered_map = true,
.lossless_map = true,
.percpu_map = true,
},
{
.descr = "BTF pretty print percpu hash",
.map_type = BPF_MAP_TYPE_PERCPU_HASH,
.map_name = "pprint_test_percpu_hash",
.ordered_map = false,
.lossless_map = true,
.percpu_map = true,
},
{
.descr = "BTF pretty print lru percpu hash",
.map_type = BPF_MAP_TYPE_LRU_PERCPU_HASH,
.map_name = "pprint_test_lru_percpu_hash",
.ordered_map = false,
.lossless_map = false,
.percpu_map = true,
},
};
static void set_pprint_mapv(struct pprint_mapv *v, uint32_t i)
static void set_pprint_mapv(struct pprint_mapv *v, uint32_t i,
int num_cpus, int rounded_value_size)
{
v->ui32 = i;
v->si32 = -i;
v->unused_bits2a = 3;
v->bits28 = i;
v->unused_bits2b = 3;
v->ui64 = i;
v->aenum = i & 0x03;
int cpu;
for (cpu = 0; cpu < num_cpus; cpu++) {
v->ui32 = i + cpu;
v->si32 = -i;
v->unused_bits2a = 3;
v->bits28 = i;
v->unused_bits2b = 3;
v->ui64 = i;
v->aenum = i & 0x03;
v = (void *)v + rounded_value_size;
}
}
static int check_line(const char *expected_line, int nexpected_line,
int expected_line_len, const char *line)
{
if (CHECK(nexpected_line == expected_line_len,
"expected_line is too long"))
return -1;
if (strcmp(expected_line, line)) {
fprintf(stderr, "unexpected pprint output\n");
fprintf(stderr, "expected: %s", expected_line);
fprintf(stderr, " read: %s", line);
return -1;
}
return 0;
}
static int do_test_pprint(void)
{
const struct btf_raw_test *test = &pprint_test_template;
struct bpf_create_map_attr create_attr = {};
bool ordered_map, lossless_map, percpu_map;
int err, ret, num_cpus, rounded_value_size;
struct pprint_mapv *mapv = NULL;
unsigned int key, nr_read_elems;
bool ordered_map, lossless_map;
int map_fd = -1, btf_fd = -1;
struct pprint_mapv mapv = {};
unsigned int raw_btf_size;
char expected_line[255];
FILE *pin_file = NULL;
@@ -2212,7 +2269,6 @@ static int do_test_pprint(void)
char *line = NULL;
uint8_t *raw_btf;
ssize_t nread;
int err, ret;
fprintf(stderr, "%s......", test->descr);
raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types,
@@ -2261,9 +2317,18 @@ static int do_test_pprint(void)
if (CHECK(err, "bpf_obj_pin(%s): errno:%d.", pin_path, errno))
goto done;
percpu_map = test->percpu_map;
num_cpus = percpu_map ? bpf_num_possible_cpus() : 1;
rounded_value_size = round_up(sizeof(struct pprint_mapv), 8);
mapv = calloc(num_cpus, rounded_value_size);
if (CHECK(!mapv, "mapv allocation failure")) {
err = -1;
goto done;
}
for (key = 0; key < test->max_entries; key++) {
set_pprint_mapv(&mapv, key);
bpf_map_update_elem(map_fd, &key, &mapv, 0);
set_pprint_mapv(mapv, key, num_cpus, rounded_value_size);
bpf_map_update_elem(map_fd, &key, mapv, 0);
}
pin_file = fopen(pin_path, "r");
@@ -2286,33 +2351,74 @@ static int do_test_pprint(void)
ordered_map = test->ordered_map;
lossless_map = test->lossless_map;
do {
struct pprint_mapv *cmapv;
ssize_t nexpected_line;
unsigned int next_key;
int cpu;
next_key = ordered_map ? nr_read_elems : atoi(line);
set_pprint_mapv(&mapv, next_key);
nexpected_line = snprintf(expected_line, sizeof(expected_line),
"%u: {%u,0,%d,0x%x,0x%x,0x%x,{%lu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s}\n",
next_key,
mapv.ui32, mapv.si32,
mapv.unused_bits2a, mapv.bits28, mapv.unused_bits2b,
mapv.ui64,
mapv.ui8a[0], mapv.ui8a[1], mapv.ui8a[2], mapv.ui8a[3],
mapv.ui8a[4], mapv.ui8a[5], mapv.ui8a[6], mapv.ui8a[7],
pprint_enum_str[mapv.aenum]);
set_pprint_mapv(mapv, next_key, num_cpus, rounded_value_size);
cmapv = mapv;
if (CHECK(nexpected_line == sizeof(expected_line),
"expected_line is too long")) {
err = -1;
goto done;
for (cpu = 0; cpu < num_cpus; cpu++) {
if (percpu_map) {
/* for percpu map, the format looks like:
* <key>: {
* cpu0: <value_on_cpu0>
* cpu1: <value_on_cpu1>
* ...
* cpun: <value_on_cpun>
* }
*
* let us verify the line containing the key here.
*/
if (cpu == 0) {
nexpected_line = snprintf(expected_line,
sizeof(expected_line),
"%u: {\n",
next_key);
err = check_line(expected_line, nexpected_line,
sizeof(expected_line), line);
if (err == -1)
goto done;
}
/* read value@cpu */
nread = getline(&line, &line_len, pin_file);
if (nread < 0)
break;
}
nexpected_line = snprintf(expected_line, sizeof(expected_line),
"%s%u: {%u,0,%d,0x%x,0x%x,0x%x,"
"{%lu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s}\n",
percpu_map ? "\tcpu" : "",
percpu_map ? cpu : next_key,
cmapv->ui32, cmapv->si32,
cmapv->unused_bits2a,
cmapv->bits28,
cmapv->unused_bits2b,
cmapv->ui64,
cmapv->ui8a[0], cmapv->ui8a[1],
cmapv->ui8a[2], cmapv->ui8a[3],
cmapv->ui8a[4], cmapv->ui8a[5],
cmapv->ui8a[6], cmapv->ui8a[7],
pprint_enum_str[cmapv->aenum]);
err = check_line(expected_line, nexpected_line,
sizeof(expected_line), line);
if (err == -1)
goto done;
cmapv = (void *)cmapv + rounded_value_size;
}
if (strcmp(expected_line, line)) {
err = -1;
fprintf(stderr, "unexpected pprint output\n");
fprintf(stderr, "expected: %s", expected_line);
fprintf(stderr, " read: %s", line);
goto done;
if (percpu_map) {
/* skip the last bracket for the percpu map */
nread = getline(&line, &line_len, pin_file);
if (nread < 0)
break;
}
nread = getline(&line, &line_len, pin_file);
@@ -2334,6 +2440,8 @@ static int do_test_pprint(void)
err = 0;
done:
if (mapv)
free(mapv);
if (!err)
fprintf(stderr, "OK");
if (*btf_log_buf && (err || args.always_log))
@@ -2361,6 +2469,7 @@ static int test_pprint(void)
pprint_test_template.map_name = pprint_tests_meta[i].map_name;
pprint_test_template.ordered_map = pprint_tests_meta[i].ordered_map;
pprint_test_template.lossless_map = pprint_tests_meta[i].lossless_map;
pprint_test_template.percpu_map = pprint_tests_meta[i].percpu_map;
err |= count_result(do_test_pprint());
}

View File

@@ -469,8 +469,6 @@ static int sendmsg_test(struct sockmap_options *opt)
fprintf(stderr,
"msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n",
iov_count, iov_buf, cnt, err);
shutdown(p2, SHUT_RDWR);
shutdown(p1, SHUT_RDWR);
if (s.end.tv_sec - s.start.tv_sec) {
sent_Bps = sentBps(s);
recvd_Bps = recvdBps(s);
@@ -500,7 +498,6 @@ static int sendmsg_test(struct sockmap_options *opt)
fprintf(stderr,
"msg_loop_tx: iov_count %i iov_buf %i cnt %i err %i\n",
iov_count, iov_buf, cnt, err);
shutdown(c1, SHUT_RDWR);
if (s.end.tv_sec - s.start.tv_sec) {
sent_Bps = sentBps(s);
recvd_Bps = recvdBps(s);
@@ -1348,9 +1345,9 @@ static int populate_progs(char *bpf_file)
return 0;
}
static int __test_suite(char *bpf_file)
static int __test_suite(int cg_fd, char *bpf_file)
{
int cg_fd, err;
int err, cleanup = cg_fd;
err = populate_progs(bpf_file);
if (err < 0) {
@@ -1358,22 +1355,24 @@ static int __test_suite(char *bpf_file)
return err;
}
if (setup_cgroup_environment()) {
fprintf(stderr, "ERROR: cgroup env failed\n");
return -EINVAL;
}
cg_fd = create_and_get_cgroup(CG_PATH);
if (cg_fd < 0) {
fprintf(stderr,
"ERROR: (%i) open cg path failed: %s\n",
cg_fd, optarg);
return cg_fd;
}
if (setup_cgroup_environment()) {
fprintf(stderr, "ERROR: cgroup env failed\n");
return -EINVAL;
}
if (join_cgroup(CG_PATH)) {
fprintf(stderr, "ERROR: failed to join cgroup\n");
return -EINVAL;
cg_fd = create_and_get_cgroup(CG_PATH);
if (cg_fd < 0) {
fprintf(stderr,
"ERROR: (%i) open cg path failed: %s\n",
cg_fd, optarg);
return cg_fd;
}
if (join_cgroup(CG_PATH)) {
fprintf(stderr, "ERROR: failed to join cgroup\n");
return -EINVAL;
}
}
/* Tests basic commands and APIs with range of iov values */
@@ -1394,20 +1393,24 @@ static int __test_suite(char *bpf_file)
out:
printf("Summary: %i PASSED %i FAILED\n", passed, failed);
cleanup_cgroup_environment();
close(cg_fd);
if (cleanup < 0) {
cleanup_cgroup_environment();
close(cg_fd);
}
return err;
}
static int test_suite(void)
static int test_suite(int cg_fd)
{
int err;
err = __test_suite(BPF_SOCKMAP_FILENAME);
err = __test_suite(cg_fd, BPF_SOCKMAP_FILENAME);
if (err)
goto out;
err = __test_suite(BPF_SOCKHASH_FILENAME);
err = __test_suite(cg_fd, BPF_SOCKHASH_FILENAME);
out:
if (cg_fd > -1)
close(cg_fd);
return err;
}
@@ -1420,7 +1423,7 @@ int main(int argc, char **argv)
int test = PING_PONG;
if (argc < 2)
return test_suite();
return test_suite(-1);
while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:",
long_options, &longindex)) != -1) {
@@ -1486,6 +1489,9 @@ int main(int argc, char **argv)
}
}
if (argc <= 3 && cg_fd)
return test_suite(cg_fd);
if (!cg_fd) {
fprintf(stderr, "%s requires cgroup option: --cgroup <path>\n",
argv[0]);

View File

@@ -5,6 +5,7 @@
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/tcp.h>
@@ -17,6 +18,13 @@ struct bpf_map_def SEC("maps") global_map = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(__u32),
.value_size = sizeof(struct tcpbpf_globals),
.max_entries = 4,
};
struct bpf_map_def SEC("maps") sockopt_results = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(__u32),
.value_size = sizeof(int),
.max_entries = 2,
};
@@ -45,11 +53,14 @@ int _version SEC("version") = 1;
SEC("sockops")
int bpf_testcb(struct bpf_sock_ops *skops)
{
int rv = -1;
int bad_call_rv = 0;
char header[sizeof(struct ipv6hdr) + sizeof(struct tcphdr)];
struct tcphdr *thdr;
int good_call_rv = 0;
int op;
int bad_call_rv = 0;
int save_syn = 1;
int rv = -1;
int v = 0;
int op;
op = (int) skops->op;
@@ -82,6 +93,21 @@ int bpf_testcb(struct bpf_sock_ops *skops)
v = 0xff;
rv = bpf_setsockopt(skops, SOL_IPV6, IPV6_TCLASS, &v,
sizeof(v));
if (skops->family == AF_INET6) {
v = bpf_getsockopt(skops, IPPROTO_TCP, TCP_SAVED_SYN,
header, (sizeof(struct ipv6hdr) +
sizeof(struct tcphdr)));
if (!v) {
int offset = sizeof(struct ipv6hdr);
thdr = (struct tcphdr *)(header + offset);
v = thdr->syn;
__u32 key = 1;
bpf_map_update_elem(&sockopt_results, &key, &v,
BPF_ANY);
}
}
break;
case BPF_SOCK_OPS_RTO_CB:
break;
@@ -111,6 +137,12 @@ int bpf_testcb(struct bpf_sock_ops *skops)
break;
case BPF_SOCK_OPS_TCP_LISTEN_CB:
bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_STATE_CB_FLAG);
v = bpf_setsockopt(skops, IPPROTO_TCP, TCP_SAVE_SYN,
&save_syn, sizeof(save_syn));
/* Update global map w/ result of setsock opt */
__u32 key = 0;
bpf_map_update_elem(&sockopt_results, &key, &v, BPF_ANY);
break;
default:
rv = -1;

View File

@@ -54,6 +54,26 @@ err:
return -1;
}
int verify_sockopt_result(int sock_map_fd)
{
__u32 key = 0;
int res;
int rv;
/* check setsockopt for SAVE_SYN */
rv = bpf_map_lookup_elem(sock_map_fd, &key, &res);
EXPECT_EQ(0, rv, "d");
EXPECT_EQ(0, res, "d");
key = 1;
/* check getsockopt for SAVED_SYN */
rv = bpf_map_lookup_elem(sock_map_fd, &key, &res);
EXPECT_EQ(0, rv, "d");
EXPECT_EQ(1, res, "d");
return 0;
err:
return -1;
}
static int bpf_find_map(const char *test, struct bpf_object *obj,
const char *name)
{
@@ -70,11 +90,11 @@ static int bpf_find_map(const char *test, struct bpf_object *obj,
int main(int argc, char **argv)
{
const char *file = "test_tcpbpf_kern.o";
int prog_fd, map_fd, sock_map_fd;
struct tcpbpf_globals g = {0};
const char *cg_path = "/foo";
int error = EXIT_FAILURE;
struct bpf_object *obj;
int prog_fd, map_fd;
int cg_fd = -1;
__u32 key = 0;
int rv;
@@ -110,6 +130,10 @@ int main(int argc, char **argv)
if (map_fd < 0)
goto err;
sock_map_fd = bpf_find_map(__func__, obj, "sockopt_results");
if (sock_map_fd < 0)
goto err;
rv = bpf_map_lookup_elem(map_fd, &key, &g);
if (rv != 0) {
printf("FAILED: bpf_map_lookup_elem returns %d\n", rv);
@@ -121,6 +145,11 @@ int main(int argc, char **argv)
goto err;
}
if (verify_sockopt_result(sock_map_fd)) {
printf("FAILED: Wrong sockopt stats\n");
goto err;
}
printf("PASSED!\n");
error = 0;
err: