Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next

Pull networking updates from David Miller:

 1) Support ipv6 checksum offload in sunvnet driver, from Shannon
    Nelson.

 2) Move to RB-tree instead of custom AVL code in inetpeer, from Eric
    Dumazet.

 3) Allow generic XDP to work on virtual devices, from John Fastabend.

 4) Add bpf device maps and XDP_REDIRECT, which can be used to build
    arbitrary switching frameworks using XDP. From John Fastabend.

 5) Remove UFO offloads from the tree; they gave us little other than
    bugs.

 6) Remove the IPSEC flow cache, from Florian Westphal.

 7) Support ipv6 route offload in mlxsw driver.

 8) Support VF representors in bnxt_en, from Sathya Perla.

 9) Add support for forward error correction modes to ethtool, from
    Vidya Sagar Ravipati.

10) Add time filter for packet scheduler action dumping, from Jamal Hadi
    Salim.

11) Extend the zerocopy sendmsg() used by virtio and tap to regular
    sockets via MSG_ZEROCOPY. From Willem de Bruijn.

12) Significantly rework value tracking in the BPF verifier, from Edward
    Cree.

13) Add new jump instructions to eBPF, from Daniel Borkmann.

14) Rework rtnetlink plumbing so that operations can be run without
    taking the RTNL semaphore. From Florian Westphal.

15) Support XDP in tap driver, from Jason Wang.

16) Add 32-bit eBPF JIT for ARM, from Shubham Bansal.

17) Add Huawei hinic ethernet driver.

18) Allow reporting of MD5 keys in TCP inet_diag dumps, from Ivan
    Delalande.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1780 commits)
  i40e: point wb_desc at the nvm_wb_desc during i40e_read_nvm_aq
  i40e: avoid NVM acquire deadlock during NVM update
  drivers: net: xgene: Remove return statement from void function
  drivers: net: xgene: Configure tx/rx delay for ACPI
  drivers: net: xgene: Read tx/rx delay for ACPI
  rocker: fix kcalloc parameter order
  rds: Fix non-atomic operation on shared flag variable
  net: sched: don't use GFP_KERNEL under spin lock
  vhost_net: correctly check tx avail during rx busy polling
  net: mdio-mux: add mdio_mux parameter to mdio_mux_init()
  rxrpc: Make service connection lookup always check for retry
  net: stmmac: Delete dead code for MDIO registration
  gianfar: Fix Tx flow control deactivation
  cxgb4: Ignore MPS_TX_INT_CAUSE[Bubble] for T6
  cxgb4: Fix pause frame count in t4_get_port_stats
  cxgb4: fix memory leak
  tun: rename generic_xdp to skb_xdp
  tun: reserve extra headroom only when XDP is set
  net: dsa: bcm_sf2: Configure IMP port TC2QOS mapping
  net: dsa: bcm_sf2: Advertise number of egress queues
  ...
This commit is contained in:
Linus Torvalds
2017-09-06 14:45:08 -07:00
1592 changed files with 99386 additions and 30614 deletions

View File

@@ -15,9 +15,9 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
test_align
TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
test_pkt_md_access.o
test_pkt_md_access.o test_xdp_redirect.o sockmap_parse_prog.o sockmap_verdict_prog.o
TEST_PROGS := test_kmod.sh
TEST_PROGS := test_kmod.sh test_xdp_redirect.sh
include ../lib.mk

View File

@@ -38,6 +38,8 @@ static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) =
(void *) BPF_FUNC_clone_redirect;
static int (*bpf_redirect)(int ifindex, int flags) =
(void *) BPF_FUNC_redirect;
/* Function pointer through which BPF programs call the redirect_map helper.
 * It is initialised from the BPF_FUNC_redirect_map constant, following the
 * same pattern as the neighbouring helper declarations.
 */
static int (*bpf_redirect_map)(void *map, int key, int flags) =
(void *) BPF_FUNC_redirect_map;
static int (*bpf_perf_event_output)(void *ctx, void *map,
unsigned long long flags, void *data,
int size) =
@@ -63,6 +65,12 @@ static int (*bpf_xdp_adjust_head)(void *ctx, int offset) =
static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval,
int optlen) =
(void *) BPF_FUNC_setsockopt;
/* Function pointer for the sk_redirect_map helper, initialised from the
 * BPF_FUNC_sk_redirect_map constant. Takes a map, an integer key and flags.
 */
static int (*bpf_sk_redirect_map)(void *map, int key, int flags) =
(void *) BPF_FUNC_sk_redirect_map;
/* Function pointer for the sock_map_update helper, initialised from the
 * BPF_FUNC_sock_map_update constant. Takes a map, pointers to a key and a
 * value, and 64-bit flags.
 */
static int (*bpf_sock_map_update)(void *map, void *key, void *value,
unsigned long long flags) =
(void *) BPF_FUNC_sock_map_update;
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
@@ -85,6 +93,7 @@ struct bpf_map_def {
unsigned int max_entries;
unsigned int map_flags;
unsigned int inner_map_idx;
unsigned int numa_node;
};
static int (*bpf_skb_load_bytes)(void *ctx, int off, void *to, int len) =

View File

@@ -0,0 +1,38 @@
#include <linux/bpf.h>
#include "bpf_helpers.h"
#include "bpf_util.h"
#include "bpf_endian.h"
int _version SEC("version") = 1;
/* Convenience wrapper around bpf_trace_printk(): copies the format string
 * into a local char array, then passes that array and its size (sizeof, so
 * the terminating NUL is counted) followed by any additional arguments.
 * The statement expression evaluates to the bpf_trace_printk() result.
 */
#define bpf_printk(fmt, ...) \
({ \
char ____fmt[] = fmt; \
bpf_trace_printk(____fmt, sizeof(____fmt), \
##__VA_ARGS__); \
})
SEC("sk_skb1")
/* Program placed in the "sk_skb1" section.
 *
 * Always returns skb->len. When at least 10 bytes of packet data are
 * available it additionally stores 1 into byte 7 of the packet and emits a
 * trace line containing byte 0, the local port and the byte-swapped remote
 * port.
 */
int bpf_prog1(struct __sk_buff *skb)
{
	void *pkt_start = (void *)(long) skb->data;
	void *pkt_end = (void *)(long) skb->data_end;
	__u32 remote = skb->remote_port;
	__u32 local = skb->local_port;
	__u8 *bytes = pkt_start;

	/* Not enough data to touch safely: report the length and stop. */
	if (pkt_start + 10 > pkt_end)
		return skb->len;

	/* The store followed by the load has no functional purpose; it is
	 * here to exercise the verifier and the strparser handler for
	 * packet data reads/writes together with socket field access.
	 */
	bytes[7] = 1;
	bpf_printk("parse: data[0] = (%u): local_port %i remote %i\n",
		   bytes[0], local, bpf_ntohl(remote));

	return skb->len;
}
char _license[] SEC("license") = "GPL";

View File

@@ -0,0 +1,68 @@
#include <linux/bpf.h>
#include "bpf_helpers.h"
#include "bpf_util.h"
#include "bpf_endian.h"
int _version SEC("version") = 1;
/* Convenience wrapper around bpf_trace_printk(): copies the format string
 * into a local char array, then passes that array and its size (sizeof, so
 * the terminating NUL is counted) followed by any additional arguments.
 * The statement expression evaluates to the bpf_trace_printk() result.
 */
#define bpf_printk(fmt, ...) \
({ \
char ____fmt[] = fmt; \
bpf_trace_printk(____fmt, sizeof(____fmt), \
##__VA_ARGS__); \
})
/* Sockmap with 20 int-keyed entries; bpf_prog2 redirects into this map when
 * the first payload byte is zero.
 */
struct bpf_map_def SEC("maps") sock_map_rx = {
.type = BPF_MAP_TYPE_SOCKMAP,
.key_size = sizeof(int),
.value_size = sizeof(int),
.max_entries = 20,
};
/* Sockmap with 20 int-keyed entries; bpf_prog2 redirects into this map when
 * the first payload byte is non-zero.
 */
struct bpf_map_def SEC("maps") sock_map_tx = {
.type = BPF_MAP_TYPE_SOCKMAP,
.key_size = sizeof(int),
.value_size = sizeof(int),
.max_entries = 20,
};
/* Plain array map (not a sockmap). It is not referenced by the program in
 * this file.
 * NOTE(review): presumably consumed by the userspace side of the test -
 * confirm against the loader.
 */
struct bpf_map_def SEC("maps") sock_map_break = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(int),
.value_size = sizeof(int),
.max_entries = 20,
};
SEC("sk_skb2")
/* Program placed in the "sk_skb2" section.
 *
 * The first two payload bytes act as a tiny control header:
 *   byte 0 - map selector (0 selects sock_map_rx, anything else sock_map_tx)
 *   byte 1 - key passed to bpf_sk_redirect_map()
 *
 * After the header has been read, the first 8 payload bytes are overwritten
 * with a fixed marker pattern before the redirect is requested.
 *
 * Returns SK_DROP when fewer than 8 bytes of packet data are available,
 * otherwise whatever bpf_sk_redirect_map() returns.
 */
int bpf_prog2(struct __sk_buff *skb)
{
	void *data_end = (void *)(long) skb->data_end;
	void *data = (void *)(long) skb->data;
	__u8 *d = data;
	__u8 sk, map;

	/* All eight bytes written below must be inside the packet. */
	if (data + 8 > data_end)
		return SK_DROP;

	/* Capture the control bytes before they are overwritten. */
	map = d[0];
	sk = d[1];

	d[0] = 0xd;
	d[1] = 0xe;
	d[2] = 0xa;
	d[3] = 0xd;
	d[4] = 0xb;
	d[5] = 0xe;
	d[6] = 0xe;
	d[7] = 0xf;

	bpf_printk("verdict: data[0] = redir(%u:%u)\n", map, sk);

	if (!map)
		return bpf_sk_redirect_map(&sock_map_rx, sk, 0);
	return bpf_sk_redirect_map(&sock_map_tx, sk, 0);
}
char _license[] SEC("license") = "GPL";

View File

@@ -27,6 +27,11 @@
#define MAX_INSNS 512
#define MAX_MATCHES 16
struct bpf_reg_match {
unsigned int line;
const char *match;
};
struct bpf_align_test {
const char *descr;
struct bpf_insn insns[MAX_INSNS];
@@ -36,10 +41,14 @@ struct bpf_align_test {
REJECT
} result;
enum bpf_prog_type prog_type;
const char *matches[MAX_MATCHES];
/* Matches must be in order of increasing line */
struct bpf_reg_match matches[MAX_MATCHES];
};
static struct bpf_align_test tests[] = {
/* Four tests of known constants. These aren't staggeringly
* interesting since we track exact values now.
*/
{
.descr = "mov",
.insns = {
@@ -53,11 +62,13 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
"1: R1=ctx R3=imm2,min_value=2,max_value=2,min_align=2 R10=fp",
"2: R1=ctx R3=imm4,min_value=4,max_value=4,min_align=4 R10=fp",
"3: R1=ctx R3=imm8,min_value=8,max_value=8,min_align=8 R10=fp",
"4: R1=ctx R3=imm16,min_value=16,max_value=16,min_align=16 R10=fp",
"5: R1=ctx R3=imm32,min_value=32,max_value=32,min_align=32 R10=fp",
{1, "R1=ctx(id=0,off=0,imm=0)"},
{1, "R10=fp0"},
{1, "R3=inv2"},
{2, "R3=inv4"},
{3, "R3=inv8"},
{4, "R3=inv16"},
{5, "R3=inv32"},
},
},
{
@@ -79,17 +90,19 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
"1: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R10=fp",
"2: R1=ctx R3=imm2,min_value=2,max_value=2,min_align=2 R10=fp",
"3: R1=ctx R3=imm4,min_value=4,max_value=4,min_align=4 R10=fp",
"4: R1=ctx R3=imm8,min_value=8,max_value=8,min_align=8 R10=fp",
"5: R1=ctx R3=imm16,min_value=16,max_value=16,min_align=16 R10=fp",
"6: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R10=fp",
"7: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R4=imm32,min_value=32,max_value=32,min_align=32 R10=fp",
"8: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R4=imm16,min_value=16,max_value=16,min_align=16 R10=fp",
"9: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R4=imm8,min_value=8,max_value=8,min_align=8 R10=fp",
"10: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R4=imm4,min_value=4,max_value=4,min_align=4 R10=fp",
"11: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R4=imm2,min_value=2,max_value=2,min_align=2 R10=fp",
{1, "R1=ctx(id=0,off=0,imm=0)"},
{1, "R10=fp0"},
{1, "R3=inv1"},
{2, "R3=inv2"},
{3, "R3=inv4"},
{4, "R3=inv8"},
{5, "R3=inv16"},
{6, "R3=inv1"},
{7, "R4=inv32"},
{8, "R4=inv16"},
{9, "R4=inv8"},
{10, "R4=inv4"},
{11, "R4=inv2"},
},
},
{
@@ -106,12 +119,14 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
"1: R1=ctx R3=imm4,min_value=4,max_value=4,min_align=4 R10=fp",
"2: R1=ctx R3=imm8,min_value=8,max_value=8,min_align=4 R10=fp",
"3: R1=ctx R3=imm10,min_value=10,max_value=10,min_align=2 R10=fp",
"4: R1=ctx R3=imm10,min_value=10,max_value=10,min_align=2 R4=imm8,min_value=8,max_value=8,min_align=8 R10=fp",
"5: R1=ctx R3=imm10,min_value=10,max_value=10,min_align=2 R4=imm12,min_value=12,max_value=12,min_align=4 R10=fp",
"6: R1=ctx R3=imm10,min_value=10,max_value=10,min_align=2 R4=imm14,min_value=14,max_value=14,min_align=2 R10=fp",
{1, "R1=ctx(id=0,off=0,imm=0)"},
{1, "R10=fp0"},
{1, "R3=inv4"},
{2, "R3=inv8"},
{3, "R3=inv10"},
{4, "R4=inv8"},
{5, "R4=inv12"},
{6, "R4=inv14"},
},
},
{
@@ -126,13 +141,16 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
"1: R1=ctx R3=imm7,min_value=7,max_value=7,min_align=1 R10=fp",
"2: R1=ctx R3=imm7,min_value=7,max_value=7,min_align=1 R10=fp",
"3: R1=ctx R3=imm14,min_value=14,max_value=14,min_align=2 R10=fp",
"4: R1=ctx R3=imm56,min_value=56,max_value=56,min_align=4 R10=fp",
{1, "R1=ctx(id=0,off=0,imm=0)"},
{1, "R10=fp0"},
{1, "R3=inv7"},
{2, "R3=inv7"},
{3, "R3=inv14"},
{4, "R3=inv56"},
},
},
/* Tests using unknown values */
#define PREP_PKT_POINTERS \
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, \
offsetof(struct __sk_buff, data)), \
@@ -166,17 +184,19 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
"7: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R10=fp",
"8: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv55,min_align=2 R10=fp",
"9: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv54,min_align=4 R10=fp",
"10: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv53,min_align=8 R10=fp",
"11: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv52,min_align=16 R10=fp",
"18: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv56 R10=fp",
"19: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv51,min_align=32 R10=fp",
"20: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv52,min_align=16 R10=fp",
"21: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv53,min_align=8 R10=fp",
"22: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv54,min_align=4 R10=fp",
"23: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv55,min_align=2 R10=fp",
{7, "R0=pkt(id=0,off=8,r=8,imm=0)"},
{7, "R3=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
{8, "R3=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
{9, "R3=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
{10, "R3=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
{11, "R3=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
{18, "R3=pkt_end(id=0,off=0,imm=0)"},
{18, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
{19, "R4=inv(id=0,umax_value=8160,var_off=(0x0; 0x1fe0))"},
{20, "R4=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
{21, "R4=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
{22, "R4=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
{23, "R4=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
},
},
{
@@ -197,16 +217,16 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
"7: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R10=fp",
"8: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv56 R10=fp",
"9: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv55,min_align=1 R10=fp",
"10: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv56 R10=fp",
"11: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv54,min_align=2 R10=fp",
"12: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv56 R10=fp",
"13: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv53,min_align=4 R10=fp",
"14: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv56 R10=fp",
"15: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv52,min_align=8 R10=fp",
"16: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv50,min_align=8 R10=fp"
{7, "R3=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
{8, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
{9, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
{10, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
{11, "R4=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
{12, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
{13, "R4=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
{14, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
{15, "R4=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
{16, "R4=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
},
},
{
@@ -237,12 +257,14 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
"4: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=0) R3=pkt_end R5=pkt(id=0,off=0,r=0) R10=fp",
"5: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=0) R3=pkt_end R5=pkt(id=0,off=14,r=0) R10=fp",
"6: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=0) R3=pkt_end R4=pkt(id=0,off=14,r=0) R5=pkt(id=0,off=14,r=0) R10=fp",
"10: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=18) R3=pkt_end R4=inv56 R5=pkt(id=0,off=14,r=18) R10=fp",
"14: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=18) R3=pkt_end R4=inv48 R5=pkt(id=0,off=14,r=18) R10=fp",
"15: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=18) R3=pkt_end R4=inv48 R5=pkt(id=0,off=14,r=18) R10=fp",
{4, "R5=pkt(id=0,off=0,r=0,imm=0)"},
{5, "R5=pkt(id=0,off=14,r=0,imm=0)"},
{6, "R4=pkt(id=0,off=14,r=0,imm=0)"},
{10, "R2=pkt(id=0,off=0,r=18,imm=0)"},
{10, "R5=pkt(id=0,off=14,r=18,imm=0)"},
{10, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
{14, "R4=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"},
{15, "R4=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"},
},
},
{
@@ -297,62 +319,286 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
"8: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R6=inv54,min_align=4 R10=fp",
/* Offset is added to packet pointer R5, resulting in known
* auxiliary alignment and offset.
{8, "R2=pkt(id=0,off=0,r=8,imm=0)"},
{8, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
/* Offset is added to packet pointer R5, resulting in
* known fixed offset, and variable offset from R6.
*/
"11: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R5=pkt(id=1,off=0,r=0),aux_off=14,aux_off_align=4 R6=inv54,min_align=4 R10=fp",
{11, "R5=pkt(id=1,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off
* (14) which is 16. Then the variable offset is 4-byte
* aligned, so the total offset is 4-byte aligned and
* meets the load's requirements.
*/
"15: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=pkt(id=1,off=4,r=4),aux_off=14,aux_off_align=4 R5=pkt(id=1,off=0,r=4),aux_off=14,aux_off_align=4 R6=inv54,min_align=4 R10=fp",
{15, "R4=pkt(id=1,off=18,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
{15, "R5=pkt(id=1,off=14,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
/* Variable offset is added to R5 packet pointer,
* resulting in a variable offset of (4n).
*/
"18: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off=14,aux_off_align=4 R5=pkt(id=2,off=0,r=0),aux_off_align=4 R6=inv54,min_align=4 R10=fp",
{18, "R5=pkt(id=2,off=0,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
/* Constant offset is added to R5, resulting in
* reg->off of 14.
*/
"19: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off=14,aux_off_align=4 R5=pkt(id=2,off=14,r=0),aux_off_align=4 R6=inv54,min_align=4 R10=fp",
{19, "R5=pkt(id=2,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
/* At the time the word size load is performed from R5,
* it's total offset is NET_IP_ALIGN + reg->off (14) which
* is 16. Then the variable offset is considered using
* reg->aux_off_align which is 4 and meets the load's
* requirements.
* its total fixed offset is NET_IP_ALIGN + reg->off
* (14) which is 16. Then the variable offset is 4-byte
* aligned, so the total offset is 4-byte aligned and
* meets the load's requirements.
*/
"23: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=pkt(id=2,off=18,r=18),aux_off_align=4 R5=pkt(id=2,off=14,r=18),aux_off_align=4 R6=inv54,min_align=4 R10=fp",
{23, "R4=pkt(id=2,off=18,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
{23, "R5=pkt(id=2,off=14,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
/* Constant offset is added to R5 packet pointer,
* resulting in reg->off value of 14.
*/
"26: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off_align=4 R5=pkt(id=0,off=14,r=8) R6=inv54,min_align=4 R10=fp",
/* Variable offset is added to R5, resulting in an
* auxiliary offset of 14, and an auxiliary alignment of 4.
{26, "R5=pkt(id=0,off=14,r=8"},
/* Variable offset is added to R5, resulting in a
* variable offset of (4n).
*/
"27: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off_align=4 R5=pkt(id=3,off=0,r=0),aux_off=14,aux_off_align=4 R6=inv54,min_align=4 R10=fp",
/* Constant is added to R5 again, setting reg->off to 4. */
"28: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off_align=4 R5=pkt(id=3,off=4,r=0),aux_off=14,aux_off_align=4 R6=inv54,min_align=4 R10=fp",
/* And once more we add a variable, which causes an accumulation
* of reg->off into reg->aux_off_align, with resulting value of
* 18. The auxiliary alignment stays at 4.
{27, "R5=pkt(id=3,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
/* Constant is added to R5 again, setting reg->off to 18. */
{28, "R5=pkt(id=3,off=18,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
/* And once more we add a variable; resulting var_off
* is still (4n), fixed offset is not changed.
* Also, we create a new reg->id.
*/
"29: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off_align=4 R5=pkt(id=4,off=0,r=0),aux_off=18,aux_off_align=4 R6=inv54,min_align=4 R10=fp",
{29, "R5=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc))"},
/* At the time the word size load is performed from R5,
* it's total offset is NET_IP_ALIGN + reg->off (0) +
* reg->aux_off (18) which is 20. Then the variable offset
* is considered using reg->aux_off_align which is 4 and meets
* the load's requirements.
* its total fixed offset is NET_IP_ALIGN + reg->off (18)
* which is 20. Then the variable offset is (4n), so
* the total offset is 4-byte aligned and meets the
* load's requirements.
*/
"33: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=pkt(id=4,off=4,r=4),aux_off=18,aux_off_align=4 R5=pkt(id=4,off=0,r=4),aux_off=18,aux_off_align=4 R6=inv54,min_align=4 R10=fp",
{33, "R4=pkt(id=4,off=22,r=22,umax_value=2040,var_off=(0x0; 0x7fc))"},
{33, "R5=pkt(id=4,off=18,r=22,umax_value=2040,var_off=(0x0; 0x7fc))"},
},
},
{
.descr = "packet variable offset 2",
.insns = {
/* Create an unknown offset, (4n+2)-aligned */
LOAD_UNKNOWN(BPF_REG_6),
BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 14),
/* Add it to the packet pointer */
BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
/* Check bounds and perform a read */
BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
BPF_EXIT_INSN(),
BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0),
/* Make a (4n) offset from the value we just read */
BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 0xff),
BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
/* Add it to the packet pointer */
BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
/* Check bounds and perform a read */
BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
BPF_EXIT_INSN(),
BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
{8, "R2=pkt(id=0,off=0,r=8,imm=0)"},
{8, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
/* Adding 14 makes R6 be (4n+2) */
{9, "R6=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
/* Packet pointer has (4n+2) offset */
{11, "R5=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
{13, "R4=pkt(id=1,off=4,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (0)
* which is 2. Then the variable offset is (4n+2), so
* the total offset is 4-byte aligned and meets the
* load's requirements.
*/
{15, "R5=pkt(id=1,off=0,r=4,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
/* Newly read value in R6 was shifted left by 2, so has
* known alignment of 4.
*/
{18, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
/* Added (4n) to packet pointer's (4n+2) var_off, giving
* another (4n+2).
*/
{19, "R5=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"},
{21, "R4=pkt(id=2,off=4,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (0)
* which is 2. Then the variable offset is (4n+2), so
* the total offset is 4-byte aligned and meets the
* load's requirements.
*/
{23, "R5=pkt(id=2,off=0,r=4,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"},
},
},
{
.descr = "dubious pointer arithmetic",
.insns = {
PREP_PKT_POINTERS,
BPF_MOV64_IMM(BPF_REG_0, 0),
/* ptr & const => unknown & const */
BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
BPF_ALU64_IMM(BPF_AND, BPF_REG_5, 0x40),
/* ptr << const => unknown << const */
BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
BPF_ALU64_IMM(BPF_LSH, BPF_REG_5, 2),
/* We have a (4n) value. Let's make a packet offset
* out of it. First add 14, to make it a (4n+2)
*/
BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
/* Then make sure it's nonnegative */
BPF_JMP_IMM(BPF_JSGE, BPF_REG_5, 0, 1),
BPF_EXIT_INSN(),
/* Add it to packet pointer */
BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5),
/* Check bounds and perform a read */
BPF_MOV64_REG(BPF_REG_4, BPF_REG_6),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
BPF_EXIT_INSN(),
BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_6, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
.matches = {
{4, "R5=pkt(id=0,off=0,r=0,imm=0)"},
/* ptr & 0x40 == either 0 or 0x40 */
{5, "R5=inv(id=0,umax_value=64,var_off=(0x0; 0x40))"},
/* ptr << 2 == unknown, (4n) */
{7, "R5=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc))"},
/* (4n) + 14 == (4n+2). We blow our bounds, because
* the add could overflow.
*/
{8, "R5=inv(id=0,var_off=(0x2; 0xfffffffffffffffc))"},
/* Checked s>=0 */
{10, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
/* packet pointer + nonnegative (4n+2) */
{12, "R6=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
{14, "R4=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
/* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine.
* We checked the bounds, but it might have been able
* to overflow if the packet pointer started in the
* upper half of the address space.
* So we did not get a 'range' on R6, and the access
* attempt will fail.
*/
{16, "R6=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
}
},
{
.descr = "variable subtraction",
.insns = {
/* Create an unknown offset, (4n+2)-aligned */
LOAD_UNKNOWN(BPF_REG_6),
BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 14),
/* Create another unknown, (4n)-aligned, and subtract
* it from the first one
*/
BPF_ALU64_IMM(BPF_LSH, BPF_REG_7, 2),
BPF_ALU64_REG(BPF_SUB, BPF_REG_6, BPF_REG_7),
/* Bounds-check the result */
BPF_JMP_IMM(BPF_JSGE, BPF_REG_6, 0, 1),
BPF_EXIT_INSN(),
/* Add it to the packet pointer */
BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
/* Check bounds and perform a read */
BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
BPF_EXIT_INSN(),
BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
{7, "R2=pkt(id=0,off=0,r=8,imm=0)"},
{9, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
/* Adding 14 makes R6 be (4n+2) */
{10, "R6=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
/* New unknown value in R7 is (4n) */
{11, "R7=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
/* Subtracting it from R6 blows our unsigned bounds */
{12, "R6=inv(id=0,smin_value=-1006,smax_value=1034,var_off=(0x2; 0xfffffffffffffffc))"},
/* Checked s>= 0 */
{14, "R6=inv(id=0,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc))"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (0)
* which is 2. Then the variable offset is (4n+2), so
* the total offset is 4-byte aligned and meets the
* load's requirements.
*/
{20, "R5=pkt(id=1,off=0,r=4,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc))"},
},
},
{
.descr = "pointer variable subtraction",
.insns = {
/* Create an unknown offset, (4n+2)-aligned and bounded
* to [14,74]
*/
LOAD_UNKNOWN(BPF_REG_6),
BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 0xf),
BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 14),
/* Subtract it from the packet pointer */
BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
BPF_ALU64_REG(BPF_SUB, BPF_REG_5, BPF_REG_6),
/* Create another unknown, (4n)-aligned and >= 74.
* That in fact means >= 76, since 74 % 4 == 2
*/
BPF_ALU64_IMM(BPF_LSH, BPF_REG_7, 2),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 76),
/* Add it to the packet pointer */
BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_7),
/* Check bounds and perform a read */
BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
BPF_EXIT_INSN(),
BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
{7, "R2=pkt(id=0,off=0,r=8,imm=0)"},
{10, "R6=inv(id=0,umax_value=60,var_off=(0x0; 0x3c))"},
/* Adding 14 makes R6 be (4n+2) */
{11, "R6=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"},
/* Subtracting from packet pointer overflows ubounds */
{13, "R5=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c))"},
/* New unknown value in R7 is (4n), >= 76 */
{15, "R7=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"},
/* Adding it to packet pointer gives nice bounds again */
{16, "R5=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0x7fc))"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (0)
* which is 2. Then the variable offset is (4n+2), so
* the total offset is 4-byte aligned and meets the
* load's requirements.
*/
{20, "R5=pkt(id=2,off=0,r=4,umin_value=2,umax_value=1082,var_off=(0x2; 0x7fc))"},
},
},
};
@@ -373,6 +619,9 @@ static int do_test_single(struct bpf_align_test *test)
{
struct bpf_insn *prog = test->insns;
int prog_type = test->prog_type;
char bpf_vlog_copy[32768];
const char *line_ptr;
int cur_line = -1;
int prog_len, i;
int fd_prog;
int ret;
@@ -381,26 +630,49 @@ static int do_test_single(struct bpf_align_test *test)
fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER,
prog, prog_len, 1, "GPL", 0,
bpf_vlog, sizeof(bpf_vlog), 2);
if (fd_prog < 0) {
if (fd_prog < 0 && test->result != REJECT) {
printf("Failed to load program.\n");
printf("%s", bpf_vlog);
ret = 1;
} else if (fd_prog >= 0 && test->result == REJECT) {
printf("Unexpected success to load!\n");
printf("%s", bpf_vlog);
ret = 1;
close(fd_prog);
} else {
ret = 0;
/* We make a local copy so that we can strtok() it */
strncpy(bpf_vlog_copy, bpf_vlog, sizeof(bpf_vlog_copy));
line_ptr = strtok(bpf_vlog_copy, "\n");
for (i = 0; i < MAX_MATCHES; i++) {
const char *t, *m = test->matches[i];
struct bpf_reg_match m = test->matches[i];
if (!m)
if (!m.match)
break;
t = strstr(bpf_vlog, m);
if (!t) {
printf("Failed to find match: %s\n", m);
while (line_ptr) {
cur_line = -1;
sscanf(line_ptr, "%u: ", &cur_line);
if (cur_line == m.line)
break;
line_ptr = strtok(NULL, "\n");
}
if (!line_ptr) {
printf("Failed to find line %u for match: %s\n",
m.line, m.match);
ret = 1;
printf("%s", bpf_vlog);
break;
}
if (!strstr(line_ptr, m.match)) {
printf("Failed to find match %u: %s\n",
m.line, m.match);
ret = 1;
printf("%s", bpf_vlog);
break;
}
}
close(fd_prog);
if (fd_prog >= 0)
close(fd_prog);
}
return ret;
}

View File

@@ -22,6 +22,7 @@
#include <linux/bpf.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include "bpf_util.h"
static int map_flags;
@@ -438,6 +439,395 @@ static void test_arraymap_percpu_many_keys(void)
close(fd);
}
/* Smoke test for BPF_MAP_TYPE_DEVMAP: creates a two-entry map keyed and
 * valued by __u32 and closes it again.
 *
 * Both parameters are unused by this test. On creation failure the error
 * is printed and the process exits with status 1.
 */
static void test_devmap(int task, void *data)
{
	__u32 key, value;	/* used only for their sizes */
	int fd;

	fd = bpf_create_map(BPF_MAP_TYPE_DEVMAP, sizeof(key), sizeof(value),
			    2, 0);
	if (fd < 0) {
		/* Name the map type actually being created. */
		printf("Failed to create devmap '%s'!\n", strerror(errno));
		exit(1);
	}

	close(fd);
}
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <arpa/inet.h>
#include <sys/select.h>
#include <linux/err.h>
#define SOCKMAP_PARSE_PROG "./sockmap_parse_prog.o"
#define SOCKMAP_VERDICT_PROG "./sockmap_verdict_prog.o"
/* End-to-end exercise of BPF_MAP_TYPE_SOCKMAP.
 *
 * Builds six TCP sockets on 127.0.0.1 (two listeners, two clients, two
 * accepted peers) and then, in order:
 *   - fills a program-less sockmap with them,
 *   - checks that attaching invalid program fds is rejected,
 *   - loads the parse/verdict SK_SKB objects and attaches them to the
 *     "sock_map_rx" map (attaching to "sock_map_break" must fail),
 *   - updates/deletes entries in the rx and tx maps,
 *   - sends data through the maps and waits for it with select(),
 *   - checks BPF_NOEXIST/BPF_ANY/BPF_EXIST update semantics,
 *   - checks that sockets already carrying programs cannot be added to a
 *     second map with programs,
 *   - forks @tasks children that concurrently delete/update entries.
 *
 * Any failure prints a diagnostic and terminates the process with status 1.
 * @data is unused.
 */
static void test_sockmap(int tasks, void *data)
{
	int one = 1, map_fd_rx, map_fd_tx, map_fd_break, s, sc, rc;
	struct bpf_map *bpf_map_rx, *bpf_map_tx, *bpf_map_break;
	int ports[] = {50200, 50201, 50202, 50204};
	/* -1 marks "not opened", so the error paths never close an fd this
	 * function does not own (a partial initializer would leave 0s).
	 */
	int err, i, fd = -1, sfd[6] = {-1, -1, -1, -1, -1, -1};
	int failed_idx, saved_errno;
	u8 buf[20] = {0x0, 0x5, 0x3, 0x2, 0x1, 0x0};
	int parse_prog, verdict_prog;
	struct sockaddr_in addr;
	struct bpf_object *obj;
	struct timeval to;
	__u32 key, value;
	/* A variable-length array needs at least one element. */
	pid_t pid[tasks > 0 ? tasks : 1];
	fd_set w;

	/* Create some sockets to use with sockmap */
	for (i = 0; i < 2; i++) {
		sfd[i] = socket(AF_INET, SOCK_STREAM, 0);
		if (sfd[i] < 0)
			goto out;
		err = setsockopt(sfd[i], SOL_SOCKET, SO_REUSEADDR,
				 (char *)&one, sizeof(one));
		if (err) {
			printf("failed to setsockopt\n");
			goto out;
		}
		err = ioctl(sfd[i], FIONBIO, (char *)&one);
		if (err < 0) {
			printf("failed to ioctl\n");
			goto out;
		}
		memset(&addr, 0, sizeof(struct sockaddr_in));
		addr.sin_family = AF_INET;
		addr.sin_addr.s_addr = inet_addr("127.0.0.1");
		addr.sin_port = htons(ports[i]);
		err = bind(sfd[i], (struct sockaddr *)&addr, sizeof(addr));
		if (err < 0) {
			printf("failed to bind: err %i: %i:%i\n",
			       err, i, sfd[i]);
			goto out;
		}
		err = listen(sfd[i], 32);
		if (err < 0) {
			printf("failed to listen\n");
			goto out;
		}
	}

	/* Clients: sfd[2] and sfd[3] connect to the listeners above. */
	for (i = 2; i < 4; i++) {
		sfd[i] = socket(AF_INET, SOCK_STREAM, 0);
		if (sfd[i] < 0)
			goto out;
		err = setsockopt(sfd[i], SOL_SOCKET, SO_REUSEADDR,
				 (char *)&one, sizeof(one));
		if (err) {
			printf("set sock opt\n");
			goto out;
		}
		memset(&addr, 0, sizeof(struct sockaddr_in));
		addr.sin_family = AF_INET;
		addr.sin_addr.s_addr = inet_addr("127.0.0.1");
		addr.sin_port = htons(ports[i - 2]);
		err = connect(sfd[i], (struct sockaddr *)&addr, sizeof(addr));
		if (err) {
			printf("failed to connect\n");
			goto out;
		}
	}

	/* Server side of the two connections: sfd[4] and sfd[5]. */
	for (i = 4; i < 6; i++) {
		sfd[i] = accept(sfd[i - 4], NULL, NULL);
		if (sfd[i] < 0) {
			printf("accept failed\n");
			goto out;
		}
	}

	/* Test sockmap with connected sockets */
	fd = bpf_create_map(BPF_MAP_TYPE_SOCKMAP,
			    sizeof(key), sizeof(value),
			    6, 0);
	if (fd < 0) {
		printf("Failed to create sockmap %i\n", fd);
		goto out_sockmap;
	}

	/* Test update without programs */
	for (i = 0; i < 6; i++) {
		err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY);
		if (err) {
			printf("Failed noprog update sockmap '%i:%i'\n",
			       i, sfd[i]);
			goto out_sockmap;
		}
	}

	/* Test attaching bad fds */
	err = bpf_prog_attach(-1, fd, BPF_SK_SKB_STREAM_PARSER, 0);
	if (!err) {
		printf("Failed invalid parser prog attach\n");
		goto out_sockmap;
	}

	err = bpf_prog_attach(-1, fd, BPF_SK_SKB_STREAM_VERDICT, 0);
	if (!err) {
		printf("Failed invalid verdict prog attach\n");
		goto out_sockmap;
	}

	/* Load SK_SKB program and Attach */
	err = bpf_prog_load(SOCKMAP_PARSE_PROG,
			    BPF_PROG_TYPE_SK_SKB, &obj, &parse_prog);
	if (err) {
		printf("Failed to load SK_SKB parse prog\n");
		goto out_sockmap;
	}

	err = bpf_prog_load(SOCKMAP_VERDICT_PROG,
			    BPF_PROG_TYPE_SK_SKB, &obj, &verdict_prog);
	if (err) {
		printf("Failed to load SK_SKB verdict prog\n");
		goto out_sockmap;
	}

	bpf_map_rx = bpf_object__find_map_by_name(obj, "sock_map_rx");
	if (IS_ERR(bpf_map_rx)) {
		printf("Failed to load map rx from verdict prog\n");
		goto out_sockmap;
	}

	map_fd_rx = bpf_map__fd(bpf_map_rx);
	if (map_fd_rx < 0) {
		printf("Failed to get map fd\n");
		goto out_sockmap;
	}

	bpf_map_tx = bpf_object__find_map_by_name(obj, "sock_map_tx");
	if (IS_ERR(bpf_map_tx)) {
		printf("Failed to load map tx from verdict prog\n");
		goto out_sockmap;
	}

	map_fd_tx = bpf_map__fd(bpf_map_tx);
	if (map_fd_tx < 0) {
		printf("Failed to get map tx fd\n");
		goto out_sockmap;
	}

	bpf_map_break = bpf_object__find_map_by_name(obj, "sock_map_break");
	if (IS_ERR(bpf_map_break)) {
		printf("Failed to load map tx from verdict prog\n");
		goto out_sockmap;
	}

	map_fd_break = bpf_map__fd(bpf_map_break);
	if (map_fd_break < 0) {
		printf("Failed to get map tx fd\n");
		goto out_sockmap;
	}

	/* Attaching to the "break" map is expected to be refused. */
	err = bpf_prog_attach(parse_prog, map_fd_break,
			      BPF_SK_SKB_STREAM_PARSER, 0);
	if (!err) {
		printf("Allowed attaching SK_SKB program to invalid map\n");
		goto out_sockmap;
	}

	err = bpf_prog_attach(parse_prog, map_fd_rx,
			      BPF_SK_SKB_STREAM_PARSER, 0);
	if (err) {
		printf("Failed stream parser bpf prog attach\n");
		goto out_sockmap;
	}

	err = bpf_prog_attach(verdict_prog, map_fd_rx,
			      BPF_SK_SKB_STREAM_VERDICT, 0);
	if (err) {
		printf("Failed stream verdict bpf prog attach\n");
		goto out_sockmap;
	}

	/* Test map update elem afterwards fd lives in fd and map_fd */
	for (i = 0; i < 6; i++) {
		err = bpf_map_update_elem(map_fd_rx, &i, &sfd[i], BPF_ANY);
		if (err) {
			printf("Failed map_fd_rx update sockmap %i '%i:%i'\n",
			       err, i, sfd[i]);
			goto out_sockmap;
		}
		err = bpf_map_update_elem(map_fd_tx, &i, &sfd[i], BPF_ANY);
		if (err) {
			printf("Failed map_fd_tx update sockmap %i '%i:%i'\n",
			       err, i, sfd[i]);
			goto out_sockmap;
		}
	}

	/* Test map delete elem and remove send/recv sockets */
	for (i = 2; i < 4; i++) {
		err = bpf_map_delete_elem(map_fd_rx, &i);
		if (err) {
			printf("Failed delete sockmap rx %i '%i:%i'\n",
			       err, i, sfd[i]);
			goto out_sockmap;
		}
		err = bpf_map_delete_elem(map_fd_tx, &i);
		if (err) {
			printf("Failed delete sockmap tx %i '%i:%i'\n",
			       err, i, sfd[i]);
			goto out_sockmap;
		}
	}

	/* Test map send/recv */
	for (i = 0; i < 2; i++) {
		buf[0] = i;
		buf[1] = 0x5;
		sc = send(sfd[2], buf, 20, 0);
		if (sc < 0) {
			printf("Failed sockmap send\n");
			goto out_sockmap;
		}

		FD_ZERO(&w);
		FD_SET(sfd[3], &w);
		to.tv_sec = 1;
		to.tv_usec = 0;
		s = select(sfd[3] + 1, &w, NULL, NULL, &to);
		if (s == -1) {
			perror("Failed sockmap select()");
			goto out_sockmap;
		} else if (!s) {
			printf("Failed sockmap unexpected timeout\n");
			goto out_sockmap;
		}

		if (!FD_ISSET(sfd[3], &w)) {
			printf("Failed sockmap select/recv\n");
			goto out_sockmap;
		}

		rc = recv(sfd[3], buf, sizeof(buf), 0);
		if (rc < 0) {
			printf("Failed sockmap recv\n");
			goto out_sockmap;
		}
	}

	/* Negative null entry lookup from datapath should be dropped */
	buf[0] = 1;
	buf[1] = 12;
	sc = send(sfd[2], buf, 20, 0);
	if (sc < 0) {
		printf("Failed sockmap send\n");
		goto out_sockmap;
	}

	/* Push fd into same slot */
	i = 2;
	err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_NOEXIST);
	if (!err) {
		printf("Failed allowed sockmap dup slot BPF_NOEXIST\n");
		goto out_sockmap;
	}

	err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY);
	if (err) {
		printf("Failed sockmap update new slot BPF_ANY\n");
		goto out_sockmap;
	}

	err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_EXIST);
	if (err) {
		printf("Failed sockmap update new slot BPF_EXIST\n");
		goto out_sockmap;
	}

	/* Delete the elems without programs (failures are only reported) */
	for (i = 0; i < 6; i++) {
		err = bpf_map_delete_elem(fd, &i);
		if (err) {
			printf("Failed delete sockmap %i '%i:%i'\n",
			       err, i, sfd[i]);
		}
	}

	/* Test having multiple maps open and set with programs on same fds */
	err = bpf_prog_attach(parse_prog, fd,
			      BPF_SK_SKB_STREAM_PARSER, 0);
	if (err) {
		printf("Failed fd bpf parse prog attach\n");
		goto out_sockmap;
	}
	err = bpf_prog_attach(verdict_prog, fd,
			      BPF_SK_SKB_STREAM_VERDICT, 0);
	if (err) {
		printf("Failed fd bpf verdict prog attach\n");
		goto out_sockmap;
	}

	for (i = 4; i < 6; i++) {
		err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY);
		if (!err) {
			printf("Failed allowed duplicate programs in update ANY sockmap %i '%i:%i'\n",
			       err, i, sfd[i]);
			goto out_sockmap;
		}
		err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_NOEXIST);
		if (!err) {
			printf("Failed allowed duplicate program in update NOEXIST sockmap %i '%i:%i'\n",
			       err, i, sfd[i]);
			goto out_sockmap;
		}
		err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_EXIST);
		if (!err) {
			printf("Failed allowed duplicate program in update EXIST sockmap %i '%i:%i'\n",
			       err, i, sfd[i]);
			goto out_sockmap;
		}
	}

	/* Test tasks number of forked operations */
	for (i = 0; i < tasks; i++) {
		pid[i] = fork();
		if (pid[i] == 0) {
			/* Child: reusing i is fine, it exits right after. */
			for (i = 0; i < 6; i++) {
				bpf_map_delete_elem(map_fd_tx, &i);
				bpf_map_delete_elem(map_fd_rx, &i);
				bpf_map_update_elem(map_fd_tx, &i,
						    &sfd[i], BPF_ANY);
				bpf_map_update_elem(map_fd_rx, &i,
						    &sfd[i], BPF_ANY);
			}
			exit(0);
		} else if (pid[i] == -1) {
			printf("Couldn't spawn #%d process!\n", i);
			exit(1);
		}
	}

	for (i = 0; i < tasks; i++) {
		int status;

		assert(waitpid(pid[i], &status, 0) == pid[i]);
		assert(status == 0);
	}

	/* Test map close sockets */
	for (i = 0; i < 6; i++)
		close(sfd[i]);
	close(fd);
	close(map_fd_rx);
	bpf_object__close(obj);
	return;
out:
	/* Record what failed before the close() loop reuses i and errno. */
	failed_idx = i;
	saved_errno = errno;
	for (i = 0; i < 6; i++)
		if (sfd[i] >= 0)
			close(sfd[i]);
	printf("Failed to create sockmap '%i:%s'!\n",
	       failed_idx, strerror(saved_errno));
	exit(1);
out_sockmap:
	for (i = 0; i < 6; i++)
		if (sfd[i] >= 0)
			close(sfd[i]);
	if (fd >= 0)
		close(fd);
	exit(1);
}
#define MAP_SIZE (32 * 1024)
static void test_map_large(void)
@@ -605,6 +995,9 @@ static void run_all_tests(void)
test_arraymap_percpu_many_keys();
test_devmap(0, NULL);
test_sockmap(0, NULL);
test_map_large();
test_map_parallel();
test_map_stress();

View File

@@ -75,39 +75,6 @@ static struct {
__ret; \
})
/* Open the object file @file, force its first program to @type and load
 * the whole object.
 *
 * On success returns 0 and stores the object in *@pobj and the first
 * program's fd in *@prog_fd.  On failure bumps error_cnt and returns
 * -ENOENT (object could not be opened or holds no program) or -EINVAL
 * (load failed); the object, if it was opened, is closed again.
 */
static int bpf_prog_load(const char *file, enum bpf_prog_type type,
			 struct bpf_object **pobj, int *prog_fd)
{
	struct bpf_object *loaded = bpf_object__open(file);
	struct bpf_program *first;
	int rc;

	if (IS_ERR(loaded)) {
		rc = -ENOENT;
		goto count_error;
	}

	first = bpf_program__next(NULL, loaded);
	if (!first) {
		rc = -ENOENT;
		goto close_object;
	}

	bpf_program__set_type(first, type);
	if (bpf_object__load(loaded)) {
		rc = -EINVAL;
		goto close_object;
	}

	*pobj = loaded;
	*prog_fd = bpf_program__fd(first);
	return 0;

close_object:
	bpf_object__close(loaded);
count_error:
	error_cnt++;
	return rc;
}
static int bpf_find_map(const char *test, struct bpf_object *obj,
const char *name)
{
@@ -130,8 +97,10 @@ static void test_pkt_access(void)
int err, prog_fd;
err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
if (err)
if (err) {
error_cnt++;
return;
}
err = bpf_prog_test_run(prog_fd, 100000, &pkt_v4, sizeof(pkt_v4),
NULL, NULL, &retval, &duration);
@@ -162,8 +131,10 @@ static void test_xdp(void)
int err, prog_fd, map_fd;
err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
if (err)
if (err) {
error_cnt++;
return;
}
map_fd = bpf_find_map(__func__, obj, "vip2tnl");
if (map_fd < 0)
@@ -223,8 +194,10 @@ static void test_l4lb(void)
u32 *magic = (u32 *)buf;
err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
if (err)
if (err) {
error_cnt++;
return;
}
map_fd = bpf_find_map(__func__, obj, "vip_map");
if (map_fd < 0)
@@ -280,8 +253,10 @@ static void test_tcp_estats(void)
err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
CHECK(err, "", "err %d errno %d\n", err, errno);
if (err)
if (err) {
error_cnt++;
return;
}
bpf_object__close(obj);
}
@@ -304,7 +279,7 @@ static void test_bpf_obj_id(void)
/* +1 to test for the info_len returned by kernel */
struct bpf_prog_info prog_infos[nr_iters + 1];
struct bpf_map_info map_infos[nr_iters + 1];
char jited_insns[128], xlated_insns[128];
char jited_insns[128], xlated_insns[128], zeros[128];
__u32 i, next_id, info_len, nr_id_found, duration = 0;
int sysctl_fd, jit_enabled = 0, err = 0;
__u64 array_value;
@@ -330,17 +305,22 @@ static void test_bpf_obj_id(void)
objs[i] = NULL;
/* Check bpf_obj_get_info_by_fd() */
bzero(zeros, sizeof(zeros));
for (i = 0; i < nr_iters; i++) {
err = bpf_prog_load(file, BPF_PROG_TYPE_SOCKET_FILTER,
&objs[i], &prog_fds[i]);
/* test_obj_id.o is a dumb prog. It should never fail
* to load.
*/
if (err)
error_cnt++;
assert(!err);
/* Check getting prog info */
info_len = sizeof(struct bpf_prog_info) * 2;
bzero(&prog_infos[i], info_len);
bzero(jited_insns, sizeof(jited_insns));
bzero(xlated_insns, sizeof(xlated_insns));
prog_infos[i].jited_prog_insns = ptr_to_u64(jited_insns);
prog_infos[i].jited_prog_len = sizeof(jited_insns);
prog_infos[i].xlated_prog_insns = ptr_to_u64(xlated_insns);
@@ -351,15 +331,20 @@ static void test_bpf_obj_id(void)
prog_infos[i].type != BPF_PROG_TYPE_SOCKET_FILTER ||
info_len != sizeof(struct bpf_prog_info) ||
(jit_enabled && !prog_infos[i].jited_prog_len) ||
!prog_infos[i].xlated_prog_len,
(jit_enabled &&
!memcmp(jited_insns, zeros, sizeof(zeros))) ||
!prog_infos[i].xlated_prog_len ||
!memcmp(xlated_insns, zeros, sizeof(zeros)),
"get-prog-info(fd)",
"err %d errno %d i %d type %d(%d) info_len %u(%lu) jit_enabled %d jited_prog_len %u xlated_prog_len %u\n",
"err %d errno %d i %d type %d(%d) info_len %u(%lu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d\n",
err, errno, i,
prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER,
info_len, sizeof(struct bpf_prog_info),
jit_enabled,
prog_infos[i].jited_prog_len,
prog_infos[i].xlated_prog_len))
prog_infos[i].xlated_prog_len,
!!memcmp(jited_insns, zeros, sizeof(zeros)),
!!memcmp(xlated_insns, zeros, sizeof(zeros))))
goto done;
map_fds[i] = bpf_find_map(__func__, objs[i], "test_map_id");
@@ -496,8 +481,10 @@ static void test_pkt_md_access(void)
int err, prog_fd;
err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
if (err)
if (err) {
error_cnt++;
return;
}
err = bpf_prog_test_run(prog_fd, 10, &pkt_v4, sizeof(pkt_v4),
NULL, NULL, &retval, &duration);

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,28 @@
/* Copyright (c) 2017 VMware
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#include <linux/bpf.h>
#include "bpf_helpers.h"
int _version SEC("version") = 1;
/* XDP program in section "redirect_to_111": hands every packet to
 * bpf_redirect() with first argument 111 and second argument 0.
 * NOTE(review): 111 is presumably an interface index - confirm against
 * the script that loads this object.
 */
SEC("redirect_to_111")
int xdp_redirect_to_111(struct xdp_md *xdp)
{
	const int target = 111;
	const int flags = 0;

	return bpf_redirect(target, flags);
}
/* XDP program in section "redirect_to_222": hands every packet to
 * bpf_redirect() with first argument 222 and second argument 0.
 * NOTE(review): 222 is presumably an interface index - confirm against
 * the script that loads this object.
 */
SEC("redirect_to_222")
int xdp_redirect_to_222(struct xdp_md *xdp)
{
	const int target = 222;
	const int flags = 0;

	return bpf_redirect(target, flags);
}
char _license[] SEC("license") = "GPL";

View File

@@ -0,0 +1,59 @@
#!/bin/sh
# Create 2 namespaces with two veth peers, and
# forward packets in-between using generic XDP
#
# NS1(veth11) NS2(veth22)
# | |
# | |
# (veth1, ------ (veth2,
# id:111) id:222)
# | xdp forwarding |
# ------------------
# Exit/signal handler: report PASS when the status of the last command
# before the handler ran was 0 and FAILED otherwise, then remove both test
# namespaces.  Errors while deleting are ignored.
cleanup()
{
	case "$?" in
	0)
		echo "selftests: test_xdp_redirect [PASS]";
		;;
	*)
		echo "selftests: test_xdp_redirect [FAILED]";
		;;
	esac

	set +e
	ip netns del ns1 2> /dev/null
	ip netns del ns2 2> /dev/null
}
# Probe whether this "ip" accepts the xdpgeneric keyword at all; skip the
# test (successfully) when it does not.
ip link set dev lo xdpgeneric off 2>/dev/null > /dev/null
if [ $? -ne 0 ];then
	echo "selftests: [SKIP] Could not run test without the ip xdpgeneric support"
	exit 0
fi
set -e

# Install the handler before any state is created, so that a failure while
# adding the namespaces is still reported and cleaned up.  SIGKILL (9) is
# not listed because it cannot be trapped.
trap cleanup 0 2 3 6

ip netns add ns1
ip netns add ns2

# veth1/veth2 stay in the root namespace with fixed ifindexes 111/222;
# their peers veth11/veth22 move into ns1/ns2.
ip link add veth1 index 111 type veth peer name veth11
ip link add veth2 index 222 type veth peer name veth22

ip link set veth11 netns ns1
ip link set veth22 netns ns2

ip link set veth1 up
ip link set veth2 up

ip netns exec ns1 ip addr add 10.1.1.11/24 dev veth11
ip netns exec ns2 ip addr add 10.1.1.22/24 dev veth22

ip netns exec ns1 ip link set dev veth11 up
ip netns exec ns2 ip link set dev veth22 up

# Cross-connect: packets arriving on veth1 are redirected to ifindex 222
# and packets arriving on veth2 to ifindex 111.
ip link set dev veth1 xdpgeneric obj test_xdp_redirect.o sec redirect_to_222
ip link set dev veth2 xdpgeneric obj test_xdp_redirect.o sec redirect_to_111

# With "set -e" a failed ping aborts the script and cleanup reports FAILED.
ip netns exec ns1 ping -c 1 10.1.1.22
ip netns exec ns2 ping -c 1 10.1.1.11

exit 0

View File

@@ -1,3 +1,4 @@
msg_zerocopy
socket
psock_fanout
psock_tpacket

View File

@@ -3,11 +3,11 @@
CFLAGS = -Wall -Wl,--no-as-needed -O2 -g
CFLAGS += -I../../../../usr/include/
TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh
TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
TEST_GEN_FILES = socket
TEST_GEN_FILES += psock_fanout psock_tpacket
TEST_GEN_FILES += reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
TEST_GEN_FILES += reuseport_dualstack
TEST_GEN_FILES += reuseport_dualstack msg_zerocopy
include ../lib.mk

View File

@@ -0,0 +1,697 @@
/* Evaluate MSG_ZEROCOPY
*
* Send traffic between two processes over one of the supported
* protocols and modes:
*
* PF_INET/PF_INET6
* - SOCK_STREAM
* - SOCK_DGRAM
* - SOCK_DGRAM with UDP_CORK
* - SOCK_RAW
* - SOCK_RAW with IP_HDRINCL
*
* PF_PACKET
* - SOCK_DGRAM
* - SOCK_RAW
*
* Start this program on two connected hosts, one in send mode and
* the other with option '-r' to put it in receiver mode.
*
* If zerocopy mode ('-z') is enabled, the sender will verify that
* the kernel queues completions on the error queue for all zerocopy
* transfers.
*/
#define _GNU_SOURCE
#include <arpa/inet.h>
#include <error.h>
#include <errno.h>
#include <limits.h>
#include <linux/errqueue.h>
#include <linux/if_packet.h>
#include <linux/ipv6.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>
#include <poll.h>
#include <sched.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#ifndef SO_EE_ORIGIN_ZEROCOPY
#define SO_EE_ORIGIN_ZEROCOPY SO_EE_ORIGIN_UPAGE
#endif
#ifndef SO_ZEROCOPY
#define SO_ZEROCOPY 60
#endif
#ifndef SO_EE_CODE_ZEROCOPY_COPIED
#define SO_EE_CODE_ZEROCOPY_COPIED 1
#endif
#ifndef MSG_ZEROCOPY
#define MSG_ZEROCOPY 0x4000000
#endif
/* Run-time configuration.  Most values are set by parse_opts(); the
 * initializers below are the defaults.
 */
/* -c: number of sendmsg() calls one corked datagram is split into; 0 = off */
static int cfg_cork;
/* -m: alternate copy and zerocopy fragments while corking */
static bool cfg_cork_mixed;
/* -C: cpu handed to do_setcpu().
 * NOTE(review): -1 is passed to CPU_SET() unchanged; nothing visible here
 * translates it into "last cpu" - confirm.
 */
static int cfg_cpu = -1; /* default: pin to last cpu */
/* -4 / -6: PF_INET or PF_INET6 */
static int cfg_family = PF_UNSPEC;
/* -i: interface index used as sll_ifindex for PF_PACKET sends */
static int cfg_ifindex = 1;
/* -s: payload bytes per send */
static int cfg_payload_len;
/* -p: port */
static int cfg_port = 8000;
/* -r: run as receiver instead of sender */
static bool cfg_rx;
/* -t: run time in milliseconds (200 + seconds * 1000 when given) */
static int cfg_runtime_ms = 4200;
/* -v: verbosity, incremented per occurrence */
static int cfg_verbose;
/* poll() timeout, also the grace period for outstanding completions */
static int cfg_waittime_ms = 500;
/* -z: send with MSG_ZEROCOPY and verify completion notifications */
static bool cfg_zerocopy;
/* length of the addresses below, set together with cfg_family */
static socklen_t cfg_alen;
/* -D: destination address (bind address on the receiver) */
static struct sockaddr_storage cfg_dst_addr;
/* -S: source address */
static struct sockaddr_storage cfg_src_addr;
/* payload buffer, filled with a repeating a..z pattern by do_test() */
static char payload[IP_MAXPACKET];
/* running counters reported at the end of do_tx()/do_rx() */
static long packets, bytes, completions, expected_completions;
/* -1 until the first completion; then 1 if it was zerocopy, 0 if copied */
static int zerocopied = -1;
/* first notification id expected in the next completion */
static uint32_t next_completion;
/* Return the current wall-clock time from gettimeofday() in milliseconds. */
static unsigned long gettimeofday_ms(void)
{
	struct timeval now;

	gettimeofday(&now, NULL);

	/* whole seconds scaled up plus the sub-second part scaled down */
	return now.tv_usec / 1000 + now.tv_sec * 1000;
}
/* Compute the 16-bit one's-complement checksum over @num_words 16-bit
 * words starting at @start: add the words, fold the carries back into
 * the low 16 bits and return the complement of the result.
 */
static uint16_t get_ip_csum(const uint16_t *start, int num_words)
{
	const uint16_t *word = start;
	unsigned long acc = 0;

	while (num_words-- > 0)
		acc += *word++;

	/* end-around carry: keep folding until nothing is left above bit 15 */
	for (; acc >> 16; )
		acc = (acc >> 16) + (acc & 0xFFFF);

	return (uint16_t)~acc;
}
/* Restrict the calling process to the single cpu @cpu.
 *
 * Exits through error() when the affinity cannot be set, now including
 * the errno text so the reason is visible.  Prints the chosen cpu when
 * cfg_verbose is set.  Always returns 0.
 */
static int do_setcpu(int cpu)
{
	cpu_set_t mask;

	CPU_ZERO(&mask);
	CPU_SET(cpu, &mask);
	if (sched_setaffinity(0, sizeof(mask), &mask))
		error(1, errno, "setaffinity %d", cpu);

	/* cpu is a signed int, so print it as one */
	if (cfg_verbose)
		fprintf(stderr, "cpu: %d\n", cpu);

	return 0;
}
/* Set the integer socket option @level/@optname on @fd to @val; exit
 * through error() with the errno text when setsockopt() fails.
 */
static void do_setsockopt(int fd, int level, int optname, int val)
{
	const int rc = setsockopt(fd, level, optname, &val, sizeof(val));

	if (rc == 0)
		return;

	error(1, errno, "setsockopt %d.%d: %d", level, optname, val);
}
/* Wait up to cfg_waittime_ms for any of @events on @fd.
 *
 * Returns 1 when poll() reported at least one of the requested events,
 * 0 on timeout or when only other events fired.  Exits through error()
 * when poll() itself fails.
 */
static int do_poll(int fd, int events)
{
	struct pollfd watch = { .fd = fd, .events = events, .revents = 0 };
	int ready;

	ready = poll(&watch, 1, cfg_waittime_ms);
	if (ready == -1)
		error(1, errno, "poll");

	if (!ready)
		return 0;

	return (watch.revents & events) != 0;
}
/* Accept one connection on the listening socket @fd, close the listener
 * and return the connected socket.  Exits through error() when either
 * accept() or close() fails.
 */
static int do_accept(int fd)
{
	const int listener = fd;
	int conn;

	conn = accept(listener, NULL, NULL);
	if (conn == -1)
		error(1, errno, "accept");

	/* only a single connection is served, so the listener can go */
	if (close(listener))
		error(1, errno, "close listen sock");

	return conn;
}
/* Send @msg on @fd without blocking, with MSG_ZEROCOPY when @do_zerocopy.
 *
 * Returns false when the socket would block (EAGAIN) and true otherwise;
 * any other sendmsg() failure exits through error().  For a non-empty
 * message the packets/bytes counters are updated, and
 * expected_completions is bumped for a zerocopy send that moved data.
 */
static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy)
{
	int flags = do_zerocopy ? (MSG_DONTWAIT | MSG_ZEROCOPY) : MSG_DONTWAIT;
	int total = 0, sent, idx;

	/* total number of bytes this call asks the kernel to send */
	for (idx = 0; idx < msg->msg_iovlen; idx++)
		total += msg->msg_iov[idx].iov_len;

	sent = sendmsg(fd, msg, flags);
	if (sent == -1) {
		if (errno == EAGAIN)
			return false;
		error(1, errno, "send");
	}

	if (cfg_verbose && sent != total)
		fprintf(stderr, "send: ret=%u != %u\n", sent, total);

	if (!total)
		return true;

	packets++;
	bytes += sent;
	if (do_zerocopy && sent)
		expected_completions++;

	return true;
}
static void do_sendmsg_corked(int fd, struct msghdr *msg)
{
bool do_zerocopy = cfg_zerocopy;
int i, payload_len, extra_len;
/* split up the packet. for non-multiple, make first buffer longer */
payload_len = cfg_payload_len / cfg_cork;
extra_len = cfg_payload_len - (cfg_cork * payload_len);
do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 1);
for (i = 0; i < cfg_cork; i++) {
/* in mixed-frags mode, alternate zerocopy and copy frags
* start with non-zerocopy, to ensure attach later works
*/
if (cfg_cork_mixed)
do_zerocopy = (i & 1);
msg->msg_iov[0].iov_len = payload_len + extra_len;
extra_len = 0;
do_sendmsg(fd, msg, do_zerocopy);
}
do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0);
}
/* Fill *@iph with an option-less IPv4 header (ihl 5, ttl 2, protocol
 * IPPROTO_EGP) from cfg_src_addr to cfg_dst_addr for @payload_len bytes
 * of payload, including the header checksum.  Returns the header size.
 */
static int setup_iph(struct iphdr *iph, uint16_t payload_len)
{
	const struct sockaddr_in *src = (void *) &cfg_src_addr;
	const struct sockaddr_in *dst = (void *) &cfg_dst_addr;

	/* every member not named here starts out as zero */
	*iph = (struct iphdr) {
		.version	= 4,
		.ihl		= 5,
		.ttl		= 2,
		.protocol	= IPPROTO_EGP,
		.saddr		= src->sin_addr.s_addr,
		.daddr		= dst->sin_addr.s_addr,
		.tot_len	= htons(sizeof(*iph) + payload_len),
	};

	/* ihl counts 32-bit words; the checksum runs over 16-bit words */
	iph->check = get_ip_csum((void *) iph, iph->ihl << 1);

	return sizeof(*iph);
}
/* Fill *@ip6h with an IPv6 header (next header IPPROTO_EGP, hop limit 2)
 * from cfg_src_addr to cfg_dst_addr for @payload_len bytes of payload.
 * Returns the header size.
 */
static int setup_ip6h(struct ipv6hdr *ip6h, uint16_t payload_len)
{
	const struct sockaddr_in6 *src = (void *) &cfg_src_addr;
	const struct sockaddr_in6 *dst = (void *) &cfg_dst_addr;

	/* every member not named here starts out as zero */
	*ip6h = (struct ipv6hdr) {
		.version	= 6,
		.payload_len	= htons(payload_len),
		.nexthdr	= IPPROTO_EGP,
		.hop_limit	= 2,
		.saddr		= src->sin6_addr,
		.daddr		= dst->sin6_addr,
	};

	return sizeof(*ip6h);
}
/* Fill the socket address at @sockaddr for @domain (PF_INET or PF_INET6)
 * from the textual address @str_addr and the port in cfg_port.  Exits
 * through error() on an unparsable address or any other domain.
 */
static void setup_sockaddr(int domain, const char *str_addr, void *sockaddr)
{
	if (domain == PF_INET) {
		struct sockaddr_in *v4 = sockaddr;

		v4->sin_family = AF_INET;
		v4->sin_port = htons(cfg_port);
		if (inet_pton(AF_INET, str_addr, &v4->sin_addr) != 1)
			error(1, 0, "ipv4 parse error: %s", str_addr);
		return;
	}

	if (domain == PF_INET6) {
		struct sockaddr_in6 *v6 = sockaddr;

		v6->sin6_family = AF_INET6;
		v6->sin6_port = htons(cfg_port);
		if (inet_pton(AF_INET6, str_addr, &v6->sin6_addr) != 1)
			error(1, 0, "ipv6 parse error: %s", str_addr);
		return;
	}

	error(1, 0, "illegal domain");
}
/* Create the sending socket: request a send buffer of 1 << 21 bytes,
 * enable SO_ZEROCOPY when cfg_zerocopy is set and, for anything but
 * PF_PACKET, connect to cfg_dst_addr.  Returns the socket; every failure
 * exits through error().
 */
static int do_setup_tx(int domain, int type, int protocol)
{
	const int sock = socket(domain, type, protocol);

	if (sock == -1)
		error(1, errno, "socket t");

	do_setsockopt(sock, SOL_SOCKET, SO_SNDBUF, 1 << 21);
	if (cfg_zerocopy)
		do_setsockopt(sock, SOL_SOCKET, SO_ZEROCOPY, 1);

	/* packet sockets are not connected; do_tx() names the peer per send */
	if (domain == PF_PACKET)
		return sock;

	if (connect(sock, (void *) &cfg_dst_addr, cfg_alen))
		error(1, errno, "connect");

	return sock;
}
static bool do_recv_completion(int fd)
{
struct sock_extended_err *serr;
struct msghdr msg = {};
struct cmsghdr *cm;
uint32_t hi, lo, range;
int ret, zerocopy;
char control[100];
msg.msg_control = control;
msg.msg_controllen = sizeof(control);
ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
if (ret == -1 && errno == EAGAIN)
return false;
if (ret == -1)
error(1, errno, "recvmsg notification");
if (msg.msg_flags & MSG_CTRUNC)
error(1, errno, "recvmsg notification: truncated");
cm = CMSG_FIRSTHDR(&msg);
if (!cm)
error(1, 0, "cmsg: no cmsg");
if (!((cm->cmsg_level == SOL_IP && cm->cmsg_type == IP_RECVERR) ||
(cm->cmsg_level == SOL_IPV6 && cm->cmsg_type == IPV6_RECVERR) ||
(cm->cmsg_level == SOL_PACKET && cm->cmsg_type == PACKET_TX_TIMESTAMP)))
error(1, 0, "serr: wrong type: %d.%d",
cm->cmsg_level, cm->cmsg_type);
serr = (void *) CMSG_DATA(cm);
if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
error(1, 0, "serr: wrong origin: %u", serr->ee_origin);
if (serr->ee_errno != 0)
error(1, 0, "serr: wrong error code: %u", serr->ee_errno);
hi = serr->ee_data;
lo = serr->ee_info;
range = hi - lo + 1;
/* Detect notification gaps. These should not happen often, if at all.
* Gaps can occur due to drops, reordering and retransmissions.
*/
if (lo != next_completion)
fprintf(stderr, "gap: %u..%u does not append to %u\n",
lo, hi, next_completion);
next_completion = hi + 1;
zerocopy = !(serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED);
if (zerocopied == -1)
zerocopied = zerocopy;
else if (zerocopied != zerocopy) {
fprintf(stderr, "serr: inconsistent\n");
zerocopied = zerocopy;
}
if (cfg_verbose >= 2)
fprintf(stderr, "completed: %u (h=%u l=%u)\n",
range, hi, lo);
completions += range;
return true;
}
/* Drain the error queue of @fd: consume notifications until
 * do_recv_completion() reports that none is left.
 */
static void do_recv_completions(int fd)
{
	for (;;) {
		if (!do_recv_completion(fd))
			break;
	}
}
/* Give outstanding completions up to cfg_waittime_ms to arrive on the
 * error queue of @fd, then report on stderr if some are still missing.
 */
static void do_recv_remaining_completions(int fd)
{
	const int64_t deadline = gettimeofday_ms() + cfg_waittime_ms;

	for (;;) {
		if (completions >= expected_completions)
			break;
		if (gettimeofday_ms() >= deadline)
			break;

		if (do_poll(fd, POLLERR))
			do_recv_completions(fd);
	}

	if (completions < expected_completions)
		fprintf(stderr, "missing notifications: %lu < %lu\n",
			completions, expected_completions);
}
/* Sender main loop.
 *
 * Builds a message of up to three iovecs - link-layer header (packet
 * SOCK_RAW only), network header (packet sockets and IPPROTO_RAW) and
 * payload - and sends it repeatedly for cfg_runtime_ms.  While the socket
 * is not writable, completion notifications are drained in zerocopy
 * mode.  Prints the tx statistics on stderr before returning.
 */
static void do_tx(int domain, int type, int protocol)
{
	const bool is_packet = (domain == PF_PACKET);
	struct iovec vec[3] = { {0} };
	struct msghdr mh = {0};
	struct sockaddr_ll ll;
	struct ethhdr mac;
	union {
		struct ipv6hdr ip6h;
		struct iphdr iph;
	} l3;
	uint64_t deadline;
	int sock;

	sock = do_setup_tx(domain, type, protocol);

	if (is_packet) {
		const uint16_t ethertype =
			(cfg_family == PF_INET) ? ETH_P_IP : ETH_P_IPV6;

		/* sock_raw passes ll header as data */
		if (type == SOCK_RAW) {
			memset(mac.h_dest, 0x06, ETH_ALEN);
			memset(mac.h_source, 0x02, ETH_ALEN);
			mac.h_proto = htons(ethertype);
			vec[0].iov_base = &mac;
			vec[0].iov_len = sizeof(mac);
			mh.msg_iovlen++;
		}

		/* both sock_raw and sock_dgram expect name */
		memset(&ll, 0, sizeof(ll));
		ll.sll_family = AF_PACKET;
		ll.sll_ifindex = cfg_ifindex;
		ll.sll_protocol = htons(ethertype);
		ll.sll_halen = ETH_ALEN;
		memset(ll.sll_addr, 0x06, ETH_ALEN);

		mh.msg_name = &ll;
		mh.msg_namelen = sizeof(ll);
	}

	/* packet and raw sockets with hdrincl must pass network header */
	if (is_packet || protocol == IPPROTO_RAW) {
		vec[1].iov_len = (cfg_family == PF_INET) ?
				 setup_iph(&l3.iph, cfg_payload_len) :
				 setup_ip6h(&l3.ip6h, cfg_payload_len);
		vec[1].iov_base = (void *) &l3;
		mh.msg_iovlen++;
	}

	vec[2].iov_base = payload;
	vec[2].iov_len = cfg_payload_len;
	mh.msg_iovlen++;

	/* the used entries are the last msg_iovlen slots of vec[] */
	mh.msg_iov = &vec[3 - mh.msg_iovlen];

	deadline = gettimeofday_ms() + cfg_runtime_ms;
	do {
		if (!cfg_cork)
			do_sendmsg(sock, &mh, cfg_zerocopy);
		else
			do_sendmsg_corked(sock, &mh);

		for (;;) {
			if (do_poll(sock, POLLOUT))
				break;
			if (cfg_zerocopy)
				do_recv_completions(sock);
		}
	} while (gettimeofday_ms() < deadline);

	if (cfg_zerocopy)
		do_recv_remaining_completions(sock);

	if (close(sock))
		error(1, errno, "close");

	fprintf(stderr, "tx=%lu (%lu MB) txc=%lu zc=%c\n",
		packets, bytes >> 20, completions,
		zerocopied == 1 ? 'y' : 'n');
}
/* Create the receiving socket, bound to cfg_dst_addr.
 *
 * PF_PACKET and IPPROTO_RAW are rejected as receive modes.  For
 * SOCK_STREAM one connection is accepted and the connected socket is
 * returned instead of the listener.  Every failure exits through error().
 */
static int do_setup_rx(int domain, int type, int protocol)
{
	int sock;

	/* If tx over PF_PACKET, rx over PF_INET(6)/SOCK_RAW,
	 * to recv the only copy of the packet, not a clone
	 */
	if (domain == PF_PACKET)
		error(1, 0, "Use PF_INET/SOCK_RAW to read");

	if (type == SOCK_RAW && protocol == IPPROTO_RAW)
		error(1, 0, "IPPROTO_RAW: not supported on Rx");

	sock = socket(domain, type, protocol);
	if (sock == -1)
		error(1, errno, "socket r");

	do_setsockopt(sock, SOL_SOCKET, SO_RCVBUF, 1 << 21);
	do_setsockopt(sock, SOL_SOCKET, SO_RCVLOWAT, 1 << 16);
	do_setsockopt(sock, SOL_SOCKET, SO_REUSEPORT, 1);

	if (bind(sock, (void *) &cfg_dst_addr, cfg_alen))
		error(1, errno, "bind");

	if (type != SOCK_STREAM)
		return sock;

	if (listen(sock, 1))
		error(1, errno, "listen");

	return do_accept(sock);
}
/* Discard up to 1 << 21 queued bytes from the tcp receive queue of @fd
 * without blocking.  A non-empty read counts as one packet and its
 * length is added to bytes; a read error other than EAGAIN exits
 * through error().
 */
static void do_flush_tcp(int fd)
{
	/* MSG_TRUNC flushes up to len bytes */
	const int got = recv(fd, NULL, 1 << 21, MSG_TRUNC | MSG_DONTWAIT);

	if (got == -1) {
		if (errno == EAGAIN)
			return;
		error(1, errno, "flush");
	}

	if (got == 0)
		return;

	packets++;
	bytes += got;
}
/* Receive one pending datagram from @fd without blocking, if there is
 * one, and verify it: the length must equal cfg_payload_len and the
 * leading bytes (as many as fit the 64-byte buffer) must match the
 * payload pattern.  Updates the packets/bytes counters.  Returns
 * silently when nothing is queued; any other problem exits through
 * error().
 */
static void do_flush_datagram(int fd, int type)
{
	int ret, off = 0;
	char buf[64];

	/* MSG_TRUNC will return full datagram length */
	ret = recv(fd, buf, sizeof(buf), MSG_DONTWAIT | MSG_TRUNC);
	if (ret == -1 && errno == EAGAIN)
		return;
	/* Test for failure before ret is adjusted below; otherwise a real
	 * error on a raw ipv4 socket shows up as a bogus length mismatch.
	 */
	if (ret == -1)
		error(1, errno, "recv");

	/* raw ipv4 return with header, raw ipv6 without */
	if (cfg_family == PF_INET && type == SOCK_RAW) {
		off += sizeof(struct iphdr);
		ret -= sizeof(struct iphdr);
	}

	/* ret can be negative here (datagram shorter than the header) */
	if (ret != cfg_payload_len)
		error(1, 0, "recv: ret=%d != %d", ret, cfg_payload_len);

	/* only the part that landed in buf can be compared */
	if (ret > (int) sizeof(buf) - off)
		ret = sizeof(buf) - off;
	if (memcmp(buf + off, payload, ret))
		error(1, 0, "recv: data mismatch");

	packets++;
	bytes += cfg_payload_len;
}
/* Receiver main loop: for cfg_runtime_ms, read whatever is queued on the
 * receive socket and then wait for more input.  Prints the rx statistics
 * on stderr before returning.
 */
static void do_rx(int domain, int type, int protocol)
{
	const int sock = do_setup_rx(domain, type, protocol);
	const uint64_t deadline = gettimeofday_ms() + cfg_runtime_ms;

	do {
		if (type != SOCK_STREAM)
			do_flush_datagram(sock, type);
		else
			do_flush_tcp(sock);

		/* result unused: this only paces the loop */
		do_poll(sock, POLLIN);
	} while (gettimeofday_ms() < deadline);

	if (close(sock))
		error(1, errno, "close");

	fprintf(stderr, "rx=%lu (%lu MB)\n", packets, bytes >> 20);
}
/* Run one test: validate the cork option against the socket kind, pin
 * the process to cfg_cpu, fill the payload buffer with a repeating
 * 'a'..'z' pattern and act as receiver or sender depending on cfg_rx.
 */
static void do_test(int domain, int type, int protocol)
{
	int pos;

	/* corking is only accepted for non-packet SOCK_DGRAM sockets */
	if (cfg_cork && !(domain != PF_PACKET && type == SOCK_DGRAM))
		error(1, 0, "can only cork udp sockets");

	do_setcpu(cfg_cpu);

	for (pos = 0; pos < IP_MAXPACKET; pos++)
		payload[pos] = 'a' + (pos % 26);

	if (!cfg_rx)
		do_tx(domain, type, protocol);
	else
		do_rx(domain, type, protocol);
}
static void usage(const char *filepath)
{
error(1, 0, "Usage: %s [options] <test>", filepath);
}
/* Parse the command line into the cfg_* globals.
 *
 * Exactly one positional argument (the test name) must follow the
 * options, otherwise usage() is printed.  Inconsistent settings exit
 * through error().
 *
 * The -S/-D address strings are only recorded while scanning and are
 * converted after the loop, so the result no longer depends on where
 * -4/-6 and -p appear relative to them.
 */
static void parse_opts(int argc, char **argv)
{
	const int max_payload_len = sizeof(payload) -
				    sizeof(struct ipv6hdr) -
				    sizeof(struct tcphdr) -
				    40 /* max tcp options */;
	const char *daddr = NULL, *saddr = NULL;
	int c;

	cfg_payload_len = max_payload_len;

	while ((c = getopt(argc, argv, "46c:C:D:i:mp:rs:S:t:vz")) != -1) {
		switch (c) {
		case '4':
			if (cfg_family != PF_UNSPEC)
				error(1, 0, "Pass one of -4 or -6");
			cfg_family = PF_INET;
			cfg_alen = sizeof(struct sockaddr_in);
			break;
		case '6':
			if (cfg_family != PF_UNSPEC)
				error(1, 0, "Pass one of -4 or -6");
			cfg_family = PF_INET6;
			cfg_alen = sizeof(struct sockaddr_in6);
			break;
		case 'c':
			cfg_cork = strtol(optarg, NULL, 0);
			break;
		case 'C':
			cfg_cpu = strtol(optarg, NULL, 0);
			break;
		case 'D':
			daddr = optarg;
			break;
		case 'i':
			cfg_ifindex = if_nametoindex(optarg);
			if (cfg_ifindex == 0)
				error(1, errno, "invalid iface: %s", optarg);
			break;
		case 'm':
			cfg_cork_mixed = true;
			break;
		case 'p':
			/* Keep host byte order here: setup_sockaddr() is the
			 * one place that applies htons().
			 */
			cfg_port = strtoul(optarg, NULL, 0);
			break;
		case 'r':
			cfg_rx = true;
			break;
		case 's':
			cfg_payload_len = strtoul(optarg, NULL, 0);
			break;
		case 'S':
			saddr = optarg;
			break;
		case 't':
			/* seconds on the command line, plus 200 ms of slack */
			cfg_runtime_ms = 200 + strtoul(optarg, NULL, 10) * 1000;
			break;
		case 'v':
			cfg_verbose++;
			break;
		case 'z':
			cfg_zerocopy = true;
			break;
		}
	}

	/* family and port are final now, so the addresses can be built */
	if (saddr)
		setup_sockaddr(cfg_family, saddr, &cfg_src_addr);
	if (daddr)
		setup_sockaddr(cfg_family, daddr, &cfg_dst_addr);

	if (cfg_payload_len > max_payload_len)
		error(1, 0, "-s: payload exceeds max (%d)", max_payload_len);
	if (cfg_cork_mixed && (!cfg_zerocopy || !cfg_cork))
		error(1, 0, "-m: cork_mixed requires corking and zerocopy");

	if (optind != argc - 1)
		usage(argv[0]);
}
/* Entry point: parse the options, then map the test name (the last
 * argument) to a socket domain/type/protocol and run that test.  An
 * unknown name exits through error().
 */
int main(int argc, char **argv)
{
	/* "packet" tests use PF_PACKET, all others the family from -4/-6 */
	static const struct {
		const char *name;
		bool packet;
		int type;
		int protocol;
	} modes[] = {
		{ "packet",		true,	SOCK_RAW,	0 },
		{ "packet_dgram",	true,	SOCK_DGRAM,	0 },
		{ "raw",		false,	SOCK_RAW,	IPPROTO_EGP },
		{ "raw_hdrincl",	false,	SOCK_RAW,	IPPROTO_RAW },
		{ "tcp",		false,	SOCK_STREAM,	0 },
		{ "udp",		false,	SOCK_DGRAM,	0 },
	};
	const char *cfg_test;
	unsigned int m;

	parse_opts(argc, argv);

	cfg_test = argv[argc - 1];

	for (m = 0; m < sizeof(modes) / sizeof(modes[0]); m++) {
		if (strcmp(cfg_test, modes[m].name))
			continue;

		do_test(modes[m].packet ? PF_PACKET : cfg_family,
			modes[m].type, modes[m].protocol);
		return 0;
	}

	error(1, 0, "unknown cfg_test %s", cfg_test);

	return 0;
}

View File

@@ -0,0 +1,112 @@
#!/bin/bash
#
# Send data between two processes across namespaces
# Run twice: once without and once with zerocopy
# Abort on the first failing command.
set -e
# Both ends of the veth pair use the same device name, one per namespace.
readonly DEV="veth0"
readonly DEV_MTU=65535
readonly BIN="./msg_zerocopy"
# Random suffix for the namespace names (mktemp -u only generates a name).
readonly RAND="$(mktemp -u XXXXXX)"
readonly NSPREFIX="ns-${RAND}"
# NS1 runs the sender, NS2 the receiver (see do_test below).
readonly NS1="${NSPREFIX}1"
readonly NS2="${NSPREFIX}2"
readonly SADDR4='192.168.1.1'
readonly DADDR4='192.168.1.2'
readonly SADDR6='fd::1'
readonly DADDR6='fd::2'
# sysctl that is raised for the test and restored by cleanup.
readonly path_sysctl_mem="net.core.optmem_max"
# Argument parsing
if [[ "$#" -lt "2" ]]; then
echo "Usage: $0 [4|6] [tcp|udp|raw|raw_hdrincl|packet|packet_dgram] <args>"
exit 1
fi
readonly IP="$1"
shift
readonly TXMODE="$1"
shift
# Everything after the first two arguments is handed to the test binary.
readonly EXTRA_ARGS="$@"
# Argument parsing: configure addresses
if [[ "${IP}" == "4" ]]; then
readonly SADDR="${SADDR4}"
readonly DADDR="${DADDR4}"
elif [[ "${IP}" == "6" ]]; then
readonly SADDR="${SADDR6}"
readonly DADDR="${DADDR6}"
else
echo "Invalid IP version ${IP}"
exit 1
fi
# Argument parsing: select receive mode
#
# This differs from send mode for
# - packet: use raw recv, because packet receives skb clones
# - raw_hdrincl: use raw recv, because hdrincl is a tx-only option
case "${TXMODE}" in
'packet' | 'packet_dgram' | 'raw_hdrincl')
RXMODE='raw'
;;
*)
RXMODE="${TXMODE}"
;;
esac
# Start of state changes: install cleanup handler
# Remember the current value so that cleanup can put it back.
save_sysctl_mem="$(sysctl -n ${path_sysctl_mem})"
# EXIT handler: remove both namespaces and restore the saved sysctl value.
cleanup() {
	# The script runs under "set -e".  Switch it off here so that a
	# failing "ip netns del" (for instance when the namespace was never
	# created) cannot abort the handler before the sysctl is restored.
	set +e

	ip netns del "${NS2}"
	ip netns del "${NS1}"
	sysctl -w -q "${path_sysctl_mem}=${save_sysctl_mem}"
}
trap cleanup EXIT

# Configure system settings
# (cleanup restores the value saved before this point)
sysctl -w -q "${path_sysctl_mem}=1000000"

# Create virtual ethernet pair between network namespaces
ip netns add "${NS1}"
ip netns add "${NS2}"

ip link add "${DEV}" mtu "${DEV_MTU}" netns "${NS1}" type veth \
  peer name "${DEV}" mtu "${DEV_MTU}" netns "${NS2}"

# Bring the devices up
ip -netns "${NS1}" link set "${DEV}" up
ip -netns "${NS2}" link set "${DEV}" up

# Set fixed MAC addresses on the devices
ip -netns "${NS1}" link set dev "${DEV}" address 02:02:02:02:02:02
ip -netns "${NS2}" link set dev "${DEV}" address 06:06:06:06:06:06

# Add fixed IP addresses to the devices.  Use the address variables that
# are also passed to the test binary, so the two cannot drift apart.
ip -netns "${NS1}" addr add "${SADDR4}/24" dev "${DEV}"
ip -netns "${NS2}" addr add "${DADDR4}/24" dev "${DEV}"
ip -netns "${NS1}" addr add "${SADDR6}/64" dev "${DEV}" nodad
ip -netns "${NS2}" addr add "${DADDR6}/64" dev "${DEV}" nodad

# Optionally disable sg or csum offload to test edge cases
# ip netns exec "${NS1}" ethtool -K "${DEV}" sg off
# Run one transfer: start the receiver in NS2 in the background, give it
# 0.2 s to come up, run the sender in NS1 and wait for the receiver.
#   $1 - extra options passed to both the sender and the receiver
do_test() {
	# "local readonly ARGS=..." would declare a local variable literally
	# named "readonly" and leave ARGS writable; -r is the read-only flag.
	local -r ARGS="$1"

	echo "ipv${IP} ${TXMODE} ${ARGS}"
	# ${ARGS} is unquoted on purpose: it may carry several options.
	ip netns exec "${NS2}" "${BIN}" "-${IP}" -i "${DEV}" -t 2 -C 2 -S "${SADDR}" -D "${DADDR}" ${ARGS} -r "${RXMODE}" &
	sleep 0.2
	ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 -S "${SADDR}" -D "${DADDR}" ${ARGS} "${TXMODE}"
	wait
}
# First pass without zerocopy, second pass with -z added.
do_test "${EXTRA_ARGS}"
do_test "-z ${EXTRA_ARGS}"
# Only reached when no command failed ("set -e" is active).
echo ok

View File

@@ -0,0 +1,272 @@
#!/bin/sh
#
# This test exercises rtnetlink call paths to get as much coverage as possible.
#
# set -e
devdummy="test-dummy0"
ret=0
# Record a command's exit status in the global $ret.
#
# $1: exit status to record.
#
# Only the first non-zero status is kept: once $ret is non-zero, later calls
# leave it untouched.
check_err()
{
	if [ $ret -ne 0 ]; then
		return 0
	fi
	ret=$1
}
# Create the dummy device named by $devdummy and bring it up.
# Failures of either step are recorded in $ret through check_err.
kci_add_dummy()
{
	# create the device
	ip link add name "$devdummy" type dummy
	check_err $?

	# set it administratively up
	ip link set "$devdummy" up
	check_err $?
}
# Remove the dummy device named by $devdummy.
# A failure is recorded in $ret through check_err.
kci_del_dummy()
{
	ip link del dev "$devdummy"
	check_err $?
}
# add a bridge with vlans on top
#
# Resets $ret, then creates a bridge, enslaves the dummy device to it, stacks
# a VLAN device (id 1) on the bridge, assigns IPv4/IPv6 addresses, dumps link
# and route state, and removes everything again.
# Prints PASS/FAIL and returns 1 if any step failed.
kci_test_bridge()
{
devbr="test-br0"
vlandev="testbr-vlan1"
ret=0
# create the bridge and attach the dummy device as a port
ip link add name "$devbr" type bridge
check_err $?
ip link set dev "$devdummy" master "$devbr"
check_err $?
ip link set "$devbr" up
check_err $?
# VLAN device on top of the bridge, with one IPv4 and one IPv6 address
ip link add link "$devbr" name "$vlandev" type vlan id 1
check_err $?
ip addr add dev "$vlandev" 10.200.7.23/30
check_err $?
ip -6 addr add dev "$vlandev" dead:42::1234/64
check_err $?
# dump detailed link info and all routing tables; only the exit status matters
ip -d link > /dev/null
check_err $?
ip r s t all > /dev/null
check_err $?
# teardown: IPv6 address, VLAN device, bridge
ip -6 addr del dev "$vlandev" dead:42::1234/64
check_err $?
ip link del dev "$vlandev"
check_err $?
ip link del dev "$devbr"
check_err $?
if [ $ret -ne 0 ];then
echo "FAIL: bridge setup"
return 1
fi
echo "PASS: bridge setup"
}
# Resets $ret, then creates a GRE tunnel device, configures an address and a
# route on it, adds an address to the dummy device, dumps link and address
# state, and removes the address and the tunnel again.
# Prints PASS/FAIL and returns 1 if any step failed.
kci_test_gre()
{
gredev=neta
rem=10.42.42.1
loc=10.0.0.1
ret=0
# create the tunnel endpoint and bring it up
ip tunnel add $gredev mode gre remote $rem local $loc ttl 1
check_err $?
ip link set $gredev up
check_err $?
# address and route on the tunnel device
ip addr add 10.23.7.10 dev $gredev
check_err $?
ip route add 10.23.8.0/30 dev $gredev
check_err $?
# additional address on the dummy device
ip addr add dev "$devdummy" 10.23.7.11/24
check_err $?
# dump links and addresses; only the exit status matters
ip link > /dev/null
check_err $?
ip addr > /dev/null
check_err $?
# teardown
ip addr del dev "$devdummy" 10.23.7.11/24
check_err $?
ip link del $gredev
check_err $?
if [ $ret -ne 0 ];then
echo "FAIL: gre tunnel endpoint"
return 1
fi
echo "PASS: gre tunnel endpoint"
}
# tc uses rtnetlink too, for full tc testing
# please see tools/testing/selftests/tc-testing.
#
# Resets $ret, then builds a small htb setup on the loopback device (root
# qdisc, one class, several u32 filters), lists the filters, deletes one
# filter, lists again and finally removes the root qdisc.
# Prints PASS/FAIL and returns 1 if any step failed.
kci_test_tc()
{
dev=lo
ret=0
# root qdisc and a single class
tc qdisc add dev "$dev" root handle 1: htb
check_err $?
tc class add dev "$dev" parent 1: classid 1:10 htb rate 1mbit
check_err $?
# three u32 entries with handles ffe:, ffd: and ffc: (divisor 256)
tc filter add dev "$dev" parent 1:0 prio 5 handle ffe: protocol ip u32 divisor 256
check_err $?
tc filter add dev "$dev" parent 1:0 prio 5 handle ffd: protocol ip u32 divisor 256
check_err $?
tc filter add dev "$dev" parent 1:0 prio 5 handle ffc: protocol ip u32 divisor 256
check_err $?
# two source-address matches placed in ht ffe:2:, both directed to class 1:10
tc filter add dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:3 u32 ht ffe:2: match ip src 10.0.0.3 flowid 1:10
check_err $?
tc filter add dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:2 u32 ht ffe:2: match ip src 10.0.0.2 flowid 1:10
check_err $?
# list, delete one filter, list again; only the exit status matters
tc filter show dev "$dev" parent 1:0 > /dev/null
check_err $?
tc filter del dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:3 u32
check_err $?
tc filter show dev "$dev" parent 1:0 > /dev/null
check_err $?
# teardown: delete the root qdisc
tc qdisc del dev "$dev" root handle 1: htb
check_err $?
if [ $ret -ne 0 ];then
echo "FAIL: tc htb hierarchy"
return 1
fi
echo "PASS: tc htb hierarchy"
}
# Policy routing test: add a fwmark rule pointing at table 100, add a local
# default route on lo to that table, dump all routing tables, then delete the
# rule and the route again.
# Resets $ret on entry; prints PASS/FAIL and returns 1 if any step failed.
kci_test_polrouting()
{
	ret=0

	# rule + route in table 100
	ip rule add fwmark 1 lookup 100
	check_err $?
	ip route add local 0.0.0.0/0 dev lo table 100
	check_err $?

	# dump all tables; only the exit status matters
	ip r s t all > /dev/null
	check_err $?

	# teardown
	ip rule del fwmark 1 lookup 100
	check_err $?
	ip route del local 0.0.0.0/0 dev lo table 100
	check_err $?

	if [ $ret -eq 0 ]; then
		echo "PASS: policy routing"
		return
	fi

	echo "FAIL: policy route test"
	return 1
}
# Resets $ret, then issues a series of "ip route get" lookups for IPv4 and
# IPv6 with different selectors (dev, from, iif, oif, tos, mark), including
# one lookup that needs a temporary address on the dummy device.
# Prints PASS/FAIL and returns 1 if any step failed.
kci_test_route_get()
{
ret=0
# plain and device-bound lookups; only the exit status matters
ip route get 127.0.0.1 > /dev/null
check_err $?
ip route get 127.0.0.1 dev "$devdummy" > /dev/null
check_err $?
ip route get ::1 > /dev/null
check_err $?
ip route get fe80::1 dev "$devdummy" > /dev/null
check_err $?
# lookups with source address, interface, tos and mark selectors
ip route get 127.0.0.1 from 127.0.0.1 oif lo tos 0x1 mark 0x1 > /dev/null
check_err $?
ip route get ::1 from ::1 iif lo oif lo tos 0x1 mark 0x1 > /dev/null
check_err $?
# input-path lookup via the dummy device, using a temporary address
ip addr add dev "$devdummy" 10.23.7.11/24
check_err $?
ip route get 10.23.7.11 from 10.23.7.12 iif "$devdummy" > /dev/null
check_err $?
ip addr del dev "$devdummy" 10.23.7.11/24
check_err $?
if [ $ret -ne 0 ];then
echo "FAIL: route get"
return 1
fi
echo "PASS: route get"
}
# Resets $ret, then adds, lists and deletes IPv6 address labels, followed by
# a concurrent add/delete loop on the same prefix.
# Prints PASS/FAIL and returns 1 if any checked step failed.
kci_test_addrlabel()
{
ret=0
# label bound to a device: add, verify it is listed, delete
ip addrlabel add prefix dead::/64 dev lo label 1
check_err $?
ip addrlabel list |grep -q "prefix dead::/64 dev lo label 1"
check_err $?
ip addrlabel del prefix dead::/64 dev lo label 1 2> /dev/null
check_err $?
# same prefix without a device
ip addrlabel add prefix dead::/64 label 1 2> /dev/null
check_err $?
ip addrlabel del prefix dead::/64 label 1 2> /dev/null
check_err $?
# concurrent add/delete
# The add loop runs in the background while the delete loop runs in the
# foreground; the results of the individual commands are not checked.
for i in $(seq 1 1000); do
ip addrlabel add prefix 1c3::/64 label 12345 2>/dev/null
done &
for i in $(seq 1 1000); do
ip addrlabel del prefix 1c3::/64 label 12345 2>/dev/null
done
wait
# final delete in case the label is still present; result is not checked
ip addrlabel del prefix 1c3::/64 label 12345 2>/dev/null
if [ $ret -ne 0 ];then
echo "FAIL: ipv6 addrlabel"
return 1
fi
echo "PASS: ipv6 addrlabel"
}
# Run every rtnetlink test against a freshly created dummy device.
#
# Returns 1 early if the dummy device cannot be created. Otherwise the global
# $ret is left non-zero when any test, or the final device removal, failed.
#
# Each kci_test_* function resets $ret to 0 on entry, so $ret by itself only
# reflects the last test that ran. The tests return non-zero on failure, so
# their return codes are collected here and folded back into $ret at the end.
kci_test_rtnl()
{
	kci_add_dummy
	if [ $ret -ne 0 ];then
		echo "FAIL: cannot add dummy interface"
		return 1
	fi

	rtnl_failed=0
	for kci_test in kci_test_polrouting kci_test_route_get kci_test_tc \
			kci_test_gre kci_test_bridge kci_test_addrlabel; do
		$kci_test || rtnl_failed=1
	done

	kci_del_dummy
	# Make sure a failure in any earlier test is visible in $ret.
	check_err $rtnl_failed
}
# check for needed privileges
# The test is skipped (exit status 0) when not run as root.
if [ "$(id -u)" -ne 0 ];then
echo "SKIP: Need root privileges"
exit 0
fi
# The test is also skipped (exit status 0) when ip or tc cannot be run.
for x in ip tc;do
$x -Version 2>/dev/null >/dev/null
if [ $? -ne 0 ];then
echo "SKIP: Could not run test without the $x tool"
exit 0
fi
done
# Run the tests and report the accumulated status as the exit code.
kci_test_rtnl
exit $ret

View File

@@ -1,3 +1,4 @@
timestamping
rxtimestamp
txtimestamp
hwtstamp_config

View File

@@ -1,4 +1,6 @@
TEST_PROGS := hwtstamp_config timestamping txtimestamp
CFLAGS += -I../../../../../usr/include
TEST_PROGS := hwtstamp_config rxtimestamp timestamping txtimestamp
all: $(TEST_PROGS)

View File

@@ -0,0 +1,389 @@
#include <errno.h>
#include <error.h>
#include <getopt.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/time.h>
#include <sys/socket.h>
#include <sys/select.h>
#include <sys/ioctl.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <asm/types.h>
#include <linux/net_tstamp.h>
#include <linux/errqueue.h>
/* Number of elements in a true array (not valid for pointers). */
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
/*
 * Timestamp socket options to apply to the receiving socket.
 * A field that is zero is not set at all (see config_so_flags()).
 */
struct options {
int so_timestamp; /* value for setsockopt(SO_TIMESTAMP) */
int so_timestampns; /* value for setsockopt(SO_TIMESTAMPNS) */
int so_timestamping; /* SOF_TIMESTAMPING_* flags for SO_TIMESTAMPING */
};
/*
 * Which timestamps were seen (or are expected) in the control messages of a
 * received packet; filled in and compared by do_recv().
 */
struct tstamps {
bool tstamp; /* an SCM_TIMESTAMP cmsg is present */
bool tstampns; /* an SCM_TIMESTAMPNS cmsg is present */
bool swtstamp; /* SCM_TIMESTAMPING ts[0] has a non-zero tv_sec */
bool hwtstamp; /* SCM_TIMESTAMPING ts[2] has a non-zero tv_sec */
};
/* One kind of socket the tests are run against. */
struct socket_type {
char *friendly_name; /* name printed in the test output */
int type; /* socket type passed to socket() */
int protocol; /* protocol passed to socket() */
bool enabled; /* set when selected on the command line */
};
/* A set of socket options together with the timestamps they should yield. */
struct test_case {
struct options sockopt; /* options applied to the receiving socket */
struct tstamps expected; /* timestamps expected on the received packet */
bool enabled; /* set when selected with --test_num */
};
/* Pairs a SOF_TIMESTAMPING_* bit with its name, for printing. */
struct sof_flag {
int mask; /* flag bit */
char *name; /* flag name as a string */
};
/* SO_TIMESTAMPING flags that print_test_case() knows how to name. */
static struct sof_flag sof_flags[] = {
/* Expands to { flag value, "flag name" }. */
#define SOF_FLAG(f) { f, #f }
SOF_FLAG(SOF_TIMESTAMPING_SOFTWARE),
SOF_FLAG(SOF_TIMESTAMPING_RX_SOFTWARE),
SOF_FLAG(SOF_TIMESTAMPING_RX_HARDWARE),
};
/*
 * Socket kinds under test. The order matters: main() selects entries by
 * index (0 = ip, 1 = udp, 2 = tcp) when parsing --ip, --udp and --tcp.
 */
static struct socket_type socket_types[] = {
{ "ip", SOCK_RAW, IPPROTO_EGP },
{ "udp", SOCK_DGRAM, IPPROTO_UDP },
{ "tcp", SOCK_STREAM, IPPROTO_TCP },
};
/*
 * Table of test cases: each entry lists the socket options to set on the
 * receiver and the timestamps that are then expected on a received packet.
 * The index of an entry is the test number used by --list_tests/--test_num.
 *
 * Uses standard C99 designated initializers (".field = value") rather than
 * the obsolete GNU "field: value" form, and "{0}" rather than the
 * non-standard empty "{}" initializer.
 */
static struct test_case test_cases[] = {
	/* No options set: no timestamps expected. */
	{ {0}, {0} },
	{
		{ .so_timestamp = 1 },
		{ .tstamp = true }
	},
	{
		{ .so_timestampns = 1 },
		{ .tstampns = true }
	},
	{
		/* With both options set only the ns variant is expected. */
		{ .so_timestamp = 1, .so_timestampns = 1 },
		{ .tstampns = true }
	},
	{
		{ .so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE },
		{0}
	},
	{
		/* Loopback device does not support hw timestamps. */
		{ .so_timestamping = SOF_TIMESTAMPING_RX_HARDWARE },
		{0}
	},
	{
		{ .so_timestamping = SOF_TIMESTAMPING_SOFTWARE },
		{0}
	},
	{
		{ .so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE
			| SOF_TIMESTAMPING_RX_HARDWARE },
		{0}
	},
	{
		/* Software timestamps need both the generation and report flag. */
		{ .so_timestamping = SOF_TIMESTAMPING_SOFTWARE
			| SOF_TIMESTAMPING_RX_SOFTWARE },
		{ .swtstamp = true }
	},
	{
		{ .so_timestamp = 1,
		  .so_timestamping = SOF_TIMESTAMPING_SOFTWARE
			| SOF_TIMESTAMPING_RX_SOFTWARE },
		{ .tstamp = true, .swtstamp = true }
	},
};
/*
 * Long command-line options accepted by main(). Each option maps to the
 * single character that main() switches on.
 *
 * getopt_long() requires the array to end with an all-zero element; without
 * it the parser reads past the end of the array when an option is unknown.
 */
static struct option long_options[] = {
	{ "list_tests", no_argument, 0, 'l' },
	{ "test_num", required_argument, 0, 'n' },
	{ "op_size", required_argument, 0, 's' },
	{ "tcp", no_argument, 0, 't' },
	{ "udp", no_argument, 0, 'u' },
	{ "ip", no_argument, 0, 'i' },
	{ NULL, 0, NULL, 0 },
};
/* Port for the next non-raw test; run_test_case() increments it per use. */
static int next_port = 19999;
/* Payload size in bytes of each send; can be overridden with --op_size. */
static int op_size = 10 * 1024;
void print_test_case(struct test_case *t)
{
int f = 0;
printf("sockopts {");
if (t->sockopt.so_timestamp)
printf(" SO_TIMESTAMP ");
if (t->sockopt.so_timestampns)
printf(" SO_TIMESTAMPNS ");
if (t->sockopt.so_timestamping) {
printf(" SO_TIMESTAMPING: {");
for (f = 0; f < ARRAY_SIZE(sof_flags); f++)
if (t->sockopt.so_timestamping & sof_flags[f].mask)
printf(" %s |", sof_flags[f].name);
printf("}");
}
printf("} expected cmsgs: {");
if (t->expected.tstamp)
printf(" SCM_TIMESTAMP ");
if (t->expected.tstampns)
printf(" SCM_TIMESTAMPNS ");
if (t->expected.swtstamp || t->expected.hwtstamp) {
printf(" SCM_TIMESTAMPING {");
if (t->expected.swtstamp)
printf("0");
if (t->expected.swtstamp && t->expected.hwtstamp)
printf(",");
if (t->expected.hwtstamp)
printf("2");
printf("}");
}
printf("}\n");
}
/*
 * Send one payload of op_size bytes (all 'z') on socket @src.
 *
 * Exits the program through error() if the buffer cannot be allocated, if
 * write() fails, or if fewer than op_size bytes were written.
 */
void do_send(int src)
{
	/* write() returns ssize_t; an int could truncate the result. */
	ssize_t r;
	char *buf = malloc(op_size);

	if (!buf)
		error(1, errno, "Failed to allocate %d byte send buffer",
		      op_size);
	memset(buf, 'z', op_size);

	r = write(src, buf, op_size);
	if (r < 0)
		error(1, errno, "Failed to sendmsg");
	/* The receiver expects the whole payload, so a short write is fatal. */
	if (r != op_size)
		error(1, 0, "Only sent %zd of %d bytes of payload.",
		      r, op_size);

	free(buf);
}
/*
 * Receive one message of exactly @read_size bytes on socket @rcv and compare
 * the timestamps found in its control messages with @expected.
 *
 * Returns true if the test FAILED, i.e. at least one timestamp field differs
 * from @expected; returns false when everything matches.
 *
 * Exits the program through error() on allocation failure, receive errors,
 * a payload of unexpected size, truncation, or an unexpected cmsg level or
 * type.
 */
bool do_recv(int rcv, int read_size, struct tstamps expected)
{
	/* Compile-time constant so that cmsg_buf is a plain array, not a VLA. */
	enum { CMSG_SIZE = 1024 };
	struct scm_timestamping *ts;
	struct tstamps actual = {0};
	char cmsg_buf[CMSG_SIZE];
	struct iovec recv_iov;
	struct cmsghdr *cmsg;
	bool failed = false;
	struct msghdr hdr;
	int flags = 0;
	int r;

	memset(&hdr, 0, sizeof(hdr));
	hdr.msg_iov = &recv_iov;
	hdr.msg_iovlen = 1;
	recv_iov.iov_base = malloc(read_size);
	if (!recv_iov.iov_base)
		error(1, errno, "Failed to allocate %d byte receive buffer",
		      read_size);
	recv_iov.iov_len = read_size;

	hdr.msg_control = cmsg_buf;
	hdr.msg_controllen = sizeof(cmsg_buf);

	r = recvmsg(rcv, &hdr, flags);
	if (r < 0)
		error(1, errno, "Failed to recvmsg");
	if (r != read_size)
		error(1, 0, "Only received %d bytes of payload.", r);

	if (hdr.msg_flags & (MSG_TRUNC | MSG_CTRUNC))
		error(1, 0, "Message was truncated.");

	/* Record which kinds of timestamp the kernel attached. */
	for (cmsg = CMSG_FIRSTHDR(&hdr); cmsg != NULL;
	     cmsg = CMSG_NXTHDR(&hdr, cmsg)) {
		if (cmsg->cmsg_level != SOL_SOCKET)
			error(1, 0, "Unexpected cmsg_level %d",
			      cmsg->cmsg_level);
		switch (cmsg->cmsg_type) {
		case SCM_TIMESTAMP:
			actual.tstamp = true;
			break;
		case SCM_TIMESTAMPNS:
			actual.tstampns = true;
			break;
		case SCM_TIMESTAMPING:
			ts = (struct scm_timestamping *)CMSG_DATA(cmsg);
			/* ts[0] is the software and ts[2] the hardware slot. */
			actual.swtstamp = !!ts->ts[0].tv_sec;
			/* Reported, but not counted as a test failure. */
			if (ts->ts[1].tv_sec != 0)
				error(0, 0, "ts[1] should not be set.");
			actual.hwtstamp = !!ts->ts[2].tv_sec;
			break;
		default:
			error(1, 0, "Unexpected cmsg_type %d", cmsg->cmsg_type);
			break;
		}
	}

/* Report a mismatch between expected and actual for one tstamps field. */
#define VALIDATE(field) \
	do { \
		if (expected.field != actual.field) { \
			if (expected.field) \
				error(0, 0, "Expected " #field " to be set."); \
			else \
				error(0, 0, \
				      "Expected " #field " to not be set."); \
			failed = true; \
		} \
	} while (0)

	VALIDATE(tstamp);
	VALIDATE(tstampns);
	VALIDATE(swtstamp);
	VALIDATE(hwtstamp);
#undef VALIDATE

	free(recv_iov.iov_base);

	return failed;
}
/*
 * Apply the socket options in @o to socket @rcv.
 *
 * SO_REUSEADDR is always enabled. Each timestamp option is only set when its
 * field in @o is non-zero, and the field's value is what gets passed to
 * setsockopt(). Any setsockopt() failure exits the program through error().
 */
void config_so_flags(int rcv, struct options o)
{
	int reuse = 1;

	if (setsockopt(rcv, SOL_SOCKET, SO_REUSEADDR,
		       &reuse, sizeof(reuse)) < 0)
		error(1, errno, "Failed to enable SO_REUSEADDR");

	if (o.so_timestamp) {
		if (setsockopt(rcv, SOL_SOCKET, SO_TIMESTAMP,
			       &o.so_timestamp, sizeof(o.so_timestamp)) < 0)
			error(1, errno, "Failed to enable SO_TIMESTAMP");
	}

	if (o.so_timestampns) {
		if (setsockopt(rcv, SOL_SOCKET, SO_TIMESTAMPNS,
			       &o.so_timestampns,
			       sizeof(o.so_timestampns)) < 0)
			error(1, errno, "Failed to enable SO_TIMESTAMPNS");
	}

	if (o.so_timestamping) {
		if (setsockopt(rcv, SOL_SOCKET, SO_TIMESTAMPING,
			       &o.so_timestamping,
			       sizeof(o.so_timestamping)) < 0)
			error(1, errno, "Failed to set SO_TIMESTAMPING");
	}
}
/*
 * Run test case @t over a socket pair of kind @s on the loopback address.
 *
 * Opens a sender and a receiver socket, connects them (accepting the
 * connection for stream sockets), applies the test's socket options to the
 * receiver, sends one payload and checks the timestamps of the received
 * message.
 *
 * Returns the result of do_recv(): true if the test FAILED. Any socket
 * setup error exits the program through error().
 */
bool run_test_case(struct socket_type s, struct test_case t)
{
	const bool is_stream = (s.type == SOCK_STREAM);
	const bool is_raw = (s.type == SOCK_RAW);
	struct sockaddr_in addr;
	int src, dst, rcv;
	int read_size;
	bool failed;
	int port = 0;

	/* Raw sockets use port 0; every other test takes a fresh port. */
	if (!is_raw)
		port = next_port++;

	src = socket(AF_INET, s.type, s.protocol);
	if (src < 0)
		error(1, errno, "Failed to open src socket");

	dst = socket(AF_INET, s.type, s.protocol);
	if (dst < 0)
		error(1, errno, "Failed to open dst socket");

	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
	addr.sin_port = htons(port);

	if (bind(dst, (struct sockaddr *)&addr, sizeof(addr)) < 0)
		error(1, errno, "Failed to bind to port %d", port);

	if (is_stream && (listen(dst, 1) < 0))
		error(1, errno, "Failed to listen");

	if (connect(src, (struct sockaddr *)&addr, sizeof(addr)) < 0)
		error(1, errno, "Failed to connect");

	/*
	 * For stream sockets the data arrives on the accepted connection and
	 * the listening socket is no longer needed; otherwise the bound
	 * socket itself receives.
	 */
	rcv = dst;
	if (is_stream) {
		rcv = accept(dst, NULL, NULL);
		if (rcv < 0)
			error(1, errno, "Failed to accept");
		close(dst);
	}

	config_so_flags(rcv, t.sockopt);
	usleep(20000); /* setsockopt for SO_TIMESTAMPING is asynchronous */
	do_send(src);

	read_size = op_size;
	if (is_raw)
		read_size += 20; /* for IP header */

	failed = do_recv(rcv, read_size, t.expected);

	close(rcv);
	close(src);

	return failed;
}
/*
 * Parse the command line, then run the selected test cases against the
 * selected socket types.
 *
 * Options:
 *   --list_tests     print all test cases with their number and exit
 *   --test_num N     run only test case N (may be given several times)
 *   --op_size BYTES  payload size of each send
 *   --tcp/--udp/--ip restrict the run to the given socket type(s)
 *
 * Without --test_num all test cases run; without a protocol option all
 * socket types are tested. Returns the number of failed test cases.
 */
int main(int argc, char **argv)
{
	bool all_protocols = true;
	bool all_tests = true;
	int arg_index = 0;
	int failures = 0;
	int s, t;
	/*
	 * Must be int: getopt_long() returns int, and where plain char is
	 * unsigned a char could never compare equal to -1.
	 */
	int opt;

	while ((opt = getopt_long(argc, argv, "", long_options,
				  &arg_index)) != -1) {
		switch (opt) {
		case 'l':
			for (t = 0; t < (int)ARRAY_SIZE(test_cases); t++) {
				printf("%d\t", t);
				print_test_case(&test_cases[t]);
			}
			return 0;
		case 'n':
			t = atoi(optarg);
			/* Valid indices are 0 .. ARRAY_SIZE(test_cases) - 1. */
			if (t < 0 || t >= (int)ARRAY_SIZE(test_cases))
				error(1, 0, "Invalid test case: %d", t);
			all_tests = false;
			test_cases[t].enabled = true;
			break;
		case 's':
			op_size = atoi(optarg);
			/* The size is later used as an allocation size. */
			if (op_size < 0)
				error(1, 0, "Invalid op_size: %s", optarg);
			break;
		case 't':
			all_protocols = false;
			socket_types[2].enabled = true;
			break;
		case 'u':
			all_protocols = false;
			socket_types[1].enabled = true;
			break;
		case 'i':
			all_protocols = false;
			socket_types[0].enabled = true;
			break;
		default:
			error(1, 0, "Failed to parse parameters.");
		}
	}

	for (s = 0; s < (int)ARRAY_SIZE(socket_types); s++) {
		if (!all_protocols && !socket_types[s].enabled)
			continue;

		printf("Testing %s...\n", socket_types[s].friendly_name);
		for (t = 0; t < (int)ARRAY_SIZE(test_cases); t++) {
			if (!all_tests && !test_cases[t].enabled)
				continue;

			printf("Starting testcase %d...\n", t);
			/* run_test_case() returns true on failure. */
			if (run_test_case(socket_types[s], test_cases[t])) {
				failures++;
				printf("FAILURE in test case ");
				print_test_case(&test_cases[t]);
			}
		}
	}
	if (!failures)
		printf("PASSED.\n");
	return failures;
}

View File

@@ -1111,5 +1111,55 @@
"teardown": [
"$TC actions flush action gact"
]
},
{
"id": "a568",
"name": "Add action with ife type",
"category": [
"actions",
"ife"
],
"setup": [
[
"$TC actions flush action ife",
0,
1,
255
],
"$TC actions add action ife encode type 0xDEAD index 1"
],
"cmdUnderTest": "$TC actions get action ife index 1",
"expExitCode": "0",
"verifyCmd": "$TC actions get action ife index 1",
"matchPattern": "type 0xDEAD",
"matchCount": "1",
"teardown": [
"$TC actions flush action ife"
]
},
{
"id": "b983",
"name": "Add action without ife type",
"category": [
"actions",
"ife"
],
"setup": [
[
"$TC actions flush action ife",
0,
1,
255
],
"$TC actions add action ife encode index 1"
],
"cmdUnderTest": "$TC actions get action ife index 1",
"expExitCode": "0",
"verifyCmd": "$TC actions get action ife index 1",
"matchPattern": "type 0xED3E",
"matchCount": "1",
"teardown": [
"$TC actions flush action ife"
]
}
]