Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next

Pull networking updates from David Miller:
 "Highlights:

   - Gustavo A. R. Silva keeps working on the implicit switch fallthru
     changes.

   - Support 802.11ax High-Efficiency wireless in cfg80211 et al, From
     Luca Coelho.

   - Re-enable ASPM in r8169, from Kai-Heng Feng.

   - Add virtual XFRM interfaces, which avoids all of the limitations of
     existing IPSEC tunnels. From Steffen Klassert.

   - Convert GRO over to use a hash table, so that when we have many
     flows active we don't traverse a long list during accumluation.

   - Many new self tests for routing, TC, tunnels, etc. Too many
     contributors to mention them all, but I'm really happy to keep
     seeing this stuff.

   - Hardware timestamping support for dpaa_eth/fsl-fman from Yangbo Lu.

   - Lots of cleanups and fixes in L2TP code from Guillaume Nault.

   - Add IPSEC offload support to netdevsim, from Shannon Nelson.

   - Add support for slotting with non-uniform distribution to netem
     packet scheduler, from Yousuk Seung.

   - Add UDP GSO support to mlx5e, from Boris Pismenny.

   - Support offloading of Team LAG in NFP, from John Hurley.

   - Allow to configure TX queue selection based upon RX queue, from
     Amritha Nambiar.

   - Support ethtool ring size configuration in aquantia, from Anton
     Mikaev.

   - Support DSCP and flowlabel per-transport in SCTP, from Xin Long.

   - Support list based batching and stack traversal of SKBs, this is
     very exciting work. From Edward Cree.

   - Busyloop optimizations in vhost_net, from Toshiaki Makita.

   - Introduce the ETF qdisc, which allows time based transmissions. IGB
     can offload this in hardware. From Vinicius Costa Gomes.

   - Add parameter support to devlink, from Moshe Shemesh.

   - Several multiplication and division optimizations for BPF JIT in
     nfp driver, from Jiong Wang.

   - Lots of prepatory work to make more of the packet scheduler layer
     lockless, when possible, from Vlad Buslov.

   - Add ACK filter and NAT awareness to sch_cake packet scheduler, from
     Toke Høiland-Jørgensen.

   - Support regions and region snapshots in devlink, from Alex Vesker.

   - Allow to attach XDP programs to both HW and SW at the same time on
     a given device, with initial support in nfp. From Jakub Kicinski.

   - Add TLS RX offload and support in mlx5, from Ilya Lesokhin.

   - Use PHYLIB in r8169 driver, from Heiner Kallweit.

   - All sorts of changes to support Spectrum 2 in mlxsw driver, from
     Ido Schimmel.

   - PTP support in mv88e6xxx DSA driver, from Andrew Lunn.

   - Make TCP_USER_TIMEOUT socket option more accurate, from Jon
     Maxwell.

   - Support for templates in packet scheduler classifier, from Jiri
     Pirko.

   - IPV6 support in RDS, from Ka-Cheong Poon.

   - Native tproxy support in nf_tables, from Máté Eckl.

   - Maintain IP fragment queue in an rbtree, but optimize properly for
     in-order frags. From Peter Oskolkov.

   - Improvde handling of ACKs on hole repairs, from Yuchung Cheng"

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1996 commits)
  bpf: test: fix spelling mistake "REUSEEPORT" -> "REUSEPORT"
  hv/netvsc: Fix NULL dereference at single queue mode fallback
  net: filter: mark expected switch fall-through
  xen-netfront: fix warn message as irq device name has '/'
  cxgb4: Add new T5 PCI device ids 0x50af and 0x50b0
  net: dsa: mv88e6xxx: missing unlock on error path
  rds: fix building with IPV6=m
  inet/connection_sock: prefer _THIS_IP_ to current_text_addr
  net: dsa: mv88e6xxx: bitwise vs logical bug
  net: sock_diag: Fix spectre v1 gadget in __sock_diag_cmd()
  ieee802154: hwsim: using right kind of iteration
  net: hns3: Add vlan filter setting by ethtool command -K
  net: hns3: Set tx ring' tc info when netdev is up
  net: hns3: Remove tx ring BD len register in hns3_enet
  net: hns3: Fix desc num set to default when setting channel
  net: hns3: Fix for phy link issue when using marvell phy driver
  net: hns3: Fix for information of phydev lost problem when down/up
  net: hns3: Fix for command format parsing error in hclge_is_all_function_id_zero
  net: hns3: Add support for serdes loopback selftest
  bnxt_en: take coredump_record structure off stack
  ...
This commit is contained in:
Linus Torvalds
2018-08-15 15:04:25 -07:00
1752 changed files with 119281 additions and 29178 deletions

View File

@@ -52,6 +52,7 @@ hostprogs-y += xdp_adjust_tail
hostprogs-y += xdpsock
hostprogs-y += xdp_fwd
hostprogs-y += task_fd_query
hostprogs-y += xdp_sample_pkts
# Libbpf dependencies
LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
@@ -104,9 +105,10 @@ xdp_rxq_info-objs := xdp_rxq_info_user.o
syscall_tp-objs := bpf_load.o syscall_tp_user.o
cpustat-objs := bpf_load.o cpustat_user.o
xdp_adjust_tail-objs := xdp_adjust_tail_user.o
xdpsock-objs := bpf_load.o xdpsock_user.o
xdp_fwd-objs := bpf_load.o xdp_fwd_user.o
xdpsock-objs := xdpsock_user.o
xdp_fwd-objs := xdp_fwd_user.o
task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS)
xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS)
# Tell kbuild to always build the programs
always := $(hostprogs-y)
@@ -163,6 +165,7 @@ always += xdp_adjust_tail_kern.o
always += xdpsock_kern.o
always += xdp_fwd_kern.o
always += task_fd_query_kern.o
always += xdp_sample_pkts_kern.o
KBUILD_HOSTCFLAGS += -I$(objtree)/usr/include
KBUILD_HOSTCFLAGS += -I$(srctree)/tools/lib/
@@ -179,6 +182,7 @@ HOSTCFLAGS_spintest_user.o += -I$(srctree)/tools/lib/bpf/
HOSTCFLAGS_trace_event_user.o += -I$(srctree)/tools/lib/bpf/
HOSTCFLAGS_sampleip_user.o += -I$(srctree)/tools/lib/bpf/
HOSTCFLAGS_task_fd_query_user.o += -I$(srctree)/tools/lib/bpf/
HOSTCFLAGS_xdp_sample_pkts_user.o += -I$(srctree)/tools/lib/bpf/
KBUILD_HOSTLDLIBS += $(LIBBPF) -lelf
HOSTLDLIBS_tracex4 += -lrt
@@ -191,6 +195,8 @@ HOSTLDLIBS_xdpsock += -pthread
# make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
LLC ?= llc
CLANG ?= clang
LLVM_OBJCOPY ?= llvm-objcopy
BTF_PAHOLE ?= pahole
# Detect that we're cross compiling and use the cross compiler
ifdef CROSS_COMPILE
@@ -198,6 +204,16 @@ HOSTCC = $(CROSS_COMPILE)gcc
CLANG_ARCH_ARGS = -target $(ARCH)
endif
BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris)
BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm')
ifneq ($(and $(BTF_LLC_PROBE),$(BTF_PAHOLE_PROBE),$(BTF_OBJCOPY_PROBE)),)
EXTRA_CFLAGS += -g
LLC_FLAGS += -mattr=dwarfris
DWARF2BTF = y
endif
# Trick to allow make to be run from this directory
all:
$(MAKE) -C ../../ $(CURDIR)/ BPF_SAMPLES_PATH=$(CURDIR)
@@ -256,4 +272,7 @@ $(obj)/%.o: $(src)/%.c
-Wno-gnu-variable-sized-type-not-at-end \
-Wno-address-of-packed-member -Wno-tautological-compare \
-Wno-unknown-warning-option $(CLANG_ARCH_ARGS) \
-O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@
-O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf $(LLC_FLAGS) -filetype=obj -o $@
ifeq ($(DWARF2BTF),y)
$(BTF_PAHOLE) -J $@
endif

View File

@@ -107,6 +107,9 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
return -1;
}
if (prog_cnt == MAX_PROGS)
return -1;
fd = bpf_load_program(prog_type, prog, insns_cnt, license, kern_version,
bpf_log_buf, BPF_LOG_BUF_SIZE);
if (fd < 0) {

55
samples/bpf/hash_func01.h Normal file
View File

@@ -0,0 +1,55 @@
/* SPDX-License-Identifier: LGPL-2.1
*
* Based on Paul Hsieh's (LGPG 2.1) hash function
* From: http://www.azillionmonkeys.com/qed/hash.html
*/
#define get16bits(d) (*((const __u16 *) (d)))
static __always_inline
__u32 SuperFastHash (const char *data, int len, __u32 initval) {
__u32 hash = initval;
__u32 tmp;
int rem;
if (len <= 0 || data == NULL) return 0;
rem = len & 3;
len >>= 2;
/* Main loop */
#pragma clang loop unroll(full)
for (;len > 0; len--) {
hash += get16bits (data);
tmp = (get16bits (data+2) << 11) ^ hash;
hash = (hash << 16) ^ tmp;
data += 2*sizeof (__u16);
hash += hash >> 11;
}
/* Handle end cases */
switch (rem) {
case 3: hash += get16bits (data);
hash ^= hash << 16;
hash ^= ((signed char)data[sizeof (__u16)]) << 18;
hash += hash >> 11;
break;
case 2: hash += get16bits (data);
hash ^= hash << 11;
hash += hash >> 17;
break;
case 1: hash += (signed char)*data;
hash ^= hash << 10;
hash += hash >> 1;
}
/* Force "avalanching" of final 127 bits */
hash ^= hash << 3;
hash += hash >> 5;
hash ^= hash << 4;
hash += hash >> 17;
hash ^= hash << 25;
hash += hash >> 6;
return hash;
}

View File

@@ -8,7 +8,8 @@
* information. The number of invocations of the program, which maps
* to the number of packets received, is stored to key 0. Key 1 is
* incremented on each iteration by the number of bytes stored in
* the skb.
* the skb. The program also stores the number of received bytes
* in the cgroup storage.
*
* - Attaches the new program to a cgroup using BPF_PROG_ATTACH
*
@@ -21,12 +22,15 @@
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <sys/resource.h>
#include <sys/time.h>
#include <unistd.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>
#include "bpf_insn.h"
#include "bpf_rlimit.h"
#include "cgroup_helpers.h"
#define FOO "/foo"
@@ -205,6 +209,8 @@ static int map_fd = -1;
static int prog_load_cnt(int verdict, int val)
{
int cgroup_storage_fd;
if (map_fd < 0)
map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0);
if (map_fd < 0) {
@@ -212,6 +218,13 @@ static int prog_load_cnt(int verdict, int val)
return -1;
}
cgroup_storage_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE,
sizeof(struct bpf_cgroup_storage_key), 8, 0, 0);
if (cgroup_storage_fd < 0) {
printf("failed to create map '%s'\n", strerror(errno));
return -1;
}
struct bpf_insn prog[] = {
BPF_MOV32_IMM(BPF_REG_0, 0),
BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
@@ -222,6 +235,11 @@ static int prog_load_cnt(int verdict, int val)
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
BPF_MOV64_IMM(BPF_REG_1, val), /* r1 = 1 */
BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
BPF_LD_MAP_FD(BPF_REG_1, cgroup_storage_fd),
BPF_MOV64_IMM(BPF_REG_2, 0),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
BPF_MOV64_IMM(BPF_REG_1, val),
BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_W, BPF_REG_0, BPF_REG_1, 0, 0),
BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
BPF_EXIT_INSN(),
};
@@ -237,6 +255,7 @@ static int prog_load_cnt(int verdict, int val)
printf("Output from verifier:\n%s\n-------\n", bpf_log_buf);
return 0;
}
close(cgroup_storage_fd);
return ret;
}

View File

@@ -51,7 +51,7 @@ int main(int argc, char **argv)
if (argc > 3)
filter_id = atoi(argv[3]);
if (filter_id > prog_cnt) {
if (filter_id >= prog_cnt) {
printf("Invalid program id; program not found in file\n");
return EXIT_FAILURE;
}

View File

@@ -24,8 +24,7 @@
#include <fcntl.h>
#include <libgen.h>
#include "bpf_load.h"
#include "bpf_util.h"
#include "bpf/libbpf.h"
#include <bpf/bpf.h>
@@ -63,9 +62,15 @@ static void usage(const char *prog)
int main(int argc, char **argv)
{
struct bpf_prog_load_attr prog_load_attr = {
.prog_type = BPF_PROG_TYPE_XDP,
};
const char *prog_name = "xdp_fwd";
struct bpf_program *prog;
char filename[PATH_MAX];
struct bpf_object *obj;
int opt, i, idx, err;
int prog_id = 0;
int prog_fd, map_fd;
int attach = 1;
int ret = 0;
@@ -75,7 +80,7 @@ int main(int argc, char **argv)
attach = 0;
break;
case 'D':
prog_id = 1;
prog_name = "xdp_fwd_direct";
break;
default:
usage(basename(argv[0]));
@@ -90,6 +95,7 @@ int main(int argc, char **argv)
if (attach) {
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
prog_load_attr.file = filename;
if (access(filename, O_RDONLY) < 0) {
printf("error accessing file %s: %s\n",
@@ -97,19 +103,25 @@ int main(int argc, char **argv)
return 1;
}
if (load_bpf_file(filename)) {
printf("%s", bpf_log_buf);
if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
return 1;
prog = bpf_object__find_program_by_title(obj, prog_name);
prog_fd = bpf_program__fd(prog);
if (prog_fd < 0) {
printf("program not found: %s\n", strerror(prog_fd));
return 1;
}
if (!prog_fd[prog_id]) {
printf("load_bpf_file: %s\n", strerror(errno));
map_fd = bpf_map__fd(bpf_object__find_map_by_name(obj,
"tx_port"));
if (map_fd < 0) {
printf("map not found: %s\n", strerror(map_fd));
return 1;
}
}
if (attach) {
for (i = 1; i < 64; ++i)
bpf_map_update_elem(map_fd[0], &i, &i, 0);
bpf_map_update_elem(map_fd, &i, &i, 0);
}
for (i = optind; i < argc; ++i) {
@@ -126,7 +138,7 @@ int main(int argc, char **argv)
if (err)
ret = err;
} else {
err = do_attach(idx, prog_fd[prog_id], argv[i]);
err = do_attach(idx, prog_fd, argv[i]);
if (err)
ret = err;
}

View File

@@ -13,6 +13,7 @@
#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"
#include "hash_func01.h"
#define MAX_CPUS 64 /* WARNING - sync with _user.c */
@@ -134,7 +135,16 @@ bool parse_eth(struct ethhdr *eth, void *data_end,
return false;
eth_type = vlan_hdr->h_vlan_encapsulated_proto;
}
/* TODO: Handle double VLAN tagged packet */
/* Handle double VLAN tagged packet */
if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) {
struct vlan_hdr *vlan_hdr;
vlan_hdr = (void *)eth + offset;
offset += sizeof(*vlan_hdr);
if ((void *)eth + offset > data_end)
return false;
eth_type = vlan_hdr->h_vlan_encapsulated_proto;
}
*eth_proto = ntohs(eth_type);
*l3_offset = offset;
@@ -452,6 +462,108 @@ int xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx)
return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
/* Hashing initval */
#define INITVAL 15485863
static __always_inline
u32 get_ipv4_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
struct iphdr *iph = data + nh_off;
u32 cpu_hash;
if (iph + 1 > data_end)
return 0;
cpu_hash = iph->saddr + iph->daddr;
cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + iph->protocol);
return cpu_hash;
}
static __always_inline
u32 get_ipv6_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
struct ipv6hdr *ip6h = data + nh_off;
u32 cpu_hash;
if (ip6h + 1 > data_end)
return 0;
cpu_hash = ip6h->saddr.s6_addr32[0] + ip6h->daddr.s6_addr32[0];
cpu_hash += ip6h->saddr.s6_addr32[1] + ip6h->daddr.s6_addr32[1];
cpu_hash += ip6h->saddr.s6_addr32[2] + ip6h->daddr.s6_addr32[2];
cpu_hash += ip6h->saddr.s6_addr32[3] + ip6h->daddr.s6_addr32[3];
cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + ip6h->nexthdr);
return cpu_hash;
}
/* Load-Balance traffic based on hashing IP-addrs + L4-proto. The
* hashing scheme is symmetric, meaning swapping IP src/dest still hit
* same CPU.
*/
SEC("xdp_cpu_map5_lb_hash_ip_pairs")
int xdp_prognum5_lb_hash_ip_pairs(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
struct ethhdr *eth = data;
u8 ip_proto = IPPROTO_UDP;
struct datarec *rec;
u16 eth_proto = 0;
u64 l3_offset = 0;
u32 cpu_dest = 0;
u32 cpu_idx = 0;
u32 *cpu_lookup;
u32 *cpu_max;
u32 cpu_hash;
u32 key = 0;
/* Count RX packet in map */
rec = bpf_map_lookup_elem(&rx_cnt, &key);
if (!rec)
return XDP_ABORTED;
rec->processed++;
cpu_max = bpf_map_lookup_elem(&cpus_count, &key);
if (!cpu_max)
return XDP_ABORTED;
if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
return XDP_PASS; /* Just skip */
/* Hash for IPv4 and IPv6 */
switch (eth_proto) {
case ETH_P_IP:
cpu_hash = get_ipv4_hash_ip_pair(ctx, l3_offset);
break;
case ETH_P_IPV6:
cpu_hash = get_ipv6_hash_ip_pair(ctx, l3_offset);
break;
case ETH_P_ARP: /* ARP packet handled on CPU idx 0 */
default:
cpu_hash = 0;
}
/* Choose CPU based on hash */
cpu_idx = cpu_hash % *cpu_max;
cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
if (!cpu_lookup)
return XDP_ABORTED;
cpu_dest = *cpu_lookup;
if (cpu_dest >= MAX_CPUS) {
rec->issue++;
return XDP_ABORTED;
}
return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
char _license[] SEC("license") = "GPL";

View File

@@ -22,7 +22,7 @@ static const char *__doc__ =
#define MAX_CPUS 64 /* WARNING - sync with _kern.c */
/* How many xdp_progs are defined in _kern.c */
#define MAX_PROG 5
#define MAX_PROG 6
/* Wanted to get rid of bpf_load.h and fake-"libbpf.h" (and instead
* use bpf/libbpf.h), but cannot as (currently) needed for XDP
@@ -567,7 +567,7 @@ int main(int argc, char **argv)
int added_cpus = 0;
int longindex = 0;
int interval = 2;
int prog_num = 0;
int prog_num = 5;
int add_cpu = -1;
__u32 qsize;
int opt;

View File

@@ -4,6 +4,8 @@
* Example howto extract XDP RX-queue info
*/
#include <uapi/linux/bpf.h>
#include <uapi/linux/if_ether.h>
#include <uapi/linux/in.h>
#include "bpf_helpers.h"
/* Config setup from with userspace
@@ -14,6 +16,12 @@
struct config {
__u32 action;
int ifindex;
__u32 options;
};
enum cfg_options_flags {
NO_TOUCH = 0x0U,
READ_MEM = 0x1U,
SWAP_MAC = 0x2U,
};
struct bpf_map_def SEC("maps") config_map = {
.type = BPF_MAP_TYPE_ARRAY,
@@ -45,6 +53,23 @@ struct bpf_map_def SEC("maps") rx_queue_index_map = {
.max_entries = MAX_RXQs + 1,
};
static __always_inline
void swap_src_dst_mac(void *data)
{
unsigned short *p = data;
unsigned short dst[3];
dst[0] = p[0];
dst[1] = p[1];
dst[2] = p[2];
p[0] = p[3];
p[1] = p[4];
p[2] = p[5];
p[3] = dst[0];
p[4] = dst[1];
p[5] = dst[2];
}
SEC("xdp_prog0")
int xdp_prognum0(struct xdp_md *ctx)
{
@@ -90,6 +115,24 @@ int xdp_prognum0(struct xdp_md *ctx)
if (key == MAX_RXQs)
rxq_rec->issue++;
/* Default: Don't touch packet data, only count packets */
if (unlikely(config->options & (READ_MEM|SWAP_MAC))) {
struct ethhdr *eth = data;
if (eth + 1 > data_end)
return XDP_ABORTED;
/* Avoid compiler removing this: Drop non 802.3 Ethertypes */
if (ntohs(eth->h_proto) < ETH_P_802_3_MIN)
return XDP_ABORTED;
/* XDP_TX requires changing MAC-addrs, else HW may drop.
* Can also be enabled with --swapmac (for test purposes)
*/
if (unlikely(config->options & SWAP_MAC))
swap_src_dst_mac(data);
}
return config->action;
}

View File

@@ -50,6 +50,8 @@ static const struct option long_options[] = {
{"sec", required_argument, NULL, 's' },
{"no-separators", no_argument, NULL, 'z' },
{"action", required_argument, NULL, 'a' },
{"readmem", no_argument, NULL, 'r' },
{"swapmac", no_argument, NULL, 'm' },
{0, 0, NULL, 0 }
};
@@ -66,6 +68,12 @@ static void int_exit(int sig)
struct config {
__u32 action;
int ifindex;
__u32 options;
};
enum cfg_options_flags {
NO_TOUCH = 0x0U,
READ_MEM = 0x1U,
SWAP_MAC = 0x2U,
};
#define XDP_ACTION_MAX (XDP_TX + 1)
#define XDP_ACTION_MAX_STRLEN 11
@@ -109,6 +117,18 @@ static void list_xdp_actions(void)
printf("\n");
}
static char* options2str(enum cfg_options_flags flag)
{
if (flag == NO_TOUCH)
return "no_touch";
if (flag & SWAP_MAC)
return "swapmac";
if (flag & READ_MEM)
return "read";
fprintf(stderr, "ERR: Unknown config option flags");
exit(EXIT_FAIL);
}
static void usage(char *argv[])
{
int i;
@@ -305,7 +325,7 @@ static __u64 calc_errs_pps(struct datarec *r,
static void stats_print(struct stats_record *stats_rec,
struct stats_record *stats_prev,
int action)
int action, __u32 cfg_opt)
{
unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
unsigned int nr_cpus = bpf_num_possible_cpus();
@@ -316,8 +336,8 @@ static void stats_print(struct stats_record *stats_rec,
int i;
/* Header */
printf("\nRunning XDP on dev:%s (ifindex:%d) action:%s\n",
ifname, ifindex, action2str(action));
printf("\nRunning XDP on dev:%s (ifindex:%d) action:%s options:%s\n",
ifname, ifindex, action2str(action), options2str(cfg_opt));
/* stats_global_map */
{
@@ -399,7 +419,7 @@ static inline void swap(struct stats_record **a, struct stats_record **b)
*b = tmp;
}
static void stats_poll(int interval, int action)
static void stats_poll(int interval, int action, __u32 cfg_opt)
{
struct stats_record *record, *prev;
@@ -410,7 +430,7 @@ static void stats_poll(int interval, int action)
while (1) {
swap(&prev, &record);
stats_collect(record);
stats_print(record, prev, action);
stats_print(record, prev, action, cfg_opt);
sleep(interval);
}
@@ -421,6 +441,7 @@ static void stats_poll(int interval, int action)
int main(int argc, char **argv)
{
__u32 cfg_options= NO_TOUCH ; /* Default: Don't touch packet memory */
struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
struct bpf_prog_load_attr prog_load_attr = {
.prog_type = BPF_PROG_TYPE_XDP,
@@ -435,6 +456,7 @@ int main(int argc, char **argv)
int interval = 2;
__u32 key = 0;
char action_str_buf[XDP_ACTION_MAX_STRLEN + 1 /* for \0 */] = { 0 };
int action = XDP_PASS; /* Default action */
char *action_str = NULL;
@@ -496,6 +518,12 @@ int main(int argc, char **argv)
action_str = (char *)&action_str_buf;
strncpy(action_str, optarg, XDP_ACTION_MAX_STRLEN);
break;
case 'r':
cfg_options |= READ_MEM;
break;
case 'm':
cfg_options |= SWAP_MAC;
break;
case 'h':
error:
default:
@@ -523,6 +551,11 @@ int main(int argc, char **argv)
}
cfg.action = action;
/* XDP_TX requires changing MAC-addrs, else HW may drop */
if (action == XDP_TX)
cfg_options |= SWAP_MAC;
cfg.options = cfg_options;
/* Trick to pretty printf with thousands separators use %' */
if (use_separators)
setlocale(LC_NUMERIC, "en_US");
@@ -542,6 +575,6 @@ int main(int argc, char **argv)
return EXIT_FAIL_XDP;
}
stats_poll(interval, action);
stats_poll(interval, action, cfg_options);
return EXIT_OK;
}

View File

@@ -0,0 +1,66 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/ptrace.h>
#include <linux/version.h>
#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"
#define SAMPLE_SIZE 64ul
#define MAX_CPUS 128
#define bpf_printk(fmt, ...) \
({ \
char ____fmt[] = fmt; \
bpf_trace_printk(____fmt, sizeof(____fmt), \
##__VA_ARGS__); \
})
struct bpf_map_def SEC("maps") my_map = {
.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
.key_size = sizeof(int),
.value_size = sizeof(u32),
.max_entries = MAX_CPUS,
};
SEC("xdp_sample")
int xdp_sample_prog(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
/* Metadata will be in the perf event before the packet data. */
struct S {
u16 cookie;
u16 pkt_len;
} __packed metadata;
if (data < data_end) {
/* The XDP perf_event_output handler will use the upper 32 bits
* of the flags argument as a number of bytes to include of the
* packet payload in the event data. If the size is too big, the
* call to bpf_perf_event_output will fail and return -EFAULT.
*
* See bpf_xdp_event_output in net/core/filter.c.
*
* The BPF_F_CURRENT_CPU flag means that the event output fd
* will be indexed by the CPU number in the event map.
*/
u64 flags = BPF_F_CURRENT_CPU;
u16 sample_size;
int ret;
metadata.cookie = 0xdead;
metadata.pkt_len = (u16)(data_end - data);
sample_size = min(metadata.pkt_len, SAMPLE_SIZE);
flags |= (u64)sample_size << 32;
ret = bpf_perf_event_output(ctx, &my_map, flags,
&metadata, sizeof(metadata));
if (ret)
bpf_printk("perf_event_output failed: %d\n", ret);
}
return XDP_PASS;
}
char _license[] SEC("license") = "GPL";
u32 _version SEC("version") = LINUX_VERSION_CODE;

View File

@@ -0,0 +1,169 @@
// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <linux/perf_event.h>
#include <linux/bpf.h>
#include <net/if.h>
#include <errno.h>
#include <assert.h>
#include <sys/sysinfo.h>
#include <sys/ioctl.h>
#include <signal.h>
#include <libbpf.h>
#include <bpf/bpf.h>
#include "perf-sys.h"
#include "trace_helpers.h"
#define MAX_CPUS 128
static int pmu_fds[MAX_CPUS], if_idx;
static struct perf_event_mmap_page *headers[MAX_CPUS];
static char *if_name;
static int do_attach(int idx, int fd, const char *name)
{
int err;
err = bpf_set_link_xdp_fd(idx, fd, 0);
if (err < 0)
printf("ERROR: failed to attach program to %s\n", name);
return err;
}
static int do_detach(int idx, const char *name)
{
int err;
err = bpf_set_link_xdp_fd(idx, -1, 0);
if (err < 0)
printf("ERROR: failed to detach program from %s\n", name);
return err;
}
#define SAMPLE_SIZE 64
static int print_bpf_output(void *data, int size)
{
struct {
__u16 cookie;
__u16 pkt_len;
__u8 pkt_data[SAMPLE_SIZE];
} __packed *e = data;
int i;
if (e->cookie != 0xdead) {
printf("BUG cookie %x sized %d\n",
e->cookie, size);
return LIBBPF_PERF_EVENT_ERROR;
}
printf("Pkt len: %-5d bytes. Ethernet hdr: ", e->pkt_len);
for (i = 0; i < 14 && i < e->pkt_len; i++)
printf("%02x ", e->pkt_data[i]);
printf("\n");
return LIBBPF_PERF_EVENT_CONT;
}
static void test_bpf_perf_event(int map_fd, int num)
{
struct perf_event_attr attr = {
.sample_type = PERF_SAMPLE_RAW,
.type = PERF_TYPE_SOFTWARE,
.config = PERF_COUNT_SW_BPF_OUTPUT,
.wakeup_events = 1, /* get an fd notification for every event */
};
int i;
for (i = 0; i < num; i++) {
int key = i;
pmu_fds[i] = sys_perf_event_open(&attr, -1/*pid*/, i/*cpu*/,
-1/*group_fd*/, 0);
assert(pmu_fds[i] >= 0);
assert(bpf_map_update_elem(map_fd, &key,
&pmu_fds[i], BPF_ANY) == 0);
ioctl(pmu_fds[i], PERF_EVENT_IOC_ENABLE, 0);
}
}
static void sig_handler(int signo)
{
do_detach(if_idx, if_name);
exit(0);
}
int main(int argc, char **argv)
{
struct bpf_prog_load_attr prog_load_attr = {
.prog_type = BPF_PROG_TYPE_XDP,
};
struct bpf_object *obj;
struct bpf_map *map;
int prog_fd, map_fd;
char filename[256];
int ret, err, i;
int numcpus;
if (argc < 2) {
printf("Usage: %s <ifname>\n", argv[0]);
return 1;
}
numcpus = get_nprocs();
if (numcpus > MAX_CPUS)
numcpus = MAX_CPUS;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
prog_load_attr.file = filename;
if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
return 1;
if (!prog_fd) {
printf("load_bpf_file: %s\n", strerror(errno));
return 1;
}
map = bpf_map__next(NULL, obj);
if (!map) {
printf("finding a map in obj file failed\n");
return 1;
}
map_fd = bpf_map__fd(map);
if_idx = if_nametoindex(argv[1]);
if (!if_idx)
if_idx = strtoul(argv[1], NULL, 0);
if (!if_idx) {
fprintf(stderr, "Invalid ifname\n");
return 1;
}
if_name = argv[1];
err = do_attach(if_idx, prog_fd, argv[1]);
if (err)
return err;
if (signal(SIGINT, sig_handler) ||
signal(SIGHUP, sig_handler) ||
signal(SIGTERM, sig_handler)) {
perror("signal");
return 1;
}
test_bpf_perf_event(map_fd, numcpus);
for (i = 0; i < numcpus; i++)
if (perf_event_mmap_header(pmu_fds[i], &headers[i]) < 0)
return 1;
ret = perf_event_poller_multi(pmu_fds, headers, numcpus,
print_bpf_output);
kill(0, SIGINT);
return ret;
}

View File

@@ -26,7 +26,7 @@
#include <sys/types.h>
#include <poll.h>
#include "bpf_load.h"
#include "bpf/libbpf.h"
#include "bpf_util.h"
#include <bpf/bpf.h>
@@ -145,8 +145,13 @@ static void dump_stats(void);
} while (0)
#define barrier() __asm__ __volatile__("": : :"memory")
#ifdef __aarch64__
#define u_smp_rmb() __asm__ __volatile__("dmb ishld": : :"memory")
#define u_smp_wmb() __asm__ __volatile__("dmb ishst": : :"memory")
#else
#define u_smp_rmb() barrier()
#define u_smp_wmb() barrier()
#endif
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
@@ -886,7 +891,13 @@ static void l2fwd(struct xdpsock *xsk)
int main(int argc, char **argv)
{
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
struct bpf_prog_load_attr prog_load_attr = {
.prog_type = BPF_PROG_TYPE_XDP,
};
int prog_fd, qidconf_map, xsks_map;
struct bpf_object *obj;
char xdp_filename[256];
struct bpf_map *map;
int i, ret, key = 0;
pthread_t pt;
@@ -899,24 +910,38 @@ int main(int argc, char **argv)
}
snprintf(xdp_filename, sizeof(xdp_filename), "%s_kern.o", argv[0]);
prog_load_attr.file = xdp_filename;
if (load_bpf_file(xdp_filename)) {
fprintf(stderr, "ERROR: load_bpf_file %s\n", bpf_log_buf);
if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
exit(EXIT_FAILURE);
if (prog_fd < 0) {
fprintf(stderr, "ERROR: no program found: %s\n",
strerror(prog_fd));
exit(EXIT_FAILURE);
}
if (!prog_fd[0]) {
fprintf(stderr, "ERROR: load_bpf_file: \"%s\"\n",
strerror(errno));
map = bpf_object__find_map_by_name(obj, "qidconf_map");
qidconf_map = bpf_map__fd(map);
if (qidconf_map < 0) {
fprintf(stderr, "ERROR: no qidconf map found: %s\n",
strerror(qidconf_map));
exit(EXIT_FAILURE);
}
if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd[0], opt_xdp_flags) < 0) {
map = bpf_object__find_map_by_name(obj, "xsks_map");
xsks_map = bpf_map__fd(map);
if (xsks_map < 0) {
fprintf(stderr, "ERROR: no xsks map found: %s\n",
strerror(xsks_map));
exit(EXIT_FAILURE);
}
if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd, opt_xdp_flags) < 0) {
fprintf(stderr, "ERROR: link set xdp fd failed\n");
exit(EXIT_FAILURE);
}
ret = bpf_map_update_elem(map_fd[0], &key, &opt_queue, 0);
ret = bpf_map_update_elem(qidconf_map, &key, &opt_queue, 0);
if (ret) {
fprintf(stderr, "ERROR: bpf_map_update_elem qidconf\n");
exit(EXIT_FAILURE);
@@ -933,7 +958,7 @@ int main(int argc, char **argv)
/* ...and insert them into the map. */
for (i = 0; i < num_socks; i++) {
key = i;
ret = bpf_map_update_elem(map_fd[1], &key, &xsks[i]->sfd, 0);
ret = bpf_map_update_elem(xsks_map, &key, &xsks[i]->sfd, 0);
if (ret) {
fprintf(stderr, "ERROR: bpf_map_update_elem %d\n", i);
exit(EXIT_FAILURE);