Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

Pull networking fixes from David Miller: 1) Fix OOPS during nf_tables rule dump, from Florian Westphal. 2) Use after free in ip_vs_in, from Yue Haibing. 3) Fix various kTLS bugs (NULL deref during device removal resync, netdev notification ignoring, etc.) From Jakub Kicinski. 4) Fix ipv6 redirects with VRF, from David Ahern. 5) Memory leak fix in igmpv3_del_delrec(), from Eric Dumazet. 6) Missing memory allocation failure check in ip6_ra_control(), from Gen Zhang. And likewise fix ip_ra_control(). 7) TX clean budget logic error in aquantia, from Igor Russkikh. 8) SKB leak in llc_build_and_send_ui_pkt(), from Eric Dumazet. 9) Double frees in mlx5, from Parav Pandit. 10) Fix lost MAC address in r8169 during PCI D3, from Heiner Kallweit. 11) Fix botched register access in mvpp2, from Antoine Tenart. 12) Use after free in napi_gro_frags(), from Eric Dumazet. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (89 commits) net: correct zerocopy refcnt with udp MSG_MORE ethtool: Check for vlan etype or vlan tci when parsing flow_rule net: don't clear sock->sk early to avoid trouble in strparser net-gro: fix use-after-free read in napi_gro_frags() net: dsa: tag_8021q: Create a stable binary format net: dsa: tag_8021q: Change order of rx_vid setup net: mvpp2: fix bad MVPP2_TXQ_SCHED_TOKEN_CNTR_REG queue value ipv4: tcp_input: fix stack out of bounds when parsing TCP options. mlxsw: spectrum: Prevent force of 56G mlxsw: spectrum_acl: Avoid warning after identical rules insertion net: dsa: mv88e6xxx: fix handling of upper half of STATS_TYPE_PORT r8169: fix MAC address being lost in PCI D3 net: core: support XDP generic on stacked devices. netvsc: unshare skb in VF rx handler udp: Avoid post-GRO UDP checksum recalculation net: phy: dp83867: Set up RGMII TX delay net: phy: dp83867: do not call config_init twice net: phy: dp83867: increase SGMII autoneg timer duration net: phy: dp83867: fix speed 10 in sgmii mode net: phy: marvell10g: report if the PHY fails to boot firmware ...
2019-05-30 21:11:22 -07:00
parent adc3f554fa 100f6d8e09
commit 036e343109
87 changed files with 1769 additions and 684 deletions
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -208,8 +208,8 @@ tunnel6_a_addr="fd00:2::a"
 tunnel6_b_addr="fd00:2::b"
 tunnel6_mask="64"

-dummy6_0_addr="fc00:1000::0"
-dummy6_1_addr="fc00:1001::0"
+dummy6_0_prefix="fc00:1000::"
+dummy6_1_prefix="fc00:1001::"
 dummy6_mask="64"

 cleanup_done=1
@@ -1005,13 +1005,13 @@ test_pmtu_vti6_link_change_mtu() {
 	run_cmd ${ns_a} ip link set dummy0 up
 	run_cmd ${ns_a} ip link set dummy1 up

-	run_cmd ${ns_a} ip addr add ${dummy6_0_addr}/${dummy6_mask} dev dummy0
-	run_cmd ${ns_a} ip addr add ${dummy6_1_addr}/${dummy6_mask} dev dummy1
+	run_cmd ${ns_a} ip addr add ${dummy6_0_prefix}1/${dummy6_mask} dev dummy0
+	run_cmd ${ns_a} ip addr add ${dummy6_1_prefix}1/${dummy6_mask} dev dummy1

 	fail=0

 	# Create vti6 interface bound to device, passing MTU, check it
-	run_cmd ${ns_a} ip link add vti6_a mtu 1300 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr}
+	run_cmd ${ns_a} ip link add vti6_a mtu 1300 type vti6 remote ${dummy6_0_prefix}2 local ${dummy6_0_prefix}1
 	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
 	if [ ${mtu} -ne 1300 ]; then
 		err "  vti6 MTU ${mtu} doesn't match configured value 1300"
@@ -1020,7 +1020,7 @@ test_pmtu_vti6_link_change_mtu() {

 	# Move to another device with different MTU, without passing MTU, check
 	# MTU is adjusted
-	run_cmd ${ns_a} ip link set vti6_a type vti6 remote ${dummy6_1_addr} local ${dummy6_1_addr}
+	run_cmd ${ns_a} ip link set vti6_a type vti6 remote ${dummy6_1_prefix}2 local ${dummy6_1_prefix}1
 	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
 	if [ ${mtu} -ne $((3000 - 40)) ]; then
 		err "  vti MTU ${mtu} is not dummy MTU 3000 minus IPv6 header length"
@@ -1028,7 +1028,7 @@ test_pmtu_vti6_link_change_mtu() {
 	fi

 	# Move it back, passing MTU, check MTU is not overridden
-	run_cmd ${ns_a} ip link set vti6_a mtu 1280 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr}
+	run_cmd ${ns_a} ip link set vti6_a mtu 1280 type vti6 remote ${dummy6_0_prefix}2 local ${dummy6_0_prefix}1
 	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
 	if [ ${mtu} -ne 1280 ]; then
 		err "  vti6 MTU ${mtu} doesn't match configured value 1280"
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -442,6 +442,21 @@ TEST_F(tls, multiple_send_single_recv)
 	EXPECT_EQ(memcmp(send_mem, recv_mem + send_len, send_len), 0);
 }

+TEST_F(tls, single_send_multiple_recv_non_align)
+{
+	const unsigned int total_len = 15;
+	const unsigned int recv_len = 10;
+	char recv_mem[recv_len * 2];
+	char send_mem[total_len];
+
+	EXPECT_GE(send(self->fd, send_mem, total_len, 0), 0);
+	memset(recv_mem, 0, total_len);
+
+	EXPECT_EQ(recv(self->cfd, recv_mem, recv_len, 0), recv_len);
+	EXPECT_EQ(recv(self->cfd, recv_mem + recv_len, recv_len, 0), 5);
+	EXPECT_EQ(memcmp(send_mem, recv_mem, total_len), 0);
+}
+
 TEST_F(tls, recv_partial)
 {
 	char const *test_str = "test_read_partial";
@@ -575,6 +590,25 @@ TEST_F(tls, recv_peek_large_buf_mult_recs)
 	EXPECT_EQ(memcmp(test_str, buf, len), 0);
 }

+TEST_F(tls, recv_lowat)
+{
+	char send_mem[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
+	char recv_mem[20];
+	int lowat = 8;
+
+	EXPECT_EQ(send(self->fd, send_mem, 10, 0), 10);
+	EXPECT_EQ(send(self->fd, send_mem, 5, 0), 5);
+
+	memset(recv_mem, 0, 20);
+	EXPECT_EQ(setsockopt(self->cfd, SOL_SOCKET, SO_RCVLOWAT,
+			     &lowat, sizeof(lowat)), 0);
+	EXPECT_EQ(recv(self->cfd, recv_mem, 1, MSG_WAITALL), 1);
+	EXPECT_EQ(recv(self->cfd, recv_mem + 1, 6, MSG_WAITALL), 6);
+	EXPECT_EQ(recv(self->cfd, recv_mem + 7, 10, 0), 8);
+
+	EXPECT_EQ(memcmp(send_mem, recv_mem, 10), 0);
+	EXPECT_EQ(memcmp(send_mem, recv_mem + 10, 5), 0);
+}

 TEST_F(tls, pollin)
 {
--- a/tools/testing/selftests/netfilter/Makefile
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -2,6 +2,6 @@
 # Makefile for netfilter selftests

 TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \
-	conntrack_icmp_related.sh
+	conntrack_icmp_related.sh nft_flowtable.sh

 include ../lib.mk
--- a/tools/testing/selftests/netfilter/nft_flowtable.sh
+++ b/tools/testing/selftests/netfilter/nft_flowtable.sh
@@ -0,0 +1,324 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This tests basic flowtable functionality.
+# Creates following topology:
+#
+# Originator (MTU 9000) <-Router1-> MTU 1500 <-Router2-> Responder (MTU 2000)
+# Router1 is the one doing flow offloading, Router2 has no special
+# purpose other than having a link that is smaller than either Originator
+# and responder, i.e. TCPMSS announced values are too large and will still
+# result in fragmentation and/or PMTU discovery.
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+ns1in=""
+ns2in=""
+ns1out=""
+ns2out=""
+
+log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without nft tool"
+	exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+which nc > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without nc (netcat)"
+	exit $ksft_skip
+fi
+
+ip netns add nsr1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not create net namespace"
+	exit $ksft_skip
+fi
+
+ip netns add ns1
+ip netns add ns2
+
+ip netns add nsr2
+
+cleanup() {
+	for i in 1 2; do
+		ip netns del ns$i
+		ip netns del nsr$i
+	done
+
+	rm -f "$ns1in" "$ns1out"
+	rm -f "$ns2in" "$ns2out"
+
+	[ $log_netns -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns
+}
+
+trap cleanup EXIT
+
+sysctl -q net.netfilter.nf_log_all_netns=1
+
+ip link add veth0 netns nsr1 type veth peer name eth0 netns ns1
+ip link add veth1 netns nsr1 type veth peer name veth0 netns nsr2
+
+ip link add veth1 netns nsr2 type veth peer name eth0 netns ns2
+
+for dev in lo veth0 veth1; do
+  for i in 1 2; do
+    ip -net nsr$i link set $dev up
+  done
+done
+
+ip -net nsr1 addr add 10.0.1.1/24 dev veth0
+ip -net nsr1 addr add dead:1::1/64 dev veth0
+
+ip -net nsr2 addr add 10.0.2.1/24 dev veth1
+ip -net nsr2 addr add dead:2::1/64 dev veth1
+
+# set different MTUs so we need to push packets coming from ns1 (large MTU)
+# to ns2 (smaller MTU) to stack either to perform fragmentation (ip_no_pmtu_disc=1),
+# or to do PTMU discovery (send ICMP error back to originator).
+# ns2 is going via nsr2 with a smaller mtu, so that TCPMSS announced by both peers
+# is NOT the lowest link mtu.
+
+ip -net nsr1 link set veth0 mtu 9000
+ip -net ns1 link set eth0 mtu 9000
+
+ip -net nsr2 link set veth1 mtu 2000
+ip -net ns2 link set eth0 mtu 2000
+
+# transfer-net between nsr1 and nsr2.
+# these addresses are not used for connections.
+ip -net nsr1 addr add 192.168.10.1/24 dev veth1
+ip -net nsr1 addr add fee1:2::1/64 dev veth1
+
+ip -net nsr2 addr add 192.168.10.2/24 dev veth0
+ip -net nsr2 addr add fee1:2::2/64 dev veth0
+
+for i in 1 2; do
+  ip netns exec nsr$i sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+  ip netns exec nsr$i sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+  ip -net ns$i link set lo up
+  ip -net ns$i link set eth0 up
+  ip -net ns$i addr add 10.0.$i.99/24 dev eth0
+  ip -net ns$i route add default via 10.0.$i.1
+  ip -net ns$i addr add dead:$i::99/64 dev eth0
+  ip -net ns$i route add default via dead:$i::1
+  ip netns exec ns$i sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null
+
+  # don't set ip DF bit for first two tests
+  ip netns exec ns$i sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null
+done
+
+ip -net nsr1 route add default via 192.168.10.2
+ip -net nsr2 route add default via 192.168.10.1
+
+ip netns exec nsr1 nft -f - <<EOF
+table inet filter {
+  flowtable f1 {
+     hook ingress priority 0
+     devices = { veth0, veth1 }
+   }
+
+   chain forward {
+      type filter hook forward priority 0; policy drop;
+
+      # flow offloaded? Tag ct with mark 1, so we can detect when it fails.
+      meta oif "veth1" tcp dport 12345 flow offload @f1 counter
+
+      # use packet size to trigger 'should be offloaded by now'.
+      # otherwise, if 'flow offload' expression never offloads, the
+      # test will pass.
+      tcp dport 12345 meta length gt 200 ct mark set 1 counter
+
+      # this turns off flow offloading internally, so expect packets again
+      tcp flags fin,rst ct mark set 0 accept
+
+      # this allows large packets from responder, we need this as long
+      # as PMTUd is off.
+      # This rule is deleted for the last test, when we expect PMTUd
+      # to kick in and ensure all packets meet mtu requirements.
+      meta length gt 1500 accept comment something-to-grep-for
+
+      # next line blocks connection w.o. working offload.
+      # we only do this for reverse dir, because we expect packets to
+      # enter slow path due to MTU mismatch of veth0 and veth1.
+      tcp sport 12345 ct mark 1 counter log prefix "mark failure " drop
+
+      ct state established,related accept
+
+      # for packets that we can't offload yet, i.e. SYN (any ct that is not confirmed)
+      meta length lt 200 oif "veth1" tcp dport 12345 counter accept
+
+      meta nfproto ipv4 meta l4proto icmp accept
+      meta nfproto ipv6 meta l4proto icmpv6 accept
+   }
+}
+EOF
+
+if [ $? -ne 0 ]; then
+	echo "SKIP: Could not load nft ruleset"
+	exit $ksft_skip
+fi
+
+# test basic connectivity
+ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null
+if [ $? -ne 0 ];then
+  echo "ERROR: ns1 cannot reach ns2" 1>&2
+  bash
+  exit 1
+fi
+
+ip netns exec ns2 ping -c 1 -q 10.0.1.99 > /dev/null
+if [ $? -ne 0 ];then
+  echo "ERROR: ns2 cannot reach ns1" 1>&2
+  exit 1
+fi
+
+if [ $ret -eq 0 ];then
+	echo "PASS: netns routing/connectivity: ns1 can reach ns2"
+fi
+
+ns1in=$(mktemp)
+ns1out=$(mktemp)
+ns2in=$(mktemp)
+ns2out=$(mktemp)
+
+make_file()
+{
+	name=$1
+	who=$2
+
+	SIZE=$((RANDOM % (1024 * 8)))
+	TSIZE=$((SIZE * 1024))
+
+	dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null
+
+	SIZE=$((RANDOM % 1024))
+	SIZE=$((SIZE + 128))
+	TSIZE=$((TSIZE + SIZE))
+	dd if=/dev/urandom conf=notrunc of="$name" bs=1 count=$SIZE 2> /dev/null
+}
+
+check_transfer()
+{
+	in=$1
+	out=$2
+	what=$3
+
+	cmp "$in" "$out" > /dev/null 2>&1
+	if [ $? -ne 0 ] ;then
+		echo "FAIL: file mismatch for $what" 1>&2
+		ls -l "$in"
+		ls -l "$out"
+		return 1
+	fi
+
+	return 0
+}
+
+test_tcp_forwarding()
+{
+	local nsa=$1
+	local nsb=$2
+	local lret=0
+
+	ip netns exec $nsb nc -w 5 -l -p 12345 < "$ns2in" > "$ns2out" &
+	lpid=$!
+
+	sleep 1
+	ip netns exec $nsa nc -w 4 10.0.2.99 12345 < "$ns1in" > "$ns1out" &
+	cpid=$!
+
+	sleep 3
+
+	kill $lpid
+	kill $cpid
+	wait
+
+	check_transfer "$ns1in" "$ns2out" "ns1 -> ns2"
+	if [ $? -ne 0 ];then
+		lret=1
+	fi
+
+	check_transfer "$ns2in" "$ns1out" "ns1 <- ns2"
+	if [ $? -ne 0 ];then
+		lret=1
+	fi
+
+	return $lret
+}
+
+make_file "$ns1in" "ns1"
+make_file "$ns2in" "ns2"
+
+# First test:
+# No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed.
+test_tcp_forwarding ns1 ns2
+if [ $? -eq 0 ] ;then
+	echo "PASS: flow offloaded for ns1/ns2"
+else
+	echo "FAIL: flow offload for ns1/ns2:" 1>&2
+	ip netns exec nsr1 nft list ruleset
+	ret=1
+fi
+
+# delete default route, i.e. ns2 won't be able to reach ns1 and
+# will depend on ns1 being masqueraded in nsr1.
+# expect ns1 has nsr1 address.
+ip -net ns2 route del default via 10.0.2.1
+ip -net ns2 route del default via dead:2::1
+ip -net ns2 route add 192.168.10.1 via 10.0.2.1
+
+# Second test:
+# Same, but with NAT enabled.
+ip netns exec nsr1 nft -f - <<EOF
+table ip nat {
+   chain postrouting {
+      type nat hook postrouting priority 0; policy accept;
+      meta oifname "veth1" masquerade
+   }
+}
+EOF
+
+test_tcp_forwarding ns1 ns2
+
+if [ $? -eq 0 ] ;then
+	echo "PASS: flow offloaded for ns1/ns2 with NAT"
+else
+	echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2
+	ip netns exec nsr1 nft list ruleset
+	ret=1
+fi
+
+# Third test:
+# Same as second test, but with PMTU discovery enabled.
+handle=$(ip netns exec nsr1 nft -a list table inet filter | grep something-to-grep-for | cut -d \# -f 2)
+
+ip netns exec nsr1 nft delete rule inet filter forward $handle
+if [ $? -ne 0 ] ;then
+	echo "FAIL: Could not delete large-packet accept rule"
+	exit 1
+fi
+
+ip netns exec ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
+ip netns exec ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
+
+test_tcp_forwarding ns1 ns2
+if [ $? -eq 0 ] ;then
+	echo "PASS: flow offloaded for ns1/ns2 with NAT and pmtu discovery"
+else
+	echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2
+	ip netns exec nsr1 nft list ruleset
+fi
+
+exit $ret
--- a/tools/testing/selftests/netfilter/nft_nat.sh
+++ b/tools/testing/selftests/netfilter/nft_nat.sh
@@ -36,7 +36,11 @@ trap cleanup EXIT
 ip netns add ns1
 ip netns add ns2

-ip link add veth0 netns ns0 type veth peer name eth0 netns ns1
+ip link add veth0 netns ns0 type veth peer name eth0 netns ns1 > /dev/null 2>&1
+if [ $? -ne 0 ];then
+    echo "SKIP: No virtual ethernet pair device support in kernel"
+    exit $ksft_skip
+fi
 ip link add veth1 netns ns0 type veth peer name eth0 netns ns2

 ip -net ns0 link set lo up