Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next

Pull networking updates from David Miller: "Highlights: - Gustavo A. R. Silva keeps working on the implicit switch fallthru changes. - Support 802.11ax High-Efficiency wireless in cfg80211 et al, From Luca Coelho. - Re-enable ASPM in r8169, from Kai-Heng Feng. - Add virtual XFRM interfaces, which avoids all of the limitations of existing IPSEC tunnels. From Steffen Klassert. - Convert GRO over to use a hash table, so that when we have many flows active we don't traverse a long list during accumluation. - Many new self tests for routing, TC, tunnels, etc. Too many contributors to mention them all, but I'm really happy to keep seeing this stuff. - Hardware timestamping support for dpaa_eth/fsl-fman from Yangbo Lu. - Lots of cleanups and fixes in L2TP code from Guillaume Nault. - Add IPSEC offload support to netdevsim, from Shannon Nelson. - Add support for slotting with non-uniform distribution to netem packet scheduler, from Yousuk Seung. - Add UDP GSO support to mlx5e, from Boris Pismenny. - Support offloading of Team LAG in NFP, from John Hurley. - Allow to configure TX queue selection based upon RX queue, from Amritha Nambiar. - Support ethtool ring size configuration in aquantia, from Anton Mikaev. - Support DSCP and flowlabel per-transport in SCTP, from Xin Long. - Support list based batching and stack traversal of SKBs, this is very exciting work. From Edward Cree. - Busyloop optimizations in vhost_net, from Toshiaki Makita. - Introduce the ETF qdisc, which allows time based transmissions. IGB can offload this in hardware. From Vinicius Costa Gomes. - Add parameter support to devlink, from Moshe Shemesh. - Several multiplication and division optimizations for BPF JIT in nfp driver, from Jiong Wang. - Lots of prepatory work to make more of the packet scheduler layer lockless, when possible, from Vlad Buslov. - Add ACK filter and NAT awareness to sch_cake packet scheduler, from Toke Høiland-Jørgensen. - Support regions and region snapshots in devlink, from Alex Vesker. - Allow to attach XDP programs to both HW and SW at the same time on a given device, with initial support in nfp. From Jakub Kicinski. - Add TLS RX offload and support in mlx5, from Ilya Lesokhin. - Use PHYLIB in r8169 driver, from Heiner Kallweit. - All sorts of changes to support Spectrum 2 in mlxsw driver, from Ido Schimmel. - PTP support in mv88e6xxx DSA driver, from Andrew Lunn. - Make TCP_USER_TIMEOUT socket option more accurate, from Jon Maxwell. - Support for templates in packet scheduler classifier, from Jiri Pirko. - IPV6 support in RDS, from Ka-Cheong Poon. - Native tproxy support in nf_tables, from Máté Eckl. - Maintain IP fragment queue in an rbtree, but optimize properly for in-order frags. From Peter Oskolkov. - Improvde handling of ACKs on hole repairs, from Yuchung Cheng" * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1996 commits) bpf: test: fix spelling mistake "REUSEEPORT" -> "REUSEPORT" hv/netvsc: Fix NULL dereference at single queue mode fallback net: filter: mark expected switch fall-through xen-netfront: fix warn message as irq device name has '/' cxgb4: Add new T5 PCI device ids 0x50af and 0x50b0 net: dsa: mv88e6xxx: missing unlock on error path rds: fix building with IPV6=m inet/connection_sock: prefer _THIS_IP_ to current_text_addr net: dsa: mv88e6xxx: bitwise vs logical bug net: sock_diag: Fix spectre v1 gadget in __sock_diag_cmd() ieee802154: hwsim: using right kind of iteration net: hns3: Add vlan filter setting by ethtool command -K net: hns3: Set tx ring' tc info when netdev is up net: hns3: Remove tx ring BD len register in hns3_enet net: hns3: Fix desc num set to default when setting channel net: hns3: Fix for phy link issue when using marvell phy driver net: hns3: Fix for information of phydev lost problem when down/up net: hns3: Fix for command format parsing error in hclge_is_all_function_id_zero net: hns3: Add support for serdes loopback selftest bnxt_en: take coredump_record structure off stack ...
2018-08-15 15:04:25 -07:00
parent 0a957467c5 26a1ccc6c1
commit 9a76aba02a
1752 changed files with 119281 additions and 29178 deletions
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -75,6 +75,11 @@ struct bpf_lpm_trie_key {
 	__u8	data[0];	/* Arbitrary size */
 };

+struct bpf_cgroup_storage_key {
+	__u64	cgroup_inode_id;	/* cgroup inode id */
+	__u32	attach_type;		/* program attach type */
+};
+
 /* BPF syscall commands, see bpf(2) man-page for details. */
 enum bpf_cmd {
 	BPF_MAP_CREATE,
@@ -120,6 +125,8 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_CPUMAP,
 	BPF_MAP_TYPE_XSKMAP,
 	BPF_MAP_TYPE_SOCKHASH,
+	BPF_MAP_TYPE_CGROUP_STORAGE,
+	BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
 };

 enum bpf_prog_type {
@@ -144,6 +151,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
 	BPF_PROG_TYPE_LWT_SEG6LOCAL,
 	BPF_PROG_TYPE_LIRC_MODE2,
+	BPF_PROG_TYPE_SK_REUSEPORT,
 };

 enum bpf_attach_type {
@@ -1371,6 +1379,20 @@ union bpf_attr {
 * 		A 8-byte long non-decreasing number on success, or 0 if the
 * 		socket field is missing inside *skb*.
 *
+ * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx)
+ * 	Description
+ * 		Equivalent to bpf_get_socket_cookie() helper that accepts
+ * 		*skb*, but gets socket from **struct bpf_sock_addr** contex.
+ * 	Return
+ * 		A 8-byte long non-decreasing number.
+ *
+ * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx)
+ * 	Description
+ * 		Equivalent to bpf_get_socket_cookie() helper that accepts
+ * 		*skb*, but gets socket from **struct bpf_sock_ops** contex.
+ * 	Return
+ * 		A 8-byte long non-decreasing number.
+ *
 * u32 bpf_get_socket_uid(struct sk_buff *skb)
 * 	Return
 * 		The owner UID of the socket associated to *skb*. If the socket
@@ -1826,7 +1848,7 @@ union bpf_attr {
 * 		A non-negative value equal to or less than *size* on success,
 * 		or a negative error in case of failure.
 *
- * int skb_load_bytes_relative(const struct sk_buff *skb, u32 offset, void *to, u32 len, u32 start_header)
+ * int bpf_skb_load_bytes_relative(const struct sk_buff *skb, u32 offset, void *to, u32 len, u32 start_header)
 * 	Description
 * 		This helper is similar to **bpf_skb_load_bytes**\ () in that
 * 		it provides an easy way to load *len* bytes from *offset*
@@ -1877,7 +1899,7 @@ union bpf_attr {
 *		* < 0 if any input argument is invalid
 *		*   0 on success (packet is forwarded, nexthop neighbor exists)
 *		* > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the
- *		*     packet is not forwarded or needs assist from full stack
+ *		  packet is not forwarded or needs assist from full stack
 *
 * int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags)
 *	Description
@@ -2033,7 +2055,6 @@ union bpf_attr {
 *		This helper is only available is the kernel was compiled with
 *		the **CONFIG_BPF_LIRC_MODE2** configuration option set to
 *		"**y**".
- *
 *	Return
 *		0
 *
@@ -2053,7 +2074,6 @@ union bpf_attr {
 *		This helper is only available is the kernel was compiled with
 *		the **CONFIG_BPF_LIRC_MODE2** configuration option set to
 *		"**y**".
- *
 *	Return
 *		0
 *
@@ -2073,10 +2093,54 @@ union bpf_attr {
 * 	Return
 * 		The id is returned or 0 in case the id could not be retrieved.
 *
+ * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level)
+ *	Description
+ *		Return id of cgroup v2 that is ancestor of cgroup associated
+ *		with the *skb* at the *ancestor_level*.  The root cgroup is at
+ *		*ancestor_level* zero and each step down the hierarchy
+ *		increments the level. If *ancestor_level* == level of cgroup
+ *		associated with *skb*, then return value will be same as that
+ *		of **bpf_skb_cgroup_id**\ ().
+ *
+ *		The helper is useful to implement policies based on cgroups
+ *		that are upper in hierarchy than immediate cgroup associated
+ *		with *skb*.
+ *
+ *		The format of returned id and helper limitations are same as in
+ *		**bpf_skb_cgroup_id**\ ().
+ *	Return
+ *		The id is returned or 0 in case the id could not be retrieved.
+ *
 * u64 bpf_get_current_cgroup_id(void)
 * 	Return
 * 		A 64-bit integer containing the current cgroup id based
 * 		on the cgroup within which the current task is running.
+ *
+ * void* get_local_storage(void *map, u64 flags)
+ *	Description
+ *		Get the pointer to the local storage area.
+ *		The type and the size of the local storage is defined
+ *		by the *map* argument.
+ *		The *flags* meaning is specific for each map type,
+ *		and has to be 0 for cgroup local storage.
+ *
+ *		Depending on the bpf program type, a local storage area
+ *		can be shared between multiple instances of the bpf program,
+ *		running simultaneously.
+ *
+ *		A user should care about the synchronization by himself.
+ *		For example, by using the BPF_STX_XADD instruction to alter
+ *		the shared data.
+ *	Return
+ *		Pointer to the local storage area.
+ *
+ * int bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags)
+ *	Description
+ *		Select a SO_REUSEPORT sk from a	BPF_MAP_TYPE_REUSEPORT_ARRAY map
+ *		It checks the selected sk is matching the incoming
+ *		request in the skb.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
 */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2159,7 +2223,10 @@ union bpf_attr {
 	FN(rc_repeat),			\
 	FN(rc_keydown),			\
 	FN(skb_cgroup_id),		\
-	FN(get_current_cgroup_id),
+	FN(get_current_cgroup_id),	\
+	FN(get_local_storage),		\
+	FN(sk_select_reuseport),	\
+	FN(skb_ancestor_cgroup_id),

 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
 * function eBPF program intends to call
@@ -2376,6 +2443,30 @@ struct sk_msg_md {
 	__u32 local_port;	/* stored in host byte order */
 };

+struct sk_reuseport_md {
+	/*
+	 * Start of directly accessible data. It begins from
+	 * the tcp/udp header.
+	 */
+	void *data;
+	void *data_end;		/* End of directly accessible data */
+	/*
+	 * Total length of packet (starting from the tcp/udp header).
+	 * Note that the directly accessible bytes (data_end - data)
+	 * could be less than this "len".  Those bytes could be
+	 * indirectly read by a helper "bpf_skb_load_bytes()".
+	 */
+	__u32 len;
+	/*
+	 * Eth protocol in the mac header (network byte order). e.g.
+	 * ETH_P_IP(0x0800) and ETH_P_IPV6(0x86DD)
+	 */
+	__u32 eth_protocol;
+	__u32 ip_protocol;	/* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */
+	__u32 bind_inany;	/* Is sock bound to an INANY address? */
+	__u32 hash;		/* A hash of the packet 4 tuples */
+};
+
 #define BPF_TAG_SIZE	8

 struct bpf_prog_info {
@@ -2557,6 +2648,9 @@ enum {
 					 * Arg1: old_state
 					 * Arg2: new_state
 					 */
+	BPF_SOCK_OPS_TCP_LISTEN_CB,	/* Called on listen(2), right after
+					 * socket transition to LISTEN state.
+					 */
 };

 /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect