Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Daniel Borkmann says: ==================== pull-request: bpf 2018-07-01 The following pull-request contains BPF updates for your *net* tree. The main changes are: 1) A bpf_fib_lookup() helper fix to change the API before freeze to return an encoding of the FIB lookup result and return the nexthop device index in the params struct (instead of device index as return code that we had before), from David. 2) Various BPF JIT fixes to address syzkaller fallout, that is, do not reject progs when set_memory_*() fails since it could still be RO. Also arm32 JIT was not using bpf_jit_binary_lock_ro() API which was an issue, and a memory leak in s390 JIT found during review, from Daniel. 3) Multiple fixes for sockmap/hash to address most of the syzkaller triggered bugs. Usage with IPv6 was crashing, a GPF in bpf_tcp_close(), a missing sock_map_release() routine to hook up to callbacks, and a fix for an omitted bucket lock in sock_close(), from John. 4) Two bpftool fixes to remove duplicated error message on program load, and another one to close the libbpf object after program load. One additional fix for nfp driver's BPF offload to avoid stopping offload completely if replace of program failed, from Jakub. 5) Couple of BPF selftest fixes that bail out in some of the test scripts if the user does not have the right privileges, from Jeffrin. 6) Fixes in test_bpf for s390 when CONFIG_BPF_JIT_ALWAYS_ON is set where we need to set the flag that some of the test cases are expected to fail, from Kleber. 7) Fix to detangle BPF_LIRC_MODE2 dependency from CONFIG_CGROUP_BPF since it has no relation to it and lirc2 users often have configs without cgroups enabled and thus would not be able to use it, from Sean. 8) Fix a selftest failure in sockmap by removing a useless setrlimit() call that would set a too low limit where at the same time we are already including bpf_rlimit.h that does the job, from Yonghong. 9) Fix BPF selftest config with missing missing NET_SCHED, from Anders. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
2018-07-01 09:27:44 +09:00
parent 35e8c7ba08 bf2b866a2f
commit 271b955e52
22 changed files with 448 additions and 293 deletions
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -188,12 +188,38 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
 									      \
 	__ret;								      \
 })
+int cgroup_bpf_prog_attach(const union bpf_attr *attr,
+			   enum bpf_prog_type ptype, struct bpf_prog *prog);
+int cgroup_bpf_prog_detach(const union bpf_attr *attr,
+			   enum bpf_prog_type ptype);
+int cgroup_bpf_prog_query(const union bpf_attr *attr,
+			  union bpf_attr __user *uattr);
 #else

+struct bpf_prog;
 struct cgroup_bpf {};
 static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
 static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }

+static inline int cgroup_bpf_prog_attach(const union bpf_attr *attr,
+					 enum bpf_prog_type ptype,
+					 struct bpf_prog *prog)
+{
+	return -EINVAL;
+}
+
+static inline int cgroup_bpf_prog_detach(const union bpf_attr *attr,
+					 enum bpf_prog_type ptype)
+{
+	return -EINVAL;
+}
+
+static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
+					union bpf_attr __user *uattr)
+{
+	return -EINVAL;
+}
+
 #define cgroup_bpf_enabled (0)
 #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
 #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -696,6 +696,8 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map)
 struct sock  *__sock_map_lookup_elem(struct bpf_map *map, u32 key);
 struct sock  *__sock_hash_lookup_elem(struct bpf_map *map, void *key);
 int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type);
+int sockmap_get_from_fd(const union bpf_attr *attr, int type,
+			struct bpf_prog *prog);
 #else
 static inline struct sock  *__sock_map_lookup_elem(struct bpf_map *map, u32 key)
 {
@@ -714,6 +716,12 @@ static inline int sock_map_prog(struct bpf_map *map,
 {
 	return -EOPNOTSUPP;
 }
+
+static inline int sockmap_get_from_fd(const union bpf_attr *attr, int type,
+				      struct bpf_prog *prog)
+{
+	return -EINVAL;
+}
 #endif

 #if defined(CONFIG_XDP_SOCKETS)
--- a/include/linux/bpf_lirc.h
+++ b/include/linux/bpf_lirc.h
@@ -5,11 +5,12 @@
 #include <uapi/linux/bpf.h>

 #ifdef CONFIG_BPF_LIRC_MODE2
-int lirc_prog_attach(const union bpf_attr *attr);
+int lirc_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog);
 int lirc_prog_detach(const union bpf_attr *attr);
 int lirc_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr);
 #else
-static inline int lirc_prog_attach(const union bpf_attr *attr)
+static inline int lirc_prog_attach(const union bpf_attr *attr,
+				   struct bpf_prog *prog)
 {
 	return -EINVAL;
 }
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -470,9 +470,7 @@ struct sock_fprog_kern {
 };

 struct bpf_binary_header {
-	u16 pages;
-	u16 locked:1;
-
+	u32 pages;
 	/* Some arches need word alignment for their instructions */
 	u8 image[] __aligned(4);
 };
@@ -481,7 +479,7 @@ struct bpf_prog {
 	u16			pages;		/* Number of allocated pages */
 	u16			jited:1,	/* Is our filter JIT'ed? */
 				jit_requested:1,/* archs need to JIT the prog */
-				locked:1,	/* Program image locked? */
+				undo_set_mem:1,	/* Passed set_memory_ro() checkpoint */
 				gpl_compatible:1, /* Is filter GPL compatible? */
 				cb_access:1,	/* Is control block accessed? */
 				dst_needed:1,	/* Do we need dst entry? */
@@ -677,46 +675,24 @@ bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default)

 static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
 {
-#ifdef CONFIG_ARCH_HAS_SET_MEMORY
-	fp->locked = 1;
-	if (set_memory_ro((unsigned long)fp, fp->pages))
-		fp->locked = 0;
-#endif
+	fp->undo_set_mem = 1;
+	set_memory_ro((unsigned long)fp, fp->pages);
 }

 static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
 {
-#ifdef CONFIG_ARCH_HAS_SET_MEMORY
-	if (fp->locked) {
-		WARN_ON_ONCE(set_memory_rw((unsigned long)fp, fp->pages));
-		/* In case set_memory_rw() fails, we want to be the first
-		 * to crash here instead of some random place later on.
-		 */
-		fp->locked = 0;
-	}
-#endif
+	if (fp->undo_set_mem)
+		set_memory_rw((unsigned long)fp, fp->pages);
 }

 static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr)
 {
-#ifdef CONFIG_ARCH_HAS_SET_MEMORY
-	hdr->locked = 1;
-	if (set_memory_ro((unsigned long)hdr, hdr->pages))
-		hdr->locked = 0;
-#endif
+	set_memory_ro((unsigned long)hdr, hdr->pages);
 }

 static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr)
 {
-#ifdef CONFIG_ARCH_HAS_SET_MEMORY
-	if (hdr->locked) {
-		WARN_ON_ONCE(set_memory_rw((unsigned long)hdr, hdr->pages));
-		/* In case set_memory_rw() fails, we want to be the first
-		 * to crash here instead of some random place later on.
-		 */
-		hdr->locked = 0;
-	}
-#endif
+	set_memory_rw((unsigned long)hdr, hdr->pages);
 }

 static inline struct bpf_binary_header *
@@ -728,22 +704,6 @@ bpf_jit_binary_hdr(const struct bpf_prog *fp)
 	return (void *)addr;
 }

-#ifdef CONFIG_ARCH_HAS_SET_MEMORY
-static inline int bpf_prog_check_pages_ro_single(const struct bpf_prog *fp)
-{
-	if (!fp->locked)
-		return -ENOLCK;
-	if (fp->jited) {
-		const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp);
-
-		if (!hdr->locked)
-			return -ENOLCK;
-	}
-
-	return 0;
-}
-#endif
-
 int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap);
 static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
 {
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1857,7 +1857,8 @@ union bpf_attr {
 *		is resolved), the nexthop address is returned in ipv4_dst
 *		or ipv6_dst based on family, smac is set to mac address of
 *		egress device, dmac is set to nexthop mac address, rt_metric
- *		is set to metric from route (IPv4/IPv6 only).
+ *		is set to metric from route (IPv4/IPv6 only), and ifindex
+ *		is set to the device index of the nexthop from the FIB lookup.
 *
 *             *plen* argument is the size of the passed in struct.
 *             *flags* argument can be a combination of one or more of the
@@ -1873,9 +1874,10 @@ union bpf_attr {
 *             *ctx* is either **struct xdp_md** for XDP programs or
 *             **struct sk_buff** tc cls_act programs.
 *     Return
- *             Egress device index on success, 0 if packet needs to continue
- *             up the stack for further processing or a negative error in case
- *             of failure.
+ *		* < 0 if any input argument is invalid
+ *		*   0 on success (packet is forwarded, nexthop neighbor exists)
+ *		* > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the
+ *		*     packet is not forwarded or needs assist from full stack
 *
 * int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags)
 *	Description
@@ -2612,6 +2614,18 @@ struct bpf_raw_tracepoint_args {
 #define BPF_FIB_LOOKUP_DIRECT  BIT(0)
 #define BPF_FIB_LOOKUP_OUTPUT  BIT(1)

+enum {
+	BPF_FIB_LKUP_RET_SUCCESS,      /* lookup successful */
+	BPF_FIB_LKUP_RET_BLACKHOLE,    /* dest is blackholed; can be dropped */
+	BPF_FIB_LKUP_RET_UNREACHABLE,  /* dest is unreachable; can be dropped */
+	BPF_FIB_LKUP_RET_PROHIBIT,     /* dest not allowed; can be dropped */
+	BPF_FIB_LKUP_RET_NOT_FWDED,    /* packet is not forwarded */
+	BPF_FIB_LKUP_RET_FWD_DISABLED, /* fwding is not enabled on ingress */
+	BPF_FIB_LKUP_RET_UNSUPP_LWT,   /* fwd requires encapsulation */
+	BPF_FIB_LKUP_RET_NO_NEIGH,     /* no neighbor entry for nh */
+	BPF_FIB_LKUP_RET_FRAG_NEEDED,  /* fragmentation required to fwd */
+};
+
 struct bpf_fib_lookup {
 	/* input:  network family for lookup (AF_INET, AF_INET6)
 	 * output: network family of egress nexthop
@@ -2625,7 +2639,11 @@ struct bpf_fib_lookup {

 	/* total length of packet from network header - used for MTU check */
 	__u16	tot_len;
-	__u32	ifindex;  /* L3 device index for lookup */
+
+	/* input: L3 device index for lookup
+	 * output: device index from FIB lookup
+	 */
+	__u32	ifindex;

 	union {
 		/* inputs to lookup */