Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says: ==================== pull-request: bpf-next 2019-05-31 The following pull-request contains BPF updates for your *net-next* tree. Lots of exciting new features in the first PR of this developement cycle! The main changes are: 1) misc verifier improvements, from Alexei. 2) bpftool can now convert btf to valid C, from Andrii. 3) verifier can insert explicit ZEXT insn when requested by 32-bit JITs. This feature greatly improves BPF speed on 32-bit architectures. From Jiong. 4) cgroups will now auto-detach bpf programs. This fixes issue of thousands bpf programs got stuck in dying cgroups. From Roman. 5) new bpf_send_signal() helper, from Yonghong. 6) cgroup inet skb programs can signal CN to the stack, from Lawrence. 7) miscellaneous cleanups, from many developers. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
@@ -6,6 +6,7 @@
|
||||
#include <linux/errno.h>
|
||||
#include <linux/jump_label.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/percpu-refcount.h>
|
||||
#include <linux/rbtree.h>
|
||||
#include <uapi/linux/bpf.h>
|
||||
|
||||
@@ -71,11 +72,17 @@ struct cgroup_bpf {
|
||||
u32 flags[MAX_BPF_ATTACH_TYPE];
|
||||
|
||||
/* temp storage for effective prog array used by prog_attach/detach */
|
||||
struct bpf_prog_array __rcu *inactive;
|
||||
struct bpf_prog_array *inactive;
|
||||
|
||||
/* reference counter used to detach bpf programs after cgroup removal */
|
||||
struct percpu_ref refcnt;
|
||||
|
||||
/* cgroup_bpf is released using a work queue */
|
||||
struct work_struct release_work;
|
||||
};
|
||||
|
||||
void cgroup_bpf_put(struct cgroup *cgrp);
|
||||
int cgroup_bpf_inherit(struct cgroup *cgrp);
|
||||
void cgroup_bpf_offline(struct cgroup *cgrp);
|
||||
|
||||
int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
|
||||
enum bpf_attach_type type, u32 flags);
|
||||
@@ -283,8 +290,8 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr,
|
||||
|
||||
struct bpf_prog;
|
||||
struct cgroup_bpf {};
|
||||
static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
|
||||
static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
|
||||
static inline void cgroup_bpf_offline(struct cgroup *cgrp) {}
|
||||
|
||||
static inline int cgroup_bpf_prog_attach(const union bpf_attr *attr,
|
||||
enum bpf_prog_type ptype,
|
||||
|
@@ -66,6 +66,11 @@ struct bpf_map_ops {
|
||||
u64 imm, u32 *off);
|
||||
};
|
||||
|
||||
struct bpf_map_memory {
|
||||
u32 pages;
|
||||
struct user_struct *user;
|
||||
};
|
||||
|
||||
struct bpf_map {
|
||||
/* The first two cachelines with read-mostly members of which some
|
||||
* are also accessed in fast-path (e.g. ops, max_entries).
|
||||
@@ -86,7 +91,7 @@ struct bpf_map {
|
||||
u32 btf_key_type_id;
|
||||
u32 btf_value_type_id;
|
||||
struct btf *btf;
|
||||
u32 pages;
|
||||
struct bpf_map_memory memory;
|
||||
bool unpriv_array;
|
||||
bool frozen; /* write-once */
|
||||
/* 48 bytes hole */
|
||||
@@ -94,8 +99,7 @@ struct bpf_map {
|
||||
/* The 3rd and 4th cacheline with misc members to avoid false sharing
|
||||
* particularly with refcounting.
|
||||
*/
|
||||
struct user_struct *user ____cacheline_aligned;
|
||||
atomic_t refcnt;
|
||||
atomic_t refcnt ____cacheline_aligned;
|
||||
atomic_t usercnt;
|
||||
struct work_struct work;
|
||||
char name[BPF_OBJ_NAME_LEN];
|
||||
@@ -370,6 +374,7 @@ struct bpf_prog_aux {
|
||||
u32 id;
|
||||
u32 func_cnt; /* used by non-func prog as the number of func progs */
|
||||
u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */
|
||||
bool verifier_zext; /* Zero extensions has been inserted by verifier. */
|
||||
bool offload_requested;
|
||||
struct bpf_prog **func;
|
||||
void *jit_data; /* JIT specific data. arch dependent */
|
||||
@@ -513,17 +518,17 @@ struct bpf_prog_array {
|
||||
};
|
||||
|
||||
struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags);
|
||||
void bpf_prog_array_free(struct bpf_prog_array __rcu *progs);
|
||||
int bpf_prog_array_length(struct bpf_prog_array __rcu *progs);
|
||||
int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
|
||||
void bpf_prog_array_free(struct bpf_prog_array *progs);
|
||||
int bpf_prog_array_length(struct bpf_prog_array *progs);
|
||||
int bpf_prog_array_copy_to_user(struct bpf_prog_array *progs,
|
||||
__u32 __user *prog_ids, u32 cnt);
|
||||
|
||||
void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs,
|
||||
void bpf_prog_array_delete_safe(struct bpf_prog_array *progs,
|
||||
struct bpf_prog *old_prog);
|
||||
int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array,
|
||||
int bpf_prog_array_copy_info(struct bpf_prog_array *array,
|
||||
u32 *prog_ids, u32 request_cnt,
|
||||
u32 *prog_cnt);
|
||||
int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
|
||||
int bpf_prog_array_copy(struct bpf_prog_array *old_array,
|
||||
struct bpf_prog *exclude_prog,
|
||||
struct bpf_prog *include_prog,
|
||||
struct bpf_prog_array **new_array);
|
||||
@@ -551,6 +556,56 @@ _out: \
|
||||
_ret; \
|
||||
})
|
||||
|
||||
/* To be used by __cgroup_bpf_run_filter_skb for EGRESS BPF progs
|
||||
* so BPF programs can request cwr for TCP packets.
|
||||
*
|
||||
* Current cgroup skb programs can only return 0 or 1 (0 to drop the
|
||||
* packet. This macro changes the behavior so the low order bit
|
||||
* indicates whether the packet should be dropped (0) or not (1)
|
||||
* and the next bit is a congestion notification bit. This could be
|
||||
* used by TCP to call tcp_enter_cwr()
|
||||
*
|
||||
* Hence, new allowed return values of CGROUP EGRESS BPF programs are:
|
||||
* 0: drop packet
|
||||
* 1: keep packet
|
||||
* 2: drop packet and cn
|
||||
* 3: keep packet and cn
|
||||
*
|
||||
* This macro then converts it to one of the NET_XMIT or an error
|
||||
* code that is then interpreted as drop packet (and no cn):
|
||||
* 0: NET_XMIT_SUCCESS skb should be transmitted
|
||||
* 1: NET_XMIT_DROP skb should be dropped and cn
|
||||
* 2: NET_XMIT_CN skb should be transmitted and cn
|
||||
* 3: -EPERM skb should be dropped
|
||||
*/
|
||||
#define BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(array, ctx, func) \
|
||||
({ \
|
||||
struct bpf_prog_array_item *_item; \
|
||||
struct bpf_prog *_prog; \
|
||||
struct bpf_prog_array *_array; \
|
||||
u32 ret; \
|
||||
u32 _ret = 1; \
|
||||
u32 _cn = 0; \
|
||||
preempt_disable(); \
|
||||
rcu_read_lock(); \
|
||||
_array = rcu_dereference(array); \
|
||||
_item = &_array->items[0]; \
|
||||
while ((_prog = READ_ONCE(_item->prog))) { \
|
||||
bpf_cgroup_storage_set(_item->cgroup_storage); \
|
||||
ret = func(_prog, ctx); \
|
||||
_ret &= (ret & 1); \
|
||||
_cn |= (ret & 2); \
|
||||
_item++; \
|
||||
} \
|
||||
rcu_read_unlock(); \
|
||||
preempt_enable(); \
|
||||
if (_ret) \
|
||||
_ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); \
|
||||
else \
|
||||
_ret = (_cn ? NET_XMIT_DROP : -EPERM); \
|
||||
_ret; \
|
||||
})
|
||||
|
||||
#define BPF_PROG_RUN_ARRAY(array, ctx, func) \
|
||||
__BPF_PROG_RUN_ARRAY(array, ctx, func, false)
|
||||
|
||||
@@ -595,9 +650,12 @@ struct bpf_map *__bpf_map_get(struct fd f);
|
||||
struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref);
|
||||
void bpf_map_put_with_uref(struct bpf_map *map);
|
||||
void bpf_map_put(struct bpf_map *map);
|
||||
int bpf_map_precharge_memlock(u32 pages);
|
||||
int bpf_map_charge_memlock(struct bpf_map *map, u32 pages);
|
||||
void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages);
|
||||
int bpf_map_charge_init(struct bpf_map_memory *mem, size_t size);
|
||||
void bpf_map_charge_finish(struct bpf_map_memory *mem);
|
||||
void bpf_map_charge_move(struct bpf_map_memory *dst,
|
||||
struct bpf_map_memory *src);
|
||||
void *bpf_map_area_alloc(size_t size, int numa_node);
|
||||
void bpf_map_area_free(void *base);
|
||||
void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr);
|
||||
|
@@ -36,9 +36,11 @@
|
||||
*/
|
||||
enum bpf_reg_liveness {
|
||||
REG_LIVE_NONE = 0, /* reg hasn't been read or written this branch */
|
||||
REG_LIVE_READ, /* reg was read, so we're sensitive to initial value */
|
||||
REG_LIVE_WRITTEN, /* reg was written first, screening off later reads */
|
||||
REG_LIVE_DONE = 4, /* liveness won't be updating this register anymore */
|
||||
REG_LIVE_READ32 = 0x1, /* reg was read, so we're sensitive to initial value */
|
||||
REG_LIVE_READ64 = 0x2, /* likewise, but full 64-bit content matters */
|
||||
REG_LIVE_READ = REG_LIVE_READ32 | REG_LIVE_READ64,
|
||||
REG_LIVE_WRITTEN = 0x4, /* reg was written first, screening off later reads */
|
||||
REG_LIVE_DONE = 0x8, /* liveness won't be updating this register anymore */
|
||||
};
|
||||
|
||||
struct bpf_reg_state {
|
||||
@@ -131,6 +133,11 @@ struct bpf_reg_state {
|
||||
* pointing to bpf_func_state.
|
||||
*/
|
||||
u32 frameno;
|
||||
/* Tracks subreg definition. The stored value is the insn_idx of the
|
||||
* writing insn. This is safe because subreg_def is used before any insn
|
||||
* patching which only happens after main verification finished.
|
||||
*/
|
||||
s32 subreg_def;
|
||||
enum bpf_reg_liveness live;
|
||||
};
|
||||
|
||||
@@ -187,6 +194,7 @@ struct bpf_func_state {
|
||||
struct bpf_verifier_state {
|
||||
/* call stack tracking */
|
||||
struct bpf_func_state *frame[MAX_CALL_FRAMES];
|
||||
u32 insn_idx;
|
||||
u32 curframe;
|
||||
u32 active_spin_lock;
|
||||
bool speculative;
|
||||
@@ -232,7 +240,9 @@ struct bpf_insn_aux_data {
|
||||
int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
|
||||
int sanitize_stack_off; /* stack slot to be cleared */
|
||||
bool seen; /* this insn was processed by the verifier */
|
||||
bool zext_dst; /* this insn zero extends dst reg */
|
||||
u8 alu_state; /* used in combination with alu_limit */
|
||||
bool prune_point;
|
||||
unsigned int orig_idx; /* original instruction index */
|
||||
};
|
||||
|
||||
|
@@ -924,4 +924,22 @@ static inline bool cgroup_task_frozen(struct task_struct *task)
|
||||
|
||||
#endif /* !CONFIG_CGROUPS */
|
||||
|
||||
#ifdef CONFIG_CGROUP_BPF
|
||||
static inline void cgroup_bpf_get(struct cgroup *cgrp)
|
||||
{
|
||||
percpu_ref_get(&cgrp->bpf.refcnt);
|
||||
}
|
||||
|
||||
static inline void cgroup_bpf_put(struct cgroup *cgrp)
|
||||
{
|
||||
percpu_ref_put(&cgrp->bpf.refcnt);
|
||||
}
|
||||
|
||||
#else /* CONFIG_CGROUP_BPF */
|
||||
|
||||
static inline void cgroup_bpf_get(struct cgroup *cgrp) {}
|
||||
static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
|
||||
|
||||
#endif /* CONFIG_CGROUP_BPF */
|
||||
|
||||
#endif /* _LINUX_CGROUP_H */
|
||||
|
@@ -160,6 +160,20 @@ struct ctl_table_header;
|
||||
.off = 0, \
|
||||
.imm = IMM })
|
||||
|
||||
/* Special form of mov32, used for doing explicit zero extension on dst. */
|
||||
#define BPF_ZEXT_REG(DST) \
|
||||
((struct bpf_insn) { \
|
||||
.code = BPF_ALU | BPF_MOV | BPF_X, \
|
||||
.dst_reg = DST, \
|
||||
.src_reg = DST, \
|
||||
.off = 0, \
|
||||
.imm = 1 })
|
||||
|
||||
static inline bool insn_is_zext(const struct bpf_insn *insn)
|
||||
{
|
||||
return insn->code == (BPF_ALU | BPF_MOV | BPF_X) && insn->imm == 1;
|
||||
}
|
||||
|
||||
/* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */
|
||||
#define BPF_LD_IMM64(DST, IMM) \
|
||||
BPF_LD_IMM64_RAW(DST, 0, IMM)
|
||||
@@ -512,7 +526,8 @@ struct bpf_prog {
|
||||
blinded:1, /* Was blinded */
|
||||
is_func:1, /* program is a bpf function */
|
||||
kprobe_override:1, /* Do we override a kprobe? */
|
||||
has_callchain_buf:1; /* callchain buffer allocated? */
|
||||
has_callchain_buf:1, /* callchain buffer allocated? */
|
||||
enforce_expected_attach_type:1; /* Enforce expected_attach_type checking at attach time */
|
||||
enum bpf_prog_type type; /* Type of BPF program */
|
||||
enum bpf_attach_type expected_attach_type; /* For some prog types */
|
||||
u32 len; /* Number of filter blocks */
|
||||
@@ -811,6 +826,7 @@ u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
|
||||
|
||||
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog);
|
||||
void bpf_jit_compile(struct bpf_prog *prog);
|
||||
bool bpf_jit_needs_zext(void);
|
||||
bool bpf_helper_changes_pkt_data(void *func);
|
||||
|
||||
static inline bool bpf_dump_raw_ok(void)
|
||||
|
Reference in New Issue
Block a user