Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Alexei Starovoitov says:

====================
pull-request: bpf-next 2018-05-24

The following pull-request contains BPF updates for your *net-next* tree.

The main changes are:

1) Björn Töpel cleans up AF_XDP (removes rebind, explicit cache alignment from uapi, etc).

2) David Ahern adds mtu checks to bpf_ipv{4,6}_fib_lookup() helpers.

3) Jesper Dangaard Brouer adds bulking support to ndo_xdp_xmit.

4) Jiong Wang adds support for indirect and arithmetic shifts to NFP.

5) Martin KaFai Lau cleans up BTF uapi and makes the btf_header extensible.

6) Mathieu Xhonneux adds an End.BPF action to seg6local with BPF helpers that allow
   a program to edit/grow/shrink an SRH and apply generic SRv6 actions to a packet.

7) Sandipan Das adds support for bpf2bpf function calls in ppc64 JIT.

8) Yonghong Song adds BPF_TASK_FD_QUERY command for introspection of tracing events.

9) Other miscellaneous fixes from Gustavo A. R. Silva, Sirio Balmelli, John Fastabend, and Magnus Karlsson.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
90 changed files with 5215 additions and 814 deletions

View File

@@ -352,7 +352,7 @@ static void array_map_seq_show_elem(struct bpf_map *map, void *key,
}
seq_printf(m, "%u: ", *(u32 *)key);
btf_type_seq_show(map->btf, map->btf_value_id, value, m);
btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
seq_puts(m, "\n");
rcu_read_unlock();

View File

@@ -12,6 +12,7 @@
#include <linux/uaccess.h>
#include <linux/kernel.h>
#include <linux/idr.h>
#include <linux/sort.h>
#include <linux/bpf_verifier.h>
#include <linux/btf.h>
@@ -162,13 +163,16 @@
#define BITS_ROUNDUP_BYTES(bits) \
(BITS_ROUNDDOWN_BYTES(bits) + !!BITS_PER_BYTE_MASKED(bits))
#define BTF_INFO_MASK 0x0f00ffff
#define BTF_INT_MASK 0x0fffffff
#define BTF_TYPE_ID_VALID(type_id) ((type_id) <= BTF_MAX_TYPE)
#define BTF_STR_OFFSET_VALID(name_off) ((name_off) <= BTF_MAX_NAME_OFFSET)
/* 16MB for 64k structs and each has 16 members and
* a few MB of space for the string section.
* The hard limit is S32_MAX.
*/
#define BTF_MAX_SIZE (16 * 1024 * 1024)
/* 64k. We can raise it later. The hard limit is S32_MAX. */
#define BTF_MAX_NR_TYPES 65535
#define for_each_member(i, struct_type, member) \
for (i = 0, member = btf_type_member(struct_type); \
@@ -184,15 +188,13 @@ static DEFINE_IDR(btf_idr);
static DEFINE_SPINLOCK(btf_idr_lock);
struct btf {
union {
struct btf_header *hdr;
void *data;
};
void *data;
struct btf_type **types;
u32 *resolved_ids;
u32 *resolved_sizes;
const char *strings;
void *nohdr_data;
struct btf_header hdr;
u32 nr_types;
u32 types_size;
u32 data_size;
@@ -228,6 +230,11 @@ enum resolve_mode {
#define MAX_RESOLVE_DEPTH 32
struct btf_sec_info {
u32 off;
u32 len;
};
struct btf_verifier_env {
struct btf *btf;
u8 *visit_states;
@@ -379,8 +386,6 @@ static const char *btf_int_encoding_str(u8 encoding)
return "CHAR";
else if (encoding == BTF_INT_BOOL)
return "BOOL";
else if (encoding == BTF_INT_VARARGS)
return "VARARGS";
else
return "UNKN";
}
@@ -417,16 +422,16 @@ static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t)
static bool btf_name_offset_valid(const struct btf *btf, u32 offset)
{
return !BTF_STR_TBL_ELF_ID(offset) &&
BTF_STR_OFFSET(offset) < btf->hdr->str_len;
return BTF_STR_OFFSET_VALID(offset) &&
offset < btf->hdr.str_len;
}
static const char *btf_name_by_offset(const struct btf *btf, u32 offset)
{
if (!BTF_STR_OFFSET(offset))
if (!offset)
return "(anon)";
else if (BTF_STR_OFFSET(offset) < btf->hdr->str_len)
return &btf->strings[BTF_STR_OFFSET(offset)];
else if (offset < btf->hdr.str_len)
return &btf->strings[offset];
else
return "(invalid-name-offset)";
}
@@ -439,6 +444,28 @@ static const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id)
return btf->types[type_id];
}
/*
* Regular int is not a bit field and it must be either
* u8/u16/u32/u64.
*/
static bool btf_type_int_is_regular(const struct btf_type *t)
{
u16 nr_bits, nr_bytes;
u32 int_data;
int_data = btf_type_int(t);
nr_bits = BTF_INT_BITS(int_data);
nr_bytes = BITS_ROUNDUP_BYTES(nr_bits);
if (BITS_PER_BYTE_MASKED(nr_bits) ||
BTF_INT_OFFSET(int_data) ||
(nr_bytes != sizeof(u8) && nr_bytes != sizeof(u16) &&
nr_bytes != sizeof(u32) && nr_bytes != sizeof(u64))) {
return false;
}
return true;
}
__printf(2, 3) static void __btf_verifier_log(struct bpf_verifier_log *log,
const char *fmt, ...)
{
@@ -536,7 +563,8 @@ static void btf_verifier_log_member(struct btf_verifier_env *env,
__btf_verifier_log(log, "\n");
}
static void btf_verifier_log_hdr(struct btf_verifier_env *env)
static void btf_verifier_log_hdr(struct btf_verifier_env *env,
u32 btf_data_size)
{
struct bpf_verifier_log *log = &env->log;
const struct btf *btf = env->btf;
@@ -545,19 +573,16 @@ static void btf_verifier_log_hdr(struct btf_verifier_env *env)
if (!bpf_verifier_log_needed(log))
return;
hdr = btf->hdr;
hdr = &btf->hdr;
__btf_verifier_log(log, "magic: 0x%x\n", hdr->magic);
__btf_verifier_log(log, "version: %u\n", hdr->version);
__btf_verifier_log(log, "flags: 0x%x\n", hdr->flags);
__btf_verifier_log(log, "parent_label: %u\n", hdr->parent_label);
__btf_verifier_log(log, "parent_name: %u\n", hdr->parent_name);
__btf_verifier_log(log, "label_off: %u\n", hdr->label_off);
__btf_verifier_log(log, "object_off: %u\n", hdr->object_off);
__btf_verifier_log(log, "func_off: %u\n", hdr->func_off);
__btf_verifier_log(log, "hdr_len: %u\n", hdr->hdr_len);
__btf_verifier_log(log, "type_off: %u\n", hdr->type_off);
__btf_verifier_log(log, "type_len: %u\n", hdr->type_len);
__btf_verifier_log(log, "str_off: %u\n", hdr->str_off);
__btf_verifier_log(log, "str_len: %u\n", hdr->str_len);
__btf_verifier_log(log, "btf_total_size: %u\n", btf->data_size);
__btf_verifier_log(log, "btf_total_size: %u\n", btf_data_size);
}
static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t)
@@ -574,13 +599,13 @@ static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t)
struct btf_type **new_types;
u32 expand_by, new_size;
if (btf->types_size == BTF_MAX_NR_TYPES) {
if (btf->types_size == BTF_MAX_TYPE) {
btf_verifier_log(env, "Exceeded max num of types");
return -E2BIG;
}
expand_by = max_t(u32, btf->types_size >> 2, 16);
new_size = min_t(u32, BTF_MAX_NR_TYPES,
new_size = min_t(u32, BTF_MAX_TYPE,
btf->types_size + expand_by);
new_types = kvzalloc(new_size * sizeof(*new_types),
@@ -910,6 +935,12 @@ static s32 btf_int_check_meta(struct btf_verifier_env *env,
}
int_data = btf_type_int(t);
if (int_data & ~BTF_INT_MASK) {
btf_verifier_log_basic(env, t, "Invalid int_data:%x",
int_data);
return -EINVAL;
}
nr_bits = BTF_INT_BITS(int_data) + BTF_INT_OFFSET(int_data);
if (nr_bits > BITS_PER_U64) {
@@ -923,12 +954,17 @@ static s32 btf_int_check_meta(struct btf_verifier_env *env,
return -EINVAL;
}
/*
* Only one of the encoding bits is allowed and it
* should be sufficient for the pretty print purpose (i.e. decoding).
* Multiple bits can be allowed later if it is found
* to be insufficient.
*/
encoding = BTF_INT_ENCODING(int_data);
if (encoding &&
encoding != BTF_INT_SIGNED &&
encoding != BTF_INT_CHAR &&
encoding != BTF_INT_BOOL &&
encoding != BTF_INT_VARARGS) {
encoding != BTF_INT_BOOL) {
btf_verifier_log_type(env, t, "Unsupported encoding");
return -ENOTSUPP;
}
@@ -1102,7 +1138,7 @@ static int btf_ref_type_check_meta(struct btf_verifier_env *env,
return -EINVAL;
}
if (BTF_TYPE_PARENT(t->type)) {
if (!BTF_TYPE_ID_VALID(t->type)) {
btf_verifier_log_type(env, t, "Invalid type_id");
return -EINVAL;
}
@@ -1306,14 +1342,16 @@ static s32 btf_array_check_meta(struct btf_verifier_env *env,
return -EINVAL;
}
/* We are a little forgiving on array->index_type since
* the kernel is not using it.
/* Array elem type and index type cannot be in type void,
* so !array->type and !array->index_type are not allowed.
*/
/* Array elem cannot be in type void,
* so !array->type is not allowed.
*/
if (!array->type || BTF_TYPE_PARENT(array->type)) {
btf_verifier_log_type(env, t, "Invalid type_id");
if (!array->type || !BTF_TYPE_ID_VALID(array->type)) {
btf_verifier_log_type(env, t, "Invalid elem");
return -EINVAL;
}
if (!array->index_type || !BTF_TYPE_ID_VALID(array->index_type)) {
btf_verifier_log_type(env, t, "Invalid index");
return -EINVAL;
}
@@ -1326,11 +1364,32 @@ static int btf_array_resolve(struct btf_verifier_env *env,
const struct resolve_vertex *v)
{
const struct btf_array *array = btf_type_array(v->t);
const struct btf_type *elem_type;
u32 elem_type_id = array->type;
const struct btf_type *elem_type, *index_type;
u32 elem_type_id, index_type_id;
struct btf *btf = env->btf;
u32 elem_size;
/* Check array->index_type */
index_type_id = array->index_type;
index_type = btf_type_by_id(btf, index_type_id);
if (btf_type_is_void_or_null(index_type)) {
btf_verifier_log_type(env, v->t, "Invalid index");
return -EINVAL;
}
if (!env_type_is_resolve_sink(env, index_type) &&
!env_type_is_resolved(env, index_type_id))
return env_stack_push(env, index_type, index_type_id);
index_type = btf_type_id_size(btf, &index_type_id, NULL);
if (!index_type || !btf_type_is_int(index_type) ||
!btf_type_int_is_regular(index_type)) {
btf_verifier_log_type(env, v->t, "Invalid index");
return -EINVAL;
}
/* Check array->type */
elem_type_id = array->type;
elem_type = btf_type_by_id(btf, elem_type_id);
if (btf_type_is_void_or_null(elem_type)) {
btf_verifier_log_type(env, v->t,
@@ -1348,22 +1407,9 @@ static int btf_array_resolve(struct btf_verifier_env *env,
return -EINVAL;
}
if (btf_type_is_int(elem_type)) {
int int_type_data = btf_type_int(elem_type);
u16 nr_bits = BTF_INT_BITS(int_type_data);
u16 nr_bytes = BITS_ROUNDUP_BYTES(nr_bits);
/* Put more restriction on array of int. The int cannot
* be a bit field and it must be either u8/u16/u32/u64.
*/
if (BITS_PER_BYTE_MASKED(nr_bits) ||
BTF_INT_OFFSET(int_type_data) ||
(nr_bytes != sizeof(u8) && nr_bytes != sizeof(u16) &&
nr_bytes != sizeof(u32) && nr_bytes != sizeof(u64))) {
btf_verifier_log_type(env, v->t,
"Invalid array of int");
return -EINVAL;
}
if (btf_type_is_int(elem_type) && !btf_type_int_is_regular(elem_type)) {
btf_verifier_log_type(env, v->t, "Invalid array of int");
return -EINVAL;
}
if (array->nelems && elem_size > U32_MAX / array->nelems) {
@@ -1473,7 +1519,7 @@ static s32 btf_struct_check_meta(struct btf_verifier_env *env,
}
/* A member cannot be in type void */
if (!member->type || BTF_TYPE_PARENT(member->type)) {
if (!member->type || !BTF_TYPE_ID_VALID(member->type)) {
btf_verifier_log_member(env, t, member,
"Invalid type_id");
return -EINVAL;
@@ -1726,6 +1772,12 @@ static s32 btf_check_meta(struct btf_verifier_env *env,
}
meta_left -= sizeof(*t);
if (t->info & ~BTF_INFO_MASK) {
btf_verifier_log(env, "[%u] Invalid btf_info:%x",
env->log_type_id, t->info);
return -EINVAL;
}
if (BTF_INFO_KIND(t->info) > BTF_KIND_MAX ||
BTF_INFO_KIND(t->info) == BTF_KIND_UNKN) {
btf_verifier_log(env, "[%u] Invalid kind:%u",
@@ -1754,9 +1806,9 @@ static int btf_check_all_metas(struct btf_verifier_env *env)
struct btf_header *hdr;
void *cur, *end;
hdr = btf->hdr;
hdr = &btf->hdr;
cur = btf->nohdr_data + hdr->type_off;
end = btf->nohdr_data + hdr->str_off;
end = btf->nohdr_data + hdr->type_len;
env->log_type_id = 1;
while (cur < end) {
@@ -1866,8 +1918,20 @@ static int btf_check_all_types(struct btf_verifier_env *env)
static int btf_parse_type_sec(struct btf_verifier_env *env)
{
const struct btf_header *hdr = &env->btf->hdr;
int err;
/* Type section must align to 4 bytes */
if (hdr->type_off & (sizeof(u32) - 1)) {
btf_verifier_log(env, "Unaligned type_off");
return -EINVAL;
}
if (!hdr->type_len) {
btf_verifier_log(env, "No type found");
return -EINVAL;
}
err = btf_check_all_metas(env);
if (err)
return err;
@@ -1881,10 +1945,15 @@ static int btf_parse_str_sec(struct btf_verifier_env *env)
struct btf *btf = env->btf;
const char *start, *end;
hdr = btf->hdr;
hdr = &btf->hdr;
start = btf->nohdr_data + hdr->str_off;
end = start + hdr->str_len;
if (end != btf->data + btf->data_size) {
btf_verifier_log(env, "String section is not at the end");
return -EINVAL;
}
if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET ||
start[0] || end[-1]) {
btf_verifier_log(env, "Invalid string section");
@@ -1896,20 +1965,121 @@ static int btf_parse_str_sec(struct btf_verifier_env *env)
return 0;
}
static int btf_parse_hdr(struct btf_verifier_env *env)
static const size_t btf_sec_info_offset[] = {
offsetof(struct btf_header, type_off),
offsetof(struct btf_header, str_off),
};
static int btf_sec_info_cmp(const void *a, const void *b)
{
const struct btf_sec_info *x = a;
const struct btf_sec_info *y = b;
return (int)(x->off - y->off) ? : (int)(x->len - y->len);
}
static int btf_check_sec_info(struct btf_verifier_env *env,
u32 btf_data_size)
{
struct btf_sec_info secs[ARRAY_SIZE(btf_sec_info_offset)];
u32 total, expected_total, i;
const struct btf_header *hdr;
const struct btf *btf;
btf = env->btf;
hdr = &btf->hdr;
/* Populate the secs from hdr */
for (i = 0; i < ARRAY_SIZE(btf_sec_info_offset); i++)
secs[i] = *(struct btf_sec_info *)((void *)hdr +
btf_sec_info_offset[i]);
sort(secs, ARRAY_SIZE(btf_sec_info_offset),
sizeof(struct btf_sec_info), btf_sec_info_cmp, NULL);
/* Check for gaps and overlap among sections */
total = 0;
expected_total = btf_data_size - hdr->hdr_len;
for (i = 0; i < ARRAY_SIZE(btf_sec_info_offset); i++) {
if (expected_total < secs[i].off) {
btf_verifier_log(env, "Invalid section offset");
return -EINVAL;
}
if (total < secs[i].off) {
/* gap */
btf_verifier_log(env, "Unsupported section found");
return -EINVAL;
}
if (total > secs[i].off) {
btf_verifier_log(env, "Section overlap found");
return -EINVAL;
}
if (expected_total - total < secs[i].len) {
btf_verifier_log(env,
"Total section length too long");
return -EINVAL;
}
total += secs[i].len;
}
/* There is data other than hdr and known sections */
if (expected_total != total) {
btf_verifier_log(env, "Unsupported section found");
return -EINVAL;
}
return 0;
}
static int btf_parse_hdr(struct btf_verifier_env *env, void __user *btf_data,
u32 btf_data_size)
{
const struct btf_header *hdr;
struct btf *btf = env->btf;
u32 meta_left;
u32 hdr_len, hdr_copy;
/*
* Minimal part of the "struct btf_header" that
* contains the hdr_len.
*/
struct btf_min_header {
u16 magic;
u8 version;
u8 flags;
u32 hdr_len;
} __user *min_hdr;
struct btf *btf;
int err;
if (btf->data_size < sizeof(*hdr)) {
btf = env->btf;
min_hdr = btf_data;
if (btf_data_size < sizeof(*min_hdr)) {
btf_verifier_log(env, "hdr_len not found");
return -EINVAL;
}
if (get_user(hdr_len, &min_hdr->hdr_len))
return -EFAULT;
if (btf_data_size < hdr_len) {
btf_verifier_log(env, "btf_header not found");
return -EINVAL;
}
btf_verifier_log_hdr(env);
err = bpf_check_uarg_tail_zero(btf_data, sizeof(btf->hdr), hdr_len);
if (err) {
if (err == -E2BIG)
btf_verifier_log(env, "Unsupported btf_header");
return err;
}
hdr_copy = min_t(u32, hdr_len, sizeof(btf->hdr));
if (copy_from_user(&btf->hdr, btf_data, hdr_copy))
return -EFAULT;
hdr = &btf->hdr;
btf_verifier_log_hdr(env, btf_data_size);
hdr = btf->hdr;
if (hdr->magic != BTF_MAGIC) {
btf_verifier_log(env, "Invalid magic");
return -EINVAL;
@@ -1925,26 +2095,14 @@ static int btf_parse_hdr(struct btf_verifier_env *env)
return -ENOTSUPP;
}
meta_left = btf->data_size - sizeof(*hdr);
if (!meta_left) {
if (btf_data_size == hdr->hdr_len) {
btf_verifier_log(env, "No data");
return -EINVAL;
}
if (meta_left < hdr->type_off || hdr->str_off <= hdr->type_off ||
/* Type section must align to 4 bytes */
hdr->type_off & (sizeof(u32) - 1)) {
btf_verifier_log(env, "Invalid type_off");
return -EINVAL;
}
if (meta_left < hdr->str_off ||
meta_left - hdr->str_off < hdr->str_len) {
btf_verifier_log(env, "Invalid str_off or str_len");
return -EINVAL;
}
btf->nohdr_data = btf->hdr + 1;
err = btf_check_sec_info(env, btf_data_size);
if (err)
return err;
return 0;
}
@@ -1987,6 +2145,11 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size,
err = -ENOMEM;
goto errout;
}
env->btf = btf;
err = btf_parse_hdr(env, btf_data, btf_data_size);
if (err)
goto errout;
data = kvmalloc(btf_data_size, GFP_KERNEL | __GFP_NOWARN);
if (!data) {
@@ -1996,18 +2159,13 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size,
btf->data = data;
btf->data_size = btf_data_size;
btf->nohdr_data = btf->data + btf->hdr.hdr_len;
if (copy_from_user(data, btf_data, btf_data_size)) {
err = -EFAULT;
goto errout;
}
env->btf = btf;
err = btf_parse_hdr(env);
if (err)
goto errout;
err = btf_parse_str_sec(env);
if (err)
goto errout;
@@ -2016,16 +2174,14 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size,
if (err)
goto errout;
if (!err && log->level && bpf_verifier_log_full(log)) {
if (log->level && bpf_verifier_log_full(log)) {
err = -ENOSPC;
goto errout;
}
if (!err) {
btf_verifier_env_free(env);
refcount_set(&btf->refcnt, 1);
return btf;
}
btf_verifier_env_free(env);
refcount_set(&btf->refcnt, 1);
return btf;
errout:
btf_verifier_env_free(env);

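For orientation, the btf.c hunks above all follow from the reworked, extensible BTF header: struct btf now embeds a copy of the header instead of pointing into the raw data, hdr_len tells the kernel how much header the user supplied, and every section offset is taken relative to the end of the header. The sketch below shows the header layout these checks assume; the field names are lifted from the hunks above, and the authoritative definition is the BTF uapi header, not this note.

/* Sketch only: extensible BTF header as implied by btf_parse_hdr() and
 * btf_check_sec_info() above. Section offsets are relative to the end of
 * the header (btf->nohdr_data = btf->data + hdr->hdr_len), type_off must
 * be 4-byte aligned, and the string section must end exactly at
 * btf->data + btf->data_size.
 */
struct btf_header {
	__u16	magic;		/* BTF_MAGIC */
	__u8	version;
	__u8	flags;
	__u32	hdr_len;	/* size of this header; lets it grow later */
	__u32	type_off;	/* start of the type section */
	__u32	type_len;	/* length of the type section */
	__u32	str_off;	/* start of the string section */
	__u32	str_len;	/* length of the string section */
};

Note how btf_parse_hdr() copies at most sizeof(struct btf_header) bytes (hdr_copy) and uses bpf_check_uarg_tail_zero() to reject a larger header whose extra bytes are non-zero; that is what keeps future extensions of the header backward compatible.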
View File

@@ -578,7 +578,7 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
err = __ptr_ring_produce(q, xdpf);
if (err) {
drops++;
xdp_return_frame(xdpf);
xdp_return_frame_rx_napi(xdpf);
}
processed++;
}

View File

@@ -48,15 +48,25 @@
* calls will fail at this point.
*/
#include <linux/bpf.h>
#include <net/xdp.h>
#include <linux/filter.h>
#include <trace/events/xdp.h>
#define DEV_CREATE_FLAG_MASK \
(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
#define DEV_MAP_BULK_SIZE 16
struct xdp_bulk_queue {
struct xdp_frame *q[DEV_MAP_BULK_SIZE];
struct net_device *dev_rx;
unsigned int count;
};
struct bpf_dtab_netdev {
struct net_device *dev;
struct net_device *dev; /* must be first member, due to tracepoint */
struct bpf_dtab *dtab;
unsigned int bit;
struct xdp_bulk_queue __percpu *bulkq;
struct rcu_head rcu;
};
@@ -206,6 +216,50 @@ void __dev_map_insert_ctx(struct bpf_map *map, u32 bit)
__set_bit(bit, bitmap);
}
static int bq_xmit_all(struct bpf_dtab_netdev *obj,
struct xdp_bulk_queue *bq)
{
struct net_device *dev = obj->dev;
int sent = 0, drops = 0, err = 0;
int i;
if (unlikely(!bq->count))
return 0;
for (i = 0; i < bq->count; i++) {
struct xdp_frame *xdpf = bq->q[i];
prefetch(xdpf);
}
sent = dev->netdev_ops->ndo_xdp_xmit(dev, bq->count, bq->q);
if (sent < 0) {
err = sent;
sent = 0;
goto error;
}
drops = bq->count - sent;
out:
bq->count = 0;
trace_xdp_devmap_xmit(&obj->dtab->map, obj->bit,
sent, drops, bq->dev_rx, dev, err);
bq->dev_rx = NULL;
return 0;
error:
/* If ndo_xdp_xmit fails with an errno, no frames have been
* xmit'ed and it's our responsibility to free them all.
*/
for (i = 0; i < bq->count; i++) {
struct xdp_frame *xdpf = bq->q[i];
/* RX path under NAPI protection, can return frames faster */
xdp_return_frame_rx_napi(xdpf);
drops++;
}
goto out;
}
/* __dev_map_flush is called from xdp_do_flush_map() which _must_ be signaled
* from the driver before returning from its napi->poll() routine. The poll()
* routine is called either from busy_poll context or net_rx_action signaled
@@ -221,6 +275,7 @@ void __dev_map_flush(struct bpf_map *map)
for_each_set_bit(bit, bitmap, map->max_entries) {
struct bpf_dtab_netdev *dev = READ_ONCE(dtab->netdev_map[bit]);
struct xdp_bulk_queue *bq;
struct net_device *netdev;
/* This is possible if the dev entry is removed by user space
@@ -230,6 +285,9 @@ void __dev_map_flush(struct bpf_map *map)
continue;
__clear_bit(bit, bitmap);
bq = this_cpu_ptr(dev->bulkq);
bq_xmit_all(dev, bq);
netdev = dev->dev;
if (likely(netdev->netdev_ops->ndo_xdp_flush))
netdev->netdev_ops->ndo_xdp_flush(netdev);
@@ -240,21 +298,61 @@ void __dev_map_flush(struct bpf_map *map)
* update happens in parallel here, a dev_put won't happen until after reading the
* ifindex.
*/
struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
struct bpf_dtab_netdev *dev;
struct bpf_dtab_netdev *obj;
if (key >= map->max_entries)
return NULL;
dev = READ_ONCE(dtab->netdev_map[key]);
return dev ? dev->dev : NULL;
obj = READ_ONCE(dtab->netdev_map[key]);
return obj;
}
/* Runs under RCU-read-side, plus in softirq under NAPI protection.
* Thus, safe percpu variable access.
*/
static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf,
struct net_device *dev_rx)
{
struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq);
if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
bq_xmit_all(obj, bq);
/* Ingress dev_rx will be the same for all xdp_frames in
* bulk_queue, because bq is stored per-CPU and must be flushed
* from the net_device driver's NAPI func end.
*/
if (!bq->dev_rx)
bq->dev_rx = dev_rx;
bq->q[bq->count++] = xdpf;
return 0;
}
int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
struct net_device *dev_rx)
{
struct net_device *dev = dst->dev;
struct xdp_frame *xdpf;
if (!dev->netdev_ops->ndo_xdp_xmit)
return -EOPNOTSUPP;
xdpf = convert_to_xdp_frame(xdp);
if (unlikely(!xdpf))
return -EOVERFLOW;
return bq_enqueue(dst, xdpf, dev_rx);
}
static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
{
struct net_device *dev = __dev_map_lookup_elem(map, *(u32 *)key);
struct bpf_dtab_netdev *obj = __dev_map_lookup_elem(map, *(u32 *)key);
struct net_device *dev = obj ? obj->dev : NULL;
return dev ? &dev->ifindex : NULL;
}
@@ -263,13 +361,18 @@ static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
{
if (dev->dev->netdev_ops->ndo_xdp_flush) {
struct net_device *fl = dev->dev;
struct xdp_bulk_queue *bq;
unsigned long *bitmap;
int cpu;
for_each_online_cpu(cpu) {
bitmap = per_cpu_ptr(dev->dtab->flush_needed, cpu);
__clear_bit(dev->bit, bitmap);
bq = per_cpu_ptr(dev->bulkq, cpu);
bq_xmit_all(dev, bq);
fl->netdev_ops->ndo_xdp_flush(dev->dev);
}
}
@@ -281,6 +384,7 @@ static void __dev_map_entry_free(struct rcu_head *rcu)
dev = container_of(rcu, struct bpf_dtab_netdev, rcu);
dev_map_flush_old(dev);
free_percpu(dev->bulkq);
dev_put(dev->dev);
kfree(dev);
}
@@ -313,6 +417,7 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
struct net *net = current->nsproxy->net_ns;
gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
struct bpf_dtab_netdev *dev, *old_dev;
u32 i = *(u32 *)key;
u32 ifindex = *(u32 *)value;
@@ -327,13 +432,20 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
if (!ifindex) {
dev = NULL;
} else {
dev = kmalloc_node(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN,
map->numa_node);
dev = kmalloc_node(sizeof(*dev), gfp, map->numa_node);
if (!dev)
return -ENOMEM;
dev->bulkq = __alloc_percpu_gfp(sizeof(*dev->bulkq),
sizeof(void *), gfp);
if (!dev->bulkq) {
kfree(dev);
return -ENOMEM;
}
dev->dev = dev_get_by_index(net, ifindex);
if (!dev->dev) {
free_percpu(dev->bulkq);
kfree(dev);
return -EINVAL;
}
@@ -405,6 +517,9 @@ static struct notifier_block dev_map_notifier = {
static int __init dev_map_init(void)
{
/* Assure tracepoint shadow struct _bpf_dtab_netdev is in sync */
BUILD_BUG_ON(offsetof(struct bpf_dtab_netdev, dev) !=
offsetof(struct _bpf_dtab_netdev, dev));
register_netdevice_notifier(&dev_map_notifier);
return 0;
}

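The bulking path above relies on a simple contract between bq_xmit_all() and the driver's ndo_xdp_xmit(): the driver gets an array of xdp_frame pointers, returns how many it actually queued (the caller counts the rest as drops), and a negative errno means no frames were consumed, in which case the devmap code frees the whole batch itself. Below is a hedged, driver-side sketch of that contract; every mydrv_* helper is a hypothetical stand-in, not something this series provides.

/* Illustrative only: what a bulked ndo_xdp_xmit has to guarantee for
 * bq_xmit_all() above to stay correct. All mydrv_* names are hypothetical.
 */
static int mydrv_xdp_xmit(struct net_device *dev, int n,
			  struct xdp_frame **frames)
{
	struct mydrv_tx_ring *ring = mydrv_pick_xdp_ring(dev);	/* hypothetical */
	int i, sent = 0;

	if (unlikely(!ring))
		return -ENXIO;	/* errno: caller frees every frame in the batch */

	for (i = 0; i < n; i++) {
		if (!mydrv_tx_ring_has_room(ring)) {
			/* frames the driver cannot queue must be freed here;
			 * the caller only counts them as drops (n - sent)
			 */
			xdp_return_frame_rx_napi(frames[i]);
			continue;
		}
		mydrv_tx_ring_enqueue(ring, frames[i]);
		sent++;
	}
	return sent;	/* number of frames actually queued */
}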
View File

@@ -523,6 +523,7 @@ static unsigned int smap_do_tx_msg(struct sock *sk,
}
bpf_compute_data_pointers_sg(md);
md->sk = sk;
rc = (*prog->bpf_func)(md, prog->insnsi);
psock->apply_bytes = md->apply_bytes;
@@ -1713,7 +1714,7 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
struct smap_psock_map_entry *e = NULL;
struct smap_psock *psock;
bool new = false;
int err;
int err = 0;
/* 1. If sock map has BPF programs those will be inherited by the
* sock being added. If the sock is already attached to BPF programs
@@ -1823,7 +1824,6 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
write_unlock_bh(&sock->sk_callback_lock);
return err;
out_free:
kfree(e);
smap_release_sock(psock, sock);
out_progs:
if (parse && verdict) {

View File

@@ -18,7 +18,9 @@
#include <linux/vmalloc.h>
#include <linux/mmzone.h>
#include <linux/anon_inodes.h>
#include <linux/fdtable.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/version.h>
@@ -65,9 +67,9 @@ static const struct bpf_map_ops * const bpf_map_types[] = {
* copy_from_user() call. However, this is not a concern since this function is
* meant to be a future-proofing of bits.
*/
static int check_uarg_tail_zero(void __user *uaddr,
size_t expected_size,
size_t actual_size)
int bpf_check_uarg_tail_zero(void __user *uaddr,
size_t expected_size,
size_t actual_size)
{
unsigned char __user *addr;
unsigned char __user *end;
@@ -422,7 +424,7 @@ static int bpf_obj_name_cpy(char *dst, const char *src)
return 0;
}
#define BPF_MAP_CREATE_LAST_FIELD btf_value_id
#define BPF_MAP_CREATE_LAST_FIELD btf_value_type_id
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
@@ -457,10 +459,10 @@ static int map_create(union bpf_attr *attr)
atomic_set(&map->usercnt, 1);
if (bpf_map_support_seq_show(map) &&
(attr->btf_key_id || attr->btf_value_id)) {
(attr->btf_key_type_id || attr->btf_value_type_id)) {
struct btf *btf;
if (!attr->btf_key_id || !attr->btf_value_id) {
if (!attr->btf_key_type_id || !attr->btf_value_type_id) {
err = -EINVAL;
goto free_map_nouncharge;
}
@@ -471,16 +473,16 @@ static int map_create(union bpf_attr *attr)
goto free_map_nouncharge;
}
err = map->ops->map_check_btf(map, btf, attr->btf_key_id,
attr->btf_value_id);
err = map->ops->map_check_btf(map, btf, attr->btf_key_type_id,
attr->btf_value_type_id);
if (err) {
btf_put(btf);
goto free_map_nouncharge;
}
map->btf = btf;
map->btf_key_id = attr->btf_key_id;
map->btf_value_id = attr->btf_value_id;
map->btf_key_type_id = attr->btf_key_type_id;
map->btf_value_type_id = attr->btf_value_type_id;
}
err = security_bpf_map_alloc(map);
@@ -1899,7 +1901,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
u32 ulen;
int err;
err = check_uarg_tail_zero(uinfo, sizeof(info), info_len);
err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len);
if (err)
return err;
info_len = min_t(u32, sizeof(info), info_len);
@@ -1933,6 +1935,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
if (!capable(CAP_SYS_ADMIN)) {
info.jited_prog_len = 0;
info.xlated_prog_len = 0;
info.nr_jited_ksyms = 0;
goto done;
}
@@ -1969,18 +1972,93 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
* for offload.
*/
ulen = info.jited_prog_len;
info.jited_prog_len = prog->jited_len;
if (prog->aux->func_cnt) {
u32 i;
info.jited_prog_len = 0;
for (i = 0; i < prog->aux->func_cnt; i++)
info.jited_prog_len += prog->aux->func[i]->jited_len;
} else {
info.jited_prog_len = prog->jited_len;
}
if (info.jited_prog_len && ulen) {
if (bpf_dump_raw_ok()) {
uinsns = u64_to_user_ptr(info.jited_prog_insns);
ulen = min_t(u32, info.jited_prog_len, ulen);
if (copy_to_user(uinsns, prog->bpf_func, ulen))
return -EFAULT;
/* for multi-function programs, copy the JITed
* instructions for all the functions
*/
if (prog->aux->func_cnt) {
u32 len, free, i;
u8 *img;
free = ulen;
for (i = 0; i < prog->aux->func_cnt; i++) {
len = prog->aux->func[i]->jited_len;
len = min_t(u32, len, free);
img = (u8 *) prog->aux->func[i]->bpf_func;
if (copy_to_user(uinsns, img, len))
return -EFAULT;
uinsns += len;
free -= len;
if (!free)
break;
}
} else {
if (copy_to_user(uinsns, prog->bpf_func, ulen))
return -EFAULT;
}
} else {
info.jited_prog_insns = 0;
}
}
ulen = info.nr_jited_ksyms;
info.nr_jited_ksyms = prog->aux->func_cnt;
if (info.nr_jited_ksyms && ulen) {
if (bpf_dump_raw_ok()) {
u64 __user *user_ksyms;
ulong ksym_addr;
u32 i;
/* copy the address of the kernel symbol
* corresponding to each function
*/
ulen = min_t(u32, info.nr_jited_ksyms, ulen);
user_ksyms = u64_to_user_ptr(info.jited_ksyms);
for (i = 0; i < ulen; i++) {
ksym_addr = (ulong) prog->aux->func[i]->bpf_func;
ksym_addr &= PAGE_MASK;
if (put_user((u64) ksym_addr, &user_ksyms[i]))
return -EFAULT;
}
} else {
info.jited_ksyms = 0;
}
}
ulen = info.nr_jited_func_lens;
info.nr_jited_func_lens = prog->aux->func_cnt;
if (info.nr_jited_func_lens && ulen) {
if (bpf_dump_raw_ok()) {
u32 __user *user_lens;
u32 func_len, i;
/* copy the JITed image lengths for each function */
ulen = min_t(u32, info.nr_jited_func_lens, ulen);
user_lens = u64_to_user_ptr(info.jited_func_lens);
for (i = 0; i < ulen; i++) {
func_len = prog->aux->func[i]->jited_len;
if (put_user(func_len, &user_lens[i]))
return -EFAULT;
}
} else {
info.jited_func_lens = 0;
}
}
done:
if (copy_to_user(uinfo, &info, info_len) ||
put_user(info_len, &uattr->info.info_len))
@@ -1998,7 +2076,7 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map,
u32 info_len = attr->info.info_len;
int err;
err = check_uarg_tail_zero(uinfo, sizeof(info), info_len);
err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len);
if (err)
return err;
info_len = min_t(u32, sizeof(info), info_len);
@@ -2013,8 +2091,8 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map,
if (map->btf) {
info.btf_id = btf_id(map->btf);
info.btf_key_id = map->btf_key_id;
info.btf_value_id = map->btf_value_id;
info.btf_key_type_id = map->btf_key_type_id;
info.btf_value_type_id = map->btf_value_type_id;
}
if (bpf_map_is_dev_bound(map)) {
@@ -2038,7 +2116,7 @@ static int bpf_btf_get_info_by_fd(struct btf *btf,
u32 info_len = attr->info.info_len;
int err;
err = check_uarg_tail_zero(uinfo, sizeof(*uinfo), info_len);
err = bpf_check_uarg_tail_zero(uinfo, sizeof(*uinfo), info_len);
if (err)
return err;
@@ -2102,6 +2180,132 @@ static int bpf_btf_get_fd_by_id(const union bpf_attr *attr)
return btf_get_fd_by_id(attr->btf_id);
}
static int bpf_task_fd_query_copy(const union bpf_attr *attr,
union bpf_attr __user *uattr,
u32 prog_id, u32 fd_type,
const char *buf, u64 probe_offset,
u64 probe_addr)
{
char __user *ubuf = u64_to_user_ptr(attr->task_fd_query.buf);
u32 len = buf ? strlen(buf) : 0, input_len;
int err = 0;
if (put_user(len, &uattr->task_fd_query.buf_len))
return -EFAULT;
input_len = attr->task_fd_query.buf_len;
if (input_len && ubuf) {
if (!len) {
/* nothing to copy, just make ubuf NULL terminated */
char zero = '\0';
if (put_user(zero, ubuf))
return -EFAULT;
} else if (input_len >= len + 1) {
/* ubuf can hold the string with NULL terminator */
if (copy_to_user(ubuf, buf, len + 1))
return -EFAULT;
} else {
/* ubuf cannot hold the string with NULL terminator,
* do a partial copy with NULL terminator.
*/
char zero = '\0';
err = -ENOSPC;
if (copy_to_user(ubuf, buf, input_len - 1))
return -EFAULT;
if (put_user(zero, ubuf + input_len - 1))
return -EFAULT;
}
}
if (put_user(prog_id, &uattr->task_fd_query.prog_id) ||
put_user(fd_type, &uattr->task_fd_query.fd_type) ||
put_user(probe_offset, &uattr->task_fd_query.probe_offset) ||
put_user(probe_addr, &uattr->task_fd_query.probe_addr))
return -EFAULT;
return err;
}
#define BPF_TASK_FD_QUERY_LAST_FIELD task_fd_query.probe_addr
static int bpf_task_fd_query(const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
pid_t pid = attr->task_fd_query.pid;
u32 fd = attr->task_fd_query.fd;
const struct perf_event *event;
struct files_struct *files;
struct task_struct *task;
struct file *file;
int err;
if (CHECK_ATTR(BPF_TASK_FD_QUERY))
return -EINVAL;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (attr->task_fd_query.flags != 0)
return -EINVAL;
task = get_pid_task(find_vpid(pid), PIDTYPE_PID);
if (!task)
return -ENOENT;
files = get_files_struct(task);
put_task_struct(task);
if (!files)
return -ENOENT;
err = 0;
spin_lock(&files->file_lock);
file = fcheck_files(files, fd);
if (!file)
err = -EBADF;
else
get_file(file);
spin_unlock(&files->file_lock);
put_files_struct(files);
if (err)
goto out;
if (file->f_op == &bpf_raw_tp_fops) {
struct bpf_raw_tracepoint *raw_tp = file->private_data;
struct bpf_raw_event_map *btp = raw_tp->btp;
err = bpf_task_fd_query_copy(attr, uattr,
raw_tp->prog->aux->id,
BPF_FD_TYPE_RAW_TRACEPOINT,
btp->tp->name, 0, 0);
goto put_file;
}
event = perf_get_event(file);
if (!IS_ERR(event)) {
u64 probe_offset, probe_addr;
u32 prog_id, fd_type;
const char *buf;
err = bpf_get_perf_event_info(event, &prog_id, &fd_type,
&buf, &probe_offset,
&probe_addr);
if (!err)
err = bpf_task_fd_query_copy(attr, uattr, prog_id,
fd_type, buf,
probe_offset,
probe_addr);
goto put_file;
}
err = -ENOTSUPP;
put_file:
fput(file);
out:
return err;
}
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
union bpf_attr attr = {};
@@ -2110,7 +2314,7 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN))
return -EPERM;
err = check_uarg_tail_zero(uattr, sizeof(attr), size);
err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
if (err)
return err;
size = min_t(u32, size, sizeof(attr));
@@ -2188,6 +2392,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
case BPF_BTF_GET_FD_BY_ID:
err = bpf_btf_get_fd_by_id(&attr);
break;
case BPF_TASK_FD_QUERY:
err = bpf_task_fd_query(&attr, uattr);
break;
default:
err = -EINVAL;
break;

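From userspace, the new BPF_TASK_FD_QUERY command is driven entirely through the task_fd_query member of union bpf_attr that the code above reads and writes back. The sketch below shows one plausible caller, assuming a uapi bpf.h new enough to carry the command and CAP_SYS_ADMIN; sys_bpf() is a local wrapper around the raw syscall, not something this series adds.

#include <linux/bpf.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Illustrative userspace caller for BPF_TASK_FD_QUERY. */
static int sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
	return syscall(__NR_bpf, cmd, attr, size);
}

static int query_task_fd(int pid, int target_fd)
{
	char buf[256];
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.task_fd_query.pid = pid;		/* task owning the fd */
	attr.task_fd_query.fd = target_fd;	/* perf event or raw_tp fd */
	attr.task_fd_query.flags = 0;		/* must be zero */
	attr.task_fd_query.buf = (__u64)(unsigned long)buf;
	attr.task_fd_query.buf_len = sizeof(buf);

	if (sys_bpf(BPF_TASK_FD_QUERY, &attr, sizeof(attr)) < 0)
		return -1;	/* -ENOSPC still fills a truncated, NUL-terminated buf */

	/* buf_len is written back with strlen() of the event name */
	printf("prog_id=%u fd_type=%u name=%s offset=%llu addr=0x%llx\n",
	       attr.task_fd_query.prog_id, attr.task_fd_query.fd_type, buf,
	       (unsigned long long)attr.task_fd_query.probe_offset,
	       (unsigned long long)attr.task_fd_query.probe_addr);
	return 0;
}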
View File

@@ -1262,6 +1262,7 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
switch (env->prog->type) {
case BPF_PROG_TYPE_LWT_IN:
case BPF_PROG_TYPE_LWT_OUT:
case BPF_PROG_TYPE_LWT_SEG6LOCAL:
/* dst_input() and dst_output() can't write for now */
if (t == BPF_WRITE)
return false;
@@ -5383,11 +5384,24 @@ static int jit_subprogs(struct bpf_verifier_env *env)
insn->src_reg != BPF_PSEUDO_CALL)
continue;
subprog = insn->off;
insn->off = 0;
insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
func[subprog]->bpf_func -
__bpf_call_base;
}
/* we use the aux data to keep a list of the start addresses
* of the JITed images for each function in the program
*
* for some architectures, such as powerpc64, the imm field
* might not be large enough to hold the offset of the start
* address of the callee's JITed image from __bpf_call_base
*
* in such cases, we can lookup the start address of a callee
* by using its subprog id, available from the off field of
* the call instruction, as an index for this list
*/
func[i]->aux->func = func;
func[i]->aux->func_cnt = env->subprog_cnt;
}
for (i = 0; i < env->subprog_cnt; i++) {
old_bpf_func = func[i]->bpf_func;
@@ -5413,17 +5427,12 @@ static int jit_subprogs(struct bpf_verifier_env *env)
* later look the same as if they were interpreted only.
*/
for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
unsigned long addr;
if (insn->code != (BPF_JMP | BPF_CALL) ||
insn->src_reg != BPF_PSEUDO_CALL)
continue;
insn->off = env->insn_aux_data[i].call_imm;
subprog = find_subprog(env, i + insn->off + 1);
addr = (unsigned long)func[subprog]->bpf_func;
addr &= PAGE_MASK;
insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
addr - __bpf_call_base;
insn->imm = subprog;
}
prog->jited = 1;

View File

@@ -1,15 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/* XSKMAP used for AF_XDP sockets
* Copyright(c) 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#include <linux/bpf.h>