Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Daniel Borkmann says:

====================
pull-request: bpf-next 2018-10-16

The following pull-request contains BPF updates for your *net-next* tree.

The main changes are:

1) Convert BPF sockmap and kTLS to both use a new sk_msg API and enable
   sk_msg BPF integration for the latter, from Daniel and John.

2) Enable BPF syscall side to indicate for maps that they do not support
   a map lookup operation as opposed to just a missing key, from Prashant.

3) Add bpftool map create command which after map creation pins the
   map into bpf fs for further processing, from Jakub.

4) Add bpftool support for attaching programs to maps allowing sock_map
   and sock_hash to be used from bpftool, from John.

5) Improve syscall BPF map update/delete path for map-in-map types to
   wait for an RCU grace period so pending references can complete, from Daniel.

6) Couple of follow-up fixes for the BPF socket lookup to get it
   enabled also when IPv6 is compiled as a module, from Joe.

7) Fix a generic-XDP bug to handle the case when the Ethernet header
   was mangled and thus update skb's protocol and data, from Jesper.

8) Add a missing BTF header length check between header copies from
   user space, from Wenwen.

9) Minor fixups in libbpf to use __u32 instead of u32 types and include the
   proper perf_event.h uapi header instead of the perf internal one, from Yonghong.

10) Allow passing user-defined flags through EXTRA_CFLAGS and EXTRA_LDFLAGS
    to bpftool's build, from Jiri.

11) BPF kselftest tweaks to add LWTUNNEL to config fragment and to install
    with_addr.sh script from flow dissector selftest, from Anders.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller
2018-10-15 23:21:07 -07:00
54 changed files with 4978 additions and 3675 deletions

View File

@@ -737,33 +737,18 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map)
}
#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
#if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_INET)
struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key);
struct sock *__sock_hash_lookup_elem(struct bpf_map *map, void *key);
int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type);
int sockmap_get_from_fd(const union bpf_attr *attr, int type,
struct bpf_prog *prog);
#if defined(CONFIG_BPF_STREAM_PARSER)
int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, u32 which);
int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
#else
static inline struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key)
{
return NULL;
}
static inline struct sock *__sock_hash_lookup_elem(struct bpf_map *map,
void *key)
{
return NULL;
}
static inline int sock_map_prog(struct bpf_map *map,
struct bpf_prog *prog,
u32 type)
static inline int sock_map_prog_update(struct bpf_map *map,
struct bpf_prog *prog, u32 which)
{
return -EOPNOTSUPP;
}
static inline int sockmap_get_from_fd(const union bpf_attr *attr, int type,
struct bpf_prog *prog)
static inline int sock_map_get_from_fd(const union bpf_attr *attr,
struct bpf_prog *prog)
{
return -EINVAL;
}
@@ -839,6 +824,10 @@ extern const struct bpf_func_proto bpf_get_stack_proto;
extern const struct bpf_func_proto bpf_sock_map_update_proto;
extern const struct bpf_func_proto bpf_sock_hash_update_proto;
extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto;
extern const struct bpf_func_proto bpf_msg_redirect_hash_proto;
extern const struct bpf_func_proto bpf_msg_redirect_map_proto;
extern const struct bpf_func_proto bpf_sk_redirect_hash_proto;
extern const struct bpf_func_proto bpf_sk_redirect_map_proto;
extern const struct bpf_func_proto bpf_get_local_storage_proto;

View File

@@ -57,7 +57,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
#ifdef CONFIG_NET
BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
#if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_INET)
#if defined(CONFIG_BPF_STREAM_PARSER)
BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops)
#endif

View File

@@ -520,24 +520,6 @@ struct bpf_skb_data_end {
void *data_end;
};
struct sk_msg_buff {
void *data;
void *data_end;
__u32 apply_bytes;
__u32 cork_bytes;
int sg_copybreak;
int sg_start;
int sg_curr;
int sg_end;
struct scatterlist sg_data[MAX_SKB_FRAGS];
bool sg_copy[MAX_SKB_FRAGS];
__u32 flags;
struct sock *sk_redir;
struct sock *sk;
struct sk_buff *skb;
struct list_head list;
};
struct bpf_redirect_info {
u32 ifindex;
u32 flags;
@@ -833,9 +815,6 @@ void xdp_do_flush_map(void);
void bpf_warn_invalid_xdp_action(u32 act);
struct sock *do_sk_redirect_map(struct sk_buff *skb);
struct sock *do_msg_redirect_map(struct sk_msg_buff *md);
#ifdef CONFIG_INET
struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
struct bpf_prog *prog, struct sk_buff *skb,

410
include/linux/skmsg.h Normal file
View File

@@ -0,0 +1,410 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2017 - 2018 Covalent IO, Inc. http://covalent.io */
#ifndef _LINUX_SKMSG_H
#define _LINUX_SKMSG_H
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/scatterlist.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/tcp.h>
#include <net/strparser.h>
#define MAX_MSG_FRAGS MAX_SKB_FRAGS
/* Internal action codes. __SK_DROP is pinned to 0 and the remaining
 * enumerators follow sequentially (1, 2, 3).
 * NOTE(review): presumably these are the kernel-internal results of running
 * a verdict program on a message/skb; confirm against the users of this enum.
 */
enum __sk_action {
__SK_DROP = 0,
__SK_PASS,
__SK_REDIRECT,
__SK_NONE,
};
/* Scatterlist bookkeeping for one sk_msg.
 *
 * start/curr/end are indices into data[] that the sk_msg_iter_* helpers
 * advance and wrap at MAX_MSG_FRAGS, i.e. the array is used as a ring.
 * size is the running byte total (sk_msg_page_add() adds each element's
 * length to it). copy[] holds one flag per element.
 *
 * Field order matters: sk_msg_clear_meta() zeroes everything from the start
 * of this struct through the end of copy[], leaving data[] untouched.
 */
struct sk_msg_sg {
u32 start;
u32 curr;
u32 end;
u32 size;
u32 copybreak;
bool copy[MAX_MSG_FRAGS];
/* The extra element is used for chaining the two sections of the ring
 * together when the list becomes partitioned (e.g. end < start). The
 * crypto APIs require the chaining.
 */
struct scatterlist data[MAX_MSG_FRAGS + 1];
};
/* A message described by a scatterlist ring plus per-message metadata.
 *
 * data/data_end are filled in by sk_msg_compute_data_pointers() from the
 * element at sg.start (or set to NULL when that element's copy flag is set).
 * flags is tested against BPF_F_INGRESS by sk_msg_to_ingress().
 * list links the message into sk_psock::ingress_msg (see
 * sk_psock_queue_msg()).
 * NOTE(review): apply_bytes/cork_bytes/sk_redir presumably mirror the
 * per-psock fields of the same name; confirm in the .c implementation.
 */
struct sk_msg {
struct sk_msg_sg sg;
void *data;
void *data_end;
u32 apply_bytes;
u32 cork_bytes;
u32 flags;
struct sk_buff *skb;
struct sock *sk_redir;
struct sock *sk;
struct list_head list;
};
/* The BPF program slots held by a psock. Each slot is replaced (and the
 * previous program released) through psock_set_prog(); psock_progs_drop()
 * clears all three.
 */
struct sk_psock_progs {
struct bpf_prog *msg_parser;
struct bpf_prog *skb_parser;
struct bpf_prog *skb_verdict;
};
/* Bit numbers within sk_psock::state, used with the
 * sk_psock_{set,clear,test}_state() helpers.
 */
enum sk_psock_state_bits {
SK_PSOCK_TX_ENABLED,
};
/* One link record; allocated zeroed by sk_psock_init_link() and released
 * by sk_psock_free_link().
 * NOTE(review): presumably ties a psock to a map entry (map + opaque
 * link_raw) and is chained on sk_psock::link; confirm against the map code.
 */
struct sk_psock_link {
struct list_head list;
struct bpf_map *map;
void *link_raw;
};
/* Stream-parser state embedded in a psock.
 * NOTE(review): saved_data_ready presumably holds the socket's original
 * data-ready callback while the parser is enabled; confirm in
 * sk_psock_start_strp()/sk_psock_stop_strp().
 */
struct sk_psock_parser {
struct strparser strp;
bool enabled;
void (*saved_data_ready)(struct sock *sk);
};
/* An skb plus a length and an offset, embedded in sk_psock as work_state.
 * NOTE(review): presumably the resume point of a partially processed skb
 * for the psock work item; confirm in the .c implementation.
 */
struct sk_psock_work_state {
struct sk_buff *skb;
u32 len;
u32 off;
};
/* Per-socket state, reached from a struct sock via sk_psock().
 *
 * What the helpers in this header establish:
 *  - sk: the socket sk_psock_report_error() reports on and that
 *    sk_psock_cork_free() passes to sk_msg_free().
 *  - apply_bytes: consumed by sk_msg_apply_bytes(), saturating at 0.
 *  - cork: heap-allocated sk_msg, released by sk_psock_cork_free().
 *  - ingress_msg: list of sk_msg queued by sk_psock_queue_msg().
 *  - state: bitmap indexed by enum sk_psock_state_bits.
 *  - refcnt: taken by sk_psock_get(), dropped by sk_psock_put(), which
 *    calls sk_psock_drop() when it reaches zero.
 *  - saved_unhash/saved_close/saved_write_space/sk_proto: recorded by
 *    sk_psock_update_proto(); sk_proto is put back by
 *    sk_psock_restore_proto().
 * NOTE(review): the rcu/gc union presumably serves deferred destruction
 * (sk_psock_destroy() takes an rcu_head); confirm in the .c implementation.
 */
struct sk_psock {
struct sock *sk;
struct sock *sk_redir;
u32 apply_bytes;
u32 cork_bytes;
u32 eval;
struct sk_msg *cork;
struct sk_psock_progs progs;
struct sk_psock_parser parser;
struct sk_buff_head ingress_skb;
struct list_head ingress_msg;
unsigned long state;
struct list_head link;
spinlock_t link_lock;
refcount_t refcnt;
void (*saved_unhash)(struct sock *sk);
void (*saved_close)(struct sock *sk, long timeout);
void (*saved_write_space)(struct sock *sk);
struct proto *sk_proto;
struct sk_psock_work_state work_state;
struct work_struct work;
union {
struct rcu_head rcu;
struct work_struct gc;
};
};
/* Out-of-line sk_msg operations; the definitions are not in this header.
 * NOTE(review): semantics below are inferred from names only; confirm
 * against the implementation before relying on them:
 *  - alloc/clone/trim: grow, duplicate a range of, or shrink a message;
 *  - free*: release a whole message or its first @bytes, with the
 *    _nocharge variants presumably skipping socket memory accounting;
 *  - return*: presumably give charged memory back to @sk;
 *  - *_from_iter: fill a message from an iov_iter, by reference (zerocopy)
 *    or by copying (memcopy).
 */
int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len,
int elem_first_coalesce);
int sk_msg_clone(struct sock *sk, struct sk_msg *dst, struct sk_msg *src,
u32 off, u32 len);
void sk_msg_trim(struct sock *sk, struct sk_msg *msg, int len);
int sk_msg_free(struct sock *sk, struct sk_msg *msg);
int sk_msg_free_nocharge(struct sock *sk, struct sk_msg *msg);
void sk_msg_free_partial(struct sock *sk, struct sk_msg *msg, u32 bytes);
void sk_msg_free_partial_nocharge(struct sock *sk, struct sk_msg *msg,
u32 bytes);
void sk_msg_return(struct sock *sk, struct sk_msg *msg, int bytes);
void sk_msg_return_zero(struct sock *sk, struct sk_msg *msg, int bytes);
int sk_msg_zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
struct sk_msg *msg, u32 bytes);
int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
struct sk_msg *msg, u32 bytes);
/* Sanity check for free walks: warn when index @i has reached the end
 * index of @msg while @bytes is still non-zero.
 */
static inline void sk_msg_check_to_free(struct sk_msg *msg, u32 i, u32 bytes)
{
	bool at_end = (i == msg->sg.end);

	WARN_ON(at_end && bytes);
}
/* Consume @bytes from @psock->apply_bytes, saturating at zero.
 * A counter that is already zero is left untouched.
 */
static inline void sk_msg_apply_bytes(struct sk_psock *psock, u32 bytes)
{
	u32 pending = psock->apply_bytes;

	if (!pending)
		return;

	psock->apply_bytes = pending < bytes ? 0 : pending - bytes;
}
/* Ring-index helpers for the sk_msg scatterlist.
 *
 * sk_msg_iter_var_prev()/sk_msg_iter_var_next() step an index lvalue
 * backwards/forwards by one, wrapping within [0, MAX_MSG_FRAGS).
 * sk_msg_iter_prev()/sk_msg_iter_next() do the same on the named index
 * field (@which: start, curr or end) of @msg->sg.
 *
 * Arguments are parenthesized so that expression arguments such as
 * "msgs + i" expand correctly. @var is still evaluated more than once, so
 * it must not have side effects.
 */
#define sk_msg_iter_var_prev(var)			\
	do {						\
		if ((var) == 0)				\
			(var) = MAX_MSG_FRAGS - 1;	\
		else					\
			(var)--;			\
	} while (0)
#define sk_msg_iter_var_next(var)			\
	do {						\
		(var)++;				\
		if ((var) == MAX_MSG_FRAGS)		\
			(var) = 0;			\
	} while (0)
#define sk_msg_iter_prev(msg, which)			\
	sk_msg_iter_var_prev((msg)->sg.which)
#define sk_msg_iter_next(msg, which)			\
	sk_msg_iter_var_next((msg)->sg.which)
static inline void sk_msg_clear_meta(struct sk_msg *msg)
{
memset(&msg->sg, 0, offsetofend(struct sk_msg_sg, copy));
}
/* Reset @msg to all-zero and run sg_init_marker() over the first
 * MAX_MSG_FRAGS scatterlist entries. The build-time check pins data[] to
 * exactly one element more than MAX_MSG_FRAGS.
 */
static inline void sk_msg_init(struct sk_msg *msg)
{
	BUILD_BUG_ON(ARRAY_SIZE(msg->sg.data) != MAX_MSG_FRAGS + 1);

	memset(msg, 0, sizeof(struct sk_msg));
	sg_init_marker(&msg->sg.data[0], MAX_MSG_FRAGS);
}
/* Hand the first @size bytes of scatterlist element @which from @src to
 * the same slot of @dst: @dst gets a copy of the element with its length
 * set to @size, and @src's element is shortened and advanced by @size.
 */
static inline void sk_msg_xfer(struct sk_msg *dst, struct sk_msg *src,
			       int which, u32 size)
{
	struct scatterlist *from = &src->sg.data[which];
	struct scatterlist *to = &dst->sg.data[which];

	*to = *from;
	to->length = size;

	from->length -= size;
	from->offset += size;
}
/* Move the entire contents of @src into @dst (byte copy), then
 * re-initialize @src with sk_msg_init().
 */
static inline void sk_msg_xfer_full(struct sk_msg *dst, struct sk_msg *src)
{
	memcpy(dst, src, sizeof(struct sk_msg));
	sk_msg_init(src);
}
/* Distance from sg.start to sg.end around the ring. When end is below
 * start the range wraps past MAX_MSG_FRAGS.
 */
static inline u32 sk_msg_elem_used(const struct sk_msg *msg)
{
	u32 first = msg->sg.start;
	u32 last = msg->sg.end;

	if (last >= first)
		return last - first;

	return last + (MAX_MSG_FRAGS - first);
}
static inline bool sk_msg_full(const struct sk_msg *msg)
{
return (msg->sg.end == msg->sg.start) && msg->sg.size;
}
/* Return a pointer to scatterlist element @which of @msg. No bounds check. */
static inline struct scatterlist *sk_msg_elem(struct sk_msg *msg, int which)
{
	return msg->sg.data + which;
}
/* Return the page referenced by scatterlist element @which of @msg. */
static inline struct page *sk_msg_page(struct sk_msg *msg, int which)
{
	struct scatterlist *sge = sk_msg_elem(msg, which);

	return sg_page(sge);
}
/* True when @msg has BPF_F_INGRESS set in its flags. */
static inline bool sk_msg_to_ingress(const struct sk_msg *msg)
{
	return (msg->flags & BPF_F_INGRESS) != 0;
}
static inline void sk_msg_compute_data_pointers(struct sk_msg *msg)
{
struct scatterlist *sge = sk_msg_elem(msg, msg->sg.start);
if (msg->sg.copy[msg->sg.start]) {
msg->data = NULL;
msg->data_end = NULL;
} else {
msg->data = sg_virt(sge);
msg->data_end = msg->data + sge->length;
}
}
/* Append @page (@len bytes at @offset) at sg.end of @msg.
 * Takes a page reference, clears the end marker on the slot, sets the
 * slot's copy flag, adds @len to sg.size and advances sg.end by one
 * (wrapping at MAX_MSG_FRAGS).
 */
static inline void sk_msg_page_add(struct sk_msg *msg, struct page *page,
				   u32 len, u32 offset)
{
	u32 slot = msg->sg.end;
	struct scatterlist *sge = sk_msg_elem(msg, slot);

	get_page(page);
	sg_set_page(sge, page, len, offset);
	sg_unmark_end(sge);

	msg->sg.copy[slot] = true;
	msg->sg.size += len;
	sk_msg_iter_next(msg, end);
}
/* Set the copy flag to @copy_state for every element from index @i up to
 * (not including) sg.end, walking the ring forwards. The element at @i is
 * always written, even if @i already equals sg.end on entry.
 */
static inline void sk_msg_sg_copy(struct sk_msg *msg, u32 i, bool copy_state)
{
	do {
		msg->sg.copy[i] = copy_state;
		sk_msg_iter_var_next(i);
	} while (i != msg->sg.end);
}
/* Mark the elements from @start up to sg.end with copy = true. */
static inline void sk_msg_sg_copy_set(struct sk_msg *msg, u32 start)
{
	const bool state = true;

	sk_msg_sg_copy(msg, start, state);
}
/* Mark the elements from @start up to sg.end with copy = false. */
static inline void sk_msg_sg_copy_clear(struct sk_msg *msg, u32 start)
{
	const bool state = false;

	sk_msg_sg_copy(msg, start, state);
}
/* Fetch the psock attached to @sk through its RCU-protected user data.
 * Returns whatever rcu_dereference_sk_user_data() yields for @sk.
 */
static inline struct sk_psock *sk_psock(const struct sock *sk)
{
	struct sk_psock *psock = rcu_dereference_sk_user_data(sk);

	return psock;
}
static inline bool sk_has_psock(struct sock *sk)
{
return sk_psock(sk) != NULL && sk->sk_prot->recvmsg == tcp_bpf_recvmsg;
}
/* Append @msg to the tail of @psock's ingress message list. */
static inline void sk_psock_queue_msg(struct sk_psock *psock,
				      struct sk_msg *msg)
{
	struct list_head *queue = &psock->ingress_msg;

	list_add_tail(&msg->list, queue);
}
/* True when @psock is NULL or its ingress message list is empty. */
static inline bool sk_psock_queue_empty(const struct sk_psock *psock)
{
	if (!psock)
		return true;

	return list_empty(&psock->ingress_msg);
}
/* Record @err in the owning socket's sk_err and invoke that socket's
 * error-report callback.
 */
static inline void sk_psock_report_error(struct sk_psock *psock, int err)
{
	struct sock *owner = psock->sk;

	owner->sk_err = err;
	owner->sk_error_report(owner);
}
/* Out-of-line psock setup, stream-parser control and msg verdict entry
 * points; the definitions are not in this header.
 * NOTE(review): @node in sk_psock_init() is presumably a NUMA node for the
 * allocation; confirm in the implementation.
 */
struct sk_psock *sk_psock_init(struct sock *sk, int node);
int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock);
void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock);
void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock);
int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock,
struct sk_msg *msg);
/* Allocate a zeroed sk_psock_link with GFP_ATOMIC | __GFP_NOWARN.
 * Returns the new link, or NULL if the allocation fails.
 */
static inline struct sk_psock_link *sk_psock_init_link(void)
{
	struct sk_psock_link *link;

	link = kzalloc(sizeof(*link), GFP_ATOMIC | __GFP_NOWARN);
	return link;
}
/* Release a link obtained from sk_psock_init_link(). */
static inline void sk_psock_free_link(struct sk_psock_link *link)
{
	kfree(link);
}
/* Out-of-line; definition not in this header. */
struct sk_psock_link *sk_psock_link_pop(struct sk_psock *psock);
/* sk_psock_unlink() has a real definition only when
 * CONFIG_BPF_STREAM_PARSER is set; otherwise it compiles to an empty stub.
 */
#if defined(CONFIG_BPF_STREAM_PARSER)
void sk_psock_unlink(struct sock *sk, struct sk_psock_link *link);
#else
static inline void sk_psock_unlink(struct sock *sk,
struct sk_psock_link *link)
{
}
#endif
/* Out-of-line; definition not in this header. */
void __sk_psock_purge_ingress_msg(struct sk_psock *psock);
/* Release @psock's cork message, if any: free its contents with
 * sk_msg_free(), free the sk_msg itself, and clear the pointer.
 */
static inline void sk_psock_cork_free(struct sk_psock *psock)
{
	if (!psock->cork)
		return;

	sk_msg_free(psock->sk, psock->cork);
	kfree(psock->cork);
	psock->cork = NULL;
}
static inline void sk_psock_update_proto(struct sock *sk,
struct sk_psock *psock,
struct proto *ops)
{
psock->saved_unhash = sk->sk_prot->unhash;
psock->saved_close = sk->sk_prot->close;
psock->saved_write_space = sk->sk_write_space;
psock->sk_proto = sk->sk_prot;
sk->sk_prot = ops;
}
/* Put back the proto pointer saved in @psock, if one is recorded, and
 * forget it so a second call is a no-op.
 */
static inline void sk_psock_restore_proto(struct sock *sk,
					  struct sk_psock *psock)
{
	struct proto *saved = psock->sk_proto;

	if (!saved)
		return;

	sk->sk_prot = saved;
	psock->sk_proto = NULL;
}
/* Set state bit @bit in @psock->state using set_bit(). */
static inline void sk_psock_set_state(struct sk_psock *psock,
				      enum sk_psock_state_bits bit)
{
	unsigned long *bits = &psock->state;

	set_bit(bit, bits);
}
/* Clear state bit @bit in @psock->state using clear_bit(). */
static inline void sk_psock_clear_state(struct sk_psock *psock,
					enum sk_psock_state_bits bit)
{
	unsigned long *bits = &psock->state;

	clear_bit(bit, bits);
}
/* Return whether state bit @bit is set in @psock->state. */
static inline bool sk_psock_test_state(const struct sk_psock *psock,
				       enum sk_psock_state_bits bit)
{
	const unsigned long *bits = &psock->state;

	return test_bit(bit, bits);
}
/* Look up @sk's psock inside an RCU read-side section and take a
 * reference on it. Returns NULL when no psock is attached or when its
 * refcount has already reached zero.
 */
static inline struct sk_psock *sk_psock_get(struct sock *sk)
{
	struct sk_psock *candidate;
	struct sk_psock *held = NULL;

	rcu_read_lock();
	candidate = sk_psock(sk);
	if (candidate && refcount_inc_not_zero(&candidate->refcnt))
		held = candidate;
	rcu_read_unlock();

	return held;
}
/* Out-of-line psock teardown entry points; definitions are not in this
 * header. sk_psock_drop() is what sk_psock_put() calls when the last
 * reference goes away.
 */
void sk_psock_stop(struct sock *sk, struct sk_psock *psock);
void sk_psock_destroy(struct rcu_head *rcu);
void sk_psock_drop(struct sock *sk, struct sk_psock *psock);
/* Drop one reference on @psock; when it was the last one, hand the psock
 * to sk_psock_drop().
 */
static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock)
{
	if (!refcount_dec_and_test(&psock->refcnt))
		return;

	sk_psock_drop(sk, psock);
}
/* Atomically install @prog in the slot *@pprog and release the program
 * that was there before, if any. @prog may be NULL to clear the slot.
 */
static inline void psock_set_prog(struct bpf_prog **pprog,
				  struct bpf_prog *prog)
{
	struct bpf_prog *old = xchg(pprog, prog);

	if (old)
		bpf_prog_put(old);
}
static inline void psock_progs_drop(struct sk_psock_progs *progs)
{
psock_set_prog(&progs->msg_parser, NULL);
psock_set_prog(&progs->skb_parser, NULL);
psock_set_prog(&progs->skb_verdict, NULL);
}
#endif /* _LINUX_SKMSG_H */