bpf: avoid preempt enable/disable in sockmap using tcp_skb_cb region

SK_SKB BPF programs are run from the socket/tcp context but early in
the stack before much of the TCP metadata is needed in tcp_skb_cb. So
we can use some unused fields to place BPF metadata needed for SK_SKB
programs when implementing the redirect function.

This allows us to drop the preempt disable logic. It does however
require an API change so sk_redirect_map() has been updated to
additionally provide ctx_ptr to skb. Note, we do however continue to
disable/enable preemption around actual BPF program running to account
for map updates.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
John Fastabend
2017-10-18 07:10:36 -07:00
committed by David S. Miller
父節點 435bf0d3f9
當前提交 34f79502bb
共有 8 個文件被更改,包括 36 次插入30 次删除

查看文件

@@ -39,6 +39,7 @@
#include <linux/workqueue.h>
#include <linux/list.h>
#include <net/strparser.h>
#include <net/tcp.h>
struct bpf_stab {
struct bpf_map map;
@@ -101,9 +102,16 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
return SK_DROP;
skb_orphan(skb);
/* We need to ensure that BPF metadata for maps is also cleared
* when we orphan the skb so that we don't have the possibility
* to reference a stale map.
*/
TCP_SKB_CB(skb)->bpf.map = NULL;
skb->sk = psock->sock;
bpf_compute_data_end(skb);
preempt_disable();
rc = (*prog->bpf_func)(skb, prog->insnsi);
preempt_enable();
skb->sk = NULL;
return rc;
@@ -114,17 +122,10 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
struct sock *sk;
int rc;
/* Because we use per cpu values to feed input from sock redirect
* in BPF program to do_sk_redirect_map() call we need to ensure we
* are not preempted. RCU read lock is not sufficient in this case
* with CONFIG_PREEMPT_RCU enabled so we must be explicit here.
*/
preempt_disable();
rc = smap_verdict_func(psock, skb);
switch (rc) {
case SK_REDIRECT:
sk = do_sk_redirect_map();
preempt_enable();
sk = do_sk_redirect_map(skb);
if (likely(sk)) {
struct smap_psock *peer = smap_psock_sk(sk);
@@ -141,8 +142,6 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
/* Fall through and free skb otherwise */
case SK_DROP:
default:
if (rc != SK_REDIRECT)
preempt_enable();
kfree_skb(skb);
}
}