bpf: avoid preempt enable/disable in sockmap using tcp_skb_cb region
SK_SKB BPF programs are run from the socket/tcp context but early in the stack before much of the TCP metadata is needed in tcp_skb_cb. So we can use some unused fields to place BPF metadata needed for SK_SKB programs when implementing the redirect function. This allows us to drop the preempt disable logic. It does however require an API change so sk_redirect_map() has been updated to additionally provide ctx_ptr to skb. Note, we do however continue to disable/enable preemption around actual BPF program running to account for map updates. Signed-off-by: John Fastabend <john.fastabend@gmail.com> Acked-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:

committed by
David S. Miller

父節點
435bf0d3f9
當前提交
34f79502bb
@@ -39,6 +39,7 @@
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/list.h>
|
||||
#include <net/strparser.h>
|
||||
#include <net/tcp.h>
|
||||
|
||||
struct bpf_stab {
|
||||
struct bpf_map map;
|
||||
@@ -101,9 +102,16 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
|
||||
return SK_DROP;
|
||||
|
||||
skb_orphan(skb);
|
||||
/* We need to ensure that BPF metadata for maps is also cleared
|
||||
* when we orphan the skb so that we don't have the possibility
|
||||
* to reference a stale map.
|
||||
*/
|
||||
TCP_SKB_CB(skb)->bpf.map = NULL;
|
||||
skb->sk = psock->sock;
|
||||
bpf_compute_data_end(skb);
|
||||
preempt_disable();
|
||||
rc = (*prog->bpf_func)(skb, prog->insnsi);
|
||||
preempt_enable();
|
||||
skb->sk = NULL;
|
||||
|
||||
return rc;
|
||||
@@ -114,17 +122,10 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
|
||||
struct sock *sk;
|
||||
int rc;
|
||||
|
||||
/* Because we use per cpu values to feed input from sock redirect
|
||||
* in BPF program to do_sk_redirect_map() call we need to ensure we
|
||||
* are not preempted. RCU read lock is not sufficient in this case
|
||||
* with CONFIG_PREEMPT_RCU enabled so we must be explicit here.
|
||||
*/
|
||||
preempt_disable();
|
||||
rc = smap_verdict_func(psock, skb);
|
||||
switch (rc) {
|
||||
case SK_REDIRECT:
|
||||
sk = do_sk_redirect_map();
|
||||
preempt_enable();
|
||||
sk = do_sk_redirect_map(skb);
|
||||
if (likely(sk)) {
|
||||
struct smap_psock *peer = smap_psock_sk(sk);
|
||||
|
||||
@@ -141,8 +142,6 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
|
||||
/* Fall through and free skb otherwise */
|
||||
case SK_DROP:
|
||||
default:
|
||||
if (rc != SK_REDIRECT)
|
||||
preempt_enable();
|
||||
kfree_skb(skb);
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user