bpf: Remove extra lock_sock for TCP_ZEROCOPY_RECEIVE

[ Upstream commit 9cacf81f8161111db25f98e78a7a0e32ae142b3f ]

Add custom implementation of getsockopt hook for TCP_ZEROCOPY_RECEIVE.
We skip generic hooks for TCP_ZEROCOPY_RECEIVE and have a custom
call in do_tcp_getsockopt using the on-stack data. This removes
3% overhead for locking/unlocking the socket.

Without this patch:
     3.38%     0.07%  tcp_mmap  [kernel.kallsyms]  [k] __cgroup_bpf_run_filter_getsockopt
            |
             --3.30%--__cgroup_bpf_run_filter_getsockopt
                       |
                        --0.81%--__kmalloc

With the patch applied:
     0.52%     0.12%  tcp_mmap  [kernel.kallsyms]  [k] __cgroup_bpf_run_filter_getsockopt_kern

Note, exporting uapi/tcp.h requires removing netinet/tcp.h
from test_progs.h because those headers have confliciting
definitions.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20210115163501.805133-2-sdf@google.com
Stable-dep-of: 2598619e012c ("sctp: add bpf_bypass_getsockopt proto callback")
Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
Stanislav Fomichev
2021-01-15 08:34:59 -08:00
committed by Greg Kroah-Hartman
parent c62e2ac02e
commit 08f61a3491
16 changed files with 506 additions and 7 deletions

View File

@@ -1546,6 +1546,52 @@ out:
sockopt_free_buf(&ctx);
return ret;
}
int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
int optname, void *optval,
int *optlen, int retval)
{
struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
struct bpf_sockopt_kern ctx = {
.sk = sk,
.level = level,
.optname = optname,
.retval = retval,
.optlen = *optlen,
.optval = optval,
.optval_end = optval + *optlen,
};
int ret;
/* Note that __cgroup_bpf_run_filter_getsockopt doesn't copy
* user data back into BPF buffer when reval != 0. This is
* done as an optimization to avoid extra copy, assuming
* kernel won't populate the data in case of an error.
* Here we always pass the data and memset() should
* be called if that data shouldn't be "exported".
*/
ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_GETSOCKOPT],
&ctx, BPF_PROG_RUN);
if (!ret)
return -EPERM;
if (ctx.optlen > *optlen)
return -EFAULT;
/* BPF programs only allowed to set retval to 0, not some
* arbitrary value.
*/
if (ctx.retval != 0 && ctx.retval != retval)
return -EFAULT;
/* BPF programs can shrink the buffer, export the modifications.
*/
if (ctx.optlen != 0)
*optlen = ctx.optlen;
return ctx.retval;
}
#endif
static ssize_t sysctl_cpy_dir(const struct ctl_dir *dir, char **bufp,