bpf: Remove extra lock_sock for TCP_ZEROCOPY_RECEIVE
[ Upstream commit 9cacf81f8161111db25f98e78a7a0e32ae142b3f ] Add custom implementation of getsockopt hook for TCP_ZEROCOPY_RECEIVE. We skip generic hooks for TCP_ZEROCOPY_RECEIVE and have a custom call in do_tcp_getsockopt using the on-stack data. This removes 3% overhead for locking/unlocking the socket. Without this patch: 3.38% 0.07% tcp_mmap [kernel.kallsyms] [k] __cgroup_bpf_run_filter_getsockopt | --3.30%--__cgroup_bpf_run_filter_getsockopt | --0.81%--__kmalloc With the patch applied: 0.52% 0.12% tcp_mmap [kernel.kallsyms] [k] __cgroup_bpf_run_filter_getsockopt_kern Note, exporting uapi/tcp.h requires removing netinet/tcp.h from test_progs.h because those headers have conflicting definitions. Signed-off-by: Stanislav Fomichev <sdf@google.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: Martin KaFai Lau <kafai@fb.com> Link: https://lore.kernel.org/bpf/20210115163501.805133-2-sdf@google.com Stable-dep-of: 2598619e012c ("sctp: add bpf_bypass_getsockopt proto callback") Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:

committed by
Greg Kroah-Hartman

parent
c62e2ac02e
commit
08f61a3491
@@ -1546,6 +1546,52 @@ out:
|
||||
sockopt_free_buf(&ctx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
|
||||
int optname, void *optval,
|
||||
int *optlen, int retval)
|
||||
{
|
||||
struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
|
||||
struct bpf_sockopt_kern ctx = {
|
||||
.sk = sk,
|
||||
.level = level,
|
||||
.optname = optname,
|
||||
.retval = retval,
|
||||
.optlen = *optlen,
|
||||
.optval = optval,
|
||||
.optval_end = optval + *optlen,
|
||||
};
|
||||
int ret;
|
||||
|
||||
/* Note that __cgroup_bpf_run_filter_getsockopt doesn't copy
|
||||
* user data back into BPF buffer when reval != 0. This is
|
||||
* done as an optimization to avoid extra copy, assuming
|
||||
* kernel won't populate the data in case of an error.
|
||||
* Here we always pass the data and memset() should
|
||||
* be called if that data shouldn't be "exported".
|
||||
*/
|
||||
|
||||
ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_GETSOCKOPT],
|
||||
&ctx, BPF_PROG_RUN);
|
||||
if (!ret)
|
||||
return -EPERM;
|
||||
|
||||
if (ctx.optlen > *optlen)
|
||||
return -EFAULT;
|
||||
|
||||
/* BPF programs only allowed to set retval to 0, not some
|
||||
* arbitrary value.
|
||||
*/
|
||||
if (ctx.retval != 0 && ctx.retval != retval)
|
||||
return -EFAULT;
|
||||
|
||||
/* BPF programs can shrink the buffer, export the modifications.
|
||||
*/
|
||||
if (ctx.optlen != 0)
|
||||
*optlen = ctx.optlen;
|
||||
|
||||
return ctx.retval;
|
||||
}
|
||||
#endif
|
||||
|
||||
static ssize_t sysctl_cpy_dir(const struct ctl_dir *dir, char **bufp,
|
||||
|
Reference in New Issue
Block a user