bpf: Bpf_{g,s}etsockopt for struct bpf_sock_addr

Currently, bpf_getsockopt and bpf_setsockopt helpers operate on the
'struct bpf_sock_ops' context in BPF_PROG_TYPE_SOCK_OPS program.
Let's generalize them and make them available for 'struct bpf_sock_addr'.
That way, in the future, we can allow those helpers in more places.

As an example, let's expose those 'struct bpf_sock_addr' based helpers to
BPF_CGROUP_INET{4,6}_CONNECT hooks. That way we can override CC before the
connection is made.

v3:
* Expose custom helpers for bpf_sock_addr context instead of doing
  generic bpf_sock argument (as suggested by Daniel). Even with
  try_socket_lock that doesn't sleep we have a problem where context sk
  is already locked and socket lock is non-nestable.

v2:
* s/BPF_PROG_TYPE_CGROUP_SOCKOPT/BPF_PROG_TYPE_SOCK_OPS/

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20200430233152.199403-1-sdf@google.com
Esse commit está contido em:
Stanislav Fomichev
2020-04-30 16:31:52 -07:00
commit de Alexei Starovoitov
commit beecf11bc2
5 arquivos alterados com 166 adições e 27 exclusões

Ver arquivo

@@ -37,3 +37,4 @@ CONFIG_IPV6_SIT=m
CONFIG_BPF_JIT=y
CONFIG_BPF_LSM=y
CONFIG_SECURITY=y
CONFIG_TCP_CONG_DCTCP=y

Ver arquivo

@@ -8,6 +8,7 @@
#include <linux/in.h>
#include <linux/in6.h>
#include <sys/socket.h>
#include <netinet/tcp.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
@@ -16,6 +17,10 @@
#define DST_REWRITE_IP4 0x7f000001U
#define DST_REWRITE_PORT4 4444
#ifndef TCP_CA_NAME_MAX
#define TCP_CA_NAME_MAX 16
#endif
int _version SEC("version") = 1;
__attribute__ ((noinline))
@@ -33,6 +38,43 @@ int do_bind(struct bpf_sock_addr *ctx)
return 1;
}
static __inline int verify_cc(struct bpf_sock_addr *ctx,
char expected[TCP_CA_NAME_MAX])
{
char buf[TCP_CA_NAME_MAX];
int i;
if (bpf_getsockopt(ctx, SOL_TCP, TCP_CONGESTION, &buf, sizeof(buf)))
return 1;
for (i = 0; i < TCP_CA_NAME_MAX; i++) {
if (buf[i] != expected[i])
return 1;
if (buf[i] == 0)
break;
}
return 0;
}
static __inline int set_cc(struct bpf_sock_addr *ctx)
{
char dctcp[TCP_CA_NAME_MAX] = "dctcp";
char cubic[TCP_CA_NAME_MAX] = "cubic";
if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, &dctcp, sizeof(dctcp)))
return 1;
if (verify_cc(ctx, dctcp))
return 1;
if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, &cubic, sizeof(cubic)))
return 1;
if (verify_cc(ctx, cubic))
return 1;
return 0;
}
SEC("cgroup/connect4")
int connect_v4_prog(struct bpf_sock_addr *ctx)
{
@@ -66,6 +108,10 @@ int connect_v4_prog(struct bpf_sock_addr *ctx)
bpf_sk_release(sk);
/* Rewrite congestion control. */
if (ctx->type == SOCK_STREAM && set_cc(ctx))
return 0;
/* Rewrite destination. */
ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4);
ctx->user_port = bpf_htons(DST_REWRITE_PORT4);