bpf: Add SO_KEEPALIVE and related options to bpf_setsockopt
This patch adds support for the SO_KEEPALIVE flag and TCP-related options to the bpf_setsockopt() routine. This is helpful when we want to enable or tune TCP keepalive for applications that don't do it in their userspace code. v3: - update kernel-doc in uapi (Nikita Vetoshkin <nekto0n@yandex-team.ru>) v4: - update kernel-doc in tools too (Alexei Starovoitov) - add test to selftests (Alexei Starovoitov) Signed-off-by: Dmitry Yakunin <zeil@yandex-team.ru> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: Martin KaFai Lau <kafai@fb.com> Link: https://lore.kernel.org/bpf/20200620153052.9439-3-zeil@yandex-team.ru
This commit is contained in:

committed by
Alexei Starovoitov

parent
aad4a0a951
commit
f9bcf96837
@@ -1621,10 +1621,13 @@ union bpf_attr {
|
|||||||
*
|
*
|
||||||
* * **SOL_SOCKET**, which supports the following *optname*\ s:
|
* * **SOL_SOCKET**, which supports the following *optname*\ s:
|
||||||
* **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**,
|
* **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**,
|
||||||
* **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**.
|
* **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**,
|
||||||
|
* **SO_BINDTODEVICE**, **SO_KEEPALIVE**.
|
||||||
* * **IPPROTO_TCP**, which supports the following *optname*\ s:
|
* * **IPPROTO_TCP**, which supports the following *optname*\ s:
|
||||||
* **TCP_CONGESTION**, **TCP_BPF_IW**,
|
* **TCP_CONGESTION**, **TCP_BPF_IW**,
|
||||||
* **TCP_BPF_SNDCWND_CLAMP**.
|
* **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**,
|
||||||
|
* **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**,
|
||||||
|
* **TCP_SYNCNT**, **TCP_USER_TIMEOUT**.
|
||||||
* * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
|
* * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
|
||||||
* * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
|
* * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
|
||||||
* Return
|
* Return
|
||||||
|
@@ -4289,10 +4289,10 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
|
|||||||
char *optval, int optlen, u32 flags)
|
char *optval, int optlen, u32 flags)
|
||||||
{
|
{
|
||||||
char devname[IFNAMSIZ];
|
char devname[IFNAMSIZ];
|
||||||
|
int val, valbool;
|
||||||
struct net *net;
|
struct net *net;
|
||||||
int ifindex;
|
int ifindex;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
int val;
|
|
||||||
|
|
||||||
if (!sk_fullsock(sk))
|
if (!sk_fullsock(sk))
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
@@ -4303,6 +4303,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
|
|||||||
if (optlen != sizeof(int) && optname != SO_BINDTODEVICE)
|
if (optlen != sizeof(int) && optname != SO_BINDTODEVICE)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
val = *((int *)optval);
|
val = *((int *)optval);
|
||||||
|
valbool = val ? 1 : 0;
|
||||||
|
|
||||||
/* Only some socketops are supported */
|
/* Only some socketops are supported */
|
||||||
switch (optname) {
|
switch (optname) {
|
||||||
@@ -4361,6 +4362,11 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
|
|||||||
}
|
}
|
||||||
ret = sock_bindtoindex(sk, ifindex, false);
|
ret = sock_bindtoindex(sk, ifindex, false);
|
||||||
break;
|
break;
|
||||||
|
case SO_KEEPALIVE:
|
||||||
|
if (sk->sk_prot->keepalive)
|
||||||
|
sk->sk_prot->keepalive(sk, valbool);
|
||||||
|
sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
ret = -EINVAL;
|
ret = -EINVAL;
|
||||||
}
|
}
|
||||||
@@ -4421,6 +4427,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
|
|||||||
ret = tcp_set_congestion_control(sk, name, false,
|
ret = tcp_set_congestion_control(sk, name, false,
|
||||||
reinit, true);
|
reinit, true);
|
||||||
} else {
|
} else {
|
||||||
|
struct inet_connection_sock *icsk = inet_csk(sk);
|
||||||
struct tcp_sock *tp = tcp_sk(sk);
|
struct tcp_sock *tp = tcp_sk(sk);
|
||||||
|
|
||||||
if (optlen != sizeof(int))
|
if (optlen != sizeof(int))
|
||||||
@@ -4449,6 +4456,33 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
|
|||||||
else
|
else
|
||||||
tp->save_syn = val;
|
tp->save_syn = val;
|
||||||
break;
|
break;
|
||||||
|
case TCP_KEEPIDLE:
|
||||||
|
ret = tcp_sock_set_keepidle_locked(sk, val);
|
||||||
|
break;
|
||||||
|
case TCP_KEEPINTVL:
|
||||||
|
if (val < 1 || val > MAX_TCP_KEEPINTVL)
|
||||||
|
ret = -EINVAL;
|
||||||
|
else
|
||||||
|
tp->keepalive_intvl = val * HZ;
|
||||||
|
break;
|
||||||
|
case TCP_KEEPCNT:
|
||||||
|
if (val < 1 || val > MAX_TCP_KEEPCNT)
|
||||||
|
ret = -EINVAL;
|
||||||
|
else
|
||||||
|
tp->keepalive_probes = val;
|
||||||
|
break;
|
||||||
|
case TCP_SYNCNT:
|
||||||
|
if (val < 1 || val > MAX_TCP_SYNCNT)
|
||||||
|
ret = -EINVAL;
|
||||||
|
else
|
||||||
|
icsk->icsk_syn_retries = val;
|
||||||
|
break;
|
||||||
|
case TCP_USER_TIMEOUT:
|
||||||
|
if (val < 0)
|
||||||
|
ret = -EINVAL;
|
||||||
|
else
|
||||||
|
icsk->icsk_user_timeout = val;
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
ret = -EINVAL;
|
ret = -EINVAL;
|
||||||
}
|
}
|
||||||
|
@@ -1621,10 +1621,13 @@ union bpf_attr {
|
|||||||
*
|
*
|
||||||
* * **SOL_SOCKET**, which supports the following *optname*\ s:
|
* * **SOL_SOCKET**, which supports the following *optname*\ s:
|
||||||
* **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**,
|
* **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**,
|
||||||
* **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**.
|
* **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**,
|
||||||
|
* **SO_BINDTODEVICE**, **SO_KEEPALIVE**.
|
||||||
* * **IPPROTO_TCP**, which supports the following *optname*\ s:
|
* * **IPPROTO_TCP**, which supports the following *optname*\ s:
|
||||||
* **TCP_CONGESTION**, **TCP_BPF_IW**,
|
* **TCP_CONGESTION**, **TCP_BPF_IW**,
|
||||||
* **TCP_BPF_SNDCWND_CLAMP**.
|
* **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**,
|
||||||
|
* **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**,
|
||||||
|
* **TCP_SYNCNT**, **TCP_USER_TIMEOUT**.
|
||||||
* * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
|
* * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
|
||||||
* * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
|
* * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
|
||||||
* Return
|
* Return
|
||||||
|
@@ -104,6 +104,30 @@ static __inline int bind_to_device(struct bpf_sock_addr *ctx)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static __inline int set_keepalive(struct bpf_sock_addr *ctx)
|
||||||
|
{
|
||||||
|
int zero = 0, one = 1;
|
||||||
|
|
||||||
|
if (bpf_setsockopt(ctx, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(one)))
|
||||||
|
return 1;
|
||||||
|
if (ctx->type == SOCK_STREAM) {
|
||||||
|
if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPIDLE, &one, sizeof(one)))
|
||||||
|
return 1;
|
||||||
|
if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPINTVL, &one, sizeof(one)))
|
||||||
|
return 1;
|
||||||
|
if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPCNT, &one, sizeof(one)))
|
||||||
|
return 1;
|
||||||
|
if (bpf_setsockopt(ctx, SOL_TCP, TCP_SYNCNT, &one, sizeof(one)))
|
||||||
|
return 1;
|
||||||
|
if (bpf_setsockopt(ctx, SOL_TCP, TCP_USER_TIMEOUT, &one, sizeof(one)))
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
if (bpf_setsockopt(ctx, SOL_SOCKET, SO_KEEPALIVE, &zero, sizeof(zero)))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
SEC("cgroup/connect4")
|
SEC("cgroup/connect4")
|
||||||
int connect_v4_prog(struct bpf_sock_addr *ctx)
|
int connect_v4_prog(struct bpf_sock_addr *ctx)
|
||||||
{
|
{
|
||||||
@@ -121,6 +145,9 @@ int connect_v4_prog(struct bpf_sock_addr *ctx)
|
|||||||
if (bind_to_device(ctx))
|
if (bind_to_device(ctx))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
if (set_keepalive(ctx))
|
||||||
|
return 0;
|
||||||
|
|
||||||
if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
|
if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
|
||||||
return 0;
|
return 0;
|
||||||
else if (ctx->type == SOCK_STREAM)
|
else if (ctx->type == SOCK_STREAM)
|
||||||
|
Reference in New Issue
Block a user