Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says:

====================
pull-request: bpf-next 2019-02-16

The following pull-request contains BPF updates for your *net-next* tree.

The main changes are:

1) numerous libbpf API improvements, from Andrii, Andrey, Yonghong.

2) test all bpf progs in alu32 mode, from Jiong.

3) skb->sk access and bpf_sk_fullsock(), bpf_tcp_sock() helpers, from Martin.

4) support for IP encap in lwt bpf progs, from Peter.

5) remove XDP_QUERY_XSK_UMEM dead code, from Jan.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
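Items 3) and 4) are the user-visible features in the diff below. As a minimal sketch of how a program might use the new skb->sk field together with the bpf_sk_fullsock() and bpf_tcp_sock() helpers (in the spirit of the selftests accompanying this series; SEC() names and the toy policy are illustrative, and bpf_helpers.h is assumed from tools/testing/selftests/bpf):

/* Sketch: inspect TCP state through the new skb->sk field and helpers. */
#include <linux/bpf.h>
#include "bpf_helpers.h"

SEC("cgroup_skb/egress")
int read_sock_fields(struct __sk_buff *skb)
{
	struct bpf_sock *sk = skb->sk;	/* PTR_TO_SOCK_COMMON_OR_NULL */
	struct bpf_tcp_sock *tp;

	if (!sk)
		return 1;		/* no socket attached: allow */
	sk = bpf_sk_fullsock(sk);	/* promote to a full socket */
	if (!sk)
		return 1;
	if (sk->state != BPF_TCP_ESTABLISHED)
		return 1;
	tp = bpf_tcp_sock(sk);		/* NULL for non-TCP sockets */
	if (!tp)
		return 1;
	/* toy policy: drop while the send window is collapsed */
	return tp->snd_cwnd > 0 ? 1 : 0;
}

char _license[] SEC("license") = "GPL";

The NULL checks are mandatory: both helpers return RET_PTR_TO_..._OR_NULL types, so the verifier rejects any dereference before the check.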
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -403,7 +403,7 @@ config LWTUNNEL
 
 config LWTUNNEL_BPF
 	bool "Execute BPF program as route nexthop action"
-	depends on LWTUNNEL
+	depends on LWTUNNEL && INET
 	default y if LWTUNNEL=y
 	---help---
 	  Allows to run BPF programs as a nexthop action following a route
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -73,6 +73,7 @@
 #include <linux/seg6_local.h>
 #include <net/seg6.h>
 #include <net/seg6_local.h>
+#include <net/lwtunnel.h>
 
 /**
  * sk_filter_trim_cap - run a packet through a socket filter
@@ -1793,6 +1794,20 @@ static const struct bpf_func_proto bpf_skb_pull_data_proto = {
 	.arg2_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_1(bpf_sk_fullsock, struct sock *, sk)
+{
+	sk = sk_to_full_sk(sk);
+
+	return sk_fullsock(sk) ? (unsigned long)sk : (unsigned long)NULL;
+}
+
+static const struct bpf_func_proto bpf_sk_fullsock_proto = {
+	.func		= bpf_sk_fullsock,
+	.gpl_only	= false,
+	.ret_type	= RET_PTR_TO_SOCKET_OR_NULL,
+	.arg1_type	= ARG_PTR_TO_SOCK_COMMON,
+};
+
 static inline int sk_skb_try_make_writable(struct sk_buff *skb,
 					   unsigned int write_len)
 {
@@ -4803,7 +4818,15 @@ static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len
 }
 #endif /* CONFIG_IPV6_SEG6_BPF */
 
-BPF_CALL_4(bpf_lwt_push_encap, struct sk_buff *, skb, u32, type, void *, hdr,
-	   u32, len)
+#if IS_ENABLED(CONFIG_LWTUNNEL_BPF)
+static int bpf_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len,
+			     bool ingress)
+{
+	return bpf_lwt_push_ip_encap(skb, hdr, len, ingress);
+}
+#endif
+
+BPF_CALL_4(bpf_lwt_in_push_encap, struct sk_buff *, skb, u32, type, void *, hdr,
+	   u32, len)
 {
 	switch (type) {
@@ -4811,14 +4834,41 @@ BPF_CALL_4(bpf_lwt_push_encap, struct sk_buff *, skb, u32, type, void *, hdr,
 	case BPF_LWT_ENCAP_SEG6:
 	case BPF_LWT_ENCAP_SEG6_INLINE:
 		return bpf_push_seg6_encap(skb, type, hdr, len);
 #endif
+#if IS_ENABLED(CONFIG_LWTUNNEL_BPF)
+	case BPF_LWT_ENCAP_IP:
+		return bpf_push_ip_encap(skb, hdr, len, true /* ingress */);
+#endif
 	default:
 		return -EINVAL;
 	}
 }
 
-static const struct bpf_func_proto bpf_lwt_push_encap_proto = {
-	.func		= bpf_lwt_push_encap,
+BPF_CALL_4(bpf_lwt_xmit_push_encap, struct sk_buff *, skb, u32, type,
+	   void *, hdr, u32, len)
+{
+	switch (type) {
+#if IS_ENABLED(CONFIG_LWTUNNEL_BPF)
+	case BPF_LWT_ENCAP_IP:
+		return bpf_push_ip_encap(skb, hdr, len, false /* egress */);
+#endif
+	default:
+		return -EINVAL;
+	}
+}
+
+static const struct bpf_func_proto bpf_lwt_in_push_encap_proto = {
+	.func		= bpf_lwt_in_push_encap,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_PTR_TO_MEM,
+	.arg4_type	= ARG_CONST_SIZE
+};
+
+static const struct bpf_func_proto bpf_lwt_xmit_push_encap_proto = {
+	.func		= bpf_lwt_xmit_push_encap,
 	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
 	.arg1_type	= ARG_PTR_TO_CTX,
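Both kernel implementations are exposed to programs as the single BPF_FUNC_lwt_push_encap helper; which one runs depends on the attach point (see lwt_in_func_proto()/lwt_xmit_func_proto() further down). A sketch of an lwt xmit program pushing an outer IPv4/IPIP header and asking the stack to re-route (addresses are placeholders; bpf_helpers.h/bpf_endian.h assumed from the kernel selftests):

#include <linux/bpf.h>
#include <linux/ip.h>
#include <linux/in.h>
#include "bpf_helpers.h"
#include "bpf_endian.h"

SEC("lwt_xmit")
int ipip_encap(struct __sk_buff *skb)
{
	/* outer header; .check is left 0 so the kernel computes it,
	 * see bpf_lwt_push_ip_encap() later in this diff
	 */
	struct iphdr iph = {
		.version  = 4,
		.ihl      = 5,
		.ttl      = 0x40,
		.protocol = IPPROTO_IPIP,
		.saddr    = bpf_htonl(0x0a000001),	/* 10.0.0.1, placeholder */
		.daddr    = bpf_htonl(0x0a000002),	/* 10.0.0.2, placeholder */
		.tot_len  = bpf_htons(skb->len + sizeof(struct iphdr)),
	};

	if (bpf_lwt_push_encap(skb, BPF_LWT_ENCAP_IP, &iph, sizeof(iph)))
		return BPF_DROP;

	/* the new outer header needs a fresh route lookup */
	return BPF_LWT_REROUTE;
}

char _license[] SEC("license") = "GPL";

Such an object would typically be attached with iproute2's existing lwt bpf support, roughly: ip route add 10.0.0.2/32 encap bpf xmit obj ipip_encap.o section lwt_xmit dev eth0 (command shape assumed, not part of this diff).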
@@ -5018,6 +5068,54 @@ static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = {
 };
 #endif /* CONFIG_IPV6_SEG6_BPF */
 
+#define CONVERT_COMMON_TCP_SOCK_FIELDS(md_type, CONVERT) \
+do { \
+	switch (si->off) { \
+	case offsetof(md_type, snd_cwnd): \
+		CONVERT(snd_cwnd); break; \
+	case offsetof(md_type, srtt_us): \
+		CONVERT(srtt_us); break; \
+	case offsetof(md_type, snd_ssthresh): \
+		CONVERT(snd_ssthresh); break; \
+	case offsetof(md_type, rcv_nxt): \
+		CONVERT(rcv_nxt); break; \
+	case offsetof(md_type, snd_nxt): \
+		CONVERT(snd_nxt); break; \
+	case offsetof(md_type, snd_una): \
+		CONVERT(snd_una); break; \
+	case offsetof(md_type, mss_cache): \
+		CONVERT(mss_cache); break; \
+	case offsetof(md_type, ecn_flags): \
+		CONVERT(ecn_flags); break; \
+	case offsetof(md_type, rate_delivered): \
+		CONVERT(rate_delivered); break; \
+	case offsetof(md_type, rate_interval_us): \
+		CONVERT(rate_interval_us); break; \
+	case offsetof(md_type, packets_out): \
+		CONVERT(packets_out); break; \
+	case offsetof(md_type, retrans_out): \
+		CONVERT(retrans_out); break; \
+	case offsetof(md_type, total_retrans): \
+		CONVERT(total_retrans); break; \
+	case offsetof(md_type, segs_in): \
+		CONVERT(segs_in); break; \
+	case offsetof(md_type, data_segs_in): \
+		CONVERT(data_segs_in); break; \
+	case offsetof(md_type, segs_out): \
+		CONVERT(segs_out); break; \
+	case offsetof(md_type, data_segs_out): \
+		CONVERT(data_segs_out); break; \
+	case offsetof(md_type, lost_out): \
+		CONVERT(lost_out); break; \
+	case offsetof(md_type, sacked_out): \
+		CONVERT(sacked_out); break; \
+	case offsetof(md_type, bytes_received): \
+		CONVERT(bytes_received); break; \
+	case offsetof(md_type, bytes_acked): \
+		CONVERT(bytes_acked); break; \
+	} \
+} while (0)
+
 #ifdef CONFIG_INET
 static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
 			      int dif, int sdif, u8 family, u8 proto)
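CONVERT_COMMON_TCP_SOCK_FIELDS centralizes the tcp_sock field list shared by struct bpf_tcp_sock and struct bpf_sock_ops; the CONVERT callback decides what each case emits. For instance, instantiated with BPF_TCP_SOCK_GET_COMMON (defined in the next hunk), the snd_cwnd case expands to roughly the following (expansion sketch, not a separate piece of code in the diff):

case offsetof(struct bpf_tcp_sock, snd_cwnd):
	BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, snd_cwnd) >
		     FIELD_SIZEOF(struct bpf_tcp_sock, snd_cwnd));
	/* one ctx-rewritten load:
	 * dst_reg = ((struct tcp_sock *)src_reg)->snd_cwnd
	 */
	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct tcp_sock, snd_cwnd),
			      si->dst_reg, si->src_reg,
			      offsetof(struct tcp_sock, snd_cwnd));
	break;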
@@ -5255,6 +5353,79 @@ static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = {
 	.arg5_type	= ARG_ANYTHING,
 };
 
+bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
+				  struct bpf_insn_access_aux *info)
+{
+	if (off < 0 || off >= offsetofend(struct bpf_tcp_sock, bytes_acked))
+		return false;
+
+	if (off % size != 0)
+		return false;
+
+	switch (off) {
+	case offsetof(struct bpf_tcp_sock, bytes_received):
+	case offsetof(struct bpf_tcp_sock, bytes_acked):
+		return size == sizeof(__u64);
+	default:
+		return size == sizeof(__u32);
+	}
+}
+
+u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
+				    const struct bpf_insn *si,
+				    struct bpf_insn *insn_buf,
+				    struct bpf_prog *prog, u32 *target_size)
+{
+	struct bpf_insn *insn = insn_buf;
+
+#define BPF_TCP_SOCK_GET_COMMON(FIELD) \
+	do { \
+		BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, FIELD) > \
+			     FIELD_SIZEOF(struct bpf_tcp_sock, FIELD)); \
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct tcp_sock, FIELD),\
+				      si->dst_reg, si->src_reg, \
+				      offsetof(struct tcp_sock, FIELD)); \
+	} while (0)
+
+	CONVERT_COMMON_TCP_SOCK_FIELDS(struct bpf_tcp_sock,
+				       BPF_TCP_SOCK_GET_COMMON);
+
+	if (insn > insn_buf)
+		return insn - insn_buf;
+
+	switch (si->off) {
+	case offsetof(struct bpf_tcp_sock, rtt_min):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, rtt_min) !=
+			     sizeof(struct minmax));
+		BUILD_BUG_ON(sizeof(struct minmax) <
+			     sizeof(struct minmax_sample));
+
+		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
+				      offsetof(struct tcp_sock, rtt_min) +
+				      offsetof(struct minmax_sample, v));
+		break;
+	}
+
+	return insn - insn_buf;
+}
+
+BPF_CALL_1(bpf_tcp_sock, struct sock *, sk)
+{
+	sk = sk_to_full_sk(sk);
+
+	if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
+		return (unsigned long)sk;
+
+	return (unsigned long)NULL;
+}
+
+static const struct bpf_func_proto bpf_tcp_sock_proto = {
+	.func		= bpf_tcp_sock,
+	.gpl_only	= false,
+	.ret_type	= RET_PTR_TO_TCP_SOCK_OR_NULL,
+	.arg1_type	= ARG_PTR_TO_SOCK_COMMON,
+};
+
 #endif /* CONFIG_INET */
 
 bool bpf_helper_changes_pkt_data(void *func)
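With BPF_FUNC_tcp_sock also wired up for tc programs in the hunks below, a classifier can inspect the TCP state of the socket associated with an skb. A sketch (selftests-style headers assumed; note that bytes_received/bytes_acked are __u64 and must be read with full 8-byte loads, per bpf_tcp_sock_is_valid_access() above):

#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include "bpf_helpers.h"

SEC("classifier")
int cls_tcp_stats(struct __sk_buff *skb)
{
	struct bpf_sock *sk = skb->sk;
	struct bpf_tcp_sock *tp;

	if (!sk)
		return TC_ACT_OK;
	sk = bpf_sk_fullsock(sk);
	if (!sk)
		return TC_ACT_OK;
	tp = bpf_tcp_sock(sk);
	if (!tp)
		return TC_ACT_OK;

	/* 8-byte load required for the __u64 fields */
	if (tp->bytes_acked > (1ULL << 30))
		return TC_ACT_OK;	/* a real program might reclassify long flows here */

	return TC_ACT_OK;
}

char _license[] SEC("license") = "GPL";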
@@ -5284,7 +5455,8 @@ bool bpf_helper_changes_pkt_data(void *func)
 	    func == bpf_lwt_seg6_adjust_srh ||
 	    func == bpf_lwt_seg6_action ||
 #endif
-	    func == bpf_lwt_push_encap)
+	    func == bpf_lwt_in_push_encap ||
+	    func == bpf_lwt_xmit_push_encap)
 		return true;
 
 	return false;
@@ -5408,6 +5580,12 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	switch (func_id) {
 	case BPF_FUNC_get_local_storage:
 		return &bpf_get_local_storage_proto;
+	case BPF_FUNC_sk_fullsock:
+		return &bpf_sk_fullsock_proto;
+#ifdef CONFIG_INET
+	case BPF_FUNC_tcp_sock:
+		return &bpf_tcp_sock_proto;
+#endif
 	default:
 		return sk_filter_func_proto(func_id, prog);
 	}
@@ -5479,6 +5657,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_get_socket_uid_proto;
 	case BPF_FUNC_fib_lookup:
 		return &bpf_skb_fib_lookup_proto;
+	case BPF_FUNC_sk_fullsock:
+		return &bpf_sk_fullsock_proto;
 #ifdef CONFIG_XFRM
 	case BPF_FUNC_skb_get_xfrm_state:
 		return &bpf_skb_get_xfrm_state_proto;
@@ -5496,6 +5676,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_sk_lookup_udp_proto;
 	case BPF_FUNC_sk_release:
 		return &bpf_sk_release_proto;
+	case BPF_FUNC_tcp_sock:
+		return &bpf_tcp_sock_proto;
 #endif
 	default:
 		return bpf_base_func_proto(func_id);
@@ -5672,7 +5854,7 @@ lwt_in_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
 	switch (func_id) {
 	case BPF_FUNC_lwt_push_encap:
-		return &bpf_lwt_push_encap_proto;
+		return &bpf_lwt_in_push_encap_proto;
 	default:
 		return lwt_out_func_proto(func_id, prog);
 	}
@@ -5708,6 +5890,8 @@ lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_l4_csum_replace_proto;
 	case BPF_FUNC_set_hash_invalid:
 		return &bpf_set_hash_invalid_proto;
+	case BPF_FUNC_lwt_push_encap:
+		return &bpf_lwt_xmit_push_encap_proto;
 	default:
 		return lwt_out_func_proto(func_id, prog);
 	}
@@ -5766,6 +5950,11 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
 		if (size != sizeof(__u64))
 			return false;
 		break;
+	case offsetof(struct __sk_buff, sk):
+		if (type == BPF_WRITE || size != sizeof(__u64))
+			return false;
+		info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL;
+		break;
 	default:
 		/* Only narrow read access allowed for now. */
 		if (type == BPF_WRITE) {
@@ -5937,31 +6126,44 @@ full_access:
 	return true;
 }
 
-static bool __sock_filter_check_size(int off, int size,
+bool bpf_sock_common_is_valid_access(int off, int size,
+				     enum bpf_access_type type,
 				     struct bpf_insn_access_aux *info)
 {
-	const int size_default = sizeof(__u32);
-
 	switch (off) {
-	case bpf_ctx_range(struct bpf_sock, src_ip4):
-	case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
-		bpf_ctx_record_field_size(info, size_default);
-		return bpf_ctx_narrow_access_ok(off, size, size_default);
+	case bpf_ctx_range_till(struct bpf_sock, type, priority):
+		return false;
+	default:
+		return bpf_sock_is_valid_access(off, size, type, info);
 	}
-
-	return size == size_default;
 }
 
 bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
 			      struct bpf_insn_access_aux *info)
 {
+	const int size_default = sizeof(__u32);
+
 	if (off < 0 || off >= sizeof(struct bpf_sock))
 		return false;
 	if (off % size != 0)
 		return false;
-	if (!__sock_filter_check_size(off, size, info))
-		return false;
-	return true;
+
+	switch (off) {
+	case offsetof(struct bpf_sock, state):
+	case offsetof(struct bpf_sock, family):
+	case offsetof(struct bpf_sock, type):
+	case offsetof(struct bpf_sock, protocol):
+	case offsetof(struct bpf_sock, dst_port):
+	case offsetof(struct bpf_sock, src_port):
+	case bpf_ctx_range(struct bpf_sock, src_ip4):
+	case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
+	case bpf_ctx_range(struct bpf_sock, dst_ip4):
+	case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]):
+		bpf_ctx_record_field_size(info, size_default);
+		return bpf_ctx_narrow_access_ok(off, size, size_default);
+	}
+
+	return size == size_default;
 }
 
 static bool sock_filter_is_valid_access(int off, int size,
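bpf_sock_common_is_valid_access() is what backs the PTR_TO_SOCK_COMMON pointer returned for skb->sk: the type..priority range of struct bpf_sock lives in struct sock only, so it is rejected until the pointer is promoted with bpf_sk_fullsock(), while the remaining fields are backed by struct sock_common and stay readable. A sketch of the distinction (field names from the uapi struct bpf_sock; headers as in the earlier sketches):

#include <linux/bpf.h>
#include "bpf_helpers.h"

SEC("cgroup_skb/egress")
int sock_common_fields(struct __sk_buff *skb)
{
	struct bpf_sock *sk = skb->sk;

	if (!sk)
		return 1;
	/* fine on a sock-common pointer: state, family, addresses, ports */
	if (sk->state == BPF_TCP_LISTEN)
		return 1;
	/* reading sk->type or sk->priority here would be rejected by the
	 * verifier until sk = bpf_sk_fullsock(sk) succeeds
	 */
	return 1;
}

char _license[] SEC("license") = "GPL";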
@@ -6750,6 +6952,13 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
 		off += offsetof(struct qdisc_skb_cb, pkt_len);
 		*target_size = 4;
 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, off);
 		break;
+
+	case offsetof(struct __sk_buff, sk):
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct sk_buff, sk));
+		break;
 	}
 
 	return insn - insn_buf;
@@ -6798,24 +7007,32 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
 		break;
 
 	case offsetof(struct bpf_sock, family):
-		BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_family) != 2);
-
-		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
-				      offsetof(struct sock, sk_family));
+		*insn++ = BPF_LDX_MEM(
+			BPF_FIELD_SIZEOF(struct sock_common, skc_family),
+			si->dst_reg, si->src_reg,
+			bpf_target_off(struct sock_common,
+				       skc_family,
+				       FIELD_SIZEOF(struct sock_common,
+						    skc_family),
+				       target_size));
 		break;
 
 	case offsetof(struct bpf_sock, type):
+		BUILD_BUG_ON(HWEIGHT32(SK_FL_TYPE_MASK) != BITS_PER_BYTE * 2);
 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
 				      offsetof(struct sock, __sk_flags_offset));
 		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK);
 		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT);
+		*target_size = 2;
 		break;
 
 	case offsetof(struct bpf_sock, protocol):
+		BUILD_BUG_ON(HWEIGHT32(SK_FL_PROTO_MASK) != BITS_PER_BYTE);
 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
 				      offsetof(struct sock, __sk_flags_offset));
 		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
 		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_PROTO_SHIFT);
+		*target_size = 1;
 		break;
 
 	case offsetof(struct bpf_sock, src_ip4):
@@ -6827,6 +7044,15 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
 				       target_size));
 		break;
 
+	case offsetof(struct bpf_sock, dst_ip4):
+		*insn++ = BPF_LDX_MEM(
+			BPF_SIZE(si->code), si->dst_reg, si->src_reg,
+			bpf_target_off(struct sock_common, skc_daddr,
+				       FIELD_SIZEOF(struct sock_common,
+						    skc_daddr),
+				       target_size));
+		break;
+
 	case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
 #if IS_ENABLED(CONFIG_IPV6)
 		off = si->off;
@@ -6845,6 +7071,23 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
 #endif
 		break;
 
+	case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]):
+#if IS_ENABLED(CONFIG_IPV6)
+		off = si->off;
+		off -= offsetof(struct bpf_sock, dst_ip6[0]);
+		*insn++ = BPF_LDX_MEM(
+			BPF_SIZE(si->code), si->dst_reg, si->src_reg,
+			bpf_target_off(struct sock_common,
+				       skc_v6_daddr.s6_addr32[0],
+				       FIELD_SIZEOF(struct sock_common,
+						    skc_v6_daddr.s6_addr32[0]),
+				       target_size) + off);
+#else
+		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
+		*target_size = 4;
+#endif
+		break;
+
 	case offsetof(struct bpf_sock, src_port):
 		*insn++ = BPF_LDX_MEM(
 			BPF_FIELD_SIZEOF(struct sock_common, skc_num),
@@ -6854,6 +7097,26 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
 				       skc_num),
 			target_size));
 		break;
+
+	case offsetof(struct bpf_sock, dst_port):
+		*insn++ = BPF_LDX_MEM(
+			BPF_FIELD_SIZEOF(struct sock_common, skc_dport),
+			si->dst_reg, si->src_reg,
+			bpf_target_off(struct sock_common, skc_dport,
+				       FIELD_SIZEOF(struct sock_common,
+						    skc_dport),
+				       target_size));
+		break;
+
+	case offsetof(struct bpf_sock, state):
+		*insn++ = BPF_LDX_MEM(
+			BPF_FIELD_SIZEOF(struct sock_common, skc_state),
+			si->dst_reg, si->src_reg,
+			bpf_target_off(struct sock_common, skc_state,
+				       FIELD_SIZEOF(struct sock_common,
+						    skc_state),
+				       target_size));
+		break;
 	}
 
 	return insn - insn_buf;
@@ -7101,6 +7364,85 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
 	struct bpf_insn *insn = insn_buf;
 	int off;
 
+/* Helper macro for adding read access to tcp_sock or sock fields. */
+#define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \
+	do { \
+		BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \
+			     FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
+						struct bpf_sock_ops_kern, \
+						is_fullsock), \
+				      si->dst_reg, si->src_reg, \
+				      offsetof(struct bpf_sock_ops_kern, \
+					       is_fullsock)); \
+		*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 2); \
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
+						struct bpf_sock_ops_kern, sk),\
+				      si->dst_reg, si->src_reg, \
+				      offsetof(struct bpf_sock_ops_kern, sk));\
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(OBJ, \
+						       OBJ_FIELD), \
+				      si->dst_reg, si->dst_reg, \
+				      offsetof(OBJ, OBJ_FIELD)); \
+	} while (0)
+
+#define SOCK_OPS_GET_TCP_SOCK_FIELD(FIELD) \
+	SOCK_OPS_GET_FIELD(FIELD, FIELD, struct tcp_sock)
+
+/* Helper macro for adding write access to tcp_sock or sock fields.
+ * The macro is called with two registers, dst_reg which contains a pointer
+ * to ctx (context) and src_reg which contains the value that should be
+ * stored. However, we need an additional register since we cannot overwrite
+ * dst_reg because it may be used later in the program.
+ * Instead we "borrow" one of the other register. We first save its value
+ * into a new (temp) field in bpf_sock_ops_kern, use it, and then restore
+ * it at the end of the macro.
+ */
+#define SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \
+	do { \
+		int reg = BPF_REG_9; \
+		BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \
+			     FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \
+		if (si->dst_reg == reg || si->src_reg == reg) \
+			reg--; \
+		if (si->dst_reg == reg || si->src_reg == reg) \
+			reg--; \
+		*insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, reg, \
+				      offsetof(struct bpf_sock_ops_kern, \
+					       temp)); \
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
+						struct bpf_sock_ops_kern, \
+						is_fullsock), \
+				      reg, si->dst_reg, \
+				      offsetof(struct bpf_sock_ops_kern, \
+					       is_fullsock)); \
+		*insn++ = BPF_JMP_IMM(BPF_JEQ, reg, 0, 2); \
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
+						struct bpf_sock_ops_kern, sk),\
+				      reg, si->dst_reg, \
+				      offsetof(struct bpf_sock_ops_kern, sk));\
+		*insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD), \
+				      reg, si->src_reg, \
+				      offsetof(OBJ, OBJ_FIELD)); \
+		*insn++ = BPF_LDX_MEM(BPF_DW, reg, si->dst_reg, \
+				      offsetof(struct bpf_sock_ops_kern, \
+					       temp)); \
+	} while (0)
+
+#define SOCK_OPS_GET_OR_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ, TYPE) \
+	do { \
+		if (TYPE == BPF_WRITE) \
+			SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \
+		else \
+			SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \
+	} while (0)
+
+	CONVERT_COMMON_TCP_SOCK_FIELDS(struct bpf_sock_ops,
+				       SOCK_OPS_GET_TCP_SOCK_FIELD);
+
+	if (insn > insn_buf)
+		return insn - insn_buf;
+
 	switch (si->off) {
 	case offsetof(struct bpf_sock_ops, op) ...
 	     offsetof(struct bpf_sock_ops, replylong[3]):
@@ -7258,175 +7600,15 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
 					      FIELD_SIZEOF(struct minmax_sample, t));
 		break;
 
-/* Helper macro for adding read access to tcp_sock or sock fields. */
-#define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \
-	do { \
-		BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \
-			     FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \
-		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
-						struct bpf_sock_ops_kern, \
-						is_fullsock), \
-				      si->dst_reg, si->src_reg, \
-				      offsetof(struct bpf_sock_ops_kern, \
-					       is_fullsock)); \
-		*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 2); \
-		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
-						struct bpf_sock_ops_kern, sk),\
-				      si->dst_reg, si->src_reg, \
-				      offsetof(struct bpf_sock_ops_kern, sk));\
-		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(OBJ, \
-						       OBJ_FIELD), \
-				      si->dst_reg, si->dst_reg, \
-				      offsetof(OBJ, OBJ_FIELD)); \
-	} while (0)
-
-/* Helper macro for adding write access to tcp_sock or sock fields.
- * The macro is called with two registers, dst_reg which contains a pointer
- * to ctx (context) and src_reg which contains the value that should be
- * stored. However, we need an additional register since we cannot overwrite
- * dst_reg because it may be used later in the program.
- * Instead we "borrow" one of the other register. We first save its value
- * into a new (temp) field in bpf_sock_ops_kern, use it, and then restore
- * it at the end of the macro.
- */
-#define SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \
-	do { \
-		int reg = BPF_REG_9; \
-		BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \
-			     FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \
-		if (si->dst_reg == reg || si->src_reg == reg) \
-			reg--; \
-		if (si->dst_reg == reg || si->src_reg == reg) \
-			reg--; \
-		*insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, reg, \
-				      offsetof(struct bpf_sock_ops_kern, \
-					       temp)); \
-		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
-						struct bpf_sock_ops_kern, \
-						is_fullsock), \
-				      reg, si->dst_reg, \
-				      offsetof(struct bpf_sock_ops_kern, \
-					       is_fullsock)); \
-		*insn++ = BPF_JMP_IMM(BPF_JEQ, reg, 0, 2); \
-		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
-						struct bpf_sock_ops_kern, sk),\
-				      reg, si->dst_reg, \
-				      offsetof(struct bpf_sock_ops_kern, sk));\
-		*insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD), \
-				      reg, si->src_reg, \
-				      offsetof(OBJ, OBJ_FIELD)); \
-		*insn++ = BPF_LDX_MEM(BPF_DW, reg, si->dst_reg, \
-				      offsetof(struct bpf_sock_ops_kern, \
-					       temp)); \
-	} while (0)
-
-#define SOCK_OPS_GET_OR_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ, TYPE) \
-	do { \
-		if (TYPE == BPF_WRITE) \
-			SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \
-		else \
-			SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \
-	} while (0)
-
-	case offsetof(struct bpf_sock_ops, snd_cwnd):
-		SOCK_OPS_GET_FIELD(snd_cwnd, snd_cwnd, struct tcp_sock);
-		break;
-
-	case offsetof(struct bpf_sock_ops, srtt_us):
-		SOCK_OPS_GET_FIELD(srtt_us, srtt_us, struct tcp_sock);
-		break;
-
 	case offsetof(struct bpf_sock_ops, bpf_sock_ops_cb_flags):
 		SOCK_OPS_GET_FIELD(bpf_sock_ops_cb_flags, bpf_sock_ops_cb_flags,
 				   struct tcp_sock);
 		break;
-
-	case offsetof(struct bpf_sock_ops, snd_ssthresh):
-		SOCK_OPS_GET_FIELD(snd_ssthresh, snd_ssthresh, struct tcp_sock);
-		break;
-
-	case offsetof(struct bpf_sock_ops, rcv_nxt):
-		SOCK_OPS_GET_FIELD(rcv_nxt, rcv_nxt, struct tcp_sock);
-		break;
-
-	case offsetof(struct bpf_sock_ops, snd_nxt):
-		SOCK_OPS_GET_FIELD(snd_nxt, snd_nxt, struct tcp_sock);
-		break;
-
-	case offsetof(struct bpf_sock_ops, snd_una):
-		SOCK_OPS_GET_FIELD(snd_una, snd_una, struct tcp_sock);
-		break;
-
-	case offsetof(struct bpf_sock_ops, mss_cache):
-		SOCK_OPS_GET_FIELD(mss_cache, mss_cache, struct tcp_sock);
-		break;
-
-	case offsetof(struct bpf_sock_ops, ecn_flags):
-		SOCK_OPS_GET_FIELD(ecn_flags, ecn_flags, struct tcp_sock);
-		break;
-
-	case offsetof(struct bpf_sock_ops, rate_delivered):
-		SOCK_OPS_GET_FIELD(rate_delivered, rate_delivered,
-				   struct tcp_sock);
-		break;
-
-	case offsetof(struct bpf_sock_ops, rate_interval_us):
-		SOCK_OPS_GET_FIELD(rate_interval_us, rate_interval_us,
-				   struct tcp_sock);
-		break;
-
-	case offsetof(struct bpf_sock_ops, packets_out):
-		SOCK_OPS_GET_FIELD(packets_out, packets_out, struct tcp_sock);
-		break;
-
-	case offsetof(struct bpf_sock_ops, retrans_out):
-		SOCK_OPS_GET_FIELD(retrans_out, retrans_out, struct tcp_sock);
-		break;
-
-	case offsetof(struct bpf_sock_ops, total_retrans):
-		SOCK_OPS_GET_FIELD(total_retrans, total_retrans,
-				   struct tcp_sock);
-		break;
-
-	case offsetof(struct bpf_sock_ops, segs_in):
-		SOCK_OPS_GET_FIELD(segs_in, segs_in, struct tcp_sock);
-		break;
-
-	case offsetof(struct bpf_sock_ops, data_segs_in):
-		SOCK_OPS_GET_FIELD(data_segs_in, data_segs_in, struct tcp_sock);
-		break;
-
-	case offsetof(struct bpf_sock_ops, segs_out):
-		SOCK_OPS_GET_FIELD(segs_out, segs_out, struct tcp_sock);
-		break;
-
-	case offsetof(struct bpf_sock_ops, data_segs_out):
-		SOCK_OPS_GET_FIELD(data_segs_out, data_segs_out,
-				   struct tcp_sock);
-		break;
-
-	case offsetof(struct bpf_sock_ops, lost_out):
-		SOCK_OPS_GET_FIELD(lost_out, lost_out, struct tcp_sock);
-		break;
-
-	case offsetof(struct bpf_sock_ops, sacked_out):
-		SOCK_OPS_GET_FIELD(sacked_out, sacked_out, struct tcp_sock);
-		break;
 
 	case offsetof(struct bpf_sock_ops, sk_txhash):
 		SOCK_OPS_GET_OR_SET_FIELD(sk_txhash, sk_txhash,
 					  struct sock, type);
 		break;
-
-	case offsetof(struct bpf_sock_ops, bytes_received):
-		SOCK_OPS_GET_FIELD(bytes_received, bytes_received,
-				   struct tcp_sock);
-		break;
-
-	case offsetof(struct bpf_sock_ops, bytes_acked):
-		SOCK_OPS_GET_FIELD(bytes_acked, bytes_acked, struct tcp_sock);
-		break;
-
 	}
 	return insn - insn_buf;
 }
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -16,6 +16,8 @@
 #include <linux/types.h>
 #include <linux/bpf.h>
 #include <net/lwtunnel.h>
+#include <net/gre.h>
+#include <net/ip6_route.h>
 
 struct bpf_lwt_prog {
 	struct bpf_prog *prog;
@@ -55,6 +57,7 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
 
 	switch (ret) {
 	case BPF_OK:
+	case BPF_LWT_REROUTE:
 		break;
 
 	case BPF_REDIRECT:
@@ -87,6 +90,30 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
 	return ret;
 }
 
+static int bpf_lwt_input_reroute(struct sk_buff *skb)
+{
+	int err = -EINVAL;
+
+	if (skb->protocol == htons(ETH_P_IP)) {
+		struct iphdr *iph = ip_hdr(skb);
+
+		err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
+					   iph->tos, skb_dst(skb)->dev);
+	} else if (skb->protocol == htons(ETH_P_IPV6)) {
+		err = ipv6_stub->ipv6_route_input(skb);
+	} else {
+		err = -EAFNOSUPPORT;
+	}
+
+	if (err)
+		goto err;
+	return dst_input(skb);
+
+err:
+	kfree_skb(skb);
+	return err;
+}
+
 static int bpf_input(struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb_dst(skb);
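At the input hook the flow is: the program rewrites the packet (e.g. via bpf_lwt_push_encap() with BPF_LWT_ENCAP_IP) and returns BPF_LWT_REROUTE; bpf_input() below then calls bpf_lwt_input_reroute(), which redoes the route lookup for the new outer header and hands the skb to dst_input(). The program side is symmetrical to the earlier xmit sketch:

SEC("lwt_in")
int in_encap(struct __sk_buff *skb)
{
	struct iphdr iph = {	/* placeholder outer header, as in the xmit sketch */
		.version = 4, .ihl = 5, .ttl = 0x40, .protocol = IPPROTO_IPIP,
		.saddr = bpf_htonl(0x0a000001), .daddr = bpf_htonl(0x0a000002),
	};

	if (bpf_lwt_push_encap(skb, BPF_LWT_ENCAP_IP, &iph, sizeof(iph)))
		return BPF_DROP;
	return BPF_LWT_REROUTE;	/* triggers bpf_lwt_input_reroute() */
}

Attachment would use the "in" hook instead of "xmit", e.g. ip route add ... encap bpf in obj in_encap.o section lwt_in (command shape assumed).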
@@ -98,11 +125,11 @@ static int bpf_input(struct sk_buff *skb)
 		ret = run_lwt_bpf(skb, &bpf->in, dst, NO_REDIRECT);
 		if (ret < 0)
 			return ret;
+		if (ret == BPF_LWT_REROUTE)
+			return bpf_lwt_input_reroute(skb);
 	}
 
 	if (unlikely(!dst->lwtstate->orig_input)) {
-		pr_warn_once("orig_input not set on dst for prog %s\n",
-			     bpf->out.name);
 		kfree_skb(skb);
 		return -EINVAL;
 	}
@@ -147,6 +174,102 @@ static int xmit_check_hhlen(struct sk_buff *skb)
 	return 0;
 }
 
+static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
+{
+	struct net_device *l3mdev = l3mdev_master_dev_rcu(skb_dst(skb)->dev);
+	int oif = l3mdev ? l3mdev->ifindex : 0;
+	struct dst_entry *dst = NULL;
+	int err = -EAFNOSUPPORT;
+	struct sock *sk;
+	struct net *net;
+	bool ipv4;
+
+	if (skb->protocol == htons(ETH_P_IP))
+		ipv4 = true;
+	else if (skb->protocol == htons(ETH_P_IPV6))
+		ipv4 = false;
+	else
+		goto err;
+
+	sk = sk_to_full_sk(skb->sk);
+	if (sk) {
+		if (sk->sk_bound_dev_if)
+			oif = sk->sk_bound_dev_if;
+		net = sock_net(sk);
+	} else {
+		net = dev_net(skb_dst(skb)->dev);
+	}
+
+	if (ipv4) {
+		struct iphdr *iph = ip_hdr(skb);
+		struct flowi4 fl4 = {};
+		struct rtable *rt;
+
+		fl4.flowi4_oif = oif;
+		fl4.flowi4_mark = skb->mark;
+		fl4.flowi4_uid = sock_net_uid(net, sk);
+		fl4.flowi4_tos = RT_TOS(iph->tos);
+		fl4.flowi4_flags = FLOWI_FLAG_ANYSRC;
+		fl4.flowi4_proto = iph->protocol;
+		fl4.daddr = iph->daddr;
+		fl4.saddr = iph->saddr;
+
+		rt = ip_route_output_key(net, &fl4);
+		if (IS_ERR(rt)) {
+			err = PTR_ERR(rt);
+			goto err;
+		}
+		dst = &rt->dst;
+	} else {
+		struct ipv6hdr *iph6 = ipv6_hdr(skb);
+		struct flowi6 fl6 = {};
+
+		fl6.flowi6_oif = oif;
+		fl6.flowi6_mark = skb->mark;
+		fl6.flowi6_uid = sock_net_uid(net, sk);
+		fl6.flowlabel = ip6_flowinfo(iph6);
+		fl6.flowi6_proto = iph6->nexthdr;
+		fl6.daddr = iph6->daddr;
+		fl6.saddr = iph6->saddr;
+
+		err = ipv6_stub->ipv6_dst_lookup(net, skb->sk, &dst, &fl6);
+		if (unlikely(err))
+			goto err;
+		if (IS_ERR(dst)) {
+			err = PTR_ERR(dst);
+			goto err;
+		}
+	}
+	if (unlikely(dst->error)) {
+		err = dst->error;
+		dst_release(dst);
+		goto err;
+	}
+
+	/* Although skb header was reserved in bpf_lwt_push_ip_encap(), it
+	 * was done for the previous dst, so we are doing it here again, in
+	 * case the new dst needs much more space. The call below is a noop
+	 * if there is enough header space in skb.
+	 */
+	err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+	if (unlikely(err))
+		goto err;
+
+	skb_dst_drop(skb);
+	skb_dst_set(skb, dst);
+
+	err = dst_output(dev_net(skb_dst(skb)->dev), skb->sk, skb);
+	if (unlikely(err))
+		goto err;
+
+	/* ip[6]_finish_output2 understand LWTUNNEL_XMIT_DONE */
+	return LWTUNNEL_XMIT_DONE;
+
+err:
+	kfree_skb(skb);
+	return err;
+}
+
 static int bpf_xmit(struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb_dst(skb);
@@ -154,11 +277,20 @@ static int bpf_xmit(struct sk_buff *skb)
 
 	bpf = bpf_lwt_lwtunnel(dst->lwtstate);
 	if (bpf->xmit.prog) {
+		__be16 proto = skb->protocol;
 		int ret;
 
 		ret = run_lwt_bpf(skb, &bpf->xmit, dst, CAN_REDIRECT);
 		switch (ret) {
 		case BPF_OK:
+			/* If the header changed, e.g. via bpf_lwt_push_encap,
+			 * BPF_LWT_REROUTE below should have been used if the
+			 * protocol was also changed.
+			 */
+			if (skb->protocol != proto) {
+				kfree_skb(skb);
+				return -EINVAL;
+			}
 			/* If the header was expanded, headroom might be too
 			 * small for L2 header to come, expand as needed.
 			 */
@@ -169,6 +301,8 @@ static int bpf_xmit(struct sk_buff *skb)
 			return LWTUNNEL_XMIT_CONTINUE;
 		case BPF_REDIRECT:
 			return LWTUNNEL_XMIT_DONE;
+		case BPF_LWT_REROUTE:
+			return bpf_lwt_xmit_reroute(skb);
 		default:
 			return ret;
 		}
@@ -390,6 +524,133 @@ static const struct lwtunnel_encap_ops bpf_encap_ops = {
 	.owner		= THIS_MODULE,
 };
 
+static int handle_gso_type(struct sk_buff *skb, unsigned int gso_type,
+			   int encap_len)
+{
+	struct skb_shared_info *shinfo = skb_shinfo(skb);
+
+	gso_type |= SKB_GSO_DODGY;
+	shinfo->gso_type |= gso_type;
+	skb_decrease_gso_size(shinfo, encap_len);
+	shinfo->gso_segs = 0;
+	return 0;
+}
+
+static int handle_gso_encap(struct sk_buff *skb, bool ipv4, int encap_len)
+{
+	int next_hdr_offset;
+	void *next_hdr;
+	__u8 protocol;
+
+	/* SCTP and UDP_L4 gso need more nuanced handling than what
+	 * handle_gso_type() does above: skb_decrease_gso_size() is not enough.
+	 * So at the moment only TCP GSO packets are let through.
+	 */
+	if (!(skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
+		return -ENOTSUPP;
+
+	if (ipv4) {
+		protocol = ip_hdr(skb)->protocol;
+		next_hdr_offset = sizeof(struct iphdr);
+		next_hdr = skb_network_header(skb) + next_hdr_offset;
+	} else {
+		protocol = ipv6_hdr(skb)->nexthdr;
+		next_hdr_offset = sizeof(struct ipv6hdr);
+		next_hdr = skb_network_header(skb) + next_hdr_offset;
+	}
+
+	switch (protocol) {
+	case IPPROTO_GRE:
+		next_hdr_offset += sizeof(struct gre_base_hdr);
+		if (next_hdr_offset > encap_len)
+			return -EINVAL;
+
+		if (((struct gre_base_hdr *)next_hdr)->flags & GRE_CSUM)
+			return handle_gso_type(skb, SKB_GSO_GRE_CSUM,
+					       encap_len);
+		return handle_gso_type(skb, SKB_GSO_GRE, encap_len);
+
+	case IPPROTO_UDP:
+		next_hdr_offset += sizeof(struct udphdr);
+		if (next_hdr_offset > encap_len)
+			return -EINVAL;
+
+		if (((struct udphdr *)next_hdr)->check)
+			return handle_gso_type(skb, SKB_GSO_UDP_TUNNEL_CSUM,
+					       encap_len);
+		return handle_gso_type(skb, SKB_GSO_UDP_TUNNEL, encap_len);
+
+	case IPPROTO_IP:
+	case IPPROTO_IPV6:
+		if (ipv4)
+			return handle_gso_type(skb, SKB_GSO_IPXIP4, encap_len);
+		else
+			return handle_gso_type(skb, SKB_GSO_IPXIP6, encap_len);
+
+	default:
+		return -EPROTONOSUPPORT;
+	}
+}
+
+int bpf_lwt_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len, bool ingress)
+{
+	struct iphdr *iph;
+	bool ipv4;
+	int err;
+
+	if (unlikely(len < sizeof(struct iphdr) || len > LWT_BPF_MAX_HEADROOM))
+		return -EINVAL;
+
+	/* validate protocol and length */
+	iph = (struct iphdr *)hdr;
+	if (iph->version == 4) {
+		ipv4 = true;
+		if (unlikely(len < iph->ihl * 4))
+			return -EINVAL;
+	} else if (iph->version == 6) {
+		ipv4 = false;
+		if (unlikely(len < sizeof(struct ipv6hdr)))
+			return -EINVAL;
+	} else {
+		return -EINVAL;
+	}
+
+	if (ingress)
+		err = skb_cow_head(skb, len + skb->mac_len);
+	else
+		err = skb_cow_head(skb,
+				   len + LL_RESERVED_SPACE(skb_dst(skb)->dev));
+	if (unlikely(err))
+		return err;
+
+	/* push the encap headers and fix pointers */
+	skb_reset_inner_headers(skb);
+	skb->encapsulation = 1;
+	skb_push(skb, len);
+	if (ingress)
+		skb_postpush_rcsum(skb, iph, len);
+	skb_reset_network_header(skb);
+	memcpy(skb_network_header(skb), hdr, len);
+	bpf_compute_data_pointers(skb);
+	skb_clear_hash(skb);
+
+	if (ipv4) {
+		skb->protocol = htons(ETH_P_IP);
+		iph = ip_hdr(skb);
+
+		if (!iph->check)
+			iph->check = ip_fast_csum((unsigned char *)iph,
+						  iph->ihl);
+	} else {
+		skb->protocol = htons(ETH_P_IPV6);
+	}
+
+	if (skb_is_gso(skb))
+		return handle_gso_encap(skb, ipv4, len);
+
+	return 0;
+}
+
 static int __init bpf_lwt_init(void)
 {
 	return lwtunnel_encap_add_ops(&bpf_encap_ops, LWTUNNEL_ENCAP_BPF);
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -134,6 +134,11 @@ static int eafnosupport_ipv6_dst_lookup(struct net *net, struct sock *u1,
 	return -EAFNOSUPPORT;
 }
 
+static int eafnosupport_ipv6_route_input(struct sk_buff *skb)
+{
+	return -EAFNOSUPPORT;
+}
+
 static struct fib6_table *eafnosupport_fib6_get_table(struct net *net, u32 id)
 {
 	return NULL;
@@ -170,6 +175,7 @@ eafnosupport_ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
 
 const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
 	.ipv6_dst_lookup   = eafnosupport_ipv6_dst_lookup,
+	.ipv6_route_input  = eafnosupport_ipv6_route_input,
 	.fib6_get_table    = eafnosupport_fib6_get_table,
 	.fib6_table_lookup = eafnosupport_fib6_table_lookup,
 	.fib6_lookup       = eafnosupport_fib6_lookup,
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -900,10 +900,17 @@ static struct pernet_operations inet6_net_ops = {
 	.exit = inet6_net_exit,
 };
 
+static int ipv6_route_input(struct sk_buff *skb)
+{
+	ip6_route_input(skb);
+	return skb_dst(skb)->error;
+}
+
 static const struct ipv6_stub ipv6_stub_impl = {
 	.ipv6_sock_mc_join = ipv6_sock_mc_join,
 	.ipv6_sock_mc_drop = ipv6_sock_mc_drop,
 	.ipv6_dst_lookup = ip6_dst_lookup,
+	.ipv6_route_input = ipv6_route_input,
 	.fib6_get_table = fib6_get_table,
 	.fib6_table_lookup = fib6_table_lookup,
 	.fib6_lookup = fib6_lookup,