Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says:

====================
pull-request: bpf-next 2018-10-16

The following pull-request contains BPF updates for your *net-next* tree.

The main changes are:

1) Convert BPF sockmap and kTLS to both use a new sk_msg API and enable
   sk_msg BPF integration for the latter, from Daniel and John.

2) Enable BPF syscall side to indicate for maps that they do not support
   a map lookup operation as opposed to just missing key, from Prashant.

3) Add bpftool map create command which after map creation pins the
   map into bpf fs for further processing, from Jakub.

4) Add bpftool support for attaching programs to maps allowing sock_map
   and sock_hash to be used from bpftool, from John.

5) Improve syscall BPF map update/delete path for map-in-map types to
   wait a RCU grace period for pending references to complete, from Daniel.

6) Couple of follow-up fixes for the BPF socket lookup to get it
   enabled also when IPv6 is compiled as a module, from Joe.

7) Fix a generic-XDP bug to handle the case when the Ethernet header
   was mangled and thus update skb's protocol and data, from Jesper.

8) Add a missing BTF header length check between header copies from
   user space, from Wenwen.

9) Minor fixups in libbpf to use __u32 instead of u32 types and include
   proper perf_event.h uapi header instead of perf internal one, from Yonghong.

10) Allow to pass user-defined flags through EXTRA_CFLAGS and EXTRA_LDFLAGS
    to bpftool's build, from Jiri.

11) BPF kselftest tweaks to add LWTUNNEL to config fragment and to install
    with_addr.sh script from flow dissector selftest, from Anders.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
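For context on point 1: the new sk_msg API (net/core/skmsg.c below) keeps message payloads in a fixed-size ring of scatterlist entries indexed by sg.start and sg.end. A minimal sketch of the wrap-around walk that the helpers in this series perform — illustrative only, assuming just MAX_MSG_FRAGS and the sk_msg layout from the new <linux/skmsg.h>:

/* Illustrative sketch, not part of the commit: visit every populated
 * scatterlist element of a sk_msg. The ring wraps at MAX_MSG_FRAGS,
 * which is what sk_msg_iter_var_next() in the diff encapsulates.
 */
static void sketch_walk_msg_ring(struct sk_msg *msg)
{
	u32 i = msg->sg.start;

	while (i != msg->sg.end) {
		struct scatterlist *sge = &msg->sg.data[i];

		/* ... consume sge->length bytes at sge->offset ... */
		if (++i == MAX_MSG_FRAGS)
			i = 0;
	}
}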
net/Kconfig | 11
@@ -300,8 +300,11 @@ config BPF_JIT

config BPF_STREAM_PARSER
	bool "enable BPF STREAM_PARSER"
	depends on INET
	depends on BPF_SYSCALL
	depends on CGROUP_BPF
	select STREAM_PARSER
	select NET_SOCK_MSG
	---help---
	 Enabling this allows a stream parser to be used with
	 BPF_MAP_TYPE_SOCKMAP.
@@ -413,6 +416,14 @@ config GRO_CELLS
config SOCK_VALIDATE_XMIT
	bool

config NET_SOCK_MSG
	bool
	default n
	help
	  The NET_SOCK_MSG provides a framework for plain sockets (e.g. TCP) or
	  ULPs (upper layer modules, e.g. TLS) to process L7 application data
	  with the help of BPF programs.

config NET_DEVLINK
	tristate "Network physical/parent device Netlink interface"
	help
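The NET_SOCK_MSG help text above is terse; for orientation, this is the kind of BPF program the framework runs over socket data. A hypothetical, minimal example — sk_msg_md, SK_PASS and bpf_msg_apply_bytes() come from the existing BPF uapi, while the header path for bpf_helpers.h varies by tree:

#include <linux/bpf.h>
#include "bpf_helpers.h"

SEC("sk_msg")
int msg_verdict_prog(struct sk_msg_md *msg)
{
	/* Apply this verdict to the next 4 KiB of data on the socket. */
	bpf_msg_apply_bytes(msg, 4096);
	return SK_PASS;
}

char _license[] SEC("license") = "GPL";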
@@ -16,6 +16,7 @@ obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
 obj-y += net-sysfs.o
 obj-$(CONFIG_PAGE_POOL) += page_pool.o
 obj-$(CONFIG_PROC_FS) += net-procfs.o
+obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o
 obj-$(CONFIG_NET_PKTGEN) += pktgen.o
 obj-$(CONFIG_NETPOLL) += netpoll.o
 obj-$(CONFIG_FIB_RULES) += fib_rules.o
@@ -27,6 +28,7 @@ obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o
 obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o
 obj-$(CONFIG_LWTUNNEL) += lwtunnel.o
 obj-$(CONFIG_LWTUNNEL_BPF) += lwt_bpf.o
+obj-$(CONFIG_BPF_STREAM_PARSER) += sock_map.o
 obj-$(CONFIG_DST_CACHE) += dst_cache.o
 obj-$(CONFIG_HWBM) += hwbm.o
 obj-$(CONFIG_NET_DEVLINK) += devlink.o
@@ -4291,6 +4291,9 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 	struct netdev_rx_queue *rxqueue;
 	void *orig_data, *orig_data_end;
 	u32 metalen, act = XDP_DROP;
+	__be16 orig_eth_type;
+	struct ethhdr *eth;
+	bool orig_bcast;
 	int hlen, off;
 	u32 mac_len;
 
@@ -4331,6 +4334,9 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 	xdp->data_hard_start = skb->data - skb_headroom(skb);
 	orig_data_end = xdp->data_end;
 	orig_data = xdp->data;
+	eth = (struct ethhdr *)xdp->data;
+	orig_bcast = is_multicast_ether_addr_64bits(eth->h_dest);
+	orig_eth_type = eth->h_proto;
 
 	rxqueue = netif_get_rxqueue(skb);
 	xdp->rxq = &rxqueue->xdp_rxq;
@@ -4354,6 +4360,14 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 
 	}
 
+	/* check if XDP changed eth hdr such SKB needs update */
+	eth = (struct ethhdr *)xdp->data;
+	if ((orig_eth_type != eth->h_proto) ||
+	    (orig_bcast != is_multicast_ether_addr_64bits(eth->h_dest))) {
+		__skb_push(skb, ETH_HLEN);
+		skb->protocol = eth_type_trans(skb, skb->dev);
+	}
+
 	switch (act) {
 	case XDP_REDIRECT:
 	case XDP_TX:
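The fix above (point 7 in the summary) matters when an XDP program rewrites the Ethernet header before returning XDP_PASS, since generic XDP runs on an skb whose protocol and multicast state were derived earlier. A hypothetical program of the shape that triggers the re-sync (the MAC address and program name are made up for illustration):

#include <linux/bpf.h>
#include <linux/if_ether.h>
#include "bpf_helpers.h"

SEC("xdp")
int xdp_rewrite_dmac(struct xdp_md *ctx)
{
	void *data = (void *)(long)ctx->data;
	void *data_end = (void *)(long)ctx->data_end;
	struct ethhdr *eth = data;
	/* Hypothetical locally administered unicast address. */
	const unsigned char dmac[ETH_ALEN] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };

	if ((void *)(eth + 1) > data_end)
		return XDP_DROP;
	__builtin_memcpy(eth->h_dest, dmac, ETH_ALEN);
	/* After XDP_PASS, the patched netif_receive_generic_xdp() above
	 * re-derives skb->protocol and the multicast flag.
	 */
	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";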
@@ -38,6 +38,7 @@
 #include <net/protocol.h>
 #include <net/netlink.h>
 #include <linux/skbuff.h>
+#include <linux/skmsg.h>
 #include <net/sock.h>
 #include <net/flow_dissector.h>
 #include <linux/errno.h>
@@ -2142,123 +2143,7 @@ static const struct bpf_func_proto bpf_redirect_proto = {
 	.arg2_type = ARG_ANYTHING,
 };
 
-BPF_CALL_4(bpf_sk_redirect_hash, struct sk_buff *, skb,
-	   struct bpf_map *, map, void *, key, u64, flags)
-{
-	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
-
-	/* If user passes invalid input drop the packet. */
-	if (unlikely(flags & ~(BPF_F_INGRESS)))
-		return SK_DROP;
-
-	tcb->bpf.flags = flags;
-	tcb->bpf.sk_redir = __sock_hash_lookup_elem(map, key);
-	if (!tcb->bpf.sk_redir)
-		return SK_DROP;
-
-	return SK_PASS;
-}
-
-static const struct bpf_func_proto bpf_sk_redirect_hash_proto = {
-	.func = bpf_sk_redirect_hash,
-	.gpl_only = false,
-	.ret_type = RET_INTEGER,
-	.arg1_type = ARG_PTR_TO_CTX,
-	.arg2_type = ARG_CONST_MAP_PTR,
-	.arg3_type = ARG_PTR_TO_MAP_KEY,
-	.arg4_type = ARG_ANYTHING,
-};
-
-BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
-	   struct bpf_map *, map, u32, key, u64, flags)
-{
-	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
-
-	/* If user passes invalid input drop the packet. */
-	if (unlikely(flags & ~(BPF_F_INGRESS)))
-		return SK_DROP;
-
-	tcb->bpf.flags = flags;
-	tcb->bpf.sk_redir = __sock_map_lookup_elem(map, key);
-	if (!tcb->bpf.sk_redir)
-		return SK_DROP;
-
-	return SK_PASS;
-}
-
-struct sock *do_sk_redirect_map(struct sk_buff *skb)
-{
-	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
-
-	return tcb->bpf.sk_redir;
-}
-
-static const struct bpf_func_proto bpf_sk_redirect_map_proto = {
-	.func = bpf_sk_redirect_map,
-	.gpl_only = false,
-	.ret_type = RET_INTEGER,
-	.arg1_type = ARG_PTR_TO_CTX,
-	.arg2_type = ARG_CONST_MAP_PTR,
-	.arg3_type = ARG_ANYTHING,
-	.arg4_type = ARG_ANYTHING,
-};
-
-BPF_CALL_4(bpf_msg_redirect_hash, struct sk_msg_buff *, msg,
-	   struct bpf_map *, map, void *, key, u64, flags)
-{
-	/* If user passes invalid input drop the packet. */
-	if (unlikely(flags & ~(BPF_F_INGRESS)))
-		return SK_DROP;
-
-	msg->flags = flags;
-	msg->sk_redir = __sock_hash_lookup_elem(map, key);
-	if (!msg->sk_redir)
-		return SK_DROP;
-
-	return SK_PASS;
-}
-
-static const struct bpf_func_proto bpf_msg_redirect_hash_proto = {
-	.func = bpf_msg_redirect_hash,
-	.gpl_only = false,
-	.ret_type = RET_INTEGER,
-	.arg1_type = ARG_PTR_TO_CTX,
-	.arg2_type = ARG_CONST_MAP_PTR,
-	.arg3_type = ARG_PTR_TO_MAP_KEY,
-	.arg4_type = ARG_ANYTHING,
-};
-
-BPF_CALL_4(bpf_msg_redirect_map, struct sk_msg_buff *, msg,
-	   struct bpf_map *, map, u32, key, u64, flags)
-{
-	/* If user passes invalid input drop the packet. */
-	if (unlikely(flags & ~(BPF_F_INGRESS)))
-		return SK_DROP;
-
-	msg->flags = flags;
-	msg->sk_redir = __sock_map_lookup_elem(map, key);
-	if (!msg->sk_redir)
-		return SK_DROP;
-
-	return SK_PASS;
-}
-
-struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
-{
-	return msg->sk_redir;
-}
-
-static const struct bpf_func_proto bpf_msg_redirect_map_proto = {
-	.func = bpf_msg_redirect_map,
-	.gpl_only = false,
-	.ret_type = RET_INTEGER,
-	.arg1_type = ARG_PTR_TO_CTX,
-	.arg2_type = ARG_CONST_MAP_PTR,
-	.arg3_type = ARG_ANYTHING,
-	.arg4_type = ARG_ANYTHING,
-};
-
-BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg_buff *, msg, u32, bytes)
+BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg *, msg, u32, bytes)
 {
 	msg->apply_bytes = bytes;
 	return 0;
@@ -2272,7 +2157,7 @@ static const struct bpf_func_proto bpf_msg_apply_bytes_proto = {
 	.arg2_type = ARG_ANYTHING,
 };
 
-BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg_buff *, msg, u32, bytes)
+BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg *, msg, u32, bytes)
 {
 	msg->cork_bytes = bytes;
 	return 0;
@@ -2286,45 +2171,37 @@ static const struct bpf_func_proto bpf_msg_cork_bytes_proto = {
 	.arg2_type = ARG_ANYTHING,
 };
 
-#define sk_msg_iter_var(var)			\
-	do {					\
-		var++;				\
-		if (var == MAX_SKB_FRAGS)	\
-			var = 0;		\
-	} while (0)
-
-BPF_CALL_4(bpf_msg_pull_data,
-	   struct sk_msg_buff *, msg, u32, start, u32, end, u64, flags)
+BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start,
+	   u32, end, u64, flags)
 {
-	unsigned int len = 0, offset = 0, copy = 0, poffset = 0;
-	int bytes = end - start, bytes_sg_total;
-	struct scatterlist *sg = msg->sg_data;
-	int first_sg, last_sg, i, shift;
-	unsigned char *p, *to, *from;
+	u32 len = 0, offset = 0, copy = 0, poffset = 0, bytes = end - start;
+	u32 first_sge, last_sge, i, shift, bytes_sg_total;
+	struct scatterlist *sge;
+	u8 *raw, *to, *from;
 	struct page *page;
 
 	if (unlikely(flags || end <= start))
 		return -EINVAL;
 
 	/* First find the starting scatterlist element */
-	i = msg->sg_start;
+	i = msg->sg.start;
 	do {
-		len = sg[i].length;
+		len = sk_msg_elem(msg, i)->length;
 		if (start < offset + len)
 			break;
 		offset += len;
-		sk_msg_iter_var(i);
-	} while (i != msg->sg_end);
+		sk_msg_iter_var_next(i);
+	} while (i != msg->sg.end);
 
 	if (unlikely(start >= offset + len))
 		return -EINVAL;
 
-	first_sg = i;
+	first_sge = i;
 	/* The start may point into the sg element so we need to also
 	 * account for the headroom.
 	 */
 	bytes_sg_total = start - offset + bytes;
-	if (!msg->sg_copy[i] && bytes_sg_total <= len)
+	if (!msg->sg.copy[i] && bytes_sg_total <= len)
 		goto out;
 
 	/* At this point we need to linearize multiple scatterlist
@@ -2338,12 +2215,12 @@ BPF_CALL_4(bpf_msg_pull_data,
 	 * will copy the entire sg entry.
 	 */
 	do {
-		copy += sg[i].length;
-		sk_msg_iter_var(i);
+		copy += sk_msg_elem(msg, i)->length;
+		sk_msg_iter_var_next(i);
 		if (bytes_sg_total <= copy)
 			break;
-	} while (i != msg->sg_end);
-	last_sg = i;
+	} while (i != msg->sg.end);
+	last_sge = i;
 
 	if (unlikely(bytes_sg_total > copy))
 		return -EINVAL;
@@ -2352,63 +2229,61 @@ BPF_CALL_4(bpf_msg_pull_data,
 			   get_order(copy));
 	if (unlikely(!page))
 		return -ENOMEM;
-	p = page_address(page);
-
-	i = first_sg;
+	raw = page_address(page);
+	i = first_sge;
 	do {
-		from = sg_virt(&sg[i]);
-		len = sg[i].length;
-		to = p + poffset;
+		sge = sk_msg_elem(msg, i);
+		from = sg_virt(sge);
+		len = sge->length;
+		to = raw + poffset;
 
 		memcpy(to, from, len);
 		poffset += len;
-		sg[i].length = 0;
-		put_page(sg_page(&sg[i]));
+		sge->length = 0;
+		put_page(sg_page(sge));
 
-		sk_msg_iter_var(i);
-	} while (i != last_sg);
+		sk_msg_iter_var_next(i);
+	} while (i != last_sge);
 
-	sg[first_sg].length = copy;
-	sg_set_page(&sg[first_sg], page, copy, 0);
+	sg_set_page(&msg->sg.data[first_sge], page, copy, 0);
 
 	/* To repair sg ring we need to shift entries. If we only
 	 * had a single entry though we can just replace it and
 	 * be done. Otherwise walk the ring and shift the entries.
 	 */
-	WARN_ON_ONCE(last_sg == first_sg);
-	shift = last_sg > first_sg ?
-		last_sg - first_sg - 1 :
-		MAX_SKB_FRAGS - first_sg + last_sg - 1;
+	WARN_ON_ONCE(last_sge == first_sge);
+	shift = last_sge > first_sge ?
+		last_sge - first_sge - 1 :
+		MAX_SKB_FRAGS - first_sge + last_sge - 1;
 	if (!shift)
 		goto out;
 
-	i = first_sg;
-	sk_msg_iter_var(i);
+	i = first_sge;
+	sk_msg_iter_var_next(i);
 	do {
-		int move_from;
+		u32 move_from;
 
-		if (i + shift >= MAX_SKB_FRAGS)
-			move_from = i + shift - MAX_SKB_FRAGS;
+		if (i + shift >= MAX_MSG_FRAGS)
+			move_from = i + shift - MAX_MSG_FRAGS;
 		else
 			move_from = i + shift;
 
-		if (move_from == msg->sg_end)
+		if (move_from == msg->sg.end)
 			break;
 
-		sg[i] = sg[move_from];
-		sg[move_from].length = 0;
-		sg[move_from].page_link = 0;
-		sg[move_from].offset = 0;
-
-		sk_msg_iter_var(i);
+		msg->sg.data[i] = msg->sg.data[move_from];
+		msg->sg.data[move_from].length = 0;
+		msg->sg.data[move_from].page_link = 0;
+		msg->sg.data[move_from].offset = 0;
+		sk_msg_iter_var_next(i);
 	} while (1);
-	msg->sg_end -= shift;
-	if (msg->sg_end < 0)
-		msg->sg_end += MAX_SKB_FRAGS;
-out:
-	msg->data = sg_virt(&sg[first_sg]) + start - offset;
-	msg->data_end = msg->data + bytes;
-
+
+	msg->sg.end = msg->sg.end - shift > msg->sg.end ?
+		      msg->sg.end - shift + MAX_MSG_FRAGS :
+		      msg->sg.end - shift;
+out:
+	msg->data = sg_virt(&msg->sg.data[first_sge]) + start - offset;
+	msg->data_end = msg->data + bytes;
 	return 0;
 }
@@ -4821,9 +4696,12 @@ static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = {
 static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
 			      struct sk_buff *skb, u8 family, u8 proto)
 {
-	int dif = skb->dev->ifindex;
 	bool refcounted = false;
 	struct sock *sk = NULL;
+	int dif = 0;
+
+	if (skb->dev)
+		dif = skb->dev->ifindex;
 
 	if (family == AF_INET) {
 		__be32 src4 = tuple->ipv4.saddr;
@@ -4839,21 +4717,24 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
 			sk = __udp4_lib_lookup(net, src4, tuple->ipv4.sport,
 					       dst4, tuple->ipv4.dport,
 					       dif, sdif, &udp_table, skb);
-#if IS_REACHABLE(CONFIG_IPV6)
+#if IS_ENABLED(CONFIG_IPV6)
 	} else {
 		struct in6_addr *src6 = (struct in6_addr *)&tuple->ipv6.saddr;
 		struct in6_addr *dst6 = (struct in6_addr *)&tuple->ipv6.daddr;
+		u16 hnum = ntohs(tuple->ipv6.dport);
 		int sdif = inet6_sdif(skb);
 
 		if (proto == IPPROTO_TCP)
 			sk = __inet6_lookup(net, &tcp_hashinfo, skb, 0,
 					    src6, tuple->ipv6.sport,
-					    dst6, tuple->ipv6.dport,
+					    dst6, hnum,
 					    dif, sdif, &refcounted);
-		else
-			sk = __udp6_lib_lookup(net, src6, tuple->ipv6.sport,
-					       dst6, tuple->ipv6.dport,
-					       dif, sdif, &udp_table, skb);
+		else if (likely(ipv6_bpf_stub))
+			sk = ipv6_bpf_stub->udp6_lib_lookup(net,
+							    src6, tuple->ipv6.sport,
+							    dst6, hnum,
+							    dif, sdif,
+							    &udp_table, skb);
 #endif
 	}
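The ipv6_bpf_stub indirection above is what makes the lookup work when IPv6 is built as a module (point 6 in the summary): filter.c is always built-in and cannot call __udp6_lib_lookup() directly, so the IPv6 code registers a stub at module init and the helper calls through the pointer only if it is set. A sketch of the assumed shape — not the exact upstream definition:

/* Assumed shape for illustration only. Upstream declares the stub ops
 * in the IPv6 headers and fills them in when the ipv6 module loads.
 */
struct ipv6_bpf_stub {
	struct sock *(*udp6_lib_lookup)(struct net *net,
					const struct in6_addr *saddr, __be16 sport,
					const struct in6_addr *daddr, u16 hnum,
					int dif, int sdif,
					struct udp_table *tbl, struct sk_buff *skb);
};

/* NULL until the IPv6 module loads; sk_lookup() checks it before use. */
extern const struct ipv6_bpf_stub *ipv6_bpf_stub;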
@@ -5200,6 +5081,9 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	}
 }
 
+const struct bpf_func_proto bpf_sock_map_update_proto __weak;
+const struct bpf_func_proto bpf_sock_hash_update_proto __weak;
+
 static const struct bpf_func_proto *
 sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -5223,6 +5107,9 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	}
 }
 
+const struct bpf_func_proto bpf_msg_redirect_map_proto __weak;
+const struct bpf_func_proto bpf_msg_redirect_hash_proto __weak;
+
 static const struct bpf_func_proto *
 sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -5244,6 +5131,9 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	}
 }
 
+const struct bpf_func_proto bpf_sk_redirect_map_proto __weak;
+const struct bpf_func_proto bpf_sk_redirect_hash_proto __weak;
+
 static const struct bpf_func_proto *
 sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -6998,22 +6888,22 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
 
 	switch (si->off) {
 	case offsetof(struct sk_msg_md, data):
-		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg_buff, data),
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg, data),
 				      si->dst_reg, si->src_reg,
-				      offsetof(struct sk_msg_buff, data));
+				      offsetof(struct sk_msg, data));
 		break;
 	case offsetof(struct sk_msg_md, data_end):
-		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg_buff, data_end),
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg, data_end),
 				      si->dst_reg, si->src_reg,
-				      offsetof(struct sk_msg_buff, data_end));
+				      offsetof(struct sk_msg, data_end));
 		break;
 	case offsetof(struct sk_msg_md, family):
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2);
 
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
-					      struct sk_msg_buff, sk),
+					      struct sk_msg, sk),
 				      si->dst_reg, si->src_reg,
-				      offsetof(struct sk_msg_buff, sk));
+				      offsetof(struct sk_msg, sk));
 		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
 				      offsetof(struct sock_common, skc_family));
 		break;
@@ -7022,9 +6912,9 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4);
 
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
-					      struct sk_msg_buff, sk),
+					      struct sk_msg, sk),
 				      si->dst_reg, si->src_reg,
-				      offsetof(struct sk_msg_buff, sk));
+				      offsetof(struct sk_msg, sk));
 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
 				      offsetof(struct sock_common, skc_daddr));
 		break;
@@ -7034,9 +6924,9 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
 					  skc_rcv_saddr) != 4);
 
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
-					      struct sk_msg_buff, sk),
+					      struct sk_msg, sk),
 				      si->dst_reg, si->src_reg,
-				      offsetof(struct sk_msg_buff, sk));
+				      offsetof(struct sk_msg, sk));
 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
 				      offsetof(struct sock_common,
 					       skc_rcv_saddr));
@@ -7051,9 +6941,9 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
 		off = si->off;
 		off -= offsetof(struct sk_msg_md, remote_ip6[0]);
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
-					      struct sk_msg_buff, sk),
+					      struct sk_msg, sk),
 				      si->dst_reg, si->src_reg,
-				      offsetof(struct sk_msg_buff, sk));
+				      offsetof(struct sk_msg, sk));
 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
 				      offsetof(struct sock_common,
 					       skc_v6_daddr.s6_addr32[0]) +
@@ -7072,9 +6962,9 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
 		off = si->off;
 		off -= offsetof(struct sk_msg_md, local_ip6[0]);
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
-					      struct sk_msg_buff, sk),
+					      struct sk_msg, sk),
 				      si->dst_reg, si->src_reg,
-				      offsetof(struct sk_msg_buff, sk));
+				      offsetof(struct sk_msg, sk));
 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
 				      offsetof(struct sock_common,
 					       skc_v6_rcv_saddr.s6_addr32[0]) +
@@ -7088,9 +6978,9 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2);
 
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
-					      struct sk_msg_buff, sk),
+					      struct sk_msg, sk),
 				      si->dst_reg, si->src_reg,
-				      offsetof(struct sk_msg_buff, sk));
+				      offsetof(struct sk_msg, sk));
 		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
 				      offsetof(struct sock_common, skc_dport));
 #ifndef __BIG_ENDIAN_BITFIELD
@@ -7102,9 +6992,9 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2);
 
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
-					      struct sk_msg_buff, sk),
+					      struct sk_msg, sk),
 				      si->dst_reg, si->src_reg,
-				      offsetof(struct sk_msg_buff, sk));
+				      offsetof(struct sk_msg, sk));
 		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
 				      offsetof(struct sock_common, skc_num));
 		break;
net/core/skmsg.c | 802 (new file; all lines added)
@@ -0,0 +1,802 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2017 - 2018 Covalent IO, Inc. http://covalent.io */

#include <linux/skmsg.h>
#include <linux/skbuff.h>
#include <linux/scatterlist.h>

#include <net/sock.h>
#include <net/tcp.h>

static bool sk_msg_try_coalesce_ok(struct sk_msg *msg, int elem_first_coalesce)
{
	if (msg->sg.end > msg->sg.start &&
	    elem_first_coalesce < msg->sg.end)
		return true;

	if (msg->sg.end < msg->sg.start &&
	    (elem_first_coalesce > msg->sg.start ||
	     elem_first_coalesce < msg->sg.end))
		return true;

	return false;
}

int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len,
		 int elem_first_coalesce)
{
	struct page_frag *pfrag = sk_page_frag(sk);
	int ret = 0;

	len -= msg->sg.size;
	while (len > 0) {
		struct scatterlist *sge;
		u32 orig_offset;
		int use, i;

		if (!sk_page_frag_refill(sk, pfrag))
			return -ENOMEM;

		orig_offset = pfrag->offset;
		use = min_t(int, len, pfrag->size - orig_offset);
		if (!sk_wmem_schedule(sk, use))
			return -ENOMEM;

		i = msg->sg.end;
		sk_msg_iter_var_prev(i);
		sge = &msg->sg.data[i];

		if (sk_msg_try_coalesce_ok(msg, elem_first_coalesce) &&
		    sg_page(sge) == pfrag->page &&
		    sge->offset + sge->length == orig_offset) {
			sge->length += use;
		} else {
			if (sk_msg_full(msg)) {
				ret = -ENOSPC;
				break;
			}

			sge = &msg->sg.data[msg->sg.end];
			sg_unmark_end(sge);
			sg_set_page(sge, pfrag->page, use, orig_offset);
			get_page(pfrag->page);
			sk_msg_iter_next(msg, end);
		}

		sk_mem_charge(sk, use);
		msg->sg.size += use;
		pfrag->offset += use;
		len -= use;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(sk_msg_alloc);

int sk_msg_clone(struct sock *sk, struct sk_msg *dst, struct sk_msg *src,
		 u32 off, u32 len)
{
	int i = src->sg.start;
	struct scatterlist *sge = sk_msg_elem(src, i);
	u32 sge_len, sge_off;

	if (sk_msg_full(dst))
		return -ENOSPC;

	while (off) {
		if (sge->length > off)
			break;
		off -= sge->length;
		sk_msg_iter_var_next(i);
		if (i == src->sg.end && off)
			return -ENOSPC;
		sge = sk_msg_elem(src, i);
	}

	while (len) {
		sge_len = sge->length - off;
		sge_off = sge->offset + off;
		if (sge_len > len)
			sge_len = len;
		off = 0;
		len -= sge_len;
		sk_msg_page_add(dst, sg_page(sge), sge_len, sge_off);
		sk_mem_charge(sk, sge_len);
		sk_msg_iter_var_next(i);
		if (i == src->sg.end && len)
			return -ENOSPC;
		sge = sk_msg_elem(src, i);
	}

	return 0;
}
EXPORT_SYMBOL_GPL(sk_msg_clone);

void sk_msg_return_zero(struct sock *sk, struct sk_msg *msg, int bytes)
{
	int i = msg->sg.start;

	do {
		struct scatterlist *sge = sk_msg_elem(msg, i);

		if (bytes < sge->length) {
			sge->length -= bytes;
			sge->offset += bytes;
			sk_mem_uncharge(sk, bytes);
			break;
		}

		sk_mem_uncharge(sk, sge->length);
		bytes -= sge->length;
		sge->length = 0;
		sge->offset = 0;
		sk_msg_iter_var_next(i);
	} while (bytes && i != msg->sg.end);
	msg->sg.start = i;
}
EXPORT_SYMBOL_GPL(sk_msg_return_zero);

void sk_msg_return(struct sock *sk, struct sk_msg *msg, int bytes)
{
	int i = msg->sg.start;

	do {
		struct scatterlist *sge = &msg->sg.data[i];
		int uncharge = (bytes < sge->length) ? bytes : sge->length;

		sk_mem_uncharge(sk, uncharge);
		bytes -= uncharge;
		sk_msg_iter_var_next(i);
	} while (i != msg->sg.end);
}
EXPORT_SYMBOL_GPL(sk_msg_return);

static int sk_msg_free_elem(struct sock *sk, struct sk_msg *msg, u32 i,
			    bool charge)
{
	struct scatterlist *sge = sk_msg_elem(msg, i);
	u32 len = sge->length;

	if (charge)
		sk_mem_uncharge(sk, len);
	if (!msg->skb)
		put_page(sg_page(sge));
	memset(sge, 0, sizeof(*sge));
	return len;
}

static int __sk_msg_free(struct sock *sk, struct sk_msg *msg, u32 i,
			 bool charge)
{
	struct scatterlist *sge = sk_msg_elem(msg, i);
	int freed = 0;

	while (msg->sg.size) {
		msg->sg.size -= sge->length;
		freed += sk_msg_free_elem(sk, msg, i, charge);
		sk_msg_iter_var_next(i);
		sk_msg_check_to_free(msg, i, msg->sg.size);
		sge = sk_msg_elem(msg, i);
	}
	if (msg->skb)
		consume_skb(msg->skb);
	sk_msg_init(msg);
	return freed;
}

int sk_msg_free_nocharge(struct sock *sk, struct sk_msg *msg)
{
	return __sk_msg_free(sk, msg, msg->sg.start, false);
}
EXPORT_SYMBOL_GPL(sk_msg_free_nocharge);

int sk_msg_free(struct sock *sk, struct sk_msg *msg)
{
	return __sk_msg_free(sk, msg, msg->sg.start, true);
}
EXPORT_SYMBOL_GPL(sk_msg_free);

static void __sk_msg_free_partial(struct sock *sk, struct sk_msg *msg,
				  u32 bytes, bool charge)
{
	struct scatterlist *sge;
	u32 i = msg->sg.start;

	while (bytes) {
		sge = sk_msg_elem(msg, i);
		if (!sge->length)
			break;
		if (bytes < sge->length) {
			if (charge)
				sk_mem_uncharge(sk, bytes);
			sge->length -= bytes;
			sge->offset += bytes;
			msg->sg.size -= bytes;
			break;
		}

		msg->sg.size -= sge->length;
		bytes -= sge->length;
		sk_msg_free_elem(sk, msg, i, charge);
		sk_msg_iter_var_next(i);
		sk_msg_check_to_free(msg, i, bytes);
	}
	msg->sg.start = i;
}

void sk_msg_free_partial(struct sock *sk, struct sk_msg *msg, u32 bytes)
{
	__sk_msg_free_partial(sk, msg, bytes, true);
}
EXPORT_SYMBOL_GPL(sk_msg_free_partial);

void sk_msg_free_partial_nocharge(struct sock *sk, struct sk_msg *msg,
				  u32 bytes)
{
	__sk_msg_free_partial(sk, msg, bytes, false);
}

void sk_msg_trim(struct sock *sk, struct sk_msg *msg, int len)
{
	int trim = msg->sg.size - len;
	u32 i = msg->sg.end;

	if (trim <= 0) {
		WARN_ON(trim < 0);
		return;
	}

	sk_msg_iter_var_prev(i);
	msg->sg.size = len;
	while (msg->sg.data[i].length &&
	       trim >= msg->sg.data[i].length) {
		trim -= msg->sg.data[i].length;
		sk_msg_free_elem(sk, msg, i, true);
		sk_msg_iter_var_prev(i);
		if (!trim)
			goto out;
	}

	msg->sg.data[i].length -= trim;
	sk_mem_uncharge(sk, trim);
out:
	/* If we trim data before curr pointer update copybreak and current
	 * so that any future copy operations start at new copy location.
	 * However trimed data that has not yet been used in a copy op
	 * does not require an update.
	 */
	if (msg->sg.curr >= i) {
		msg->sg.curr = i;
		msg->sg.copybreak = msg->sg.data[i].length;
	}
	sk_msg_iter_var_next(i);
	msg->sg.end = i;
}
EXPORT_SYMBOL_GPL(sk_msg_trim);

int sk_msg_zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
			      struct sk_msg *msg, u32 bytes)
{
	int i, maxpages, ret = 0, num_elems = sk_msg_elem_used(msg);
	const int to_max_pages = MAX_MSG_FRAGS;
	struct page *pages[MAX_MSG_FRAGS];
	ssize_t orig, copied, use, offset;

	orig = msg->sg.size;
	while (bytes > 0) {
		i = 0;
		maxpages = to_max_pages - num_elems;
		if (maxpages == 0) {
			ret = -EFAULT;
			goto out;
		}

		copied = iov_iter_get_pages(from, pages, bytes, maxpages,
					    &offset);
		if (copied <= 0) {
			ret = -EFAULT;
			goto out;
		}

		iov_iter_advance(from, copied);
		bytes -= copied;
		msg->sg.size += copied;

		while (copied) {
			use = min_t(int, copied, PAGE_SIZE - offset);
			sg_set_page(&msg->sg.data[msg->sg.end],
				    pages[i], use, offset);
			sg_unmark_end(&msg->sg.data[msg->sg.end]);
			sk_mem_charge(sk, use);

			offset = 0;
			copied -= use;
			sk_msg_iter_next(msg, end);
			num_elems++;
			i++;
		}
		/* When zerocopy is mixed with sk_msg_*copy* operations we
		 * may have a copybreak set in this case clear and prefer
		 * zerocopy remainder when possible.
		 */
		msg->sg.copybreak = 0;
		msg->sg.curr = msg->sg.end;
	}
out:
	/* Revert iov_iter updates, msg will need to use 'trim' later if it
	 * also needs to be cleared.
	 */
	if (ret)
		iov_iter_revert(from, msg->sg.size - orig);
	return ret;
}
EXPORT_SYMBOL_GPL(sk_msg_zerocopy_from_iter);

int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
			     struct sk_msg *msg, u32 bytes)
{
	int ret = -ENOSPC, i = msg->sg.curr;
	struct scatterlist *sge;
	u32 copy, buf_size;
	void *to;

	do {
		sge = sk_msg_elem(msg, i);
		/* This is possible if a trim operation shrunk the buffer */
		if (msg->sg.copybreak >= sge->length) {
			msg->sg.copybreak = 0;
			sk_msg_iter_var_next(i);
			if (i == msg->sg.end)
				break;
			sge = sk_msg_elem(msg, i);
		}

		buf_size = sge->length - msg->sg.copybreak;
		copy = (buf_size > bytes) ? bytes : buf_size;
		to = sg_virt(sge) + msg->sg.copybreak;
		msg->sg.copybreak += copy;
		if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY)
			ret = copy_from_iter_nocache(to, copy, from);
		else
			ret = copy_from_iter(to, copy, from);
		if (ret != copy) {
			ret = -EFAULT;
			goto out;
		}
		bytes -= copy;
		if (!bytes)
			break;
		msg->sg.copybreak = 0;
		sk_msg_iter_var_next(i);
	} while (i != msg->sg.end);
out:
	msg->sg.curr = i;
	return ret;
}
EXPORT_SYMBOL_GPL(sk_msg_memcopy_from_iter);

static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
{
	struct sock *sk = psock->sk;
	int copied = 0, num_sge;
	struct sk_msg *msg;

	msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);
	if (unlikely(!msg))
		return -EAGAIN;
	if (!sk_rmem_schedule(sk, skb, skb->len)) {
		kfree(msg);
		return -EAGAIN;
	}

	sk_msg_init(msg);
	num_sge = skb_to_sgvec(skb, msg->sg.data, 0, skb->len);
	if (unlikely(num_sge < 0)) {
		kfree(msg);
		return num_sge;
	}

	sk_mem_charge(sk, skb->len);
	copied = skb->len;
	msg->sg.start = 0;
	msg->sg.end = num_sge == MAX_MSG_FRAGS ? 0 : num_sge;
	msg->skb = skb;

	sk_psock_queue_msg(psock, msg);
	sk->sk_data_ready(sk);
	return copied;
}

static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
			       u32 off, u32 len, bool ingress)
{
	if (ingress)
		return sk_psock_skb_ingress(psock, skb);
	else
		return skb_send_sock_locked(psock->sk, skb, off, len);
}

static void sk_psock_backlog(struct work_struct *work)
{
	struct sk_psock *psock = container_of(work, struct sk_psock, work);
	struct sk_psock_work_state *state = &psock->work_state;
	struct sk_buff *skb;
	bool ingress;
	u32 len, off;
	int ret;

	/* Lock sock to avoid losing sk_socket during loop. */
	lock_sock(psock->sk);
	if (state->skb) {
		skb = state->skb;
		len = state->len;
		off = state->off;
		state->skb = NULL;
		goto start;
	}

	while ((skb = skb_dequeue(&psock->ingress_skb))) {
		len = skb->len;
		off = 0;
start:
		ingress = tcp_skb_bpf_ingress(skb);
		do {
			ret = -EIO;
			if (likely(psock->sk->sk_socket))
				ret = sk_psock_handle_skb(psock, skb, off,
							  len, ingress);
			if (ret <= 0) {
				if (ret == -EAGAIN) {
					state->skb = skb;
					state->len = len;
					state->off = off;
					goto end;
				}
				/* Hard errors break pipe and stop xmit. */
				sk_psock_report_error(psock, ret ? -ret : EPIPE);
				sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
				kfree_skb(skb);
				goto end;
			}
			off += ret;
			len -= ret;
		} while (len);

		if (!ingress)
			kfree_skb(skb);
	}
end:
	release_sock(psock->sk);
}

struct sk_psock *sk_psock_init(struct sock *sk, int node)
{
	struct sk_psock *psock = kzalloc_node(sizeof(*psock),
					      GFP_ATOMIC | __GFP_NOWARN,
					      node);
	if (!psock)
		return NULL;

	psock->sk = sk;
	psock->eval = __SK_NONE;

	INIT_LIST_HEAD(&psock->link);
	spin_lock_init(&psock->link_lock);

	INIT_WORK(&psock->work, sk_psock_backlog);
	INIT_LIST_HEAD(&psock->ingress_msg);
	skb_queue_head_init(&psock->ingress_skb);

	sk_psock_set_state(psock, SK_PSOCK_TX_ENABLED);
	refcount_set(&psock->refcnt, 1);

	rcu_assign_sk_user_data(sk, psock);
	sock_hold(sk);

	return psock;
}
EXPORT_SYMBOL_GPL(sk_psock_init);

struct sk_psock_link *sk_psock_link_pop(struct sk_psock *psock)
{
	struct sk_psock_link *link;

	spin_lock_bh(&psock->link_lock);
	link = list_first_entry_or_null(&psock->link, struct sk_psock_link,
					list);
	if (link)
		list_del(&link->list);
	spin_unlock_bh(&psock->link_lock);
	return link;
}

void __sk_psock_purge_ingress_msg(struct sk_psock *psock)
{
	struct sk_msg *msg, *tmp;

	list_for_each_entry_safe(msg, tmp, &psock->ingress_msg, list) {
		list_del(&msg->list);
		sk_msg_free(psock->sk, msg);
		kfree(msg);
	}
}

static void sk_psock_zap_ingress(struct sk_psock *psock)
{
	__skb_queue_purge(&psock->ingress_skb);
	__sk_psock_purge_ingress_msg(psock);
}

static void sk_psock_link_destroy(struct sk_psock *psock)
{
	struct sk_psock_link *link, *tmp;

	list_for_each_entry_safe(link, tmp, &psock->link, list) {
		list_del(&link->list);
		sk_psock_free_link(link);
	}
}

static void sk_psock_destroy_deferred(struct work_struct *gc)
{
	struct sk_psock *psock = container_of(gc, struct sk_psock, gc);

	/* No sk_callback_lock since already detached. */
	if (psock->parser.enabled)
		strp_done(&psock->parser.strp);

	cancel_work_sync(&psock->work);

	psock_progs_drop(&psock->progs);

	sk_psock_link_destroy(psock);
	sk_psock_cork_free(psock);
	sk_psock_zap_ingress(psock);

	if (psock->sk_redir)
		sock_put(psock->sk_redir);
	sock_put(psock->sk);
	kfree(psock);
}

void sk_psock_destroy(struct rcu_head *rcu)
{
	struct sk_psock *psock = container_of(rcu, struct sk_psock, rcu);

	INIT_WORK(&psock->gc, sk_psock_destroy_deferred);
	schedule_work(&psock->gc);
}
EXPORT_SYMBOL_GPL(sk_psock_destroy);

void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
{
	rcu_assign_sk_user_data(sk, NULL);
	sk_psock_cork_free(psock);
	sk_psock_restore_proto(sk, psock);

	write_lock_bh(&sk->sk_callback_lock);
	if (psock->progs.skb_parser)
		sk_psock_stop_strp(sk, psock);
	write_unlock_bh(&sk->sk_callback_lock);
	sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);

	call_rcu_sched(&psock->rcu, sk_psock_destroy);
}
EXPORT_SYMBOL_GPL(sk_psock_drop);

static int sk_psock_map_verd(int verdict, bool redir)
{
	switch (verdict) {
	case SK_PASS:
		return redir ? __SK_REDIRECT : __SK_PASS;
	case SK_DROP:
	default:
		break;
	}

	return __SK_DROP;
}

int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock,
			 struct sk_msg *msg)
{
	struct bpf_prog *prog;
	int ret;

	preempt_disable();
	rcu_read_lock();
	prog = READ_ONCE(psock->progs.msg_parser);
	if (unlikely(!prog)) {
		ret = __SK_PASS;
		goto out;
	}

	sk_msg_compute_data_pointers(msg);
	msg->sk = sk;
	ret = BPF_PROG_RUN(prog, msg);
	ret = sk_psock_map_verd(ret, msg->sk_redir);
	psock->apply_bytes = msg->apply_bytes;
	if (ret == __SK_REDIRECT) {
		if (psock->sk_redir)
			sock_put(psock->sk_redir);
		psock->sk_redir = msg->sk_redir;
		if (!psock->sk_redir) {
			ret = __SK_DROP;
			goto out;
		}
		sock_hold(psock->sk_redir);
	}
out:
	rcu_read_unlock();
	preempt_enable();
	return ret;
}
EXPORT_SYMBOL_GPL(sk_psock_msg_verdict);

static int sk_psock_bpf_run(struct sk_psock *psock, struct bpf_prog *prog,
			    struct sk_buff *skb)
{
	int ret;

	skb->sk = psock->sk;
	bpf_compute_data_end_sk_skb(skb);
	preempt_disable();
	ret = BPF_PROG_RUN(prog, skb);
	preempt_enable();
	/* strparser clones the skb before handing it to a upper layer,
	 * meaning skb_orphan has been called. We NULL sk on the way out
	 * to ensure we don't trigger a BUG_ON() in skb/sk operations
	 * later and because we are not charging the memory of this skb
	 * to any socket yet.
	 */
	skb->sk = NULL;
	return ret;
}

static struct sk_psock *sk_psock_from_strp(struct strparser *strp)
{
	struct sk_psock_parser *parser;

	parser = container_of(strp, struct sk_psock_parser, strp);
	return container_of(parser, struct sk_psock, parser);
}

static void sk_psock_verdict_apply(struct sk_psock *psock,
				   struct sk_buff *skb, int verdict)
{
	struct sk_psock *psock_other;
	struct sock *sk_other;
	bool ingress;

	switch (verdict) {
	case __SK_REDIRECT:
		sk_other = tcp_skb_bpf_redirect_fetch(skb);
		if (unlikely(!sk_other))
			goto out_free;
		psock_other = sk_psock(sk_other);
		if (!psock_other || sock_flag(sk_other, SOCK_DEAD) ||
		    !sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED))
			goto out_free;
		ingress = tcp_skb_bpf_ingress(skb);
		if ((!ingress && sock_writeable(sk_other)) ||
		    (ingress &&
		     atomic_read(&sk_other->sk_rmem_alloc) <=
		     sk_other->sk_rcvbuf)) {
			if (!ingress)
				skb_set_owner_w(skb, sk_other);
			skb_queue_tail(&psock_other->ingress_skb, skb);
			schedule_work(&psock_other->work);
			break;
		}
		/* fall-through */
	case __SK_DROP:
		/* fall-through */
	default:
out_free:
		kfree_skb(skb);
	}
}

static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb)
{
	struct sk_psock *psock = sk_psock_from_strp(strp);
	struct bpf_prog *prog;
	int ret = __SK_DROP;

	rcu_read_lock();
	prog = READ_ONCE(psock->progs.skb_verdict);
	if (likely(prog)) {
		skb_orphan(skb);
		tcp_skb_bpf_redirect_clear(skb);
		ret = sk_psock_bpf_run(psock, prog, skb);
		ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb));
	}
	rcu_read_unlock();
	sk_psock_verdict_apply(psock, skb, ret);
}

static int sk_psock_strp_read_done(struct strparser *strp, int err)
{
	return err;
}

static int sk_psock_strp_parse(struct strparser *strp, struct sk_buff *skb)
{
	struct sk_psock *psock = sk_psock_from_strp(strp);
	struct bpf_prog *prog;
	int ret = skb->len;

	rcu_read_lock();
	prog = READ_ONCE(psock->progs.skb_parser);
	if (likely(prog))
		ret = sk_psock_bpf_run(psock, prog, skb);
	rcu_read_unlock();
	return ret;
}

/* Called with socket lock held. */
static void sk_psock_data_ready(struct sock *sk)
{
	struct sk_psock *psock;

	rcu_read_lock();
	psock = sk_psock(sk);
	if (likely(psock)) {
		write_lock_bh(&sk->sk_callback_lock);
		strp_data_ready(&psock->parser.strp);
		write_unlock_bh(&sk->sk_callback_lock);
	}
	rcu_read_unlock();
}

static void sk_psock_write_space(struct sock *sk)
{
	struct sk_psock *psock;
	void (*write_space)(struct sock *sk);

	rcu_read_lock();
	psock = sk_psock(sk);
	if (likely(psock && sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)))
		schedule_work(&psock->work);
	write_space = psock->saved_write_space;
	rcu_read_unlock();
	write_space(sk);
}

int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
{
	static const struct strp_callbacks cb = {
		.rcv_msg = sk_psock_strp_read,
		.read_sock_done = sk_psock_strp_read_done,
		.parse_msg = sk_psock_strp_parse,
	};

	psock->parser.enabled = false;
	return strp_init(&psock->parser.strp, sk, &cb);
}

void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
{
	struct sk_psock_parser *parser = &psock->parser;

	if (parser->enabled)
		return;

	parser->saved_data_ready = sk->sk_data_ready;
	sk->sk_data_ready = sk_psock_data_ready;
	sk->sk_write_space = sk_psock_write_space;
	parser->enabled = true;
}

void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
{
	struct sk_psock_parser *parser = &psock->parser;

	if (!parser->enabled)
		return;

	sk->sk_data_ready = parser->saved_data_ready;
	parser->saved_data_ready = NULL;
	strp_stop(&parser->strp);
	parser->enabled = false;
}
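A compact sketch of how a caller pairs the helpers above — it mirrors the calling pattern tcp_bpf_sendmsg() uses further down (illustrative only; locking, corking and partial-copy handling elided, and the function name is made up):

/* Illustrative only: reserve space for 'len' more bytes in 'msg', copy
 * them in from an iov_iter, and trim back the reservation on failure.
 */
static int sketch_fill_msg(struct sock *sk, struct sk_msg *msg,
			   struct iov_iter *from, u32 len)
{
	u32 osize = msg->sg.size;
	int err;

	err = sk_msg_alloc(sk, msg, msg->sg.size + len, msg->sg.end - 1);
	if (err && err != -ENOSPC)
		return err;

	err = sk_msg_memcopy_from_iter(sk, from, msg, len);
	if (err < 0)
		sk_msg_trim(sk, msg, osize);	/* uncharges what we reserved */
	return err;
}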
@@ -2239,67 +2239,6 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
 }
 EXPORT_SYMBOL(sk_page_frag_refill);
 
-int sk_alloc_sg(struct sock *sk, int len, struct scatterlist *sg,
-		int sg_start, int *sg_curr_index, unsigned int *sg_curr_size,
-		int first_coalesce)
-{
-	int sg_curr = *sg_curr_index, use = 0, rc = 0;
-	unsigned int size = *sg_curr_size;
-	struct page_frag *pfrag;
-	struct scatterlist *sge;
-
-	len -= size;
-	pfrag = sk_page_frag(sk);
-
-	while (len > 0) {
-		unsigned int orig_offset;
-
-		if (!sk_page_frag_refill(sk, pfrag)) {
-			rc = -ENOMEM;
-			goto out;
-		}
-
-		use = min_t(int, len, pfrag->size - pfrag->offset);
-
-		if (!sk_wmem_schedule(sk, use)) {
-			rc = -ENOMEM;
-			goto out;
-		}
-
-		sk_mem_charge(sk, use);
-		size += use;
-		orig_offset = pfrag->offset;
-		pfrag->offset += use;
-
-		sge = sg + sg_curr - 1;
-		if (sg_curr > first_coalesce && sg_page(sge) == pfrag->page &&
-		    sge->offset + sge->length == orig_offset) {
-			sge->length += use;
-		} else {
-			sge = sg + sg_curr;
-			sg_unmark_end(sge);
-			sg_set_page(sge, pfrag->page, use, orig_offset);
-			get_page(pfrag->page);
-			sg_curr++;
-
-			if (sg_curr == MAX_SKB_FRAGS)
-				sg_curr = 0;
-
-			if (sg_curr == sg_start) {
-				rc = -ENOSPC;
-				break;
-			}
-		}
-
-		len -= use;
-	}
-out:
-	*sg_curr_size = size;
-	*sg_curr_index = sg_curr;
-	return rc;
-}
-EXPORT_SYMBOL(sk_alloc_sg);
-
 static void __lock_sock(struct sock *sk)
 	__releases(&sk->sk_lock.slock)
 	__acquires(&sk->sk_lock.slock)
net/core/sock_map.c | 1002 (new file)
(Diff not shown by the viewer because the file is too large.)
@@ -63,6 +63,7 @@ obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
 obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
 obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
 obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
+obj-$(CONFIG_NET_SOCK_MSG) += tcp_bpf.o
 obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
 
 obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
net/ipv4/tcp_bpf.c | 655 (new file; all lines added)
@@ -0,0 +1,655 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright (c) 2017 - 2018 Covalent IO, Inc. http://covalent.io */
|
||||
|
||||
#include <linux/skmsg.h>
|
||||
#include <linux/filter.h>
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/wait.h>
|
||||
|
||||
#include <net/inet_common.h>
|
||||
|
||||
static bool tcp_bpf_stream_read(const struct sock *sk)
|
||||
{
|
||||
struct sk_psock *psock;
|
||||
bool empty = true;
|
||||
|
||||
rcu_read_lock();
|
||||
psock = sk_psock(sk);
|
||||
if (likely(psock))
|
||||
empty = list_empty(&psock->ingress_msg);
|
||||
rcu_read_unlock();
|
||||
return !empty;
|
||||
}
|
||||
|
||||
static int tcp_bpf_wait_data(struct sock *sk, struct sk_psock *psock,
|
||||
int flags, long timeo, int *err)
|
||||
{
|
||||
DEFINE_WAIT_FUNC(wait, woken_wake_function);
|
||||
int ret;
|
||||
|
||||
add_wait_queue(sk_sleep(sk), &wait);
|
||||
sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
|
||||
ret = sk_wait_event(sk, &timeo,
|
||||
!list_empty(&psock->ingress_msg) ||
|
||||
!skb_queue_empty(&sk->sk_receive_queue), &wait);
|
||||
sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
|
||||
remove_wait_queue(sk_sleep(sk), &wait);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
|
||||
struct msghdr *msg, int len)
|
||||
{
|
||||
struct iov_iter *iter = &msg->msg_iter;
|
||||
int i, ret, copied = 0;
|
||||
|
||||
while (copied != len) {
|
||||
struct scatterlist *sge;
|
||||
struct sk_msg *msg_rx;
|
||||
|
||||
msg_rx = list_first_entry_or_null(&psock->ingress_msg,
|
||||
struct sk_msg, list);
|
||||
if (unlikely(!msg_rx))
|
||||
break;
|
||||
|
||||
i = msg_rx->sg.start;
|
||||
do {
|
||||
struct page *page;
|
||||
int copy;
|
||||
|
||||
sge = sk_msg_elem(msg_rx, i);
|
||||
copy = sge->length;
|
||||
page = sg_page(sge);
|
||||
if (copied + copy > len)
|
||||
copy = len - copied;
|
||||
ret = copy_page_to_iter(page, sge->offset, copy, iter);
|
||||
if (ret != copy) {
|
||||
msg_rx->sg.start = i;
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
copied += copy;
|
||||
sge->offset += copy;
|
||||
sge->length -= copy;
|
||||
sk_mem_uncharge(sk, copy);
|
||||
if (!sge->length) {
|
||||
i++;
|
||||
if (i == MAX_SKB_FRAGS)
|
||||
i = 0;
|
||||
if (!msg_rx->skb)
|
||||
put_page(page);
|
||||
}
|
||||
|
||||
if (copied == len)
|
||||
break;
|
||||
} while (i != msg_rx->sg.end);
|
||||
|
||||
msg_rx->sg.start = i;
|
||||
if (!sge->length && msg_rx->sg.start == msg_rx->sg.end) {
|
||||
list_del(&msg_rx->list);
|
||||
if (msg_rx->skb)
|
||||
consume_skb(msg_rx->skb);
|
||||
kfree(msg_rx);
|
||||
}
|
||||
}
|
||||
|
||||
return copied;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__tcp_bpf_recvmsg);
|
||||
|
||||
int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
|
||||
int nonblock, int flags, int *addr_len)
|
||||
{
|
||||
struct sk_psock *psock;
|
||||
int copied, ret;
|
||||
|
||||
if (unlikely(flags & MSG_ERRQUEUE))
|
||||
return inet_recv_error(sk, msg, len, addr_len);
|
||||
if (!skb_queue_empty(&sk->sk_receive_queue))
|
||||
return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
|
||||
|
||||
psock = sk_psock_get(sk);
|
||||
if (unlikely(!psock))
|
||||
return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
|
||||
lock_sock(sk);
|
||||
msg_bytes_ready:
|
||||
copied = __tcp_bpf_recvmsg(sk, psock, msg, len);
|
||||
if (!copied) {
|
||||
int data, err = 0;
|
||||
long timeo;
|
||||
|
||||
timeo = sock_rcvtimeo(sk, nonblock);
|
||||
data = tcp_bpf_wait_data(sk, psock, flags, timeo, &err);
|
||||
if (data) {
|
||||
if (skb_queue_empty(&sk->sk_receive_queue))
|
||||
goto msg_bytes_ready;
|
||||
release_sock(sk);
|
||||
sk_psock_put(sk, psock);
|
||||
return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
|
||||
}
|
||||
if (err) {
|
||||
ret = err;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
ret = copied;
|
||||
out:
|
||||
release_sock(sk);
|
||||
sk_psock_put(sk, psock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
|
||||
struct sk_msg *msg, u32 apply_bytes, int flags)
|
||||
{
|
||||
bool apply = apply_bytes;
|
||||
struct scatterlist *sge;
|
||||
u32 size, copied = 0;
|
||||
struct sk_msg *tmp;
|
||||
int i, ret = 0;
|
||||
|
||||
tmp = kzalloc(sizeof(*tmp), __GFP_NOWARN | GFP_KERNEL);
|
||||
if (unlikely(!tmp))
|
||||
return -ENOMEM;
|
||||
|
||||
lock_sock(sk);
|
||||
tmp->sg.start = msg->sg.start;
|
||||
i = msg->sg.start;
|
||||
do {
|
||||
sge = sk_msg_elem(msg, i);
|
||||
size = (apply && apply_bytes < sge->length) ?
|
||||
apply_bytes : sge->length;
|
||||
if (!sk_wmem_schedule(sk, size)) {
|
||||
if (!copied)
|
||||
ret = -ENOMEM;
|
||||
break;
|
||||
}
|
||||
|
||||
sk_mem_charge(sk, size);
|
||||
sk_msg_xfer(tmp, msg, i, size);
|
||||
copied += size;
|
||||
if (sge->length)
|
||||
get_page(sk_msg_page(tmp, i));
|
||||
sk_msg_iter_var_next(i);
|
||||
tmp->sg.end = i;
|
||||
if (apply) {
|
||||
apply_bytes -= size;
|
||||
if (!apply_bytes)
|
||||
break;
|
||||
}
|
||||
} while (i != msg->sg.end);
|
||||
|
||||
if (!ret) {
|
||||
msg->sg.start = i;
|
||||
msg->sg.size -= apply_bytes;
|
||||
sk_psock_queue_msg(psock, tmp);
|
||||
sk->sk_data_ready(sk);
|
||||
} else {
|
||||
sk_msg_free(sk, tmp);
|
||||
kfree(tmp);
|
||||
}
|
||||
|
||||
release_sock(sk);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int tcp_bpf_push(struct sock *sk, struct sk_msg *msg, u32 apply_bytes,
|
||||
int flags, bool uncharge)
|
||||
{
|
||||
bool apply = apply_bytes;
|
||||
struct scatterlist *sge;
|
||||
struct page *page;
|
||||
int size, ret = 0;
|
||||
u32 off;
|
||||
|
||||
while (1) {
|
||||
sge = sk_msg_elem(msg, msg->sg.start);
|
||||
size = (apply && apply_bytes < sge->length) ?
|
||||
apply_bytes : sge->length;
|
||||
off = sge->offset;
|
||||
page = sg_page(sge);
|
||||
|
||||
tcp_rate_check_app_limited(sk);
|
||||
retry:
|
||||
ret = do_tcp_sendpages(sk, page, off, size, flags);
|
||||
if (ret <= 0)
|
||||
return ret;
|
||||
if (apply)
|
||||
apply_bytes -= ret;
|
||||
msg->sg.size -= ret;
|
||||
sge->offset += ret;
|
||||
sge->length -= ret;
|
||||
if (uncharge)
|
||||
sk_mem_uncharge(sk, ret);
|
||||
if (ret != size) {
|
||||
size -= ret;
|
||||
off += ret;
|
||||
goto retry;
|
||||
}
|
||||
if (!sge->length) {
|
||||
put_page(page);
|
||||
sk_msg_iter_next(msg, start);
|
||||
sg_init_table(sge, 1);
|
||||
if (msg->sg.start == msg->sg.end)
|
||||
break;
|
||||
}
|
||||
if (apply && !apply_bytes)
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int tcp_bpf_push_locked(struct sock *sk, struct sk_msg *msg,
|
||||
u32 apply_bytes, int flags, bool uncharge)
|
||||
{
|
||||
int ret;
|
||||
|
||||
lock_sock(sk);
|
||||
ret = tcp_bpf_push(sk, msg, apply_bytes, flags, uncharge);
|
||||
release_sock(sk);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg,
|
||||
u32 bytes, int flags)
|
||||
{
|
||||
bool ingress = sk_msg_to_ingress(msg);
|
||||
struct sk_psock *psock = sk_psock_get(sk);
|
||||
int ret;
|
||||
|
||||
if (unlikely(!psock)) {
|
||||
sk_msg_free(sk, msg);
|
||||
return 0;
|
||||
}
|
||||
ret = ingress ? bpf_tcp_ingress(sk, psock, msg, bytes, flags) :
|
||||
tcp_bpf_push_locked(sk, msg, bytes, flags, false);
|
||||
sk_psock_put(sk, psock);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(tcp_bpf_sendmsg_redir);
|
||||
|
||||
static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
|
||||
struct sk_msg *msg, int *copied, int flags)
|
||||
{
|
||||
bool cork = false, enospc = msg->sg.start == msg->sg.end;
|
||||
struct sock *sk_redir;
|
||||
u32 tosend;
|
||||
int ret;
|
||||
|
||||
more_data:
|
||||
if (psock->eval == __SK_NONE)
|
||||
psock->eval = sk_psock_msg_verdict(sk, psock, msg);
|
||||
|
||||
if (msg->cork_bytes &&
|
||||
msg->cork_bytes > msg->sg.size && !enospc) {
|
||||
psock->cork_bytes = msg->cork_bytes - msg->sg.size;
|
||||
if (!psock->cork) {
|
||||
psock->cork = kzalloc(sizeof(*psock->cork),
|
||||
GFP_ATOMIC | __GFP_NOWARN);
|
||||
if (!psock->cork)
|
||||
return -ENOMEM;
|
||||
}
|
||||
memcpy(psock->cork, msg, sizeof(*msg));
|
||||
return 0;
|
||||
}
|
||||
|
||||
tosend = msg->sg.size;
|
||||
if (psock->apply_bytes && psock->apply_bytes < tosend)
|
||||
tosend = psock->apply_bytes;
|
||||
|
||||
switch (psock->eval) {
|
||||
case __SK_PASS:
|
||||
ret = tcp_bpf_push(sk, msg, tosend, flags, true);
|
||||
if (unlikely(ret)) {
|
||||
*copied -= sk_msg_free(sk, msg);
|
||||
break;
|
||||
}
|
||||
sk_msg_apply_bytes(psock, tosend);
|
||||
break;
|
||||
case __SK_REDIRECT:
|
||||
sk_redir = psock->sk_redir;
|
||||
sk_msg_apply_bytes(psock, tosend);
|
||||
if (psock->cork) {
|
||||
cork = true;
|
||||
psock->cork = NULL;
|
||||
}
|
||||
sk_msg_return(sk, msg, tosend);
|
||||
release_sock(sk);
|
||||
ret = tcp_bpf_sendmsg_redir(sk_redir, msg, tosend, flags);
|
||||
lock_sock(sk);
|
||||
if (unlikely(ret < 0)) {
|
||||
int free = sk_msg_free_nocharge(sk, msg);
|
||||
|
||||
if (!cork)
|
||||
*copied -= free;
|
||||
}
|
||||
if (cork) {
|
||||
sk_msg_free(sk, msg);
|
||||
kfree(msg);
|
||||
msg = NULL;
|
||||
ret = 0;
|
||||
}
|
||||
break;
|
||||
case __SK_DROP:
|
||||
default:
|
||||
sk_msg_free_partial(sk, msg, tosend);
|
||||
sk_msg_apply_bytes(psock, tosend);
|
||||
*copied -= tosend;
|
||||
return -EACCES;
|
||||
}
|
||||
|
||||
if (likely(!ret)) {
|
||||
if (!psock->apply_bytes) {
|
||||
psock->eval = __SK_NONE;
|
||||
if (psock->sk_redir) {
|
||||
sock_put(psock->sk_redir);
|
||||
psock->sk_redir = NULL;
|
||||
}
|
||||
}
|
||||
if (msg &&
|
||||
msg->sg.data[msg->sg.start].page_link &&
|
||||
msg->sg.data[msg->sg.start].length)
|
||||
goto more_data;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int tcp_bpf_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
{
	struct sk_msg tmp, *msg_tx = NULL;
	int flags = msg->msg_flags | MSG_NO_SHARED_FRAGS;
	int copied = 0, err = 0;
	struct sk_psock *psock;
	long timeo;

	psock = sk_psock_get(sk);
	if (unlikely(!psock))
		return tcp_sendmsg(sk, msg, size);

	lock_sock(sk);
	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
	while (msg_data_left(msg)) {
		bool enospc = false;
		u32 copy, osize;

		if (sk->sk_err) {
			err = -sk->sk_err;
			goto out_err;
		}

		copy = msg_data_left(msg);
		if (!sk_stream_memory_free(sk))
			goto wait_for_sndbuf;
		if (psock->cork) {
			msg_tx = psock->cork;
		} else {
			msg_tx = &tmp;
			sk_msg_init(msg_tx);
		}

		osize = msg_tx->sg.size;
		err = sk_msg_alloc(sk, msg_tx, msg_tx->sg.size + copy, msg_tx->sg.end - 1);
		if (err) {
			if (err != -ENOSPC)
				goto wait_for_memory;
			enospc = true;
			copy = msg_tx->sg.size - osize;
		}

		err = sk_msg_memcopy_from_iter(sk, &msg->msg_iter, msg_tx,
					       copy);
		if (err < 0) {
			sk_msg_trim(sk, msg_tx, osize);
			goto out_err;
		}

		copied += copy;
		if (psock->cork_bytes) {
			if (size > psock->cork_bytes)
				psock->cork_bytes = 0;
			else
				psock->cork_bytes -= size;
			if (psock->cork_bytes && !enospc)
				goto out_err;
			/* All cork bytes are accounted, rerun the prog. */
			psock->eval = __SK_NONE;
			psock->cork_bytes = 0;
		}

		err = tcp_bpf_send_verdict(sk, psock, msg_tx, &copied, flags);
		if (unlikely(err < 0))
			goto out_err;
		continue;
wait_for_sndbuf:
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
wait_for_memory:
		err = sk_stream_wait_memory(sk, &timeo);
		if (err) {
			if (msg_tx && msg_tx != psock->cork)
				sk_msg_free(sk, msg_tx);
			goto out_err;
		}
	}
out_err:
	if (err < 0)
		err = sk_stream_error(sk, msg->msg_flags, err);
	release_sock(sk);
	sk_psock_put(sk, psock);
	return copied ? copied : err;
}

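/* sendpage counterpart of tcp_bpf_sendmsg(): links the page into the
 * sk_msg ring via sk_msg_page_add() rather than copying, then applies
 * the same cork and verdict logic. Falls back to tcp_sendpage() when no
 * psock is attached.
 */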
static int tcp_bpf_sendpage(struct sock *sk, struct page *page, int offset,
			    size_t size, int flags)
{
	struct sk_msg tmp, *msg = NULL;
	int err = 0, copied = 0;
	struct sk_psock *psock;
	bool enospc = false;

	psock = sk_psock_get(sk);
	if (unlikely(!psock))
		return tcp_sendpage(sk, page, offset, size, flags);

	lock_sock(sk);
	if (psock->cork) {
		msg = psock->cork;
	} else {
		msg = &tmp;
		sk_msg_init(msg);
	}

	/* Catch case where ring is full and sendpage is stalled. */
	if (unlikely(sk_msg_full(msg)))
		goto out_err;

	sk_msg_page_add(msg, page, size, offset);
	sk_mem_charge(sk, size);
	copied = size;
	if (sk_msg_full(msg))
		enospc = true;
	if (psock->cork_bytes) {
		if (size > psock->cork_bytes)
			psock->cork_bytes = 0;
		else
			psock->cork_bytes -= size;
		if (psock->cork_bytes && !enospc)
			goto out_err;
		/* All cork bytes are accounted, rerun the prog. */
		psock->eval = __SK_NONE;
		psock->cork_bytes = 0;
	}

	err = tcp_bpf_send_verdict(sk, psock, msg, &copied, flags);
out_err:
	release_sock(sk);
	sk_psock_put(sk, psock);
	return copied ? copied : err;
}

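/* Tear down psock state on unhash/close: free a pending cork buffer,
 * purge not yet consumed ingress messages and drop all map links.
 */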
static void tcp_bpf_remove(struct sock *sk, struct sk_psock *psock)
{
	struct sk_psock_link *link;

	sk_psock_cork_free(psock);
	__sk_psock_purge_ingress_msg(psock);
	while ((link = sk_psock_link_pop(psock))) {
		sk_psock_unlink(sk, link);
		sk_psock_free_link(link);
	}
}

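/* unhash and close arrive through the patched sk_prot; both remove the
 * psock state and then hand control back to the handlers saved when the
 * socket's proto was swapped in.
 */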
static void tcp_bpf_unhash(struct sock *sk)
{
	void (*saved_unhash)(struct sock *sk);
	struct sk_psock *psock;

	rcu_read_lock();
	psock = sk_psock(sk);
	if (unlikely(!psock)) {
		rcu_read_unlock();
		if (sk->sk_prot->unhash)
			sk->sk_prot->unhash(sk);
		return;
	}

	saved_unhash = psock->saved_unhash;
	tcp_bpf_remove(sk, psock);
	rcu_read_unlock();
	saved_unhash(sk);
}

static void tcp_bpf_close(struct sock *sk, long timeout)
{
	void (*saved_close)(struct sock *sk, long timeout);
	struct sk_psock *psock;

	lock_sock(sk);
	rcu_read_lock();
	psock = sk_psock(sk);
	if (unlikely(!psock)) {
		rcu_read_unlock();
		release_sock(sk);
		return sk->sk_prot->close(sk, timeout);
	}

	saved_close = psock->saved_close;
	tcp_bpf_remove(sk, psock);
	rcu_read_unlock();
	release_sock(sk);
	saved_close(sk, timeout);
}

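/* Patched proto variants are kept in a small matrix: one dimension for
 * the address family (v4/v6), one for the config. TCP_BPF_BASE only
 * overrides unhash/close/recvmsg, while TCP_BPF_TX additionally
 * overrides sendmsg/sendpage for sockets with a msg parser program.
 */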
enum {
	TCP_BPF_IPV4,
	TCP_BPF_IPV6,
	TCP_BPF_NUM_PROTS,
};

enum {
	TCP_BPF_BASE,
	TCP_BPF_TX,
	TCP_BPF_NUM_CFGS,
};

static struct proto *tcpv6_prot_saved __read_mostly;
static DEFINE_SPINLOCK(tcpv6_prot_lock);
static struct proto tcp_bpf_prots[TCP_BPF_NUM_PROTS][TCP_BPF_NUM_CFGS];

static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS],
				   struct proto *base)
{
	prot[TCP_BPF_BASE] = *base;
	prot[TCP_BPF_BASE].unhash = tcp_bpf_unhash;
	prot[TCP_BPF_BASE].close = tcp_bpf_close;
	prot[TCP_BPF_BASE].recvmsg = tcp_bpf_recvmsg;
	prot[TCP_BPF_BASE].stream_memory_read = tcp_bpf_stream_read;

	prot[TCP_BPF_TX] = prot[TCP_BPF_BASE];
	prot[TCP_BPF_TX].sendmsg = tcp_bpf_sendmsg;
	prot[TCP_BPF_TX].sendpage = tcp_bpf_sendpage;
}

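/* tcpv6_prot may come from a module, so the IPv6 variants are built
 * lazily under tcpv6_prot_lock the first time an AF_INET6 socket shows
 * up, and rebuilt if the underlying proto ever changes.
 */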
static void tcp_bpf_check_v6_needs_rebuild(struct sock *sk, struct proto *ops)
{
	if (sk->sk_family == AF_INET6 &&
	    unlikely(ops != smp_load_acquire(&tcpv6_prot_saved))) {
		spin_lock_bh(&tcpv6_prot_lock);
		if (likely(ops != tcpv6_prot_saved)) {
			tcp_bpf_rebuild_protos(tcp_bpf_prots[TCP_BPF_IPV6], ops);
			smp_store_release(&tcpv6_prot_saved, ops);
		}
		spin_unlock_bh(&tcpv6_prot_lock);
	}
}

static int __init tcp_bpf_v4_build_proto(void)
{
	tcp_bpf_rebuild_protos(tcp_bpf_prots[TCP_BPF_IPV4], &tcp_prot);
	return 0;
}
core_initcall(tcp_bpf_v4_build_proto);

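/* Select the proto variant matching the socket's family and the
 * attached program set, and install it via sk_psock_update_proto().
 */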
static void tcp_bpf_update_sk_prot(struct sock *sk, struct sk_psock *psock)
{
	int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
	int config = psock->progs.msg_parser ? TCP_BPF_TX : TCP_BPF_BASE;

	sk_psock_update_proto(sk, psock, &tcp_bpf_prots[family][config]);
}

static void tcp_bpf_reinit_sk_prot(struct sock *sk, struct sk_psock *psock)
{
	int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
	int config = psock->progs.msg_parser ? TCP_BPF_TX : TCP_BPF_BASE;

	/* Reinit occurs when program types change e.g. TCP_BPF_TX is removed
	 * or added requiring sk_prot hook updates. We keep original saved
	 * hooks in this case.
	 */
	sk->sk_prot = &tcp_bpf_prots[family][config];
}

static int tcp_bpf_assert_proto_ops(struct proto *ops)
{
	/* In order to avoid retpoline, we make assumptions when we call
	 * into ops if e.g. a psock is not present. Make sure they are
	 * indeed valid assumptions.
	 */
	return ops->recvmsg == tcp_recvmsg &&
	       ops->sendmsg == tcp_sendmsg &&
	       ops->sendpage == tcp_sendpage ? 0 : -ENOTSUPP;
}

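/* tcp_bpf_init() swaps in the patched proto after sanity-checking the
 * socket's current ops; tcp_bpf_reinit() re-selects the variant when
 * the attached program set changes. Both expect the caller to own the
 * socket (sock_owned_by_me).
 */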
void tcp_bpf_reinit(struct sock *sk)
{
	struct sk_psock *psock;

	sock_owned_by_me(sk);

	rcu_read_lock();
	psock = sk_psock(sk);
	tcp_bpf_reinit_sk_prot(sk, psock);
	rcu_read_unlock();
}

int tcp_bpf_init(struct sock *sk)
{
	struct proto *ops = READ_ONCE(sk->sk_prot);
	struct sk_psock *psock;

	sock_owned_by_me(sk);

	rcu_read_lock();
	psock = sk_psock(sk);
	if (unlikely(!psock || psock->sk_proto ||
		     tcp_bpf_assert_proto_ops(ops))) {
		rcu_read_unlock();
		return -EINVAL;
	}
	tcp_bpf_check_v6_needs_rebuild(sk, ops);
	tcp_bpf_update_sk_prot(sk, psock);
	rcu_read_unlock();
	return 0;
}

@@ -6,7 +6,7 @@
 *
 */

#include<linux/module.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/list.h>
@@ -29,18 +29,6 @@ static struct tcp_ulp_ops *tcp_ulp_find(const char *name)
	return NULL;
}

static struct tcp_ulp_ops *tcp_ulp_find_id(const int ulp)
{
	struct tcp_ulp_ops *e;

	list_for_each_entry_rcu(e, &tcp_ulp_list, list) {
		if (e->uid == ulp)
			return e;
	}

	return NULL;
}

static const struct tcp_ulp_ops *__tcp_ulp_find_autoload(const char *name)
{
	const struct tcp_ulp_ops *ulp = NULL;
@@ -63,18 +51,6 @@ static const struct tcp_ulp_ops *__tcp_ulp_find_autoload(const char *name)
	return ulp;
}

static const struct tcp_ulp_ops *__tcp_ulp_lookup(const int uid)
{
	const struct tcp_ulp_ops *ulp;

	rcu_read_lock();
	ulp = tcp_ulp_find_id(uid);
	if (!ulp || !try_module_get(ulp->owner))
		ulp = NULL;
	rcu_read_unlock();
	return ulp;
}

/* Attach new upper layer protocol to the list
 * of available protocols.
 */
@@ -123,6 +99,8 @@ void tcp_cleanup_ulp(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	sock_owned_by_me(sk);

	if (!icsk->icsk_ulp_ops)
		return;

@@ -133,54 +111,35 @@ void tcp_cleanup_ulp(struct sock *sk)
	icsk->icsk_ulp_ops = NULL;
}

/* Change upper layer protocol for socket */
int tcp_set_ulp(struct sock *sk, const char *name)
static int __tcp_set_ulp(struct sock *sk, const struct tcp_ulp_ops *ulp_ops)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	const struct tcp_ulp_ops *ulp_ops;
	int err = 0;
	int err;

	err = -EEXIST;
	if (icsk->icsk_ulp_ops)
		return -EEXIST;
		goto out_err;

	err = ulp_ops->init(sk);
	if (err)
		goto out_err;

	icsk->icsk_ulp_ops = ulp_ops;
	return 0;
out_err:
	module_put(ulp_ops->owner);
	return err;
}

int tcp_set_ulp(struct sock *sk, const char *name)
{
	const struct tcp_ulp_ops *ulp_ops;

	sock_owned_by_me(sk);

	ulp_ops = __tcp_ulp_find_autoload(name);
	if (!ulp_ops)
		return -ENOENT;

	if (!ulp_ops->user_visible) {
		module_put(ulp_ops->owner);
		return -ENOENT;
	}

	err = ulp_ops->init(sk);
	if (err) {
		module_put(ulp_ops->owner);
		return err;
	}

	icsk->icsk_ulp_ops = ulp_ops;
	return 0;
}

int tcp_set_ulp_id(struct sock *sk, int ulp)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	const struct tcp_ulp_ops *ulp_ops;
	int err;

	if (icsk->icsk_ulp_ops)
		return -EEXIST;

	ulp_ops = __tcp_ulp_lookup(ulp);
	if (!ulp_ops)
		return -ENOENT;

	err = ulp_ops->init(sk);
	if (err) {
		module_put(ulp_ops->owner);
		return err;
	}

	icsk->icsk_ulp_ops = ulp_ops;
	return 0;
	return __tcp_set_ulp(sk, ulp_ops);
}

@@ -901,6 +901,7 @@ static const struct ipv6_stub ipv6_stub_impl = {

static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = {
	.inet6_bind = __inet6_bind,
	.udp6_lib_lookup = __udp6_lib_lookup,
};

static int __init inet6_init(void)

@@ -1,4 +1,2 @@
config STREAM_PARSER
	tristate
	default n
	def_bool n

@@ -8,6 +8,7 @@ config TLS
	select CRYPTO_AES
	select CRYPTO_GCM
	select STREAM_PARSER
	select NET_SOCK_MSG
	default n
	---help---
	Enable kernel support for TLS protocol. This allows symmetric

@@ -421,7 +421,7 @@ last_record:
	tls_push_record_flags = flags;
	if (more) {
		tls_ctx->pending_open_record_frags =
				record->num_frags;
				!!record->num_frags;
		break;
	}

@@ -620,12 +620,14 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
	prot[TLS_SW][TLS_BASE].sendpage = tls_sw_sendpage;

	prot[TLS_BASE][TLS_SW] = prot[TLS_BASE][TLS_BASE];
	prot[TLS_BASE][TLS_SW].recvmsg = tls_sw_recvmsg;
	prot[TLS_BASE][TLS_SW].close = tls_sk_proto_close;
	prot[TLS_BASE][TLS_SW].recvmsg = tls_sw_recvmsg;
	prot[TLS_BASE][TLS_SW].stream_memory_read = tls_sw_stream_read;
	prot[TLS_BASE][TLS_SW].close = tls_sk_proto_close;

	prot[TLS_SW][TLS_SW] = prot[TLS_SW][TLS_BASE];
	prot[TLS_SW][TLS_SW].recvmsg = tls_sw_recvmsg;
	prot[TLS_SW][TLS_SW].close = tls_sk_proto_close;
	prot[TLS_SW][TLS_SW].recvmsg = tls_sw_recvmsg;
	prot[TLS_SW][TLS_SW].stream_memory_read = tls_sw_stream_read;
	prot[TLS_SW][TLS_SW].close = tls_sk_proto_close;

#ifdef CONFIG_TLS_DEVICE
	prot[TLS_HW][TLS_BASE] = prot[TLS_BASE][TLS_BASE];
@@ -724,7 +726,6 @@ static int __init tls_register(void)
	build_protos(tls_prots[TLSV4], &tcp_prot);

	tls_sw_proto_ops = inet_stream_ops;
	tls_sw_proto_ops.poll = tls_sw_poll;
	tls_sw_proto_ops.splice_read = tls_sw_splice_read;

#ifdef CONFIG_TLS_DEVICE

912	net/tls/tls_sw.c
(diff for net/tls/tls_sw.c not shown because the file is too large)