sock: enable MSG_ZEROCOPY
Prepare the datapath for refcounted ubuf_info. Clone ubuf_info with skb_zerocopy_clone() wherever needed due to skb split, merge, resize or clone. Split skb_orphan_frags into two variants. The split, merge, .. paths support reference counted zerocopy buffers, so do not do a deep copy. Add skb_orphan_frags_rx for paths that may loop packets to receive sockets. That is not allowed, as it may cause unbounded latency. Deep copy all zerocopy buffers, ref-counted or not, in this path. The exact locations to modify were chosen by exhaustively searching through all code that might modify skb_frag references and/or the SKBTX_DEV_ZEROCOPY tx_flags bit. The changes err on the safe side, in two ways. (1) legacy ubuf_info paths virtio and tap are not modified. They keep a 1:1 ubuf_info to sk_buff relationship. Calls to skb_orphan_frags still call skb_copy_ubufs and thus copy frags in this case. (2) not all copies deep in the stack are addressed yet. skb_shift, skb_split and skb_try_coalesce can be refined to avoid copying. These are not in the hot path and this patch is hairy enough as is, so that is left for future refinement. Signed-off-by: Willem de Bruijn <willemb@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:

committed by
David S. Miller

parent
76851d1212
commit
1f8b977ab3
@@ -567,21 +567,10 @@ static void skb_release_data(struct sk_buff *skb)
|
||||
for (i = 0; i < shinfo->nr_frags; i++)
|
||||
__skb_frag_unref(&shinfo->frags[i]);
|
||||
|
||||
/*
|
||||
* If skb buf is from userspace, we need to notify the caller
|
||||
* the lower device DMA has done;
|
||||
*/
|
||||
if (shinfo->tx_flags & SKBTX_DEV_ZEROCOPY) {
|
||||
struct ubuf_info *uarg;
|
||||
|
||||
uarg = shinfo->destructor_arg;
|
||||
if (uarg->callback)
|
||||
uarg->callback(uarg, true);
|
||||
}
|
||||
|
||||
if (shinfo->frag_list)
|
||||
kfree_skb_list(shinfo->frag_list);
|
||||
|
||||
skb_zcopy_clear(skb, true);
|
||||
skb_free_head(skb);
|
||||
}
|
||||
|
||||
@@ -695,14 +684,7 @@ EXPORT_SYMBOL(kfree_skb_list);
|
||||
*/
|
||||
void skb_tx_error(struct sk_buff *skb)
|
||||
{
|
||||
if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
|
||||
struct ubuf_info *uarg;
|
||||
|
||||
uarg = skb_shinfo(skb)->destructor_arg;
|
||||
if (uarg->callback)
|
||||
uarg->callback(uarg, false);
|
||||
skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
|
||||
}
|
||||
skb_zcopy_clear(skb, true);
|
||||
}
|
||||
EXPORT_SYMBOL(skb_tx_error);
|
||||
|
||||
@@ -1029,9 +1011,7 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(skb_zerocopy_iter_stream);
|
||||
|
||||
/* unused only until next patch in the series; will remove attribute */
|
||||
static int __attribute__((unused))
|
||||
skb_zerocopy_clone(struct sk_buff *nskb, struct sk_buff *orig,
|
||||
static int skb_zerocopy_clone(struct sk_buff *nskb, struct sk_buff *orig,
|
||||
gfp_t gfp_mask)
|
||||
{
|
||||
if (skb_zcopy(orig)) {
|
||||
@@ -1068,7 +1048,6 @@ static int __attribute__((unused))
|
||||
*/
|
||||
int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
|
||||
{
|
||||
struct ubuf_info *uarg = skb_shinfo(skb)->destructor_arg;
|
||||
int num_frags = skb_shinfo(skb)->nr_frags;
|
||||
struct page *page, *head = NULL;
|
||||
int i, new_frags;
|
||||
@@ -1127,8 +1106,6 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
|
||||
for (i = 0; i < num_frags; i++)
|
||||
skb_frag_unref(skb, i);
|
||||
|
||||
uarg->callback(uarg, false);
|
||||
|
||||
/* skb frags point to kernel buffers */
|
||||
for (i = 0; i < new_frags - 1; i++) {
|
||||
__skb_fill_page_desc(skb, i, head, 0, PAGE_SIZE);
|
||||
@@ -1137,7 +1114,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
|
||||
__skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off);
|
||||
skb_shinfo(skb)->nr_frags = new_frags;
|
||||
|
||||
skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
|
||||
skb_zcopy_clear(skb, false);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(skb_copy_ubufs);
|
||||
@@ -1298,7 +1275,8 @@ struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom,
|
||||
if (skb_shinfo(skb)->nr_frags) {
|
||||
int i;
|
||||
|
||||
if (skb_orphan_frags(skb, gfp_mask)) {
|
||||
if (skb_orphan_frags(skb, gfp_mask) ||
|
||||
skb_zerocopy_clone(n, skb, gfp_mask)) {
|
||||
kfree_skb(n);
|
||||
n = NULL;
|
||||
goto out;
|
||||
@@ -1375,9 +1353,10 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
|
||||
* be since all we did is relocate the values
|
||||
*/
|
||||
if (skb_cloned(skb)) {
|
||||
/* copy this zero copy skb frags */
|
||||
if (skb_orphan_frags(skb, gfp_mask))
|
||||
goto nofrags;
|
||||
if (skb_zcopy(skb))
|
||||
atomic_inc(&skb_uarg(skb)->refcnt);
|
||||
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
|
||||
skb_frag_ref(skb, i);
|
||||
|
||||
@@ -1872,6 +1851,9 @@ end:
|
||||
skb->tail += delta;
|
||||
skb->data_len -= delta;
|
||||
|
||||
if (!skb->data_len)
|
||||
skb_zcopy_clear(skb, false);
|
||||
|
||||
return skb_tail_pointer(skb);
|
||||
}
|
||||
EXPORT_SYMBOL(__pskb_pull_tail);
|
||||
@@ -2627,6 +2609,7 @@ skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen)
|
||||
skb_tx_error(from);
|
||||
return -ENOMEM;
|
||||
}
|
||||
skb_zerocopy_clone(to, from, GFP_ATOMIC);
|
||||
|
||||
for (i = 0; i < skb_shinfo(from)->nr_frags; i++) {
|
||||
if (!len)
|
||||
@@ -2924,6 +2907,7 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
|
||||
|
||||
skb_shinfo(skb1)->tx_flags |= skb_shinfo(skb)->tx_flags &
|
||||
SKBTX_SHARED_FRAG;
|
||||
skb_zerocopy_clone(skb1, skb, 0);
|
||||
if (len < pos) /* Split line is inside header. */
|
||||
skb_split_inside_header(skb, skb1, len, pos);
|
||||
else /* Second chunk has no header, nothing to copy. */
|
||||
@@ -2967,6 +2951,8 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
|
||||
|
||||
if (skb_headlen(skb))
|
||||
return 0;
|
||||
if (skb_zcopy(tgt) || skb_zcopy(skb))
|
||||
return 0;
|
||||
|
||||
todo = shiftlen;
|
||||
from = 0;
|
||||
@@ -3540,6 +3526,8 @@ normal:
|
||||
|
||||
skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags &
|
||||
SKBTX_SHARED_FRAG;
|
||||
if (skb_zerocopy_clone(nskb, head_skb, GFP_ATOMIC))
|
||||
goto err;
|
||||
|
||||
while (pos < offset + len) {
|
||||
if (i >= nfrags) {
|
||||
@@ -4663,6 +4651,8 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
|
||||
|
||||
if (skb_has_frag_list(to) || skb_has_frag_list(from))
|
||||
return false;
|
||||
if (skb_zcopy(to) || skb_zcopy(from))
|
||||
return false;
|
||||
|
||||
if (skb_headlen(from) != 0) {
|
||||
struct page *page;
|
||||
|
Reference in New Issue
Block a user