xsk: new descriptor addressing scheme
Currently, AF_XDP only supports a fixed frame-size memory scheme where each frame is referenced via an index (idx). A user passes the frame index to the kernel, and the kernel acts upon the data. Some NICs, however, do not have a fixed frame-size model, instead they have a model where a memory window is passed to the hardware and multiple frames are filled into that window (referred to as the "type-writer" model). By changing the descriptor format from the current frame index addressing scheme, AF_XDP can in the future be extended to support these kinds of NICs. In the index-based model, an idx refers to a frame of size frame_size. Addressing a frame in the UMEM is done by offseting the UMEM starting address by a global offset, idx * frame_size + offset. Communicating via the fill- and completion-rings are done by means of idx. In this commit, the idx is removed in favor of an address (addr), which is a relative address ranging over the UMEM. To convert an idx-based address to the new addr is simply: addr = idx * frame_size + offset. We also stop referring to the UMEM "frame" as a frame. Instead it is simply called a chunk. To transfer ownership of a chunk to the kernel, the addr of the chunk is passed in the fill-ring. Note, that the kernel will mask addr to make it chunk aligned, so there is no need for userspace to do that. E.g., for a chunk size of 2k, passing an addr of 2048, 2050 or 3000 to the fill-ring will refer to the same chunk. On the completion-ring, the addr will match that of the Tx descriptor, passed to the kernel. Changing the descriptor format to use chunks/addr will allow for future changes to move to a type-writer based model, where multiple frames can reside in one chunk. In this model passing one single chunk into the fill-ring, would potentially result in multiple Rx descriptors. This commit changes the uapi of AF_XDP sockets, and updates the documentation. Signed-off-by: Björn Töpel <bjorn.topel@intel.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
This commit is contained in:

committed by
Daniel Borkmann

parent
a509a95536
commit
bbff2f321a
@@ -41,24 +41,27 @@ bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
|
||||
|
||||
static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
|
||||
{
|
||||
u32 id, len = xdp->data_end - xdp->data;
|
||||
u32 len = xdp->data_end - xdp->data;
|
||||
void *buffer;
|
||||
u64 addr;
|
||||
int err;
|
||||
|
||||
if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
|
||||
return -EINVAL;
|
||||
|
||||
if (!xskq_peek_id(xs->umem->fq, &id)) {
|
||||
if (!xskq_peek_addr(xs->umem->fq, &addr) ||
|
||||
len > xs->umem->chunk_size_nohr) {
|
||||
xs->rx_dropped++;
|
||||
return -ENOSPC;
|
||||
}
|
||||
|
||||
buffer = xdp_umem_get_data_with_headroom(xs->umem, id);
|
||||
addr += xs->umem->headroom;
|
||||
|
||||
buffer = xdp_umem_get_data(xs->umem, addr);
|
||||
memcpy(buffer, xdp->data, len);
|
||||
err = xskq_produce_batch_desc(xs->rx, id, len,
|
||||
xs->umem->frame_headroom);
|
||||
err = xskq_produce_batch_desc(xs->rx, addr, len);
|
||||
if (!err)
|
||||
xskq_discard_id(xs->umem->fq);
|
||||
xskq_discard_addr(xs->umem->fq);
|
||||
else
|
||||
xs->rx_dropped++;
|
||||
|
||||
@@ -95,10 +98,10 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
|
||||
|
||||
static void xsk_destruct_skb(struct sk_buff *skb)
|
||||
{
|
||||
u32 id = (u32)(long)skb_shinfo(skb)->destructor_arg;
|
||||
u64 addr = (u64)(long)skb_shinfo(skb)->destructor_arg;
|
||||
struct xdp_sock *xs = xdp_sk(skb->sk);
|
||||
|
||||
WARN_ON_ONCE(xskq_produce_id(xs->umem->cq, id));
|
||||
WARN_ON_ONCE(xskq_produce_addr(xs->umem->cq, addr));
|
||||
|
||||
sock_wfree(skb);
|
||||
}
|
||||
@@ -123,14 +126,15 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
|
||||
|
||||
while (xskq_peek_desc(xs->tx, &desc)) {
|
||||
char *buffer;
|
||||
u32 id, len;
|
||||
u64 addr;
|
||||
u32 len;
|
||||
|
||||
if (max_batch-- == 0) {
|
||||
err = -EAGAIN;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (xskq_reserve_id(xs->umem->cq)) {
|
||||
if (xskq_reserve_addr(xs->umem->cq)) {
|
||||
err = -EAGAIN;
|
||||
goto out;
|
||||
}
|
||||
@@ -153,8 +157,8 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
|
||||
}
|
||||
|
||||
skb_put(skb, len);
|
||||
id = desc.idx;
|
||||
buffer = xdp_umem_get_data(xs->umem, id) + desc.offset;
|
||||
addr = desc.addr;
|
||||
buffer = xdp_umem_get_data(xs->umem, addr);
|
||||
err = skb_store_bits(skb, 0, buffer, len);
|
||||
if (unlikely(err)) {
|
||||
kfree_skb(skb);
|
||||
@@ -164,7 +168,7 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
|
||||
skb->dev = xs->dev;
|
||||
skb->priority = sk->sk_priority;
|
||||
skb->mark = sk->sk_mark;
|
||||
skb_shinfo(skb)->destructor_arg = (void *)(long)id;
|
||||
skb_shinfo(skb)->destructor_arg = (void *)(long)addr;
|
||||
skb->destructor = xsk_destruct_skb;
|
||||
|
||||
err = dev_direct_xmit(skb, xs->queue_id);
|
||||
|
Reference in New Issue
Block a user