Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
Pull virtio/vhost updates from Michael Tsirkin:

 - new vsock device support in host and guest
 - platform IOMMU support in host and guest, including compatibility
   quirks for legacy systems.
 - misc fixes and cleanups.

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost:
  VSOCK: Use kvfree()
  vhost: split out vringh Kconfig
  vhost: detect 32 bit integer wrap around
  vhost: new device IOTLB API
  vhost: drop vringh dependency
  vhost: convert pre sorted vhost memory array to interval tree
  vhost: introduce vhost memory accessors
  VSOCK: Add Makefile and Kconfig
  VSOCK: Introduce vhost_vsock.ko
  VSOCK: Introduce virtio_transport.ko
  VSOCK: Introduce virtio_vsock_common.ko
  VSOCK: defer sock removal to transports
  VSOCK: transport-specific vsock_transport functions
  vhost: drop vringh dependency
  vop: pull in vhost Kconfig
  virtio: new feature to detect IOMMU device quirk
  balloon: check the number of available pages in leak balloon
  vhost: lockless enqueuing
  vhost: simplify work flushing
此提交包含在:
@@ -2,7 +2,6 @@ config VHOST_NET
|
||||
tristate "Host kernel accelerator for virtio net"
|
||||
depends on NET && EVENTFD && (TUN || !TUN) && (MACVTAP || !MACVTAP)
|
||||
select VHOST
|
||||
select VHOST_RING
|
||||
---help---
|
||||
This kernel module can be loaded in host kernel to accelerate
|
||||
guest networking with virtio_net. Not to be confused with virtio_net
|
||||
@@ -15,17 +14,24 @@ config VHOST_SCSI
|
||||
tristate "VHOST_SCSI TCM fabric driver"
|
||||
depends on TARGET_CORE && EVENTFD && m
|
||||
select VHOST
|
||||
select VHOST_RING
|
||||
default n
|
||||
---help---
|
||||
Say M here to enable the vhost_scsi TCM fabric module
|
||||
for use with virtio-scsi guests
|
||||
|
||||
config VHOST_RING
|
||||
tristate
|
||||
config VHOST_VSOCK
|
||||
tristate "vhost virtio-vsock driver"
|
||||
depends on VSOCKETS && EVENTFD
|
||||
select VIRTIO_VSOCKETS_COMMON
|
||||
select VHOST
|
||||
default n
|
||||
---help---
|
||||
This option is selected by any driver which needs to access
|
||||
the host side of a virtio ring.
|
||||
This kernel module can be loaded in the host kernel to provide AF_VSOCK
|
||||
sockets for communicating with guests. The guests must have the
|
||||
virtio_transport.ko driver loaded to use the virtio-vsock device.
|
||||
|
||||
To compile this driver as a module, choose M here: the module will be called
|
||||
vhost_vsock.
|
||||
|
||||
config VHOST
|
||||
tristate
|
||||
|
5
drivers/vhost/Kconfig.vringh
一般檔案
5
drivers/vhost/Kconfig.vringh
一般檔案
@@ -0,0 +1,5 @@
|
||||
config VHOST_RING
|
||||
tristate
|
||||
---help---
|
||||
This option is selected by any driver which needs to access
|
||||
the host side of a virtio ring.
|
@@ -4,5 +4,9 @@ vhost_net-y := net.o
|
||||
obj-$(CONFIG_VHOST_SCSI) += vhost_scsi.o
|
||||
vhost_scsi-y := scsi.o
|
||||
|
||||
obj-$(CONFIG_VHOST_VSOCK) += vhost_vsock.o
|
||||
vhost_vsock-y := vsock.o
|
||||
|
||||
obj-$(CONFIG_VHOST_RING) += vringh.o
|
||||
|
||||
obj-$(CONFIG_VHOST) += vhost.o
|
||||
|
@@ -61,7 +61,8 @@ MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;"
|
||||
enum {
|
||||
VHOST_NET_FEATURES = VHOST_FEATURES |
|
||||
(1ULL << VHOST_NET_F_VIRTIO_NET_HDR) |
|
||||
(1ULL << VIRTIO_NET_F_MRG_RXBUF)
|
||||
(1ULL << VIRTIO_NET_F_MRG_RXBUF) |
|
||||
(1ULL << VIRTIO_F_IOMMU_PLATFORM)
|
||||
};
|
||||
|
||||
enum {
|
||||
@@ -334,7 +335,7 @@ static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
|
||||
{
|
||||
unsigned long uninitialized_var(endtime);
|
||||
int r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
|
||||
out_num, in_num, NULL, NULL);
|
||||
out_num, in_num, NULL, NULL);
|
||||
|
||||
if (r == vq->num && vq->busyloop_timeout) {
|
||||
preempt_disable();
|
||||
@@ -344,7 +345,7 @@ static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
|
||||
cpu_relax_lowlatency();
|
||||
preempt_enable();
|
||||
r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
|
||||
out_num, in_num, NULL, NULL);
|
||||
out_num, in_num, NULL, NULL);
|
||||
}
|
||||
|
||||
return r;
|
||||
@@ -377,6 +378,9 @@ static void handle_tx(struct vhost_net *net)
|
||||
if (!sock)
|
||||
goto out;
|
||||
|
||||
if (!vq_iotlb_prefetch(vq))
|
||||
goto out;
|
||||
|
||||
vhost_disable_notify(&net->dev, vq);
|
||||
|
||||
hdr_size = nvq->vhost_hlen;
|
||||
@@ -652,6 +656,10 @@ static void handle_rx(struct vhost_net *net)
|
||||
sock = vq->private_data;
|
||||
if (!sock)
|
||||
goto out;
|
||||
|
||||
if (!vq_iotlb_prefetch(vq))
|
||||
goto out;
|
||||
|
||||
vhost_disable_notify(&net->dev, vq);
|
||||
vhost_net_disable_vq(net, vq);
|
||||
|
||||
@@ -1052,20 +1060,20 @@ static long vhost_net_reset_owner(struct vhost_net *n)
|
||||
struct socket *tx_sock = NULL;
|
||||
struct socket *rx_sock = NULL;
|
||||
long err;
|
||||
struct vhost_memory *memory;
|
||||
struct vhost_umem *umem;
|
||||
|
||||
mutex_lock(&n->dev.mutex);
|
||||
err = vhost_dev_check_owner(&n->dev);
|
||||
if (err)
|
||||
goto done;
|
||||
memory = vhost_dev_reset_owner_prepare();
|
||||
if (!memory) {
|
||||
umem = vhost_dev_reset_owner_prepare();
|
||||
if (!umem) {
|
||||
err = -ENOMEM;
|
||||
goto done;
|
||||
}
|
||||
vhost_net_stop(n, &tx_sock, &rx_sock);
|
||||
vhost_net_flush(n);
|
||||
vhost_dev_reset_owner(&n->dev, memory);
|
||||
vhost_dev_reset_owner(&n->dev, umem);
|
||||
vhost_net_vq_reset(n);
|
||||
done:
|
||||
mutex_unlock(&n->dev.mutex);
|
||||
@@ -1096,10 +1104,14 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features)
|
||||
}
|
||||
mutex_lock(&n->dev.mutex);
|
||||
if ((features & (1 << VHOST_F_LOG_ALL)) &&
|
||||
!vhost_log_access_ok(&n->dev)) {
|
||||
mutex_unlock(&n->dev.mutex);
|
||||
return -EFAULT;
|
||||
!vhost_log_access_ok(&n->dev))
|
||||
goto out_unlock;
|
||||
|
||||
if ((features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))) {
|
||||
if (vhost_init_device_iotlb(&n->dev, true))
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
for (i = 0; i < VHOST_NET_VQ_MAX; ++i) {
|
||||
mutex_lock(&n->vqs[i].vq.mutex);
|
||||
n->vqs[i].vq.acked_features = features;
|
||||
@@ -1109,6 +1121,10 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features)
|
||||
}
|
||||
mutex_unlock(&n->dev.mutex);
|
||||
return 0;
|
||||
|
||||
out_unlock:
|
||||
mutex_unlock(&n->dev.mutex);
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
static long vhost_net_set_owner(struct vhost_net *n)
|
||||
@@ -1182,9 +1198,40 @@ static long vhost_net_compat_ioctl(struct file *f, unsigned int ioctl,
|
||||
}
|
||||
#endif
|
||||
|
||||
static ssize_t vhost_net_chr_read_iter(struct kiocb *iocb, struct iov_iter *to)
|
||||
{
|
||||
struct file *file = iocb->ki_filp;
|
||||
struct vhost_net *n = file->private_data;
|
||||
struct vhost_dev *dev = &n->dev;
|
||||
int noblock = file->f_flags & O_NONBLOCK;
|
||||
|
||||
return vhost_chr_read_iter(dev, to, noblock);
|
||||
}
|
||||
|
||||
static ssize_t vhost_net_chr_write_iter(struct kiocb *iocb,
|
||||
struct iov_iter *from)
|
||||
{
|
||||
struct file *file = iocb->ki_filp;
|
||||
struct vhost_net *n = file->private_data;
|
||||
struct vhost_dev *dev = &n->dev;
|
||||
|
||||
return vhost_chr_write_iter(dev, from);
|
||||
}
|
||||
|
||||
/*
 * poll handler of the vhost-net character device.
 *
 * Delegates to vhost_chr_poll() with the vhost_dev embedded in the vhost_net
 * stored in the file's private_data.
 */
static unsigned int vhost_net_chr_poll(struct file *file, poll_table *wait)
{
	struct vhost_net *net = file->private_data;

	return vhost_chr_poll(file, &net->dev, wait);
}
|
||||
|
||||
static const struct file_operations vhost_net_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.release = vhost_net_release,
|
||||
.read_iter = vhost_net_chr_read_iter,
|
||||
.write_iter = vhost_net_chr_write_iter,
|
||||
.poll = vhost_net_chr_poll,
|
||||
.unlocked_ioctl = vhost_net_ioctl,
|
||||
#ifdef CONFIG_COMPAT
|
||||
.compat_ioctl = vhost_net_compat_ioctl,
|
||||
|
檔案差異因為檔案過大而無法顯示
載入差異
@@ -15,13 +15,15 @@
|
||||
struct vhost_work;
|
||||
typedef void (*vhost_work_fn_t)(struct vhost_work *work);
|
||||
|
||||
#define VHOST_WORK_QUEUED 1
|
||||
struct vhost_work {
|
||||
struct list_head node;
|
||||
struct llist_node node;
|
||||
vhost_work_fn_t fn;
|
||||
wait_queue_head_t done;
|
||||
int flushing;
|
||||
unsigned queue_seq;
|
||||
unsigned done_seq;
|
||||
unsigned long flags;
|
||||
};
|
||||
|
||||
/* Poll a file (eventfd or socket) */
|
||||
@@ -53,6 +55,27 @@ struct vhost_log {
|
||||
u64 len;
|
||||
};
|
||||
|
||||
#define START(node) ((node)->start)
|
||||
#define LAST(node) ((node)->last)
|
||||
|
||||
struct vhost_umem_node {
|
||||
struct rb_node rb;
|
||||
struct list_head link;
|
||||
__u64 start;
|
||||
__u64 last;
|
||||
__u64 size;
|
||||
__u64 userspace_addr;
|
||||
__u32 perm;
|
||||
__u32 flags_padding;
|
||||
__u64 __subtree_last;
|
||||
};
|
||||
|
||||
struct vhost_umem {
|
||||
struct rb_root umem_tree;
|
||||
struct list_head umem_list;
|
||||
int numem;
|
||||
};
|
||||
|
||||
/* The virtqueue structure describes a queue attached to a device. */
|
||||
struct vhost_virtqueue {
|
||||
struct vhost_dev *dev;
|
||||
@@ -98,10 +121,12 @@ struct vhost_virtqueue {
|
||||
u64 log_addr;
|
||||
|
||||
struct iovec iov[UIO_MAXIOV];
|
||||
struct iovec iotlb_iov[64];
|
||||
struct iovec *indirect;
|
||||
struct vring_used_elem *heads;
|
||||
/* Protected by virtqueue mutex. */
|
||||
struct vhost_memory *memory;
|
||||
struct vhost_umem *umem;
|
||||
struct vhost_umem *iotlb;
|
||||
void *private_data;
|
||||
u64 acked_features;
|
||||
/* Log write descriptors */
|
||||
@@ -118,25 +143,35 @@ struct vhost_virtqueue {
|
||||
u32 busyloop_timeout;
|
||||
};
|
||||
|
||||
struct vhost_msg_node {
|
||||
struct vhost_msg msg;
|
||||
struct vhost_virtqueue *vq;
|
||||
struct list_head node;
|
||||
};
|
||||
|
||||
struct vhost_dev {
|
||||
struct vhost_memory *memory;
|
||||
struct mm_struct *mm;
|
||||
struct mutex mutex;
|
||||
struct vhost_virtqueue **vqs;
|
||||
int nvqs;
|
||||
struct file *log_file;
|
||||
struct eventfd_ctx *log_ctx;
|
||||
spinlock_t work_lock;
|
||||
struct list_head work_list;
|
||||
struct llist_head work_list;
|
||||
struct task_struct *worker;
|
||||
struct vhost_umem *umem;
|
||||
struct vhost_umem *iotlb;
|
||||
spinlock_t iotlb_lock;
|
||||
struct list_head read_list;
|
||||
struct list_head pending_list;
|
||||
wait_queue_head_t wait;
|
||||
};
|
||||
|
||||
void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, int nvqs);
|
||||
long vhost_dev_set_owner(struct vhost_dev *dev);
|
||||
bool vhost_dev_has_owner(struct vhost_dev *dev);
|
||||
long vhost_dev_check_owner(struct vhost_dev *);
|
||||
struct vhost_memory *vhost_dev_reset_owner_prepare(void);
|
||||
void vhost_dev_reset_owner(struct vhost_dev *, struct vhost_memory *);
|
||||
struct vhost_umem *vhost_dev_reset_owner_prepare(void);
|
||||
void vhost_dev_reset_owner(struct vhost_dev *, struct vhost_umem *);
|
||||
void vhost_dev_cleanup(struct vhost_dev *, bool locked);
|
||||
void vhost_dev_stop(struct vhost_dev *);
|
||||
long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, void __user *argp);
|
||||
@@ -165,6 +200,21 @@ bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *);
|
||||
|
||||
int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
|
||||
unsigned int log_num, u64 len);
|
||||
int vq_iotlb_prefetch(struct vhost_virtqueue *vq);
|
||||
|
||||
struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type);
|
||||
void vhost_enqueue_msg(struct vhost_dev *dev,
|
||||
struct list_head *head,
|
||||
struct vhost_msg_node *node);
|
||||
struct vhost_msg_node *vhost_dequeue_msg(struct vhost_dev *dev,
|
||||
struct list_head *head);
|
||||
unsigned int vhost_chr_poll(struct file *file, struct vhost_dev *dev,
|
||||
poll_table *wait);
|
||||
ssize_t vhost_chr_read_iter(struct vhost_dev *dev, struct iov_iter *to,
|
||||
int noblock);
|
||||
ssize_t vhost_chr_write_iter(struct vhost_dev *dev,
|
||||
struct iov_iter *from);
|
||||
int vhost_init_device_iotlb(struct vhost_dev *d, bool enabled);
|
||||
|
||||
#define vq_err(vq, fmt, ...) do { \
|
||||
pr_debug(pr_fmt(fmt), ##__VA_ARGS__); \
|
||||
|
719
drivers/vhost/vsock.c
一般檔案
719
drivers/vhost/vsock.c
一般檔案
@@ -0,0 +1,719 @@
|
||||
/*
|
||||
* vhost transport for vsock
|
||||
*
|
||||
* Copyright (C) 2013-2015 Red Hat, Inc.
|
||||
* Author: Asias He <asias@redhat.com>
|
||||
* Stefan Hajnoczi <stefanha@redhat.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2.
|
||||
*/
|
||||
#include <linux/miscdevice.h>
|
||||
#include <linux/atomic.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <net/sock.h>
|
||||
#include <linux/virtio_vsock.h>
|
||||
#include <linux/vhost.h>
|
||||
|
||||
#include <net/af_vsock.h>
|
||||
#include "vhost.h"
|
||||
|
||||
#define VHOST_VSOCK_DEFAULT_HOST_CID 2
|
||||
|
||||
enum {
|
||||
VHOST_VSOCK_FEATURES = VHOST_FEATURES,
|
||||
};
|
||||
|
||||
/* Used to track all the vhost_vsock instances on the system. */
|
||||
static DEFINE_SPINLOCK(vhost_vsock_lock);
|
||||
static LIST_HEAD(vhost_vsock_list);
|
||||
|
||||
struct vhost_vsock {
|
||||
struct vhost_dev dev;
|
||||
struct vhost_virtqueue vqs[2];
|
||||
|
||||
/* Link to global vhost_vsock_list, protected by vhost_vsock_lock */
|
||||
struct list_head list;
|
||||
|
||||
struct vhost_work send_pkt_work;
|
||||
spinlock_t send_pkt_list_lock;
|
||||
struct list_head send_pkt_list; /* host->guest pending packets */
|
||||
|
||||
atomic_t queued_replies;
|
||||
|
||||
u32 guest_cid;
|
||||
};
|
||||
|
||||
static u32 vhost_transport_get_local_cid(void)
|
||||
{
|
||||
return VHOST_VSOCK_DEFAULT_HOST_CID;
|
||||
}
|
||||
|
||||
/*
 * Look up the vhost_vsock instance that owns @guest_cid.
 *
 * Walks the global instance list under vhost_vsock_lock.  Instances whose
 * CID is still 0 (none assigned yet) never match.
 *
 * Returns the matching instance or NULL.  No reference is taken; the
 * pointer is returned after the lock has been dropped.
 */
static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
{
	struct vhost_vsock *match = NULL;
	struct vhost_vsock *cur;

	spin_lock_bh(&vhost_vsock_lock);
	list_for_each_entry(cur, &vhost_vsock_list, list) {
		u32 cid = cur->guest_cid;

		/* Skip instances that have no CID yet */
		if (cid != 0 && cid == guest_cid) {
			match = cur;
			break;
		}
	}
	spin_unlock_bh(&vhost_vsock_lock);

	return match;
}
|
||||
|
||||
static void
|
||||
vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
|
||||
struct vhost_virtqueue *vq)
|
||||
{
|
||||
struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
|
||||
bool added = false;
|
||||
bool restart_tx = false;
|
||||
|
||||
mutex_lock(&vq->mutex);
|
||||
|
||||
if (!vq->private_data)
|
||||
goto out;
|
||||
|
||||
/* Avoid further vmexits, we're already processing the virtqueue */
|
||||
vhost_disable_notify(&vsock->dev, vq);
|
||||
|
||||
for (;;) {
|
||||
struct virtio_vsock_pkt *pkt;
|
||||
struct iov_iter iov_iter;
|
||||
unsigned out, in;
|
||||
size_t nbytes;
|
||||
size_t len;
|
||||
int head;
|
||||
|
||||
spin_lock_bh(&vsock->send_pkt_list_lock);
|
||||
if (list_empty(&vsock->send_pkt_list)) {
|
||||
spin_unlock_bh(&vsock->send_pkt_list_lock);
|
||||
vhost_enable_notify(&vsock->dev, vq);
|
||||
break;
|
||||
}
|
||||
|
||||
pkt = list_first_entry(&vsock->send_pkt_list,
|
||||
struct virtio_vsock_pkt, list);
|
||||
list_del_init(&pkt->list);
|
||||
spin_unlock_bh(&vsock->send_pkt_list_lock);
|
||||
|
||||
head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
|
||||
&out, &in, NULL, NULL);
|
||||
if (head < 0) {
|
||||
spin_lock_bh(&vsock->send_pkt_list_lock);
|
||||
list_add(&pkt->list, &vsock->send_pkt_list);
|
||||
spin_unlock_bh(&vsock->send_pkt_list_lock);
|
||||
break;
|
||||
}
|
||||
|
||||
if (head == vq->num) {
|
||||
spin_lock_bh(&vsock->send_pkt_list_lock);
|
||||
list_add(&pkt->list, &vsock->send_pkt_list);
|
||||
spin_unlock_bh(&vsock->send_pkt_list_lock);
|
||||
|
||||
/* We cannot finish yet if more buffers snuck in while
|
||||
* re-enabling notify.
|
||||
*/
|
||||
if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
|
||||
vhost_disable_notify(&vsock->dev, vq);
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (out) {
|
||||
virtio_transport_free_pkt(pkt);
|
||||
vq_err(vq, "Expected 0 output buffers, got %u\n", out);
|
||||
break;
|
||||
}
|
||||
|
||||
len = iov_length(&vq->iov[out], in);
|
||||
iov_iter_init(&iov_iter, READ, &vq->iov[out], in, len);
|
||||
|
||||
nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
|
||||
if (nbytes != sizeof(pkt->hdr)) {
|
||||
virtio_transport_free_pkt(pkt);
|
||||
vq_err(vq, "Faulted on copying pkt hdr\n");
|
||||
break;
|
||||
}
|
||||
|
||||
nbytes = copy_to_iter(pkt->buf, pkt->len, &iov_iter);
|
||||
if (nbytes != pkt->len) {
|
||||
virtio_transport_free_pkt(pkt);
|
||||
vq_err(vq, "Faulted on copying pkt buf\n");
|
||||
break;
|
||||
}
|
||||
|
||||
vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len);
|
||||
added = true;
|
||||
|
||||
if (pkt->reply) {
|
||||
int val;
|
||||
|
||||
val = atomic_dec_return(&vsock->queued_replies);
|
||||
|
||||
/* Do we have resources to resume tx processing? */
|
||||
if (val + 1 == tx_vq->num)
|
||||
restart_tx = true;
|
||||
}
|
||||
|
||||
virtio_transport_free_pkt(pkt);
|
||||
}
|
||||
if (added)
|
||||
vhost_signal(&vsock->dev, vq);
|
||||
|
||||
out:
|
||||
mutex_unlock(&vq->mutex);
|
||||
|
||||
if (restart_tx)
|
||||
vhost_poll_queue(&tx_vq->poll);
|
||||
}
|
||||
|
||||
static void vhost_transport_send_pkt_work(struct vhost_work *work)
|
||||
{
|
||||
struct vhost_virtqueue *vq;
|
||||
struct vhost_vsock *vsock;
|
||||
|
||||
vsock = container_of(work, struct vhost_vsock, send_pkt_work);
|
||||
vq = &vsock->vqs[VSOCK_VQ_RX];
|
||||
|
||||
vhost_transport_do_send_pkt(vsock, vq);
|
||||
}
|
||||
|
||||
static int
|
||||
vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt)
|
||||
{
|
||||
struct vhost_vsock *vsock;
|
||||
struct vhost_virtqueue *vq;
|
||||
int len = pkt->len;
|
||||
|
||||
/* Find the vhost_vsock according to guest context id */
|
||||
vsock = vhost_vsock_get(le64_to_cpu(pkt->hdr.dst_cid));
|
||||
if (!vsock) {
|
||||
virtio_transport_free_pkt(pkt);
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
vq = &vsock->vqs[VSOCK_VQ_RX];
|
||||
|
||||
if (pkt->reply)
|
||||
atomic_inc(&vsock->queued_replies);
|
||||
|
||||
spin_lock_bh(&vsock->send_pkt_list_lock);
|
||||
list_add_tail(&pkt->list, &vsock->send_pkt_list);
|
||||
spin_unlock_bh(&vsock->send_pkt_list_lock);
|
||||
|
||||
vhost_work_queue(&vsock->dev, &vsock->send_pkt_work);
|
||||
return len;
|
||||
}
|
||||
|
||||
static struct virtio_vsock_pkt *
|
||||
vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
|
||||
unsigned int out, unsigned int in)
|
||||
{
|
||||
struct virtio_vsock_pkt *pkt;
|
||||
struct iov_iter iov_iter;
|
||||
size_t nbytes;
|
||||
size_t len;
|
||||
|
||||
if (in != 0) {
|
||||
vq_err(vq, "Expected 0 input buffers, got %u\n", in);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
|
||||
if (!pkt)
|
||||
return NULL;
|
||||
|
||||
len = iov_length(vq->iov, out);
|
||||
iov_iter_init(&iov_iter, WRITE, vq->iov, out, len);
|
||||
|
||||
nbytes = copy_from_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
|
||||
if (nbytes != sizeof(pkt->hdr)) {
|
||||
vq_err(vq, "Expected %zu bytes for pkt->hdr, got %zu bytes\n",
|
||||
sizeof(pkt->hdr), nbytes);
|
||||
kfree(pkt);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_STREAM)
|
||||
pkt->len = le32_to_cpu(pkt->hdr.len);
|
||||
|
||||
/* No payload */
|
||||
if (!pkt->len)
|
||||
return pkt;
|
||||
|
||||
/* The pkt is too big */
|
||||
if (pkt->len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
|
||||
kfree(pkt);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
pkt->buf = kmalloc(pkt->len, GFP_KERNEL);
|
||||
if (!pkt->buf) {
|
||||
kfree(pkt);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter);
|
||||
if (nbytes != pkt->len) {
|
||||
vq_err(vq, "Expected %u byte payload, got %zu bytes\n",
|
||||
pkt->len, nbytes);
|
||||
virtio_transport_free_pkt(pkt);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return pkt;
|
||||
}
|
||||
|
||||
/* Is there space left for replies to rx packets? */
|
||||
static bool vhost_vsock_more_replies(struct vhost_vsock *vsock)
|
||||
{
|
||||
struct vhost_virtqueue *vq = &vsock->vqs[VSOCK_VQ_TX];
|
||||
int val;
|
||||
|
||||
smp_rmb(); /* paired with atomic_inc() and atomic_dec_return() */
|
||||
val = atomic_read(&vsock->queued_replies);
|
||||
|
||||
return val < vq->num;
|
||||
}
|
||||
|
||||
static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
|
||||
{
|
||||
struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
|
||||
poll.work);
|
||||
struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock,
|
||||
dev);
|
||||
struct virtio_vsock_pkt *pkt;
|
||||
int head;
|
||||
unsigned int out, in;
|
||||
bool added = false;
|
||||
|
||||
mutex_lock(&vq->mutex);
|
||||
|
||||
if (!vq->private_data)
|
||||
goto out;
|
||||
|
||||
vhost_disable_notify(&vsock->dev, vq);
|
||||
for (;;) {
|
||||
if (!vhost_vsock_more_replies(vsock)) {
|
||||
/* Stop tx until the device processes already
|
||||
* pending replies. Leave tx virtqueue
|
||||
* callbacks disabled.
|
||||
*/
|
||||
goto no_more_replies;
|
||||
}
|
||||
|
||||
head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
|
||||
&out, &in, NULL, NULL);
|
||||
if (head < 0)
|
||||
break;
|
||||
|
||||
if (head == vq->num) {
|
||||
if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
|
||||
vhost_disable_notify(&vsock->dev, vq);
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
pkt = vhost_vsock_alloc_pkt(vq, out, in);
|
||||
if (!pkt) {
|
||||
vq_err(vq, "Faulted on pkt\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Only accept correctly addressed packets */
|
||||
if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid)
|
||||
virtio_transport_recv_pkt(pkt);
|
||||
else
|
||||
virtio_transport_free_pkt(pkt);
|
||||
|
||||
vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len);
|
||||
added = true;
|
||||
}
|
||||
|
||||
no_more_replies:
|
||||
if (added)
|
||||
vhost_signal(&vsock->dev, vq);
|
||||
|
||||
out:
|
||||
mutex_unlock(&vq->mutex);
|
||||
}
|
||||
|
||||
static void vhost_vsock_handle_rx_kick(struct vhost_work *work)
|
||||
{
|
||||
struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
|
||||
poll.work);
|
||||
struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock,
|
||||
dev);
|
||||
|
||||
vhost_transport_do_send_pkt(vsock, vq);
|
||||
}
|
||||
|
||||
static int vhost_vsock_start(struct vhost_vsock *vsock)
|
||||
{
|
||||
size_t i;
|
||||
int ret;
|
||||
|
||||
mutex_lock(&vsock->dev.mutex);
|
||||
|
||||
ret = vhost_dev_check_owner(&vsock->dev);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
|
||||
struct vhost_virtqueue *vq = &vsock->vqs[i];
|
||||
|
||||
mutex_lock(&vq->mutex);
|
||||
|
||||
if (!vhost_vq_access_ok(vq)) {
|
||||
ret = -EFAULT;
|
||||
mutex_unlock(&vq->mutex);
|
||||
goto err_vq;
|
||||
}
|
||||
|
||||
if (!vq->private_data) {
|
||||
vq->private_data = vsock;
|
||||
vhost_vq_init_access(vq);
|
||||
}
|
||||
|
||||
mutex_unlock(&vq->mutex);
|
||||
}
|
||||
|
||||
mutex_unlock(&vsock->dev.mutex);
|
||||
return 0;
|
||||
|
||||
err_vq:
|
||||
for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
|
||||
struct vhost_virtqueue *vq = &vsock->vqs[i];
|
||||
|
||||
mutex_lock(&vq->mutex);
|
||||
vq->private_data = NULL;
|
||||
mutex_unlock(&vq->mutex);
|
||||
}
|
||||
err:
|
||||
mutex_unlock(&vsock->dev.mutex);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int vhost_vsock_stop(struct vhost_vsock *vsock)
|
||||
{
|
||||
size_t i;
|
||||
int ret;
|
||||
|
||||
mutex_lock(&vsock->dev.mutex);
|
||||
|
||||
ret = vhost_dev_check_owner(&vsock->dev);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
|
||||
struct vhost_virtqueue *vq = &vsock->vqs[i];
|
||||
|
||||
mutex_lock(&vq->mutex);
|
||||
vq->private_data = NULL;
|
||||
mutex_unlock(&vq->mutex);
|
||||
}
|
||||
|
||||
err:
|
||||
mutex_unlock(&vsock->dev.mutex);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Release a vhost_vsock.  kvfree() is used because the open path may have
 * obtained the structure from either kzalloc() or vmalloc().
 */
static void vhost_vsock_free(struct vhost_vsock *vsock)
{
	kvfree(vsock);
}
|
||||
|
||||
static int vhost_vsock_dev_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct vhost_virtqueue **vqs;
|
||||
struct vhost_vsock *vsock;
|
||||
int ret;
|
||||
|
||||
/* This struct is large and allocation could fail, fall back to vmalloc
|
||||
* if there is no other way.
|
||||
*/
|
||||
vsock = kzalloc(sizeof(*vsock), GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
|
||||
if (!vsock) {
|
||||
vsock = vmalloc(sizeof(*vsock));
|
||||
if (!vsock)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
vqs = kmalloc_array(ARRAY_SIZE(vsock->vqs), sizeof(*vqs), GFP_KERNEL);
|
||||
if (!vqs) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
atomic_set(&vsock->queued_replies, 0);
|
||||
|
||||
vqs[VSOCK_VQ_TX] = &vsock->vqs[VSOCK_VQ_TX];
|
||||
vqs[VSOCK_VQ_RX] = &vsock->vqs[VSOCK_VQ_RX];
|
||||
vsock->vqs[VSOCK_VQ_TX].handle_kick = vhost_vsock_handle_tx_kick;
|
||||
vsock->vqs[VSOCK_VQ_RX].handle_kick = vhost_vsock_handle_rx_kick;
|
||||
|
||||
vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs));
|
||||
|
||||
file->private_data = vsock;
|
||||
spin_lock_init(&vsock->send_pkt_list_lock);
|
||||
INIT_LIST_HEAD(&vsock->send_pkt_list);
|
||||
vhost_work_init(&vsock->send_pkt_work, vhost_transport_send_pkt_work);
|
||||
|
||||
spin_lock_bh(&vhost_vsock_lock);
|
||||
list_add_tail(&vsock->list, &vhost_vsock_list);
|
||||
spin_unlock_bh(&vhost_vsock_lock);
|
||||
return 0;
|
||||
|
||||
out:
|
||||
vhost_vsock_free(vsock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void vhost_vsock_flush(struct vhost_vsock *vsock)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++)
|
||||
if (vsock->vqs[i].handle_kick)
|
||||
vhost_poll_flush(&vsock->vqs[i].poll);
|
||||
vhost_work_flush(&vsock->dev, &vsock->send_pkt_work);
|
||||
}
|
||||
|
||||
static void vhost_vsock_reset_orphans(struct sock *sk)
|
||||
{
|
||||
struct vsock_sock *vsk = vsock_sk(sk);
|
||||
|
||||
/* vmci_transport.c doesn't take sk_lock here either. At least we're
|
||||
* under vsock_table_lock so the sock cannot disappear while we're
|
||||
* executing.
|
||||
*/
|
||||
|
||||
if (!vhost_vsock_get(vsk->local_addr.svm_cid)) {
|
||||
sock_set_flag(sk, SOCK_DONE);
|
||||
vsk->peer_shutdown = SHUTDOWN_MASK;
|
||||
sk->sk_state = SS_UNCONNECTED;
|
||||
sk->sk_err = ECONNRESET;
|
||||
sk->sk_error_report(sk);
|
||||
}
|
||||
}
|
||||
|
||||
static int vhost_vsock_dev_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct vhost_vsock *vsock = file->private_data;
|
||||
|
||||
spin_lock_bh(&vhost_vsock_lock);
|
||||
list_del(&vsock->list);
|
||||
spin_unlock_bh(&vhost_vsock_lock);
|
||||
|
||||
/* Iterating over all connections for all CIDs to find orphans is
|
||||
* inefficient. Room for improvement here. */
|
||||
vsock_for_each_connected_socket(vhost_vsock_reset_orphans);
|
||||
|
||||
vhost_vsock_stop(vsock);
|
||||
vhost_vsock_flush(vsock);
|
||||
vhost_dev_stop(&vsock->dev);
|
||||
|
||||
spin_lock_bh(&vsock->send_pkt_list_lock);
|
||||
while (!list_empty(&vsock->send_pkt_list)) {
|
||||
struct virtio_vsock_pkt *pkt;
|
||||
|
||||
pkt = list_first_entry(&vsock->send_pkt_list,
|
||||
struct virtio_vsock_pkt, list);
|
||||
list_del_init(&pkt->list);
|
||||
virtio_transport_free_pkt(pkt);
|
||||
}
|
||||
spin_unlock_bh(&vsock->send_pkt_list_lock);
|
||||
|
||||
vhost_dev_cleanup(&vsock->dev, false);
|
||||
kfree(vsock->dev.vqs);
|
||||
vhost_vsock_free(vsock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Assign @guest_cid to @vsock.
 *
 * Reserved CIDs (anything up to VMADDR_CID_HOST, and U32_MAX) and CIDs
 * that do not fit in 32 bits are rejected.  The uniqueness check and the
 * assignment are done under a single hold of vhost_vsock_lock, so two
 * instances racing for the same CID cannot both succeed.
 *
 * Returns 0 on success, -EINVAL for an unusable CID, -EADDRINUSE when a
 * different instance already owns the CID.
 */
static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid)
{
	struct vhost_vsock *other;
	int ret = 0;

	/* Refuse reserved CIDs */
	if (guest_cid <= VMADDR_CID_HOST ||
	    guest_cid == U32_MAX)
		return -EINVAL;

	/* 64-bit CIDs are not yet supported */
	if (guest_cid > U32_MAX)
		return -EINVAL;

	spin_lock_bh(&vhost_vsock_lock);

	/*
	 * Refuse if CID is already in use by another instance.  guest_cid is
	 * non-zero here, so instances without a CID (0) never match.
	 */
	list_for_each_entry(other, &vhost_vsock_list, list) {
		if (other != vsock && other->guest_cid == guest_cid) {
			ret = -EADDRINUSE;
			break;
		}
	}

	if (!ret)
		vsock->guest_cid = guest_cid;

	spin_unlock_bh(&vhost_vsock_lock);

	return ret;
}
|
||||
|
||||
/*
 * Record the feature bits acknowledged by userspace on every virtqueue.
 *
 * Returns 0 on success, -EOPNOTSUPP when @features contains bits outside
 * VHOST_VSOCK_FEATURES, or -EFAULT when VHOST_F_LOG_ALL is requested but
 * the log access check fails.
 */
static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features)
{
	int ret = 0;
	int idx;

	if (features & ~VHOST_VSOCK_FEATURES)
		return -EOPNOTSUPP;

	mutex_lock(&vsock->dev.mutex);

	if ((features & (1 << VHOST_F_LOG_ALL)) &&
	    !vhost_log_access_ok(&vsock->dev)) {
		ret = -EFAULT;
		goto unlock;
	}

	for (idx = 0; idx < ARRAY_SIZE(vsock->vqs); idx++) {
		struct vhost_virtqueue *vq = &vsock->vqs[idx];

		mutex_lock(&vq->mutex);
		vq->acked_features = features;
		mutex_unlock(&vq->mutex);
	}

unlock:
	mutex_unlock(&vsock->dev.mutex);
	return ret;
}
|
||||
|
||||
static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl,
|
||||
unsigned long arg)
|
||||
{
|
||||
struct vhost_vsock *vsock = f->private_data;
|
||||
void __user *argp = (void __user *)arg;
|
||||
u64 guest_cid;
|
||||
u64 features;
|
||||
int start;
|
||||
int r;
|
||||
|
||||
switch (ioctl) {
|
||||
case VHOST_VSOCK_SET_GUEST_CID:
|
||||
if (copy_from_user(&guest_cid, argp, sizeof(guest_cid)))
|
||||
return -EFAULT;
|
||||
return vhost_vsock_set_cid(vsock, guest_cid);
|
||||
case VHOST_VSOCK_SET_RUNNING:
|
||||
if (copy_from_user(&start, argp, sizeof(start)))
|
||||
return -EFAULT;
|
||||
if (start)
|
||||
return vhost_vsock_start(vsock);
|
||||
else
|
||||
return vhost_vsock_stop(vsock);
|
||||
case VHOST_GET_FEATURES:
|
||||
features = VHOST_VSOCK_FEATURES;
|
||||
if (copy_to_user(argp, &features, sizeof(features)))
|
||||
return -EFAULT;
|
||||
return 0;
|
||||
case VHOST_SET_FEATURES:
|
||||
if (copy_from_user(&features, argp, sizeof(features)))
|
||||
return -EFAULT;
|
||||
return vhost_vsock_set_features(vsock, features);
|
||||
default:
|
||||
mutex_lock(&vsock->dev.mutex);
|
||||
r = vhost_dev_ioctl(&vsock->dev, ioctl, argp);
|
||||
if (r == -ENOIOCTLCMD)
|
||||
r = vhost_vring_ioctl(&vsock->dev, ioctl, argp);
|
||||
else
|
||||
vhost_vsock_flush(vsock);
|
||||
mutex_unlock(&vsock->dev.mutex);
|
||||
return r;
|
||||
}
|
||||
}
|
||||
|
||||
static const struct file_operations vhost_vsock_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = vhost_vsock_dev_open,
|
||||
.release = vhost_vsock_dev_release,
|
||||
.llseek = noop_llseek,
|
||||
.unlocked_ioctl = vhost_vsock_dev_ioctl,
|
||||
};
|
||||
|
||||
static struct miscdevice vhost_vsock_misc = {
|
||||
.minor = MISC_DYNAMIC_MINOR,
|
||||
.name = "vhost-vsock",
|
||||
.fops = &vhost_vsock_fops,
|
||||
};
|
||||
|
||||
static struct virtio_transport vhost_transport = {
|
||||
.transport = {
|
||||
.get_local_cid = vhost_transport_get_local_cid,
|
||||
|
||||
.init = virtio_transport_do_socket_init,
|
||||
.destruct = virtio_transport_destruct,
|
||||
.release = virtio_transport_release,
|
||||
.connect = virtio_transport_connect,
|
||||
.shutdown = virtio_transport_shutdown,
|
||||
|
||||
.dgram_enqueue = virtio_transport_dgram_enqueue,
|
||||
.dgram_dequeue = virtio_transport_dgram_dequeue,
|
||||
.dgram_bind = virtio_transport_dgram_bind,
|
||||
.dgram_allow = virtio_transport_dgram_allow,
|
||||
|
||||
.stream_enqueue = virtio_transport_stream_enqueue,
|
||||
.stream_dequeue = virtio_transport_stream_dequeue,
|
||||
.stream_has_data = virtio_transport_stream_has_data,
|
||||
.stream_has_space = virtio_transport_stream_has_space,
|
||||
.stream_rcvhiwat = virtio_transport_stream_rcvhiwat,
|
||||
.stream_is_active = virtio_transport_stream_is_active,
|
||||
.stream_allow = virtio_transport_stream_allow,
|
||||
|
||||
.notify_poll_in = virtio_transport_notify_poll_in,
|
||||
.notify_poll_out = virtio_transport_notify_poll_out,
|
||||
.notify_recv_init = virtio_transport_notify_recv_init,
|
||||
.notify_recv_pre_block = virtio_transport_notify_recv_pre_block,
|
||||
.notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue,
|
||||
.notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue,
|
||||
.notify_send_init = virtio_transport_notify_send_init,
|
||||
.notify_send_pre_block = virtio_transport_notify_send_pre_block,
|
||||
.notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue,
|
||||
.notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue,
|
||||
|
||||
.set_buffer_size = virtio_transport_set_buffer_size,
|
||||
.set_min_buffer_size = virtio_transport_set_min_buffer_size,
|
||||
.set_max_buffer_size = virtio_transport_set_max_buffer_size,
|
||||
.get_buffer_size = virtio_transport_get_buffer_size,
|
||||
.get_min_buffer_size = virtio_transport_get_min_buffer_size,
|
||||
.get_max_buffer_size = virtio_transport_get_max_buffer_size,
|
||||
},
|
||||
|
||||
.send_pkt = vhost_transport_send_pkt,
|
||||
};
|
||||
|
||||
/*
 * Module init: register the vhost transport with the vsock core, then
 * register the misc device.
 *
 * If the misc device cannot be registered, the transport registration is
 * undone so a failed module load leaves nothing behind.
 *
 * Returns 0 on success or the error of the step that failed.
 */
static int __init vhost_vsock_init(void)
{
	int ret;

	ret = vsock_core_init(&vhost_transport.transport);
	if (ret < 0)
		return ret;

	ret = misc_register(&vhost_vsock_misc);
	if (ret) {
		/* Roll back the transport registration done above. */
		vsock_core_exit();
		return ret;
	}

	return 0;
}
|
||||
|
||||
/*
 * Module exit: tear down in the reverse order of init - first the misc
 * device, then the vsock core registration.
 */
static void __exit vhost_vsock_exit(void)
{
	misc_deregister(&vhost_vsock_misc);
	vsock_core_exit();
}
|
||||
|
||||
module_init(vhost_vsock_init);
|
||||
module_exit(vhost_vsock_exit);
|
||||
MODULE_LICENSE("GPL v2");
|
||||
MODULE_AUTHOR("Asias He");
|
||||
MODULE_DESCRIPTION("vhost transport for vsock ");
|
新增問題並參考
封鎖使用者