Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull virtio updates from Michael Tsirkin:

 - virtio-mem: paravirtualized memory hotplug

 - support doorbell mapping for vdpa

 - config interrupt support in ifc

 - fixes all over the place

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (40 commits)
  vhost/test: fix up after API change
  virtio_mem: convert device block size into 64bit
  virtio-mem: drop unnecessary initialization
  ifcvf: implement config interrupt in IFCVF
  vhost: replace -1 with VHOST_FILE_UNBIND in ioctls
  vhost_vdpa: Support config interrupt in vdpa
  ifcvf: ignore continuous setting same status value
  virtio-mem: Don't rely on implicit compiler padding for requests
  virtio-mem: Try to unplug the complete online memory block first
  virtio-mem: Use -ETXTBSY as error code if the device is busy
  virtio-mem: Unplug subblocks right-to-left
  virtio-mem: Drop manual check for already present memory
  virtio-mem: Add parent resource for all added "System RAM"
  virtio-mem: Better retry handling
  virtio-mem: Offline and remove completely unplugged memory blocks
  mm/memory_hotplug: Introduce offline_and_remove_memory()
  virtio-mem: Allow to offline partially unplugged memory blocks
  mm: Allow to offline unmovable PageOffline() pages via MEM_GOING_OFFLINE
  virtio-mem: Paravirtualized memory hotunplug part 2
  virtio-mem: Paravirtualized memory hotunplug part 1
  ...
This commit is contained in:
Linus Torvalds
2020-06-10 13:42:09 -07:00
36 changed files with 2723 additions and 144 deletions

View File

@@ -15,6 +15,8 @@
#include <linux/types.h>
#include <linux/ioctl.h>
#define VHOST_FILE_UNBIND -1
/* ioctls */
#define VHOST_VIRTIO 0xAF
@@ -140,4 +142,6 @@
/* Get the max ring size. */
#define VHOST_VDPA_GET_VRING_NUM _IOR(VHOST_VIRTIO, 0x76, __u16)
/* Set event fd for config interrupt */
#define VHOST_VDPA_SET_CONFIG_CALL _IOW(VHOST_VIRTIO, 0x77, int)
#endif

View File

@@ -44,6 +44,7 @@
#define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */
#define VIRTIO_ID_CRYPTO 20 /* virtio crypto */
#define VIRTIO_ID_IOMMU 23 /* virtio IOMMU */
#define VIRTIO_ID_MEM 24 /* virtio mem */
#define VIRTIO_ID_FS 26 /* virtio filesystem */
#define VIRTIO_ID_PMEM 27 /* virtio pmem */
#define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */

View File

@@ -0,0 +1,211 @@
/* SPDX-License-Identifier: BSD-3-Clause */
/*
* Virtio Mem Device
*
* Copyright Red Hat, Inc. 2020
*
* Authors:
* David Hildenbrand <david@redhat.com>
*
* This header is BSD licensed so anyone can use the definitions
* to implement compatible drivers/servers:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of IBM nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL IBM OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef _LINUX_VIRTIO_MEM_H
#define _LINUX_VIRTIO_MEM_H
#include <linux/types.h>
#include <linux/virtio_types.h>
#include <linux/virtio_ids.h>
#include <linux/virtio_config.h>
/*
* Each virtio-mem device manages a dedicated region in physical address
* space. Each device can belong to a single NUMA node, multiple devices
* for a single NUMA node are possible. A virtio-mem device is like a
* "resizable DIMM" consisting of small memory blocks that can be plugged
* or unplugged. The device driver is responsible for (un)plugging memory
* blocks on demand.
*
* Virtio-mem devices can only operate on their assigned memory region in
* order to (un)plug memory. A device cannot (un)plug memory belonging to
* other devices.
*
* The "region_size" corresponds to the maximum amount of memory that can
* be provided by a device. The "size" corresponds to the amount of memory
* that is currently plugged. "requested_size" corresponds to a request
* from the device to the device driver to (un)plug blocks. The
* device driver should try to (un)plug blocks in order to reach the
* "requested_size". It is impossible to plug more memory than requested.
*
* The "usable_region_size" represents the memory region that can actually
* be used to (un)plug memory. It is always at least as big as the
* "requested_size" and will grow dynamically. It will only shrink when
* explicitly triggered (VIRTIO_MEM_REQ_UNPLUG_ALL).
*
* There are no guarantees what will happen if unplugged memory is
* read/written. Such memory should, in general, not be touched. E.g.,
* even writing might succeed, but the values will simply be discarded at
* random points in time.
*
* It can happen that the device cannot process a request, because it is
* busy. The device driver has to retry later.
*
* Usually, during system resets all memory will get unplugged, so the
* device driver can start with a clean state. However, in specific
* scenarios (if the device is busy) it can happen that the device still
* has memory plugged. The device driver can request to unplug all memory
* (VIRTIO_MEM_REQ_UNPLUG_ALL) - which might take a while to succeed if the
* device is busy.
*/
/* --- virtio-mem: feature bits --- */
/* node_id is an ACPI PXM and is valid */
#define VIRTIO_MEM_F_ACPI_PXM 0
/* --- virtio-mem: guest -> host requests --- */
/* request to plug memory blocks */
#define VIRTIO_MEM_REQ_PLUG 0
/* request to unplug memory blocks */
#define VIRTIO_MEM_REQ_UNPLUG 1
/* request to unplug all blocks and shrink the usable size */
#define VIRTIO_MEM_REQ_UNPLUG_ALL 2
/* request information about the plugged state of memory blocks */
#define VIRTIO_MEM_REQ_STATE 3
/*
 * Request payload stored in virtio_mem_req.u.plug; going by its name it
 * accompanies VIRTIO_MEM_REQ_PLUG ("request to plug memory blocks").
 *
 * The trailing padding is spelled out so the layout does not rely on
 * implicit compiler padding: with the usual 64-bit/16-bit widths of
 * __virtio64/__virtio16 the members add up to 8 + 2 + 6 = 16 bytes.
 */
struct virtio_mem_req_plug {
/* NOTE(review): presumably the start address of the first block - confirm */
__virtio64 addr;
/* NOTE(review): presumably the number of blocks starting at addr - confirm */
__virtio16 nb_blocks;
/* explicit padding (3 x 16 bit) */
__virtio16 padding[3];
};
/*
 * Request payload stored in virtio_mem_req.u.unplug; going by its name it
 * accompanies VIRTIO_MEM_REQ_UNPLUG ("request to unplug memory blocks").
 *
 * The trailing padding is spelled out so the layout does not rely on
 * implicit compiler padding: with the usual 64-bit/16-bit widths of
 * __virtio64/__virtio16 the members add up to 8 + 2 + 6 = 16 bytes.
 */
struct virtio_mem_req_unplug {
/* NOTE(review): presumably the start address of the first block - confirm */
__virtio64 addr;
/* NOTE(review): presumably the number of blocks starting at addr - confirm */
__virtio16 nb_blocks;
/* explicit padding (3 x 16 bit) */
__virtio16 padding[3];
};
/*
 * Request payload stored in virtio_mem_req.u.state; going by its name it
 * accompanies VIRTIO_MEM_REQ_STATE ("request information about the plugged
 * state of memory blocks").
 *
 * The trailing padding is spelled out so the layout does not rely on
 * implicit compiler padding: with the usual 64-bit/16-bit widths of
 * __virtio64/__virtio16 the members add up to 8 + 2 + 6 = 16 bytes.
 */
struct virtio_mem_req_state {
/* NOTE(review): presumably the start address of the queried range - confirm */
__virtio64 addr;
/* NOTE(review): presumably the number of blocks starting at addr - confirm */
__virtio16 nb_blocks;
/* explicit padding (3 x 16 bit) */
__virtio16 padding[3];
};
/*
 * A request: a 16-bit type, explicit padding, then a union of the
 * per-request payloads.
 *
 * The union has members for plug, unplug and state only; there is no
 * dedicated payload member for VIRTIO_MEM_REQ_UNPLUG_ALL.
 */
struct virtio_mem_req {
/* NOTE(review): presumably one of the VIRTIO_MEM_REQ_* values - confirm */
__virtio16 type;
/* explicit padding (3 x 16 bit) between the type and the payload union */
__virtio16 padding[3];
/* NOTE(review): presumably the member matching "type" is the valid one */
union {
struct virtio_mem_req_plug plug;
struct virtio_mem_req_unplug unplug;
struct virtio_mem_req_state state;
} u;
};
/* --- virtio-mem: host -> guest response --- */
/*
* Request processed successfully, applicable for
* - VIRTIO_MEM_REQ_PLUG
* - VIRTIO_MEM_REQ_UNPLUG
* - VIRTIO_MEM_REQ_UNPLUG_ALL
* - VIRTIO_MEM_REQ_STATE
*/
#define VIRTIO_MEM_RESP_ACK 0
/*
* Request denied - e.g. trying to plug more than requested, applicable for
* - VIRTIO_MEM_REQ_PLUG
*/
#define VIRTIO_MEM_RESP_NACK 1
/*
* Request cannot be processed right now, try again later, applicable for
* - VIRTIO_MEM_REQ_PLUG
* - VIRTIO_MEM_REQ_UNPLUG
* - VIRTIO_MEM_REQ_UNPLUG_ALL
*/
#define VIRTIO_MEM_RESP_BUSY 2
/*
* Error in request (e.g. addresses/alignment), applicable for
* - VIRTIO_MEM_REQ_PLUG
* - VIRTIO_MEM_REQ_UNPLUG
* - VIRTIO_MEM_REQ_STATE
*/
#define VIRTIO_MEM_RESP_ERROR 3
/* State of memory blocks is "plugged" */
#define VIRTIO_MEM_STATE_PLUGGED 0
/* State of memory blocks is "unplugged" */
#define VIRTIO_MEM_STATE_UNPLUGGED 1
/* State of memory blocks is "mixed" */
#define VIRTIO_MEM_STATE_MIXED 2
/*
 * Response payload stored in virtio_mem_resp.u.state; going by its name it
 * answers a VIRTIO_MEM_REQ_STATE request.
 */
struct virtio_mem_resp_state {
/* NOTE(review): presumably one of the VIRTIO_MEM_STATE_* values - confirm */
__virtio16 state;
};
/*
 * A response: a 16-bit type, explicit padding, then a union of the
 * per-response payloads. Only a state payload is defined.
 */
struct virtio_mem_resp {
/* NOTE(review): presumably one of the VIRTIO_MEM_RESP_* values - confirm */
__virtio16 type;
/* explicit padding (3 x 16 bit) between the type and the payload union */
__virtio16 padding[3];
/* NOTE(review): presumably only meaningful for state requests - confirm */
union {
struct virtio_mem_resp_state state;
} u;
};
/* --- virtio-mem: configuration --- */
/*
 * Device configuration layout. Unlike the request/response structs, the
 * members here are declared with plain __u64/__u16/__u8 types rather than
 * __virtioNN types.
 */
struct virtio_mem_config {
/* Block size and alignment. Cannot change. */
__u64 block_size;
/* Valid with VIRTIO_MEM_F_ACPI_PXM. Cannot change. */
__u16 node_id;
/*
 * Explicit padding: 8 (block_size) + 2 (node_id) + 6 = 16, so the
 * following 64-bit members start at a multiple of 8 bytes.
 */
__u8 padding[6];
/* Start address of the memory region. Cannot change. */
__u64 addr;
/* Region size (maximum). Cannot change. */
__u64 region_size;
/*
 * Currently usable region size. Can grow up to region_size. Can
 * shrink due to VIRTIO_MEM_REQ_UNPLUG_ALL (in which case no config
 * update will be sent).
 */
__u64 usable_region_size;
/*
 * Currently used size. Changes due to plug/unplug requests, but no
 * config updates will be sent.
 */
__u64 plugged_size;
/* Requested size. New plug requests cannot exceed it. Can change. */
__u64 requested_size;
};
#endif /* _LINUX_VIRTIO_MEM_H */

View File

@@ -86,6 +86,13 @@
* at the end of the used ring. Guest should ignore the used->flags field. */
#define VIRTIO_RING_F_EVENT_IDX 29
/* Alignment requirements for vring elements.
* When using pre-virtio 1.0 layout, these fall out naturally.
*/
#define VRING_AVAIL_ALIGN_SIZE 2
#define VRING_USED_ALIGN_SIZE 4
#define VRING_DESC_ALIGN_SIZE 16
/* Virtio ring descriptors: 16 bytes. These can chain together via "next". */
struct vring_desc {
/* Address (guest-physical). */
@@ -112,28 +119,47 @@ struct vring_used_elem {
__virtio32 len;
};
typedef struct vring_used_elem __attribute__((aligned(VRING_USED_ALIGN_SIZE)))
vring_used_elem_t;
struct vring_used {
__virtio16 flags;
__virtio16 idx;
struct vring_used_elem ring[];
vring_used_elem_t ring[];
};
/*
* The ring element addresses are passed between components with different
* alignment assumptions. Thus, we might need to decrease the compiler-selected
* alignment, and so must use a typedef to make sure the aligned attribute
* actually takes hold:
*
* https://gcc.gnu.org/onlinedocs//gcc/Common-Type-Attributes.html#Common-Type-Attributes
*
* When used on a struct, or struct member, the aligned attribute can only
* increase the alignment; in order to decrease it, the packed attribute must
* be specified as well. When used as part of a typedef, the aligned attribute
* can both increase and decrease alignment, and specifying the packed
* attribute generates a warning.
*/
typedef struct vring_desc __attribute__((aligned(VRING_DESC_ALIGN_SIZE)))
vring_desc_t;
typedef struct vring_avail __attribute__((aligned(VRING_AVAIL_ALIGN_SIZE)))
vring_avail_t;
typedef struct vring_used __attribute__((aligned(VRING_USED_ALIGN_SIZE)))
vring_used_t;
struct vring {
unsigned int num;
struct vring_desc *desc;
vring_desc_t *desc;
struct vring_avail *avail;
vring_avail_t *avail;
struct vring_used *used;
vring_used_t *used;
};
/* Alignment requirements for vring elements.
* When using pre-virtio 1.0 layout, these fall out naturally.
*/
#define VRING_AVAIL_ALIGN_SIZE 2
#define VRING_USED_ALIGN_SIZE 4
#define VRING_DESC_ALIGN_SIZE 16
#ifndef VIRTIO_RING_NO_LEGACY
/* The standard layout for the ring is a continuous chunk of memory which looks
* like this. We assume num is a power of 2.
@@ -181,6 +207,8 @@ static inline unsigned vring_size(unsigned int num, unsigned long align)
+ sizeof(__virtio16) * 3 + sizeof(struct vring_used_elem) * num;
}
#endif /* VIRTIO_RING_NO_LEGACY */
/* The following is used with USED_EVENT_IDX and AVAIL_EVENT_IDX */
/* Assuming a given event_idx value from the other side, if
* we have just incremented index from old to new_idx,