Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini: "ARM: - Move the arch-specific code into arch/arm64/kvm - Start the post-32bit cleanup - Cherry-pick a few non-invasive pre-NV patches x86: - Rework of TLB flushing - Rework of event injection, especially with respect to nested virtualization - Nested AMD event injection facelift, building on the rework of generic code and fixing a lot of corner cases - Nested AMD live migration support - Optimization for TSC deadline MSR writes and IPIs - Various cleanups - Asynchronous page fault cleanups (from tglx, common topic branch with tip tree) - Interrupt-based delivery of asynchronous "page ready" events (host side) - Hyper-V MSRs and hypercalls for guest debugging - VMX preemption timer fixes s390: - Cleanups Generic: - switch vCPU thread wakeup from swait to rcuwait The other architectures, and the guest side of the asynchronous page fault work, will come next week" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (256 commits) KVM: selftests: fix rdtsc() for vmx_tsc_adjust_test KVM: check userspace_addr for all memslots KVM: selftests: update hyperv_cpuid with SynDBG tests x86/kvm/hyper-v: Add support for synthetic debugger via hypercalls x86/kvm/hyper-v: enable hypercalls regardless of hypercall page x86/kvm/hyper-v: Add support for synthetic debugger interface x86/hyper-v: Add synthetic debugger definitions KVM: selftests: VMX preemption timer migration test KVM: nVMX: Fix VMX preemption timer migration x86/kvm/hyper-v: Explicitly align hcall param for kvm_hyperv_exit KVM: x86/pmu: Support full width counting KVM: x86/pmu: Tweak kvm_pmu_get_msr to pass 'struct msr_data' in KVM: x86: announce KVM_FEATURE_ASYNC_PF_INT KVM: x86: acknowledgment mechanism for async pf page ready notifications KVM: x86: interrupt based APF 'page ready' event delivery KVM: introduce kvm_read_guest_offset_cached() KVM: rename kvm_arch_can_inject_async_page_present() to kvm_arch_can_dequeue_async_page_present() KVM: x86: extend struct kvm_vcpu_pv_apf_data with token info Revert "KVM: async_pf: Fix #DF due to inject "Page not Present" and "Page Ready" exceptions simultaneously" KVM: VMX: Replace zero-length array with flexible-array ...
This commit is contained in:
@@ -4336,9 +4336,13 @@ Errors:
|
||||
#define KVM_STATE_NESTED_VMX_SMM_GUEST_MODE 0x00000001
|
||||
#define KVM_STATE_NESTED_VMX_SMM_VMXON 0x00000002
|
||||
|
||||
#define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001
|
||||
|
||||
struct kvm_vmx_nested_state_hdr {
|
||||
__u32 flags;
|
||||
__u64 vmxon_pa;
|
||||
__u64 vmcs12_pa;
|
||||
__u64 preemption_timer_deadline;
|
||||
|
||||
struct {
|
||||
__u16 flags;
|
||||
@@ -5068,10 +5072,13 @@ EOI was received.
|
||||
struct kvm_hyperv_exit {
|
||||
#define KVM_EXIT_HYPERV_SYNIC 1
|
||||
#define KVM_EXIT_HYPERV_HCALL 2
|
||||
#define KVM_EXIT_HYPERV_SYNDBG 3
|
||||
__u32 type;
|
||||
__u32 pad1;
|
||||
union {
|
||||
struct {
|
||||
__u32 msr;
|
||||
__u32 pad2;
|
||||
__u64 control;
|
||||
__u64 evt_page;
|
||||
__u64 msg_page;
|
||||
@@ -5081,6 +5088,15 @@ EOI was received.
|
||||
__u64 result;
|
||||
__u64 params[2];
|
||||
} hcall;
|
||||
struct {
|
||||
__u32 msr;
|
||||
__u32 pad2;
|
||||
__u64 control;
|
||||
__u64 status;
|
||||
__u64 send_page;
|
||||
__u64 recv_page;
|
||||
__u64 pending_page;
|
||||
} syndbg;
|
||||
} u;
|
||||
};
|
||||
/* KVM_EXIT_HYPERV */
|
||||
@@ -5097,6 +5113,12 @@ Hyper-V SynIC state change. Notification is used to remap SynIC
|
||||
event/message pages and to enable/disable SynIC messages/events processing
|
||||
in userspace.
|
||||
|
||||
- KVM_EXIT_HYPERV_SYNDBG -- synchronously notify user-space about
|
||||
|
||||
Hyper-V Synthetic debugger state change. Notification is used to either update
|
||||
the pending_page location or to send a control command (send the buffer located
|
||||
in send_page or recv a buffer to recv_page).
|
||||
|
||||
::
|
||||
|
||||
/* KVM_EXIT_ARM_NISV */
|
||||
@@ -5779,7 +5801,7 @@ will be initialized to 1 when created. This also improves performance because
|
||||
dirty logging can be enabled gradually in small chunks on the first call
|
||||
to KVM_CLEAR_DIRTY_LOG. KVM_DIRTY_LOG_INITIALLY_SET depends on
|
||||
KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (it is also only available on
|
||||
x86 for now).
|
||||
x86 and arm64 for now).
|
||||
|
||||
KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 was previously available under the name
|
||||
KVM_CAP_MANUAL_DIRTY_LOG_PROTECT, but the implementation had bugs that make
|
||||
@@ -5804,6 +5826,23 @@ If present, this capability can be enabled for a VM, meaning that KVM
|
||||
will allow the transition to secure guest mode. Otherwise KVM will
|
||||
veto the transition.
|
||||
|
||||
7.20 KVM_CAP_HALT_POLL
|
||||
----------------------
|
||||
|
||||
:Architectures: all
|
||||
:Target: VM
|
||||
:Parameters: args[0] is the maximum poll time in nanoseconds
|
||||
:Returns: 0 on success; -1 on error
|
||||
|
||||
This capability overrides the kvm module parameter halt_poll_ns for the
|
||||
target VM.
|
||||
|
||||
VCPU polling allows a VCPU to poll for wakeup events instead of immediately
|
||||
scheduling during guest halts. The maximum time a VCPU can spend polling is
|
||||
controlled by the kvm module parameter halt_poll_ns. This capability allows
|
||||
the maximum halt time to specified on a per-VM basis, effectively overriding
|
||||
the module parameter for the target VM.
|
||||
|
||||
8. Other capabilities.
|
||||
======================
|
||||
|
||||
|
@@ -50,8 +50,8 @@ KVM_FEATURE_NOP_IO_DELAY 1 not necessary to perform delays
|
||||
KVM_FEATURE_MMU_OP 2 deprecated
|
||||
|
||||
KVM_FEATURE_CLOCKSOURCE2 3 kvmclock available at msrs
|
||||
|
||||
0x4b564d00 and 0x4b564d01
|
||||
|
||||
KVM_FEATURE_ASYNC_PF 4 async pf can be enabled by
|
||||
writing to msr 0x4b564d02
|
||||
|
||||
@@ -86,6 +86,12 @@ KVM_FEATURE_PV_SCHED_YIELD 13 guest checks this feature bit
|
||||
before using paravirtualized
|
||||
sched yield.
|
||||
|
||||
KVM_FEATURE_ASYNC_PF_INT 14 guest checks this feature bit
|
||||
before using the second async
|
||||
pf control msr 0x4b564d06 and
|
||||
async pf acknowledgment msr
|
||||
0x4b564d07.
|
||||
|
||||
KVM_FEATURE_CLOCSOURCE_STABLE_BIT 24 host will warn if no guest-side
|
||||
per-cpu warps are expeced in
|
||||
kvmclock
|
||||
|
@@ -190,41 +190,72 @@ MSR_KVM_ASYNC_PF_EN:
|
||||
0x4b564d02
|
||||
|
||||
data:
|
||||
Bits 63-6 hold 64-byte aligned physical address of a
|
||||
64 byte memory area which must be in guest RAM and must be
|
||||
zeroed. Bits 5-3 are reserved and should be zero. Bit 0 is 1
|
||||
when asynchronous page faults are enabled on the vcpu 0 when
|
||||
disabled. Bit 1 is 1 if asynchronous page faults can be injected
|
||||
when vcpu is in cpl == 0. Bit 2 is 1 if asynchronous page faults
|
||||
are delivered to L1 as #PF vmexits. Bit 2 can be set only if
|
||||
KVM_FEATURE_ASYNC_PF_VMEXIT is present in CPUID.
|
||||
Asynchronous page fault (APF) control MSR.
|
||||
|
||||
First 4 byte of 64 byte memory location will be written to by
|
||||
the hypervisor at the time of asynchronous page fault (APF)
|
||||
injection to indicate type of asynchronous page fault. Value
|
||||
of 1 means that the page referred to by the page fault is not
|
||||
present. Value 2 means that the page is now available. Disabling
|
||||
interrupt inhibits APFs. Guest must not enable interrupt
|
||||
before the reason is read, or it may be overwritten by another
|
||||
APF. Since APF uses the same exception vector as regular page
|
||||
fault guest must reset the reason to 0 before it does
|
||||
something that can generate normal page fault. If during page
|
||||
fault APF reason is 0 it means that this is regular page
|
||||
fault.
|
||||
Bits 63-6 hold 64-byte aligned physical address of a 64 byte memory area
|
||||
which must be in guest RAM and must be zeroed. This memory is expected
|
||||
to hold a copy of the following structure::
|
||||
|
||||
During delivery of type 1 APF cr2 contains a token that will
|
||||
be used to notify a guest when missing page becomes
|
||||
available. When page becomes available type 2 APF is sent with
|
||||
cr2 set to the token associated with the page. There is special
|
||||
kind of token 0xffffffff which tells vcpu that it should wake
|
||||
up all processes waiting for APFs and no individual type 2 APFs
|
||||
will be sent.
|
||||
struct kvm_vcpu_pv_apf_data {
|
||||
/* Used for 'page not present' events delivered via #PF */
|
||||
__u32 flags;
|
||||
|
||||
/* Used for 'page ready' events delivered via interrupt notification */
|
||||
__u32 token;
|
||||
|
||||
__u8 pad[56];
|
||||
__u32 enabled;
|
||||
};
|
||||
|
||||
Bits 5-4 of the MSR are reserved and should be zero. Bit 0 is set to 1
|
||||
when asynchronous page faults are enabled on the vcpu, 0 when disabled.
|
||||
Bit 1 is 1 if asynchronous page faults can be injected when vcpu is in
|
||||
cpl == 0. Bit 2 is 1 if asynchronous page faults are delivered to L1 as
|
||||
#PF vmexits. Bit 2 can be set only if KVM_FEATURE_ASYNC_PF_VMEXIT is
|
||||
present in CPUID. Bit 3 enables interrupt based delivery of 'page ready'
|
||||
events. Bit 3 can only be set if KVM_FEATURE_ASYNC_PF_INT is present in
|
||||
CPUID.
|
||||
|
||||
'Page not present' events are currently always delivered as synthetic
|
||||
#PF exception. During delivery of these events APF CR2 register contains
|
||||
a token that will be used to notify the guest when missing page becomes
|
||||
available. Also, to make it possible to distinguish between real #PF and
|
||||
APF, first 4 bytes of 64 byte memory location ('flags') will be written
|
||||
to by the hypervisor at the time of injection. Only first bit of 'flags'
|
||||
is currently supported, when set, it indicates that the guest is dealing
|
||||
with asynchronous 'page not present' event. If during a page fault APF
|
||||
'flags' is '0' it means that this is regular page fault. Guest is
|
||||
supposed to clear 'flags' when it is done handling #PF exception so the
|
||||
next event can be delivered.
|
||||
|
||||
Note, since APF 'page not present' events use the same exception vector
|
||||
as regular page fault, guest must reset 'flags' to '0' before it does
|
||||
something that can generate normal page fault.
|
||||
|
||||
Bytes 5-7 of 64 byte memory location ('token') will be written to by the
|
||||
hypervisor at the time of APF 'page ready' event injection. The content
|
||||
of these bytes is a token which was previously delivered as 'page not
|
||||
present' event. The event indicates the page in now available. Guest is
|
||||
supposed to write '0' to 'token' when it is done handling 'page ready'
|
||||
event and to write 1' to MSR_KVM_ASYNC_PF_ACK after clearing the location;
|
||||
writing to the MSR forces KVM to re-scan its queue and deliver the next
|
||||
pending notification.
|
||||
|
||||
Note, MSR_KVM_ASYNC_PF_INT MSR specifying the interrupt vector for 'page
|
||||
ready' APF delivery needs to be written to before enabling APF mechanism
|
||||
in MSR_KVM_ASYNC_PF_EN or interrupt #0 can get injected. The MSR is
|
||||
available if KVM_FEATURE_ASYNC_PF_INT is present in CPUID.
|
||||
|
||||
Note, previously, 'page ready' events were delivered via the same #PF
|
||||
exception as 'page not present' events but this is now deprecated. If
|
||||
bit 3 (interrupt based delivery) is not set APF events are not delivered.
|
||||
|
||||
If APF is disabled while there are outstanding APFs, they will
|
||||
not be delivered.
|
||||
|
||||
Currently type 2 APF will be always delivered on the same vcpu as
|
||||
type 1 was, but guest should not rely on that.
|
||||
Currently 'page ready' APF events will be always delivered on the
|
||||
same vcpu as 'page not present' event was, but guest should not rely on
|
||||
that.
|
||||
|
||||
MSR_KVM_STEAL_TIME:
|
||||
0x4b564d03
|
||||
@@ -319,3 +350,29 @@ data:
|
||||
|
||||
KVM guests can request the host not to poll on HLT, for example if
|
||||
they are performing polling themselves.
|
||||
|
||||
MSR_KVM_ASYNC_PF_INT:
|
||||
0x4b564d06
|
||||
|
||||
data:
|
||||
Second asynchronous page fault (APF) control MSR.
|
||||
|
||||
Bits 0-7: APIC vector for delivery of 'page ready' APF events.
|
||||
Bits 8-63: Reserved
|
||||
|
||||
Interrupt vector for asynchnonous 'page ready' notifications delivery.
|
||||
The vector has to be set up before asynchronous page fault mechanism
|
||||
is enabled in MSR_KVM_ASYNC_PF_EN. The MSR is only available if
|
||||
KVM_FEATURE_ASYNC_PF_INT is present in CPUID.
|
||||
|
||||
MSR_KVM_ASYNC_PF_ACK:
|
||||
0x4b564d07
|
||||
|
||||
data:
|
||||
Asynchronous page fault (APF) acknowledgment.
|
||||
|
||||
When the guest is done processing 'page ready' APF event and 'token'
|
||||
field in 'struct kvm_vcpu_pv_apf_data' is cleared it is supposed to
|
||||
write '1' to bit 0 of the MSR, this causes the host to re-scan its queue
|
||||
and check if there are more notifications pending. The MSR is available
|
||||
if KVM_FEATURE_ASYNC_PF_INT is present in CPUID.
|
||||
|
@@ -116,10 +116,7 @@ struct shadow_vmcs is ever changed.
|
||||
natural_width cr4_guest_host_mask;
|
||||
natural_width cr0_read_shadow;
|
||||
natural_width cr4_read_shadow;
|
||||
natural_width cr3_target_value0;
|
||||
natural_width cr3_target_value1;
|
||||
natural_width cr3_target_value2;
|
||||
natural_width cr3_target_value3;
|
||||
natural_width dead_space[4]; /* Last remnants of cr3_target_value[0-3]. */
|
||||
natural_width exit_qualification;
|
||||
natural_width guest_linear_address;
|
||||
natural_width guest_cr0;
|
||||
|
Reference in New Issue
Block a user