Merge tag 'kvm-3.7-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Avi Kivity:
 "Highlights of the changes for this release include support for vfio
  level triggered interrupts, improved big real mode support on older
  Intels, a streamlined guest page table walker, guest APIC speedups,
  PIO optimizations, better overcommit handling, and read-only memory."

* tag 'kvm-3.7-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (138 commits)
  KVM: s390: Fix vcpu_load handling in interrupt code
  KVM: x86: Fix guest debug across vcpu INIT reset
  KVM: Add resampling irqfds for level triggered interrupts
  KVM: optimize apic interrupt delivery
  KVM: MMU: Eliminate pointless temporary 'ac'
  KVM: MMU: Avoid access/dirty update loop if all is well
  KVM: MMU: Eliminate eperm temporary
  KVM: MMU: Optimize is_last_gpte()
  KVM: MMU: Simplify walk_addr_generic() loop
  KVM: MMU: Optimize pte permission checks
  KVM: MMU: Update accessed and dirty bits after guest pagetable walk
  KVM: MMU: Move gpte_access() out of paging_tmpl.h
  KVM: MMU: Optimize gpte_access() slightly
  KVM: MMU: Push clean gpte write protection out of gpte_access()
  KVM: clarify kvmclock documentation
  KVM: make processes waiting on vcpu mutex killable
  KVM: SVM: Make use of asm.h
  KVM: VMX: Make use of asm.h
  KVM: VMX: Make lto-friendly
  KVM: x86: lapic: Clean up find_highest_vector() and count_vectors()
  ...

Conflicts:
    arch/s390/include/asm/processor.h
    arch/x86/kvm/i8259.c
@@ -857,7 +857,8 @@ struct kvm_userspace_memory_region {
 };

 /* for kvm_memory_region::flags */
-#define KVM_MEM_LOG_DIRTY_PAGES 1UL
+#define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0)
+#define KVM_MEM_READONLY (1UL << 1)

 This ioctl allows the user to create or modify a guest physical memory
 slot. When changing an existing slot, it may be moved in the guest
@@ -873,14 +874,17 @@ It is recommended that the lower 21 bits of guest_phys_addr and userspace_addr
 be identical. This allows large pages in the guest to be backed by large
 pages in the host.

-The flags field supports just one flag, KVM_MEM_LOG_DIRTY_PAGES, which
-instructs kvm to keep track of writes to memory within the slot. See
-the KVM_GET_DIRTY_LOG ioctl.
+The flags field supports two flag, KVM_MEM_LOG_DIRTY_PAGES, which instructs
+kvm to keep track of writes to memory within the slot. See KVM_GET_DIRTY_LOG
+ioctl. The KVM_CAP_READONLY_MEM capability indicates the availability of the
+KVM_MEM_READONLY flag. When this flag is set for a memory region, KVM only
+allows read accesses. Writes will be posted to userspace as KVM_EXIT_MMIO
+exits.

-When the KVM_CAP_SYNC_MMU capability, changes in the backing of the memory
-region are automatically reflected into the guest. For example, an mmap()
-that affects the region will be made visible immediately. Another example
-is madvise(MADV_DROP).
+When the KVM_CAP_SYNC_MMU capability is available, changes in the backing of
+the memory region are automatically reflected into the guest. For example, an
+mmap() that affects the region will be made visible immediately. Another
+example is madvise(MADV_DROP).

 It is recommended to use this API instead of the KVM_SET_MEMORY_REGION ioctl.
 The KVM_SET_MEMORY_REGION does not allow fine grained control over memory
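The read-only slot behaviour documented above is driven from userspace through
KVM_SET_USER_MEMORY_REGION. The following sketch is illustrative only and is
not part of this patch; the vmfd/kvmfd descriptors and the helper name are
assumed, and error handling is minimal.

#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

/* Register 'size' bytes at 'host_mem' as a read-only guest slot.
 * Guest writes to this range then reach userspace as KVM_EXIT_MMIO. */
static int set_readonly_slot(int kvmfd, int vmfd, void *host_mem,
                             __u64 gpa, __u64 size, __u32 slot)
{
    struct kvm_userspace_memory_region region;

    /* KVM_CAP_READONLY_MEM tells us whether KVM_MEM_READONLY is usable. */
    if (ioctl(kvmfd, KVM_CHECK_EXTENSION, KVM_CAP_READONLY_MEM) <= 0)
        return -1;

    memset(&region, 0, sizeof(region));
    region.slot = slot;
    region.flags = KVM_MEM_READONLY;
    region.guest_phys_addr = gpa;
    region.memory_size = size;
    region.userspace_addr = (__u64)(unsigned long)host_mem;
    return ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &region);
}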
@@ -1946,6 +1950,19 @@ the guest using the specified gsi pin. The irqfd is removed using
 the KVM_IRQFD_FLAG_DEASSIGN flag, specifying both kvm_irqfd.fd
 and kvm_irqfd.gsi.

+With KVM_CAP_IRQFD_RESAMPLE, KVM_IRQFD supports a de-assert and notify
+mechanism allowing emulation of level-triggered, irqfd-based
+interrupts. When KVM_IRQFD_FLAG_RESAMPLE is set the user must pass an
+additional eventfd in the kvm_irqfd.resamplefd field. When operating
+in resample mode, posting of an interrupt through kvm_irq.fd asserts
+the specified gsi in the irqchip. When the irqchip is resampled, such
+as from an EOI, the gsi is de-asserted and the user is notifed via
+kvm_irqfd.resamplefd. It is the user's responsibility to re-queue
+the interrupt if the device making use of it still requires service.
+Note that closing the resamplefd is not sufficient to disable the
+irqfd. The KVM_IRQFD_FLAG_RESAMPLE is only necessary on assignment
+and need not be specified with KVM_IRQFD_FLAG_DEASSIGN.
+
 4.76 KVM_PPC_ALLOCATE_HTAB

 Capability: KVM_CAP_PPC_ALLOC_HTAB
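For illustration, the resample flow described above can be wired up from
userspace roughly as follows. This is a sketch, not part of this patch; the
helper name is made up and error checking is kept to a minimum.

#include <linux/kvm.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>

/* Writes to *irq_efd assert 'gsi'; KVM signals *resample_efd on EOI so the
 * device model can re-assert the line if it is still active. */
static int assign_resample_irqfd(int vmfd, __u32 gsi,
                                 int *irq_efd, int *resample_efd)
{
    struct kvm_irqfd irqfd;

    *irq_efd = eventfd(0, 0);
    *resample_efd = eventfd(0, 0);
    if (*irq_efd < 0 || *resample_efd < 0)
        return -1;

    memset(&irqfd, 0, sizeof(irqfd));
    irqfd.fd = *irq_efd;
    irqfd.gsi = gsi;
    irqfd.flags = KVM_IRQFD_FLAG_RESAMPLE;
    irqfd.resamplefd = *resample_efd;
    return ioctl(vmfd, KVM_IRQFD, &irqfd);
}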
Documentation/virtual/kvm/hypercalls.txt (new file, 66 lines)
@@ -0,0 +1,66 @@
+Linux KVM Hypercall:
+===================
+X86:
+KVM Hypercalls have a three-byte sequence of either the vmcall or the vmmcall
+instruction. The hypervisor can replace it with instructions that are
+guaranteed to be supported.
+
+Up to four arguments may be passed in rbx, rcx, rdx, and rsi respectively.
+The hypercall number should be placed in rax and the return value will be
+placed in rax. No other registers will be clobbered unless explicitly stated
+by the particular hypercall.
+
+S390:
+R2-R7 are used for parameters 1-6. In addition, R1 is used for hypercall
+number. The return value is written to R2.
+
+S390 uses diagnose instruction as hypercall (0x500) along with hypercall
+number in R1.
+
+PowerPC:
+It uses R3-R10 and hypercall number in R11. R4-R11 are used as output registers.
+Return value is placed in R3.
+
+KVM hypercalls uses 4 byte opcode, that are patched with 'hypercall-instructions'
+property inside the device tree's /hypervisor node.
+For more information refer to Documentation/virtual/kvm/ppc-pv.txt
+
+KVM Hypercalls Documentation
+===========================
+The template for each hypercall is:
+1. Hypercall name.
+2. Architecture(s)
+3. Status (deprecated, obsolete, active)
+4. Purpose
+
+1. KVM_HC_VAPIC_POLL_IRQ
+------------------------
+Architecture: x86
+Status: active
+Purpose: Trigger guest exit so that the host can check for pending
+interrupts on reentry.
+
+2. KVM_HC_MMU_OP
+------------------------
+Architecture: x86
+Status: deprecated.
+Purpose: Support MMU operations such as writing to PTE,
+flushing TLB, release PT.
+
+3. KVM_HC_FEATURES
+------------------------
+Architecture: PPC
+Status: active
+Purpose: Expose hypercall availability to the guest. On x86 platforms, cpuid
+used to enumerate which hypercalls are available. On PPC, either device tree
+based lookup ( which is also what EPAPR dictates) OR KVM specific enumeration
+mechanism (which is this hypercall) can be used.
+
+4. KVM_HC_PPC_MAP_MAGIC_PAGE
+------------------------
+Architecture: PPC
+Status: active
+Purpose: To enable communication between the hypervisor and guest there is a
+shared page that contains parts of supervisor visible register state.
+The guest can map this shared page to access its supervisor register through
+memory using this hypercall.
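As a rough guest-side illustration of the x86 ABI documented in this new file
(hypercall number in rax, arguments in rbx/rcx/rdx/rsi, result in rax): note
that the kernel's real helpers in kvm_para.h additionally patch between vmcall
and vmmcall, which this hand-written, single-argument sketch does not do.

/* Issue a KVM hypercall with one argument: number in rax, argument in rbx,
 * result returned in rax. */
static inline long kvm_hypercall1_sketch(unsigned int nr, unsigned long p1)
{
    long ret;

    asm volatile("vmcall"
                 : "=a"(ret)
                 : "a"(nr), "b"(p1)
                 : "memory");
    return ret;
}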
@@ -34,9 +34,12 @@ MSR_KVM_WALL_CLOCK_NEW: 0x4b564d00
 time information and check that they are both equal and even.
 An odd version indicates an in-progress update.

-sec: number of seconds for wallclock.
+sec: number of seconds for wallclock at time of boot.

-nsec: number of nanoseconds for wallclock.
+nsec: number of nanoseconds for wallclock at time of boot.
+
+In order to get the current wallclock time, the system_time from
+MSR_KVM_SYSTEM_TIME_NEW needs to be added.

 Note that although MSRs are per-CPU entities, the effect of this
 particular MSR is global.
@@ -82,20 +85,25 @@ MSR_KVM_SYSTEM_TIME_NEW: 0x4b564d01
 time at the time this structure was last updated. Unit is
 nanoseconds.

-tsc_to_system_mul: a function of the tsc frequency. One has
-to multiply any tsc-related quantity by this value to get
-a value in nanoseconds, besides dividing by 2^tsc_shift
+tsc_to_system_mul: multiplier to be used when converting
+tsc-related quantity to nanoseconds

-tsc_shift: cycle to nanosecond divider, as a power of two, to
-allow for shift rights. One has to shift right any tsc-related
-quantity by this value to get a value in nanoseconds, besides
-multiplying by tsc_to_system_mul.
+tsc_shift: shift to be used when converting tsc-related
+quantity to nanoseconds. This shift will ensure that
+multiplication with tsc_to_system_mul does not overflow.
+A positive value denotes a left shift, a negative value
+a right shift.

-With this information, guests can derive per-CPU time by
-doing:
+The conversion from tsc to nanoseconds involves an additional
+right shift by 32 bits. With this information, guests can
+derive per-CPU time by doing:

 time = (current_tsc - tsc_timestamp)
-time = (time * tsc_to_system_mul) >> tsc_shift
+if (tsc_shift >= 0)
+        time <<= tsc_shift;
+else
+        time >>= -tsc_shift;
+time = (time * tsc_to_system_mul) >> 32
 time = time + system_time

 flags: bits in this field indicate extended capabilities
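The documented conversion can be written as a small C helper. This is only a
sketch following the formula above; the function name is made up, and the
128-bit intermediate is a GCC/Clang extension used to keep the multiply from
overflowing.

#include <stdint.h>

/* Derive per-CPU guest time exactly as documented: scale the tsc delta by
 * tsc_shift (left or right), multiply by tsc_to_system_mul, take the upper
 * 32 bits of the product, and add system_time. */
static uint64_t pvclock_cycles_to_ns(uint64_t current_tsc,
                                     uint64_t tsc_timestamp,
                                     uint32_t tsc_to_system_mul,
                                     int8_t tsc_shift,
                                     uint64_t system_time)
{
    uint64_t time = current_tsc - tsc_timestamp;

    if (tsc_shift >= 0)
        time <<= tsc_shift;
    else
        time >>= -tsc_shift;
    /* 32.32 fixed-point multiply; the >> 32 is the extra right shift the
     * documentation mentions. */
    time = (uint64_t)(((unsigned __int128)time * tsc_to_system_mul) >> 32);
    return time + system_time;
}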
@@ -174,3 +174,25 @@ following:
 That way we can inject an arbitrary amount of code as replacement for a single
 instruction. This allows us to check for pending interrupts when setting EE=1
 for example.
+
+Hypercall ABIs in KVM on PowerPC
+=================================
+1) KVM hypercalls (ePAPR)
+
+These are ePAPR compliant hypercall implementation (mentioned above). Even
+generic hypercalls are implemented here, like the ePAPR idle hcall. These are
+available on all targets.
+
+2) PAPR hypercalls
+
+PAPR hypercalls are needed to run server PowerPC PAPR guests (-M pseries in QEMU).
+These are the same hypercalls that pHyp, the POWER hypervisor implements. Some of
+them are handled in the kernel, some are handled in user space. This is only
+available on book3s_64.
+
+3) OSI hypercalls
+
+Mac-on-Linux is another user of KVM on PowerPC, which has its own hypercall (long
+before KVM). This is supported to maintain compatibility. All these hypercalls get
+forwarded to user space. This is only useful on book3s_32, but can be used with
+book3s_64 as well.
@@ -924,6 +924,16 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
     return 0;
 }

+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event)
+{
+    if (!irqchip_in_kernel(kvm))
+        return -ENXIO;
+
+    irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
+                    irq_event->irq, irq_event->level);
+    return 0;
+}
+
 long kvm_arch_vm_ioctl(struct file *filp,
         unsigned int ioctl, unsigned long arg)
 {
@@ -963,29 +973,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
             goto out;
         }
         break;
-    case KVM_IRQ_LINE_STATUS:
-    case KVM_IRQ_LINE: {
-        struct kvm_irq_level irq_event;
-
-        r = -EFAULT;
-        if (copy_from_user(&irq_event, argp, sizeof irq_event))
-            goto out;
-        r = -ENXIO;
-        if (irqchip_in_kernel(kvm)) {
-            __s32 status;
-            status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
-                    irq_event.irq, irq_event.level);
-            if (ioctl == KVM_IRQ_LINE_STATUS) {
-                r = -EFAULT;
-                irq_event.status = status;
-                if (copy_to_user(argp, &irq_event,
-                        sizeof irq_event))
-                    goto out;
-            }
-            r = 0;
-        }
-        break;
-        }
     case KVM_GET_IRQCHIP: {
         /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
         struct kvm_irqchip chip;
@@ -1626,11 +1613,17 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
     return;
 }

-void kvm_arch_flush_shadow(struct kvm *kvm)
+void kvm_arch_flush_shadow_all(struct kvm *kvm)
 {
     kvm_flush_remote_tlbs(kvm);
 }

+void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+                   struct kvm_memory_slot *slot)
+{
+    kvm_arch_flush_shadow_all();
+}
+
 long kvm_arch_dev_ioctl(struct file *filp,
         unsigned int ioctl, unsigned long arg)
 {
@@ -53,6 +53,8 @@

 struct kvm;
 extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
+extern int kvm_unmap_hva_range(struct kvm *kvm,
+                   unsigned long start, unsigned long end);
 extern int kvm_age_hva(struct kvm *kvm, unsigned long hva);
 extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
@@ -220,6 +222,7 @@ struct revmap_entry {
 #define KVMPPC_GOT_PAGE 0x80

 struct kvm_arch_memory_slot {
+    unsigned long *rmap;
 };

 struct kvm_arch {
@@ -319,7 +319,6 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
     if (is_error_page(new_page)) {
         printk(KERN_ERR "Couldn't get guest page for gfn %llx!\n",
                (unsigned long long)gfn);
-        kvm_release_page_clean(new_page);
         return;
     }
     hpaddr = page_to_phys(new_page);
@@ -705,7 +705,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
         goto out_unlock;
     hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;

-    rmap = &memslot->rmap[gfn - memslot->base_gfn];
+    rmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
     lock_rmap(rmap);

     /* Check if we might have been invalidated; let the guest retry if so */
@@ -756,8 +756,11 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
     goto out_put;
 }

-static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
-              int (*handler)(struct kvm *kvm, unsigned long *rmapp,
+static int kvm_handle_hva_range(struct kvm *kvm,
+                unsigned long start,
+                unsigned long end,
+                int (*handler)(struct kvm *kvm,
+                           unsigned long *rmapp,
                            unsigned long gfn))
 {
     int ret;
@@ -767,15 +770,25 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,

     slots = kvm_memslots(kvm);
     kvm_for_each_memslot(memslot, slots) {
-        unsigned long start = memslot->userspace_addr;
-        unsigned long end;
+        unsigned long hva_start, hva_end;
+        gfn_t gfn, gfn_end;

-        end = start + (memslot->npages << PAGE_SHIFT);
-        if (hva >= start && hva < end) {
-            gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
+        hva_start = max(start, memslot->userspace_addr);
+        hva_end = min(end, memslot->userspace_addr +
+                  (memslot->npages << PAGE_SHIFT));
+        if (hva_start >= hva_end)
+            continue;
+        /*
+         * {gfn(page) | page intersects with [hva_start, hva_end)} =
+         * {gfn, gfn+1, ..., gfn_end-1}.
+         */
+        gfn = hva_to_gfn_memslot(hva_start, memslot);
+        gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
+
+        for (; gfn < gfn_end; ++gfn) {
+            gfn_t gfn_offset = gfn - memslot->base_gfn;

-            ret = handler(kvm, &memslot->rmap[gfn_offset],
-                      memslot->base_gfn + gfn_offset);
+            ret = handler(kvm, &memslot->arch.rmap[gfn_offset], gfn);
             retval |= ret;
         }
     }
@@ -783,6 +796,13 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
     return retval;
 }

+static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
+              int (*handler)(struct kvm *kvm, unsigned long *rmapp,
+                     unsigned long gfn))
+{
+    return kvm_handle_hva_range(kvm, hva, hva + 1, handler);
+}
+
 static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
                unsigned long gfn)
 {
@@ -850,6 +870,13 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
     return 0;
 }

+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+{
+    if (kvm->arch.using_mmu_notifiers)
+        kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp);
+    return 0;
+}
+
 static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
              unsigned long gfn)
 {
@@ -1009,7 +1036,7 @@ long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
     unsigned long *rmapp, *map;

     preempt_disable();
-    rmapp = memslot->rmap;
+    rmapp = memslot->arch.rmap;
     map = memslot->dirty_bitmap;
     for (i = 0; i < memslot->npages; ++i) {
         if (kvm_test_clear_dirty(kvm, rmapp))
@@ -84,7 +84,7 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
     if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
         return;

-    rmap = real_vmalloc_addr(&memslot->rmap[gfn - memslot->base_gfn]);
+    rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
     lock_rmap(rmap);

     head = *rmap & KVMPPC_RMAP_INDEX;
@@ -180,7 +180,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
     if (!slot_is_aligned(memslot, psize))
         return H_PARAMETER;
     slot_fn = gfn - memslot->base_gfn;
-    rmap = &memslot->rmap[slot_fn];
+    rmap = &memslot->arch.rmap[slot_fn];

     if (!kvm->arch.using_mmu_notifiers) {
         physp = kvm->arch.slot_phys[memslot->id];
@@ -197,7 +197,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
         pa &= PAGE_MASK;
     } else {
         /* Translate to host virtual address */
-        hva = gfn_to_hva_memslot(memslot, gfn);
+        hva = __gfn_to_hva_memslot(memslot, gfn);

         /* Look up the Linux PTE for the backing page */
         pte_size = psize;
@@ -242,10 +242,8 @@ static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
     int i;

     hpage = gfn_to_page(vcpu->kvm, pte->raddr >> PAGE_SHIFT);
-    if (is_error_page(hpage)) {
-        kvm_release_page_clean(hpage);
+    if (is_error_page(hpage))
         return;
-    }

     hpage_offset = pte->raddr & ~PAGE_MASK;
     hpage_offset &= ~0xFFFULL;
@@ -520,11 +520,10 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,

     if (likely(!pfnmap)) {
         unsigned long tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT);
-        pfn = gfn_to_pfn_memslot(vcpu_e500->vcpu.kvm, slot, gfn);
+        pfn = gfn_to_pfn_memslot(slot, gfn);
         if (is_error_pfn(pfn)) {
             printk(KERN_ERR "Couldn't get real page for gfn %lx!\n",
                    (long)gfn);
-            kvm_release_pfn_clean(pfn);
             return;
         }

@@ -302,10 +302,18 @@ long kvm_arch_dev_ioctl(struct file *filp,
 void kvm_arch_free_memslot(struct kvm_memory_slot *free,
                struct kvm_memory_slot *dont)
 {
+    if (!dont || free->arch.rmap != dont->arch.rmap) {
+        vfree(free->arch.rmap);
+        free->arch.rmap = NULL;
+    }
 }

 int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
 {
+    slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap));
+    if (!slot->arch.rmap)
+        return -ENOMEM;
+
     return 0;
 }

@@ -326,8 +334,12 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
     kvmppc_core_commit_memory_region(kvm, mem);
 }

-void kvm_arch_flush_shadow(struct kvm *kvm)
+void kvm_arch_flush_shadow_all(struct kvm *kvm)
+{
+}
+
+void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+                   struct kvm_memory_slot *slot)
 {
 }

@@ -159,6 +159,7 @@ extern unsigned long thread_saved_pc(struct task_struct *t);

 extern void show_code(struct pt_regs *regs);
 extern void print_fn_code(unsigned char *code, unsigned long len);
+extern int insn_to_mnemonic(unsigned char *instruction, char buf[8]);

 unsigned long get_wchan(struct task_struct *p);
 #define task_pt_regs(tsk) ((struct pt_regs *) \
@@ -1501,6 +1501,33 @@ static struct insn *find_insn(unsigned char *code)
     return NULL;
 }

+/**
+ * insn_to_mnemonic - decode an s390 instruction
+ * @instruction: instruction to decode
+ * @buf: buffer to fill with mnemonic
+ *
+ * Decode the instruction at @instruction and store the corresponding
+ * mnemonic into @buf.
+ * @buf is left unchanged if the instruction could not be decoded.
+ * Returns:
+ *  %0 on success, %-ENOENT if the instruction was not found.
+ */
+int insn_to_mnemonic(unsigned char *instruction, char buf[8])
+{
+    struct insn *insn;
+
+    insn = find_insn(instruction);
+    if (!insn)
+        return -ENOENT;
+    if (insn->name[0] == '\0')
+        snprintf(buf, sizeof(buf), "%s",
+             long_insn_name[(int) insn->name[1]]);
+    else
+        snprintf(buf, sizeof(buf), "%.5s", insn->name);
+    return 0;
+}
+EXPORT_SYMBOL_GPL(insn_to_mnemonic);
+
 static int print_insn(char *buffer, unsigned char *code, unsigned long addr)
 {
     struct insn *insn;
@@ -21,6 +21,7 @@ config KVM
     depends on HAVE_KVM && EXPERIMENTAL
     select PREEMPT_NOTIFIERS
     select ANON_INODES
+    select HAVE_KVM_CPU_RELAX_INTERCEPT
     ---help---
       Support hosting paravirtualized guest machines using the SIE
       virtualization capability on the mainframe. This should work
@@ -14,6 +14,8 @@
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
 #include "kvm-s390.h"
+#include "trace.h"
+#include "trace-s390.h"

 static int diag_release_pages(struct kvm_vcpu *vcpu)
 {
@@ -98,6 +100,7 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
     vcpu->run->exit_reason = KVM_EXIT_S390_RESET;
     VCPU_EVENT(vcpu, 3, "requesting userspace resets %llx",
            vcpu->run->s390_reset_flags);
+    trace_kvm_s390_request_resets(vcpu->run->s390_reset_flags);
     return -EREMOTE;
 }

@@ -105,6 +108,7 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
 {
     int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16;

+    trace_kvm_s390_handle_diag(vcpu, code);
     switch (code) {
     case 0x10:
         return diag_release_pages(vcpu);
@@ -19,6 +19,8 @@

 #include "kvm-s390.h"
 #include "gaccess.h"
+#include "trace.h"
+#include "trace-s390.h"

 static int handle_lctlg(struct kvm_vcpu *vcpu)
 {
@@ -45,6 +47,7 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)

     VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x,b2:%x,d2:%x", reg1, reg3, base2,
            disp2);
+    trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, useraddr);

     do {
         rc = get_guest_u64(vcpu, useraddr,
@@ -82,6 +85,7 @@ static int handle_lctl(struct kvm_vcpu *vcpu)

     VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x,b2:%x,d2:%x", reg1, reg3, base2,
            disp2);
+    trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, useraddr);

     reg = reg1;
     do {
@@ -135,6 +139,8 @@ static int handle_stop(struct kvm_vcpu *vcpu)
     vcpu->stat.exit_stop_request++;
     spin_lock_bh(&vcpu->arch.local_int.lock);

+    trace_kvm_s390_stop_request(vcpu->arch.local_int.action_bits);
+
     if (vcpu->arch.local_int.action_bits & ACTION_RELOADVCPU_ON_STOP) {
         vcpu->arch.local_int.action_bits &= ~ACTION_RELOADVCPU_ON_STOP;
         rc = SIE_INTERCEPT_RERUNVCPU;
@@ -171,6 +177,7 @@ static int handle_validity(struct kvm_vcpu *vcpu)
     int rc;

     vcpu->stat.exit_validity++;
+    trace_kvm_s390_intercept_validity(vcpu, viwhy);
     if (viwhy == 0x37) {
         vmaddr = gmap_fault(vcpu->arch.sie_block->prefix,
                     vcpu->arch.gmap);
@@ -213,6 +220,9 @@ static int handle_instruction(struct kvm_vcpu *vcpu)
     intercept_handler_t handler;

     vcpu->stat.exit_instruction++;
+    trace_kvm_s390_intercept_instruction(vcpu,
+                         vcpu->arch.sie_block->ipa,
+                         vcpu->arch.sie_block->ipb);
     handler = instruction_handlers[vcpu->arch.sie_block->ipa >> 8];
     if (handler)
         return handler(vcpu);
@@ -222,6 +232,7 @@ static int handle_instruction(struct kvm_vcpu *vcpu)
 static int handle_prog(struct kvm_vcpu *vcpu)
 {
     vcpu->stat.exit_program_interruption++;
+    trace_kvm_s390_intercept_prog(vcpu, vcpu->arch.sie_block->iprcc);
     return kvm_s390_inject_program_int(vcpu, vcpu->arch.sie_block->iprcc);
 }

@@ -19,6 +19,7 @@
 #include <asm/uaccess.h>
 #include "kvm-s390.h"
 #include "gaccess.h"
+#include "trace-s390.h"

 static int psw_extint_disabled(struct kvm_vcpu *vcpu)
 {
@@ -130,6 +131,8 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
     case KVM_S390_INT_EMERGENCY:
         VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp emerg");
         vcpu->stat.deliver_emergency_signal++;
+        trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+                         inti->emerg.code, 0);
         rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1201);
         if (rc == -EFAULT)
             exception = 1;
@@ -152,6 +155,8 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
     case KVM_S390_INT_EXTERNAL_CALL:
         VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call");
         vcpu->stat.deliver_external_call++;
+        trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+                         inti->extcall.code, 0);
         rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1202);
         if (rc == -EFAULT)
             exception = 1;
@@ -175,6 +180,8 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
         VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
                inti->ext.ext_params);
         vcpu->stat.deliver_service_signal++;
+        trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+                         inti->ext.ext_params, 0);
         rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2401);
         if (rc == -EFAULT)
             exception = 1;
@@ -198,6 +205,9 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
         VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx",
                inti->ext.ext_params, inti->ext.ext_params2);
         vcpu->stat.deliver_virtio_interrupt++;
+        trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+                         inti->ext.ext_params,
+                         inti->ext.ext_params2);
         rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2603);
         if (rc == -EFAULT)
             exception = 1;
@@ -229,6 +239,8 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
     case KVM_S390_SIGP_STOP:
         VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop");
         vcpu->stat.deliver_stop_signal++;
+        trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+                         0, 0);
         __set_intercept_indicator(vcpu, inti);
         break;

@@ -236,12 +248,16 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
         VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x",
                inti->prefix.address);
         vcpu->stat.deliver_prefix_signal++;
+        trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+                         inti->prefix.address, 0);
         kvm_s390_set_prefix(vcpu, inti->prefix.address);
         break;

     case KVM_S390_RESTART:
         VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu restart");
         vcpu->stat.deliver_restart_signal++;
+        trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+                         0, 0);
         rc = copy_to_guest(vcpu, offsetof(struct _lowcore,
             restart_old_psw), &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
         if (rc == -EFAULT)
@@ -259,6 +275,8 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
                inti->pgm.code,
                table[vcpu->arch.sie_block->ipa >> 14]);
         vcpu->stat.deliver_program_int++;
+        trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+                         inti->pgm.code, 0);
         rc = put_guest_u16(vcpu, __LC_PGM_INT_CODE, inti->pgm.code);
         if (rc == -EFAULT)
             exception = 1;
@@ -405,9 +423,7 @@ no_timer:
     set_current_state(TASK_INTERRUPTIBLE);
     spin_unlock_bh(&vcpu->arch.local_int.lock);
     spin_unlock(&vcpu->arch.local_int.float_int->lock);
-    vcpu_put(vcpu);
     schedule();
-    vcpu_load(vcpu);
     spin_lock(&vcpu->arch.local_int.float_int->lock);
     spin_lock_bh(&vcpu->arch.local_int.lock);
 }
@@ -515,6 +531,7 @@ int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
     inti->pgm.code = code;

     VCPU_EVENT(vcpu, 3, "inject: program check %d (from kernel)", code);
+    trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, inti->type, code, 0, 1);
     spin_lock_bh(&li->lock);
     list_add(&inti->list, &li->list);
     atomic_set(&li->active, 1);
@@ -556,6 +573,8 @@ int kvm_s390_inject_vm(struct kvm *kvm,
         kfree(inti);
         return -EINVAL;
     }
+    trace_kvm_s390_inject_vm(s390int->type, s390int->parm, s390int->parm64,
+                 2);

     mutex_lock(&kvm->lock);
     fi = &kvm->arch.float_int;
@@ -621,6 +640,8 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
         kfree(inti);
         return -EINVAL;
     }
+    trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, s390int->type, s390int->parm,
+                   s390int->parm64, 2);

     mutex_lock(&vcpu->kvm->lock);
     li = &vcpu->arch.local_int;
|
|||||||
#include "kvm-s390.h"
|
#include "kvm-s390.h"
|
||||||
#include "gaccess.h"
|
#include "gaccess.h"
|
||||||
|
|
||||||
|
#define CREATE_TRACE_POINTS
|
||||||
|
#include "trace.h"
|
||||||
|
#include "trace-s390.h"
|
||||||
|
|
||||||
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
|
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
|
||||||
|
|
||||||
struct kvm_stats_debugfs_item debugfs_entries[] = {
|
struct kvm_stats_debugfs_item debugfs_entries[] = {
|
||||||
@@ -242,6 +246,7 @@ out_err:
|
|||||||
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
|
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
|
||||||
{
|
{
|
||||||
VCPU_EVENT(vcpu, 3, "%s", "free cpu");
|
VCPU_EVENT(vcpu, 3, "%s", "free cpu");
|
||||||
|
trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
|
||||||
if (!kvm_is_ucontrol(vcpu->kvm)) {
|
if (!kvm_is_ucontrol(vcpu->kvm)) {
|
||||||
clear_bit(63 - vcpu->vcpu_id,
|
clear_bit(63 - vcpu->vcpu_id,
|
||||||
(unsigned long *) &vcpu->kvm->arch.sca->mcn);
|
(unsigned long *) &vcpu->kvm->arch.sca->mcn);
|
||||||
@@ -417,6 +422,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
|
|||||||
goto out_free_sie_block;
|
goto out_free_sie_block;
|
||||||
VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
|
VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
|
||||||
vcpu->arch.sie_block);
|
vcpu->arch.sie_block);
|
||||||
|
trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
|
||||||
|
|
||||||
return vcpu;
|
return vcpu;
|
||||||
out_free_sie_block:
|
out_free_sie_block:
|
||||||
@@ -607,18 +613,22 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
|
|||||||
local_irq_enable();
|
local_irq_enable();
|
||||||
VCPU_EVENT(vcpu, 6, "entering sie flags %x",
|
VCPU_EVENT(vcpu, 6, "entering sie flags %x",
|
||||||
atomic_read(&vcpu->arch.sie_block->cpuflags));
|
atomic_read(&vcpu->arch.sie_block->cpuflags));
|
||||||
|
trace_kvm_s390_sie_enter(vcpu,
|
||||||
|
atomic_read(&vcpu->arch.sie_block->cpuflags));
|
||||||
rc = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs);
|
rc = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs);
|
||||||
if (rc) {
|
if (rc) {
|
||||||
if (kvm_is_ucontrol(vcpu->kvm)) {
|
if (kvm_is_ucontrol(vcpu->kvm)) {
|
||||||
rc = SIE_INTERCEPT_UCONTROL;
|
rc = SIE_INTERCEPT_UCONTROL;
|
||||||
} else {
|
} else {
|
||||||
VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
|
VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
|
||||||
|
trace_kvm_s390_sie_fault(vcpu);
|
||||||
kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
||||||
rc = 0;
|
rc = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
|
VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
|
||||||
vcpu->arch.sie_block->icptcode);
|
vcpu->arch.sie_block->icptcode);
|
||||||
|
trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
|
||||||
local_irq_disable();
|
local_irq_disable();
|
||||||
kvm_guest_exit();
|
kvm_guest_exit();
|
||||||
local_irq_enable();
|
local_irq_enable();
|
||||||
@@ -959,7 +969,12 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
void kvm_arch_flush_shadow(struct kvm *kvm)
|
void kvm_arch_flush_shadow_all(struct kvm *kvm)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
|
||||||
|
struct kvm_memory_slot *slot)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -20,6 +20,7 @@
|
|||||||
#include <asm/sysinfo.h>
|
#include <asm/sysinfo.h>
|
||||||
#include "gaccess.h"
|
#include "gaccess.h"
|
||||||
#include "kvm-s390.h"
|
#include "kvm-s390.h"
|
||||||
|
#include "trace.h"
|
||||||
|
|
||||||
static int handle_set_prefix(struct kvm_vcpu *vcpu)
|
static int handle_set_prefix(struct kvm_vcpu *vcpu)
|
||||||
{
|
{
|
||||||
@@ -59,6 +60,7 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu)
|
|||||||
kvm_s390_set_prefix(vcpu, address);
|
kvm_s390_set_prefix(vcpu, address);
|
||||||
|
|
||||||
VCPU_EVENT(vcpu, 5, "setting prefix to %x", address);
|
VCPU_EVENT(vcpu, 5, "setting prefix to %x", address);
|
||||||
|
trace_kvm_s390_handle_prefix(vcpu, 1, address);
|
||||||
out:
|
out:
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@@ -91,6 +93,7 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu)
|
|||||||
}
|
}
|
||||||
|
|
||||||
VCPU_EVENT(vcpu, 5, "storing prefix to %x", address);
|
VCPU_EVENT(vcpu, 5, "storing prefix to %x", address);
|
||||||
|
trace_kvm_s390_handle_prefix(vcpu, 0, address);
|
||||||
out:
|
out:
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@@ -119,6 +122,7 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
|
|||||||
}
|
}
|
||||||
|
|
||||||
VCPU_EVENT(vcpu, 5, "storing cpu address to %llx", useraddr);
|
VCPU_EVENT(vcpu, 5, "storing cpu address to %llx", useraddr);
|
||||||
|
trace_kvm_s390_handle_stap(vcpu, useraddr);
|
||||||
out:
|
out:
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@@ -164,9 +168,11 @@ static int handle_stfl(struct kvm_vcpu *vcpu)
|
|||||||
&facility_list, sizeof(facility_list));
|
&facility_list, sizeof(facility_list));
|
||||||
if (rc == -EFAULT)
|
if (rc == -EFAULT)
|
||||||
kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
||||||
else
|
else {
|
||||||
VCPU_EVENT(vcpu, 5, "store facility list value %x",
|
VCPU_EVENT(vcpu, 5, "store facility list value %x",
|
||||||
facility_list);
|
facility_list);
|
||||||
|
trace_kvm_s390_handle_stfl(vcpu, facility_list);
|
||||||
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -278,6 +284,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
|
|||||||
kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
||||||
goto out_mem;
|
goto out_mem;
|
||||||
}
|
}
|
||||||
|
trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2);
|
||||||
free_page(mem);
|
free_page(mem);
|
||||||
vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
|
vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
|
||||||
vcpu->run->s.regs.gprs[0] = 0;
|
vcpu->run->s.regs.gprs[0] = 0;
|
||||||
|
@@ -18,6 +18,7 @@
|
|||||||
#include <asm/sigp.h>
|
#include <asm/sigp.h>
|
||||||
#include "gaccess.h"
|
#include "gaccess.h"
|
||||||
#include "kvm-s390.h"
|
#include "kvm-s390.h"
|
||||||
|
#include "trace.h"
|
||||||
|
|
||||||
static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr,
|
static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr,
|
||||||
u64 *reg)
|
u64 *reg)
|
||||||
@@ -344,6 +345,7 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
|
|||||||
else
|
else
|
||||||
parameter = vcpu->run->s.regs.gprs[r1 + 1];
|
parameter = vcpu->run->s.regs.gprs[r1 + 1];
|
||||||
|
|
||||||
|
trace_kvm_s390_handle_sigp(vcpu, order_code, cpu_addr, parameter);
|
||||||
switch (order_code) {
|
switch (order_code) {
|
||||||
case SIGP_SENSE:
|
case SIGP_SENSE:
|
||||||
vcpu->stat.instruction_sigp_sense++;
|
vcpu->stat.instruction_sigp_sense++;
|
||||||
|
arch/s390/kvm/trace-s390.h (new file, 210 lines)
@@ -0,0 +1,210 @@
+#if !defined(_TRACE_KVMS390_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVMS390_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm-s390
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace-s390
+
+/*
+ * Trace point for the creation of the kvm instance.
+ */
+TRACE_EVENT(kvm_s390_create_vm,
+        TP_PROTO(unsigned long type),
+        TP_ARGS(type),
+
+        TP_STRUCT__entry(
+            __field(unsigned long, type)
+            ),
+
+        TP_fast_assign(
+            __entry->type = type;
+            ),
+
+        TP_printk("create vm%s",
+              __entry->type & KVM_VM_S390_UCONTROL ? " (UCONTROL)" : "")
+    );
+
+/*
+ * Trace points for creation and destruction of vpcus.
+ */
+TRACE_EVENT(kvm_s390_create_vcpu,
+        TP_PROTO(unsigned int id, struct kvm_vcpu *vcpu,
+             struct kvm_s390_sie_block *sie_block),
+        TP_ARGS(id, vcpu, sie_block),
+
+        TP_STRUCT__entry(
+            __field(unsigned int, id)
+            __field(struct kvm_vcpu *, vcpu)
+            __field(struct kvm_s390_sie_block *, sie_block)
+            ),
+
+        TP_fast_assign(
+            __entry->id = id;
+            __entry->vcpu = vcpu;
+            __entry->sie_block = sie_block;
+            ),
+
+        TP_printk("create cpu %d at %p, sie block at %p", __entry->id,
+              __entry->vcpu, __entry->sie_block)
+    );
+
+TRACE_EVENT(kvm_s390_destroy_vcpu,
+        TP_PROTO(unsigned int id),
+        TP_ARGS(id),
+
+        TP_STRUCT__entry(
+            __field(unsigned int, id)
+            ),
+
+        TP_fast_assign(
+            __entry->id = id;
+            ),
+
+        TP_printk("destroy cpu %d", __entry->id)
+    );
+
+/*
+ * Trace points for injection of interrupts, either per machine or
+ * per vcpu.
+ */
+
+#define kvm_s390_int_type \
+    {KVM_S390_SIGP_STOP, "sigp stop"}, \
+    {KVM_S390_PROGRAM_INT, "program interrupt"}, \
+    {KVM_S390_SIGP_SET_PREFIX, "sigp set prefix"}, \
+    {KVM_S390_RESTART, "sigp restart"}, \
+    {KVM_S390_INT_VIRTIO, "virtio interrupt"}, \
+    {KVM_S390_INT_SERVICE, "sclp interrupt"}, \
+    {KVM_S390_INT_EMERGENCY, "sigp emergency"}, \
+    {KVM_S390_INT_EXTERNAL_CALL, "sigp ext call"}
+
+TRACE_EVENT(kvm_s390_inject_vm,
+        TP_PROTO(__u64 type, __u32 parm, __u64 parm64, int who),
+        TP_ARGS(type, parm, parm64, who),
+
+        TP_STRUCT__entry(
+            __field(__u32, inttype)
+            __field(__u32, parm)
+            __field(__u64, parm64)
+            __field(int, who)
+            ),
+
+        TP_fast_assign(
+            __entry->inttype = type & 0x00000000ffffffff;
+            __entry->parm = parm;
+            __entry->parm64 = parm64;
+            __entry->who = who;
+            ),
+
+        TP_printk("inject%s: type:%x (%s) parm:%x parm64:%llx",
+              (__entry->who == 1) ? " (from kernel)" :
+              (__entry->who == 2) ? " (from user)" : "",
+              __entry->inttype,
+              __print_symbolic(__entry->inttype, kvm_s390_int_type),
+              __entry->parm, __entry->parm64)
+    );
+
+TRACE_EVENT(kvm_s390_inject_vcpu,
+        TP_PROTO(unsigned int id, __u64 type, __u32 parm, __u64 parm64, \
+             int who),
+        TP_ARGS(id, type, parm, parm64, who),
+
+        TP_STRUCT__entry(
+            __field(int, id)
+            __field(__u32, inttype)
+            __field(__u32, parm)
+            __field(__u64, parm64)
+            __field(int, who)
+            ),
+
+        TP_fast_assign(
+            __entry->id = id;
+            __entry->inttype = type & 0x00000000ffffffff;
+            __entry->parm = parm;
+            __entry->parm64 = parm64;
+            __entry->who = who;
+            ),
+
+        TP_printk("inject%s (vcpu %d): type:%x (%s) parm:%x parm64:%llx",
+              (__entry->who == 1) ? " (from kernel)" :
+              (__entry->who == 2) ? " (from user)" : "",
+              __entry->id, __entry->inttype,
+              __print_symbolic(__entry->inttype, kvm_s390_int_type),
+              __entry->parm, __entry->parm64)
+    );
+
+/*
+ * Trace point for the actual delivery of interrupts.
+ */
+TRACE_EVENT(kvm_s390_deliver_interrupt,
+        TP_PROTO(unsigned int id, __u64 type, __u32 data0, __u64 data1),
+        TP_ARGS(id, type, data0, data1),
+
+        TP_STRUCT__entry(
+            __field(int, id)
+            __field(__u32, inttype)
+            __field(__u32, data0)
+            __field(__u64, data1)
+            ),
+
+        TP_fast_assign(
+            __entry->id = id;
+            __entry->inttype = type & 0x00000000ffffffff;
+            __entry->data0 = data0;
+            __entry->data1 = data1;
+            ),
+
+        TP_printk("deliver interrupt (vcpu %d): type:%x (%s) " \
+              "data:%08x %016llx",
+              __entry->id, __entry->inttype,
+              __print_symbolic(__entry->inttype, kvm_s390_int_type),
+              __entry->data0, __entry->data1)
+    );
+
+/*
+ * Trace point for resets that may be requested from userspace.
+ */
+TRACE_EVENT(kvm_s390_request_resets,
+        TP_PROTO(__u64 resets),
+        TP_ARGS(resets),
+
+        TP_STRUCT__entry(
+            __field(__u64, resets)
+            ),
+
+        TP_fast_assign(
+            __entry->resets = resets;
+            ),
+
+        TP_printk("requesting userspace resets %llx",
+              __entry->resets)
+    );
+
+/*
+ * Trace point for a vcpu's stop requests.
+ */
+TRACE_EVENT(kvm_s390_stop_request,
+        TP_PROTO(unsigned int action_bits),
+        TP_ARGS(action_bits),
+
+        TP_STRUCT__entry(
+            __field(unsigned int, action_bits)
+            ),
+
+        TP_fast_assign(
+            __entry->action_bits = action_bits;
+            ),
+
+        TP_printk("stop request, action_bits = %08x",
+              __entry->action_bits)
+    );
+
+
+#endif /* _TRACE_KVMS390_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
arch/s390/kvm/trace.h (new file, 341 lines)
@@ -0,0 +1,341 @@
#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_KVM_H

#include <linux/tracepoint.h>
#include <asm/sigp.h>
#include <asm/debug.h>

#undef TRACE_SYSTEM
#define TRACE_SYSTEM kvm
#define TRACE_INCLUDE_PATH .
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE trace

/*
 * Helpers for vcpu-specific tracepoints containing the same information
 * as s390dbf VCPU_EVENTs.
 */
#define VCPU_PROTO_COMMON struct kvm_vcpu *vcpu
#define VCPU_ARGS_COMMON vcpu
#define VCPU_FIELD_COMMON __field(int, id)			\
	__field(unsigned long, pswmask)				\
	__field(unsigned long, pswaddr)
#define VCPU_ASSIGN_COMMON do {						\
		__entry->id = vcpu->vcpu_id;				\
		__entry->pswmask = vcpu->arch.sie_block->gpsw.mask;	\
		__entry->pswaddr = vcpu->arch.sie_block->gpsw.addr;	\
	} while (0);
#define VCPU_TP_PRINTK(p_str, p_args...)				\
	TP_printk("%02d[%016lx-%016lx]: " p_str, __entry->id,		\
		  __entry->pswmask, __entry->pswaddr, p_args)

/*
 * Tracepoints for SIE entry and exit.
 */
TRACE_EVENT(kvm_s390_sie_enter,
	    TP_PROTO(VCPU_PROTO_COMMON, int cpuflags),
	    TP_ARGS(VCPU_ARGS_COMMON, cpuflags),

	    TP_STRUCT__entry(
		    VCPU_FIELD_COMMON
		    __field(int, cpuflags)
		    ),

	    TP_fast_assign(
		    VCPU_ASSIGN_COMMON
		    __entry->cpuflags = cpuflags;
		    ),

	    VCPU_TP_PRINTK("entering sie flags %x", __entry->cpuflags)
	);

TRACE_EVENT(kvm_s390_sie_fault,
	    TP_PROTO(VCPU_PROTO_COMMON),
	    TP_ARGS(VCPU_ARGS_COMMON),

	    TP_STRUCT__entry(
		    VCPU_FIELD_COMMON
		    ),

	    TP_fast_assign(
		    VCPU_ASSIGN_COMMON
		    ),

	    VCPU_TP_PRINTK("%s", "fault in sie instruction")
	);

#define sie_intercept_code				\
	{0x04, "Instruction"},				\
	{0x08, "Program interruption"},			\
	{0x0C, "Instruction and program interuption"},	\
	{0x10, "External request"},			\
	{0x14, "External interruption"},		\
	{0x18, "I/O request"},				\
	{0x1C, "Wait state"},				\
	{0x20, "Validity"},				\
	{0x28, "Stop request"}

TRACE_EVENT(kvm_s390_sie_exit,
	    TP_PROTO(VCPU_PROTO_COMMON, u8 icptcode),
	    TP_ARGS(VCPU_ARGS_COMMON, icptcode),

	    TP_STRUCT__entry(
		    VCPU_FIELD_COMMON
		    __field(u8, icptcode)
		    ),

	    TP_fast_assign(
		    VCPU_ASSIGN_COMMON
		    __entry->icptcode = icptcode;
		    ),

	    VCPU_TP_PRINTK("exit sie icptcode %d (%s)", __entry->icptcode,
			   __print_symbolic(__entry->icptcode,
					    sie_intercept_code))
	);

/*
 * Trace point for intercepted instructions.
 */
TRACE_EVENT(kvm_s390_intercept_instruction,
	    TP_PROTO(VCPU_PROTO_COMMON, __u16 ipa, __u32 ipb),
	    TP_ARGS(VCPU_ARGS_COMMON, ipa, ipb),

	    TP_STRUCT__entry(
		    VCPU_FIELD_COMMON
		    __field(__u64, instruction)
		    __field(char, insn[8])
		    ),

	    TP_fast_assign(
		    VCPU_ASSIGN_COMMON
		    __entry->instruction = ((__u64)ipa << 48) |
		    ((__u64)ipb << 16);
		    ),

	    VCPU_TP_PRINTK("intercepted instruction %016llx (%s)",
			   __entry->instruction,
			   insn_to_mnemonic((unsigned char *)
					    &__entry->instruction,
					    __entry->insn) ?
			   "unknown" : __entry->insn)
	);

/*
 * Trace point for intercepted program interruptions.
 */
TRACE_EVENT(kvm_s390_intercept_prog,
	    TP_PROTO(VCPU_PROTO_COMMON, __u16 code),
	    TP_ARGS(VCPU_ARGS_COMMON, code),

	    TP_STRUCT__entry(
		    VCPU_FIELD_COMMON
		    __field(__u16, code)
		    ),

	    TP_fast_assign(
		    VCPU_ASSIGN_COMMON
		    __entry->code = code;
		    ),

	    VCPU_TP_PRINTK("intercepted program interruption %04x",
			   __entry->code)
	);

/*
 * Trace point for validity intercepts.
 */
TRACE_EVENT(kvm_s390_intercept_validity,
	    TP_PROTO(VCPU_PROTO_COMMON, __u16 viwhy),
	    TP_ARGS(VCPU_ARGS_COMMON, viwhy),

	    TP_STRUCT__entry(
		    VCPU_FIELD_COMMON
		    __field(__u16, viwhy)
		    ),

	    TP_fast_assign(
		    VCPU_ASSIGN_COMMON
		    __entry->viwhy = viwhy;
		    ),

	    VCPU_TP_PRINTK("got validity intercept %04x", __entry->viwhy)
	);

/*
 * Trace points for instructions that are of special interest.
 */

#define sigp_order_codes					\
	{SIGP_SENSE, "sense"},					\
	{SIGP_EXTERNAL_CALL, "external call"},			\
	{SIGP_EMERGENCY_SIGNAL, "emergency signal"},		\
	{SIGP_STOP, "stop"},					\
	{SIGP_STOP_AND_STORE_STATUS, "stop and store status"},	\
	{SIGP_SET_ARCHITECTURE, "set architecture"},		\
	{SIGP_SET_PREFIX, "set prefix"},			\
	{SIGP_SENSE_RUNNING, "sense running"},			\
	{SIGP_RESTART, "restart"}

TRACE_EVENT(kvm_s390_handle_sigp,
	    TP_PROTO(VCPU_PROTO_COMMON, __u8 order_code, __u16 cpu_addr, \
		     __u32 parameter),
	    TP_ARGS(VCPU_ARGS_COMMON, order_code, cpu_addr, parameter),

	    TP_STRUCT__entry(
		    VCPU_FIELD_COMMON
		    __field(__u8, order_code)
		    __field(__u16, cpu_addr)
		    __field(__u32, parameter)
		    ),

	    TP_fast_assign(
		    VCPU_ASSIGN_COMMON
		    __entry->order_code = order_code;
		    __entry->cpu_addr = cpu_addr;
		    __entry->parameter = parameter;
		    ),

	    VCPU_TP_PRINTK("handle sigp order %02x (%s), cpu address %04x, " \
			   "parameter %08x", __entry->order_code,
			   __print_symbolic(__entry->order_code,
					    sigp_order_codes),
			   __entry->cpu_addr, __entry->parameter)
	);

#define diagnose_codes				\
	{0x10, "release pages"},		\
	{0x44, "time slice end"},		\
	{0x308, "ipl functions"},		\
	{0x500, "kvm hypercall"},		\
	{0x501, "kvm breakpoint"}

TRACE_EVENT(kvm_s390_handle_diag,
	    TP_PROTO(VCPU_PROTO_COMMON, __u16 code),
	    TP_ARGS(VCPU_ARGS_COMMON, code),

	    TP_STRUCT__entry(
		    VCPU_FIELD_COMMON
		    __field(__u16, code)
		    ),

	    TP_fast_assign(
		    VCPU_ASSIGN_COMMON
		    __entry->code = code;
		    ),

	    VCPU_TP_PRINTK("handle diagnose call %04x (%s)", __entry->code,
			   __print_symbolic(__entry->code, diagnose_codes))
	);

TRACE_EVENT(kvm_s390_handle_lctl,
	    TP_PROTO(VCPU_PROTO_COMMON, int g, int reg1, int reg3, u64 addr),
	    TP_ARGS(VCPU_ARGS_COMMON, g, reg1, reg3, addr),

	    TP_STRUCT__entry(
		    VCPU_FIELD_COMMON
		    __field(int, g)
		    __field(int, reg1)
		    __field(int, reg3)
		    __field(u64, addr)
		    ),

	    TP_fast_assign(
		    VCPU_ASSIGN_COMMON
		    __entry->g = g;
		    __entry->reg1 = reg1;
		    __entry->reg3 = reg3;
		    __entry->addr = addr;
		    ),

	    VCPU_TP_PRINTK("%s: loading cr %x-%x from %016llx",
			   __entry->g ? "lctlg" : "lctl",
			   __entry->reg1, __entry->reg3, __entry->addr)
	);

TRACE_EVENT(kvm_s390_handle_prefix,
	    TP_PROTO(VCPU_PROTO_COMMON, int set, u32 address),
	    TP_ARGS(VCPU_ARGS_COMMON, set, address),

	    TP_STRUCT__entry(
		    VCPU_FIELD_COMMON
		    __field(int, set)
		    __field(u32, address)
		    ),

	    TP_fast_assign(
		    VCPU_ASSIGN_COMMON
		    __entry->set = set;
		    __entry->address = address;
		    ),

	    VCPU_TP_PRINTK("%s prefix to %08x",
			   __entry->set ? "setting" : "storing",
			   __entry->address)
	);

TRACE_EVENT(kvm_s390_handle_stap,
	    TP_PROTO(VCPU_PROTO_COMMON, u64 address),
	    TP_ARGS(VCPU_ARGS_COMMON, address),

	    TP_STRUCT__entry(
		    VCPU_FIELD_COMMON
		    __field(u64, address)
		    ),

	    TP_fast_assign(
		    VCPU_ASSIGN_COMMON
		    __entry->address = address;
		    ),

	    VCPU_TP_PRINTK("storing cpu address to %016llx",
			   __entry->address)
	);

TRACE_EVENT(kvm_s390_handle_stfl,
	    TP_PROTO(VCPU_PROTO_COMMON, unsigned int facility_list),
	    TP_ARGS(VCPU_ARGS_COMMON, facility_list),

	    TP_STRUCT__entry(
		    VCPU_FIELD_COMMON
		    __field(unsigned int, facility_list)
		    ),

	    TP_fast_assign(
		    VCPU_ASSIGN_COMMON
		    __entry->facility_list = facility_list;
		    ),

	    VCPU_TP_PRINTK("store facility list value %08x",
			   __entry->facility_list)
	);

TRACE_EVENT(kvm_s390_handle_stsi,
	    TP_PROTO(VCPU_PROTO_COMMON, int fc, int sel1, int sel2, u64 addr),
	    TP_ARGS(VCPU_ARGS_COMMON, fc, sel1, sel2, addr),

	    TP_STRUCT__entry(
		    VCPU_FIELD_COMMON
		    __field(int, fc)
		    __field(int, sel1)
		    __field(int, sel2)
		    __field(u64, addr)
		    ),

	    TP_fast_assign(
		    VCPU_ASSIGN_COMMON
		    __entry->fc = fc;
		    __entry->sel1 = sel1;
		    __entry->sel2 = sel2;
		    __entry->addr = addr;
		    ),

	    VCPU_TP_PRINTK("STSI %d.%d.%d information stored to %016llx",
			   __entry->fc, __entry->sel1, __entry->sel2,
			   __entry->addr)
	);

#endif /* _TRACE_KVM_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
@@ -586,23 +586,18 @@ config PARAVIRT_TIME_ACCOUNTING

 source "arch/x86/xen/Kconfig"

-config KVM_CLOCK
-	bool "KVM paravirtualized clock"
+config KVM_GUEST
+	bool "KVM Guest support (including kvmclock)"
+	select PARAVIRT
 	select PARAVIRT
 	select PARAVIRT_CLOCK
-	---help---
-	  Turning on this option will allow you to run a paravirtualized clock
-	  when running over the KVM hypervisor. Instead of relying on a PIT
-	  (or probably other) emulation by the underlying device model, the host
-	  provides the guest with timing infrastructure such as time of day, and
-	  system time
-
-config KVM_GUEST
-	bool "KVM Guest support"
-	select PARAVIRT
+	default y if PARAVIRT_GUEST
 	---help---
 	  This option enables various optimizations for running under the KVM
-	  hypervisor.
+	  hypervisor. It includes a paravirtualized clock, so that instead
+	  of relying on a PIT (or probably other) emulation by the
+	  underlying device model, the host provides the guest with
+	  timing infrastructure such as time of day, and system time

 source "arch/x86/lguest/Kconfig"

@@ -41,6 +41,7 @@
 #define __KVM_HAVE_DEBUGREGS
 #define __KVM_HAVE_XSAVE
 #define __KVM_HAVE_XCRS
+#define __KVM_HAVE_READONLY_MEM

 /* Architectural interrupt line count. */
 #define KVM_NR_INTERRUPTS 256

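Not part of the diff: a minimal userspace sketch of how the capability advertised by `__KVM_HAVE_READONLY_MEM` might be exercised, assuming the `KVM_CAP_READONLY_MEM` and `KVM_MEM_READONLY` definitions from the uapi headers touched by this series. Error handling is reduced to the bare minimum; names are illustrative, not taken from any existing tool.

```c
#include <linux/kvm.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

/* Register one anonymous mapping as a read-only guest memory slot.
 * kvm_fd is the /dev/kvm fd, vm_fd the KVM_CREATE_VM fd. */
static int map_readonly_slot(int kvm_fd, int vm_fd, uint64_t gpa,
                             size_t size, uint32_t slot)
{
	struct kvm_userspace_memory_region region;
	void *backing;

	if (ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_READONLY_MEM) <= 0)
		return -1;	/* kernel does not support read-only slots */

	backing = mmap(NULL, size, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (backing == MAP_FAILED)
		return -1;

	memset(&region, 0, sizeof(region));
	region.slot = slot;
	region.flags = KVM_MEM_READONLY;	/* guest writes exit as MMIO */
	region.guest_phys_addr = gpa;
	region.memory_size = size;
	region.userspace_addr = (uint64_t)(unsigned long)backing;

	return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
}
```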
@@ -85,6 +85,19 @@ struct x86_instruction_info {
 #define X86EMUL_INTERCEPTED     6 /* Intercepted by nested VMCB/VMCS */

 struct x86_emulate_ops {
+	/*
+	 * read_gpr: read a general purpose register (rax - r15)
+	 *
+	 * @reg: gpr number.
+	 */
+	ulong (*read_gpr)(struct x86_emulate_ctxt *ctxt, unsigned reg);
+	/*
+	 * write_gpr: write a general purpose register (rax - r15)
+	 *
+	 * @reg: gpr number.
+	 * @val: value to write.
+	 */
+	void (*write_gpr)(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val);
 	/*
 	 * read_std: Read bytes of standard (non-emulated/special) memory.
 	 *           Used for descriptor reading.
@@ -200,8 +213,9 @@ typedef u32 __attribute__((vector_size(16))) sse128_t;

 /* Type, address-of, and value of an instruction's operand. */
 struct operand {
-	enum { OP_REG, OP_MEM, OP_IMM, OP_XMM, OP_MM, OP_NONE } type;
+	enum { OP_REG, OP_MEM, OP_MEM_STR, OP_IMM, OP_XMM, OP_MM, OP_NONE } type;
 	unsigned int bytes;
+	unsigned int count;
 	union {
 		unsigned long orig_val;
 		u64 orig_val64;
@@ -221,6 +235,7 @@ struct operand {
 		char valptr[sizeof(unsigned long) + 2];
 		sse128_t vec_val;
 		u64 mm_val;
+		void *data;
 	};
 };

@@ -236,14 +251,23 @@ struct read_cache {
 	unsigned long end;
 };

+/* Execution mode, passed to the emulator. */
+enum x86emul_mode {
+	X86EMUL_MODE_REAL,	/* Real mode. */
+	X86EMUL_MODE_VM86,	/* Virtual 8086 mode. */
+	X86EMUL_MODE_PROT16,	/* 16-bit protected mode. */
+	X86EMUL_MODE_PROT32,	/* 32-bit protected mode. */
+	X86EMUL_MODE_PROT64,	/* 64-bit (long) mode. */
+};
+
 struct x86_emulate_ctxt {
-	struct x86_emulate_ops *ops;
+	const struct x86_emulate_ops *ops;

 	/* Register state before/after emulation. */
 	unsigned long eflags;
 	unsigned long eip; /* eip before instruction emulation */
 	/* Emulated execution mode, represented by an X86EMUL_MODE value. */
-	int mode;
+	enum x86emul_mode mode;

 	/* interruptibility state, as a result of execution of STI or MOV SS */
 	int interruptibility;
@@ -281,8 +305,10 @@ struct x86_emulate_ctxt {
 	bool rip_relative;
 	unsigned long _eip;
 	struct operand memop;
+	u32 regs_valid;  /* bitmaps of registers in _regs[] that can be read */
+	u32 regs_dirty;  /* bitmaps of registers in _regs[] that have been written */
 	/* Fields above regs are cleared together. */
-	unsigned long regs[NR_VCPU_REGS];
+	unsigned long _regs[NR_VCPU_REGS];
 	struct operand *memopp;
 	struct fetch_cache fetch;
 	struct read_cache io_read;
@@ -293,17 +319,6 @@ struct x86_emulate_ctxt {
 #define REPE_PREFIX	0xf3
 #define REPNE_PREFIX	0xf2

-/* Execution mode, passed to the emulator. */
-#define X86EMUL_MODE_REAL     0	/* Real mode. */
-#define X86EMUL_MODE_VM86     1	/* Virtual 8086 mode. */
-#define X86EMUL_MODE_PROT16   2	/* 16-bit protected mode. */
-#define X86EMUL_MODE_PROT32   4	/* 32-bit protected mode. */
-#define X86EMUL_MODE_PROT64   8	/* 64-bit (long) mode. */
-
-/* any protected mode */
-#define X86EMUL_MODE_PROT     (X86EMUL_MODE_PROT16|X86EMUL_MODE_PROT32| \
-			       X86EMUL_MODE_PROT64)
-
 /* CPUID vendors */
 #define X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx 0x68747541
 #define X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx 0x444d4163
@@ -394,4 +409,7 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
 		   u16 tss_selector, int idt_index, int reason,
 		   bool has_error_code, u32 error_code);
 int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq);
+void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt);
+void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt);
+
 #endif /* _ASM_X86_KVM_X86_EMULATE_H */
@@ -271,10 +271,24 @@ struct kvm_mmu {
 	union kvm_mmu_page_role base_role;
 	bool direct_map;

+	/*
+	 * Bitmap; bit set = permission fault
+	 * Byte index: page fault error code [4:1]
+	 * Bit index: pte permissions in ACC_* format
+	 */
+	u8 permissions[16];
+
 	u64 *pae_root;
 	u64 *lm_root;
 	u64 rsvd_bits_mask[2][4];
+
+	/*
+	 * Bitmap: bit set = last pte in walk
+	 * index[0:1]: level (zero-based)
+	 * index[2]: pte.ps
+	 */
+	u8 last_pte_bitmap;
+
 	bool nx;

 	u64 pdptrs[4]; /* pae */
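Not part of the diff: a sketch of the lookup that a byte-indexed permission table like the `permissions[16]` field above is meant to enable, assuming the index/bit layout described in the field's comment. The function and parameter names are illustrative, not the kernel's.

```c
#include <stdint.h>

/* pfec is the page-fault error code: bits [4:1] select a byte of the table,
 * and the pte's ACC_* access bits select a bit inside that byte.  A set bit
 * means "this access would be a permission fault", so the per-fault check
 * collapses to one table lookup instead of a chain of conditionals. */
static inline int would_permission_fault(const uint8_t permissions[16],
					 unsigned int pfec,
					 unsigned int pte_access)
{
	return (permissions[(pfec >> 1) & 0xf] >> pte_access) & 1;
}
```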
@@ -398,12 +412,15 @@ struct kvm_vcpu_arch {
 	struct x86_emulate_ctxt emulate_ctxt;
 	bool emulate_regs_need_sync_to_vcpu;
 	bool emulate_regs_need_sync_from_vcpu;
+	int (*complete_userspace_io)(struct kvm_vcpu *vcpu);

 	gpa_t time;
 	struct pvclock_vcpu_time_info hv_clock;
 	unsigned int hw_tsc_khz;
 	unsigned int time_offset;
 	struct page *time_page;
+	/* set guest stopped flag in pvclock flags field */
+	bool pvclock_set_guest_stopped_request;

 	struct {
 		u64 msr_val;
@@ -438,6 +455,7 @@ struct kvm_vcpu_arch {
 	unsigned long dr6;
 	unsigned long dr7;
 	unsigned long eff_db[KVM_NR_DB_REGS];
+	unsigned long guest_debug_dr7;

 	u64 mcg_cap;
 	u64 mcg_status;
@@ -484,14 +502,24 @@ struct kvm_vcpu_arch {
 };

 struct kvm_lpage_info {
-	unsigned long rmap_pde;
 	int write_count;
 };

 struct kvm_arch_memory_slot {
+	unsigned long *rmap[KVM_NR_PAGE_SIZES];
 	struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
 };

+struct kvm_apic_map {
+	struct rcu_head rcu;
+	u8 ldr_bits;
+	/* fields bellow are used to decode ldr values in different modes */
+	u32 cid_shift, cid_mask, lid_mask;
+	struct kvm_lapic *phys_map[256];
+	/* first index is cluster id second is cpu id in a cluster */
+	struct kvm_lapic *logical_map[16][16];
+};
+
 struct kvm_arch {
 	unsigned int n_used_mmu_pages;
 	unsigned int n_requested_mmu_pages;
@@ -509,6 +537,8 @@ struct kvm_arch {
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
 	int vapics_in_nmi_mode;
+	struct mutex apic_map_lock;
+	struct kvm_apic_map *apic_map;

 	unsigned int tss_addr;
 	struct page *apic_access_page;
@@ -602,8 +632,7 @@ struct kvm_x86_ops {
 	void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
 	void (*vcpu_put)(struct kvm_vcpu *vcpu);

-	void (*set_guest_debug)(struct kvm_vcpu *vcpu,
-				struct kvm_guest_debug *dbg);
+	void (*update_db_bp_intercept)(struct kvm_vcpu *vcpu);
 	int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
 	int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
 	u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
@@ -941,6 +970,7 @@ extern bool kvm_rebooting;

 #define KVM_ARCH_WANT_MMU_NOTIFIER
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end);
 int kvm_age_hva(struct kvm *kvm, unsigned long hva);
 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
@@ -102,21 +102,21 @@ struct kvm_vcpu_pv_apf_data {
 extern void kvmclock_init(void);
 extern int kvm_register_clock(char *txt);

-#ifdef CONFIG_KVM_CLOCK
+#ifdef CONFIG_KVM_GUEST
 bool kvm_check_and_clear_guest_paused(void);
 #else
 static inline bool kvm_check_and_clear_guest_paused(void)
 {
 	return false;
 }
-#endif /* CONFIG_KVMCLOCK */
+#endif /* CONFIG_KVM_GUEST */

 /* This instruction is vmcall.  On non-VT architectures, it will generate a
  * trap that we will then rewrite to the appropriate instruction.
  */
 #define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1"

-/* For KVM hypercalls, a three-byte sequence of either the vmrun or the vmmrun
+/* For KVM hypercalls, a three-byte sequence of either the vmcall or the vmmcall
  * instruction.  The hypervisor may replace it with something else but only the
  * instructions are guaranteed to be supported.
  *
@@ -81,8 +81,7 @@ obj-$(CONFIG_DEBUG_RODATA_TEST)	+= test_rodata.o
 obj-$(CONFIG_DEBUG_NX_TEST)	+= test_nx.o
 obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o

-obj-$(CONFIG_KVM_GUEST)		+= kvm.o
-obj-$(CONFIG_KVM_CLOCK)		+= kvmclock.o
+obj-$(CONFIG_KVM_GUEST)		+= kvm.o kvmclock.o
 obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirt_patch_$(BITS).o
 obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o
 obj-$(CONFIG_PARAVIRT_CLOCK)	+= pvclock.o
@@ -354,6 +354,7 @@ static void kvm_pv_guest_cpu_reboot(void *unused)
 	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
 		wrmsrl(MSR_KVM_PV_EOI_EN, 0);
 	kvm_pv_disable_apf();
+	kvm_disable_steal_time();
 }

 static int kvm_pv_reboot_notify(struct notifier_block *nb,
@@ -396,9 +397,7 @@ void kvm_disable_steal_time(void)
 #ifdef CONFIG_SMP
 static void __init kvm_smp_prepare_boot_cpu(void)
 {
-#ifdef CONFIG_KVM_CLOCK
 	WARN_ON(kvm_register_clock("primary cpu clock"));
-#endif
 	kvm_guest_cpu_init();
 	native_smp_prepare_boot_cpu();
 }
@@ -957,7 +957,7 @@ void __init setup_arch(char **cmdline_p)
 	initmem_init();
 	memblock_find_dma_reserve();

-#ifdef CONFIG_KVM_CLOCK
+#ifdef CONFIG_KVM_GUEST
 	kvmclock_init();
 #endif

@@ -20,6 +20,7 @@ if VIRTUALIZATION
 config KVM
 	tristate "Kernel-based Virtual Machine (KVM) support"
 	depends on HAVE_KVM
+	depends on HIGH_RES_TIMERS
 	# for device assignment:
 	depends on PCI
 	# for TASKSTATS/TASK_DELAY_ACCT:
@@ -37,6 +38,7 @@ config KVM
 	select TASK_DELAY_ACCT
 	select PERF_EVENTS
 	select HAVE_KVM_MSI
+	select HAVE_KVM_CPU_RELAX_INTERCEPT
 	---help---
 	  Support hosting fully virtualized guest machines using hardware
 	  virtualization extensions.  You will need a fairly recent
@@ -12,7 +12,7 @@ kvm-$(CONFIG_IOMMU_API)	+= $(addprefix ../../../virt/kvm/, iommu.o)
 kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(addprefix ../../../virt/kvm/, async_pf.o)

 kvm-y			+= x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
-			   i8254.o timer.o cpuid.o pmu.o
+			   i8254.o cpuid.o pmu.o
 kvm-intel-y		+= vmx.o
 kvm-amd-y		+= svm.o

@@ -316,7 +316,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	}
 	case 7: {
 		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
-		/* Mask ebx against host capbability word 9 */
+		/* Mask ebx against host capability word 9 */
 		if (index == 0) {
 			entry->ebx &= kvm_supported_word9_x86_features;
 			cpuid_mask(&entry->ebx, 9);
@@ -397,8 +397,8 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		break;
 	}
 	case KVM_CPUID_SIGNATURE: {
-		char signature[12] = "KVMKVMKVM\0\0";
-		u32 *sigptr = (u32 *)signature;
+		static const char signature[12] = "KVMKVMKVM\0\0";
+		const u32 *sigptr = (const u32 *)signature;
 		entry->eax = KVM_CPUID_FEATURES;
 		entry->ebx = sigptr[0];
 		entry->ecx = sigptr[1];
@@ -484,10 +484,10 @@ struct kvm_cpuid_param {
 	u32 func;
 	u32 idx;
 	bool has_leaf_count;
-	bool (*qualifier)(struct kvm_cpuid_param *param);
+	bool (*qualifier)(const struct kvm_cpuid_param *param);
 };

-static bool is_centaur_cpu(struct kvm_cpuid_param *param)
+static bool is_centaur_cpu(const struct kvm_cpuid_param *param)
 {
 	return boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR;
 }
@@ -498,7 +498,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
 	struct kvm_cpuid_entry2 *cpuid_entries;
 	int limit, nent = 0, r = -E2BIG, i;
 	u32 func;
-	static struct kvm_cpuid_param param[] = {
+	static const struct kvm_cpuid_param param[] = {
 		{ .func = 0, .has_leaf_count = true },
 		{ .func = 0x80000000, .has_leaf_count = true },
 		{ .func = 0xC0000000, .qualifier = is_centaur_cpu, .has_leaf_count = true },
@@ -517,7 +517,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,

 	r = 0;
 	for (i = 0; i < ARRAY_SIZE(param); i++) {
-		struct kvm_cpuid_param *ent = &param[i];
+		const struct kvm_cpuid_param *ent = &param[i];

 		if (ent->qualifier && !ent->qualifier(ent))
 			continue;
(File diff suppressed because it is too large.)
@@ -108,7 +108,7 @@ static s64 __kpit_elapsed(struct kvm *kvm)
 	ktime_t remaining;
 	struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;

-	if (!ps->pit_timer.period)
+	if (!ps->period)
 		return 0;

 	/*
@@ -120,9 +120,9 @@ static s64 __kpit_elapsed(struct kvm *kvm)
 	 * itself with the initial count and continues counting
 	 * from there.
 	 */
-	remaining = hrtimer_get_remaining(&ps->pit_timer.timer);
-	elapsed = ps->pit_timer.period - ktime_to_ns(remaining);
-	elapsed = mod_64(elapsed, ps->pit_timer.period);
+	remaining = hrtimer_get_remaining(&ps->timer);
+	elapsed = ps->period - ktime_to_ns(remaining);
+	elapsed = mod_64(elapsed, ps->period);

 	return elapsed;
 }
@@ -238,12 +238,12 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
 	int value;

 	spin_lock(&ps->inject_lock);
-	value = atomic_dec_return(&ps->pit_timer.pending);
+	value = atomic_dec_return(&ps->pending);
 	if (value < 0)
 		/* spurious acks can be generated if, for example, the
 		 * PIC is being reset.  Handle it gracefully here
 		 */
-		atomic_inc(&ps->pit_timer.pending);
+		atomic_inc(&ps->pending);
 	else if (value > 0)
 		/* in this case, we had multiple outstanding pit interrupts
 		 * that we needed to inject.  Reinject
@@ -261,28 +261,17 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
 	if (!kvm_vcpu_is_bsp(vcpu) || !pit)
 		return;

-	timer = &pit->pit_state.pit_timer.timer;
+	timer = &pit->pit_state.timer;
 	if (hrtimer_cancel(timer))
 		hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
 }

 static void destroy_pit_timer(struct kvm_pit *pit)
 {
-	hrtimer_cancel(&pit->pit_state.pit_timer.timer);
+	hrtimer_cancel(&pit->pit_state.timer);
 	flush_kthread_work(&pit->expired);
 }

-static bool kpit_is_periodic(struct kvm_timer *ktimer)
-{
-	struct kvm_kpit_state *ps = container_of(ktimer, struct kvm_kpit_state,
-						 pit_timer);
-	return ps->is_periodic;
-}
-
-static struct kvm_timer_ops kpit_ops = {
-	.is_periodic = kpit_is_periodic,
-};
-
 static void pit_do_work(struct kthread_work *work)
 {
 	struct kvm_pit *pit = container_of(work, struct kvm_pit, expired);
@@ -322,16 +311,16 @@ static void pit_do_work(struct kthread_work *work)

 static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
 {
-	struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
-	struct kvm_pit *pt = ktimer->kvm->arch.vpit;
+	struct kvm_kpit_state *ps = container_of(data, struct kvm_kpit_state, timer);
+	struct kvm_pit *pt = ps->kvm->arch.vpit;

-	if (ktimer->reinject || !atomic_read(&ktimer->pending)) {
-		atomic_inc(&ktimer->pending);
+	if (ps->reinject || !atomic_read(&ps->pending)) {
+		atomic_inc(&ps->pending);
 		queue_kthread_work(&pt->worker, &pt->expired);
 	}

-	if (ktimer->t_ops->is_periodic(ktimer)) {
-		hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
+	if (ps->is_periodic) {
+		hrtimer_add_expires_ns(&ps->timer, ps->period);
 		return HRTIMER_RESTART;
 	} else
 		return HRTIMER_NORESTART;
@@ -340,7 +329,6 @@ static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
 static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
 {
 	struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
-	struct kvm_timer *pt = &ps->pit_timer;
 	s64 interval;

 	if (!irqchip_in_kernel(kvm) || ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)
@@ -351,19 +339,18 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
 	pr_debug("create pit timer, interval is %llu nsec\n", interval);

 	/* TODO The new value only affected after the retriggered */
-	hrtimer_cancel(&pt->timer);
+	hrtimer_cancel(&ps->timer);
 	flush_kthread_work(&ps->pit->expired);
-	pt->period = interval;
+	ps->period = interval;
 	ps->is_periodic = is_period;

-	pt->timer.function = pit_timer_fn;
-	pt->t_ops = &kpit_ops;
-	pt->kvm = ps->pit->kvm;
+	ps->timer.function = pit_timer_fn;
+	ps->kvm = ps->pit->kvm;

-	atomic_set(&pt->pending, 0);
+	atomic_set(&ps->pending, 0);
 	ps->irq_ack = 1;

-	hrtimer_start(&pt->timer, ktime_add_ns(ktime_get(), interval),
+	hrtimer_start(&ps->timer, ktime_add_ns(ktime_get(), interval),
 		      HRTIMER_MODE_ABS);
 }

@@ -639,7 +626,7 @@ void kvm_pit_reset(struct kvm_pit *pit)
 	}
 	mutex_unlock(&pit->pit_state.lock);

-	atomic_set(&pit->pit_state.pit_timer.pending, 0);
+	atomic_set(&pit->pit_state.pending, 0);
 	pit->pit_state.irq_ack = 1;
 }

@@ -648,7 +635,7 @@ static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask)
 	struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier);

 	if (!mask) {
-		atomic_set(&pit->pit_state.pit_timer.pending, 0);
+		atomic_set(&pit->pit_state.pending, 0);
 		pit->pit_state.irq_ack = 1;
 	}
 }
@@ -706,12 +693,11 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)

 	pit_state = &pit->pit_state;
 	pit_state->pit = pit;
-	hrtimer_init(&pit_state->pit_timer.timer,
-		     CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	hrtimer_init(&pit_state->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
 	pit_state->irq_ack_notifier.gsi = 0;
 	pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq;
 	kvm_register_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier);
-	pit_state->pit_timer.reinject = true;
+	pit_state->reinject = true;
 	mutex_unlock(&pit->pit_state.lock);

 	kvm_pit_reset(pit);
@@ -761,7 +747,7 @@ void kvm_free_pit(struct kvm *kvm)
 	kvm_unregister_irq_ack_notifier(kvm,
 				&kvm->arch.vpit->pit_state.irq_ack_notifier);
 	mutex_lock(&kvm->arch.vpit->pit_state.lock);
-	timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
+	timer = &kvm->arch.vpit->pit_state.timer;
 	hrtimer_cancel(timer);
 	flush_kthread_work(&kvm->arch.vpit->expired);
 	kthread_stop(kvm->arch.vpit->worker_task);
@@ -24,8 +24,12 @@ struct kvm_kpit_channel_state {
 struct kvm_kpit_state {
 	struct kvm_kpit_channel_state channels[3];
 	u32 flags;
-	struct kvm_timer pit_timer;
 	bool is_periodic;
+	s64 period;				/* unit: ns */
+	struct hrtimer timer;
+	atomic_t pending;			/* accumulated triggered timers */
+	bool reinject;
+	struct kvm *kvm;
 	u32    speaker_data_on;
 	struct mutex lock;
 	struct kvm_pit *pit;
@@ -190,17 +190,17 @@ void kvm_pic_update_irq(struct kvm_pic *s)

 int kvm_pic_set_irq(struct kvm_pic *s, int irq, int irq_source_id, int level)
 {
-	int ret = -1;
+	int ret, irq_level;
+
+	BUG_ON(irq < 0 || irq >= PIC_NUM_PINS);

 	pic_lock(s);
-	if (irq >= 0 && irq < PIC_NUM_PINS) {
-		int irq_level = __kvm_irq_line_state(&s->irq_states[irq],
+	irq_level = __kvm_irq_line_state(&s->irq_states[irq],
 					 irq_source_id, level);
 	ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, irq_level);
 	pic_update_irq(s);
 	trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr,
 			      s->pics[irq >> 3].imr, ret == 0);
-	}
 	pic_unlock(s);

 	return ret;
@@ -275,23 +275,20 @@ void kvm_pic_reset(struct kvm_kpic_state *s)
 {
 	int irq, i;
 	struct kvm_vcpu *vcpu;
-	u8 irr = s->irr, isr = s->imr;
+	u8 edge_irr = s->irr & ~s->elcr;
 	bool found = false;

 	s->last_irr = 0;
-	s->irr = 0;
+	s->irr &= s->elcr;
 	s->imr = 0;
-	s->isr = 0;
 	s->priority_add = 0;
-	s->irq_base = 0;
-	s->read_reg_select = 0;
-	s->poll = 0;
 	s->special_mask = 0;
-	s->init_state = 0;
-	s->auto_eoi = 0;
-	s->rotate_on_auto_eoi = 0;
-	s->special_fully_nested_mode = 0;
-	s->init4 = 0;
+	s->read_reg_select = 0;
+	if (!s->init4) {
+		s->special_fully_nested_mode = 0;
+		s->auto_eoi = 0;
+	}
+	s->init_state = 1;

 	kvm_for_each_vcpu(i, vcpu, s->pics_state->kvm)
 		if (kvm_apic_accept_pic_intr(vcpu)) {
@@ -304,7 +301,7 @@ void kvm_pic_reset(struct kvm_kpic_state *s)
 		return;

 	for (irq = 0; irq < PIC_NUM_PINS/2; irq++)
-		if (irr & (1 << irq) || isr & (1 << irq))
+		if (edge_irr & (1 << irq))
 			pic_clear_isr(s, irq);
 }

@@ -316,40 +313,13 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val)
 	addr &= 1;
 	if (addr == 0) {
 		if (val & 0x10) {
-			u8 edge_irr = s->irr & ~s->elcr;
-			int i;
-			bool found = false;
-			struct kvm_vcpu *vcpu;
-
 			s->init4 = val & 1;
-			s->last_irr = 0;
-			s->irr &= s->elcr;
-			s->imr = 0;
-			s->priority_add = 0;
-			s->special_mask = 0;
-			s->read_reg_select = 0;
-			if (!s->init4) {
-				s->special_fully_nested_mode = 0;
-				s->auto_eoi = 0;
-			}
-			s->init_state = 1;
 			if (val & 0x02)
 				pr_pic_unimpl("single mode not supported");
 			if (val & 0x08)
 				pr_pic_unimpl(
 						"level sensitive irq not supported");
+			kvm_pic_reset(s);
-			kvm_for_each_vcpu(i, vcpu, s->pics_state->kvm)
-				if (kvm_apic_accept_pic_intr(vcpu)) {
-					found = true;
-					break;
-				}
-
-
-			if (found)
-				for (irq = 0; irq < PIC_NUM_PINS/2; irq++)
-					if (edge_irr & (1 << irq))
-						pic_clear_isr(s, irq);
 		} else if (val & 0x08) {
 			if (val & 0x04)
 				s->poll = 1;
@@ -70,7 +70,7 @@ struct kvm_pic {
 	struct kvm_io_device dev_slave;
 	struct kvm_io_device dev_eclr;
 	void (*ack_notifier)(void *opaque, int irq);
-	unsigned long irq_states[16];
+	unsigned long irq_states[PIC_NUM_PINS];
 };

 struct kvm_pic *kvm_create_pic(struct kvm *kvm);
@@ -1,18 +0,0 @@
-
-struct kvm_timer {
-	struct hrtimer timer;
-	s64 period;				/* unit: ns */
-	u32 timer_mode_mask;
-	u64 tscdeadline;
-	atomic_t pending;			/* accumulated triggered timers */
-	bool reinject;
-	struct kvm_timer_ops *t_ops;
-	struct kvm *kvm;
-	struct kvm_vcpu *vcpu;
-};
-
-struct kvm_timer_ops {
-	bool (*is_periodic)(struct kvm_timer *);
-};
-
-enum hrtimer_restart kvm_timer_fn(struct hrtimer *data);
@@ -34,6 +34,7 @@
 #include <asm/current.h>
 #include <asm/apicdef.h>
 #include <linux/atomic.h>
+#include <linux/jump_label.h>
 #include "kvm_cache_regs.h"
 #include "irq.h"
 #include "trace.h"
@@ -65,6 +66,7 @@
 #define APIC_DEST_NOSHORT		0x0
 #define APIC_DEST_MASK			0x800
 #define MAX_APIC_VECTOR			256
+#define APIC_VECTORS_PER_REG		32

 #define VEC_POS(v) ((v) & (32 - 1))
 #define REG_POS(v) (((v) >> 5) << 4)
@@ -72,11 +74,6 @@
 static unsigned int min_timer_period_us = 500;
 module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);

-static inline u32 apic_get_reg(struct kvm_lapic *apic, int reg_off)
-{
-	return *((u32 *) (apic->regs + reg_off));
-}
-
 static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val)
 {
 	*((u32 *) (apic->regs + reg_off)) = val;
@@ -117,19 +114,23 @@ static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
 	return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
 }

-static inline int apic_hw_enabled(struct kvm_lapic *apic)
-{
-	return (apic)->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE;
-}
+struct static_key_deferred apic_hw_disabled __read_mostly;
+struct static_key_deferred apic_sw_disabled __read_mostly;

-static inline int apic_sw_enabled(struct kvm_lapic *apic)
+static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
 {
-	return apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED;
+	if ((kvm_apic_get_reg(apic, APIC_SPIV) ^ val) & APIC_SPIV_APIC_ENABLED) {
+		if (val & APIC_SPIV_APIC_ENABLED)
+			static_key_slow_dec_deferred(&apic_sw_disabled);
+		else
+			static_key_slow_inc(&apic_sw_disabled.key);
+	}
+	apic_set_reg(apic, APIC_SPIV, val);
 }

 static inline int apic_enabled(struct kvm_lapic *apic)
 {
-	return apic_sw_enabled(apic) && apic_hw_enabled(apic);
+	return kvm_apic_sw_enabled(apic) && kvm_apic_hw_enabled(apic);
 }

 #define LVT_MASK	\
@@ -139,36 +140,135 @@ static inline int apic_enabled(struct kvm_lapic *apic)
 	(LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
 	 APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)

+static inline int apic_x2apic_mode(struct kvm_lapic *apic)
+{
+	return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
+}
+
 static inline int kvm_apic_id(struct kvm_lapic *apic)
 {
-	return (apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
+	return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
+}
+
+static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr)
+{
+	u16 cid;
+	ldr >>= 32 - map->ldr_bits;
+	cid = (ldr >> map->cid_shift) & map->cid_mask;
+
+	BUG_ON(cid >= ARRAY_SIZE(map->logical_map));
+
+	return cid;
+}
+
+static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr)
+{
+	ldr >>= (32 - map->ldr_bits);
+	return ldr & map->lid_mask;
+}
+
+static void recalculate_apic_map(struct kvm *kvm)
+{
+	struct kvm_apic_map *new, *old = NULL;
+	struct kvm_vcpu *vcpu;
+	int i;
+
+	new = kzalloc(sizeof(struct kvm_apic_map), GFP_KERNEL);
+
+	mutex_lock(&kvm->arch.apic_map_lock);
+
+	if (!new)
+		goto out;
+
+	new->ldr_bits = 8;
+	/* flat mode is default */
+	new->cid_shift = 8;
+	new->cid_mask = 0;
+	new->lid_mask = 0xff;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		struct kvm_lapic *apic = vcpu->arch.apic;
+		u16 cid, lid;
+		u32 ldr;
+
+		if (!kvm_apic_present(vcpu))
+			continue;
+
+		/*
+		 * All APICs have to be configured in the same mode by an OS.
+		 * We take advatage of this while building logical id loockup
+		 * table. After reset APICs are in xapic/flat mode, so if we
+		 * find apic with different setting we assume this is the mode
+		 * OS wants all apics to be in; build lookup table accordingly.
+		 */
+		if (apic_x2apic_mode(apic)) {
+			new->ldr_bits = 32;
+			new->cid_shift = 16;
+			new->cid_mask = new->lid_mask = 0xffff;
+		} else if (kvm_apic_sw_enabled(apic) &&
+				!new->cid_mask /* flat mode */ &&
+				kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_CLUSTER) {
+			new->cid_shift = 4;
+			new->cid_mask = 0xf;
+			new->lid_mask = 0xf;
+		}
+
+		new->phys_map[kvm_apic_id(apic)] = apic;
+
+		ldr = kvm_apic_get_reg(apic, APIC_LDR);
+		cid = apic_cluster_id(new, ldr);
+		lid = apic_logical_id(new, ldr);
+
+		if (lid)
+			new->logical_map[cid][ffs(lid) - 1] = apic;
+	}
+out:
+	old = rcu_dereference_protected(kvm->arch.apic_map,
+			lockdep_is_held(&kvm->arch.apic_map_lock));
+	rcu_assign_pointer(kvm->arch.apic_map, new);
+	mutex_unlock(&kvm->arch.apic_map_lock);
+
+	if (old)
+		kfree_rcu(old, rcu);
+}
+
+static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id)
+{
+	apic_set_reg(apic, APIC_ID, id << 24);
+	recalculate_apic_map(apic->vcpu->kvm);
+}
+
+static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
+{
+	apic_set_reg(apic, APIC_LDR, id);
+	recalculate_apic_map(apic->vcpu->kvm);
 }

 static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
 {
-	return !(apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
+	return !(kvm_apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
 }

 static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type)
 {
-	return apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK;
+	return kvm_apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK;
 }

 static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
 {
-	return ((apic_get_reg(apic, APIC_LVTT) &
+	return ((kvm_apic_get_reg(apic, APIC_LVTT) &
 		apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_ONESHOT);
 }

 static inline int apic_lvtt_period(struct kvm_lapic *apic)
 {
-	return ((apic_get_reg(apic, APIC_LVTT) &
+	return ((kvm_apic_get_reg(apic, APIC_LVTT) &
 		apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_PERIODIC);
 }

 static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
 {
-	return ((apic_get_reg(apic, APIC_LVTT) &
+	return ((kvm_apic_get_reg(apic, APIC_LVTT) &
 		apic->lapic_timer.timer_mode_mask) ==
 		APIC_LVT_TIMER_TSCDEADLINE);
 }
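Not part of the diff: the point of the new apic map is to turn interrupt delivery into a table lookup instead of a scan over every vcpu. As a rough, hypothetical sketch of a physical-destination lookup under the assumptions encoded in recalculate_apic_map() above (the real fast path is kvm_irq_delivery_to_apic_fast(), partially shown further down; the helper name here is invented):

```c
/* Illustrative only: deliver a physical-destination interrupt via the map.
 * Locking and error handling are reduced to the bare minimum. */
static bool deliver_phys_via_map(struct kvm *kvm, struct kvm_lapic_irq *irq,
				 int *r)
{
	struct kvm_apic_map *map;
	struct kvm_lapic *dst;
	bool handled = false;

	rcu_read_lock();
	map = rcu_dereference(kvm->arch.apic_map);
	if (map && irq->dest_id < ARRAY_SIZE(map->phys_map)) {
		dst = map->phys_map[irq->dest_id];	/* APIC id -> lapic */
		if (dst) {
			*r = kvm_apic_set_irq(dst->vcpu, irq);
			handled = true;
		}
	}
	rcu_read_unlock();
	return handled;
}
```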
@@ -184,7 +284,7 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu)
|
|||||||
struct kvm_cpuid_entry2 *feat;
|
struct kvm_cpuid_entry2 *feat;
|
||||||
u32 v = APIC_VERSION;
|
u32 v = APIC_VERSION;
|
||||||
|
|
||||||
if (!irqchip_in_kernel(vcpu->kvm))
|
if (!kvm_vcpu_has_lapic(vcpu))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0);
|
feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0);
|
||||||
@@ -193,12 +293,7 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu)
|
|||||||
apic_set_reg(apic, APIC_LVR, v);
|
apic_set_reg(apic, APIC_LVR, v);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int apic_x2apic_mode(struct kvm_lapic *apic)
|
static const unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
|
||||||
{
|
|
||||||
return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
|
|
||||||
}
|
|
||||||
|
|
||||||
static unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
|
|
||||||
LVT_MASK , /* part LVTT mask, timer mode mask added at runtime */
|
LVT_MASK , /* part LVTT mask, timer mode mask added at runtime */
|
||||||
LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */
|
LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */
|
||||||
LVT_MASK | APIC_MODE_MASK, /* LVTPC */
|
LVT_MASK | APIC_MODE_MASK, /* LVTPC */
|
||||||
@@ -208,25 +303,30 @@ static unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
|
|||||||
|
|
||||||
static int find_highest_vector(void *bitmap)
|
static int find_highest_vector(void *bitmap)
|
||||||
{
|
{
|
||||||
u32 *word = bitmap;
|
int vec;
|
||||||
int word_offset = MAX_APIC_VECTOR >> 5;
|
u32 *reg;
|
||||||
|
|
||||||
while ((word_offset != 0) && (word[(--word_offset) << 2] == 0))
|
for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG;
|
||||||
continue;
|
vec >= 0; vec -= APIC_VECTORS_PER_REG) {
|
||||||
|
reg = bitmap + REG_POS(vec);
|
||||||
|
if (*reg)
|
||||||
|
return fls(*reg) - 1 + vec;
|
||||||
|
}
|
||||||
|
|
||||||
if (likely(!word_offset && !word[0]))
|
|
||||||
return -1;
|
return -1;
|
||||||
else
|
|
||||||
return fls(word[word_offset << 2]) - 1 + (word_offset << 5);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static u8 count_vectors(void *bitmap)
|
static u8 count_vectors(void *bitmap)
|
||||||
{
|
{
|
||||||
u32 *word = bitmap;
|
int vec;
|
||||||
int word_offset;
|
u32 *reg;
|
||||||
u8 count = 0;
|
u8 count = 0;
|
||||||
for (word_offset = 0; word_offset < MAX_APIC_VECTOR >> 5; ++word_offset)
|
|
||||||
count += hweight32(word[word_offset << 2]);
|
for (vec = 0; vec < MAX_APIC_VECTOR; vec += APIC_VECTORS_PER_REG) {
|
||||||
|
reg = bitmap + REG_POS(vec);
|
||||||
|
count += hweight32(*reg);
|
||||||
|
}
|
||||||
|
|
||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -285,7 +385,6 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
 
 int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
 {
-    struct kvm_lapic *apic = vcpu->arch.apic;
     int highest_irr;
 
     /* This may race with setting of irr in __apic_accept_irq() and
@@ -293,9 +392,9 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
      * will cause vmexit immediately and the value will be recalculated
      * on the next vmentry.
      */
-    if (!apic)
+    if (!kvm_vcpu_has_lapic(vcpu))
         return 0;
-    highest_irr = apic_find_highest_irr(apic);
+    highest_irr = apic_find_highest_irr(vcpu->arch.apic);
 
     return highest_irr;
 }
@@ -378,8 +477,8 @@ static void apic_update_ppr(struct kvm_lapic *apic)
     u32 tpr, isrv, ppr, old_ppr;
     int isr;
 
-    old_ppr = apic_get_reg(apic, APIC_PROCPRI);
-    tpr = apic_get_reg(apic, APIC_TASKPRI);
+    old_ppr = kvm_apic_get_reg(apic, APIC_PROCPRI);
+    tpr = kvm_apic_get_reg(apic, APIC_TASKPRI);
     isr = apic_find_highest_isr(apic);
     isrv = (isr != -1) ? isr : 0;
 
@@ -415,13 +514,13 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
     u32 logical_id;
 
     if (apic_x2apic_mode(apic)) {
-        logical_id = apic_get_reg(apic, APIC_LDR);
+        logical_id = kvm_apic_get_reg(apic, APIC_LDR);
         return logical_id & mda;
     }
 
-    logical_id = GET_APIC_LOGICAL_ID(apic_get_reg(apic, APIC_LDR));
+    logical_id = GET_APIC_LOGICAL_ID(kvm_apic_get_reg(apic, APIC_LDR));
 
-    switch (apic_get_reg(apic, APIC_DFR)) {
+    switch (kvm_apic_get_reg(apic, APIC_DFR)) {
     case APIC_DFR_FLAT:
         if (logical_id & mda)
             result = 1;
@@ -433,7 +532,7 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
         break;
     default:
         apic_debug("Bad DFR vcpu %d: %08x\n",
-                   apic->vcpu->vcpu_id, apic_get_reg(apic, APIC_DFR));
+                   apic->vcpu->vcpu_id, kvm_apic_get_reg(apic, APIC_DFR));
         break;
     }
 
@@ -478,6 +577,72 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
     return result;
 }
 
+bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
+        struct kvm_lapic_irq *irq, int *r)
+{
+    struct kvm_apic_map *map;
+    unsigned long bitmap = 1;
+    struct kvm_lapic **dst;
+    int i;
+    bool ret = false;
+
+    *r = -1;
+
+    if (irq->shorthand == APIC_DEST_SELF) {
+        *r = kvm_apic_set_irq(src->vcpu, irq);
+        return true;
+    }
+
+    if (irq->shorthand)
+        return false;
+
+    rcu_read_lock();
+    map = rcu_dereference(kvm->arch.apic_map);
+
+    if (!map)
+        goto out;
+
+    if (irq->dest_mode == 0) { /* physical mode */
+        if (irq->delivery_mode == APIC_DM_LOWEST ||
+                irq->dest_id == 0xff)
+            goto out;
+        dst = &map->phys_map[irq->dest_id & 0xff];
+    } else {
+        u32 mda = irq->dest_id << (32 - map->ldr_bits);
+
+        dst = map->logical_map[apic_cluster_id(map, mda)];
+
+        bitmap = apic_logical_id(map, mda);
+
+        if (irq->delivery_mode == APIC_DM_LOWEST) {
+            int l = -1;
+            for_each_set_bit(i, &bitmap, 16) {
+                if (!dst[i])
+                    continue;
+                if (l < 0)
+                    l = i;
+                else if (kvm_apic_compare_prio(dst[i]->vcpu, dst[l]->vcpu) < 0)
+                    l = i;
+            }
+
+            bitmap = (l >= 0) ? 1 << l : 0;
+        }
+    }
+
+    for_each_set_bit(i, &bitmap, 16) {
+        if (!dst[i])
+            continue;
+        if (*r < 0)
+            *r = 0;
+        *r += kvm_apic_set_irq(dst[i]->vcpu, irq);
+    }
+
+    ret = true;
+out:
+    rcu_read_unlock();
+    return ret;
+}
+
 /*
  * Add a pending IRQ into lapic.
  * Return 1 if successfully added and 0 if discarded.
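The lowest-priority branch of the new fast path reduces the logical-destination bitmap to a single target before delivery. Here is a deliberately simplified standalone sketch of that reduction; the toy_vcpu type, the present flag, and the plain integer arb_prio are illustrative stand-ins for the kvm_apic_map entries and kvm_apic_compare_prio() used in the real code.

#include <stdio.h>

struct toy_vcpu {
    int present;
    int arb_prio;   /* lower value wins lowest-priority arbitration */
};

/* Keep only the set bit whose candidate has the lowest priority. */
static unsigned short pick_lowest_priority(const struct toy_vcpu *dst,
                                           unsigned short bitmap)
{
    int l = -1;
    for (int i = 0; i < 16; i++) {
        if (!(bitmap & (1u << i)) || !dst[i].present)
            continue;
        if (l < 0 || dst[i].arb_prio < dst[l].arb_prio)
            l = i;
    }
    return (l >= 0) ? (unsigned short)(1u << l) : 0;
}

int main(void)
{
    struct toy_vcpu dst[16] = {0};
    dst[2] = (struct toy_vcpu){1, 30};
    dst[5] = (struct toy_vcpu){1, 10};
    dst[9] = (struct toy_vcpu){1, 20};

    unsigned short bitmap = (1u << 2) | (1u << 5) | (1u << 9);
    printf("deliver to bitmap 0x%04x\n", pick_lowest_priority(dst, bitmap));
    /* prints 0x0020: slot 5 has the lowest arbitration priority */
    return 0;
}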
@@ -591,7 +756,7 @@ static int apic_set_eoi(struct kvm_lapic *apic)
     apic_clear_isr(vector, apic);
     apic_update_ppr(apic);
 
-    if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) &&
+    if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) &&
         kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
         int trigger_mode;
         if (apic_test_vector(vector, apic->regs + APIC_TMR))
@@ -606,8 +771,8 @@ static int apic_set_eoi(struct kvm_lapic *apic)
 
 static void apic_send_ipi(struct kvm_lapic *apic)
 {
-    u32 icr_low = apic_get_reg(apic, APIC_ICR);
-    u32 icr_high = apic_get_reg(apic, APIC_ICR2);
+    u32 icr_low = kvm_apic_get_reg(apic, APIC_ICR);
+    u32 icr_high = kvm_apic_get_reg(apic, APIC_ICR2);
     struct kvm_lapic_irq irq;
 
     irq.vector = icr_low & APIC_VECTOR_MASK;
@@ -642,7 +807,7 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
     ASSERT(apic != NULL);
 
     /* if initial count is 0, current count should also be 0 */
-    if (apic_get_reg(apic, APIC_TMICT) == 0)
+    if (kvm_apic_get_reg(apic, APIC_TMICT) == 0)
         return 0;
 
     remaining = hrtimer_get_remaining(&apic->lapic_timer.timer);
@@ -696,13 +861,15 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
 
         val = apic_get_tmcct(apic);
         break;
+    case APIC_PROCPRI:
+        apic_update_ppr(apic);
+        val = kvm_apic_get_reg(apic, offset);
+        break;
     case APIC_TASKPRI:
         report_tpr_access(apic, false);
         /* fall thru */
     default:
-        apic_update_ppr(apic);
-        val = apic_get_reg(apic, offset);
+        val = kvm_apic_get_reg(apic, offset);
         break;
     }
 
@@ -719,7 +886,7 @@ static int apic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
 {
     unsigned char alignment = offset & 0xf;
     u32 result;
-    /* this bitmask has a bit cleared for each reserver register */
+    /* this bitmask has a bit cleared for each reserved register */
     static const u64 rmask = 0x43ff01ffffffe70cULL;
 
     if ((alignment + len) > 4) {
@@ -754,7 +921,7 @@ static int apic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
 
 static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
 {
-    return apic_hw_enabled(apic) &&
+    return kvm_apic_hw_enabled(apic) &&
         addr >= apic->base_address &&
         addr < apic->base_address + LAPIC_MMIO_LENGTH;
 }
@@ -777,7 +944,7 @@ static void update_divide_count(struct kvm_lapic *apic)
 {
     u32 tmp1, tmp2, tdcr;
 
-    tdcr = apic_get_reg(apic, APIC_TDCR);
+    tdcr = kvm_apic_get_reg(apic, APIC_TDCR);
     tmp1 = tdcr & 0xf;
     tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
     apic->divide_count = 0x1 << (tmp2 & 0x7);
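The timer divide-count decode kept unchanged in that hunk is compact enough to deserve a worked example. A standalone sketch of the same bit shuffle (TDCR bits 0, 1 and 3 select the divider; bit 2 is reserved):

#include <stdio.h>

/* Decode an APIC Timer Divide Configuration Register value into the
 * divider, exactly as update_divide_count() does. */
static unsigned divide_count(unsigned tdcr)
{
    unsigned tmp1 = tdcr & 0xf;
    unsigned tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
    return 1u << (tmp2 & 0x7);
}

int main(void)
{
    /* 0b0000 -> divide by 2, 0b1010 -> divide by 128, 0b1011 -> divide by 1 */
    printf("%u %u %u\n", divide_count(0x0), divide_count(0xa), divide_count(0xb));
    return 0;
}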
@@ -792,9 +959,9 @@ static void start_apic_timer(struct kvm_lapic *apic)
     atomic_set(&apic->lapic_timer.pending, 0);
 
     if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
-        /* lapic timer in oneshot or peroidic mode */
+        /* lapic timer in oneshot or periodic mode */
         now = apic->lapic_timer.timer.base->get_time();
-        apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT)
+        apic->lapic_timer.period = (u64)kvm_apic_get_reg(apic, APIC_TMICT)
                     * APIC_BUS_CYCLE_NS * apic->divide_count;
 
         if (!apic->lapic_timer.period)
@@ -826,7 +993,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
                "timer initial count 0x%x, period %lldns, "
                "expire @ 0x%016" PRIx64 ".\n", __func__,
                APIC_BUS_CYCLE_NS, ktime_to_ns(now),
-               apic_get_reg(apic, APIC_TMICT),
+               kvm_apic_get_reg(apic, APIC_TMICT),
                apic->lapic_timer.period,
                ktime_to_ns(ktime_add_ns(now,
                     apic->lapic_timer.period)));
@@ -858,7 +1025,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
 
 static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
 {
-    int nmi_wd_enabled = apic_lvt_nmi_mode(apic_get_reg(apic, APIC_LVT0));
+    int nmi_wd_enabled = apic_lvt_nmi_mode(kvm_apic_get_reg(apic, APIC_LVT0));
 
     if (apic_lvt_nmi_mode(lvt0_val)) {
         if (!nmi_wd_enabled) {
@@ -879,7 +1046,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
     switch (reg) {
     case APIC_ID:   /* Local APIC ID */
         if (!apic_x2apic_mode(apic))
-            apic_set_reg(apic, APIC_ID, val);
+            kvm_apic_set_id(apic, val >> 24);
         else
             ret = 1;
         break;
@@ -895,29 +1062,30 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 
     case APIC_LDR:
         if (!apic_x2apic_mode(apic))
-            apic_set_reg(apic, APIC_LDR, val & APIC_LDR_MASK);
+            kvm_apic_set_ldr(apic, val & APIC_LDR_MASK);
         else
             ret = 1;
         break;
 
     case APIC_DFR:
-        if (!apic_x2apic_mode(apic))
+        if (!apic_x2apic_mode(apic)) {
             apic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
-        else
+            recalculate_apic_map(apic->vcpu->kvm);
+        } else
             ret = 1;
         break;
 
     case APIC_SPIV: {
         u32 mask = 0x3ff;
-        if (apic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
+        if (kvm_apic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
             mask |= APIC_SPIV_DIRECTED_EOI;
-        apic_set_reg(apic, APIC_SPIV, val & mask);
+        apic_set_spiv(apic, val & mask);
         if (!(val & APIC_SPIV_APIC_ENABLED)) {
             int i;
             u32 lvt_val;
 
             for (i = 0; i < APIC_LVT_NUM; i++) {
-                lvt_val = apic_get_reg(apic,
+                lvt_val = kvm_apic_get_reg(apic,
                                APIC_LVTT + 0x10 * i);
                 apic_set_reg(apic, APIC_LVTT + 0x10 * i,
                          lvt_val | APIC_LVT_MASKED);
@@ -946,7 +1114,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
     case APIC_LVT1:
     case APIC_LVTERR:
         /* TODO: Check vector */
-        if (!apic_sw_enabled(apic))
+        if (!kvm_apic_sw_enabled(apic))
             val |= APIC_LVT_MASKED;
 
         val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4];
@@ -955,12 +1123,12 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
         break;
 
     case APIC_LVTT:
-        if ((apic_get_reg(apic, APIC_LVTT) &
+        if ((kvm_apic_get_reg(apic, APIC_LVTT) &
             apic->lapic_timer.timer_mode_mask) !=
            (val & apic->lapic_timer.timer_mode_mask))
             hrtimer_cancel(&apic->lapic_timer.timer);
 
-        if (!apic_sw_enabled(apic))
+        if (!kvm_apic_sw_enabled(apic))
             val |= APIC_LVT_MASKED;
         val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
         apic_set_reg(apic, APIC_LVTT, val);
@@ -1039,24 +1207,30 @@ static int apic_mmio_write(struct kvm_io_device *this,
 
 void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
 {
-    struct kvm_lapic *apic = vcpu->arch.apic;
-
-    if (apic)
+    if (kvm_vcpu_has_lapic(vcpu))
         apic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
 
 void kvm_free_lapic(struct kvm_vcpu *vcpu)
 {
+    struct kvm_lapic *apic = vcpu->arch.apic;
+
     if (!vcpu->arch.apic)
         return;
 
-    hrtimer_cancel(&vcpu->arch.apic->lapic_timer.timer);
+    hrtimer_cancel(&apic->lapic_timer.timer);
 
-    if (vcpu->arch.apic->regs)
-        free_page((unsigned long)vcpu->arch.apic->regs);
+    if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
+        static_key_slow_dec_deferred(&apic_hw_disabled);
 
-    kfree(vcpu->arch.apic);
+    if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED))
+        static_key_slow_dec_deferred(&apic_sw_disabled);
+
+    if (apic->regs)
+        free_page((unsigned long)apic->regs);
+
+    kfree(apic);
 }
 
 /*
@@ -1068,10 +1242,9 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu)
 u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
 {
     struct kvm_lapic *apic = vcpu->arch.apic;
-    if (!apic)
-        return 0;
 
-    if (apic_lvtt_oneshot(apic) || apic_lvtt_period(apic))
+    if (!kvm_vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) ||
+            apic_lvtt_period(apic))
         return 0;
 
     return apic->lapic_timer.tscdeadline;
@@ -1080,10 +1253,9 @@ u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
 void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
 {
     struct kvm_lapic *apic = vcpu->arch.apic;
-    if (!apic)
-        return;
 
-    if (apic_lvtt_oneshot(apic) || apic_lvtt_period(apic))
+    if (!kvm_vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) ||
+            apic_lvtt_period(apic))
         return;
 
     hrtimer_cancel(&apic->lapic_timer.timer);
@@ -1095,20 +1267,21 @@ void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
 {
     struct kvm_lapic *apic = vcpu->arch.apic;
 
-    if (!apic)
+    if (!kvm_vcpu_has_lapic(vcpu))
         return;
 
     apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
-             | (apic_get_reg(apic, APIC_TASKPRI) & 4));
+             | (kvm_apic_get_reg(apic, APIC_TASKPRI) & 4));
 }
 
 u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
 {
-    struct kvm_lapic *apic = vcpu->arch.apic;
     u64 tpr;
 
-    if (!apic)
+    if (!kvm_vcpu_has_lapic(vcpu))
         return 0;
-    tpr = (u64) apic_get_reg(apic, APIC_TASKPRI);
+
+    tpr = (u64) kvm_apic_get_reg(vcpu->arch.apic, APIC_TASKPRI);
 
     return (tpr & 0xf0) >> 4;
 }
@@ -1123,6 +1296,15 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
         return;
     }
 
+    /* update jump label if enable bit changes */
+    if ((vcpu->arch.apic_base ^ value) & MSR_IA32_APICBASE_ENABLE) {
+        if (value & MSR_IA32_APICBASE_ENABLE)
+            static_key_slow_dec_deferred(&apic_hw_disabled);
+        else
+            static_key_slow_inc(&apic_hw_disabled.key);
+        recalculate_apic_map(vcpu->kvm);
+    }
+
     if (!kvm_vcpu_is_bsp(apic->vcpu))
         value &= ~MSR_IA32_APICBASE_BSP;
 
@@ -1130,7 +1312,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
     if (apic_x2apic_mode(apic)) {
         u32 id = kvm_apic_id(apic);
         u32 ldr = ((id & ~0xf) << 16) | (1 << (id & 0xf));
-        apic_set_reg(apic, APIC_LDR, ldr);
+        kvm_apic_set_ldr(apic, ldr);
     }
     apic->base_address = apic->vcpu->arch.apic_base &
                  MSR_IA32_APICBASE_BASE;
@@ -1155,7 +1337,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
     /* Stop the timer in case it's a reset to an active apic */
     hrtimer_cancel(&apic->lapic_timer.timer);
 
-    apic_set_reg(apic, APIC_ID, vcpu->vcpu_id << 24);
+    kvm_apic_set_id(apic, vcpu->vcpu_id);
     kvm_apic_set_version(apic->vcpu);
 
     for (i = 0; i < APIC_LVT_NUM; i++)
@@ -1164,9 +1346,9 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
              SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
 
     apic_set_reg(apic, APIC_DFR, 0xffffffffU);
-    apic_set_reg(apic, APIC_SPIV, 0xff);
+    apic_set_spiv(apic, 0xff);
     apic_set_reg(apic, APIC_TASKPRI, 0);
-    apic_set_reg(apic, APIC_LDR, 0);
+    kvm_apic_set_ldr(apic, 0);
     apic_set_reg(apic, APIC_ESR, 0);
     apic_set_reg(apic, APIC_ICR, 0);
     apic_set_reg(apic, APIC_ICR2, 0);
@@ -1183,7 +1365,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
     update_divide_count(apic);
     atomic_set(&apic->lapic_timer.pending, 0);
     if (kvm_vcpu_is_bsp(vcpu))
-        vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP;
+        kvm_lapic_set_base(vcpu,
+                vcpu->arch.apic_base | MSR_IA32_APICBASE_BSP);
     vcpu->arch.pv_eoi.msr_val = 0;
     apic_update_ppr(apic);
 
@@ -1196,45 +1379,34 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
            vcpu->arch.apic_base, apic->base_address);
 }
 
-bool kvm_apic_present(struct kvm_vcpu *vcpu)
-{
-    return vcpu->arch.apic && apic_hw_enabled(vcpu->arch.apic);
-}
-
-int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
-{
-    return kvm_apic_present(vcpu) && apic_sw_enabled(vcpu->arch.apic);
-}
-
 /*
  *----------------------------------------------------------------------
  * timer interface
  *----------------------------------------------------------------------
  */
 
-static bool lapic_is_periodic(struct kvm_timer *ktimer)
+static bool lapic_is_periodic(struct kvm_lapic *apic)
 {
-    struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic,
-                          lapic_timer);
     return apic_lvtt_period(apic);
 }
 
 int apic_has_pending_timer(struct kvm_vcpu *vcpu)
 {
-    struct kvm_lapic *lapic = vcpu->arch.apic;
+    struct kvm_lapic *apic = vcpu->arch.apic;
 
-    if (lapic && apic_enabled(lapic) && apic_lvt_enabled(lapic, APIC_LVTT))
-        return atomic_read(&lapic->lapic_timer.pending);
+    if (kvm_vcpu_has_lapic(vcpu) && apic_enabled(apic) &&
+            apic_lvt_enabled(apic, APIC_LVTT))
+        return atomic_read(&apic->lapic_timer.pending);
 
     return 0;
 }
 
 int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
 {
-    u32 reg = apic_get_reg(apic, lvt_type);
+    u32 reg = kvm_apic_get_reg(apic, lvt_type);
     int vector, mode, trig_mode;
 
-    if (apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
+    if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
         vector = reg & APIC_VECTOR_MASK;
         mode = reg & APIC_MODE_MASK;
         trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
@@ -1251,15 +1423,40 @@ void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
         kvm_apic_local_deliver(apic, APIC_LVT0);
 }
 
-static struct kvm_timer_ops lapic_timer_ops = {
-    .is_periodic = lapic_is_periodic,
-};
-
 static const struct kvm_io_device_ops apic_mmio_ops = {
     .read     = apic_mmio_read,
     .write    = apic_mmio_write,
 };
 
+static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
+{
+    struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
+    struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);
+    struct kvm_vcpu *vcpu = apic->vcpu;
+    wait_queue_head_t *q = &vcpu->wq;
+
+    /*
+     * There is a race window between reading and incrementing, but we do
+     * not care about potentially losing timer events in the !reinject
+     * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked
+     * in vcpu_enter_guest.
+     */
+    if (!atomic_read(&ktimer->pending)) {
+        atomic_inc(&ktimer->pending);
+        /* FIXME: this code should not know anything about vcpus */
+        kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
+    }
+
+    if (waitqueue_active(q))
+        wake_up_interruptible(q);
+
+    if (lapic_is_periodic(apic)) {
+        hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
+        return HRTIMER_RESTART;
+    } else
+        return HRTIMER_NORESTART;
+}
+
 int kvm_create_lapic(struct kvm_vcpu *vcpu)
 {
     struct kvm_lapic *apic;
@@ -1283,14 +1480,17 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
 
     hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
              HRTIMER_MODE_ABS);
-    apic->lapic_timer.timer.function = kvm_timer_fn;
-    apic->lapic_timer.t_ops = &lapic_timer_ops;
-    apic->lapic_timer.kvm = vcpu->kvm;
-    apic->lapic_timer.vcpu = vcpu;
+    apic->lapic_timer.timer.function = apic_timer_fn;
 
-    apic->base_address = APIC_DEFAULT_PHYS_BASE;
-    vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE;
+    /*
+     * APIC is created enabled. This will prevent kvm_lapic_set_base from
+     * thinking that APIC satet has changed.
+     */
+    vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
+    kvm_lapic_set_base(vcpu,
+            APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE);
+
+    static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
     kvm_lapic_reset(vcpu);
     kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
 
@@ -1306,23 +1506,23 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
     struct kvm_lapic *apic = vcpu->arch.apic;
     int highest_irr;
 
-    if (!apic || !apic_enabled(apic))
+    if (!kvm_vcpu_has_lapic(vcpu) || !apic_enabled(apic))
         return -1;
 
     apic_update_ppr(apic);
     highest_irr = apic_find_highest_irr(apic);
     if ((highest_irr == -1) ||
-        ((highest_irr & 0xF0) <= apic_get_reg(apic, APIC_PROCPRI)))
+        ((highest_irr & 0xF0) <= kvm_apic_get_reg(apic, APIC_PROCPRI)))
         return -1;
     return highest_irr;
 }
 
 int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
 {
-    u32 lvt0 = apic_get_reg(vcpu->arch.apic, APIC_LVT0);
+    u32 lvt0 = kvm_apic_get_reg(vcpu->arch.apic, APIC_LVT0);
     int r = 0;
 
-    if (!apic_hw_enabled(vcpu->arch.apic))
+    if (!kvm_apic_hw_enabled(vcpu->arch.apic))
         r = 1;
     if ((lvt0 & APIC_LVT_MASKED) == 0 &&
         GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
@@ -1334,7 +1534,10 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
 {
     struct kvm_lapic *apic = vcpu->arch.apic;
 
-    if (apic && atomic_read(&apic->lapic_timer.pending) > 0) {
+    if (!kvm_vcpu_has_lapic(vcpu))
+        return;
+
+    if (atomic_read(&apic->lapic_timer.pending) > 0) {
         if (kvm_apic_local_deliver(apic, APIC_LVTT))
             atomic_dec(&apic->lapic_timer.pending);
     }
@@ -1354,12 +1557,17 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
     return vector;
 }
 
-void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu)
+void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
+        struct kvm_lapic_state *s)
 {
     struct kvm_lapic *apic = vcpu->arch.apic;
 
-    apic->base_address = vcpu->arch.apic_base &
-                 MSR_IA32_APICBASE_BASE;
+    kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
+    /* set SPIV separately to get count of SW disabled APICs right */
+    apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
+    memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
+    /* call kvm_apic_set_id() to put apic into apic_map */
+    kvm_apic_set_id(apic, kvm_apic_id(apic));
     kvm_apic_set_version(vcpu);
 
     apic_update_ppr(apic);
@@ -1374,13 +1582,12 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu)
 
 void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
 {
-    struct kvm_lapic *apic = vcpu->arch.apic;
     struct hrtimer *timer;
 
-    if (!apic)
+    if (!kvm_vcpu_has_lapic(vcpu))
         return;
 
-    timer = &apic->lapic_timer.timer;
+    timer = &vcpu->arch.apic->lapic_timer.timer;
     if (hrtimer_cancel(timer))
         hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
 }
@@ -1478,7 +1685,7 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
     if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
         return;
 
-    tpr = apic_get_reg(apic, APIC_TASKPRI) & 0xff;
+    tpr = kvm_apic_get_reg(apic, APIC_TASKPRI) & 0xff;
     max_irr = apic_find_highest_irr(apic);
     if (max_irr < 0)
         max_irr = 0;
@@ -1537,7 +1744,7 @@ int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
 {
     struct kvm_lapic *apic = vcpu->arch.apic;
 
-    if (!irqchip_in_kernel(vcpu->kvm))
+    if (!kvm_vcpu_has_lapic(vcpu))
         return 1;
 
     /* if this is ICR write vector before command */
@@ -1551,7 +1758,7 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
     struct kvm_lapic *apic = vcpu->arch.apic;
     u32 low, high = 0;
 
-    if (!irqchip_in_kernel(vcpu->kvm))
+    if (!kvm_vcpu_has_lapic(vcpu))
         return 1;
 
     if (apic_reg_read(apic, reg, 4, &low))
@@ -1576,3 +1783,10 @@ int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data)
     return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data,
                      addr);
 }
+
+void kvm_lapic_init(void)
+{
+    /* do not patch jump label more than once per second */
+    jump_label_rate_limit(&apic_hw_disabled, HZ);
+    jump_label_rate_limit(&apic_sw_disabled, HZ);
+}
@@ -2,10 +2,17 @@
 #define __KVM_X86_LAPIC_H
 
 #include "iodev.h"
-#include "kvm_timer.h"
 
 #include <linux/kvm_host.h>
 
+struct kvm_timer {
+    struct hrtimer timer;
+    s64 period;                 /* unit: ns */
+    u32 timer_mode_mask;
+    u64 tscdeadline;
+    atomic_t pending;           /* accumulated triggered timers */
+};
+
 struct kvm_lapic {
     unsigned long base_address;
     struct kvm_io_device dev;
@@ -45,11 +52,13 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
 int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
 int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
 
+bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
+        struct kvm_lapic_irq *irq, int *r);
+
 u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
 void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
-void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu);
-int kvm_lapic_enabled(struct kvm_vcpu *vcpu);
-bool kvm_apic_present(struct kvm_vcpu *vcpu);
+void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
+        struct kvm_lapic_state *s);
 int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
 
 u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu);
@@ -71,4 +80,48 @@ static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu)
 }
 
 int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data);
+void kvm_lapic_init(void);
+
+static inline u32 kvm_apic_get_reg(struct kvm_lapic *apic, int reg_off)
+{
+    return *((u32 *) (apic->regs + reg_off));
+}
+
+extern struct static_key kvm_no_apic_vcpu;
+
+static inline bool kvm_vcpu_has_lapic(struct kvm_vcpu *vcpu)
+{
+    if (static_key_false(&kvm_no_apic_vcpu))
+        return vcpu->arch.apic;
+    return true;
+}
+
+extern struct static_key_deferred apic_hw_disabled;
+
+static inline int kvm_apic_hw_enabled(struct kvm_lapic *apic)
+{
+    if (static_key_false(&apic_hw_disabled.key))
+        return apic->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE;
+    return MSR_IA32_APICBASE_ENABLE;
+}
+
+extern struct static_key_deferred apic_sw_disabled;
+
+static inline int kvm_apic_sw_enabled(struct kvm_lapic *apic)
+{
+    if (static_key_false(&apic_sw_disabled.key))
+        return kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED;
+    return APIC_SPIV_APIC_ENABLED;
+}
+
+static inline bool kvm_apic_present(struct kvm_vcpu *vcpu)
+{
+    return kvm_vcpu_has_lapic(vcpu) && kvm_apic_hw_enabled(vcpu->arch.apic);
+}
+
+static inline int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
+{
+    return kvm_apic_present(vcpu) && kvm_apic_sw_enabled(vcpu->arch.apic);
+}
+
 #endif
@@ -556,6 +556,14 @@ static int mmu_spte_clear_track_bits(u64 *sptep)
         return 0;
 
     pfn = spte_to_pfn(old_spte);
+
+    /*
+     * KVM does not hold the refcount of the page used by
+     * kvm mmu, before reclaiming the page, we should
+     * unmap it from mmu first.
+     */
+    WARN_ON(!kvm_is_mmio_pfn(pfn) && !page_count(pfn_to_page(pfn)));
+
     if (!shadow_accessed_mask || old_spte & shadow_accessed_mask)
         kvm_set_pfn_accessed(pfn);
     if (!shadow_dirty_mask || (old_spte & shadow_dirty_mask))
@@ -960,13 +968,10 @@ static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn)
 static unsigned long *__gfn_to_rmap(gfn_t gfn, int level,
                     struct kvm_memory_slot *slot)
 {
-    struct kvm_lpage_info *linfo;
+    unsigned long idx;
 
-    if (likely(level == PT_PAGE_TABLE_LEVEL))
-        return &slot->rmap[gfn - slot->base_gfn];
-
-    linfo = lpage_info_slot(gfn, slot, level);
-    return &linfo->rmap_pde;
+    idx = gfn_to_index(gfn, slot->base_gfn, level);
+    return &slot->arch.rmap[level - PT_PAGE_TABLE_LEVEL][idx];
 }
 
 /*
@@ -1173,7 +1178,8 @@ void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
     unsigned long *rmapp;
 
     while (mask) {
-        rmapp = &slot->rmap[gfn_offset + __ffs(mask)];
+        rmapp = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
+                      PT_PAGE_TABLE_LEVEL, slot);
         __rmap_write_protect(kvm, rmapp, PT_PAGE_TABLE_LEVEL, false);
 
         /* clear the first set bit */
@@ -1200,7 +1206,7 @@ static bool rmap_write_protect(struct kvm *kvm, u64 gfn)
 }
 
 static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
-               unsigned long data)
+               struct kvm_memory_slot *slot, unsigned long data)
 {
     u64 *sptep;
     struct rmap_iterator iter;
@@ -1218,7 +1224,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
 }
 
 static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
-                 unsigned long data)
+                 struct kvm_memory_slot *slot, unsigned long data)
 {
     u64 *sptep;
     struct rmap_iterator iter;
@@ -1259,43 +1265,67 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
     return 0;
 }
 
-static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
+static int kvm_handle_hva_range(struct kvm *kvm,
+                unsigned long start,
+                unsigned long end,
                 unsigned long data,
-              int (*handler)(struct kvm *kvm, unsigned long *rmapp,
+                int (*handler)(struct kvm *kvm,
+                           unsigned long *rmapp,
+                           struct kvm_memory_slot *slot,
                            unsigned long data))
 {
     int j;
-    int ret;
-    int retval = 0;
+    int ret = 0;
     struct kvm_memslots *slots;
     struct kvm_memory_slot *memslot;
 
     slots = kvm_memslots(kvm);
 
     kvm_for_each_memslot(memslot, slots) {
-        unsigned long start = memslot->userspace_addr;
-        unsigned long end;
+        unsigned long hva_start, hva_end;
+        gfn_t gfn_start, gfn_end;
 
-        end = start + (memslot->npages << PAGE_SHIFT);
-        if (hva >= start && hva < end) {
-            gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
-            gfn_t gfn = memslot->base_gfn + gfn_offset;
+        hva_start = max(start, memslot->userspace_addr);
+        hva_end = min(end, memslot->userspace_addr +
+                    (memslot->npages << PAGE_SHIFT));
+        if (hva_start >= hva_end)
+            continue;
+        /*
+         * {gfn(page) | page intersects with [hva_start, hva_end)} =
+         * {gfn_start, gfn_start+1, ..., gfn_end-1}.
+         */
+        gfn_start = hva_to_gfn_memslot(hva_start, memslot);
+        gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
 
-            ret = handler(kvm, &memslot->rmap[gfn_offset], data);
+        for (j = PT_PAGE_TABLE_LEVEL;
+             j < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++j) {
+            unsigned long idx, idx_end;
+            unsigned long *rmapp;
 
-            for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) {
-                struct kvm_lpage_info *linfo;
+            /*
+             * {idx(page_j) | page_j intersects with
+             *  [hva_start, hva_end)} = {idx, idx+1, ..., idx_end}.
+             */
+            idx = gfn_to_index(gfn_start, memslot->base_gfn, j);
+            idx_end = gfn_to_index(gfn_end - 1, memslot->base_gfn, j);
 
-                linfo = lpage_info_slot(gfn, memslot,
-                        PT_DIRECTORY_LEVEL + j);
-                ret |= handler(kvm, &linfo->rmap_pde, data);
-            }
-            trace_kvm_age_page(hva, memslot, ret);
-            retval |= ret;
+            rmapp = __gfn_to_rmap(gfn_start, j, memslot);
+
+            for (; idx <= idx_end; ++idx)
+                ret |= handler(kvm, rmapp++, memslot, data);
         }
     }
 
-    return retval;
+    return ret;
+}
+
+static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
+              unsigned long data,
+              int (*handler)(struct kvm *kvm, unsigned long *rmapp,
+                     struct kvm_memory_slot *slot,
+                     unsigned long data))
+{
+    return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler);
 }
 
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
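To see what the new range handler is computing, here is a small standalone sketch of the same hva-to-gfn clipping and the per-level rmap index range. The toy_memslot type is hypothetical, PAGE_SHIFT is fixed at 12, and hva_to_gfn()/gfn_to_index() only mirror the kernel helpers of similar names.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

/* Toy memslot: guest frame base_gfn is mapped at host address userspace_addr. */
struct toy_memslot {
    uint64_t base_gfn;
    uint64_t npages;
    uint64_t userspace_addr;
};

static uint64_t hva_to_gfn(uint64_t hva, const struct toy_memslot *s)
{
    return s->base_gfn + ((hva - s->userspace_addr) >> PAGE_SHIFT);
}

/* Index of a gfn's rmap entry at a given paging level
 * (level 1 = 4K pages, level 2 = 2M, level 3 = 1G): 9 index bits per level. */
static uint64_t gfn_to_index(uint64_t gfn, uint64_t base_gfn, int level)
{
    int shift = (level - 1) * 9;
    return (gfn >> shift) - (base_gfn >> shift);
}

int main(void)
{
    struct toy_memslot s = { .base_gfn = 0x100, .npages = 4096,
                             .userspace_addr = 0x7f0000000000UL };
    uint64_t start = s.userspace_addr + 5 * PAGE_SIZE;      /* already clipped */
    uint64_t end   = s.userspace_addr + 1029 * PAGE_SIZE;

    uint64_t gfn_start = hva_to_gfn(start, &s);
    uint64_t gfn_end   = hva_to_gfn(end + PAGE_SIZE - 1, &s);

    for (int level = 1; level <= 3; level++)
        printf("level %d: idx %llu..%llu\n", level,
               (unsigned long long)gfn_to_index(gfn_start, s.base_gfn, level),
               (unsigned long long)gfn_to_index(gfn_end - 1, s.base_gfn, level));
    return 0;
}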
@@ -1303,13 +1333,18 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
     return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp);
 }
 
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+{
+    return kvm_handle_hva_range(kvm, start, end, 0, kvm_unmap_rmapp);
+}
+
 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
 {
     kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp);
 }
 
 static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
-             unsigned long data)
+             struct kvm_memory_slot *slot, unsigned long data)
 {
     u64 *sptep;
     struct rmap_iterator uninitialized_var(iter);
@@ -1323,8 +1358,10 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
      * This has some overhead, but not as much as the cost of swapping
      * out actively used pages or breaking up actively used hugepages.
      */
-    if (!shadow_accessed_mask)
-        return kvm_unmap_rmapp(kvm, rmapp, data);
+    if (!shadow_accessed_mask) {
+        young = kvm_unmap_rmapp(kvm, rmapp, slot, data);
+        goto out;
+    }
 
     for (sptep = rmap_get_first(*rmapp, &iter); sptep;
          sptep = rmap_get_next(&iter)) {
@@ -1336,12 +1373,14 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
                  (unsigned long *)sptep);
         }
     }
+out:
+    /* @data has hva passed to kvm_age_hva(). */
+    trace_kvm_age_page(data, slot, young);
     return young;
 }
 
 static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
-                  unsigned long data)
+                  struct kvm_memory_slot *slot, unsigned long data)
 {
     u64 *sptep;
     struct rmap_iterator iter;
@@ -1379,13 +1418,13 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
 
     rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
 
-    kvm_unmap_rmapp(vcpu->kvm, rmapp, 0);
+    kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, 0);
     kvm_flush_remote_tlbs(vcpu->kvm);
 }
 
 int kvm_age_hva(struct kvm *kvm, unsigned long hva)
 {
-    return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp);
+    return kvm_handle_hva(kvm, hva, hva, kvm_age_rmapp);
 }
 
 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
@@ -2457,6 +2496,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
             rmap_recycle(vcpu, sptep, gfn);
         }
     }
+
+    if (!is_error_pfn(pfn))
         kvm_release_pfn_clean(pfn);
 }
 
@@ -2469,17 +2510,12 @@ static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
                      bool no_dirty_log)
 {
     struct kvm_memory_slot *slot;
-    unsigned long hva;
 
     slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, no_dirty_log);
-    if (!slot) {
-        get_page(fault_page);
-        return page_to_pfn(fault_page);
-    }
+    if (!slot)
+        return KVM_PFN_ERR_FAULT;
 
-    hva = gfn_to_hva_memslot(slot, gfn);
-
-    return hva_to_pfn_atomic(vcpu->kvm, hva);
+    return gfn_to_pfn_memslot_atomic(slot, gfn);
 }
 
 static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
@@ -2580,11 +2616,6 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
             sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr,
                           iterator.level - 1,
                           1, ACC_ALL, iterator.sptep);
-            if (!sp) {
-                pgprintk("nonpaging_map: ENOMEM\n");
-                kvm_release_pfn_clean(pfn);
-                return -ENOMEM;
-            }
 
             mmu_spte_set(iterator.sptep,
                      __pa(sp->spt)
@@ -2611,8 +2642,16 @@ static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct *
 
 static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, pfn_t pfn)
 {
-    kvm_release_pfn_clean(pfn);
-    if (is_hwpoison_pfn(pfn)) {
+    /*
+     * Do not cache the mmio info caused by writing the readonly gfn
+     * into the spte otherwise read access on readonly gfn also can
+     * caused mmio page fault and treat it as mmio access.
+     * Return 1 to tell kvm to emulate it.
+     */
+    if (pfn == KVM_PFN_ERR_RO_FAULT)
+        return 1;
+
+    if (pfn == KVM_PFN_ERR_HWPOISON) {
         kvm_send_hwpoison_signal(gfn_to_hva(vcpu->kvm, gfn), current);
         return 0;
     }
@@ -3236,8 +3275,6 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
     if (!async)
         return false; /* *pfn has correct page already */
 
-    put_page(pfn_to_page(*pfn));
-
     if (!prefault && can_do_async_pf(vcpu)) {
         trace_kvm_try_async_get_page(gva, gfn);
         if (kvm_find_async_pf_gfn(vcpu, gfn)) {
@@ -3371,6 +3408,18 @@ static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level)
     return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0;
 }
 
+static inline void protect_clean_gpte(unsigned *access, unsigned gpte)
+{
+    unsigned mask;
+
+    BUILD_BUG_ON(PT_WRITABLE_MASK != ACC_WRITE_MASK);
+
+    mask = (unsigned)~ACC_WRITE_MASK;
+    /* Allow write access to dirty gptes */
+    mask |= (gpte >> (PT_DIRTY_SHIFT - PT_WRITABLE_SHIFT)) & PT_WRITABLE_MASK;
+    *access &= mask;
+}
+
 static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access,
                int *nr_present)
 {
@@ -3388,6 +3437,25 @@ static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access,
     return false;
 }
 
+static inline unsigned gpte_access(struct kvm_vcpu *vcpu, u64 gpte)
+{
+    unsigned access;
+
+    access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
+    access &= ~(gpte >> PT64_NX_SHIFT);
+
+    return access;
+}
+
+static inline bool is_last_gpte(struct kvm_mmu *mmu, unsigned level, unsigned gpte)
+{
+    unsigned index;
+
+    index = level - 1;
+    index |= (gpte & PT_PAGE_SIZE_MASK) >> (PT_PAGE_SIZE_SHIFT - 2);
+    return mmu->last_pte_bitmap & (1 << index);
+}
+
 #define PTTYPE 64
 #include "paging_tmpl.h"
 #undef PTTYPE
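The two new gpte helpers are pure bit manipulation, so a worked example may help. This standalone sketch spells out the relevant x86 PTE bit positions; the numeric ACC_* values deliberately line up with the PTE's W and U/S bits, matching what the kernel code relies on, but the program itself is only illustrative.

#include <stdint.h>
#include <stdio.h>

#define PT_WRITABLE_SHIFT 1
#define PT_WRITABLE_MASK  (1ULL << PT_WRITABLE_SHIFT)
#define PT_USER_MASK      (1ULL << 2)
#define PT_DIRTY_SHIFT    6
#define PT_DIRTY_MASK     (1ULL << PT_DIRTY_SHIFT)
#define PT_PRESENT_MASK   (1ULL << 0)
#define PT64_NX_SHIFT     63
#define ACC_EXEC_MASK     1u
#define ACC_WRITE_MASK    2u
#define ACC_USER_MASK     4u

/* Derive the ACC_* access bits a guest pte grants. */
static unsigned gpte_access(uint64_t gpte)
{
    unsigned access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
    access &= ~(gpte >> PT64_NX_SHIFT);   /* NX set -> drop the exec bit */
    return access;
}

/* Only clean (D=0) gptes lose write permission here. */
static void protect_clean_gpte(unsigned *access, uint64_t gpte)
{
    unsigned mask = ~ACC_WRITE_MASK;
    mask |= (gpte >> (PT_DIRTY_SHIFT - PT_WRITABLE_SHIFT)) & PT_WRITABLE_MASK;
    *access &= mask;
}

int main(void)
{
    uint64_t clean = PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
    uint64_t dirty = clean | PT_DIRTY_MASK;

    unsigned a = gpte_access(clean), b = gpte_access(dirty);
    protect_clean_gpte(&a, clean);
    protect_clean_gpte(&b, dirty);
    printf("clean gpte -> access %u, dirty gpte -> access %u\n", a, b);
    /* clean: 5 (user+exec, write stripped); dirty: 7 (user+write+exec) */
    return 0;
}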
@@ -3457,6 +3525,56 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
     }
 }
 
+static void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
+{
+    unsigned bit, byte, pfec;
+    u8 map;
+    bool fault, x, w, u, wf, uf, ff, smep;
+
+    smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
+    for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) {
+        pfec = byte << 1;
+        map = 0;
+        wf = pfec & PFERR_WRITE_MASK;
+        uf = pfec & PFERR_USER_MASK;
+        ff = pfec & PFERR_FETCH_MASK;
+        for (bit = 0; bit < 8; ++bit) {
+            x = bit & ACC_EXEC_MASK;
+            w = bit & ACC_WRITE_MASK;
+            u = bit & ACC_USER_MASK;
+
+            /* Not really needed: !nx will cause pte.nx to fault */
+            x |= !mmu->nx;
+            /* Allow supervisor writes if !cr0.wp */
+            w |= !is_write_protection(vcpu) && !uf;
+            /* Disallow supervisor fetches of user code if cr4.smep */
+            x &= !(smep && u && !uf);
+
+            fault = (ff && !x) || (uf && !u) || (wf && !w);
+            map |= fault << bit;
+        }
+        mmu->permissions[byte] = map;
+    }
+}
+
+static void update_last_pte_bitmap(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
+{
+    u8 map;
+    unsigned level, root_level = mmu->root_level;
+    const unsigned ps_set_index = 1 << 2;  /* bit 2 of index: ps */
+
+    if (root_level == PT32E_ROOT_LEVEL)
+        --root_level;
+    /* PT_PAGE_TABLE_LEVEL always terminates */
+    map = 1 | (1 << ps_set_index);
+    for (level = PT_DIRECTORY_LEVEL; level <= root_level; ++level) {
+        if (level <= PT_PDPE_LEVEL
+            && (mmu->root_level >= PT32E_ROOT_LEVEL || is_pse(vcpu)))
+            map |= 1 << (ps_set_index | (level - 1));
+    }
+    mmu->last_pte_bitmap = map;
+}
+
 static int paging64_init_context_common(struct kvm_vcpu *vcpu,
                     struct kvm_mmu *context,
                     int level)
@@ -3465,6 +3583,8 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu,
     context->root_level = level;
 
     reset_rsvds_bits_mask(vcpu, context);
+    update_permission_bitmask(vcpu, context);
+    update_last_pte_bitmap(vcpu, context);
 
     ASSERT(is_pae(vcpu));
     context->new_cr3 = paging_new_cr3;
@@ -3493,6 +3613,8 @@ static int paging32_init_context(struct kvm_vcpu *vcpu,
     context->root_level = PT32_ROOT_LEVEL;
 
     reset_rsvds_bits_mask(vcpu, context);
+    update_permission_bitmask(vcpu, context);
+    update_last_pte_bitmap(vcpu, context);
 
     context->new_cr3 = paging_new_cr3;
     context->page_fault = paging32_page_fault;
@@ -3553,6 +3675,9 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
         context->gva_to_gpa = paging32_gva_to_gpa;
     }
 
+    update_permission_bitmask(vcpu, context);
+    update_last_pte_bitmap(vcpu, context);
+
     return 0;
 }
 
@@ -3628,6 +3753,9 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
         g_context->gva_to_gpa = paging32_gva_to_gpa_nested;
     }
 
+    update_permission_bitmask(vcpu, g_context);
+    update_last_pte_bitmap(vcpu, g_context);
+
     return 0;
 }
 
@@ -18,8 +18,10 @@
 #define PT_PCD_MASK (1ULL << 4)
 #define PT_ACCESSED_SHIFT 5
 #define PT_ACCESSED_MASK (1ULL << PT_ACCESSED_SHIFT)
-#define PT_DIRTY_MASK (1ULL << 6)
-#define PT_PAGE_SIZE_MASK (1ULL << 7)
+#define PT_DIRTY_SHIFT 6
+#define PT_DIRTY_MASK (1ULL << PT_DIRTY_SHIFT)
+#define PT_PAGE_SIZE_SHIFT 7
+#define PT_PAGE_SIZE_MASK (1ULL << PT_PAGE_SIZE_SHIFT)
 #define PT_PAT_MASK (1ULL << 7)
 #define PT_GLOBAL_MASK (1ULL << 8)
 #define PT64_NX_SHIFT 63
@@ -88,17 +90,14 @@ static inline bool is_write_protection(struct kvm_vcpu *vcpu)
 	return kvm_read_cr0_bits(vcpu, X86_CR0_WP);
 }
 
-static inline bool check_write_user_access(struct kvm_vcpu *vcpu,
-					   bool write_fault, bool user_fault,
-					   unsigned long pte)
+/*
+ * Will a fault with a given page-fault error code (pfec) cause a permission
+ * fault with the given access (in ACC_* format)?
+ */
+static inline bool permission_fault(struct kvm_mmu *mmu, unsigned pte_access,
+				    unsigned pfec)
 {
-	if (unlikely(write_fault && !is_writable_pte(pte)
-	      && (user_fault || is_write_protection(vcpu))))
-		return false;
-
-	if (unlikely(user_fault && !(pte & PT_USER_MASK)))
-		return false;
-
-	return true;
+	return (mmu->permissions[pfec >> 1] >> pte_access) & 1;
 }
 
 #endif
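The two sides of this optimization are the precompute (the update_permission_bitmask() loop near the top of this diff) and the one-line lookup above. A standalone user-space model of the idea, with the ACC_*/PFERR_* values redefined locally (a sketch, not the kernel code):

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    /* Local stand-ins for the kernel's constants (same values). */
    #define ACC_EXEC_MASK     1
    #define ACC_WRITE_MASK    2
    #define ACC_USER_MASK     4
    #define PFERR_WRITE_MASK  (1U << 1)
    #define PFERR_USER_MASK   (1U << 2)
    #define PFERR_FETCH_MASK  (1U << 4)

    static uint8_t permissions[16];   /* indexed by pfec >> 1, as in the hunk above */

    /* Precompute: one byte per error code, one bit per pte_access combination. */
    static void update_permission_bitmask(bool cr0_wp, bool nx, bool smep)
    {
            for (unsigned byte = 0; byte < 16; ++byte) {
                    unsigned pfec = byte << 1;
                    bool wf = pfec & PFERR_WRITE_MASK;
                    bool uf = pfec & PFERR_USER_MASK;
                    bool ff = pfec & PFERR_FETCH_MASK;
                    uint8_t map = 0;

                    for (unsigned bit = 0; bit < 8; ++bit) {
                            bool x = bit & ACC_EXEC_MASK;
                            bool w = bit & ACC_WRITE_MASK;
                            bool u = bit & ACC_USER_MASK;

                            x |= !nx;                 /* without nx, fetches never fault here */
                            w |= !cr0_wp && !uf;      /* supervisor writes allowed if !cr0.wp */
                            x &= !(smep && u && !uf); /* smep blocks supervisor fetch of user pages */

                            bool fault = (ff && !x) || (uf && !u) || (wf && !w);
                            map |= (uint8_t)fault << bit;
                    }
                    permissions[byte] = map;
            }
    }

    /* Lookup: the walker's permission check collapses to one shift and mask. */
    static bool permission_fault(unsigned pte_access, unsigned pfec)
    {
            return (permissions[pfec >> 1] >> pte_access) & 1;
    }

    int main(void)
    {
            update_permission_bitmask(true, true, false);
            /* user write to a supervisor-only, read-only page faults */
            assert(permission_fault(ACC_EXEC_MASK, PFERR_USER_MASK | PFERR_WRITE_MASK));
            /* supervisor read of a writable supervisor page does not */
            assert(!permission_fault(ACC_EXEC_MASK | ACC_WRITE_MASK, 0));
            return 0;
    }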
@@ -116,10 +116,8 @@ static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level)
 	gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt);
 	pfn = gfn_to_pfn_atomic(vcpu->kvm, gfn);
 
-	if (is_error_pfn(pfn)) {
-		kvm_release_pfn_clean(pfn);
+	if (is_error_pfn(pfn))
 		return;
-	}
 
 	hpa =  pfn << PAGE_SHIFT;
 	if ((*sptep & PT64_BASE_ADDR_MASK) != hpa)
@@ -190,7 +188,6 @@ static void check_mappings_rmap(struct kvm *kvm, struct kvm_mmu_page *sp)
 
 static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
-	struct kvm_memory_slot *slot;
 	unsigned long *rmapp;
 	u64 *sptep;
 	struct rmap_iterator iter;
@@ -198,8 +195,7 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp)
 	if (sp->role.direct || sp->unsync || sp->role.invalid)
 		return;
 
-	slot = gfn_to_memslot(kvm, sp->gfn);
-	rmapp = &slot->rmap[sp->gfn - slot->base_gfn];
+	rmapp = gfn_to_rmap(kvm, sp->gfn, PT_PAGE_TABLE_LEVEL);
 
 	for (sptep = rmap_get_first(*rmapp, &iter); sptep;
 	     sptep = rmap_get_next(&iter)) {
@@ -63,10 +63,12 @@
  */
 struct guest_walker {
 	int level;
+	unsigned max_level;
 	gfn_t table_gfn[PT_MAX_FULL_LEVELS];
 	pt_element_t ptes[PT_MAX_FULL_LEVELS];
 	pt_element_t prefetch_ptes[PTE_PREFETCH_NUM];
 	gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
+	pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS];
 	unsigned pt_access;
 	unsigned pte_access;
 	gfn_t gfn;
@@ -101,38 +103,41 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 	return (ret != orig_pte);
 }
 
-static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte,
-				   bool last)
+static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
+					     struct kvm_mmu *mmu,
+					     struct guest_walker *walker,
+					     int write_fault)
 {
-	unsigned access;
-
-	access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
-	if (last && !is_dirty_gpte(gpte))
-		access &= ~ACC_WRITE_MASK;
-
-#if PTTYPE == 64
-	if (vcpu->arch.mmu.nx)
-		access &= ~(gpte >> PT64_NX_SHIFT);
-#endif
-	return access;
-}
-
-static bool FNAME(is_last_gpte)(struct guest_walker *walker,
-				struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
-				pt_element_t gpte)
-{
-	if (walker->level == PT_PAGE_TABLE_LEVEL)
-		return true;
-
-	if ((walker->level == PT_DIRECTORY_LEVEL) && is_large_pte(gpte) &&
-	    (PTTYPE == 64 || is_pse(vcpu)))
-		return true;
-
-	if ((walker->level == PT_PDPE_LEVEL) && is_large_pte(gpte) &&
-	    (mmu->root_level == PT64_ROOT_LEVEL))
-		return true;
-
-	return false;
+	unsigned level, index;
+	pt_element_t pte, orig_pte;
+	pt_element_t __user *ptep_user;
+	gfn_t table_gfn;
+	int ret;
+
+	for (level = walker->max_level; level >= walker->level; --level) {
+		pte = orig_pte = walker->ptes[level - 1];
+		table_gfn = walker->table_gfn[level - 1];
+		ptep_user = walker->ptep_user[level - 1];
+		index = offset_in_page(ptep_user) / sizeof(pt_element_t);
+		if (!(pte & PT_ACCESSED_MASK)) {
+			trace_kvm_mmu_set_accessed_bit(table_gfn, index, sizeof(pte));
+			pte |= PT_ACCESSED_MASK;
+		}
+		if (level == walker->level && write_fault && !is_dirty_gpte(pte)) {
+			trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
+			pte |= PT_DIRTY_MASK;
+		}
+		if (pte == orig_pte)
+			continue;
+
+		ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, orig_pte, pte);
+		if (ret)
+			return ret;
+
+		mark_page_dirty(vcpu->kvm, table_gfn);
+		walker->ptes[level] = pte;
+	}
+	return 0;
 }
 
 /*
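update_accessed_dirty_bits() relies on cmpxchg_gpte() returning non-zero when the guest PTE changed underneath it, in which case the whole walk is retried. A rough user-space model of that update-or-retry step, using a plain C11 atomic compare-exchange in place of the kernel's user-memory cmpxchg (a sketch only):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    #define PT_ACCESSED_MASK (1ULL << 5)
    #define PT_DIRTY_MASK    (1ULL << 6)

    /* Returns false if the PTE changed concurrently and the walk must be retried. */
    static bool set_accessed_dirty(_Atomic uint64_t *ptep, bool write_fault)
    {
            uint64_t orig = atomic_load(ptep);
            uint64_t pte = orig | PT_ACCESSED_MASK;

            if (write_fault)
                    pte |= PT_DIRTY_MASK;
            if (pte == orig)
                    return true;    /* nothing to do */

            /* On failure the caller retries the walk, as the hunk above does. */
            return atomic_compare_exchange_strong(ptep, &orig, pte);
    }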
@@ -142,21 +147,22 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 				    struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 				    gva_t addr, u32 access)
 {
+	int ret;
 	pt_element_t pte;
 	pt_element_t __user *uninitialized_var(ptep_user);
 	gfn_t table_gfn;
-	unsigned index, pt_access, uninitialized_var(pte_access);
+	unsigned index, pt_access, pte_access, accessed_dirty, shift;
 	gpa_t pte_gpa;
-	bool eperm, last_gpte;
 	int offset;
 	const int write_fault = access & PFERR_WRITE_MASK;
 	const int user_fault  = access & PFERR_USER_MASK;
 	const int fetch_fault = access & PFERR_FETCH_MASK;
 	u16 errcode = 0;
+	gpa_t real_gpa;
+	gfn_t gfn;
 
 	trace_kvm_mmu_pagetable_walk(addr, access);
 retry_walk:
-	eperm = false;
 	walker->level = mmu->root_level;
 	pte           = mmu->get_cr3(vcpu);
 
@@ -169,15 +175,21 @@ retry_walk:
 		--walker->level;
 	}
 #endif
+	walker->max_level = walker->level;
 	ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
 	       (mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0);
 
-	pt_access = ACC_ALL;
+	accessed_dirty = PT_ACCESSED_MASK;
+	pt_access = pte_access = ACC_ALL;
+	++walker->level;
 
-	for (;;) {
+	do {
 		gfn_t real_gfn;
 		unsigned long host_addr;
 
+		pt_access &= pte_access;
+		--walker->level;
+
 		index = PT_INDEX(addr, walker->level);
 
 		table_gfn = gpte_to_gfn(pte);
@@ -199,6 +211,7 @@ retry_walk:
 		ptep_user = (pt_element_t __user *)((void *)host_addr + offset);
 		if (unlikely(__copy_from_user(&pte, ptep_user, sizeof(pte))))
 			goto error;
+		walker->ptep_user[walker->level - 1] = ptep_user;
 
 		trace_kvm_mmu_paging_element(pte, walker->level);
 
@@ -211,92 +224,48 @@ retry_walk:
 			goto error;
 		}
 
-		if (!check_write_user_access(vcpu, write_fault, user_fault,
-					     pte))
-			eperm = true;
-
-#if PTTYPE == 64
-		if (unlikely(fetch_fault && (pte & PT64_NX_MASK)))
-			eperm = true;
-#endif
-
-		last_gpte = FNAME(is_last_gpte)(walker, vcpu, mmu, pte);
-		if (last_gpte) {
-			pte_access = pt_access &
-				     FNAME(gpte_access)(vcpu, pte, true);
-			/* check if the kernel is fetching from user page */
-			if (unlikely(pte_access & PT_USER_MASK) &&
-			    kvm_read_cr4_bits(vcpu, X86_CR4_SMEP))
-				if (fetch_fault && !user_fault)
-					eperm = true;
-		}
-
-		if (!eperm && unlikely(!(pte & PT_ACCESSED_MASK))) {
-			int ret;
-			trace_kvm_mmu_set_accessed_bit(table_gfn, index,
-						       sizeof(pte));
-			ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index,
-						  pte, pte|PT_ACCESSED_MASK);
-			if (unlikely(ret < 0))
-				goto error;
-			else if (ret)
-				goto retry_walk;
-
-			mark_page_dirty(vcpu->kvm, table_gfn);
-			pte |= PT_ACCESSED_MASK;
-		}
+		accessed_dirty &= pte;
+		pte_access = pt_access & gpte_access(vcpu, pte);
 
 		walker->ptes[walker->level - 1] = pte;
+	} while (!is_last_gpte(mmu, walker->level, pte));
 
-		if (last_gpte) {
-			int lvl = walker->level;
-			gpa_t real_gpa;
-			gfn_t gfn;
-			u32 ac;
-
-			gfn = gpte_to_gfn_lvl(pte, lvl);
-			gfn += (addr & PT_LVL_OFFSET_MASK(lvl)) >> PAGE_SHIFT;
-
-			if (PTTYPE == 32 &&
-			    walker->level == PT_DIRECTORY_LEVEL &&
-			    is_cpuid_PSE36())
-				gfn += pse36_gfn_delta(pte);
-
-			ac = write_fault | fetch_fault | user_fault;
-
-			real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn),
-						      ac);
-			if (real_gpa == UNMAPPED_GVA)
-				return 0;
-
-			walker->gfn = real_gpa >> PAGE_SHIFT;
-
-			break;
-		}
-
-		pt_access &= FNAME(gpte_access)(vcpu, pte, false);
-		--walker->level;
-	}
-
-	if (unlikely(eperm)) {
+	if (unlikely(permission_fault(mmu, pte_access, access))) {
 		errcode |= PFERR_PRESENT_MASK;
 		goto error;
 	}
 
-	if (write_fault && unlikely(!is_dirty_gpte(pte))) {
-		int ret;
+	gfn = gpte_to_gfn_lvl(pte, walker->level);
+	gfn += (addr & PT_LVL_OFFSET_MASK(walker->level)) >> PAGE_SHIFT;
+
+	if (PTTYPE == 32 && walker->level == PT_DIRECTORY_LEVEL && is_cpuid_PSE36())
+		gfn += pse36_gfn_delta(pte);
+
+	real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), access);
+	if (real_gpa == UNMAPPED_GVA)
+		return 0;
+
+	walker->gfn = real_gpa >> PAGE_SHIFT;
+
+	if (!write_fault)
+		protect_clean_gpte(&pte_access, pte);
 
-		trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
-		ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index,
-					  pte, pte|PT_DIRTY_MASK);
+	/*
+	 * On a write fault, fold the dirty bit into accessed_dirty by shifting it one
+	 * place right.
+	 *
+	 * On a read fault, do nothing.
+	 */
+	shift = write_fault >> ilog2(PFERR_WRITE_MASK);
+	shift *= PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT;
+	accessed_dirty &= pte >> shift;
+
+	if (unlikely(!accessed_dirty)) {
+		ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault);
 		if (unlikely(ret < 0))
 			goto error;
 		else if (ret)
 			goto retry_walk;
-
-		mark_page_dirty(vcpu->kvm, table_gfn);
-		pte |= PT_DIRTY_MASK;
-		walker->ptes[walker->level - 1] = pte;
 	}
 
 	walker->pt_access = pt_access;
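The accessed_dirty bookkeeping above avoids a branch: on a write fault the dirty bit of the final gpte is shifted down onto the accessed-bit position and ANDed in, so a clean gpte leaves accessed_dirty zero and forces the slow update path. A small standalone check of that identity, with PFERR_WRITE_MASK, PT_ACCESSED_SHIFT and PT_DIRTY_SHIFT redefined locally to the same values:

    #include <assert.h>
    #include <stdint.h>

    #define PFERR_WRITE_MASK  (1U << 1)
    #define PT_ACCESSED_SHIFT 5
    #define PT_DIRTY_SHIFT    6
    #define ILOG2_PFERR_WRITE 1            /* ilog2(PFERR_WRITE_MASK) */

    static uint64_t fold(uint64_t accessed_dirty, uint64_t pte, unsigned write_fault)
    {
            unsigned shift = write_fault >> ILOG2_PFERR_WRITE;   /* 0 or 1 */

            shift *= PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT;         /* distance is 1 bit here */
            return accessed_dirty & (pte >> shift);
    }

    int main(void)
    {
            uint64_t accessed = 1ULL << PT_ACCESSED_SHIFT;
            uint64_t dirty    = 1ULL << PT_DIRTY_SHIFT;

            /* read fault: only the accessed bit matters */
            assert(fold(accessed, accessed, 0) != 0);
            /* write fault to a clean pte: result is 0, so the update path runs */
            assert(fold(accessed, accessed, PFERR_WRITE_MASK) == 0);
            /* write fault to an accessed+dirty pte: nothing left to do */
            assert(fold(accessed, accessed | dirty, PFERR_WRITE_MASK) != 0);
            return 0;
    }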
@@ -368,12 +337,11 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 		return;
 
 	pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
-	pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte, true);
+	pte_access = sp->role.access & gpte_access(vcpu, gpte);
+	protect_clean_gpte(&pte_access, gpte);
 	pfn = gfn_to_pfn_atomic(vcpu->kvm, gpte_to_gfn(gpte));
-	if (mmu_invalid_pfn(pfn)) {
-		kvm_release_pfn_clean(pfn);
+	if (mmu_invalid_pfn(pfn))
 		return;
-	}
 
 	/*
 	 * we call mmu_set_spte() with host_writable = true because that
@@ -443,15 +411,13 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
 		if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte))
 			continue;
 
-		pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte,
-								  true);
+		pte_access = sp->role.access & gpte_access(vcpu, gpte);
+		protect_clean_gpte(&pte_access, gpte);
 		gfn = gpte_to_gfn(gpte);
 		pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
 				      pte_access & ACC_WRITE_MASK);
-		if (mmu_invalid_pfn(pfn)) {
-			kvm_release_pfn_clean(pfn);
+		if (mmu_invalid_pfn(pfn))
 			break;
-		}
 
 		mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
 			     NULL, PT_PAGE_TABLE_LEVEL, gfn,
@@ -798,7 +764,8 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 
 		gfn = gpte_to_gfn(gpte);
 		pte_access = sp->role.access;
-		pte_access &= FNAME(gpte_access)(vcpu, gpte, true);
+		pte_access &= gpte_access(vcpu, gpte);
+		protect_clean_gpte(&pte_access, gpte);
 
 		if (sync_mmio_spte(&sp->spt[i], gfn, pte_access, &nr_present))
 			continue;
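The new callers above pair gpte_access() with protect_clean_gpte(): the access bits are derived once, and write permission is then stripped unless the gpte is already dirty, so the first write still faults and sets the dirty bit. A hedged sketch of that helper's logic (the real helper lives outside this hunk; constants are redefined locally):

    #include <stdint.h>

    #define ACC_WRITE_MASK    2
    #define PT_WRITABLE_SHIFT 1
    #define PT_DIRTY_SHIFT    6

    /* Drop write permission unless the guest PTE is already marked dirty. */
    static inline void protect_clean_gpte(unsigned *access, uint64_t gpte)
    {
            unsigned mask = (unsigned)~ACC_WRITE_MASK;

            /* move the dirty bit down onto the writable/ACC_WRITE position */
            mask |= (gpte >> (PT_DIRTY_SHIFT - PT_WRITABLE_SHIFT)) & ACC_WRITE_MASK;
            *access &= mask;
    }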
@@ -1,5 +1,5 @@
 /*
- * Kernel-based Virtual Machine -- Performane Monitoring Unit support
+ * Kernel-based Virtual Machine -- Performance Monitoring Unit support
  *
  * Copyright 2011 Red Hat, Inc. and/or its affiliates.
  *
@@ -163,7 +163,7 @@ static DEFINE_PER_CPU(u64, current_tsc_ratio);
 
 #define MSR_INVALID			0xffffffffU
 
-static struct svm_direct_access_msrs {
+static const struct svm_direct_access_msrs {
 	u32 index;   /* Index of the MSR */
 	bool always; /* True if intercept is always on */
 } direct_access_msrs[] = {
@@ -400,7 +400,7 @@ struct svm_init_data {
 	int r;
 };
 
-static u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};
+static const u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};
 
 #define NUM_MSR_MAPS ARRAY_SIZE(msrpm_ranges)
 #define MSRS_RANGE_SIZE 2048
@@ -1146,7 +1146,6 @@ static void init_vmcb(struct vcpu_svm *svm)
 
 	svm_set_efer(&svm->vcpu, 0);
 	save->dr6 = 0xffff0ff0;
-	save->dr7 = 0x400;
 	kvm_set_rflags(&svm->vcpu, 2);
 	save->rip = 0x0000fff0;
 	svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;
@@ -1643,7 +1642,7 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
 	mark_dirty(svm->vmcb, VMCB_SEG);
 }
 
-static void update_db_intercept(struct kvm_vcpu *vcpu)
+static void update_db_bp_intercept(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
@@ -1663,20 +1662,6 @@ static void update_db_intercept(struct kvm_vcpu *vcpu)
 		vcpu->guest_debug = 0;
 }
 
-static void svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
-{
-	struct vcpu_svm *svm = to_svm(vcpu);
-
-	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
-		svm->vmcb->save.dr7 = dbg->arch.debugreg[7];
-	else
-		svm->vmcb->save.dr7 = vcpu->arch.dr7;
-
-	mark_dirty(svm->vmcb, VMCB_DR);
-
-	update_db_intercept(vcpu);
-}
-
 static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
 {
 	if (sd->next_asid > sd->max_asid) {
@@ -1748,7 +1733,7 @@ static int db_interception(struct vcpu_svm *svm)
 		if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
 			svm->vmcb->save.rflags &=
 				~(X86_EFLAGS_TF | X86_EFLAGS_RF);
-		update_db_intercept(&svm->vcpu);
+		update_db_bp_intercept(&svm->vcpu);
 	}
 
 	if (svm->vcpu.guest_debug &
@@ -2063,7 +2048,7 @@ static inline bool nested_svm_intr(struct vcpu_svm *svm)
 	if (svm->nested.intercept & 1ULL) {
 		/*
 		 * The #vmexit can't be emulated here directly because this
-		 * code path runs with irqs and preemtion disabled. A
+		 * code path runs with irqs and preemption disabled. A
 		 * #vmexit emulation might sleep. Only signal request for
 		 * the #vmexit here.
 		 */
@@ -2105,7 +2090,6 @@ static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page)
 	return kmap(page);
 
 error:
-	kvm_release_page_clean(page);
 	kvm_inject_gp(&svm->vcpu, 0);
 
 	return NULL;
@@ -2409,7 +2393,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
 {
 	/*
 	 * This function merges the msr permission bitmaps of kvm and the
-	 * nested vmcb. It is omptimized in that it only merges the parts where
+	 * nested vmcb. It is optimized in that it only merges the parts where
 	 * the kvm msr permission bitmap may contain zero bits
 	 */
 	int i;
@@ -3268,7 +3252,7 @@ static int pause_interception(struct vcpu_svm *svm)
 	return 1;
 }
 
-static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
+static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
 	[SVM_EXIT_READ_CR0]			= cr_interception,
 	[SVM_EXIT_READ_CR3]			= cr_interception,
 	[SVM_EXIT_READ_CR4]			= cr_interception,
@@ -3660,7 +3644,7 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
 	 */
 	svm->nmi_singlestep = true;
 	svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
-	update_db_intercept(vcpu);
+	update_db_bp_intercept(vcpu);
 }
 
 static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
@@ -3783,12 +3767,6 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu)
 	svm_complete_interrupts(svm);
 }
 
-#ifdef CONFIG_X86_64
-#define R "r"
-#else
-#define R "e"
-#endif
-
 static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -3815,13 +3793,13 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 	local_irq_enable();
 
 	asm volatile (
-		"push %%"R"bp; \n\t"
-		"mov %c[rbx](%[svm]), %%"R"bx \n\t"
-		"mov %c[rcx](%[svm]), %%"R"cx \n\t"
-		"mov %c[rdx](%[svm]), %%"R"dx \n\t"
-		"mov %c[rsi](%[svm]), %%"R"si \n\t"
-		"mov %c[rdi](%[svm]), %%"R"di \n\t"
-		"mov %c[rbp](%[svm]), %%"R"bp \n\t"
+		"push %%" _ASM_BP "; \n\t"
+		"mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t"
+		"mov %c[rcx](%[svm]), %%" _ASM_CX " \n\t"
+		"mov %c[rdx](%[svm]), %%" _ASM_DX " \n\t"
+		"mov %c[rsi](%[svm]), %%" _ASM_SI " \n\t"
+		"mov %c[rdi](%[svm]), %%" _ASM_DI " \n\t"
+		"mov %c[rbp](%[svm]), %%" _ASM_BP " \n\t"
 #ifdef CONFIG_X86_64
 		"mov %c[r8](%[svm]),  %%r8  \n\t"
 		"mov %c[r9](%[svm]),  %%r9  \n\t"
@@ -3834,20 +3812,20 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 #endif
 
 		/* Enter guest mode */
-		"push %%"R"ax \n\t"
-		"mov %c[vmcb](%[svm]), %%"R"ax \n\t"
+		"push %%" _ASM_AX " \n\t"
+		"mov %c[vmcb](%[svm]), %%" _ASM_AX " \n\t"
 		__ex(SVM_VMLOAD) "\n\t"
 		__ex(SVM_VMRUN) "\n\t"
 		__ex(SVM_VMSAVE) "\n\t"
-		"pop %%"R"ax \n\t"
+		"pop %%" _ASM_AX " \n\t"
 
 		/* Save guest registers, load host registers */
-		"mov %%"R"bx, %c[rbx](%[svm]) \n\t"
-		"mov %%"R"cx, %c[rcx](%[svm]) \n\t"
-		"mov %%"R"dx, %c[rdx](%[svm]) \n\t"
-		"mov %%"R"si, %c[rsi](%[svm]) \n\t"
-		"mov %%"R"di, %c[rdi](%[svm]) \n\t"
-		"mov %%"R"bp, %c[rbp](%[svm]) \n\t"
+		"mov %%" _ASM_BX ", %c[rbx](%[svm]) \n\t"
+		"mov %%" _ASM_CX ", %c[rcx](%[svm]) \n\t"
+		"mov %%" _ASM_DX ", %c[rdx](%[svm]) \n\t"
+		"mov %%" _ASM_SI ", %c[rsi](%[svm]) \n\t"
+		"mov %%" _ASM_DI ", %c[rdi](%[svm]) \n\t"
+		"mov %%" _ASM_BP ", %c[rbp](%[svm]) \n\t"
 #ifdef CONFIG_X86_64
 		"mov %%r8,  %c[r8](%[svm]) \n\t"
 		"mov %%r9,  %c[r9](%[svm]) \n\t"
@@ -3858,7 +3836,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 		"mov %%r14, %c[r14](%[svm]) \n\t"
 		"mov %%r15, %c[r15](%[svm]) \n\t"
 #endif
-		"pop %%"R"bp"
+		"pop %%" _ASM_BP
 		:
 		: [svm]"a"(svm),
 		  [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)),
@@ -3879,9 +3857,11 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 		  [r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15]))
 #endif
 		: "cc", "memory"
-		, R"bx", R"cx", R"dx", R"si", R"di"
 #ifdef CONFIG_X86_64
+		, "rbx", "rcx", "rdx", "rsi", "rdi"
 		, "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
+#else
+		, "ebx", "ecx", "edx", "esi", "edi"
 #endif
 		);
 
@@ -3941,8 +3921,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 	mark_all_clean(svm->vmcb);
 }
 
-#undef R
-
 static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
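The ad-hoc R macro is replaced by the helpers from <asm/asm.h>, which expand to the register name of the right width at compile time. Roughly, and only as a simplified sketch of the idea rather than the kernel header verbatim:

    #ifdef CONFIG_X86_64
    # define __ASM_SEL(a, b)  b
    #else
    # define __ASM_SEL(a, b)  a
    #endif
    #define __ASM_REG(reg)    __ASM_SEL("e" #reg, "r" #reg)
    #define _ASM_BP           __ASM_REG(bp)   /* "rbp" or "ebp" */
    #define _ASM_AX           __ASM_REG(ax)   /* "rax" or "eax" */

    /* so  "push %%" _ASM_BP  becomes  "push %%rbp" on x86-64 and
     * "push %%ebp" on 32-bit, with no local R macro to #undef later. */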
@@ -4069,7 +4047,7 @@ static void svm_fpu_deactivate(struct kvm_vcpu *vcpu)
 #define POST_MEM(exit) { .exit_code = (exit), \
 			.stage = X86_ICPT_POST_MEMACCESS, }
 
-static struct __x86_intercept {
+static const struct __x86_intercept {
 	u32 exit_code;
 	enum x86_intercept_stage stage;
 } x86_intercept_map[] = {
@@ -4260,7 +4238,7 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.vcpu_load = svm_vcpu_load,
 	.vcpu_put = svm_vcpu_put,
 
-	.set_guest_debug = svm_guest_debug,
+	.update_db_bp_intercept = update_db_bp_intercept,
 	.get_msr = svm_get_msr,
 	.set_msr = svm_set_msr,
 	.get_segment_base = svm_get_segment_base,
@@ -1,47 +0,0 @@
-/*
- * Kernel-based Virtual Machine driver for Linux
- *
- * This module enables machines with Intel VT-x extensions to run virtual
- * machines without emulation or binary translation.
- *
- * timer support
- *
- * Copyright 2010 Red Hat, Inc. and/or its affiliates.
- *
- * This work is licensed under the terms of the GNU GPL, version 2.  See
- * the COPYING file in the top-level directory.
- */
-
-#include <linux/kvm_host.h>
-#include <linux/kvm.h>
-#include <linux/hrtimer.h>
-#include <linux/atomic.h>
-#include "kvm_timer.h"
-
-enum hrtimer_restart kvm_timer_fn(struct hrtimer *data)
-{
-	struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
-	struct kvm_vcpu *vcpu = ktimer->vcpu;
-	wait_queue_head_t *q = &vcpu->wq;
-
-	/*
-	 * There is a race window between reading and incrementing, but we do
-	 * not care about potentially losing timer events in the !reinject
-	 * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked
-	 * in vcpu_enter_guest.
-	 */
-	if (ktimer->reinject || !atomic_read(&ktimer->pending)) {
-		atomic_inc(&ktimer->pending);
-		/* FIXME: this code should not know anything about vcpus */
-		kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
-	}
-
-	if (waitqueue_active(q))
-		wake_up_interruptible(q);
-
-	if (ktimer->t_ops->is_periodic(ktimer)) {
-		hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
-		return HRTIMER_RESTART;
-	} else
-		return HRTIMER_NORESTART;
-}
@@ -127,6 +127,8 @@ module_param(ple_gap, int, S_IRUGO);
 static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
 module_param(ple_window, int, S_IRUGO);
 
+extern const ulong vmx_return;
+
 #define NR_AUTOLOAD_MSRS 8
 #define VMCS02_POOL_SIZE 1
 
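This declaration pairs with the asm change later in this file: instead of computing the address of a local .Lkvm_vmx_return label at run time, the vmexit address is emitted into .rodata under a global symbol that C code can reference directly, which keeps the reference visible to the linker even under link-time optimization. The pattern, reduced to a toy example (a sketch, not the kernel code):

    /* export the address of an asm-local label as a C-visible constant */
    extern const unsigned long my_ret_addr;

    void f(void)
    {
            asm volatile("1:\n\t"
                         ".pushsection .rodata\n\t"
                         ".global my_ret_addr\n\t"
                         "my_ret_addr: .quad 1b\n\t"   /* assumes a 64-bit target */
                         ".popsection");
    }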
@@ -405,16 +407,16 @@ struct vcpu_vmx {
 	struct {
 		int vm86_active;
 		ulong save_rflags;
+		struct kvm_segment segs[8];
+	} rmode;
+	struct {
+		u32 bitmask; /* 4 bits per segment (1 bit per field) */
 		struct kvm_save_segment {
 			u16 selector;
 			unsigned long base;
 			u32 limit;
 			u32 ar;
-		} tr, es, ds, fs, gs;
-	} rmode;
-	struct {
-		u32 bitmask; /* 4 bits per segment (1 bit per field) */
-		struct kvm_save_segment seg[8];
+		} seg[8];
 	} segment_cache;
 	int vpid;
 	bool emulation_required;
@@ -450,7 +452,7 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
 #define FIELD64(number, name)	[number] = VMCS12_OFFSET(name), \
 				[number##_HIGH] = VMCS12_OFFSET(name)+4
 
-static unsigned short vmcs_field_to_offset_table[] = {
+static const unsigned short vmcs_field_to_offset_table[] = {
 	FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id),
 	FIELD(GUEST_ES_SELECTOR, guest_es_selector),
 	FIELD(GUEST_CS_SELECTOR, guest_cs_selector),
@@ -596,10 +598,9 @@ static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu)
 static struct page *nested_get_page(struct kvm_vcpu *vcpu, gpa_t addr)
 {
 	struct page *page = gfn_to_page(vcpu->kvm, addr >> PAGE_SHIFT);
-	if (is_error_page(page)) {
-		kvm_release_page_clean(page);
+	if (is_error_page(page))
 		return NULL;
-	}
+
 	return page;
 }
 
@@ -667,7 +668,7 @@ static struct vmx_capability {
 		.ar_bytes = GUEST_##seg##_AR_BYTES,		\
 	}
 
-static struct kvm_vmx_segment_field {
+static const struct kvm_vmx_segment_field {
 	unsigned selector;
 	unsigned base;
 	unsigned limit;
@@ -1343,7 +1344,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
 	guest_efer = vmx->vcpu.arch.efer;
 
 	/*
-	 * NX is emulated; LMA and LME handled by hardware; SCE meaninless
+	 * NX is emulated; LMA and LME handled by hardware; SCE meaningless
 	 * outside long mode
 	 */
 	ignore_bits = EFER_NX | EFER_SCE;
@@ -1995,7 +1996,7 @@ static __init void nested_vmx_setup_ctls_msrs(void)
 #endif
 		CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
 		CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING |
-		CPU_BASED_RDPMC_EXITING |
+		CPU_BASED_RDPMC_EXITING | CPU_BASED_RDTSC_EXITING |
 		CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
 	/*
 	 * We can allow some features even when not supported by the
@@ -2291,16 +2292,6 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
 	}
 }
 
-static void set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
-{
-	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
-		vmcs_writel(GUEST_DR7, dbg->arch.debugreg[7]);
-	else
-		vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
-
-	update_exception_bitmap(vcpu);
-}
-
 static __init int cpu_has_kvm_support(void)
 {
 	return cpu_has_vmx();
@@ -2698,20 +2689,17 @@ static __exit void hardware_unsetup(void)
 	free_kvm_area();
 }
 
-static void fix_pmode_dataseg(int seg, struct kvm_save_segment *save)
+static void fix_pmode_dataseg(struct kvm_vcpu *vcpu, int seg, struct kvm_segment *save)
 {
-	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
+	const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
+	struct kvm_segment tmp = *save;
 
-	if (vmcs_readl(sf->base) == save->base && (save->base & AR_S_MASK)) {
-		vmcs_write16(sf->selector, save->selector);
-		vmcs_writel(sf->base, save->base);
-		vmcs_write32(sf->limit, save->limit);
-		vmcs_write32(sf->ar_bytes, save->ar);
-	} else {
-		u32 dpl = (vmcs_read16(sf->selector) & SELECTOR_RPL_MASK)
-			<< AR_DPL_SHIFT;
-		vmcs_write32(sf->ar_bytes, 0x93 | dpl);
+	if (!(vmcs_readl(sf->base) == tmp.base && tmp.s)) {
+		tmp.base = vmcs_readl(sf->base);
+		tmp.selector = vmcs_read16(sf->selector);
+		tmp.s = 1;
 	}
+	vmx_set_segment(vcpu, &tmp, seg);
 }
 
 static void enter_pmode(struct kvm_vcpu *vcpu)
@@ -2724,10 +2712,7 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
 
 	vmx_segment_cache_clear(vmx);
 
-	vmcs_write16(GUEST_TR_SELECTOR, vmx->rmode.tr.selector);
-	vmcs_writel(GUEST_TR_BASE, vmx->rmode.tr.base);
-	vmcs_write32(GUEST_TR_LIMIT, vmx->rmode.tr.limit);
-	vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar);
+	vmx_set_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
 
 	flags = vmcs_readl(GUEST_RFLAGS);
 	flags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
@@ -2742,10 +2727,10 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
 	if (emulate_invalid_guest_state)
 		return;
 
-	fix_pmode_dataseg(VCPU_SREG_ES, &vmx->rmode.es);
-	fix_pmode_dataseg(VCPU_SREG_DS, &vmx->rmode.ds);
-	fix_pmode_dataseg(VCPU_SREG_GS, &vmx->rmode.gs);
-	fix_pmode_dataseg(VCPU_SREG_FS, &vmx->rmode.fs);
+	fix_pmode_dataseg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]);
+	fix_pmode_dataseg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
+	fix_pmode_dataseg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
+	fix_pmode_dataseg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
 
 	vmx_segment_cache_clear(vmx);
 
@@ -2773,14 +2758,10 @@ static gva_t rmode_tss_base(struct kvm *kvm)
 	return kvm->arch.tss_addr;
 }
 
-static void fix_rmode_seg(int seg, struct kvm_save_segment *save)
+static void fix_rmode_seg(int seg, struct kvm_segment *save)
 {
-	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
+	const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
 
-	save->selector = vmcs_read16(sf->selector);
-	save->base = vmcs_readl(sf->base);
-	save->limit = vmcs_read32(sf->limit);
-	save->ar = vmcs_read32(sf->ar_bytes);
 	vmcs_write16(sf->selector, save->base >> 4);
 	vmcs_write32(sf->base, save->base & 0xffff0);
 	vmcs_write32(sf->limit, 0xffff);
@@ -2800,9 +2781,16 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
 	if (enable_unrestricted_guest)
 		return;
 
+	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
+	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
+	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
+	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS);
+	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS);
+
 	vmx->emulation_required = 1;
 	vmx->rmode.vm86_active = 1;
 
+
 	/*
 	 * Very old userspace does not call KVM_SET_TSS_ADDR before entering
 	 * vcpu. Call it here with phys address pointing 16M below 4G.
@@ -2817,14 +2805,8 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
 
 	vmx_segment_cache_clear(vmx);
 
-	vmx->rmode.tr.selector = vmcs_read16(GUEST_TR_SELECTOR);
-	vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
 	vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm));
-
-	vmx->rmode.tr.limit = vmcs_read32(GUEST_TR_LIMIT);
 	vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1);
-
-	vmx->rmode.tr.ar = vmcs_read32(GUEST_TR_AR_BYTES);
 	vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
 
 	flags = vmcs_readl(GUEST_RFLAGS);
@@ -3117,35 +3099,24 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
 			     struct kvm_segment *var, int seg)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	struct kvm_save_segment *save;
 	u32 ar;
 
 	if (vmx->rmode.vm86_active
 	    && (seg == VCPU_SREG_TR || seg == VCPU_SREG_ES
 		|| seg == VCPU_SREG_DS || seg == VCPU_SREG_FS
-		|| seg == VCPU_SREG_GS)
-	    && !emulate_invalid_guest_state) {
-		switch (seg) {
-		case VCPU_SREG_TR: save = &vmx->rmode.tr; break;
-		case VCPU_SREG_ES: save = &vmx->rmode.es; break;
-		case VCPU_SREG_DS: save = &vmx->rmode.ds; break;
-		case VCPU_SREG_FS: save = &vmx->rmode.fs; break;
-		case VCPU_SREG_GS: save = &vmx->rmode.gs; break;
-		default: BUG();
-		}
-		var->selector = save->selector;
-		var->base = save->base;
-		var->limit = save->limit;
-		ar = save->ar;
+		|| seg == VCPU_SREG_GS)) {
+		*var = vmx->rmode.segs[seg];
 		if (seg == VCPU_SREG_TR
 		    || var->selector == vmx_read_guest_seg_selector(vmx, seg))
-			goto use_saved_rmode_seg;
+			return;
+		var->base = vmx_read_guest_seg_base(vmx, seg);
+		var->selector = vmx_read_guest_seg_selector(vmx, seg);
+		return;
 	}
 	var->base = vmx_read_guest_seg_base(vmx, seg);
 	var->limit = vmx_read_guest_seg_limit(vmx, seg);
 	var->selector = vmx_read_guest_seg_selector(vmx, seg);
 	ar = vmx_read_guest_seg_ar(vmx, seg);
-use_saved_rmode_seg:
 	if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state)
 		ar = 0;
 	var->type = ar & 15;
@@ -3227,23 +3198,21 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
 			    struct kvm_segment *var, int seg)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
+	const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
 	u32 ar;
 
 	vmx_segment_cache_clear(vmx);
 
 	if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) {
 		vmcs_write16(sf->selector, var->selector);
-		vmx->rmode.tr.selector = var->selector;
-		vmx->rmode.tr.base = var->base;
-		vmx->rmode.tr.limit = var->limit;
-		vmx->rmode.tr.ar = vmx_segment_access_rights(var);
+		vmx->rmode.segs[VCPU_SREG_TR] = *var;
 		return;
 	}
 	vmcs_writel(sf->base, var->base);
 	vmcs_write32(sf->limit, var->limit);
 	vmcs_write16(sf->selector, var->selector);
 	if (vmx->rmode.vm86_active && var->s) {
+		vmx->rmode.segs[seg] = *var;
 		/*
 		 * Hack real-mode segments into vm86 compatibility.
 		 */
@@ -3258,7 +3227,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
 	 * qemu binaries.
 	 * IA32 arch specifies that at the time of processor reset the
 	 * "Accessed" bit in the AR field of segment registers is 1. And qemu
-	 * is setting it to 0 in the usedland code. This causes invalid guest
+	 * is setting it to 0 in the userland code. This causes invalid guest
 	 * state vmexit when "unrestricted guest" mode is turned on.
 	 * Fix for this setup issue in cpu_reset is being pushed in the qemu
 	 * tree. Newer qemu binaries with that qemu fix would not need this
@@ -3288,16 +3257,10 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
 				    vmcs_readl(GUEST_CS_BASE) >> 4);
 		break;
 	case VCPU_SREG_ES:
-		fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.es);
-		break;
 	case VCPU_SREG_DS:
-		fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.ds);
-		break;
 	case VCPU_SREG_GS:
-		fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.gs);
-		break;
 	case VCPU_SREG_FS:
-		fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.fs);
+		fix_rmode_seg(seg, &vmx->rmode.segs[seg]);
 		break;
 	case VCPU_SREG_SS:
 		vmcs_write16(GUEST_SS_SELECTOR,
@@ -3351,9 +3314,9 @@ static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg)
 
 	if (var.base != (var.selector << 4))
 		return false;
-	if (var.limit != 0xffff)
+	if (var.limit < 0xffff)
 		return false;
-	if (ar != 0xf3)
+	if (((ar | (3 << AR_DPL_SHIFT)) & ~(AR_G_MASK | AR_DB_MASK)) != 0xf3)
 		return false;
 
 	return true;
@@ -3605,7 +3568,7 @@ out:
 
 static void seg_setup(int seg)
 {
-	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
+	const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
 	unsigned int ar;
 
 	vmcs_write16(sf->selector, 0);
@@ -3770,8 +3733,7 @@ static void vmx_set_constant_host_state(void)
 	native_store_idt(&dt);
 	vmcs_writel(HOST_IDTR_BASE, dt.address);   /* 22.2.4 */
 
-	asm("mov $.Lkvm_vmx_return, %0" : "=r"(tmpl));
-	vmcs_writel(HOST_RIP, tmpl); /* 22.2.5 */
+	vmcs_writel(HOST_RIP, vmx_return); /* 22.2.5 */
 
 	rdmsr(MSR_IA32_SYSENTER_CS, low32, high32);
 	vmcs_write32(HOST_IA32_SYSENTER_CS, low32);
@@ -4005,8 +3967,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 	kvm_rip_write(vcpu, 0);
 	kvm_register_write(vcpu, VCPU_REGS_RSP, 0);
 
-	vmcs_writel(GUEST_DR7, 0x400);
-
 	vmcs_writel(GUEST_GDTR_BASE, 0);
 	vmcs_write32(GUEST_GDTR_LIMIT, 0xffff);
 
@@ -4456,7 +4416,7 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
 	hypercall[2] = 0xc1;
 }
 
-/* called to set cr0 as approriate for a mov-to-cr0 exit. */
+/* called to set cr0 as appropriate for a mov-to-cr0 exit. */
 static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
 {
 	if (to_vmx(vcpu)->nested.vmxon &&
@@ -5701,7 +5661,7 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
 * may resume. Otherwise they set the kvm_run parameter to indicate what needs
 * to be done to userspace and return 0.
 */
-static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
+static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[EXIT_REASON_EXCEPTION_NMI]           = handle_exception,
 	[EXIT_REASON_EXTERNAL_INTERRUPT]      = handle_external_interrupt,
 	[EXIT_REASON_TRIPLE_FAULT]            = handle_triple_fault,
@@ -6229,17 +6189,10 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
 					msrs[i].host);
 }
 
-#ifdef CONFIG_X86_64
-#define R "r"
-#define Q "q"
-#else
-#define R "e"
-#define Q "l"
-#endif
-
 static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	unsigned long debugctlmsr;
 
 	if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending) {
 		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
@@ -6279,34 +6232,35 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		vmx_set_interrupt_shadow(vcpu, 0);
 
 	atomic_switch_perf_msrs(vmx);
+	debugctlmsr = get_debugctlmsr();
 
 	vmx->__launched = vmx->loaded_vmcs->launched;
 	asm(
 		/* Store host registers */
-		"push %%"R"dx; push %%"R"bp;"
-		"push %%"R"cx \n\t" /* placeholder for guest rcx */
-		"push %%"R"cx \n\t"
-		"cmp %%"R"sp, %c[host_rsp](%0) \n\t"
+		"push %%" _ASM_DX "; push %%" _ASM_BP ";"
+		"push %%" _ASM_CX " \n\t" /* placeholder for guest rcx */
+		"push %%" _ASM_CX " \n\t"
+		"cmp %%" _ASM_SP ", %c[host_rsp](%0) \n\t"
 		"je 1f \n\t"
-		"mov %%"R"sp, %c[host_rsp](%0) \n\t"
+		"mov %%" _ASM_SP ", %c[host_rsp](%0) \n\t"
 		__ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t"
 		"1: \n\t"
 		/* Reload cr2 if changed */
-		"mov %c[cr2](%0), %%"R"ax \n\t"
-		"mov %%cr2, %%"R"dx \n\t"
-		"cmp %%"R"ax, %%"R"dx \n\t"
+		"mov %c[cr2](%0), %%" _ASM_AX " \n\t"
+		"mov %%cr2, %%" _ASM_DX " \n\t"
+		"cmp %%" _ASM_AX ", %%" _ASM_DX " \n\t"
 		"je 2f \n\t"
-		"mov %%"R"ax, %%cr2 \n\t"
+		"mov %%" _ASM_AX", %%cr2 \n\t"
 		"2: \n\t"
 		/* Check if vmlaunch of vmresume is needed */
 		"cmpl $0, %c[launched](%0) \n\t"
 		/* Load guest registers.  Don't clobber flags. */
-		"mov %c[rax](%0), %%"R"ax \n\t"
-		"mov %c[rbx](%0), %%"R"bx \n\t"
-		"mov %c[rdx](%0), %%"R"dx \n\t"
-		"mov %c[rsi](%0), %%"R"si \n\t"
-		"mov %c[rdi](%0), %%"R"di \n\t"
-		"mov %c[rbp](%0), %%"R"bp \n\t"
+		"mov %c[rax](%0), %%" _ASM_AX " \n\t"
+		"mov %c[rbx](%0), %%" _ASM_BX " \n\t"
+		"mov %c[rdx](%0), %%" _ASM_DX " \n\t"
+		"mov %c[rsi](%0), %%" _ASM_SI " \n\t"
+		"mov %c[rdi](%0), %%" _ASM_DI " \n\t"
+		"mov %c[rbp](%0), %%" _ASM_BP " \n\t"
 #ifdef CONFIG_X86_64
 		"mov %c[r8](%0),  %%r8  \n\t"
 		"mov %c[r9](%0),  %%r9  \n\t"
@@ -6317,24 +6271,24 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		"mov %c[r14](%0), %%r14 \n\t"
 		"mov %c[r15](%0), %%r15 \n\t"
 #endif
-		"mov %c[rcx](%0), %%"R"cx \n\t" /* kills %0 (ecx) */
+		"mov %c[rcx](%0), %%" _ASM_CX " \n\t" /* kills %0 (ecx) */
 
 		/* Enter guest mode */
-		"jne .Llaunched \n\t"
+		"jne 1f \n\t"
 		__ex(ASM_VMX_VMLAUNCH) "\n\t"
-		"jmp .Lkvm_vmx_return \n\t"
-		".Llaunched: " __ex(ASM_VMX_VMRESUME) "\n\t"
-		".Lkvm_vmx_return: "
+		"jmp 2f \n\t"
+		"1: " __ex(ASM_VMX_VMRESUME) "\n\t"
+		"2: "
 		/* Save guest registers, load host registers, keep flags */
-		"mov %0, %c[wordsize](%%"R"sp) \n\t"
+		"mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
 		"pop %0 \n\t"
-		"mov %%"R"ax, %c[rax](%0) \n\t"
-		"mov %%"R"bx, %c[rbx](%0) \n\t"
-		"pop"Q" %c[rcx](%0) \n\t"
-		"mov %%"R"dx, %c[rdx](%0) \n\t"
-		"mov %%"R"si, %c[rsi](%0) \n\t"
-		"mov %%"R"di, %c[rdi](%0) \n\t"
-		"mov %%"R"bp, %c[rbp](%0) \n\t"
+		"mov %%" _ASM_AX ", %c[rax](%0) \n\t"
+		"mov %%" _ASM_BX ", %c[rbx](%0) \n\t"
+		__ASM_SIZE(pop) " %c[rcx](%0) \n\t"
+		"mov %%" _ASM_DX ", %c[rdx](%0) \n\t"
+		"mov %%" _ASM_SI ", %c[rsi](%0) \n\t"
+		"mov %%" _ASM_DI ", %c[rdi](%0) \n\t"
+		"mov %%" _ASM_BP ", %c[rbp](%0) \n\t"
 #ifdef CONFIG_X86_64
 		"mov %%r8,  %c[r8](%0) \n\t"
 		"mov %%r9,  %c[r9](%0) \n\t"
@@ -6345,11 +6299,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		"mov %%r14, %c[r14](%0) \n\t"
 		"mov %%r15, %c[r15](%0) \n\t"
 #endif
-		"mov %%cr2, %%"R"ax   \n\t"
-		"mov %%"R"ax, %c[cr2](%0) \n\t"
+		"mov %%cr2, %%" _ASM_AX "   \n\t"
+		"mov %%" _ASM_AX ", %c[cr2](%0) \n\t"
 
-		"pop  %%"R"bp; pop  %%"R"dx \n\t"
+		"pop  %%" _ASM_BP "; pop  %%" _ASM_DX " \n\t"
 		"setbe %c[fail](%0) \n\t"
+		".pushsection .rodata \n\t"
+		".global vmx_return \n\t"
+		"vmx_return: " _ASM_PTR " 2b \n\t"
+		".popsection"
 	      : : "c"(vmx), "d"((unsigned long)HOST_RSP),
 		[launched]"i"(offsetof(struct vcpu_vmx, __launched)),
 		[fail]"i"(offsetof(struct vcpu_vmx, fail)),
@@ -6374,12 +6332,18 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		[cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)),
 		[wordsize]"i"(sizeof(ulong))
 	      : "cc", "memory"
-		, R"ax", R"bx", R"di", R"si"
 #ifdef CONFIG_X86_64
+		, "rax", "rbx", "rdi", "rsi"
 		, "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
+#else
+		, "eax", "ebx", "edi", "esi"
 #endif
 	      );
 
+	/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
+	if (debugctlmsr)
+		update_debugctlmsr(debugctlmsr);
+
 #ifndef CONFIG_X86_64
 	/*
 	 * The sysexit path does not restore ds/es, so we must set them to
@@ -6424,9 +6388,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	vmx_complete_interrupts(vmx);
 }
 
-#undef R
-#undef Q
-
 static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -7281,7 +7242,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.vcpu_load = vmx_vcpu_load,
 	.vcpu_put = vmx_vcpu_put,

-	.set_guest_debug = set_guest_debug,
+	.update_db_bp_intercept = update_exception_bitmap,
 	.get_msr = vmx_get_msr,
 	.set_msr = vmx_set_msr,
 	.get_segment_base = vmx_get_segment_base,
@@ -246,9 +246,6 @@ static void drop_user_return_notifiers(void *ignore)

 u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
 {
-	if (irqchip_in_kernel(vcpu->kvm))
-		return vcpu->arch.apic_base;
-	else
 	return vcpu->arch.apic_base;
 }
 EXPORT_SYMBOL_GPL(kvm_get_apic_base);
@@ -256,10 +253,7 @@ EXPORT_SYMBOL_GPL(kvm_get_apic_base);
 void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
 {
 	/* TODO: reserve bits check */
-	if (irqchip_in_kernel(vcpu->kvm))
 	kvm_lapic_set_base(vcpu, data);
-	else
-		vcpu->arch.apic_base = data;
 }
 EXPORT_SYMBOL_GPL(kvm_set_apic_base);

@@ -698,6 +692,18 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_get_cr8);

+static void kvm_update_dr7(struct kvm_vcpu *vcpu)
+{
+	unsigned long dr7;
+
+	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
+		dr7 = vcpu->arch.guest_debug_dr7;
+	else
+		dr7 = vcpu->arch.dr7;
+	kvm_x86_ops->set_dr7(vcpu, dr7);
+	vcpu->arch.switch_db_regs = (dr7 & DR7_BP_EN_MASK);
+}
+
 static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
 {
 	switch (dr) {
@@ -723,10 +729,7 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
 		if (val & 0xffffffff00000000ULL)
 			return -1; /* #GP */
 		vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
-		if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
-			kvm_x86_ops->set_dr7(vcpu, vcpu->arch.dr7);
-			vcpu->arch.switch_db_regs = (val & DR7_BP_EN_MASK);
-		}
+		kvm_update_dr7(vcpu);
 		break;
 	}

@@ -823,7 +826,7 @@ static u32 msrs_to_save[] = {
|
|||||||
|
|
||||||
static unsigned num_msrs_to_save;
|
static unsigned num_msrs_to_save;
|
||||||
|
|
||||||
static u32 emulated_msrs[] = {
|
static const u32 emulated_msrs[] = {
|
||||||
MSR_IA32_TSCDEADLINE,
|
MSR_IA32_TSCDEADLINE,
|
||||||
MSR_IA32_MISC_ENABLE,
|
MSR_IA32_MISC_ENABLE,
|
||||||
MSR_IA32_MCG_STATUS,
|
MSR_IA32_MCG_STATUS,
|
||||||
@@ -1097,7 +1100,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
|
|||||||
* For each generation, we track the original measured
|
* For each generation, we track the original measured
|
||||||
* nanosecond time, offset, and write, so if TSCs are in
|
* nanosecond time, offset, and write, so if TSCs are in
|
||||||
* sync, we can match exact offset, and if not, we can match
|
* sync, we can match exact offset, and if not, we can match
|
||||||
* exact software computaion in compute_guest_tsc()
|
* exact software computation in compute_guest_tsc()
|
||||||
*
|
*
|
||||||
* These values are tracked in kvm->arch.cur_xxx variables.
|
* These values are tracked in kvm->arch.cur_xxx variables.
|
||||||
*/
|
*/
|
||||||
@@ -1140,6 +1143,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
|
|||||||
unsigned long this_tsc_khz;
|
unsigned long this_tsc_khz;
|
||||||
s64 kernel_ns, max_kernel_ns;
|
s64 kernel_ns, max_kernel_ns;
|
||||||
u64 tsc_timestamp;
|
u64 tsc_timestamp;
|
||||||
|
u8 pvclock_flags;
|
||||||
|
|
||||||
/* Keep irq disabled to prevent changes to the clock */
|
/* Keep irq disabled to prevent changes to the clock */
|
||||||
local_irq_save(flags);
|
local_irq_save(flags);
|
||||||
@@ -1221,7 +1225,14 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 	vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
 	vcpu->last_kernel_ns = kernel_ns;
 	vcpu->last_guest_tsc = tsc_timestamp;
-	vcpu->hv_clock.flags = 0;
+	pvclock_flags = 0;
+	if (vcpu->pvclock_set_guest_stopped_request) {
+		pvclock_flags |= PVCLOCK_GUEST_STOPPED;
+		vcpu->pvclock_set_guest_stopped_request = false;
+	}
+
+	vcpu->hv_clock.flags = pvclock_flags;
+
 	/*
 	 * The interface expects us to write an even number signaling that the
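For context: PVCLOCK_GUEST_STOPPED is only raised here after userspace has flagged the vcpu as deliberately stopped, which is what the KVM_CAP_KVMCLOCK_CTRL vcpu ioctl is for. A minimal sketch of the userspace side, assuming vcpufd is an open vcpu file descriptor and the capability was probed beforehand (this snippet is illustrative, not part of the patch):

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Tell kvmclock the vcpu was paused on purpose (e.g. around migration or
 * a debugger stop), so the guest's soft-lockup watchdogs can discount the
 * lost wall-clock time on the next clock update. */
static int mark_vcpu_paused(int vcpufd)
{
	return ioctl(vcpufd, KVM_KVMCLOCK_CTRL, 0);
}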
@@ -1504,7 +1515,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
|
|||||||
{
|
{
|
||||||
gpa_t gpa = data & ~0x3f;
|
gpa_t gpa = data & ~0x3f;
|
||||||
|
|
||||||
/* Bits 2:5 are resrved, Should be zero */
|
/* Bits 2:5 are reserved, Should be zero */
|
||||||
if (data & 0x3c)
|
if (data & 0x3c)
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
@@ -1639,10 +1650,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
|
|||||||
vcpu->arch.time_page =
|
vcpu->arch.time_page =
|
||||||
gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);
|
gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);
|
||||||
|
|
||||||
if (is_error_page(vcpu->arch.time_page)) {
|
if (is_error_page(vcpu->arch.time_page))
|
||||||
kvm_release_page_clean(vcpu->arch.time_page);
|
|
||||||
vcpu->arch.time_page = NULL;
|
vcpu->arch.time_page = NULL;
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case MSR_KVM_ASYNC_PF_EN:
|
case MSR_KVM_ASYNC_PF_EN:
|
||||||
@@ -1727,7 +1737,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
|
|||||||
* Ignore all writes to this no longer documented MSR.
|
* Ignore all writes to this no longer documented MSR.
|
||||||
* Writes are only relevant for old K7 processors,
|
* Writes are only relevant for old K7 processors,
|
||||||
* all pre-dating SVM, but a recommended workaround from
|
* all pre-dating SVM, but a recommended workaround from
|
||||||
* AMD for these chips. It is possible to speicify the
|
* AMD for these chips. It is possible to specify the
|
||||||
* affected processor models on the command line, hence
|
* affected processor models on the command line, hence
|
||||||
* the need to ignore the workaround.
|
* the need to ignore the workaround.
|
||||||
*/
|
*/
|
||||||
@@ -2177,6 +2187,8 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_GET_TSC_KHZ:
 	case KVM_CAP_PCI_2_3:
 	case KVM_CAP_KVMCLOCK_CTRL:
+	case KVM_CAP_READONLY_MEM:
+	case KVM_CAP_IRQFD_RESAMPLE:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
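The two new cases above are what userspace is expected to query before relying on read-only slots or resampling irqfds. A sketch of such a probe, assuming the system headers expose both constants (on x86 KVM_CAP_READONLY_MEM is guarded by __KVM_HAVE_READONLY_MEM) and using the usual KVM_CHECK_EXTENSION convention that a positive return value means "supported":

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);	/* system fd, not a VM fd */
	if (kvm < 0)
		return 1;

	/* KVM_CHECK_EXTENSION returns 0 when the capability is absent. */
	int ro  = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_READONLY_MEM);
	int rsm = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_IRQFD_RESAMPLE);

	printf("readonly mem: %d, irqfd resample: %d\n", ro, rsm);
	return 0;
}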
@@ -2358,8 +2370,7 @@ static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
|
|||||||
static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
|
static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
|
||||||
struct kvm_lapic_state *s)
|
struct kvm_lapic_state *s)
|
||||||
{
|
{
|
||||||
memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
|
kvm_apic_post_state_restore(vcpu, s);
|
||||||
kvm_apic_post_state_restore(vcpu);
|
|
||||||
update_cr8_intercept(vcpu);
|
update_cr8_intercept(vcpu);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
@@ -2368,7 +2379,7 @@ static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
|
|||||||
static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
|
static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
|
||||||
struct kvm_interrupt *irq)
|
struct kvm_interrupt *irq)
|
||||||
{
|
{
|
||||||
if (irq->irq < 0 || irq->irq >= 256)
|
if (irq->irq < 0 || irq->irq >= KVM_NR_INTERRUPTS)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
if (irqchip_in_kernel(vcpu->kvm))
|
if (irqchip_in_kernel(vcpu->kvm))
|
||||||
return -ENXIO;
|
return -ENXIO;
|
||||||
@@ -2635,11 +2646,9 @@ static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
|
|||||||
*/
|
*/
|
||||||
static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
|
static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
|
||||||
{
|
{
|
||||||
struct pvclock_vcpu_time_info *src = &vcpu->arch.hv_clock;
|
|
||||||
if (!vcpu->arch.time_page)
|
if (!vcpu->arch.time_page)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
src->flags |= PVCLOCK_GUEST_STOPPED;
|
vcpu->arch.pvclock_set_guest_stopped_request = true;
|
||||||
mark_page_dirty(vcpu->kvm, vcpu->arch.time >> PAGE_SHIFT);
|
|
||||||
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
|
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@@ -3090,7 +3099,7 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
|
|||||||
if (!kvm->arch.vpit)
|
if (!kvm->arch.vpit)
|
||||||
return -ENXIO;
|
return -ENXIO;
|
||||||
mutex_lock(&kvm->arch.vpit->pit_state.lock);
|
mutex_lock(&kvm->arch.vpit->pit_state.lock);
|
||||||
kvm->arch.vpit->pit_state.pit_timer.reinject = control->pit_reinject;
|
kvm->arch.vpit->pit_state.reinject = control->pit_reinject;
|
||||||
mutex_unlock(&kvm->arch.vpit->pit_state.lock);
|
mutex_unlock(&kvm->arch.vpit->pit_state.lock);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@@ -3173,6 +3182,16 @@ out:
 	return r;
 }

+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event)
+{
+	if (!irqchip_in_kernel(kvm))
+		return -ENXIO;
+
+	irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
+					irq_event->irq, irq_event->level);
+	return 0;
+}
+
 long kvm_arch_vm_ioctl(struct file *filp,
 		       unsigned int ioctl, unsigned long arg)
 {
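The new helper centralizes what the arch ioctl used to open-code; from userspace the call sequence is unchanged. A rough sketch of driving a GSI with KVM_IRQ_LINE on an existing VM fd (vmfd is assumed to have an in-kernel irqchip created via KVM_CREATE_IRQCHIP; the snippet is illustrative only):

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Pulse a level-triggered line: assert, then deassert. */
static int pulse_gsi(int vmfd, unsigned int gsi)
{
	struct kvm_irq_level irq = { .irq = gsi, .level = 1 };

	if (ioctl(vmfd, KVM_IRQ_LINE, &irq) < 0)	/* assert */
		return -1;
	irq.level = 0;
	return ioctl(vmfd, KVM_IRQ_LINE, &irq);		/* deassert */
}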
@@ -3279,29 +3298,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
|||||||
create_pit_unlock:
|
create_pit_unlock:
|
||||||
mutex_unlock(&kvm->slots_lock);
|
mutex_unlock(&kvm->slots_lock);
|
||||||
break;
|
break;
|
||||||
case KVM_IRQ_LINE_STATUS:
|
|
||||||
case KVM_IRQ_LINE: {
|
|
||||||
struct kvm_irq_level irq_event;
|
|
||||||
|
|
||||||
r = -EFAULT;
|
|
||||||
if (copy_from_user(&irq_event, argp, sizeof irq_event))
|
|
||||||
goto out;
|
|
||||||
r = -ENXIO;
|
|
||||||
if (irqchip_in_kernel(kvm)) {
|
|
||||||
__s32 status;
|
|
||||||
status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
|
|
||||||
irq_event.irq, irq_event.level);
|
|
||||||
if (ioctl == KVM_IRQ_LINE_STATUS) {
|
|
||||||
r = -EFAULT;
|
|
||||||
irq_event.status = status;
|
|
||||||
if (copy_to_user(argp, &irq_event,
|
|
||||||
sizeof irq_event))
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
r = 0;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case KVM_GET_IRQCHIP: {
|
case KVM_GET_IRQCHIP: {
|
||||||
/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
|
/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
|
||||||
struct kvm_irqchip *chip;
|
struct kvm_irqchip *chip;
|
||||||
@@ -3689,20 +3685,17 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
|
|||||||
gpa_t *gpa, struct x86_exception *exception,
|
gpa_t *gpa, struct x86_exception *exception,
|
||||||
bool write)
|
bool write)
|
||||||
{
|
{
|
||||||
u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
|
u32 access = ((kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0)
|
||||||
|
| (write ? PFERR_WRITE_MASK : 0);
|
||||||
|
|
||||||
if (vcpu_match_mmio_gva(vcpu, gva) &&
|
if (vcpu_match_mmio_gva(vcpu, gva)
|
||||||
check_write_user_access(vcpu, write, access,
|
&& !permission_fault(vcpu->arch.walk_mmu, vcpu->arch.access, access)) {
|
||||||
vcpu->arch.access)) {
|
|
||||||
*gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
|
*gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
|
||||||
(gva & (PAGE_SIZE - 1));
|
(gva & (PAGE_SIZE - 1));
|
||||||
trace_vcpu_match_mmio(gva, *gpa, write, false);
|
trace_vcpu_match_mmio(gva, *gpa, write, false);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (write)
|
|
||||||
access |= PFERR_WRITE_MASK;
|
|
||||||
|
|
||||||
*gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
|
*gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
|
||||||
|
|
||||||
if (*gpa == UNMAPPED_GVA)
|
if (*gpa == UNMAPPED_GVA)
|
||||||
@@ -3790,14 +3783,14 @@ static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
|
|||||||
return X86EMUL_CONTINUE;
|
return X86EMUL_CONTINUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct read_write_emulator_ops read_emultor = {
|
static const struct read_write_emulator_ops read_emultor = {
|
||||||
.read_write_prepare = read_prepare,
|
.read_write_prepare = read_prepare,
|
||||||
.read_write_emulate = read_emulate,
|
.read_write_emulate = read_emulate,
|
||||||
.read_write_mmio = vcpu_mmio_read,
|
.read_write_mmio = vcpu_mmio_read,
|
||||||
.read_write_exit_mmio = read_exit_mmio,
|
.read_write_exit_mmio = read_exit_mmio,
|
||||||
};
|
};
|
||||||
|
|
||||||
static struct read_write_emulator_ops write_emultor = {
|
static const struct read_write_emulator_ops write_emultor = {
|
||||||
.read_write_emulate = write_emulate,
|
.read_write_emulate = write_emulate,
|
||||||
.read_write_mmio = write_mmio,
|
.read_write_mmio = write_mmio,
|
||||||
.read_write_exit_mmio = write_exit_mmio,
|
.read_write_exit_mmio = write_exit_mmio,
|
||||||
@@ -3808,7 +3801,7 @@ static int emulator_read_write_onepage(unsigned long addr, void *val,
|
|||||||
unsigned int bytes,
|
unsigned int bytes,
|
||||||
struct x86_exception *exception,
|
struct x86_exception *exception,
|
||||||
struct kvm_vcpu *vcpu,
|
struct kvm_vcpu *vcpu,
|
||||||
struct read_write_emulator_ops *ops)
|
const struct read_write_emulator_ops *ops)
|
||||||
{
|
{
|
||||||
gpa_t gpa;
|
gpa_t gpa;
|
||||||
int handled, ret;
|
int handled, ret;
|
||||||
@@ -3857,7 +3850,7 @@ mmio:
|
|||||||
int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
|
int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
|
||||||
void *val, unsigned int bytes,
|
void *val, unsigned int bytes,
|
||||||
struct x86_exception *exception,
|
struct x86_exception *exception,
|
||||||
struct read_write_emulator_ops *ops)
|
const struct read_write_emulator_ops *ops)
|
||||||
{
|
{
|
||||||
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
|
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
|
||||||
gpa_t gpa;
|
gpa_t gpa;
|
||||||
@@ -3962,10 +3955,8 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
|
|||||||
goto emul_write;
|
goto emul_write;
|
||||||
|
|
||||||
page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
|
page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
|
||||||
if (is_error_page(page)) {
|
if (is_error_page(page))
|
||||||
kvm_release_page_clean(page);
|
|
||||||
goto emul_write;
|
goto emul_write;
|
||||||
}
|
|
||||||
|
|
||||||
kaddr = kmap_atomic(page);
|
kaddr = kmap_atomic(page);
|
||||||
kaddr += offset_in_page(gpa);
|
kaddr += offset_in_page(gpa);
|
||||||
@@ -4332,7 +4323,19 @@ static void emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
 	kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx);
 }

-static struct x86_emulate_ops emulate_ops = {
+static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg)
+{
+	return kvm_register_read(emul_to_vcpu(ctxt), reg);
+}
+
+static void emulator_write_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val)
+{
+	kvm_register_write(emul_to_vcpu(ctxt), reg, val);
+}
+
+static const struct x86_emulate_ops emulate_ops = {
+	.read_gpr = emulator_read_gpr,
+	.write_gpr = emulator_write_gpr,
 	.read_std = kvm_read_guest_virt_system,
 	.write_std = kvm_write_guest_virt_system,
 	.fetch = kvm_fetch_guest_virt,
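Routing every GPR access through .read_gpr/.write_gpr is what allows the emulator to keep a lazy register cache instead of copying the full register file into the context on every emulation entry. A rough illustration of that pattern, with invented names and structures that are not the kernel's actual implementation:

#include <stdint.h>

#define NR_GPRS 16

struct gpr_cache {
	uint64_t val[NR_GPRS];
	uint32_t valid;		/* bit n set: val[n] holds the vcpu value */
	uint32_t dirty;		/* bit n set: val[n] must be written back */
};

/* Fetch a register from the vcpu only on first use. */
static uint64_t cache_read(struct gpr_cache *c, unsigned reg,
			   uint64_t (*hw_read)(unsigned))
{
	if (!(c->valid & (1u << reg))) {
		c->val[reg] = hw_read(reg);
		c->valid |= 1u << reg;
	}
	return c->val[reg];
}

/* Writes land in the cache and are flushed when emulation completes. */
static void cache_write(struct gpr_cache *c, unsigned reg, uint64_t v)
{
	c->val[reg] = v;
	c->valid |= 1u << reg;
	c->dirty |= 1u << reg;
}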
@@ -4367,14 +4370,6 @@ static struct x86_emulate_ops emulate_ops = {
|
|||||||
.get_cpuid = emulator_get_cpuid,
|
.get_cpuid = emulator_get_cpuid,
|
||||||
};
|
};
|
||||||
|
|
||||||
static void cache_all_regs(struct kvm_vcpu *vcpu)
|
|
||||||
{
|
|
||||||
kvm_register_read(vcpu, VCPU_REGS_RAX);
|
|
||||||
kvm_register_read(vcpu, VCPU_REGS_RSP);
|
|
||||||
kvm_register_read(vcpu, VCPU_REGS_RIP);
|
|
||||||
vcpu->arch.regs_dirty = ~0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
|
static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
|
||||||
{
|
{
|
||||||
u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu, mask);
|
u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu, mask);
|
||||||
@@ -4401,12 +4396,10 @@ static void inject_emulated_exception(struct kvm_vcpu *vcpu)
|
|||||||
kvm_queue_exception(vcpu, ctxt->exception.vector);
|
kvm_queue_exception(vcpu, ctxt->exception.vector);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void init_decode_cache(struct x86_emulate_ctxt *ctxt,
|
static void init_decode_cache(struct x86_emulate_ctxt *ctxt)
|
||||||
const unsigned long *regs)
|
|
||||||
{
|
{
|
||||||
memset(&ctxt->twobyte, 0,
|
memset(&ctxt->twobyte, 0,
|
||||||
(void *)&ctxt->regs - (void *)&ctxt->twobyte);
|
(void *)&ctxt->_regs - (void *)&ctxt->twobyte);
|
||||||
memcpy(ctxt->regs, regs, sizeof(ctxt->regs));
|
|
||||||
|
|
||||||
ctxt->fetch.start = 0;
|
ctxt->fetch.start = 0;
|
||||||
ctxt->fetch.end = 0;
|
ctxt->fetch.end = 0;
|
||||||
@@ -4421,14 +4414,6 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
|
|||||||
struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
|
struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
|
||||||
int cs_db, cs_l;
|
int cs_db, cs_l;
|
||||||
|
|
||||||
/*
|
|
||||||
* TODO: fix emulate.c to use guest_read/write_register
|
|
||||||
* instead of direct ->regs accesses, can save hundred cycles
|
|
||||||
* on Intel for instructions that don't read/change RSP, for
|
|
||||||
* for example.
|
|
||||||
*/
|
|
||||||
cache_all_regs(vcpu);
|
|
||||||
|
|
||||||
kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
|
kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
|
||||||
|
|
||||||
ctxt->eflags = kvm_get_rflags(vcpu);
|
ctxt->eflags = kvm_get_rflags(vcpu);
|
||||||
@@ -4440,7 +4425,7 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
|
|||||||
X86EMUL_MODE_PROT16;
|
X86EMUL_MODE_PROT16;
|
||||||
ctxt->guest_mode = is_guest_mode(vcpu);
|
ctxt->guest_mode = is_guest_mode(vcpu);
|
||||||
|
|
||||||
init_decode_cache(ctxt, vcpu->arch.regs);
|
init_decode_cache(ctxt);
|
||||||
vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
|
vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -4460,7 +4445,6 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
|
|||||||
return EMULATE_FAIL;
|
return EMULATE_FAIL;
|
||||||
|
|
||||||
ctxt->eip = ctxt->_eip;
|
ctxt->eip = ctxt->_eip;
|
||||||
memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs);
|
|
||||||
kvm_rip_write(vcpu, ctxt->eip);
|
kvm_rip_write(vcpu, ctxt->eip);
|
||||||
kvm_set_rflags(vcpu, ctxt->eflags);
|
kvm_set_rflags(vcpu, ctxt->eflags);
|
||||||
|
|
||||||
@@ -4493,13 +4477,14 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu)
|
|||||||
static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
|
static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
|
||||||
{
|
{
|
||||||
gpa_t gpa;
|
gpa_t gpa;
|
||||||
|
pfn_t pfn;
|
||||||
|
|
||||||
if (tdp_enabled)
|
if (tdp_enabled)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* if emulation was due to access to shadowed page table
|
* if emulation was due to access to shadowed page table
|
||||||
* and it failed try to unshadow page and re-entetr the
|
* and it failed try to unshadow page and re-enter the
|
||||||
* guest to let CPU execute the instruction.
|
* guest to let CPU execute the instruction.
|
||||||
*/
|
*/
|
||||||
if (kvm_mmu_unprotect_page_virt(vcpu, gva))
|
if (kvm_mmu_unprotect_page_virt(vcpu, gva))
|
||||||
@@ -4510,8 +4495,17 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
 	if (gpa == UNMAPPED_GVA)
 		return true; /* let cpu generate fault */

-	if (!kvm_is_error_hva(gfn_to_hva(vcpu->kvm, gpa >> PAGE_SHIFT)))
+	/*
+	 * Do not retry the unhandleable instruction if it faults on the
+	 * readonly host memory, otherwise it will goto a infinite loop:
+	 * retry instruction -> write #PF -> emulation fail -> retry
+	 * instruction -> ...
+	 */
+	pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
+	if (!is_error_pfn(pfn)) {
+		kvm_release_pfn_clean(pfn);
 		return true;
+	}

 	return false;
 }
@@ -4560,6 +4554,9 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int complete_emulated_mmio(struct kvm_vcpu *vcpu);
|
||||||
|
static int complete_emulated_pio(struct kvm_vcpu *vcpu);
|
||||||
|
|
||||||
int x86_emulate_instruction(struct kvm_vcpu *vcpu,
|
int x86_emulate_instruction(struct kvm_vcpu *vcpu,
|
||||||
unsigned long cr2,
|
unsigned long cr2,
|
||||||
int emulation_type,
|
int emulation_type,
|
||||||
@@ -4608,7 +4605,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
|
|||||||
changes registers values during IO operation */
|
changes registers values during IO operation */
|
||||||
if (vcpu->arch.emulate_regs_need_sync_from_vcpu) {
|
if (vcpu->arch.emulate_regs_need_sync_from_vcpu) {
|
||||||
vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
|
vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
|
||||||
memcpy(ctxt->regs, vcpu->arch.regs, sizeof ctxt->regs);
|
emulator_invalidate_register_cache(ctxt);
|
||||||
}
|
}
|
||||||
|
|
||||||
restart:
|
restart:
|
||||||
@@ -4630,13 +4627,16 @@ restart:
|
|||||||
} else if (vcpu->arch.pio.count) {
|
} else if (vcpu->arch.pio.count) {
|
||||||
if (!vcpu->arch.pio.in)
|
if (!vcpu->arch.pio.in)
|
||||||
vcpu->arch.pio.count = 0;
|
vcpu->arch.pio.count = 0;
|
||||||
else
|
else {
|
||||||
writeback = false;
|
writeback = false;
|
||||||
|
vcpu->arch.complete_userspace_io = complete_emulated_pio;
|
||||||
|
}
|
||||||
r = EMULATE_DO_MMIO;
|
r = EMULATE_DO_MMIO;
|
||||||
} else if (vcpu->mmio_needed) {
|
} else if (vcpu->mmio_needed) {
|
||||||
if (!vcpu->mmio_is_write)
|
if (!vcpu->mmio_is_write)
|
||||||
writeback = false;
|
writeback = false;
|
||||||
r = EMULATE_DO_MMIO;
|
r = EMULATE_DO_MMIO;
|
||||||
|
vcpu->arch.complete_userspace_io = complete_emulated_mmio;
|
||||||
} else if (r == EMULATION_RESTART)
|
} else if (r == EMULATION_RESTART)
|
||||||
goto restart;
|
goto restart;
|
||||||
else
|
else
|
||||||
@@ -4646,7 +4646,6 @@ restart:
|
|||||||
toggle_interruptibility(vcpu, ctxt->interruptibility);
|
toggle_interruptibility(vcpu, ctxt->interruptibility);
|
||||||
kvm_set_rflags(vcpu, ctxt->eflags);
|
kvm_set_rflags(vcpu, ctxt->eflags);
|
||||||
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
||||||
memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs);
|
|
||||||
vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
|
vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
|
||||||
kvm_rip_write(vcpu, ctxt->eip);
|
kvm_rip_write(vcpu, ctxt->eip);
|
||||||
} else
|
} else
|
||||||
@@ -4929,6 +4928,7 @@ int kvm_arch_init(void *opaque)
|
|||||||
if (cpu_has_xsave)
|
if (cpu_has_xsave)
|
||||||
host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
|
host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
|
||||||
|
|
||||||
|
kvm_lapic_init();
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
out:
|
out:
|
||||||
@@ -5499,6 +5499,24 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 	return r;
 }

+static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
+{
+	int r;
+	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+	r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
+	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+	if (r != EMULATE_DONE)
+		return 0;
+	return 1;
+}
+
+static int complete_emulated_pio(struct kvm_vcpu *vcpu)
+{
+	BUG_ON(!vcpu->arch.pio.count);
+
+	return complete_emulated_io(vcpu);
+}
+
 /*
  * Implements the following, as a state machine:
 *
@@ -5515,16 +5533,13 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
|
|||||||
* copy data
|
* copy data
|
||||||
* exit
|
* exit
|
||||||
*/
|
*/
|
||||||
static int complete_mmio(struct kvm_vcpu *vcpu)
|
static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
|
||||||
{
|
{
|
||||||
struct kvm_run *run = vcpu->run;
|
struct kvm_run *run = vcpu->run;
|
||||||
struct kvm_mmio_fragment *frag;
|
struct kvm_mmio_fragment *frag;
|
||||||
int r;
|
|
||||||
|
|
||||||
if (!(vcpu->arch.pio.count || vcpu->mmio_needed))
|
BUG_ON(!vcpu->mmio_needed);
|
||||||
return 1;
|
|
||||||
|
|
||||||
if (vcpu->mmio_needed) {
|
|
||||||
/* Complete previous fragment */
|
/* Complete previous fragment */
|
||||||
frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment++];
|
frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment++];
|
||||||
if (!vcpu->mmio_is_write)
|
if (!vcpu->mmio_is_write)
|
||||||
@@ -5534,7 +5549,7 @@ static int complete_mmio(struct kvm_vcpu *vcpu)
|
|||||||
if (vcpu->mmio_is_write)
|
if (vcpu->mmio_is_write)
|
||||||
return 1;
|
return 1;
|
||||||
vcpu->mmio_read_completed = 1;
|
vcpu->mmio_read_completed = 1;
|
||||||
goto done;
|
return complete_emulated_io(vcpu);
|
||||||
}
|
}
|
||||||
/* Initiate next fragment */
|
/* Initiate next fragment */
|
||||||
++frag;
|
++frag;
|
||||||
@@ -5544,18 +5559,11 @@ static int complete_mmio(struct kvm_vcpu *vcpu)
|
|||||||
memcpy(run->mmio.data, frag->data, frag->len);
|
memcpy(run->mmio.data, frag->data, frag->len);
|
||||||
run->mmio.len = frag->len;
|
run->mmio.len = frag->len;
|
||||||
run->mmio.is_write = vcpu->mmio_is_write;
|
run->mmio.is_write = vcpu->mmio_is_write;
|
||||||
|
vcpu->arch.complete_userspace_io = complete_emulated_mmio;
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
}
|
|
||||||
done:
|
|
||||||
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
|
|
||||||
r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
|
|
||||||
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
|
|
||||||
if (r != EMULATE_DONE)
|
|
||||||
return 0;
|
|
||||||
return 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
||||||
{
|
{
|
||||||
int r;
|
int r;
|
||||||
@@ -5582,9 +5590,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		}
 	}

-	r = complete_mmio(vcpu);
+	if (unlikely(vcpu->arch.complete_userspace_io)) {
+		int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
+		vcpu->arch.complete_userspace_io = NULL;
+		r = cui(vcpu);
 		if (r <= 0)
 			goto out;
+	} else
+		WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);

 	r = __vcpu_run(vcpu);

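The complete_userspace_io callback is the kernel half of the usual VMM run loop: userspace services the KVM_EXIT_IO or KVM_EXIT_MMIO request recorded in the shared kvm_run page, then simply calls KVM_RUN again, and the saved callback finishes the partially emulated instruction. A hedged sketch of that loop; handle_pio and handle_mmio are placeholders for a VMM's device models, not real API:

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Placeholder device-model hooks; a real VMM supplies these. */
void handle_pio(struct kvm_run *run);
void handle_mmio(struct kvm_run *run);

/* run is the mmap()ed struct kvm_run for this vcpu fd. */
static int run_loop(int vcpufd, struct kvm_run *run)
{
	for (;;) {
		if (ioctl(vcpufd, KVM_RUN, 0) < 0)
			return -1;

		switch (run->exit_reason) {
		case KVM_EXIT_IO:
			handle_pio(run);	/* for an 'in', fill the data area */
			break;			/* next KVM_RUN resumes via the saved callback */
		case KVM_EXIT_MMIO:
			handle_mmio(run);	/* also covers writes to read-only slots */
			break;
		default:
			return 0;
		}
	}
}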
@@ -5602,12 +5615,11 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
|
|||||||
/*
|
/*
|
||||||
* We are here if userspace calls get_regs() in the middle of
|
* We are here if userspace calls get_regs() in the middle of
|
||||||
* instruction emulation. Registers state needs to be copied
|
* instruction emulation. Registers state needs to be copied
|
||||||
* back from emulation context to vcpu. Usrapace shouldn't do
|
* back from emulation context to vcpu. Userspace shouldn't do
|
||||||
* that usually, but some bad designed PV devices (vmware
|
* that usually, but some bad designed PV devices (vmware
|
||||||
* backdoor interface) need this to work
|
* backdoor interface) need this to work
|
||||||
*/
|
*/
|
||||||
struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
|
emulator_writeback_register_cache(&vcpu->arch.emulate_ctxt);
|
||||||
memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs);
|
|
||||||
vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
|
vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
|
||||||
}
|
}
|
||||||
regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
|
regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
|
||||||
@@ -5747,7 +5759,6 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
|
|||||||
if (ret)
|
if (ret)
|
||||||
return EMULATE_FAIL;
|
return EMULATE_FAIL;
|
||||||
|
|
||||||
memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs);
|
|
||||||
kvm_rip_write(vcpu, ctxt->eip);
|
kvm_rip_write(vcpu, ctxt->eip);
|
||||||
kvm_set_rflags(vcpu, ctxt->eflags);
|
kvm_set_rflags(vcpu, ctxt->eflags);
|
||||||
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
||||||
@@ -5799,7 +5810,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
|
|||||||
if (mmu_reset_needed)
|
if (mmu_reset_needed)
|
||||||
kvm_mmu_reset_context(vcpu);
|
kvm_mmu_reset_context(vcpu);
|
||||||
|
|
||||||
max_bits = (sizeof sregs->interrupt_bitmap) << 3;
|
max_bits = KVM_NR_INTERRUPTS;
|
||||||
pending_vec = find_first_bit(
|
pending_vec = find_first_bit(
|
||||||
(const unsigned long *)sregs->interrupt_bitmap, max_bits);
|
(const unsigned long *)sregs->interrupt_bitmap, max_bits);
|
||||||
if (pending_vec < max_bits) {
|
if (pending_vec < max_bits) {
|
||||||
@@ -5859,13 +5870,12 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
|
|||||||
if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
|
if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
|
||||||
for (i = 0; i < KVM_NR_DB_REGS; ++i)
|
for (i = 0; i < KVM_NR_DB_REGS; ++i)
|
||||||
vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
|
vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
|
||||||
vcpu->arch.switch_db_regs =
|
vcpu->arch.guest_debug_dr7 = dbg->arch.debugreg[7];
|
||||||
(dbg->arch.debugreg[7] & DR7_BP_EN_MASK);
|
|
||||||
} else {
|
} else {
|
||||||
for (i = 0; i < KVM_NR_DB_REGS; i++)
|
for (i = 0; i < KVM_NR_DB_REGS; i++)
|
||||||
vcpu->arch.eff_db[i] = vcpu->arch.db[i];
|
vcpu->arch.eff_db[i] = vcpu->arch.db[i];
|
||||||
vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK);
|
|
||||||
}
|
}
|
||||||
|
kvm_update_dr7(vcpu);
|
||||||
|
|
||||||
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
|
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
|
||||||
vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) +
|
vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) +
|
||||||
@@ -5877,7 +5887,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
|
|||||||
*/
|
*/
|
||||||
kvm_set_rflags(vcpu, rflags);
|
kvm_set_rflags(vcpu, rflags);
|
||||||
|
|
||||||
kvm_x86_ops->set_guest_debug(vcpu, dbg);
|
kvm_x86_ops->update_db_bp_intercept(vcpu);
|
||||||
|
|
||||||
r = 0;
|
r = 0;
|
||||||
|
|
||||||
@@ -6023,7 +6033,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
|
|||||||
int r;
|
int r;
|
||||||
|
|
||||||
vcpu->arch.mtrr_state.have_fixed = 1;
|
vcpu->arch.mtrr_state.have_fixed = 1;
|
||||||
vcpu_load(vcpu);
|
r = vcpu_load(vcpu);
|
||||||
|
if (r)
|
||||||
|
return r;
|
||||||
r = kvm_arch_vcpu_reset(vcpu);
|
r = kvm_arch_vcpu_reset(vcpu);
|
||||||
if (r == 0)
|
if (r == 0)
|
||||||
r = kvm_mmu_setup(vcpu);
|
r = kvm_mmu_setup(vcpu);
|
||||||
@@ -6034,9 +6046,11 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
|
|||||||
|
|
||||||
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
|
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
|
||||||
{
|
{
|
||||||
|
int r;
|
||||||
vcpu->arch.apf.msr_val = 0;
|
vcpu->arch.apf.msr_val = 0;
|
||||||
|
|
||||||
vcpu_load(vcpu);
|
r = vcpu_load(vcpu);
|
||||||
|
BUG_ON(r);
|
||||||
kvm_mmu_unload(vcpu);
|
kvm_mmu_unload(vcpu);
|
||||||
vcpu_put(vcpu);
|
vcpu_put(vcpu);
|
||||||
|
|
||||||
@@ -6050,10 +6064,10 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
|
|||||||
vcpu->arch.nmi_pending = 0;
|
vcpu->arch.nmi_pending = 0;
|
||||||
vcpu->arch.nmi_injected = false;
|
vcpu->arch.nmi_injected = false;
|
||||||
|
|
||||||
vcpu->arch.switch_db_regs = 0;
|
|
||||||
memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
|
memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
|
||||||
vcpu->arch.dr6 = DR6_FIXED_1;
|
vcpu->arch.dr6 = DR6_FIXED_1;
|
||||||
vcpu->arch.dr7 = DR7_FIXED_1;
|
vcpu->arch.dr7 = DR7_FIXED_1;
|
||||||
|
kvm_update_dr7(vcpu);
|
||||||
|
|
||||||
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
||||||
vcpu->arch.apf.msr_val = 0;
|
vcpu->arch.apf.msr_val = 0;
|
||||||
@@ -6132,7 +6146,7 @@ int kvm_arch_hardware_enable(void *garbage)
|
|||||||
* as we reset last_host_tsc on all VCPUs to stop this from being
|
* as we reset last_host_tsc on all VCPUs to stop this from being
|
||||||
* called multiple times (one for each physical CPU bringup).
|
* called multiple times (one for each physical CPU bringup).
|
||||||
*
|
*
|
||||||
* Platforms with unnreliable TSCs don't have to deal with this, they
|
* Platforms with unreliable TSCs don't have to deal with this, they
|
||||||
* will be compensated by the logic in vcpu_load, which sets the TSC to
|
* will be compensated by the logic in vcpu_load, which sets the TSC to
|
||||||
* catchup mode. This will catchup all VCPUs to real time, but cannot
|
* catchup mode. This will catchup all VCPUs to real time, but cannot
|
||||||
* guarantee that they stay in perfect synchronization.
|
* guarantee that they stay in perfect synchronization.
|
||||||
@@ -6185,6 +6199,8 @@ bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
|
|||||||
return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
|
return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct static_key kvm_no_apic_vcpu __read_mostly;
|
||||||
|
|
||||||
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
|
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
|
||||||
{
|
{
|
||||||
struct page *page;
|
struct page *page;
|
||||||
@@ -6217,7 +6233,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
|
|||||||
r = kvm_create_lapic(vcpu);
|
r = kvm_create_lapic(vcpu);
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
goto fail_mmu_destroy;
|
goto fail_mmu_destroy;
|
||||||
}
|
} else
|
||||||
|
static_key_slow_inc(&kvm_no_apic_vcpu);
|
||||||
|
|
||||||
vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
|
vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
|
||||||
GFP_KERNEL);
|
GFP_KERNEL);
|
||||||
@@ -6257,6 +6274,8 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
|
|||||||
kvm_mmu_destroy(vcpu);
|
kvm_mmu_destroy(vcpu);
|
||||||
srcu_read_unlock(&vcpu->kvm->srcu, idx);
|
srcu_read_unlock(&vcpu->kvm->srcu, idx);
|
||||||
free_page((unsigned long)vcpu->arch.pio_data);
|
free_page((unsigned long)vcpu->arch.pio_data);
|
||||||
|
if (!irqchip_in_kernel(vcpu->kvm))
|
||||||
|
static_key_slow_dec(&kvm_no_apic_vcpu);
|
||||||
}
|
}
|
||||||
|
|
||||||
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
|
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
|
||||||
@@ -6269,15 +6288,21 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
|
|||||||
|
|
||||||
/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
|
/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
|
||||||
set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
|
set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
|
||||||
|
/* Reserve bit 1 of irq_sources_bitmap for irqfd-resampler */
|
||||||
|
set_bit(KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
|
||||||
|
&kvm->arch.irq_sources_bitmap);
|
||||||
|
|
||||||
raw_spin_lock_init(&kvm->arch.tsc_write_lock);
|
raw_spin_lock_init(&kvm->arch.tsc_write_lock);
|
||||||
|
mutex_init(&kvm->arch.apic_map_lock);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
|
static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
|
||||||
{
|
{
|
||||||
vcpu_load(vcpu);
|
int r;
|
||||||
|
r = vcpu_load(vcpu);
|
||||||
|
BUG_ON(r);
|
||||||
kvm_mmu_unload(vcpu);
|
kvm_mmu_unload(vcpu);
|
||||||
vcpu_put(vcpu);
|
vcpu_put(vcpu);
|
||||||
}
|
}
|
||||||
@@ -6321,6 +6346,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
|
|||||||
put_page(kvm->arch.apic_access_page);
|
put_page(kvm->arch.apic_access_page);
|
||||||
if (kvm->arch.ept_identity_pagetable)
|
if (kvm->arch.ept_identity_pagetable)
|
||||||
put_page(kvm->arch.ept_identity_pagetable);
|
put_page(kvm->arch.ept_identity_pagetable);
|
||||||
|
kfree(rcu_dereference_check(kvm->arch.apic_map, 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
void kvm_arch_free_memslot(struct kvm_memory_slot *free,
|
void kvm_arch_free_memslot(struct kvm_memory_slot *free,
|
||||||
@@ -6328,10 +6354,18 @@ void kvm_arch_free_memslot(struct kvm_memory_slot *free,
|
|||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
|
for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
|
||||||
if (!dont || free->arch.lpage_info[i] != dont->arch.lpage_info[i]) {
|
if (!dont || free->arch.rmap[i] != dont->arch.rmap[i]) {
|
||||||
kvm_kvfree(free->arch.lpage_info[i]);
|
kvm_kvfree(free->arch.rmap[i]);
|
||||||
free->arch.lpage_info[i] = NULL;
|
free->arch.rmap[i] = NULL;
|
||||||
|
}
|
||||||
|
if (i == 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (!dont || free->arch.lpage_info[i - 1] !=
|
||||||
|
dont->arch.lpage_info[i - 1]) {
|
||||||
|
kvm_kvfree(free->arch.lpage_info[i - 1]);
|
||||||
|
free->arch.lpage_info[i - 1] = NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -6340,23 +6374,30 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
|
|||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
|
for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
|
||||||
unsigned long ugfn;
|
unsigned long ugfn;
|
||||||
int lpages;
|
int lpages;
|
||||||
int level = i + 2;
|
int level = i + 1;
|
||||||
|
|
||||||
lpages = gfn_to_index(slot->base_gfn + npages - 1,
|
lpages = gfn_to_index(slot->base_gfn + npages - 1,
|
||||||
slot->base_gfn, level) + 1;
|
slot->base_gfn, level) + 1;
|
||||||
|
|
||||||
slot->arch.lpage_info[i] =
|
slot->arch.rmap[i] =
|
||||||
kvm_kvzalloc(lpages * sizeof(*slot->arch.lpage_info[i]));
|
kvm_kvzalloc(lpages * sizeof(*slot->arch.rmap[i]));
|
||||||
if (!slot->arch.lpage_info[i])
|
if (!slot->arch.rmap[i])
|
||||||
|
goto out_free;
|
||||||
|
if (i == 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
slot->arch.lpage_info[i - 1] = kvm_kvzalloc(lpages *
|
||||||
|
sizeof(*slot->arch.lpage_info[i - 1]));
|
||||||
|
if (!slot->arch.lpage_info[i - 1])
|
||||||
goto out_free;
|
goto out_free;
|
||||||
|
|
||||||
if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
|
if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
|
||||||
slot->arch.lpage_info[i][0].write_count = 1;
|
slot->arch.lpage_info[i - 1][0].write_count = 1;
|
||||||
if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
|
if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
|
||||||
slot->arch.lpage_info[i][lpages - 1].write_count = 1;
|
slot->arch.lpage_info[i - 1][lpages - 1].write_count = 1;
|
||||||
ugfn = slot->userspace_addr >> PAGE_SHIFT;
|
ugfn = slot->userspace_addr >> PAGE_SHIFT;
|
||||||
/*
|
/*
|
||||||
* If the gfn and userspace address are not aligned wrt each
|
* If the gfn and userspace address are not aligned wrt each
|
||||||
@@ -6368,16 +6409,21 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
|
|||||||
unsigned long j;
|
unsigned long j;
|
||||||
|
|
||||||
for (j = 0; j < lpages; ++j)
|
for (j = 0; j < lpages; ++j)
|
||||||
slot->arch.lpage_info[i][j].write_count = 1;
|
slot->arch.lpage_info[i - 1][j].write_count = 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
out_free:
|
out_free:
|
||||||
for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
|
for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
|
||||||
kvm_kvfree(slot->arch.lpage_info[i]);
|
kvm_kvfree(slot->arch.rmap[i]);
|
||||||
slot->arch.lpage_info[i] = NULL;
|
slot->arch.rmap[i] = NULL;
|
||||||
|
if (i == 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
kvm_kvfree(slot->arch.lpage_info[i - 1]);
|
||||||
|
slot->arch.lpage_info[i - 1] = NULL;
|
||||||
}
|
}
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
}
|
}
|
||||||
@@ -6396,10 +6442,10 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
|
|||||||
map_flags = MAP_SHARED | MAP_ANONYMOUS;
|
map_flags = MAP_SHARED | MAP_ANONYMOUS;
|
||||||
|
|
||||||
/*To keep backward compatibility with older userspace,
|
/*To keep backward compatibility with older userspace,
|
||||||
*x86 needs to hanlde !user_alloc case.
|
*x86 needs to handle !user_alloc case.
|
||||||
*/
|
*/
|
||||||
if (!user_alloc) {
|
if (!user_alloc) {
|
||||||
if (npages && !old.rmap) {
|
if (npages && !old.npages) {
|
||||||
unsigned long userspace_addr;
|
unsigned long userspace_addr;
|
||||||
|
|
||||||
userspace_addr = vm_mmap(NULL, 0,
|
userspace_addr = vm_mmap(NULL, 0,
|
||||||
@@ -6427,7 +6473,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
|
|||||||
|
|
||||||
int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT;
|
int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT;
|
||||||
|
|
||||||
if (!user_alloc && !old.user_alloc && old.rmap && !npages) {
|
if (!user_alloc && !old.user_alloc && old.npages && !npages) {
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
ret = vm_munmap(old.userspace_addr,
|
ret = vm_munmap(old.userspace_addr,
|
||||||
@@ -6446,14 +6492,28 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
|
|||||||
kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
|
kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
|
||||||
kvm_mmu_slot_remove_write_access(kvm, mem->slot);
|
kvm_mmu_slot_remove_write_access(kvm, mem->slot);
|
||||||
spin_unlock(&kvm->mmu_lock);
|
spin_unlock(&kvm->mmu_lock);
|
||||||
|
/*
|
||||||
|
* If memory slot is created, or moved, we need to clear all
|
||||||
|
* mmio sptes.
|
||||||
|
*/
|
||||||
|
if (npages && old.base_gfn != mem->guest_phys_addr >> PAGE_SHIFT) {
|
||||||
|
kvm_mmu_zap_all(kvm);
|
||||||
|
kvm_reload_remote_mmus(kvm);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void kvm_arch_flush_shadow(struct kvm *kvm)
|
void kvm_arch_flush_shadow_all(struct kvm *kvm)
|
||||||
{
|
{
|
||||||
kvm_mmu_zap_all(kvm);
|
kvm_mmu_zap_all(kvm);
|
||||||
kvm_reload_remote_mmus(kvm);
|
kvm_reload_remote_mmus(kvm);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
|
||||||
|
struct kvm_memory_slot *slot)
|
||||||
|
{
|
||||||
|
kvm_arch_flush_shadow_all(kvm);
|
||||||
|
}
|
||||||
|
|
||||||
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
|
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
|
||||||
{
|
{
|
||||||
return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
|
return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
|
||||||
|
@@ -124,4 +124,5 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
|
|||||||
|
|
||||||
extern u64 host_xcr0;
|
extern u64 host_xcr0;
|
||||||
|
|
||||||
|
extern struct static_key kvm_no_apic_vcpu;
|
||||||
#endif
|
#endif
|
||||||
|
@@ -101,9 +101,13 @@ struct kvm_userspace_memory_region {
 	__u64 userspace_addr; /* start of the userspace allocated memory */
 };

-/* for kvm_memory_region::flags */
-#define KVM_MEM_LOG_DIRTY_PAGES 1UL
-#define KVM_MEMSLOT_INVALID (1UL << 1)
+/*
+ * The bit 0 ~ bit 15 of kvm_memory_region::flags are visible for userspace,
+ * other bits are reserved for kvm internal use which are defined in
+ * include/linux/kvm_host.h.
+ */
+#define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0)
+#define KVM_MEM_READONLY (1UL << 1)

 /* for KVM_IRQ_LINE */
 struct kvm_irq_level {
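With the new flag a slot can be registered read-only, so guest writes to it surface as KVM_EXIT_MMIO instead of dirtying the backing memory. A sketch of registering such a slot; the slot number, addresses and sizes are arbitrary, and KVM_CAP_READONLY_MEM should be checked first:

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int map_rom(int vmfd, void *host_buf, __u64 gpa, __u64 size)
{
	struct kvm_userspace_memory_region region;

	memset(&region, 0, sizeof(region));
	region.slot = 1;			/* arbitrary free slot number */
	region.flags = KVM_MEM_READONLY;	/* guest writes -> KVM_EXIT_MMIO */
	region.guest_phys_addr = gpa;
	region.memory_size = size;
	region.userspace_addr = (unsigned long)host_buf;

	return ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &region);
}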
@@ -618,6 +622,10 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_PPC_GET_SMMU_INFO 78
 #define KVM_CAP_S390_COW 79
 #define KVM_CAP_PPC_ALLOC_HTAB 80
+#ifdef __KVM_HAVE_READONLY_MEM
+#define KVM_CAP_READONLY_MEM 81
+#endif
+#define KVM_CAP_IRQFD_RESAMPLE 82

 #ifdef KVM_CAP_IRQ_ROUTING

@@ -683,12 +691,21 @@ struct kvm_xen_hvm_config {
 #endif

 #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0)
+/*
+ * Available with KVM_CAP_IRQFD_RESAMPLE
+ *
+ * KVM_IRQFD_FLAG_RESAMPLE indicates resamplefd is valid and specifies
+ * the irqfd to operate in resampling mode for level triggered interrupt
+ * emlation. See Documentation/virtual/kvm/api.txt.
+ */
+#define KVM_IRQFD_FLAG_RESAMPLE (1 << 1)

 struct kvm_irqfd {
 	__u32 fd;
 	__u32 gsi;
 	__u32 flags;
-	__u8 pad[20];
+	__u32 resamplefd;
+	__u8 pad[16];
 };

 struct kvm_clock_data {
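Resampling mode pairs the usual trigger eventfd with a second eventfd that is signalled when the guest EOIs the level interrupt, so the device model knows when to re-check its pending state and possibly re-assert the line. A sketch of the setup, assuming both eventfds already exist and KVM_CAP_IRQFD_RESAMPLE was probed:

#include <string.h>
#include <sys/ioctl.h>
#include <sys/eventfd.h>
#include <linux/kvm.h>

static int bind_level_irqfd(int vmfd, int trigger_fd, int resample_fd,
			    unsigned int gsi)
{
	struct kvm_irqfd irqfd;

	memset(&irqfd, 0, sizeof(irqfd));
	irqfd.fd = trigger_fd;			/* write here to assert the line */
	irqfd.gsi = gsi;
	irqfd.flags = KVM_IRQFD_FLAG_RESAMPLE;
	irqfd.resamplefd = resample_fd;		/* signalled on guest EOI */

	return ioctl(vmfd, KVM_IRQFD, &irqfd);
}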
@@ -21,6 +21,7 @@
|
|||||||
#include <linux/slab.h>
|
#include <linux/slab.h>
|
||||||
#include <linux/rcupdate.h>
|
#include <linux/rcupdate.h>
|
||||||
#include <linux/ratelimit.h>
|
#include <linux/ratelimit.h>
|
||||||
|
#include <linux/err.h>
|
||||||
#include <asm/signal.h>
|
#include <asm/signal.h>
|
||||||
|
|
||||||
#include <linux/kvm.h>
|
#include <linux/kvm.h>
|
||||||
@@ -34,6 +35,13 @@
|
|||||||
#define KVM_MMIO_SIZE 8
|
#define KVM_MMIO_SIZE 8
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The bit 16 ~ bit 31 of kvm_memory_region::flags are internally used
|
||||||
|
* in kvm, other bits are visible for userspace which are defined in
|
||||||
|
* include/linux/kvm_h.
|
||||||
|
*/
|
||||||
|
#define KVM_MEMSLOT_INVALID (1UL << 16)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If we support unaligned MMIO, at most one fragment will be split into two:
|
* If we support unaligned MMIO, at most one fragment will be split into two:
|
||||||
*/
|
*/
|
||||||
@@ -48,6 +56,47 @@
 #define KVM_MAX_MMIO_FRAGMENTS \
 	(KVM_MMIO_SIZE / KVM_USER_MMIO_SIZE + KVM_EXTRA_MMIO_FRAGMENTS)

+/*
+ * For the normal pfn, the highest 12 bits should be zero,
+ * so we can mask these bits to indicate the error.
+ */
+#define KVM_PFN_ERR_MASK (0xfffULL << 52)
+
+#define KVM_PFN_ERR_FAULT (KVM_PFN_ERR_MASK)
+#define KVM_PFN_ERR_HWPOISON (KVM_PFN_ERR_MASK + 1)
+#define KVM_PFN_ERR_BAD (KVM_PFN_ERR_MASK + 2)
+#define KVM_PFN_ERR_RO_FAULT (KVM_PFN_ERR_MASK + 3)
+
+static inline bool is_error_pfn(pfn_t pfn)
+{
+	return !!(pfn & KVM_PFN_ERR_MASK);
+}
+
+static inline bool is_noslot_pfn(pfn_t pfn)
+{
+	return pfn == KVM_PFN_ERR_BAD;
+}
+
+static inline bool is_invalid_pfn(pfn_t pfn)
+{
+	return !is_noslot_pfn(pfn) && is_error_pfn(pfn);
+}
+
+#define KVM_HVA_ERR_BAD (PAGE_OFFSET)
+#define KVM_HVA_ERR_RO_BAD (PAGE_OFFSET + PAGE_SIZE)
+
+static inline bool kvm_is_error_hva(unsigned long addr)
+{
+	return addr >= PAGE_OFFSET;
+}
+
+#define KVM_ERR_PTR_BAD_PAGE (ERR_PTR(-ENOENT))
+
+static inline bool is_error_page(struct page *page)
+{
+	return IS_ERR(page);
+}
+
 /*
 * vcpu->requests bit members
 */
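The encoding above folds every failure case into the top bits of the pfn, so callers can classify a result without comparing against several sentinels. A small worked table of how the three predicates relate (the normal-pfn value is invented for illustration):

/* pfn value                     is_error_pfn  is_noslot_pfn  is_invalid_pfn */
/* 0x12345 (ordinary frame)          false         false          false     */
/* KVM_PFN_ERR_FAULT                 true          false          true      */
/* KVM_PFN_ERR_HWPOISON              true          false          true      */
/* KVM_PFN_ERR_BAD (no memslot)      true          true           false     */

So is_invalid_pfn() singles out "real" errors such as poisoned or unmapped host memory, while the no-memslot case is left for the MMIO path to handle.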
@@ -71,6 +120,7 @@
|
|||||||
#define KVM_REQ_PMI 17
|
#define KVM_REQ_PMI 17
|
||||||
|
|
||||||
#define KVM_USERSPACE_IRQ_SOURCE_ID 0
|
#define KVM_USERSPACE_IRQ_SOURCE_ID 0
|
||||||
|
#define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1
|
||||||
|
|
||||||
struct kvm;
|
struct kvm;
|
||||||
struct kvm_vcpu;
|
struct kvm_vcpu;
|
||||||
@@ -183,6 +233,18 @@ struct kvm_vcpu {
|
|||||||
} async_pf;
|
} async_pf;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
|
||||||
|
/*
|
||||||
|
* Cpu relax intercept or pause loop exit optimization
|
||||||
|
* in_spin_loop: set when a vcpu does a pause loop exit
|
||||||
|
* or cpu relax intercepted.
|
||||||
|
* dy_eligible: indicates whether vcpu is eligible for directed yield.
|
||||||
|
*/
|
||||||
|
struct {
|
||||||
|
bool in_spin_loop;
|
||||||
|
bool dy_eligible;
|
||||||
|
} spin_loop;
|
||||||
|
#endif
|
||||||
struct kvm_vcpu_arch arch;
|
struct kvm_vcpu_arch arch;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -201,7 +263,6 @@ struct kvm_memory_slot {
|
|||||||
gfn_t base_gfn;
|
gfn_t base_gfn;
|
||||||
unsigned long npages;
|
unsigned long npages;
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
unsigned long *rmap;
|
|
||||||
unsigned long *dirty_bitmap;
|
unsigned long *dirty_bitmap;
|
||||||
struct kvm_arch_memory_slot arch;
|
struct kvm_arch_memory_slot arch;
|
||||||
unsigned long userspace_addr;
|
unsigned long userspace_addr;
|
||||||
@@ -283,6 +344,8 @@ struct kvm {
|
|||||||
struct {
|
struct {
|
||||||
spinlock_t lock;
|
spinlock_t lock;
|
||||||
struct list_head items;
|
struct list_head items;
|
||||||
|
struct list_head resampler_list;
|
||||||
|
struct mutex resampler_lock;
|
||||||
} irqfds;
|
} irqfds;
|
||||||
struct list_head ioeventfds;
|
struct list_head ioeventfds;
|
||||||
#endif
|
#endif
|
||||||
@@ -348,7 +411,7 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
|
|||||||
int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id);
|
int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id);
|
||||||
void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
|
void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
|
||||||
|
|
||||||
void vcpu_load(struct kvm_vcpu *vcpu);
|
int __must_check vcpu_load(struct kvm_vcpu *vcpu);
|
||||||
void vcpu_put(struct kvm_vcpu *vcpu);
|
void vcpu_put(struct kvm_vcpu *vcpu);
|
||||||
|
|
||||||
int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
|
int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
|
||||||
@@ -378,23 +441,6 @@ id_to_memslot(struct kvm_memslots *slots, int id)
|
|||||||
return slot;
|
return slot;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define HPA_MSB ((sizeof(hpa_t) * 8) - 1)
|
|
||||||
#define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB)
|
|
||||||
static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; }
|
|
||||||
|
|
||||||
extern struct page *bad_page;
|
|
||||||
extern struct page *fault_page;
|
|
||||||
|
|
||||||
extern pfn_t bad_pfn;
|
|
||||||
extern pfn_t fault_pfn;
|
|
||||||
|
|
||||||
int is_error_page(struct page *page);
|
|
||||||
int is_error_pfn(pfn_t pfn);
|
|
||||||
int is_hwpoison_pfn(pfn_t pfn);
|
|
||||||
int is_fault_pfn(pfn_t pfn);
|
|
||||||
int is_noslot_pfn(pfn_t pfn);
|
|
||||||
int is_invalid_pfn(pfn_t pfn);
|
|
||||||
 int kvm_is_error_hva(unsigned long addr);
 int kvm_set_memory_region(struct kvm *kvm,
 struct kvm_userspace_memory_region *mem,
 int user_alloc);

@@ -415,28 +461,33 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 int user_alloc);
 bool kvm_largepages_enabled(void);
 void kvm_disable_largepages(void);
-void kvm_arch_flush_shadow(struct kvm *kvm);
+/* flush all memory translations */
+void kvm_arch_flush_shadow_all(struct kvm *kvm);
+/* flush memory translations pointing to 'slot' */
+void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+struct kvm_memory_slot *slot);

 int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
 int nr_pages);

 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
+unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
 void kvm_release_page_clean(struct page *page);
 void kvm_release_page_dirty(struct page *page);
 void kvm_set_page_dirty(struct page *page);
 void kvm_set_page_accessed(struct page *page);

-pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr);
 pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn);
 pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async,
 bool write_fault, bool *writable);
 pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
 pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
 bool *writable);
-pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
-struct kvm_memory_slot *slot, gfn_t gfn);
-void kvm_release_pfn_dirty(pfn_t);
+pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
+pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn);
+void kvm_release_pfn_dirty(pfn_t pfn);
 void kvm_release_pfn_clean(pfn_t pfn);
 void kvm_set_pfn_dirty(pfn_t pfn);
 void kvm_set_pfn_accessed(pfn_t pfn);

@@ -494,6 +545,7 @@ int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
 struct
 kvm_userspace_memory_region *mem,
 int user_alloc);
+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level);
 long kvm_arch_vm_ioctl(struct file *filp,
 unsigned int ioctl, unsigned long arg);

@@ -573,7 +625,7 @@ void kvm_arch_sync_events(struct kvm *kvm);
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu);

-int kvm_is_mmio_pfn(pfn_t pfn);
+bool kvm_is_mmio_pfn(pfn_t pfn);

 struct kvm_irq_ack_notifier {
 struct hlist_node link;

@@ -728,6 +780,12 @@ __gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn)
 return search_memslots(slots, gfn);
 }

+static inline unsigned long
+__gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
+{
+return slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE;
+}
+
 static inline int memslot_id(struct kvm *kvm, gfn_t gfn)
 {
 return gfn_to_memslot(kvm, gfn)->id;

@@ -740,10 +798,12 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
 (base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
 }

-static inline unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot,
-gfn_t gfn)
+static inline gfn_t
+hva_to_gfn_memslot(unsigned long hva, struct kvm_memory_slot *slot)
 {
-return slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE;
+gfn_t gfn_offset = (hva - slot->userspace_addr) >> PAGE_SHIFT;
+
+return slot->base_gfn + gfn_offset;
 }

 static inline gpa_t gfn_to_gpa(gfn_t gfn)

@@ -899,5 +959,32 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu)
 }
 }

+#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
+
+static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
+{
+vcpu->spin_loop.in_spin_loop = val;
+}
+static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val)
+{
+vcpu->spin_loop.dy_eligible = val;
+}
+
+#else /* !CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */
+
+static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
+{
+}
+
+static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val)
+{
+}
+
+static inline bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
+{
+return true;
+}
+
+#endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */
 #endif

@@ -118,6 +118,7 @@ void jump_label_rate_limit(struct static_key_deferred *key,
 key->timeout = rl;
 INIT_DELAYED_WORK(&key->work, jump_label_update_timeout);
 }
+EXPORT_SYMBOL_GPL(jump_label_rate_limit);

 static int addr_conflict(struct jump_entry *entry, void *start, void *end)
 {

@@ -21,3 +21,6 @@ config KVM_ASYNC_PF

 config HAVE_KVM_MSI
 bool
+
+config HAVE_KVM_CPU_RELAX_INTERCEPT
+bool

@@ -111,8 +111,8 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
 list_entry(vcpu->async_pf.done.next,
 typeof(*work), link);
 list_del(&work->link);
-if (work->page)
-put_page(work->page);
+if (!is_error_page(work->page))
+kvm_release_page_clean(work->page);
 kmem_cache_free(async_pf_cache, work);
 }
 spin_unlock(&vcpu->async_pf.lock);

@@ -138,8 +138,8 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)

 list_del(&work->queue);
 vcpu->async_pf.queued--;
-if (work->page)
-put_page(work->page);
+if (!is_error_page(work->page))
+kvm_release_page_clean(work->page);
 kmem_cache_free(async_pf_cache, work);
 }
 }

@@ -203,8 +203,7 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu)
 if (!work)
 return -ENOMEM;

-work->page = bad_page;
-get_page(bad_page);
+work->page = KVM_ERR_PTR_BAD_PAGE;
 INIT_LIST_HEAD(&work->queue); /* for list_del to work */

 spin_lock(&vcpu->async_pf.lock);

@@ -43,6 +43,31 @@
 * --------------------------------------------------------------------
 */

+/*
+ * Resampling irqfds are a special variety of irqfds used to emulate
+ * level triggered interrupts. The interrupt is asserted on eventfd
+ * trigger. On acknowledgement through the irq ack notifier, the
+ * interrupt is de-asserted and userspace is notified through the
+ * resamplefd. All resamplers on the same gsi are de-asserted
+ * together, so we don't need to track the state of each individual
+ * user. We can also therefore share the same irq source ID.
+ */
+struct _irqfd_resampler {
+struct kvm *kvm;
+/*
+ * List of resampling struct _irqfd objects sharing this gsi.
+ * RCU list modified under kvm->irqfds.resampler_lock
+ */
+struct list_head list;
+struct kvm_irq_ack_notifier notifier;
+/*
+ * Entry in list of kvm->irqfd.resampler_list. Use for sharing
+ * resamplers among irqfds on the same gsi.
+ * Accessed and modified under kvm->irqfds.resampler_lock
+ */
+struct list_head link;
+};
+
 struct _irqfd {
 /* Used for MSI fast-path */
 struct kvm *kvm;

@@ -52,6 +77,12 @@ struct _irqfd {
 /* Used for level IRQ fast-path */
 int gsi;
 struct work_struct inject;
+/* The resampler used by this irqfd (resampler-only) */
+struct _irqfd_resampler *resampler;
+/* Eventfd notified on resample (resampler-only) */
+struct eventfd_ctx *resamplefd;
+/* Entry in list of irqfds for a resampler (resampler-only) */
+struct list_head resampler_link;
 /* Used for setup/shutdown */
 struct eventfd_ctx *eventfd;
 struct list_head list;

@@ -67,8 +98,58 @@ irqfd_inject(struct work_struct *work)
 struct _irqfd *irqfd = container_of(work, struct _irqfd, inject);
 struct kvm *kvm = irqfd->kvm;

+if (!irqfd->resampler) {
 kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1);
 kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0);
+} else
+kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
+irqfd->gsi, 1);
+}
+
+/*
+ * Since resampler irqfds share an IRQ source ID, we de-assert once
+ * then notify all of the resampler irqfds using this GSI. We can't
+ * do multiple de-asserts or we risk racing with incoming re-asserts.
+ */
+static void
+irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
+{
+struct _irqfd_resampler *resampler;
+struct _irqfd *irqfd;
+
+resampler = container_of(kian, struct _irqfd_resampler, notifier);
+
+kvm_set_irq(resampler->kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
+resampler->notifier.gsi, 0);
+
+rcu_read_lock();
+
+list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link)
+eventfd_signal(irqfd->resamplefd, 1);
+
+rcu_read_unlock();
+}
+
+static void
+irqfd_resampler_shutdown(struct _irqfd *irqfd)
+{
+struct _irqfd_resampler *resampler = irqfd->resampler;
+struct kvm *kvm = resampler->kvm;
+
+mutex_lock(&kvm->irqfds.resampler_lock);
+
+list_del_rcu(&irqfd->resampler_link);
+synchronize_rcu();
+
+if (list_empty(&resampler->list)) {
+list_del(&resampler->link);
+kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier);
+kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
+resampler->notifier.gsi, 0);
+kfree(resampler);
+}
+
+mutex_unlock(&kvm->irqfds.resampler_lock);
 }

 /*

@@ -92,6 +173,11 @@ irqfd_shutdown(struct work_struct *work)
 */
 flush_work(&irqfd->inject);

+if (irqfd->resampler) {
+irqfd_resampler_shutdown(irqfd);
+eventfd_ctx_put(irqfd->resamplefd);
+}
+
 /*
 * It is now safe to release the object's resources
 */

@@ -203,7 +289,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 struct kvm_irq_routing_table *irq_rt;
 struct _irqfd *irqfd, *tmp;
 struct file *file = NULL;
-struct eventfd_ctx *eventfd = NULL;
+struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
 int ret;
 unsigned int events;

@@ -231,6 +317,54 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)

 irqfd->eventfd = eventfd;

+if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) {
+struct _irqfd_resampler *resampler;
+
+resamplefd = eventfd_ctx_fdget(args->resamplefd);
+if (IS_ERR(resamplefd)) {
+ret = PTR_ERR(resamplefd);
+goto fail;
+}
+
+irqfd->resamplefd = resamplefd;
+INIT_LIST_HEAD(&irqfd->resampler_link);
+
+mutex_lock(&kvm->irqfds.resampler_lock);
+
+list_for_each_entry(resampler,
+&kvm->irqfds.resampler_list, list) {
+if (resampler->notifier.gsi == irqfd->gsi) {
+irqfd->resampler = resampler;
+break;
+}
+}
+
+if (!irqfd->resampler) {
+resampler = kzalloc(sizeof(*resampler), GFP_KERNEL);
+if (!resampler) {
+ret = -ENOMEM;
+mutex_unlock(&kvm->irqfds.resampler_lock);
+goto fail;
+}
+
+resampler->kvm = kvm;
+INIT_LIST_HEAD(&resampler->list);
+resampler->notifier.gsi = irqfd->gsi;
+resampler->notifier.irq_acked = irqfd_resampler_ack;
+INIT_LIST_HEAD(&resampler->link);
+
+list_add(&resampler->link, &kvm->irqfds.resampler_list);
+kvm_register_irq_ack_notifier(kvm,
+&resampler->notifier);
+irqfd->resampler = resampler;
+}
+
+list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list);
+synchronize_rcu();
+
+mutex_unlock(&kvm->irqfds.resampler_lock);
+}
+
 /*
 * Install our own custom wake-up handling so we are notified via
 * a callback whenever someone signals the underlying eventfd

@@ -276,6 +410,12 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 return 0;

 fail:
+if (irqfd->resampler)
+irqfd_resampler_shutdown(irqfd);
+
+if (resamplefd && !IS_ERR(resamplefd))
+eventfd_ctx_put(resamplefd);
+
 if (eventfd && !IS_ERR(eventfd))
 eventfd_ctx_put(eventfd);

@@ -291,6 +431,8 @@ kvm_eventfd_init(struct kvm *kvm)
 {
 spin_lock_init(&kvm->irqfds.lock);
 INIT_LIST_HEAD(&kvm->irqfds.items);
+INIT_LIST_HEAD(&kvm->irqfds.resampler_list);
+mutex_init(&kvm->irqfds.resampler_lock);
 INIT_LIST_HEAD(&kvm->ioeventfds);
 }

@@ -340,7 +482,7 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
 int
 kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
 {
-if (args->flags & ~KVM_IRQFD_FLAG_DEASSIGN)
+if (args->flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_RESAMPLE))
 return -EINVAL;

 if (args->flags & KVM_IRQFD_FLAG_DEASSIGN)

@@ -197,18 +197,20 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
 u32 old_irr;
 u32 mask = 1 << irq;
 union kvm_ioapic_redirect_entry entry;
-int ret = 1;
+int ret, irq_level;
+
+BUG_ON(irq < 0 || irq >= IOAPIC_NUM_PINS);

 spin_lock(&ioapic->lock);
 old_irr = ioapic->irr;
-if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
-int irq_level = __kvm_irq_line_state(&ioapic->irq_states[irq],
+irq_level = __kvm_irq_line_state(&ioapic->irq_states[irq],
 irq_source_id, level);
 entry = ioapic->redirtbl[irq];
 irq_level ^= entry.fields.polarity;
-if (!irq_level)
+if (!irq_level) {
 ioapic->irr &= ~mask;
-else {
+ret = 1;
+} else {
 int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
 ioapic->irr |= mask;
 if ((edge && old_irr != ioapic->irr) ||

@@ -218,7 +220,6 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
 ret = 0; /* report coalesced interrupt */
 }
 trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
-}
 spin_unlock(&ioapic->lock);

 return ret;

@@ -42,13 +42,13 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm);
 static void kvm_iommu_put_pages(struct kvm *kvm,
 gfn_t base_gfn, unsigned long npages);

-static pfn_t kvm_pin_pages(struct kvm *kvm, struct kvm_memory_slot *slot,
-gfn_t gfn, unsigned long size)
+static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn,
+unsigned long size)
 {
 gfn_t end_gfn;
 pfn_t pfn;

-pfn = gfn_to_pfn_memslot(kvm, slot, gfn);
+pfn = gfn_to_pfn_memslot(slot, gfn);
 end_gfn = gfn + (size >> PAGE_SHIFT);
 gfn += 1;

@@ -56,7 +56,7 @@ static pfn_t kvm_pin_pages(struct kvm *kvm, struct kvm_memory_slot *slot,
 return pfn;

 while (gfn < end_gfn)
-gfn_to_pfn_memslot(kvm, slot, gfn++);
+gfn_to_pfn_memslot(slot, gfn++);

 return pfn;
 }

@@ -105,7 +105,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
 * Pin all pages we are about to map in memory. This is
 * important because we unmap and unpin in 4kb steps later.
 */
-pfn = kvm_pin_pages(kvm, slot, gfn, page_size);
+pfn = kvm_pin_pages(slot, gfn, page_size);
 if (is_error_pfn(pfn)) {
 gfn += 1;
 continue;

@@ -300,6 +300,12 @@ static void kvm_iommu_put_pages(struct kvm *kvm,

 /* Get physical address */
 phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn));
+
+if (!phys) {
+gfn++;
+continue;
+}
+
 pfn = phys >> PAGE_SHIFT;

 /* Unmap address from IO address space */

@@ -68,8 +68,13 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 struct kvm_vcpu *vcpu, *lowest = NULL;

 if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
-kvm_is_dm_lowest_prio(irq))
+kvm_is_dm_lowest_prio(irq)) {
 printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n");
+irq->delivery_mode = APIC_DM_FIXED;
+}
+
+if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r))
+return r;

 kvm_for_each_vcpu(i, vcpu, kvm) {
 if (!kvm_apic_present(vcpu))

@@ -223,6 +228,9 @@ int kvm_request_irq_source_id(struct kvm *kvm)
 }

 ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
+#ifdef CONFIG_X86
+ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID);
+#endif
 set_bit(irq_source_id, bitmap);
 unlock:
 mutex_unlock(&kvm->irq_lock);

@@ -233,6 +241,9 @@ unlock:
 void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
 {
 ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
+#ifdef CONFIG_X86
+ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID);
+#endif

 mutex_lock(&kvm->irq_lock);
 if (irq_source_id < 0 ||

@@ -321,11 +332,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt,
 switch (ue->u.irqchip.irqchip) {
 case KVM_IRQCHIP_PIC_MASTER:
 e->set = kvm_set_pic_irq;
-max_pin = 16;
+max_pin = PIC_NUM_PINS;
 break;
 case KVM_IRQCHIP_PIC_SLAVE:
 e->set = kvm_set_pic_irq;
-max_pin = 16;
+max_pin = PIC_NUM_PINS;
 delta = 8;
 break;
 case KVM_IRQCHIP_IOAPIC:

@@ -100,13 +100,7 @@ EXPORT_SYMBOL_GPL(kvm_rebooting);

 static bool largepages_enabled = true;

-static struct page *hwpoison_page;
-static pfn_t hwpoison_pfn;
-
-struct page *fault_page;
-pfn_t fault_pfn;
-
-inline int kvm_is_mmio_pfn(pfn_t pfn)
+bool kvm_is_mmio_pfn(pfn_t pfn)
 {
 if (pfn_valid(pfn)) {
 int reserved;

@@ -137,11 +131,12 @@ inline int kvm_is_mmio_pfn(pfn_t pfn)
 /*
 * Switches to specified vcpu, until a matching vcpu_put()
 */
-void vcpu_load(struct kvm_vcpu *vcpu)
+int vcpu_load(struct kvm_vcpu *vcpu)
 {
 int cpu;

-mutex_lock(&vcpu->mutex);
+if (mutex_lock_killable(&vcpu->mutex))
+return -EINTR;
 if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) {
 /* The thread running this VCPU changed. */
 struct pid *oldpid = vcpu->pid;

@@ -154,6 +149,7 @@ void vcpu_load(struct kvm_vcpu *vcpu)
 preempt_notifier_register(&vcpu->preempt_notifier);
 kvm_arch_vcpu_load(vcpu, cpu);
 put_cpu();
+return 0;
 }

 void vcpu_put(struct kvm_vcpu *vcpu)

@@ -236,6 +232,9 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 }
 vcpu->run = page_address(page);

+kvm_vcpu_set_in_spin_loop(vcpu, false);
+kvm_vcpu_set_dy_eligible(vcpu, false);
+
 r = kvm_arch_vcpu_init(vcpu);
 if (r < 0)
 goto fail_free_run;

@@ -332,8 +331,7 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
 * count is also read inside the mmu_lock critical section.
 */
 kvm->mmu_notifier_count++;
-for (; start < end; start += PAGE_SIZE)
-need_tlb_flush |= kvm_unmap_hva(kvm, start);
+need_tlb_flush = kvm_unmap_hva_range(kvm, start, end);
 need_tlb_flush |= kvm->tlbs_dirty;
 /* we've to flush the tlb before the pages can be freed */
 if (need_tlb_flush)

@@ -412,7 +410,7 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
 int idx;

 idx = srcu_read_lock(&kvm->srcu);
-kvm_arch_flush_shadow(kvm);
+kvm_arch_flush_shadow_all(kvm);
 srcu_read_unlock(&kvm->srcu, idx);
 }

@@ -551,16 +549,12 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
 static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
 struct kvm_memory_slot *dont)
 {
-if (!dont || free->rmap != dont->rmap)
-vfree(free->rmap);
-
 if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
 kvm_destroy_dirty_bitmap(free);

 kvm_arch_free_memslot(free, dont);

 free->npages = 0;
-free->rmap = NULL;
 }

 void kvm_free_physmem(struct kvm *kvm)

@@ -590,7 +584,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
 mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
 #else
-kvm_arch_flush_shadow(kvm);
+kvm_arch_flush_shadow_all(kvm);
 #endif
 kvm_arch_destroy_vm(kvm);
 kvm_free_physmem(kvm);

@@ -686,6 +680,20 @@ void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new)
 slots->generation++;
 }

+static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
+{
+u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES;
+
+#ifdef KVM_CAP_READONLY_MEM
+valid_flags |= KVM_MEM_READONLY;
+#endif
+
+if (mem->flags & ~valid_flags)
+return -EINVAL;
+
+return 0;
+}
+
 /*
 * Allocate some memory and give it an address in the guest physical address
 * space.

@@ -706,6 +714,10 @@ int __kvm_set_memory_region(struct kvm *kvm,
 struct kvm_memory_slot old, new;
 struct kvm_memslots *slots, *old_memslots;

+r = check_memory_region_flags(mem);
+if (r)
+goto out;
+
 r = -EINVAL;
 /* General sanity checks */
 if (mem->memory_size & (PAGE_SIZE - 1))

@@ -769,11 +781,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 if (npages && !old.npages) {
 new.user_alloc = user_alloc;
 new.userspace_addr = mem->userspace_addr;
-#ifndef CONFIG_S390
-new.rmap = vzalloc(npages * sizeof(*new.rmap));
-if (!new.rmap)
-goto out_free;
-#endif /* not defined CONFIG_S390 */
 if (kvm_arch_create_memslot(&new, npages))
 goto out_free;
 }

@@ -785,7 +793,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 /* destroy any largepage mappings for dirty tracking */
 }

-if (!npages) {
+if (!npages || base_gfn != old.base_gfn) {
 struct kvm_memory_slot *slot;

 r = -ENOMEM;

@@ -801,14 +809,14 @@ int __kvm_set_memory_region(struct kvm *kvm,
 old_memslots = kvm->memslots;
 rcu_assign_pointer(kvm->memslots, slots);
 synchronize_srcu_expedited(&kvm->srcu);
-/* From this point no new shadow pages pointing to a deleted
- * memslot will be created.
+/* From this point no new shadow pages pointing to a deleted,
+ * or moved, memslot will be created.
 *
 * validation of sp->gfn happens in:
 * - gfn_to_hva (kvm_read_guest, gfn_to_pfn)
 * - kvm_is_visible_gfn (mmu_check_roots)
 */
-kvm_arch_flush_shadow(kvm);
+kvm_arch_flush_shadow_memslot(kvm, slot);
 kfree(old_memslots);
 }

@@ -832,7 +840,6 @@ int __kvm_set_memory_region(struct kvm *kvm,

 /* actual memory is freed via old in kvm_free_physmem_slot below */
 if (!npages) {
-new.rmap = NULL;
 new.dirty_bitmap = NULL;
 memset(&new.arch, 0, sizeof(new.arch));
 }

@@ -844,13 +851,6 @@ int __kvm_set_memory_region(struct kvm *kvm,

 kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);

-/*
- * If the new memory slot is created, we need to clear all
- * mmio sptes.
- */
-if (npages && old.base_gfn != mem->guest_phys_addr >> PAGE_SHIFT)
-kvm_arch_flush_shadow(kvm);
-
 kvm_free_physmem_slot(&old, &new);
 kfree(old_memslots);

@@ -932,53 +932,6 @@ void kvm_disable_largepages(void)
 }
 EXPORT_SYMBOL_GPL(kvm_disable_largepages);

-int is_error_page(struct page *page)
-{
-return page == bad_page || page == hwpoison_page || page == fault_page;
-}
-EXPORT_SYMBOL_GPL(is_error_page);
-
-int is_error_pfn(pfn_t pfn)
-{
-return pfn == bad_pfn || pfn == hwpoison_pfn || pfn == fault_pfn;
-}
-EXPORT_SYMBOL_GPL(is_error_pfn);
-
-int is_hwpoison_pfn(pfn_t pfn)
-{
-return pfn == hwpoison_pfn;
-}
-EXPORT_SYMBOL_GPL(is_hwpoison_pfn);
-
-int is_fault_pfn(pfn_t pfn)
-{
-return pfn == fault_pfn;
-}
-EXPORT_SYMBOL_GPL(is_fault_pfn);
-
-int is_noslot_pfn(pfn_t pfn)
-{
-return pfn == bad_pfn;
-}
-EXPORT_SYMBOL_GPL(is_noslot_pfn);
-
-int is_invalid_pfn(pfn_t pfn)
-{
-return pfn == hwpoison_pfn || pfn == fault_pfn;
-}
-EXPORT_SYMBOL_GPL(is_invalid_pfn);
-
-static inline unsigned long bad_hva(void)
-{
-return PAGE_OFFSET;
-}
-
-int kvm_is_error_hva(unsigned long addr)
-{
-return addr == bad_hva();
-}
-EXPORT_SYMBOL_GPL(kvm_is_error_hva);
-
 struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
 {
 return __gfn_to_memslot(kvm_memslots(kvm), gfn);

@@ -1021,28 +974,62 @@ out:
 return size;
 }

-static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn,
-gfn_t *nr_pages)
+static bool memslot_is_readonly(struct kvm_memory_slot *slot)
+{
+return slot->flags & KVM_MEM_READONLY;
+}
+
+static unsigned long __gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn,
+gfn_t *nr_pages, bool write)
 {
 if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
-return bad_hva();
+return KVM_HVA_ERR_BAD;
+
+if (memslot_is_readonly(slot) && write)
+return KVM_HVA_ERR_RO_BAD;

 if (nr_pages)
 *nr_pages = slot->npages - (gfn - slot->base_gfn);

-return gfn_to_hva_memslot(slot, gfn);
+return __gfn_to_hva_memslot(slot, gfn);
 }

+static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn,
+gfn_t *nr_pages)
+{
+return __gfn_to_hva_many(slot, gfn, nr_pages, true);
+}
+
+unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot,
+gfn_t gfn)
+{
+return gfn_to_hva_many(slot, gfn, NULL);
+}
+EXPORT_SYMBOL_GPL(gfn_to_hva_memslot);
+
 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 {
 return gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL);
 }
 EXPORT_SYMBOL_GPL(gfn_to_hva);

-static pfn_t get_fault_pfn(void)
+/*
+ * The hva returned by this function is only allowed to be read.
+ * It should pair with kvm_read_hva() or kvm_read_hva_atomic().
+ */
+static unsigned long gfn_to_hva_read(struct kvm *kvm, gfn_t gfn)
 {
-get_page(fault_page);
-return fault_pfn;
+return __gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL, false);
+}
+
+static int kvm_read_hva(void *data, void __user *hva, int len)
+{
+return __copy_from_user(data, hva, len);
+}
+
+static int kvm_read_hva_atomic(void *data, void __user *hva, int len)
+{
+return __copy_from_user_inatomic(data, hva, len);
 }

 int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm,

@@ -1065,25 +1052,49 @@ static inline int check_user_page_hwpoison(unsigned long addr)
 return rc == -EHWPOISON;
 }

-static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
-bool *async, bool write_fault, bool *writable)
+/*
+ * The atomic path to get the writable pfn which will be stored in @pfn,
+ * true indicates success, otherwise false is returned.
+ */
+static bool hva_to_pfn_fast(unsigned long addr, bool atomic, bool *async,
+bool write_fault, bool *writable, pfn_t *pfn)
 {
 struct page *page[1];
-int npages = 0;
-pfn_t pfn;
+int npages;

-/* we can do it either atomically or asynchronously, not both */
-BUG_ON(atomic && async);
+if (!(async || atomic))
+return false;

-BUG_ON(!write_fault && !writable);
+/*
+ * Fast pin a writable pfn only if it is a write fault request
+ * or the caller allows to map a writable pfn for a read fault
+ * request.
+ */
+if (!(write_fault || writable))
+return false;
+
+npages = __get_user_pages_fast(addr, 1, 1, page);
+if (npages == 1) {
+*pfn = page_to_pfn(page[0]);

 if (writable)
 *writable = true;
+return true;
+}
+
-if (atomic || async)
-npages = __get_user_pages_fast(addr, 1, 1, page);
+return false;
+}
+
+/*
+ * The slow path to get the pfn of the specified host virtual address,
+ * 1 indicates success, -errno is returned if error is detected.
+ */
+static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,
+bool *writable, pfn_t *pfn)
+{
+struct page *page[1];
+int npages = 0;
+
-if (unlikely(npages != 1) && !atomic) {
 might_sleep();

 if (writable)

@@ -1097,9 +1108,11 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
 } else
 npages = get_user_pages_fast(addr, 1, write_fault,
 page);
+if (npages != 1)
+return npages;

 /* map read fault as writable if possible */
-if (unlikely(!write_fault) && npages == 1) {
+if (unlikely(!write_fault) && writable) {
 struct page *wpage[1];

 npages = __get_user_pages_fast(addr, 1, 1, wpage);

@@ -1108,65 +1121,117 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
 put_page(page[0]);
 page[0] = wpage[0];
 }

 npages = 1;
 }
-}
+*pfn = page_to_pfn(page[0]);
+return npages;
+}
+
-if (unlikely(npages != 1)) {
+static bool vma_is_valid(struct vm_area_struct *vma, bool write_fault)
+{
+if (unlikely(!(vma->vm_flags & VM_READ)))
+return false;
+
+if (write_fault && (unlikely(!(vma->vm_flags & VM_WRITE))))
+return false;
+
+return true;
+}
+
+/*
+ * Pin guest page in memory and return its pfn.
+ * @addr: host virtual address which maps memory to the guest
+ * @atomic: whether this function can sleep
+ * @async: whether this function need to wait IO complete if the
+ * host page is not in the memory
+ * @write_fault: whether we should get a writable host page
+ * @writable: whether it allows to map a writable host page for !@write_fault
+ *
+ * The function will map a writable host page for these two cases:
+ * 1): @write_fault = true
+ * 2): @write_fault = false && @writable, @writable will tell the caller
+ * whether the mapping is writable.
+ */
+static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
+bool write_fault, bool *writable)
+{
 struct vm_area_struct *vma;
+pfn_t pfn = 0;
+int npages;
+
+/* we can do it either atomically or asynchronously, not both */
+BUG_ON(atomic && async);
+
+if (hva_to_pfn_fast(addr, atomic, async, write_fault, writable, &pfn))
+return pfn;
+
 if (atomic)
-return get_fault_pfn();
+return KVM_PFN_ERR_FAULT;
+
+npages = hva_to_pfn_slow(addr, async, write_fault, writable, &pfn);
+if (npages == 1)
+return pfn;
+
 down_read(&current->mm->mmap_sem);
 if (npages == -EHWPOISON ||
 (!async && check_user_page_hwpoison(addr))) {
-up_read(&current->mm->mmap_sem);
-get_page(hwpoison_page);
-return page_to_pfn(hwpoison_page);
+pfn = KVM_PFN_ERR_HWPOISON;
+goto exit;
 }

-vma = find_vma_intersection(current->mm, addr, addr+1);
+vma = find_vma_intersection(current->mm, addr, addr + 1);

 if (vma == NULL)
-pfn = get_fault_pfn();
+pfn = KVM_PFN_ERR_FAULT;
 else if ((vma->vm_flags & VM_PFNMAP)) {
 pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) +
 vma->vm_pgoff;
 BUG_ON(!kvm_is_mmio_pfn(pfn));
 } else {
-if (async && (vma->vm_flags & VM_WRITE))
+if (async && vma_is_valid(vma, write_fault))
 *async = true;
-pfn = get_fault_pfn();
+pfn = KVM_PFN_ERR_FAULT;
 }
+exit:
 up_read(&current->mm->mmap_sem);
-} else
-pfn = page_to_pfn(page[0]);

 return pfn;
 }

-pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr)
+static pfn_t
+__gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn, bool atomic,
+bool *async, bool write_fault, bool *writable)
 {
-return hva_to_pfn(kvm, addr, true, NULL, true, NULL);
+unsigned long addr = __gfn_to_hva_many(slot, gfn, NULL, write_fault);
+
+if (addr == KVM_HVA_ERR_RO_BAD)
+return KVM_PFN_ERR_RO_FAULT;
+
+if (kvm_is_error_hva(addr))
+return KVM_PFN_ERR_BAD;
+
+/* Do not map writable pfn in the readonly memslot. */
+if (writable && memslot_is_readonly(slot)) {
+*writable = false;
+writable = NULL;
+}
+
+return hva_to_pfn(addr, atomic, async, write_fault,
+writable);
 }
-EXPORT_SYMBOL_GPL(hva_to_pfn_atomic);

 static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async,
 bool write_fault, bool *writable)
 {
-unsigned long addr;
+struct kvm_memory_slot *slot;

 if (async)
 *async = false;

-addr = gfn_to_hva(kvm, gfn);
-if (kvm_is_error_hva(addr)) {
-get_page(bad_page);
-return page_to_pfn(bad_page);
-}
+slot = gfn_to_memslot(kvm, gfn);

-return hva_to_pfn(kvm, addr, atomic, async, write_fault, writable);
+return __gfn_to_pfn_memslot(slot, gfn, atomic, async, write_fault,
+writable);
 }

 pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn)

@@ -1195,13 +1260,17 @@ pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
 }
 EXPORT_SYMBOL_GPL(gfn_to_pfn_prot);

-pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
-struct kvm_memory_slot *slot, gfn_t gfn)
+pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
 {
-unsigned long addr = gfn_to_hva_memslot(slot, gfn);
-return hva_to_pfn(kvm, addr, false, NULL, true, NULL);
+return __gfn_to_pfn_memslot(slot, gfn, false, NULL, true, NULL);
 }
+
+pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn)
+{
+return __gfn_to_pfn_memslot(slot, gfn, true, NULL, true, NULL);
+}
+EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot_atomic);

 int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
 int nr_pages)
 {

@@ -1219,30 +1288,42 @@ int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
 }
 EXPORT_SYMBOL_GPL(gfn_to_page_many_atomic);

+static struct page *kvm_pfn_to_page(pfn_t pfn)
+{
+if (is_error_pfn(pfn))
+return KVM_ERR_PTR_BAD_PAGE;
+
+if (kvm_is_mmio_pfn(pfn)) {
+WARN_ON(1);
+return KVM_ERR_PTR_BAD_PAGE;
+}
+
+return pfn_to_page(pfn);
+}
+
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
 {
 pfn_t pfn;

 pfn = gfn_to_pfn(kvm, gfn);
-if (!kvm_is_mmio_pfn(pfn))
-return pfn_to_page(pfn);
-
-WARN_ON(kvm_is_mmio_pfn(pfn));
-
-get_page(bad_page);
-return bad_page;
+return kvm_pfn_to_page(pfn);
 }

 EXPORT_SYMBOL_GPL(gfn_to_page);

 void kvm_release_page_clean(struct page *page)
 {
+WARN_ON(is_error_page(page));
+
 kvm_release_pfn_clean(page_to_pfn(page));
 }
 EXPORT_SYMBOL_GPL(kvm_release_page_clean);

 void kvm_release_pfn_clean(pfn_t pfn)
 {
+WARN_ON(is_error_pfn(pfn));
+
 if (!kvm_is_mmio_pfn(pfn))
 put_page(pfn_to_page(pfn));
 }

@@ -1250,6 +1331,8 @@ EXPORT_SYMBOL_GPL(kvm_release_pfn_clean);

 void kvm_release_page_dirty(struct page *page)
 {
+WARN_ON(is_error_page(page));
+
 kvm_release_pfn_dirty(page_to_pfn(page));
 }
 EXPORT_SYMBOL_GPL(kvm_release_page_dirty);

@@ -1305,10 +1388,10 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
 int r;
 unsigned long addr;

-addr = gfn_to_hva(kvm, gfn);
+addr = gfn_to_hva_read(kvm, gfn);
 if (kvm_is_error_hva(addr))
 return -EFAULT;
-r = __copy_from_user(data, (void __user *)addr + offset, len);
+r = kvm_read_hva(data, (void __user *)addr + offset, len);
 if (r)
 return -EFAULT;
 return 0;

@@ -1343,11 +1426,11 @@ int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
 gfn_t gfn = gpa >> PAGE_SHIFT;
 int offset = offset_in_page(gpa);

-addr = gfn_to_hva(kvm, gfn);
+addr = gfn_to_hva_read(kvm, gfn);
 if (kvm_is_error_hva(addr))
 return -EFAULT;
 pagefault_disable();
-r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len);
+r = kvm_read_hva_atomic(data, (void __user *)addr + offset, len);
 pagefault_enable();
 if (r)
 return -EFAULT;

@@ -1580,6 +1663,43 @@ bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to);

+#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
+/*
+ * Helper that checks whether a VCPU is eligible for directed yield.
+ * Most eligible candidate to yield is decided by following heuristics:
+ *
+ * (a) VCPU which has not done pl-exit or cpu relax intercepted recently
+ * (preempted lock holder), indicated by @in_spin_loop.
+ * Set at the beiginning and cleared at the end of interception/PLE handler.
+ *
+ * (b) VCPU which has done pl-exit/ cpu relax intercepted but did not get
+ * chance last time (mostly it has become eligible now since we have probably
+ * yielded to lockholder in last iteration. This is done by toggling
+ * @dy_eligible each time a VCPU checked for eligibility.)
+ *
+ * Yielding to a recently pl-exited/cpu relax intercepted VCPU before yielding
+ * to preempted lock-holder could result in wrong VCPU selection and CPU
+ * burning. Giving priority for a potential lock-holder increases lock
+ * progress.
+ *
+ * Since algorithm is based on heuristics, accessing another VCPU data without
+ * locking does not harm. It may result in trying to yield to same VCPU, fail
+ * and continue with next VCPU and so on.
+ */
+bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
+{
+bool eligible;
+
+eligible = !vcpu->spin_loop.in_spin_loop ||
+(vcpu->spin_loop.in_spin_loop &&
+vcpu->spin_loop.dy_eligible);
+
+if (vcpu->spin_loop.in_spin_loop)
+kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible);
+
+return eligible;
+}
+#endif
 void kvm_vcpu_on_spin(struct kvm_vcpu *me)
 {
 struct kvm *kvm = me->kvm;

@@ -1589,6 +1709,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
 int pass;
 int i;

+kvm_vcpu_set_in_spin_loop(me, true);
 /*
 * We boost the priority of a VCPU that is runnable but not
 * currently running, because it got preempted by something

@@ -1607,6 +1728,8 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
 continue;
 if (waitqueue_active(&vcpu->wq))
 continue;
+if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
+continue;
 if (kvm_vcpu_yield_to(vcpu)) {
 kvm->last_boosted_vcpu = i;
 yielded = 1;

@@ -1614,6 +1737,10 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
 }
 }
 }
+kvm_vcpu_set_in_spin_loop(me, false);
+
+/* Ensure vcpu is not eligible during next spinloop */
+kvm_vcpu_set_dy_eligible(me, false);
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin);

@@ -1766,7 +1893,9 @@ static long kvm_vcpu_ioctl(struct file *filp,
 #endif

-vcpu_load(vcpu);
+r = vcpu_load(vcpu);
+if (r)
+return r;
 switch (ioctl) {
 case KVM_RUN:
 r = -EINVAL;

@@ -2093,6 +2222,29 @@ static long kvm_vm_ioctl(struct file *filp,
 r = kvm_send_userspace_msi(kvm, &msi);
 break;
 }
+#endif
+#ifdef __KVM_HAVE_IRQ_LINE
+case KVM_IRQ_LINE_STATUS:
+case KVM_IRQ_LINE: {
+struct kvm_irq_level irq_event;
+
+r = -EFAULT;
+if (copy_from_user(&irq_event, argp, sizeof irq_event))
+goto out;
+
+r = kvm_vm_ioctl_irq_line(kvm, &irq_event);
+if (r)
+goto out;
+
+r = -EFAULT;
+if (ioctl == KVM_IRQ_LINE_STATUS) {
+if (copy_to_user(argp, &irq_event, sizeof irq_event))
+goto out;
+}
+
+r = 0;
+break;
+}
 #endif
 default:
 r = kvm_arch_vm_ioctl(filp, ioctl, arg);

@@ -2698,9 +2850,6 @@ static struct syscore_ops kvm_syscore_ops = {
 .resume = kvm_resume,
 };

-struct page *bad_page;
-pfn_t bad_pfn;
-
 static inline
 struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn)
 {

@@ -2732,33 +2881,6 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
 if (r)
 goto out_fail;

-bad_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
-
-if (bad_page == NULL) {
-r = -ENOMEM;
-goto out;
-}
-
-bad_pfn = page_to_pfn(bad_page);
-
-hwpoison_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
-
-if (hwpoison_page == NULL) {
-r = -ENOMEM;
-goto out_free_0;
-}
-
-hwpoison_pfn = page_to_pfn(hwpoison_page);
-
-fault_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
-
-if (fault_page == NULL) {
-r = -ENOMEM;
-goto out_free_0;
-}
-
-fault_pfn = page_to_pfn(fault_page);
-
 if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
 r = -ENOMEM;
 goto out_free_0;

@@ -2833,12 +2955,6 @@ out_free_1:
 out_free_0a:
 free_cpumask_var(cpus_hardware_enabled);
 out_free_0:
-if (fault_page)
-__free_page(fault_page);
-if (hwpoison_page)
-__free_page(hwpoison_page);
-__free_page(bad_page);
-out:
 kvm_arch_exit();
 out_fail:
 return r;

@@ -2858,8 +2974,5 @@ void kvm_exit(void)
 kvm_arch_hardware_unsetup();
 kvm_arch_exit();
 free_cpumask_var(cpus_hardware_enabled);
-__free_page(fault_page);
-__free_page(hwpoison_page);
-__free_page(bad_page);
 }
 EXPORT_SYMBOL_GPL(kvm_exit);