kvm: introduce manual dirty log reprotect

There are two problems with KVM_GET_DIRTY_LOG.  First, and less important,
it can take kvm->mmu_lock for an extended period of time.  Second, its user
can actually see many false positives in some cases.  The latter is due
to a benign race like this:

  1. KVM_GET_DIRTY_LOG returns a set of dirty pages and write protects
     them.
  2. The guest modifies the pages, causing them to be marked ditry.
  3. Userspace actually copies the pages.
  4. KVM_GET_DIRTY_LOG returns those pages as dirty again, even though
     they were not written to since (3).

This is especially a problem for large guests, where the time between
(1) and (3) can be substantial.  This patch introduces a new
capability which, when enabled, makes KVM_GET_DIRTY_LOG not
write-protect the pages it returns.  Instead, userspace has to
explicitly clear the dirty log bits just before using the content
of the page.  The new KVM_CLEAR_DIRTY_LOG ioctl can also operate on a
64-page granularity rather than requiring to sync a full memslot;
this way, the mmu_lock is taken for small amounts of time, and
only a small amount of time will pass between write protection
of pages and the sending of their content.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
Paolo Bonzini
2018-10-23 02:36:47 +02:00
parent 8fe65a8299
commit 2a31b9db15
12 changed files with 308 additions and 19 deletions

View File

@@ -16,8 +16,10 @@ TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test
TEST_GEN_PROGS_x86_64 += x86_64/state_test
TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
TEST_GEN_PROGS_x86_64 += dirty_log_test
TEST_GEN_PROGS_x86_64 += clear_dirty_log_test
TEST_GEN_PROGS_aarch64 += dirty_log_test
TEST_GEN_PROGS_aarch64 += clear_dirty_log_test
TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
LIBKVM += $(LIBKVM_$(UNAME_M))

View File

@@ -0,0 +1,2 @@
#define USE_CLEAR_DIRTY_LOG
#include "dirty_log_test.c"

View File

@@ -275,6 +275,14 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
vm = create_vm(mode, VCPU_ID, guest_num_pages, guest_code);
#ifdef USE_CLEAR_DIRTY_LOG
struct kvm_enable_cap cap = {};
cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT;
cap.args[0] = 1;
vm_enable_cap(vm, &cap);
#endif
/* Add an extra memory slot for testing dirty logging */
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
guest_test_mem,
@@ -316,6 +324,10 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
/* Give the vcpu thread some time to dirty some pages */
usleep(interval * 1000);
kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
#ifdef USE_CLEAR_DIRTY_LOG
kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0,
DIV_ROUND_UP(host_num_pages, 64) * 64);
#endif
vm_dirty_log_verify(bmap);
iteration++;
sync_global_to_guest(vm, iteration);
@@ -392,6 +404,13 @@ int main(int argc, char *argv[])
unsigned int mode;
int opt, i;
#ifdef USE_CLEAR_DIRTY_LOG
if (!kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT)) {
fprintf(stderr, "KVM_CLEAR_DIRTY_LOG not available, skipping tests\n");
exit(KSFT_SKIP);
}
#endif
while ((opt = getopt(argc, argv, "hi:I:o:tm:")) != -1) {
switch (opt) {
case 'i':

View File

@@ -58,6 +58,8 @@ void kvm_vm_free(struct kvm_vm *vmp);
void kvm_vm_restart(struct kvm_vm *vmp, int perm);
void kvm_vm_release(struct kvm_vm *vmp);
void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log);
void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
uint64_t first_page, uint32_t num_pages);
int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva,
size_t len);

View File

@@ -231,6 +231,19 @@ void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log)
strerror(-ret));
}
void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
uint64_t first_page, uint32_t num_pages)
{
struct kvm_clear_dirty_log args = { .dirty_bitmap = log, .slot = slot,
.first_page = first_page,
.num_pages = num_pages };
int ret;
ret = ioctl(vm->fd, KVM_CLEAR_DIRTY_LOG, &args);
TEST_ASSERT(ret == 0, "%s: KVM_CLEAR_DIRTY_LOG failed: %s",
strerror(-ret));
}
/*
* Userspace Memory Region Find
*