Merge tag 'hlp_stage1' of git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into kvms390/next

KVM: s390: initial host large page support

- must be enabled via the module parameter hpage=1
- cannot be used together with nested virtualization (module parameter nested=1)
- supports migration
- supports hugetlbfs
- no transparent huge page (THP) support yet
Этот коммит содержится в:
Janosch Frank
2018-07-30 23:20:00 +02:00
родитель 57cb198cfd a449938297
Коммит 2375846193
13 изменённых файлов: 756 добавлений и 120 удалений

Просмотреть файл

@@ -172,6 +172,10 @@ static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");
/* allow 1m huge page guest backing, if !nested */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");
/*
* For now we handle at most 16 double words as this is what the s390 base
@@ -475,6 +479,11 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_S390_AIS_MIGRATION:
r = 1;
break;
case KVM_CAP_S390_HPAGE_1M:
r = 0;
if (hpage)
r = 1;
break;
case KVM_CAP_S390_MEM_OP:
r = MEM_OP_MAX_SIZE;
break;
@@ -511,19 +520,30 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
}
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
struct kvm_memory_slot *memslot)
struct kvm_memory_slot *memslot)
{
int i;
gfn_t cur_gfn, last_gfn;
unsigned long address;
unsigned long gaddr, vmaddr;
struct gmap *gmap = kvm->arch.gmap;
DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
/* Loop over all guest pages */
/* Loop over all guest segments */
cur_gfn = memslot->base_gfn;
last_gfn = memslot->base_gfn + memslot->npages;
for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
address = gfn_to_hva_memslot(memslot, cur_gfn);
for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
gaddr = gfn_to_gpa(cur_gfn);
vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
if (kvm_is_error_hva(vmaddr))
continue;
bitmap_zero(bitmap, _PAGE_ENTRIES);
gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
for (i = 0; i < _PAGE_ENTRIES; i++) {
if (test_bit(i, bitmap))
mark_page_dirty(kvm, cur_gfn + i);
}
if (test_and_clear_guest_dirty(gmap->mm, address))
mark_page_dirty(kvm, cur_gfn);
if (fatal_signal_pending(current))
return;
cond_resched();
@@ -667,6 +687,27 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
r ? "(not available)" : "(success)");
break;
case KVM_CAP_S390_HPAGE_1M:
mutex_lock(&kvm->lock);
if (kvm->created_vcpus)
r = -EBUSY;
else if (!hpage || kvm->arch.use_cmma)
r = -EINVAL;
else {
r = 0;
kvm->mm->context.allow_gmap_hpage_1m = 1;
/*
* We might have to create fake 4k page
* tables. To avoid that the hardware works on
* stale PGSTEs, we emulate these instructions.
*/
kvm->arch.use_skf = 0;
kvm->arch.use_pfmfi = 0;
}
mutex_unlock(&kvm->lock);
VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
r ? "(not available)" : "(success)");
break;
case KVM_CAP_S390_USER_STSI:
VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
kvm->arch.user_stsi = 1;
@@ -714,10 +755,13 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
if (!sclp.has_cmma)
break;
ret = -EBUSY;
VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
mutex_lock(&kvm->lock);
if (!kvm->created_vcpus) {
if (kvm->created_vcpus)
ret = -EBUSY;
else if (kvm->mm->context.allow_gmap_hpage_1m)
ret = -EINVAL;
else {
kvm->arch.use_cmma = 1;
/* Not compatible with cmma. */
kvm->arch.use_pfmfi = 0;
@@ -1514,6 +1558,7 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
uint8_t *keys;
uint64_t hva;
int srcu_idx, i, r = 0;
bool unlocked;
if (args->flags != 0)
return -EINVAL;
@@ -1538,9 +1583,11 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
if (r)
goto out;
i = 0;
down_read(&current->mm->mmap_sem);
srcu_idx = srcu_read_lock(&kvm->srcu);
for (i = 0; i < args->count; i++) {
while (i < args->count) {
unlocked = false;
hva = gfn_to_hva(kvm, args->start_gfn + i);
if (kvm_is_error_hva(hva)) {
r = -EFAULT;
@@ -1554,8 +1601,14 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
}
r = set_guest_storage_key(current->mm, hva, keys[i], 0);
if (r)
break;
if (r) {
r = fixup_user_fault(current, current->mm, hva,
FAULT_FLAG_WRITE, &unlocked);
if (r)
break;
}
if (!r)
i++;
}
srcu_read_unlock(&kvm->srcu, srcu_idx);
up_read(&current->mm->mmap_sem);
@@ -4141,6 +4194,11 @@ static int __init kvm_s390_init(void)
return -ENODEV;
}
if (nested && hpage) {
pr_info("nested (vSIE) and hpage (huge page backing) can currently not be activated concurrently");
return -EINVAL;
}
for (i = 0; i < 16; i++)
kvm_s390_fac_base[i] |=
S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);

Просмотреть файл

@@ -244,9 +244,10 @@ static int try_handle_skey(struct kvm_vcpu *vcpu)
static int handle_iske(struct kvm_vcpu *vcpu)
{
unsigned long addr;
unsigned long gaddr, vmaddr;
unsigned char key;
int reg1, reg2;
bool unlocked;
int rc;
vcpu->stat.instruction_iske++;
@@ -260,18 +261,28 @@ static int handle_iske(struct kvm_vcpu *vcpu)
kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
addr = kvm_s390_logical_to_effective(vcpu, addr);
addr = kvm_s390_real_to_abs(vcpu, addr);
addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(addr));
if (kvm_is_error_hva(addr))
gaddr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
gaddr = kvm_s390_logical_to_effective(vcpu, gaddr);
gaddr = kvm_s390_real_to_abs(vcpu, gaddr);
vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gaddr));
if (kvm_is_error_hva(vmaddr))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
retry:
unlocked = false;
down_read(&current->mm->mmap_sem);
rc = get_guest_storage_key(current->mm, addr, &key);
up_read(&current->mm->mmap_sem);
rc = get_guest_storage_key(current->mm, vmaddr, &key);
if (rc) {
rc = fixup_user_fault(current, current->mm, vmaddr,
FAULT_FLAG_WRITE, &unlocked);
if (!rc) {
up_read(&current->mm->mmap_sem);
goto retry;
}
}
if (rc)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
up_read(&current->mm->mmap_sem);
vcpu->run->s.regs.gprs[reg1] &= ~0xff;
vcpu->run->s.regs.gprs[reg1] |= key;
return 0;
@@ -279,8 +290,9 @@ static int handle_iske(struct kvm_vcpu *vcpu)
static int handle_rrbe(struct kvm_vcpu *vcpu)
{
unsigned long addr;
unsigned long vmaddr, gaddr;
int reg1, reg2;
bool unlocked;
int rc;
vcpu->stat.instruction_rrbe++;
@@ -294,19 +306,27 @@ static int handle_rrbe(struct kvm_vcpu *vcpu)
kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
addr = kvm_s390_logical_to_effective(vcpu, addr);
addr = kvm_s390_real_to_abs(vcpu, addr);
addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(addr));
if (kvm_is_error_hva(addr))
gaddr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
gaddr = kvm_s390_logical_to_effective(vcpu, gaddr);
gaddr = kvm_s390_real_to_abs(vcpu, gaddr);
vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gaddr));
if (kvm_is_error_hva(vmaddr))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
retry:
unlocked = false;
down_read(&current->mm->mmap_sem);
rc = reset_guest_reference_bit(current->mm, addr);
up_read(&current->mm->mmap_sem);
rc = reset_guest_reference_bit(current->mm, vmaddr);
if (rc < 0) {
rc = fixup_user_fault(current, current->mm, vmaddr,
FAULT_FLAG_WRITE, &unlocked);
if (!rc) {
up_read(&current->mm->mmap_sem);
goto retry;
}
}
if (rc < 0)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
up_read(&current->mm->mmap_sem);
kvm_s390_set_psw_cc(vcpu, rc);
return 0;
}
@@ -321,6 +341,7 @@ static int handle_sske(struct kvm_vcpu *vcpu)
unsigned long start, end;
unsigned char key, oldkey;
int reg1, reg2;
bool unlocked;
int rc;
vcpu->stat.instruction_sske++;
@@ -353,19 +374,28 @@ static int handle_sske(struct kvm_vcpu *vcpu)
}
while (start != end) {
unsigned long addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start));
unsigned long vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start));
unlocked = false;
if (kvm_is_error_hva(addr))
if (kvm_is_error_hva(vmaddr))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
down_read(&current->mm->mmap_sem);
rc = cond_set_guest_storage_key(current->mm, addr, key, &oldkey,
rc = cond_set_guest_storage_key(current->mm, vmaddr, key, &oldkey,
m3 & SSKE_NQ, m3 & SSKE_MR,
m3 & SSKE_MC);
up_read(&current->mm->mmap_sem);
if (rc < 0)
if (rc < 0) {
rc = fixup_user_fault(current, current->mm, vmaddr,
FAULT_FLAG_WRITE, &unlocked);
rc = !rc ? -EAGAIN : rc;
}
if (rc == -EFAULT)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
start += PAGE_SIZE;
up_read(&current->mm->mmap_sem);
if (rc >= 0)
start += PAGE_SIZE;
}
if (m3 & (SSKE_MC | SSKE_MR)) {
@@ -946,11 +976,12 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
}
while (start != end) {
unsigned long useraddr;
unsigned long vmaddr;
bool unlocked = false;
/* Translate guest address to host address */
useraddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start));
if (kvm_is_error_hva(useraddr))
vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start));
if (kvm_is_error_hva(vmaddr))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
@@ -964,14 +995,20 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
if (rc)
return rc;
down_read(&current->mm->mmap_sem);
rc = cond_set_guest_storage_key(current->mm, useraddr,
rc = cond_set_guest_storage_key(current->mm, vmaddr,
key, NULL, nq, mr, mc);
up_read(&current->mm->mmap_sem);
if (rc < 0)
if (rc < 0) {
rc = fixup_user_fault(current, current->mm, vmaddr,
FAULT_FLAG_WRITE, &unlocked);
rc = !rc ? -EAGAIN : rc;
}
if (rc == -EFAULT)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
}
start += PAGE_SIZE;
up_read(&current->mm->mmap_sem);
if (rc >= 0)
start += PAGE_SIZE;
}
}
if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT) {