Merge tag 'kvm-ppc-next-5.4-1' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc into HEAD

PPC KVM update for 5.4

- Some prep for extending the uses of the rmap array
- Various minor fixes
- Commits from the powerpc topic/ppc-kvm branch, which fix a problem
  with interrupts arriving after free_irq, causing host hangs and crashes.
Este commit está contenido en:
Paolo Bonzini
2019-09-10 16:51:17 +02:00
Se han modificado 553 ficheros con 4125 adiciones y 4911 borrados

Ver fichero

@@ -1083,9 +1083,11 @@ static int kvmppc_book3s_init(void)
if (xics_on_xive()) {
kvmppc_xive_init_module();
kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS);
kvmppc_xive_native_init_module();
kvm_register_device_ops(&kvm_xive_native_ops,
KVM_DEV_TYPE_XIVE);
if (kvmppc_xive_native_supported()) {
kvmppc_xive_native_init_module();
kvm_register_device_ops(&kvm_xive_native_ops,
KVM_DEV_TYPE_XIVE);
}
} else
#endif
kvm_register_device_ops(&kvm_xics_ops, KVM_DEV_TYPE_XICS);

Ver fichero

@@ -239,6 +239,7 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
case 2:
case 6:
pte->may_write = true;
/* fall through */
case 3:
case 5:
case 7:

Ver fichero

@@ -1678,7 +1678,14 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
*val = get_reg_val(id, vcpu->arch.pspb);
break;
case KVM_REG_PPC_DPDES:
*val = get_reg_val(id, vcpu->arch.vcore->dpdes);
/*
* On POWER9, where we are emulating msgsndp etc.,
* we return 1 bit for each vcpu, which can come from
* either vcore->dpdes or doorbell_request.
* On POWER8, doorbell_request is 0.
*/
*val = get_reg_val(id, vcpu->arch.vcore->dpdes |
vcpu->arch.doorbell_request);
break;
case KVM_REG_PPC_VTB:
*val = get_reg_val(id, vcpu->arch.vcore->vtb);
@@ -2860,7 +2867,7 @@ static void collect_piggybacks(struct core_info *cip, int target_threads)
if (!spin_trylock(&pvc->lock))
continue;
prepare_threads(pvc);
if (!pvc->n_runnable) {
if (!pvc->n_runnable || !pvc->kvm->arch.mmu_ready) {
list_del_init(&pvc->preempt_list);
if (pvc->runner == NULL) {
pvc->vcore_state = VCORE_INACTIVE;
@@ -2881,15 +2888,20 @@ static void collect_piggybacks(struct core_info *cip, int target_threads)
spin_unlock(&lp->lock);
}
static bool recheck_signals(struct core_info *cip)
static bool recheck_signals_and_mmu(struct core_info *cip)
{
int sub, i;
struct kvm_vcpu *vcpu;
struct kvmppc_vcore *vc;
for (sub = 0; sub < cip->n_subcores; ++sub)
for_each_runnable_thread(i, vcpu, cip->vc[sub])
for (sub = 0; sub < cip->n_subcores; ++sub) {
vc = cip->vc[sub];
if (!vc->kvm->arch.mmu_ready)
return true;
for_each_runnable_thread(i, vcpu, vc)
if (signal_pending(vcpu->arch.run_task))
return true;
}
return false;
}
@@ -3119,7 +3131,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
local_irq_disable();
hard_irq_disable();
if (lazy_irq_pending() || need_resched() ||
recheck_signals(&core_info) || !vc->kvm->arch.mmu_ready) {
recheck_signals_and_mmu(&core_info)) {
local_irq_enable();
vc->vcore_state = VCORE_INACTIVE;
/* Unlock all except the primary vcore */
@@ -3569,9 +3581,18 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
mtspr(SPRN_DEC, vcpu->arch.dec_expires - mftb());
if (kvmhv_on_pseries()) {
/*
* We need to save and restore the guest visible part of the
* psscr (i.e. using SPRN_PSSCR_PR) since the hypervisor
* doesn't do this for us. Note only required if pseries since
* this is done in kvmhv_load_hv_regs_and_go() below otherwise.
*/
unsigned long host_psscr;
/* call our hypervisor to load up HV regs and go */
struct hv_guest_state hvregs;
host_psscr = mfspr(SPRN_PSSCR_PR);
mtspr(SPRN_PSSCR_PR, vcpu->arch.psscr);
kvmhv_save_hv_regs(vcpu, &hvregs);
hvregs.lpcr = lpcr;
vcpu->arch.regs.msr = vcpu->arch.shregs.msr;
@@ -3590,6 +3611,8 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
vcpu->arch.shregs.msr = vcpu->arch.regs.msr;
vcpu->arch.shregs.dar = mfspr(SPRN_DAR);
vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR);
vcpu->arch.psscr = mfspr(SPRN_PSSCR_PR);
mtspr(SPRN_PSSCR_PR, host_psscr);
/* H_CEDE has to be handled now, not later */
if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
@@ -3654,6 +3677,8 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
vcpu->arch.vpa.dirty = 1;
save_pmu = lp->pmcregs_in_use;
}
/* Must save pmu if this guest is capable of running nested guests */
save_pmu |= nesting_enabled(vcpu->kvm);
kvmhv_save_guest_pmu(vcpu, save_pmu);

Ver fichero

@@ -99,7 +99,7 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
} else {
rev->forw = rev->back = pte_index;
*rmap = (*rmap & ~KVMPPC_RMAP_INDEX) |
pte_index | KVMPPC_RMAP_PRESENT;
pte_index | KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_HPT;
}
unlock_rmap(rmap);
}

Ver fichero

@@ -942,6 +942,8 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
ld r11, VCPU_XIVE_SAVED_STATE(r4)
li r9, TM_QW1_OS
lwz r8, VCPU_XIVE_CAM_WORD(r4)
cmpwi r8, 0
beq no_xive
li r7, TM_QW1_OS + TM_WORD2
mfmsr r0
andi. r0, r0, MSR_DR /* in real mode? */
@@ -2831,29 +2833,39 @@ kvm_cede_prodded:
kvm_cede_exit:
ld r9, HSTATE_KVM_VCPU(r13)
#ifdef CONFIG_KVM_XICS
/* Abort if we still have a pending escalation */
lbz r5, VCPU_XIVE_ESC_ON(r9)
cmpwi r5, 0
beq 1f
li r0, 0
stb r0, VCPU_CEDED(r9)
1: /* Enable XIVE escalation */
li r5, XIVE_ESB_SET_PQ_00
mfmsr r0
andi. r0, r0, MSR_DR /* in real mode? */
beq 1f
/* are we using XIVE with single escalation? */
ld r10, VCPU_XIVE_ESC_VADDR(r9)
cmpdi r10, 0
beq 3f
ldx r0, r10, r5
li r6, XIVE_ESB_SET_PQ_00
/*
* If we still have a pending escalation, abort the cede,
* and we must set PQ to 10 rather than 00 so that we don't
* potentially end up with two entries for the escalation
* interrupt in the XIVE interrupt queue. In that case
* we also don't want to set xive_esc_on to 1 here in
* case we race with xive_esc_irq().
*/
lbz r5, VCPU_XIVE_ESC_ON(r9)
cmpwi r5, 0
beq 4f
li r0, 0
stb r0, VCPU_CEDED(r9)
li r6, XIVE_ESB_SET_PQ_10
b 5f
4: li r0, 1
stb r0, VCPU_XIVE_ESC_ON(r9)
/* make sure store to xive_esc_on is seen before xive_esc_irq runs */
sync
5: /* Enable XIVE escalation */
mfmsr r0
andi. r0, r0, MSR_DR /* in real mode? */
beq 1f
ldx r0, r10, r6
b 2f
1: ld r10, VCPU_XIVE_ESC_RADDR(r9)
cmpdi r10, 0
beq 3f
ldcix r0, r10, r5
ldcix r0, r10, r6
2: sync
li r0, 1
stb r0, VCPU_XIVE_ESC_ON(r9)
#endif /* CONFIG_KVM_XICS */
3: b guest_exit_cont

Ver fichero

@@ -67,8 +67,14 @@ void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu)
void __iomem *tima = local_paca->kvm_hstate.xive_tima_virt;
u64 pq;
if (!tima)
/*
* Nothing to do if the platform doesn't have a XIVE
* or this vCPU doesn't have its own XIVE context
* (e.g. because it's not using an in-kernel interrupt controller).
*/
if (!tima || !vcpu->arch.xive_cam_word)
return;
eieio();
__raw_writeq(vcpu->arch.xive_saved_state.w01, tima + TM_QW1_OS);
__raw_writel(vcpu->arch.xive_cam_word, tima + TM_QW1_OS + TM_WORD2);
@@ -160,6 +166,9 @@ static irqreturn_t xive_esc_irq(int irq, void *data)
*/
vcpu->arch.xive_esc_on = false;
/* This orders xive_esc_on = false vs. subsequent stale_p = true */
smp_wmb(); /* goes with smp_mb() in cleanup_single_escalation */
return IRQ_HANDLED;
}
@@ -1113,6 +1122,31 @@ void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu)
vcpu->arch.xive_esc_raddr = 0;
}
/*
* In single escalation mode, the escalation interrupt is marked so
* that EOI doesn't re-enable it, but just sets the stale_p flag to
* indicate that the P bit has already been dealt with. However, the
* assembly code that enters the guest sets PQ to 00 without clearing
* stale_p (because it has no easy way to address it). Hence we have
* to adjust stale_p before shutting down the interrupt.
*/
void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu,
struct kvmppc_xive_vcpu *xc, int irq)
{
struct irq_data *d = irq_get_irq_data(irq);
struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
/*
* This slightly odd sequence gives the right result
* (i.e. stale_p set if xive_esc_on is false) even if
* we race with xive_esc_irq() and xive_irq_eoi().
*/
xd->stale_p = false;
smp_mb(); /* paired with smb_wmb in xive_esc_irq */
if (!vcpu->arch.xive_esc_on)
xd->stale_p = true;
}
void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
{
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
@@ -1134,20 +1168,28 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
/* Mask the VP IPI */
xive_vm_esb_load(&xc->vp_ipi_data, XIVE_ESB_SET_PQ_01);
/* Disable the VP */
xive_native_disable_vp(xc->vp_id);
/* Free the queues & associated interrupts */
/* Free escalations */
for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
struct xive_q *q = &xc->queues[i];
/* Free the escalation irq */
if (xc->esc_virq[i]) {
if (xc->xive->single_escalation)
xive_cleanup_single_escalation(vcpu, xc,
xc->esc_virq[i]);
free_irq(xc->esc_virq[i], vcpu);
irq_dispose_mapping(xc->esc_virq[i]);
kfree(xc->esc_virq_names[i]);
}
/* Free the queue */
}
/* Disable the VP */
xive_native_disable_vp(xc->vp_id);
/* Clear the cam word so guest entry won't try to push context */
vcpu->arch.xive_cam_word = 0;
/* Free the queues */
for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
struct xive_q *q = &xc->queues[i];
xive_native_disable_queue(xc->vp_id, q, i);
if (q->qpage) {
free_pages((unsigned long)q->qpage,
@@ -1986,10 +2028,8 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
xive->single_escalation = xive_native_has_single_escalation();
if (ret) {
kfree(xive);
if (ret)
return ret;
}
return 0;
}

Ver fichero

@@ -282,6 +282,8 @@ int kvmppc_xive_select_target(struct kvm *kvm, u32 *server, u8 prio);
int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio,
bool single_escalation);
struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type);
void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu,
struct kvmppc_xive_vcpu *xc, int irq);
#endif /* CONFIG_KVM_XICS */
#endif /* _KVM_PPC_BOOK3S_XICS_H */

Ver fichero

@@ -67,20 +67,28 @@ void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu)
xc->valid = false;
kvmppc_xive_disable_vcpu_interrupts(vcpu);
/* Disable the VP */
xive_native_disable_vp(xc->vp_id);
/* Free the queues & associated interrupts */
/* Free escalations */
for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
/* Free the escalation irq */
if (xc->esc_virq[i]) {
if (xc->xive->single_escalation)
xive_cleanup_single_escalation(vcpu, xc,
xc->esc_virq[i]);
free_irq(xc->esc_virq[i], vcpu);
irq_dispose_mapping(xc->esc_virq[i]);
kfree(xc->esc_virq_names[i]);
xc->esc_virq[i] = 0;
}
}
/* Free the queue */
/* Disable the VP */
xive_native_disable_vp(xc->vp_id);
/* Clear the cam word so guest entry won't try to push context */
vcpu->arch.xive_cam_word = 0;
/* Free the queues */
for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
kvmppc_xive_native_cleanup_queue(vcpu, i);
}
@@ -1090,9 +1098,9 @@ static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type)
xive->ops = &kvmppc_xive_native_ops;
if (ret)
kfree(xive);
return ret;
return ret;
return 0;
}
/*
@@ -1171,6 +1179,11 @@ int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
return 0;
}
bool kvmppc_xive_native_supported(void)
{
return xive_native_has_queue_state_support();
}
static int xive_native_debug_show(struct seq_file *m, void *private)
{
struct kvmppc_xive *xive = m->private;

Ver fichero

@@ -271,6 +271,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
*/
if (inst == KVMPPC_INST_SW_BREAKPOINT) {
run->exit_reason = KVM_EXIT_DEBUG;
run->debug.arch.status = 0;
run->debug.arch.address = kvmppc_get_pc(vcpu);
emulated = EMULATE_EXIT_USER;
advance = 0;

Ver fichero

@@ -89,12 +89,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
rs = get_rs(inst);
rt = get_rt(inst);
/*
* if mmio_vsx_tx_sx_enabled == 0, copy data between
* VSR[0..31] and memory
* if mmio_vsx_tx_sx_enabled == 1, copy data between
* VSR[32..63] and memory
*/
vcpu->arch.mmio_vsx_copy_nums = 0;
vcpu->arch.mmio_vsx_offset = 0;
vcpu->arch.mmio_copy_type = KVMPPC_VSX_COPY_NONE;

Ver fichero

@@ -561,7 +561,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
* a POWER9 processor) and the PowerNV platform, as
* nested is not yet supported.
*/
r = xive_enabled() && !!cpu_has_feature(CPU_FTR_HVMODE);
r = xive_enabled() && !!cpu_has_feature(CPU_FTR_HVMODE) &&
kvmppc_xive_native_supported();
break;
#endif