Merge tag 'kvm-ppc-next-5.4-1' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc into HEAD
PPC KVM update for 5.4

- Some prep for extending the uses of the rmap array

- Various minor fixes

- Commits from the powerpc topic/ppc-kvm branch, which fix a problem with
  interrupts arriving after free_irq, causing host hangs and crashes.
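The free_irq problem called out above is what the XIVE cleanup hunks below address: escalation interrupts are now freed (and their stale_p state adjusted) while the VP is still enabled, and only afterwards is the VP disabled and the CAM word cleared. The following is a minimal sketch of that ordering, paraphrasing the hunks below; the helper name is invented for illustration, and locking, error handling and the queue teardown are elided:

/* Sketch only: mirrors the reordered teardown in the XIVE cleanup hunks below. */
static void xive_vcpu_teardown_sketch(struct kvm_vcpu *vcpu,
                                      struct kvmppc_xive_vcpu *xc)
{
        int i;

        /* Free escalation interrupts first, so none can fire after free_irq(). */
        for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
                if (!xc->esc_virq[i])
                        continue;
                if (xc->xive->single_escalation)
                        xive_cleanup_single_escalation(vcpu, xc, xc->esc_virq[i]);
                free_irq(xc->esc_virq[i], vcpu);
                irq_dispose_mapping(xc->esc_virq[i]);
        }

        /* Only then disable the VP and clear the CAM word, so a later guest
         * entry will not try to push a stale XIVE context. */
        xive_native_disable_vp(xc->vp_id);
        vcpu->arch.xive_cam_word = 0;
}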
@@ -1083,9 +1083,11 @@ static int kvmppc_book3s_init(void)
 	if (xics_on_xive()) {
 		kvmppc_xive_init_module();
 		kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS);
-		kvmppc_xive_native_init_module();
-		kvm_register_device_ops(&kvm_xive_native_ops,
-					KVM_DEV_TYPE_XIVE);
+		if (kvmppc_xive_native_supported()) {
+			kvmppc_xive_native_init_module();
+			kvm_register_device_ops(&kvm_xive_native_ops,
+						KVM_DEV_TYPE_XIVE);
+		}
 	} else
 #endif
 		kvm_register_device_ops(&kvm_xics_ops, KVM_DEV_TYPE_XICS);

@@ -239,6 +239,7 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
 			case 2:
 			case 6:
 				pte->may_write = true;
+				/* fall through */
 			case 3:
 			case 5:
 			case 7:

@@ -1678,7 +1678,14 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
 		*val = get_reg_val(id, vcpu->arch.pspb);
 		break;
 	case KVM_REG_PPC_DPDES:
-		*val = get_reg_val(id, vcpu->arch.vcore->dpdes);
+		/*
+		 * On POWER9, where we are emulating msgsndp etc.,
+		 * we return 1 bit for each vcpu, which can come from
+		 * either vcore->dpdes or doorbell_request.
+		 * On POWER8, doorbell_request is 0.
+		 */
+		*val = get_reg_val(id, vcpu->arch.vcore->dpdes |
+				   vcpu->arch.doorbell_request);
 		break;
 	case KVM_REG_PPC_VTB:
 		*val = get_reg_val(id, vcpu->arch.vcore->vtb);

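For user space, the visible effect of the DPDES change above is only in the value returned by KVM_GET_ONE_REG: on POWER9 the doorbell_request bits are now folded in. A hypothetical user-space sketch of reading the register follows (the helper name and error handling are mine; the ioctl, struct kvm_one_reg and KVM_REG_PPC_DPDES are the standard KVM ABI):

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>	/* struct kvm_one_reg, KVM_GET_ONE_REG, KVM_REG_PPC_DPDES */

/* Read the (possibly doorbell-augmented) DPDES value from one vcpu fd. */
static uint64_t read_dpdes(int vcpu_fd)
{
        uint64_t dpdes = 0;
        struct kvm_one_reg reg = {
                .id   = KVM_REG_PPC_DPDES,
                .addr = (uintptr_t)&dpdes,
        };

        if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
                dpdes = 0;	/* treat failure as "no doorbells pending" */
        return dpdes;
}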
@@ -2860,7 +2867,7 @@ static void collect_piggybacks(struct core_info *cip, int target_threads)
 		if (!spin_trylock(&pvc->lock))
 			continue;
 		prepare_threads(pvc);
-		if (!pvc->n_runnable) {
+		if (!pvc->n_runnable || !pvc->kvm->arch.mmu_ready) {
 			list_del_init(&pvc->preempt_list);
 			if (pvc->runner == NULL) {
 				pvc->vcore_state = VCORE_INACTIVE;

@@ -2881,15 +2888,20 @@ static void collect_piggybacks(struct core_info *cip, int target_threads)
 	spin_unlock(&lp->lock);
 }
 
-static bool recheck_signals(struct core_info *cip)
+static bool recheck_signals_and_mmu(struct core_info *cip)
 {
 	int sub, i;
 	struct kvm_vcpu *vcpu;
+	struct kvmppc_vcore *vc;
 
-	for (sub = 0; sub < cip->n_subcores; ++sub)
-		for_each_runnable_thread(i, vcpu, cip->vc[sub])
+	for (sub = 0; sub < cip->n_subcores; ++sub) {
+		vc = cip->vc[sub];
+		if (!vc->kvm->arch.mmu_ready)
+			return true;
+		for_each_runnable_thread(i, vcpu, vc)
 			if (signal_pending(vcpu->arch.run_task))
 				return true;
+	}
 	return false;
 }

@@ -3119,7 +3131,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	local_irq_disable();
 	hard_irq_disable();
 	if (lazy_irq_pending() || need_resched() ||
-	    recheck_signals(&core_info) || !vc->kvm->arch.mmu_ready) {
+	    recheck_signals_and_mmu(&core_info)) {
 		local_irq_enable();
 		vc->vcore_state = VCORE_INACTIVE;
 		/* Unlock all except the primary vcore */

@@ -3569,9 +3581,18 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 	mtspr(SPRN_DEC, vcpu->arch.dec_expires - mftb());
 
 	if (kvmhv_on_pseries()) {
+		/*
+		 * We need to save and restore the guest visible part of the
+		 * psscr (i.e. using SPRN_PSSCR_PR) since the hypervisor
+		 * doesn't do this for us. Note only required if pseries since
+		 * this is done in kvmhv_load_hv_regs_and_go() below otherwise.
+		 */
+		unsigned long host_psscr;
 		/* call our hypervisor to load up HV regs and go */
 		struct hv_guest_state hvregs;
 
+		host_psscr = mfspr(SPRN_PSSCR_PR);
+		mtspr(SPRN_PSSCR_PR, vcpu->arch.psscr);
 		kvmhv_save_hv_regs(vcpu, &hvregs);
 		hvregs.lpcr = lpcr;
 		vcpu->arch.regs.msr = vcpu->arch.shregs.msr;

@@ -3590,6 +3611,8 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 		vcpu->arch.shregs.msr = vcpu->arch.regs.msr;
 		vcpu->arch.shregs.dar = mfspr(SPRN_DAR);
 		vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR);
+		vcpu->arch.psscr = mfspr(SPRN_PSSCR_PR);
+		mtspr(SPRN_PSSCR_PR, host_psscr);
 
 		/* H_CEDE has to be handled now, not later */
 		if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&

@@ -3654,6 +3677,8 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 		vcpu->arch.vpa.dirty = 1;
 		save_pmu = lp->pmcregs_in_use;
 	}
+	/* Must save pmu if this guest is capable of running nested guests */
+	save_pmu |= nesting_enabled(vcpu->kvm);
 
 	kvmhv_save_guest_pmu(vcpu, save_pmu);

@@ -99,7 +99,7 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
 	} else {
 		rev->forw = rev->back = pte_index;
 		*rmap = (*rmap & ~KVMPPC_RMAP_INDEX) |
-			pte_index | KVMPPC_RMAP_PRESENT;
+			pte_index | KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_HPT;
 	}
 	unlock_rmap(rmap);
 }

@@ -942,6 +942,8 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
 	ld	r11, VCPU_XIVE_SAVED_STATE(r4)
 	li	r9, TM_QW1_OS
 	lwz	r8, VCPU_XIVE_CAM_WORD(r4)
+	cmpwi	r8, 0
+	beq	no_xive
 	li	r7, TM_QW1_OS + TM_WORD2
 	mfmsr	r0
 	andi.	r0, r0, MSR_DR		/* in real mode? */

@@ -2831,29 +2833,39 @@ kvm_cede_prodded:
 kvm_cede_exit:
 	ld	r9, HSTATE_KVM_VCPU(r13)
 #ifdef CONFIG_KVM_XICS
-	/* Abort if we still have a pending escalation */
-	lbz	r5, VCPU_XIVE_ESC_ON(r9)
-	cmpwi	r5, 0
-	beq	1f
-	li	r0, 0
-	stb	r0, VCPU_CEDED(r9)
-1:	/* Enable XIVE escalation */
-	li	r5, XIVE_ESB_SET_PQ_00
-	mfmsr	r0
-	andi.	r0, r0, MSR_DR		/* in real mode? */
-	beq	1f
+	/* are we using XIVE with single escalation? */
 	ld	r10, VCPU_XIVE_ESC_VADDR(r9)
 	cmpdi	r10, 0
 	beq	3f
-	ldx	r0, r10, r5
+	li	r6, XIVE_ESB_SET_PQ_00
+	/*
+	 * If we still have a pending escalation, abort the cede,
+	 * and we must set PQ to 10 rather than 00 so that we don't
+	 * potentially end up with two entries for the escalation
+	 * interrupt in the XIVE interrupt queue. In that case
+	 * we also don't want to set xive_esc_on to 1 here in
+	 * case we race with xive_esc_irq().
+	 */
+	lbz	r5, VCPU_XIVE_ESC_ON(r9)
+	cmpwi	r5, 0
+	beq	4f
+	li	r0, 0
+	stb	r0, VCPU_CEDED(r9)
+	li	r6, XIVE_ESB_SET_PQ_10
+	b	5f
+4:	li	r0, 1
+	stb	r0, VCPU_XIVE_ESC_ON(r9)
+	/* make sure store to xive_esc_on is seen before xive_esc_irq runs */
+	sync
+5:	/* Enable XIVE escalation */
+	mfmsr	r0
+	andi.	r0, r0, MSR_DR		/* in real mode? */
+	beq	1f
+	ldx	r0, r10, r6
 	b	2f
 1:	ld	r10, VCPU_XIVE_ESC_RADDR(r9)
 	cmpdi	r10, 0
 	beq	3f
-	ldcix	r0, r10, r5
+	ldcix	r0, r10, r6
 2:	sync
-	li	r0, 1
-	stb	r0, VCPU_XIVE_ESC_ON(r9)
 #endif /* CONFIG_KVM_XICS */
 3:	b	guest_exit_cont

@@ -67,8 +67,14 @@ void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu)
 	void __iomem *tima = local_paca->kvm_hstate.xive_tima_virt;
 	u64 pq;
 
-	if (!tima)
+	/*
+	 * Nothing to do if the platform doesn't have a XIVE
+	 * or this vCPU doesn't have its own XIVE context
+	 * (e.g. because it's not using an in-kernel interrupt controller).
+	 */
+	if (!tima || !vcpu->arch.xive_cam_word)
 		return;
+
 	eieio();
 	__raw_writeq(vcpu->arch.xive_saved_state.w01, tima + TM_QW1_OS);
 	__raw_writel(vcpu->arch.xive_cam_word, tima + TM_QW1_OS + TM_WORD2);

@@ -160,6 +166,9 @@ static irqreturn_t xive_esc_irq(int irq, void *data)
 	 */
 	vcpu->arch.xive_esc_on = false;
 
+	/* This orders xive_esc_on = false vs. subsequent stale_p = true */
+	smp_wmb();	/* goes with smp_mb() in cleanup_single_escalation */
+
 	return IRQ_HANDLED;
 }

@@ -1113,6 +1122,31 @@ void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu)
 	vcpu->arch.xive_esc_raddr = 0;
 }
 
+/*
+ * In single escalation mode, the escalation interrupt is marked so
+ * that EOI doesn't re-enable it, but just sets the stale_p flag to
+ * indicate that the P bit has already been dealt with. However, the
+ * assembly code that enters the guest sets PQ to 00 without clearing
+ * stale_p (because it has no easy way to address it). Hence we have
+ * to adjust stale_p before shutting down the interrupt.
+ */
+void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu,
+				    struct kvmppc_xive_vcpu *xc, int irq)
+{
+	struct irq_data *d = irq_get_irq_data(irq);
+	struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+
+	/*
+	 * This slightly odd sequence gives the right result
+	 * (i.e. stale_p set if xive_esc_on is false) even if
+	 * we race with xive_esc_irq() and xive_irq_eoi().
+	 */
+	xd->stale_p = false;
+	smp_mb();	/* paired with smb_wmb in xive_esc_irq */
+	if (!vcpu->arch.xive_esc_on)
+		xd->stale_p = true;
+}
+
 void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;

@@ -1134,20 +1168,28 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
 	/* Mask the VP IPI */
 	xive_vm_esb_load(&xc->vp_ipi_data, XIVE_ESB_SET_PQ_01);
 
-	/* Disable the VP */
-	xive_native_disable_vp(xc->vp_id);
-
-	/* Free the queues & associated interrupts */
+	/* Free escalations */
 	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
-		struct xive_q *q = &xc->queues[i];
-
-		/* Free the escalation irq */
 		if (xc->esc_virq[i]) {
+			if (xc->xive->single_escalation)
+				xive_cleanup_single_escalation(vcpu, xc,
+							xc->esc_virq[i]);
 			free_irq(xc->esc_virq[i], vcpu);
 			irq_dispose_mapping(xc->esc_virq[i]);
 			kfree(xc->esc_virq_names[i]);
 		}
-		/* Free the queue */
+	}
+
+	/* Disable the VP */
+	xive_native_disable_vp(xc->vp_id);
+
+	/* Clear the cam word so guest entry won't try to push context */
+	vcpu->arch.xive_cam_word = 0;
+
+	/* Free the queues */
+	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
+		struct xive_q *q = &xc->queues[i];
+
 		xive_native_disable_queue(xc->vp_id, q, i);
 		if (q->qpage) {
 			free_pages((unsigned long)q->qpage,

@@ -1986,10 +2028,8 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
 
 	xive->single_escalation = xive_native_has_single_escalation();
 
-	if (ret) {
-		kfree(xive);
+	if (ret)
 		return ret;
-	}
 
 	return 0;
 }

@@ -282,6 +282,8 @@ int kvmppc_xive_select_target(struct kvm *kvm, u32 *server, u8 prio);
 int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio,
 				  bool single_escalation);
 struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type);
+void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu,
+				    struct kvmppc_xive_vcpu *xc, int irq);
 
 #endif /* CONFIG_KVM_XICS */
 #endif /* _KVM_PPC_BOOK3S_XICS_H */

@@ -67,20 +67,28 @@ void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu)
 	xc->valid = false;
 	kvmppc_xive_disable_vcpu_interrupts(vcpu);
 
-	/* Disable the VP */
-	xive_native_disable_vp(xc->vp_id);
-
-	/* Free the queues & associated interrupts */
+	/* Free escalations */
 	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
 		/* Free the escalation irq */
 		if (xc->esc_virq[i]) {
+			if (xc->xive->single_escalation)
+				xive_cleanup_single_escalation(vcpu, xc,
+							xc->esc_virq[i]);
 			free_irq(xc->esc_virq[i], vcpu);
 			irq_dispose_mapping(xc->esc_virq[i]);
 			kfree(xc->esc_virq_names[i]);
 			xc->esc_virq[i] = 0;
 		}
+	}
 
-		/* Free the queue */
+	/* Disable the VP */
+	xive_native_disable_vp(xc->vp_id);
+
+	/* Clear the cam word so guest entry won't try to push context */
+	vcpu->arch.xive_cam_word = 0;
+
+	/* Free the queues */
+	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
 		kvmppc_xive_native_cleanup_queue(vcpu, i);
 	}

@@ -1090,9 +1098,9 @@ static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type)
 	xive->ops = &kvmppc_xive_native_ops;
 
 	if (ret)
-		kfree(xive);
+		return ret;
 
-	return ret;
+	return 0;
 }
 
 /*

@@ -1171,6 +1179,11 @@ int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
 	return 0;
 }
 
+bool kvmppc_xive_native_supported(void)
+{
+	return xive_native_has_queue_state_support();
+}
+
 static int xive_native_debug_show(struct seq_file *m, void *private)
 {
 	struct kvmppc_xive *xive = m->private;

@@ -271,6 +271,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	 */
 	if (inst == KVMPPC_INST_SW_BREAKPOINT) {
 		run->exit_reason = KVM_EXIT_DEBUG;
+		run->debug.arch.status = 0;
 		run->debug.arch.address = kvmppc_get_pc(vcpu);
 		emulated = EMULATE_EXIT_USER;
 		advance = 0;

@@ -89,12 +89,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
 	rs = get_rs(inst);
 	rt = get_rt(inst);
 
-	/*
-	 * if mmio_vsx_tx_sx_enabled == 0, copy data between
-	 * VSR[0..31] and memory
-	 * if mmio_vsx_tx_sx_enabled == 1, copy data between
-	 * VSR[32..63] and memory
-	 */
 	vcpu->arch.mmio_vsx_copy_nums = 0;
 	vcpu->arch.mmio_vsx_offset = 0;
 	vcpu->arch.mmio_copy_type = KVMPPC_VSX_COPY_NONE;

@@ -561,7 +561,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		 * a POWER9 processor) and the PowerNV platform, as
 		 * nested is not yet supported.
 		 */
-		r = xive_enabled() && !!cpu_has_feature(CPU_FTR_HVMODE);
+		r = xive_enabled() && !!cpu_has_feature(CPU_FTR_HVMODE) &&
+			kvmppc_xive_native_supported();
 		break;
 #endif

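With the capability check in the last hunk now gated on kvmppc_xive_native_supported(), user space should probe KVM_CAP_PPC_IRQ_XIVE before creating a XIVE-native device. A hypothetical sketch using the standard KVM ioctls (the helper name is mine; VM setup and fallback handling are elided):

#include <sys/ioctl.h>
#include <linux/kvm.h>	/* KVM_CHECK_EXTENSION, KVM_CAP_PPC_IRQ_XIVE, KVM_CREATE_DEVICE */

/* Return the fd of a new XIVE-native in-kernel device, or -1 if unsupported. */
static int create_xive_native_device(int vm_fd)
{
        struct kvm_create_device cd = { .type = KVM_DEV_TYPE_XIVE };

        if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_PPC_IRQ_XIVE) <= 0)
                return -1;	/* no queue state support; fall back to XICS */
        if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
                return -1;
        return cd.fd;	/* device fd filled in by the kernel */
}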