Merge tag 'kvm-ppc-next-4.16-1' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc
PPC KVM update for 4.16

- Allow HPT guests to run on a radix host on POWER9 v2.2 CPUs without
  requiring the complex thread synchronization that earlier CPU versions
  required.

- A series from Ben Herrenschmidt to improve the handling of escalation
  interrupts with the XIVE interrupt controller.

- Provide for the decrementer register to be copied across on migration.

- Various minor cleanups and bugfixes.
@@ -573,7 +573,7 @@ long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm,
 		j = i + 1;
 		if (npages) {
 			set_dirty_bits(map, i, npages);
-			i = j + npages;
+			j = i + npages;
 		}
 	}
 	return 0;
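This hunk fixes a one-line index bug. Assuming the enclosing loop advances with i = j, as the surrounding context suggests, the old `i = j + npages;` was immediately clobbered by that step, so the scan crept forward one page at a time over runs it had already reported. A minimal standalone sketch of the same scan pattern, with hypothetical stand-ins for the kernel helpers:

    #include <stddef.h>

    /* Hypothetical stand-ins for the kernel's helpers, for illustration only:
     * test_clear_dirty() returns the length of the dirty run starting at page
     * i (0 if the page is clean) and clears those dirty bits; report_dirty()
     * sets the corresponding bits in the user-visible dirty log.
     */
    extern size_t test_clear_dirty(size_t i);
    extern void report_dirty(size_t i, size_t npages);

    static void scan_dirty_log(size_t npages_total)
    {
        size_t i, j, npages;

        for (i = 0; i < npages_total; i = j) {
            npages = test_clear_dirty(i);
            j = i + 1;              /* default: advance one page */
            if (npages) {
                report_dirty(i, npages);
                j = i + npages;     /* the fix: skip the whole reported run */
            }
        }
    }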
@@ -118,6 +118,9 @@ module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect,
 MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
 #endif

+/* If set, the threads on each CPU core have to be in the same MMU mode */
+static bool no_mixing_hpt_and_radix;
+
 static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);

@@ -1497,6 +1500,10 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
 	case KVM_REG_PPC_ARCH_COMPAT:
 		*val = get_reg_val(id, vcpu->arch.vcore->arch_compat);
 		break;
+	case KVM_REG_PPC_DEC_EXPIRY:
+		*val = get_reg_val(id, vcpu->arch.dec_expires +
+				   vcpu->arch.vcore->tb_offset);
+		break;
 	default:
 		r = -EINVAL;
 		break;
@@ -1724,6 +1731,10 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
 	case KVM_REG_PPC_ARCH_COMPAT:
 		r = kvmppc_set_arch_compat(vcpu, set_reg_val(id, *val));
 		break;
+	case KVM_REG_PPC_DEC_EXPIRY:
+		vcpu->arch.dec_expires = set_reg_val(id, *val) -
+			vcpu->arch.vcore->tb_offset;
+		break;
 	default:
 		r = -EINVAL;
 		break;
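These two hunks expose the HV decrementer expiry through the one-reg interface in guest-timebase units: a read returns dec_expires + tb_offset, and a write stores the given value minus tb_offset back into dec_expires. A sketch of how a VMM might carry the register across migration — the vcpu_fd handling and the omitted error checks are assumptions, but KVM_REG_PPC_DEC_EXPIRY (added by this update) and the one-reg ioctls are the real interface:

    #include <linux/kvm.h>      /* struct kvm_one_reg, KVM_{GET,SET}_ONE_REG */
    #include <stdint.h>
    #include <sys/ioctl.h>

    /* Read the decrementer expiry (guest-timebase ticks) on the source... */
    static uint64_t read_dec_expiry(int vcpu_fd)
    {
        uint64_t val = 0;
        struct kvm_one_reg reg = {
            .id   = KVM_REG_PPC_DEC_EXPIRY,
            .addr = (uint64_t)(uintptr_t)&val,
        };

        ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg); /* kernel adds tb_offset */
        return val;
    }

    /* ...and restore it on the destination vCPU before resuming the guest. */
    static void write_dec_expiry(int vcpu_fd, uint64_t val)
    {
        struct kvm_one_reg reg = {
            .id   = KVM_REG_PPC_DEC_EXPIRY,
            .addr = (uint64_t)(uintptr_t)&val,
        };

        ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg); /* kernel subtracts tb_offset */
    }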
@@ -2378,8 +2389,8 @@ static void init_core_info(struct core_info *cip, struct kvmppc_vcore *vc)
 static bool subcore_config_ok(int n_subcores, int n_threads)
 {
 	/*
-	 * POWER9 "SMT4" cores are permanently in what is effectively a 4-way split-core
-	 * mode, with one thread per subcore.
+	 * POWER9 "SMT4" cores are permanently in what is effectively a 4-way
+	 * split-core mode, with one thread per subcore.
 	 */
 	if (cpu_has_feature(CPU_FTR_ARCH_300))
 		return n_subcores <= 4 && n_threads == 1;
@@ -2415,8 +2426,8 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
 	if (!cpu_has_feature(CPU_FTR_ARCH_207S))
 		return false;

-	/* POWER9 currently requires all threads to be in the same MMU mode */
-	if (cpu_has_feature(CPU_FTR_ARCH_300) &&
+	/* Some POWER9 chips require all threads to be in the same MMU mode */
+	if (no_mixing_hpt_and_radix &&
 	    kvm_is_radix(vc->kvm) != kvm_is_radix(cip->vc[0]->kvm))
 		return false;

@@ -2679,9 +2690,11 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	 * threads are offline. Also check if the number of threads in this
 	 * guest are greater than the current system threads per guest.
 	 * On POWER9, we need to be not in independent-threads mode if
-	 * this is a HPT guest on a radix host.
+	 * this is a HPT guest on a radix host machine where the
+	 * CPU threads may not be in different MMU modes.
 	 */
-	hpt_on_radix = radix_enabled() && !kvm_is_radix(vc->kvm);
+	hpt_on_radix = no_mixing_hpt_and_radix && radix_enabled() &&
+		!kvm_is_radix(vc->kvm);
 	if (((controlled_threads > 1) &&
 	     ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) ||
 	    (hpt_on_radix && vc->kvm->arch.threads_indep)) {
@@ -2831,7 +2844,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 		 */
 		if (!thr0_done)
 			kvmppc_start_thread(NULL, pvc);
-		thr += pvc->num_threads;
 	}

 	/*
@@ -2987,7 +2999,7 @@ static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu)
 {
 	if (!xive_enabled())
 		return false;
-	return vcpu->arch.xive_saved_state.pipr <
+	return vcpu->arch.irq_pending || vcpu->arch.xive_saved_state.pipr <
 		vcpu->arch.xive_saved_state.cppr;
 }
 #else
@@ -3176,17 +3188,8 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	 * this thread straight away and have it join in.
 	 */
 	if (!signal_pending(current)) {
-		if (vc->vcore_state == VCORE_PIGGYBACK) {
-			if (spin_trylock(&vc->lock)) {
-				if (vc->vcore_state == VCORE_RUNNING &&
-				    !VCORE_IS_EXITING(vc)) {
-					kvmppc_create_dtl_entry(vcpu, vc);
-					kvmppc_start_thread(vcpu, vc);
-					trace_kvm_guest_enter(vcpu);
-				}
-				spin_unlock(&vc->lock);
-			}
-		} else if (vc->vcore_state == VCORE_RUNNING &&
+		if ((vc->vcore_state == VCORE_PIGGYBACK ||
+		     vc->vcore_state == VCORE_RUNNING) &&
 			   !VCORE_IS_EXITING(vc)) {
 			kvmppc_create_dtl_entry(vcpu, vc);
 			kvmppc_start_thread(vcpu, vc);
@@ -4448,6 +4451,19 @@ static int kvmppc_book3s_init_hv(void)

 	if (kvmppc_radix_possible())
 		r = kvmppc_radix_init();
+
+	/*
+	 * POWER9 chips before version 2.02 can't have some threads in
+	 * HPT mode and some in radix mode on the same core.
+	 */
+	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+		unsigned int pvr = mfspr(SPRN_PVR);
+		if ((pvr >> 16) == PVR_POWER9 &&
+		    (((pvr & 0xe000) == 0 && (pvr & 0xfff) < 0x202) ||
+		     ((pvr & 0xe000) == 0x2000 && (pvr & 0xfff) < 0x101)))
+			no_mixing_hpt_and_radix = true;
+	}
+
 	return r;
 }

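The PVR test above decides once, at module init, whether HPT and radix threads may share a core. The reading of the fields below — the high halfword selecting the POWER9 family, the 0xe000 bits a chip variant, the low 12 bits the DD revision (0x202 meaning DD2.2) — is an interpretation consistent with the check, not something the patch itself spells out:

    #include <stdbool.h>
    #include <stdint.h>

    #define PVR_POWER9 0x004e   /* upper halfword of the PVR on POWER9 */

    /* Mirrors the predicate added above: the workaround is needed on POWER9
     * parts whose DD revision is below 2.02 for one variant (0xe000 field
     * == 0x0000) or below 1.01 for the other (== 0x2000).
     */
    static bool needs_same_mmu_mode(uint32_t pvr)
    {
        if ((pvr >> 16) != PVR_POWER9)
            return false;
        return ((pvr & 0xe000) == 0x0000 && (pvr & 0xfff) < 0x202) ||
               ((pvr & 0xe000) == 0x2000 && (pvr & 0xfff) < 0x101);
    }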
@@ -268,17 +268,19 @@ static void kvmppc_tb_resync_done(void)
  *	secondary threads to proceed.
  * - All secondary threads will eventually call opal hmi handler on
  *	their exit path.
+ *
+ * Returns 1 if the timebase offset should be applied, 0 if not.
  */

 long kvmppc_realmode_hmi_handler(void)
 {
-	int ptid = local_paca->kvm_hstate.ptid;
 	bool resync_req;

-	/* This is only called on primary thread. */
-	BUG_ON(ptid != 0);
 	__this_cpu_inc(irq_stat.hmi_exceptions);

+	if (hmi_handle_debugtrig(NULL) >= 0)
+		return 1;
+
 	/*
 	 * By now primary thread has already completed guest->host
 	 * partition switch but haven't signaled secondaries yet.
@@ -617,13 +617,6 @@ kvmppc_hv_entry:
 	lbz	r0, KVM_RADIX(r9)
 	cmpwi	cr7, r0, 0

-	/* Clear out SLB if hash */
-	bne	cr7, 2f
-	li	r6,0
-	slbmte	r6,r6
-	slbia
-	ptesync
-2:
 	/*
 	 * POWER7/POWER8 host -> guest partition switch code.
 	 * We don't have to lock against concurrent tlbies,
@@ -738,19 +731,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 10:	cmpdi	r4, 0
 	beq	kvmppc_primary_no_guest
 kvmppc_got_guest:
-
-	/* Load up guest SLB entries (N.B. slb_max will be 0 for radix) */
-	lwz	r5,VCPU_SLB_MAX(r4)
-	cmpwi	r5,0
-	beq	9f
-	mtctr	r5
-	addi	r6,r4,VCPU_SLB
-1:	ld	r8,VCPU_SLB_E(r6)
-	ld	r9,VCPU_SLB_V(r6)
-	slbmte	r9,r8
-	addi	r6,r6,VCPU_SLB_SIZE
-	bdnz	1b
-9:
 	/* Increment yield count if they have a VPA */
 	ld	r3, VCPU_VPA(r4)
 	cmpdi	r3, 0
@@ -957,7 +937,6 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
 	mftb	r7
 	subf	r3,r7,r8
 	mtspr	SPRN_DEC,r3
-	std	r3,VCPU_DEC(r4)

 	ld	r5, VCPU_SPRG0(r4)
 	ld	r6, VCPU_SPRG1(r4)
@@ -1018,6 +997,29 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
 	cmpdi	r3, 512		/* 1 microsecond */
 	blt	hdec_soon

+	/* For hash guest, clear out and reload the SLB */
+	ld	r6, VCPU_KVM(r4)
+	lbz	r0, KVM_RADIX(r6)
+	cmpwi	r0, 0
+	bne	9f
+	li	r6, 0
+	slbmte	r6, r6
+	slbia
+	ptesync
+
+	/* Load up guest SLB entries (N.B. slb_max will be 0 for radix) */
+	lwz	r5,VCPU_SLB_MAX(r4)
+	cmpwi	r5,0
+	beq	9f
+	mtctr	r5
+	addi	r6,r4,VCPU_SLB
+1:	ld	r8,VCPU_SLB_E(r6)
+	ld	r9,VCPU_SLB_V(r6)
+	slbmte	r9,r8
+	addi	r6,r6,VCPU_SLB_SIZE
+	bdnz	1b
+9:
+
 #ifdef CONFIG_KVM_XICS
 	/* We are entering the guest on that thread, push VCPU to XIVE */
 	ld	r10, HSTATE_XIVE_TIMA_PHYS(r13)
@@ -1031,8 +1033,53 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
 	li	r9, TM_QW1_OS + TM_WORD2
 	stwcix	r11,r9,r10
 	li	r9, 1
-	stw	r9, VCPU_XIVE_PUSHED(r4)
+	stb	r9, VCPU_XIVE_PUSHED(r4)
 	eieio
+
+	/*
+	 * We clear the irq_pending flag. There is a small chance of a
+	 * race vs. the escalation interrupt happening on another
+	 * processor setting it again, but the only consequence is to
+	 * cause a spurrious wakeup on the next H_CEDE which is not an
+	 * issue.
+	 */
+	li	r0,0
+	stb	r0, VCPU_IRQ_PENDING(r4)
+
+	/*
+	 * In single escalation mode, if the escalation interrupt is
+	 * on, we mask it.
+	 */
+	lbz	r0, VCPU_XIVE_ESC_ON(r4)
+	cmpwi	r0,0
+	beq	1f
+	ld	r10, VCPU_XIVE_ESC_RADDR(r4)
+	li	r9, XIVE_ESB_SET_PQ_01
+	ldcix	r0, r10, r9
+	sync
+
+	/* We have a possible subtle race here: The escalation interrupt might
+	 * have fired and be on its way to the host queue while we mask it,
+	 * and if we unmask it early enough (re-cede right away), there is
+	 * a theorical possibility that it fires again, thus landing in the
+	 * target queue more than once which is a big no-no.
+	 *
+	 * Fortunately, solving this is rather easy. If the above load setting
+	 * PQ to 01 returns a previous value where P is set, then we know the
+	 * escalation interrupt is somewhere on its way to the host. In that
+	 * case we simply don't clear the xive_esc_on flag below. It will be
+	 * eventually cleared by the handler for the escalation interrupt.
+	 *
+	 * Then, when doing a cede, we check that flag again before re-enabling
+	 * the escalation interrupt, and if set, we abort the cede.
+	 */
+	andi.	r0, r0, XIVE_ESB_VAL_P
+	bne-	1f
+
+	/* Now P is 0, we can clear the flag */
+	li	r0, 0
+	stb	r0, VCPU_XIVE_ESC_ON(r4)
+1:
 no_xive:
 #endif /* CONFIG_KVM_XICS */

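The masking sequence above relies on a XIVE ESB property: a load from the ESB page at offset XIVE_ESB_SET_PQ_01 atomically sets the source to PQ=01 (masked) and returns the previous PQ bits, so the race described in the comment reduces to one test of the returned P bit. A C sketch of the same logic, assuming a hypothetical esb_load() MMIO accessor; the XIVE_ESB_* values below match the kernel's xive-regs.h:

    #include <stdbool.h>
    #include <stdint.h>

    #define XIVE_ESB_SET_PQ_01  0xd00 /* load: set PQ=01 (masked), return old PQ */
    #define XIVE_ESB_VAL_P      0x2   /* P: an interrupt fired / is in flight */

    extern uint64_t esb_load(void *esb_page, uint32_t offset); /* assumed MMIO read */

    /* Mask the escalation source on guest entry. If the returned P bit is
     * set, the escalation already fired and is on its way to the host queue,
     * so xive_esc_on must stay set and the escalation handler clears it
     * later. Otherwise it is safe to clear the flag here.
     */
    static void mask_escalation_on_entry(void *esb_page, bool *xive_esc_on)
    {
        uint64_t old_pq = esb_load(esb_page, XIVE_ESB_SET_PQ_01);

        if (!(old_pq & XIVE_ESB_VAL_P))
            *xive_esc_on = false;
    }

On a later H_CEDE, the kvm_cede_exit hunk further down re-arms the source with a PQ=00 load only after consulting the same flag, closing the double-delivery window.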
@@ -1193,7 +1240,7 @@ hdec_soon:
 	addi	r3, r4, VCPU_TB_RMEXIT
 	bl	kvmhv_accumulate_time
 #endif
-	b	guest_exit_cont
+	b	guest_bypass

 /******************************************************************************
  *                                                                            *
@@ -1423,15 +1470,35 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	blt	deliver_guest_interrupt

 guest_exit_cont:		/* r9 = vcpu, r12 = trap, r13 = paca */
+	/* Save more register state */
+	mfdar	r6
+	mfdsisr	r7
+	std	r6, VCPU_DAR(r9)
+	stw	r7, VCPU_DSISR(r9)
+	/* don't overwrite fault_dar/fault_dsisr if HDSI */
+	cmpwi	r12,BOOK3S_INTERRUPT_H_DATA_STORAGE
+	beq	mc_cont
+	std	r6, VCPU_FAULT_DAR(r9)
+	stw	r7, VCPU_FAULT_DSISR(r9)
+
+	/* See if it is a machine check */
+	cmpwi	r12, BOOK3S_INTERRUPT_MACHINE_CHECK
+	beq	machine_check_realmode
+mc_cont:
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+	addi	r3, r9, VCPU_TB_RMEXIT
+	mr	r4, r9
+	bl	kvmhv_accumulate_time
+#endif
 #ifdef CONFIG_KVM_XICS
 	/* We are exiting, pull the VP from the XIVE */
-	lwz	r0, VCPU_XIVE_PUSHED(r9)
+	lbz	r0, VCPU_XIVE_PUSHED(r9)
 	cmpwi	cr0, r0, 0
 	beq	1f
 	li	r7, TM_SPC_PULL_OS_CTX
 	li	r6, TM_QW1_OS
 	mfmsr	r0
-	andi.	r0, r0, MSR_IR		/* in real mode? */
+	andi.	r0, r0, MSR_DR		/* in real mode? */
 	beq	2f
 	ld	r10, HSTATE_XIVE_TIMA_VIRT(r13)
 	cmpldi	cr0, r10, 0
@@ -1454,33 +1521,42 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
 	/* Fixup some of the state for the next load */
 	li	r10, 0
 	li	r0, 0xff
-	stw	r10, VCPU_XIVE_PUSHED(r9)
+	stb	r10, VCPU_XIVE_PUSHED(r9)
 	stb	r10, (VCPU_XIVE_SAVED_STATE+3)(r9)
 	stb	r0, (VCPU_XIVE_SAVED_STATE+4)(r9)
 	eieio
 1:
 #endif /* CONFIG_KVM_XICS */
-	/* Save more register state */
-	mfdar	r6
-	mfdsisr	r7
-	std	r6, VCPU_DAR(r9)
-	stw	r7, VCPU_DSISR(r9)
-	/* don't overwrite fault_dar/fault_dsisr if HDSI */
-	cmpwi	r12,BOOK3S_INTERRUPT_H_DATA_STORAGE
-	beq	mc_cont
-	std	r6, VCPU_FAULT_DAR(r9)
-	stw	r7, VCPU_FAULT_DSISR(r9)
-
-	/* See if it is a machine check */
-	cmpwi	r12, BOOK3S_INTERRUPT_MACHINE_CHECK
-	beq	machine_check_realmode
-mc_cont:
-#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
-	addi	r3, r9, VCPU_TB_RMEXIT
-	mr	r4, r9
-	bl	kvmhv_accumulate_time
-#endif
-
+	/* For hash guest, read the guest SLB and save it away */
+	ld	r5, VCPU_KVM(r9)
+	lbz	r0, KVM_RADIX(r5)
+	li	r5, 0
+	cmpwi	r0, 0
+	bne	3f			/* for radix, save 0 entries */
+	lwz	r0,VCPU_SLB_NR(r9)	/* number of entries in SLB */
+	mtctr	r0
+	li	r6,0
+	addi	r7,r9,VCPU_SLB
+1:	slbmfee	r8,r6
+	andis.	r0,r8,SLB_ESID_V@h
+	beq	2f
+	add	r8,r8,r6		/* put index in */
+	slbmfev	r3,r6
+	std	r8,VCPU_SLB_E(r7)
+	std	r3,VCPU_SLB_V(r7)
+	addi	r7,r7,VCPU_SLB_SIZE
+	addi	r5,r5,1
+2:	addi	r6,r6,1
+	bdnz	1b
+	/* Finally clear out the SLB */
+	li	r0,0
+	slbmte	r0,r0
+	slbia
+	ptesync
+3:	stw	r5,VCPU_SLB_MAX(r9)
+
+guest_bypass:
 	mr	r3, r12
 	/* Increment exit count, poke other threads to exit */
 	bl	kvmhv_commence_exit
@@ -1501,31 +1577,6 @@ mc_cont:
 	ori	r6,r6,1
 	mtspr	SPRN_CTRLT,r6
 4:
-	/* Check if we are running hash or radix and store it in cr2 */
-	ld	r5, VCPU_KVM(r9)
-	lbz	r0, KVM_RADIX(r5)
-	cmpwi	cr2,r0,0
-
-	/* Read the guest SLB and save it away */
-	li	r5, 0
-	bne	cr2, 3f		/* for radix, save 0 entries */
-	lwz	r0,VCPU_SLB_NR(r9)	/* number of entries in SLB */
-	mtctr	r0
-	li	r6,0
-	addi	r7,r9,VCPU_SLB
-1:	slbmfee	r8,r6
-	andis.	r0,r8,SLB_ESID_V@h
-	beq	2f
-	add	r8,r8,r6		/* put index in */
-	slbmfev	r3,r6
-	std	r8,VCPU_SLB_E(r7)
-	std	r3,VCPU_SLB_V(r7)
-	addi	r7,r7,VCPU_SLB_SIZE
-	addi	r5,r5,1
-2:	addi	r6,r6,1
-	bdnz	1b
-3:	stw	r5,VCPU_SLB_MAX(r9)
-
 	/*
 	 * Save the guest PURR/SPURR
 	 */
@@ -1803,7 +1854,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	ld	r5, VCPU_KVM(r9)
 	lbz	r0, KVM_RADIX(r5)
 	cmpwi	cr2, r0, 0
-	beq	cr2, 3f
+	beq	cr2, 4f

 	/* Radix: Handle the case where the guest used an illegal PID */
 	LOAD_REG_ADDR(r4, mmu_base_pid)
@@ -1839,15 +1890,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 BEGIN_FTR_SECTION
 	PPC_INVALIDATE_ERAT
 END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1)
-	b	4f
+4:
 #endif /* CONFIG_PPC_RADIX_MMU */

-	/* Hash: clear out SLB */
-3:	li	r5,0
-	slbmte	r5,r5
-	slbia
-	ptesync
-4:
 	/*
 	 * POWER7/POWER8 guest -> host partition switch code.
 	 * We don't have to lock against tlbies but we do
@@ -1908,16 +1953,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	bne	27f
 	bl	kvmppc_realmode_hmi_handler
 	nop
+	cmpdi	r3, 0
 	li	r12, BOOK3S_INTERRUPT_HMI
 	/*
-	 * At this point kvmppc_realmode_hmi_handler would have resync-ed
-	 * the TB. Hence it is not required to subtract guest timebase
-	 * offset from timebase. So, skip it.
+	 * At this point kvmppc_realmode_hmi_handler may have resync-ed
+	 * the TB, and if it has, we must not subtract the guest timebase
+	 * offset from the timebase. So, skip it.
 	 *
 	 * Also, do not call kvmppc_subcore_exit_guest() because it has
 	 * been invoked as part of kvmppc_realmode_hmi_handler().
 	 */
-	b	30f
+	beq	30f

 27:
 	/* Subtract timebase offset from timebase */
@@ -2744,7 +2790,32 @@ kvm_cede_prodded:
 /* we've ceded but we want to give control to the host */
 kvm_cede_exit:
 	ld	r9, HSTATE_KVM_VCPU(r13)
-	b	guest_exit_cont
+#ifdef CONFIG_KVM_XICS
+	/* Abort if we still have a pending escalation */
+	lbz	r5, VCPU_XIVE_ESC_ON(r9)
+	cmpwi	r5, 0
+	beq	1f
+	li	r0, 0
+	stb	r0, VCPU_CEDED(r9)
+1:	/* Enable XIVE escalation */
+	li	r5, XIVE_ESB_SET_PQ_00
+	mfmsr	r0
+	andi.	r0, r0, MSR_DR		/* in real mode? */
+	beq	1f
+	ld	r10, VCPU_XIVE_ESC_VADDR(r9)
+	cmpdi	r10, 0
+	beq	3f
+	ldx	r0, r10, r5
+	b	2f
+1:	ld	r10, VCPU_XIVE_ESC_RADDR(r9)
+	cmpdi	r10, 0
+	beq	3f
+	ldcix	r0, r10, r5
+2:	sync
+	li	r0, 1
+	stb	r0, VCPU_XIVE_ESC_ON(r9)
+#endif /* CONFIG_KVM_XICS */
+3:	b	guest_exit_cont

 /* Try to handle a machine check in real mode */
 machine_check_realmode:
@@ -84,12 +84,22 @@ static irqreturn_t xive_esc_irq(int irq, void *data)
 {
 	struct kvm_vcpu *vcpu = data;

-	/* We use the existing H_PROD mechanism to wake up the target */
-	vcpu->arch.prodded = 1;
+	vcpu->arch.irq_pending = 1;
 	smp_mb();
 	if (vcpu->arch.ceded)
 		kvmppc_fast_vcpu_kick(vcpu);

+	/* Since we have the no-EOI flag, the interrupt is effectively
+	 * disabled now. Clearing xive_esc_on means we won't bother
+	 * doing so on the next entry.
+	 *
+	 * This also allows the entry code to know that if a PQ combination
+	 * of 10 is observed while xive_esc_on is true, it means the queue
+	 * contains an unprocessed escalation interrupt. We don't make use of
+	 * that knowledge today but might (see comment in book3s_hv_rmhandler.S)
+	 */
+	vcpu->arch.xive_esc_on = false;
+
 	return IRQ_HANDLED;
 }

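This handler pairs with two earlier hunks: xive_interrupt_pending() now also tests irq_pending, and the guest-entry path clears the flag before running the guest. A toy model of the handshake, with illustrative names rather than the kernel's structures:

    #include <stdbool.h>
    #include <stdint.h>

    /* Toy model of the wakeup handshake, not kernel code. */
    struct vcpu_model {
        bool    ceded;          /* vCPU parked in H_CEDE */
        bool    irq_pending;    /* set by the escalation handler above */
        uint8_t pipr, cppr;     /* saved XIVE presentation-controller state */
    };

    /* Mirrors the change to xive_interrupt_pending(): a fired escalation
     * (irq_pending) wakes a ceded vCPU even when the saved PIPR/CPPR
     * comparison alone would report nothing pending.
     */
    static bool interrupt_pending(const struct vcpu_model *v)
    {
        return v->irq_pending || v->pipr < v->cppr;
    }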
@@ -112,19 +122,21 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio)
 		return -EIO;
 	}

-	/*
-	 * Future improvement: start with them disabled
-	 * and handle DD2 and later scheme of merged escalation
-	 * interrupts
-	 */
-	name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d",
-			 vcpu->kvm->arch.lpid, xc->server_num, prio);
+	if (xc->xive->single_escalation)
+		name = kasprintf(GFP_KERNEL, "kvm-%d-%d",
+				 vcpu->kvm->arch.lpid, xc->server_num);
+	else
+		name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d",
+				 vcpu->kvm->arch.lpid, xc->server_num, prio);
 	if (!name) {
 		pr_err("Failed to allocate escalation irq name for queue %d of VCPU %d\n",
 		       prio, xc->server_num);
 		rc = -ENOMEM;
 		goto error;
 	}

 	pr_devel("Escalation %s irq %d (prio %d)\n", name, xc->esc_virq[prio], prio);

 	rc = request_irq(xc->esc_virq[prio], xive_esc_irq,
 			 IRQF_NO_THREAD, name, vcpu);
 	if (rc) {
@@ -133,6 +145,25 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio)
 		goto error;
 	}
 	xc->esc_virq_names[prio] = name;
+
+	/* In single escalation mode, we grab the ESB MMIO of the
+	 * interrupt and mask it. Also populate the VCPU v/raddr
+	 * of the ESB page for use by asm entry/exit code. Finally
+	 * set the XIVE_IRQ_NO_EOI flag which will prevent the
+	 * core code from performing an EOI on the escalation
+	 * interrupt, thus leaving it effectively masked after
+	 * it fires once.
+	 */
+	if (xc->xive->single_escalation) {
+		struct irq_data *d = irq_get_irq_data(xc->esc_virq[prio]);
+		struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+
+		xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01);
+		vcpu->arch.xive_esc_raddr = xd->eoi_page;
+		vcpu->arch.xive_esc_vaddr = (__force u64)xd->eoi_mmio;
+		xd->flags |= XIVE_IRQ_NO_EOI;
+	}
+
 	return 0;
 error:
 	irq_dispose_mapping(xc->esc_virq[prio]);
@@ -191,12 +222,12 @@ static int xive_check_provisioning(struct kvm *kvm, u8 prio)

 	pr_devel("Provisioning prio... %d\n", prio);

-	/* Provision each VCPU and enable escalations */
+	/* Provision each VCPU and enable escalations if needed */
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		if (!vcpu->arch.xive_vcpu)
 			continue;
 		rc = xive_provision_queue(vcpu, prio);
-		if (rc == 0)
+		if (rc == 0 && !xive->single_escalation)
 			xive_attach_escalation(vcpu, prio);
 		if (rc)
 			return rc;
@@ -1082,6 +1113,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
 	/* Allocate IPI */
 	xc->vp_ipi = xive_native_alloc_irq();
 	if (!xc->vp_ipi) {
+		pr_err("Failed to allocate xive irq for VCPU IPI\n");
 		r = -EIO;
 		goto bail;
 	}
@@ -1091,19 +1123,34 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
 	if (r)
 		goto bail;

+	/*
+	 * Enable the VP first as the single escalation mode will
+	 * affect escalation interrupts numbering
+	 */
+	r = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
+	if (r) {
+		pr_err("Failed to enable VP in OPAL, err %d\n", r);
+		goto bail;
+	}
+
 	/*
 	 * Initialize queues. Initially we set them all for no queueing
 	 * and we enable escalation for queue 0 only which we'll use for
 	 * our mfrr change notifications. If the VCPU is hot-plugged, we
-	 * do handle provisioning however.
+	 * do handle provisioning however based on the existing "map"
+	 * of enabled queues.
 	 */
 	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
 		struct xive_q *q = &xc->queues[i];

+		/* Single escalation, no queue 7 */
+		if (i == 7 && xive->single_escalation)
+			break;
+
 		/* Is queue already enabled ? Provision it */
 		if (xive->qmap & (1 << i)) {
 			r = xive_provision_queue(vcpu, i);
-			if (r == 0)
+			if (r == 0 && !xive->single_escalation)
 				xive_attach_escalation(vcpu, i);
 			if (r)
 				goto bail;
@@ -1123,11 +1170,6 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
 	if (r)
 		goto bail;

-	/* Enable the VP */
-	r = xive_native_enable_vp(xc->vp_id);
-	if (r)
-		goto bail;
-
 	/* Route the IPI */
 	r = xive_native_configure_irq(xc->vp_ipi, xc->vp_id, 0, XICS_IPI);
 	if (!r)
@@ -1474,6 +1516,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)

 	pr_devel("  val=0x016%llx (server=0x%x, guest_prio=%d)\n",
 		 val, server, guest_prio);
+
 	/*
 	 * If the source doesn't already have an IPI, allocate
 	 * one and get the corresponding data
@@ -1762,6 +1805,8 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
 	if (xive->vp_base == XIVE_INVALID_VP)
 		ret = -ENOMEM;

+	xive->single_escalation = xive_native_has_single_escalation();
+
 	if (ret) {
 		kfree(xive);
 		return ret;
@@ -1795,6 +1840,7 @@ static int xive_debug_show(struct seq_file *m, void *private)

 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+		unsigned int i;

 		if (!xc)
 			continue;
@@ -1804,6 +1850,33 @@ static int xive_debug_show(struct seq_file *m, void *private)
 			   xc->server_num, xc->cppr, xc->hw_cppr,
 			   xc->mfrr, xc->pending,
 			   xc->stat_rm_h_xirr, xc->stat_vm_h_xirr);
+		for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
+			struct xive_q *q = &xc->queues[i];
+			u32 i0, i1, idx;
+
+			if (!q->qpage && !xc->esc_virq[i])
+				continue;
+
+			seq_printf(m, " [q%d]: ", i);
+
+			if (q->qpage) {
+				idx = q->idx;
+				i0 = be32_to_cpup(q->qpage + idx);
+				idx = (idx + 1) & q->msk;
+				i1 = be32_to_cpup(q->qpage + idx);
+				seq_printf(m, "T=%d %08x %08x... \n", q->toggle, i0, i1);
+			}
+			if (xc->esc_virq[i]) {
+				struct irq_data *d = irq_get_irq_data(xc->esc_virq[i]);
+				struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+				u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET);
+				seq_printf(m, "E:%c%c I(%d:%llx:%llx)",
+					   (pq & XIVE_ESB_VAL_P) ? 'P' : 'p',
+					   (pq & XIVE_ESB_VAL_Q) ? 'Q' : 'q',
+					   xc->esc_virq[i], pq, xd->eoi_page);
+				seq_printf(m, "\n");
+			}
+		}

 		t_rm_h_xirr += xc->stat_rm_h_xirr;
 		t_rm_h_ipoll += xc->stat_rm_h_ipoll;
@@ -120,6 +120,8 @@ struct kvmppc_xive {
 	u32	q_order;
 	u32	q_page_order;

+	/* Flags */
+	u8	single_escalation;
 };

 #define KVMPPC_XIVE_Q_COUNT	8
@@ -201,25 +203,20 @@ static inline struct kvmppc_xive_src_block *kvmppc_xive_find_source(struct kvmpp
  * is as follow.
  *
  * Guest request for 0...6 are honored. Guest request for anything
- * higher results in a priority of 7 being applied.
- *
- * However, when XIRR is returned via H_XIRR, 7 is translated to 0xb
- * in order to match AIX expectations
+ * higher results in a priority of 6 being applied.
  *
  * Similar mapping is done for CPPR values
  */
 static inline u8 xive_prio_from_guest(u8 prio)
 {
-	if (prio == 0xff || prio < 8)
+	if (prio == 0xff || prio < 6)
 		return prio;
-	return 7;
+	return 6;
 }

 static inline u8 xive_prio_to_guest(u8 prio)
 {
-	if (prio == 0xff || prio < 7)
-		return prio;
-	return 0xb;
+	return prio;
 }

 static inline u32 __xive_read_eq(__be32 *qpage, u32 msk, u32 *idx, u32 *toggle)
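The remapping drops the old 7 -> 0xb AIX translation and clamps guest requests at priority 6; the intent, as far as these patches show, is to keep host priority 7 free for the escalation queues used by the single-escalation mode introduced above. A quick standalone check of the two helpers as changed:

    #include <assert.h>
    #include <stdint.h>

    /* The two helpers as changed above, copied here for a self-check. */
    static uint8_t xive_prio_from_guest(uint8_t prio)
    {
        if (prio == 0xff || prio < 6)
            return prio;
        return 6;
    }

    static uint8_t xive_prio_to_guest(uint8_t prio)
    {
        return prio;
    }

    int main(void)
    {
        assert(xive_prio_from_guest(3) == 3);       /* 0..5 pass through */
        assert(xive_prio_from_guest(6) == 6);
        assert(xive_prio_from_guest(7) == 6);       /* 7 and up clamp to 6 */
        assert(xive_prio_from_guest(0xff) == 0xff); /* "unset" marker kept */
        assert(xive_prio_to_guest(6) == 6);         /* 1:1 on the way back */
        return 0;
    }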
@@ -763,7 +763,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)

 	hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
 	vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;
-	vcpu->arch.dec_expires = ~(u64)0;
+	vcpu->arch.dec_expires = get_tb();

 #ifdef CONFIG_KVM_EXIT_TIMING
 	mutex_init(&vcpu->arch.exit_timing_lock);
@@ -1106,11 +1106,9 @@ int kvmppc_handle_vsx_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
 {
 	enum emulation_result emulated = EMULATE_DONE;

-	/* Currently, mmio_vsx_copy_nums only allowed to be less than 4 */
-	if ( (vcpu->arch.mmio_vsx_copy_nums > 4) ||
-	     (vcpu->arch.mmio_vsx_copy_nums < 0) ) {
+	/* Currently, mmio_vsx_copy_nums only allowed to be 4 or less */
+	if (vcpu->arch.mmio_vsx_copy_nums > 4)
 		return EMULATE_FAIL;
-	}

 	while (vcpu->arch.mmio_vsx_copy_nums) {
 		emulated = __kvmppc_handle_load(run, vcpu, rt, bytes,
@@ -1252,11 +1250,9 @@ int kvmppc_handle_vsx_store(struct kvm_run *run, struct kvm_vcpu *vcpu,

 	vcpu->arch.io_gpr = rs;

-	/* Currently, mmio_vsx_copy_nums only allowed to be less than 4 */
-	if ( (vcpu->arch.mmio_vsx_copy_nums > 4) ||
-	     (vcpu->arch.mmio_vsx_copy_nums < 0) ) {
+	/* Currently, mmio_vsx_copy_nums only allowed to be 4 or less */
+	if (vcpu->arch.mmio_vsx_copy_nums > 4)
 		return EMULATE_FAIL;
-	}

 	while (vcpu->arch.mmio_vsx_copy_nums) {
 		if (kvmppc_get_vsr_data(vcpu, rs, &val) == -1)
@@ -143,8 +143,7 @@ static int kvmppc_exit_timing_show(struct seq_file *m, void *private)
 	int i;
 	u64 min, max, sum, sum_quad;

-	seq_printf(m, "%s", "type	count	min	max	sum	sum_squared\n");
-
+	seq_puts(m, "type	count	min	max	sum	sum_squared\n");

 	for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) {