Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM updates from Paolo Bonzini:
 "PPC:
   - Better machine check handling for HV KVM
   - Ability to support guests with threads=2, 4 or 8 on POWER9
   - Fix for a race that could cause delayed recognition of signals
   - Fix for a bug where POWER9 guests could sleep with interrupts pending.

  ARM:
   - VCPU request overhaul
   - allow timer and PMU to have their interrupt number selected from userspace
   - workaround for Cavium erratum 30115
   - handling of memory poisoning
   - the usual crop of fixes and cleanups

  s390:
   - initial machine check forwarding
   - migration support for the CMMA page hinting information
   - cleanups and fixes

  x86:
   - nested VMX bugfixes and improvements
   - more reliable NMI window detection on AMD
   - APIC timer optimizations

  Generic:
   - VCPU request overhaul + documentation of common code patterns
   - kvm_stat improvements"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (124 commits)
  Update my email address
  kvm: vmx: allow host to access guest MSR_IA32_BNDCFGS
  x86: kvm: mmu: use ept a/d in vmcs02 iff used in vmcs12
  kvm: x86: mmu: allow A/D bits to be disabled in an mmu
  x86: kvm: mmu: make spte mmio mask more explicit
  x86: kvm: mmu: dead code thanks to access tracking
  KVM: PPC: Book3S: Fix typo in XICS-on-XIVE state saving code
  KVM: PPC: Book3S HV: Close race with testing for signals on guest entry
  KVM: PPC: Book3S HV: Simplify dynamic micro-threading code
  KVM: x86: remove ignored type attribute
  KVM: LAPIC: Fix lapic timer injection delay
  KVM: lapic: reorganize restart_apic_timer
  KVM: lapic: reorganize start_hv_timer
  kvm: nVMX: Check memory operand to INVVPID
  KVM: s390: Inject machine check into the nested guest
  KVM: s390: Inject machine check into the guest
  tools/kvm_stat: add new interactive command 'b'
  tools/kvm_stat: add new command line switch '-i'
  tools/kvm_stat: fix error on interactive command 'g'
  KVM: SVM: suppress unnecessary NMI singlestep on GIF=0 and nested exit
  ...
This commit is contained in:
Linus Torvalds
2017-07-06 18:38:31 -07:00
95 changed files with 4255 additions and 972 deletions

View File

@@ -46,6 +46,8 @@
#include <linux/of.h>
#include <asm/reg.h>
#include <asm/ppc-opcode.h>
#include <asm/disassemble.h>
#include <asm/cputable.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
@@ -645,6 +647,7 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
unsigned long stolen;
unsigned long core_stolen;
u64 now;
unsigned long flags;
dt = vcpu->arch.dtl_ptr;
vpa = vcpu->arch.vpa.pinned_addr;
@@ -652,10 +655,10 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
core_stolen = vcore_stolen_time(vc, now);
stolen = core_stolen - vcpu->arch.stolen_logged;
vcpu->arch.stolen_logged = core_stolen;
spin_lock_irq(&vcpu->arch.tbacct_lock);
spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
stolen += vcpu->arch.busy_stolen;
vcpu->arch.busy_stolen = 0;
spin_unlock_irq(&vcpu->arch.tbacct_lock);
spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
if (!dt || !vpa)
return;
memset(dt, 0, sizeof(struct dtl_entry));
@@ -675,6 +678,26 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
vcpu->arch.dtl.dirty = true;
}
/* See if there is a doorbell interrupt pending for a vcpu */
static bool kvmppc_doorbell_pending(struct kvm_vcpu *vcpu)
{
int thr;
struct kvmppc_vcore *vc;
if (vcpu->arch.doorbell_request)
return true;
/*
* Ensure that the read of vcore->dpdes comes after the read
* of vcpu->doorbell_request. This barrier matches the
* lwsync in book3s_hv_rmhandlers.S just before the
* fast_guest_return label.
*/
smp_rmb();
vc = vcpu->arch.vcore;
thr = vcpu->vcpu_id - vc->first_vcpuid;
return !!(vc->dpdes & (1 << thr));
}
static bool kvmppc_power8_compatible(struct kvm_vcpu *vcpu)
{
if (vcpu->arch.vcore->arch_compat >= PVR_ARCH_207)
@@ -926,6 +949,101 @@ static int kvmppc_emulate_debug_inst(struct kvm_run *run,
}
}
/*
 * Intentionally empty callback for smp_call_function_single(): callers
 * only want the target cpu to take the interrupt, not to do any work.
 */
static void do_nothing(void *x)
{
	(void)x;
}
static unsigned long kvmppc_read_dpdes(struct kvm_vcpu *vcpu)
{
int thr, cpu, pcpu, nthreads;
struct kvm_vcpu *v;
unsigned long dpdes;
nthreads = vcpu->kvm->arch.emul_smt_mode;
dpdes = 0;
cpu = vcpu->vcpu_id & ~(nthreads - 1);
for (thr = 0; thr < nthreads; ++thr, ++cpu) {
v = kvmppc_find_vcpu(vcpu->kvm, cpu);
if (!v)
continue;
/*
* If the vcpu is currently running on a physical cpu thread,
* interrupt it in order to pull it out of the guest briefly,
* which will update its vcore->dpdes value.
*/
pcpu = READ_ONCE(v->cpu);
if (pcpu >= 0)
smp_call_function_single(pcpu, do_nothing, NULL, 1);
if (kvmppc_doorbell_pending(v))
dpdes |= 1 << thr;
}
return dpdes;
}
/*
 * On POWER9, emulate doorbell-related instructions in order to
 * give the guest the illusion of running on a multi-threaded core.
 * The instructions emulated are msgsndp, msgclrp, mfspr TIR,
 * and mfspr DPDES.
 *
 * Returns EMULATE_FAIL when the host is not POWER9 or the faulting
 * instruction is not one of the emulated forms (the caller then
 * reflects a program interrupt to the guest).  Returns RESUME_GUEST
 * when the instruction was emulated and the guest PC advanced, or
 * when the instruction could not be fetched and the guest should
 * simply retry it.
 */
static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu)
{
	u32 inst, rb, thr;
	unsigned long arg;
	struct kvm *kvm = vcpu->kvm;
	struct kvm_vcpu *tvcpu;

	if (!cpu_has_feature(CPU_FTR_ARCH_300))
		return EMULATE_FAIL;
	if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst) != EMULATE_DONE)
		return RESUME_GUEST;
	if (get_op(inst) != 31)
		return EMULATE_FAIL;
	rb = get_rb(inst);
	/* This vcpu's thread number within its emulated core */
	thr = vcpu->vcpu_id & (kvm->arch.emul_smt_mode - 1);
	switch (get_xop(inst)) {
	case OP_31_XOP_MSGSNDP:
		arg = kvmppc_get_gpr(vcpu, rb);
		/*
		 * The message type is the 5-bit field RB[32:36], i.e.
		 * bits 27..31 counting from the LSB.  Masking with only
		 * 4 bits would drop the field's MSB and let an invalid
		 * type alias to PPC_DBELL_SERVER.
		 */
		if (((arg >> 27) & 0x1f) != PPC_DBELL_SERVER)
			break;
		/*
		 * The target thread is the 7-bit field RB[57:63].
		 * Keep all 7 bits so that an out-of-range thread number
		 * is rejected below instead of wrapping onto a valid one.
		 */
		arg &= 0x7f;
		if (arg >= kvm->arch.emul_smt_mode)
			break;
		tvcpu = kvmppc_find_vcpu(kvm, vcpu->vcpu_id - thr + arg);
		if (!tvcpu)
			break;
		/* Only kick the target if no request is already posted */
		if (!tvcpu->arch.doorbell_request) {
			tvcpu->arch.doorbell_request = 1;
			kvmppc_fast_vcpu_kick_hv(tvcpu);
		}
		break;
	case OP_31_XOP_MSGCLRP:
		arg = kvmppc_get_gpr(vcpu, rb);
		/* Same 5-bit message type field as for msgsndp */
		if (((arg >> 27) & 0x1f) != PPC_DBELL_SERVER)
			break;
		vcpu->arch.vcore->dpdes = 0;
		vcpu->arch.doorbell_request = 0;
		break;
	case OP_31_XOP_MFSPR:
		switch (get_sprn(inst)) {
		case SPRN_TIR:
			arg = thr;
			break;
		case SPRN_DPDES:
			arg = kvmppc_read_dpdes(vcpu);
			break;
		default:
			return EMULATE_FAIL;
		}
		kvmppc_set_gpr(vcpu, get_rt(inst), arg);
		break;
	default:
		return EMULATE_FAIL;
	}
	/* Instruction handled: step the guest past it */
	kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4);
	return RESUME_GUEST;
}
static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
struct task_struct *tsk)
{
@@ -971,15 +1089,20 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
r = RESUME_GUEST;
break;
case BOOK3S_INTERRUPT_MACHINE_CHECK:
/*
* Deliver a machine check interrupt to the guest.
* We have to do this, even if the host has handled the
* machine check, because machine checks use SRR0/1 and
* the interrupt might have trashed guest state in them.
*/
kvmppc_book3s_queue_irqprio(vcpu,
BOOK3S_INTERRUPT_MACHINE_CHECK);
r = RESUME_GUEST;
/* Exit to guest with KVM_EXIT_NMI as exit reason */
run->exit_reason = KVM_EXIT_NMI;
run->hw.hardware_exit_reason = vcpu->arch.trap;
/* Clear out the old NMI status from run->flags */
run->flags &= ~KVM_RUN_PPC_NMI_DISP_MASK;
/* Now set the NMI status */
if (vcpu->arch.mce_evt.disposition == MCE_DISPOSITION_RECOVERED)
run->flags |= KVM_RUN_PPC_NMI_DISP_FULLY_RECOV;
else
run->flags |= KVM_RUN_PPC_NMI_DISP_NOT_RECOV;
r = RESUME_HOST;
/* Print the MCE event to host console. */
machine_check_print_event_info(&vcpu->arch.mce_evt, false);
break;
case BOOK3S_INTERRUPT_PROGRAM:
{
@@ -1048,12 +1171,19 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
/*
* This occurs if the guest (kernel or userspace), does something that
* is prohibited by HFSCR. We just generate a program interrupt to
* the guest.
* is prohibited by HFSCR.
* On POWER9, this could be a doorbell instruction that we need
* to emulate.
* Otherwise, we just generate a program interrupt to the guest.
*/
case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
r = RESUME_GUEST;
r = EMULATE_FAIL;
if ((vcpu->arch.hfscr >> 56) == FSCR_MSGP_LG)
r = kvmppc_emulate_doorbell_instr(vcpu);
if (r == EMULATE_FAIL) {
kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
r = RESUME_GUEST;
}
break;
case BOOK3S_INTERRUPT_HV_RM_HARD:
r = RESUME_PASSTHROUGH;
@@ -1143,6 +1273,12 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
mask = LPCR_DPFD | LPCR_ILE | LPCR_TC;
if (cpu_has_feature(CPU_FTR_ARCH_207S))
mask |= LPCR_AIL;
/*
* On POWER9, allow userspace to enable large decrementer for the
* guest, whether or not the host has it enabled.
*/
if (cpu_has_feature(CPU_FTR_ARCH_300))
mask |= LPCR_LD;
/* Broken 32-bit version of LPCR must not clear top bits */
if (preserve_top32)
@@ -1611,7 +1747,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
init_swait_queue_head(&vcore->wq);
vcore->preempt_tb = TB_NIL;
vcore->lpcr = kvm->arch.lpcr;
vcore->first_vcpuid = core * threads_per_vcore();
vcore->first_vcpuid = core * kvm->arch.smt_mode;
vcore->kvm = kvm;
INIT_LIST_HEAD(&vcore->preempt_list);
@@ -1770,14 +1906,10 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
unsigned int id)
{
struct kvm_vcpu *vcpu;
int err = -EINVAL;
int err;
int core;
struct kvmppc_vcore *vcore;
core = id / threads_per_vcore();
if (core >= KVM_MAX_VCORES)
goto out;
err = -ENOMEM;
vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
if (!vcpu)
@@ -1808,6 +1940,20 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
vcpu->arch.busy_preempt = TB_NIL;
vcpu->arch.intr_msr = MSR_SF | MSR_ME;
/*
* Set the default HFSCR for the guest from the host value.
* This value is only used on POWER9.
* On POWER9 DD1, TM doesn't work, so we make sure to
* prevent the guest from using it.
* On POWER9, we want to virtualize the doorbell facility, so we
* turn off the HFSCR bit, which causes those instructions to trap.
*/
vcpu->arch.hfscr = mfspr(SPRN_HFSCR);
if (!cpu_has_feature(CPU_FTR_TM))
vcpu->arch.hfscr &= ~HFSCR_TM;
if (cpu_has_feature(CPU_FTR_ARCH_300))
vcpu->arch.hfscr &= ~HFSCR_MSGP;
kvmppc_mmu_book3s_hv_init(vcpu);
vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
@@ -1815,11 +1961,17 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
init_waitqueue_head(&vcpu->arch.cpu_run);
mutex_lock(&kvm->lock);
vcore = kvm->arch.vcores[core];
if (!vcore) {
vcore = kvmppc_vcore_create(kvm, core);
kvm->arch.vcores[core] = vcore;
kvm->arch.online_vcores++;
vcore = NULL;
err = -EINVAL;
core = id / kvm->arch.smt_mode;
if (core < KVM_MAX_VCORES) {
vcore = kvm->arch.vcores[core];
if (!vcore) {
err = -ENOMEM;
vcore = kvmppc_vcore_create(kvm, core);
kvm->arch.vcores[core] = vcore;
kvm->arch.online_vcores++;
}
}
mutex_unlock(&kvm->lock);
@@ -1847,6 +1999,43 @@ out:
return ERR_PTR(err);
}
/*
 * Set the SMT mode (vcpus per virtual core) for a VM.
 *
 * Returns -EINVAL if flags is non-zero, if smt_mode is not a power of
 * two or exceeds MAX_SMT_THREADS, or (on pre-POWER9 hosts) if it
 * exceeds threads_per_subcore.  Returns -EBUSY if the VM already has
 * online vcores, and 0 once the new mode has been recorded.
 */
static int kvmhv_set_smt_mode(struct kvm *kvm, unsigned long smt_mode,
			      unsigned long flags)
{
	int emulated = 0;
	int ret = -EBUSY;

	if (flags)
		return -EINVAL;
	if (!is_power_of_2(smt_mode) || smt_mode > MAX_SMT_THREADS)
		return -EINVAL;

	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		/*
		 * POWER9 uses "loose" threading: every vcpu gets its own
		 * vcore, and the requested mode is only emulated.
		 */
		emulated = smt_mode;
		smt_mode = 1;
	} else if (smt_mode > threads_per_subcore) {
		/*
		 * POWER8 (or POWER7) uses "strict" threading, packing
		 * smt_mode vcpus into each vcore, so the mode cannot
		 * exceed what a subcore provides.
		 */
		return -EINVAL;
	}

	mutex_lock(&kvm->lock);
	/* The mode may only change while no vcore exists yet */
	if (!kvm->arch.online_vcores) {
		kvm->arch.smt_mode = smt_mode;
		kvm->arch.emul_smt_mode = emulated;
		ret = 0;
	}
	mutex_unlock(&kvm->lock);

	return ret;
}
static void unpin_vpa(struct kvm *kvm, struct kvmppc_vpa *vpa)
{
if (vpa->pinned_addr)
@@ -1897,7 +2086,7 @@ static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
}
}
extern void __kvmppc_vcore_entry(void);
extern int __kvmppc_vcore_entry(void);
static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
struct kvm_vcpu *vcpu)
@@ -1962,10 +2151,6 @@ static void kvmppc_release_hwthread(int cpu)
tpaca->kvm_hstate.kvm_split_mode = NULL;
}
static void do_nothing(void *x)
{
}
static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
{
int i;
@@ -1983,11 +2168,35 @@ static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
smp_call_function_single(cpu + i, do_nothing, NULL, 1);
}
/*
 * Record that vcpu is about to run on pcpu and, if it has moved to a
 * different physical core, request a TLB flush on the core it ran on
 * before.
 *
 * With radix, the guest can do TLB invalidations itself, and it may
 * use the local form (tlbiel) for a translation that has only ever
 * been used on one vcpu.  That does not mean the translation has only
 * ever been used on one physical cpu, because vcpus move between
 * pcpus.  So when a vcpu changes pcpu, any vcpus running on the core
 * it previously ran on must be told to flush the TLB.  The TLB is
 * shared between threads, so a single bit in .need_tlb_flush covers
 * all 4 threads.
 */
static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
{
	int prev = vcpu->arch.prev_cpu;

	if (prev == pcpu)
		return;

	/* Only a move to a different core needs the old core flushed */
	if (prev >= 0 &&
	    cpu_first_thread_sibling(prev) != cpu_first_thread_sibling(pcpu))
		radix_flush_cpu(vcpu->kvm, prev, vcpu);

	vcpu->arch.prev_cpu = pcpu;
}
static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
{
int cpu;
struct paca_struct *tpaca;
struct kvmppc_vcore *mvc = vc->master_vcore;
struct kvm *kvm = vc->kvm;
cpu = vc->pcpu;
@@ -1997,36 +2206,16 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
vcpu->arch.timer_running = 0;
}
cpu += vcpu->arch.ptid;
vcpu->cpu = mvc->pcpu;
vcpu->cpu = vc->pcpu;
vcpu->arch.thread_cpu = cpu;
/*
* With radix, the guest can do TLB invalidations itself,
* and it could choose to use the local form (tlbiel) if
* it is invalidating a translation that has only ever been
* used on one vcpu. However, that doesn't mean it has
* only ever been used on one physical cpu, since vcpus
* can move around between pcpus. To cope with this, when
* a vcpu moves from one pcpu to another, we need to tell
* any vcpus running on the same core as this vcpu previously
* ran to flush the TLB. The TLB is shared between threads,
* so we use a single bit in .need_tlb_flush for all 4 threads.
*/
if (kvm_is_radix(kvm) && vcpu->arch.prev_cpu != cpu) {
if (vcpu->arch.prev_cpu >= 0 &&
cpu_first_thread_sibling(vcpu->arch.prev_cpu) !=
cpu_first_thread_sibling(cpu))
radix_flush_cpu(kvm, vcpu->arch.prev_cpu, vcpu);
vcpu->arch.prev_cpu = cpu;
}
cpumask_set_cpu(cpu, &kvm->arch.cpu_in_guest);
}
tpaca = &paca[cpu];
tpaca->kvm_hstate.kvm_vcpu = vcpu;
tpaca->kvm_hstate.ptid = cpu - mvc->pcpu;
tpaca->kvm_hstate.ptid = cpu - vc->pcpu;
/* Order stores to hstate.kvm_vcpu etc. before store to kvm_vcore */
smp_wmb();
tpaca->kvm_hstate.kvm_vcore = mvc;
tpaca->kvm_hstate.kvm_vcore = vc;
if (cpu != smp_processor_id())
kvmppc_ipi_thread(cpu);
}
@@ -2155,8 +2344,7 @@ struct core_info {
int max_subcore_threads;
int total_threads;
int subcore_threads[MAX_SUBCORES];
struct kvm *subcore_vm[MAX_SUBCORES];
struct list_head vcs[MAX_SUBCORES];
struct kvmppc_vcore *vc[MAX_SUBCORES];
};
/*
@@ -2167,17 +2355,12 @@ static int subcore_thread_map[MAX_SUBCORES] = { 0, 4, 2, 6 };
static void init_core_info(struct core_info *cip, struct kvmppc_vcore *vc)
{
int sub;
memset(cip, 0, sizeof(*cip));
cip->n_subcores = 1;
cip->max_subcore_threads = vc->num_threads;
cip->total_threads = vc->num_threads;
cip->subcore_threads[0] = vc->num_threads;
cip->subcore_vm[0] = vc->kvm;
for (sub = 0; sub < MAX_SUBCORES; ++sub)
INIT_LIST_HEAD(&cip->vcs[sub]);
list_add_tail(&vc->preempt_list, &cip->vcs[0]);
cip->vc[0] = vc;
}
static bool subcore_config_ok(int n_subcores, int n_threads)
@@ -2197,9 +2380,8 @@ static bool subcore_config_ok(int n_subcores, int n_threads)
return n_subcores * roundup_pow_of_two(n_threads) <= MAX_SMT_THREADS;
}
static void init_master_vcore(struct kvmppc_vcore *vc)
static void init_vcore_to_run(struct kvmppc_vcore *vc)
{
vc->master_vcore = vc;
vc->entry_exit_map = 0;
vc->in_guest = 0;
vc->napping_threads = 0;
@@ -2224,9 +2406,9 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
++cip->n_subcores;
cip->total_threads += vc->num_threads;
cip->subcore_threads[sub] = vc->num_threads;
cip->subcore_vm[sub] = vc->kvm;
init_master_vcore(vc);
list_move_tail(&vc->preempt_list, &cip->vcs[sub]);
cip->vc[sub] = vc;
init_vcore_to_run(vc);
list_del_init(&vc->preempt_list);
return true;
}
@@ -2294,6 +2476,18 @@ static void collect_piggybacks(struct core_info *cip, int target_threads)
spin_unlock(&lp->lock);
}
static bool recheck_signals(struct core_info *cip)
{
int sub, i;
struct kvm_vcpu *vcpu;
for (sub = 0; sub < cip->n_subcores; ++sub)
for_each_runnable_thread(i, vcpu, cip->vc[sub])
if (signal_pending(vcpu->arch.run_task))
return true;
return false;
}
static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
{
int still_running = 0, i;
@@ -2331,7 +2525,6 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
wake_up(&vcpu->arch.cpu_run);
}
}
list_del_init(&vc->preempt_list);
if (!is_master) {
if (still_running > 0) {
kvmppc_vcore_preempt(vc);
@@ -2393,6 +2586,21 @@ static inline int kvmppc_set_host_core(unsigned int cpu)
return 0;
}
/*
 * Note in the local paca's irq_happened field which kind of interrupt
 * the given trap number corresponds to.  Only external, hypervisor
 * doorbell and HMI traps are recorded; any other trap value leaves
 * irq_happened untouched.
 */
static void set_irq_happened(int trap)
{
	if (trap == BOOK3S_INTERRUPT_EXTERNAL)
		local_paca->irq_happened |= PACA_IRQ_EE;
	else if (trap == BOOK3S_INTERRUPT_H_DOORBELL)
		local_paca->irq_happened |= PACA_IRQ_DBELL;
	else if (trap == BOOK3S_INTERRUPT_HMI)
		local_paca->irq_happened |= PACA_IRQ_HMI;
}
/*
* Run a set of guest threads on a physical core.
* Called with vc->lock held.
@@ -2403,7 +2611,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
int i;
int srcu_idx;
struct core_info core_info;
struct kvmppc_vcore *pvc, *vcnext;
struct kvmppc_vcore *pvc;
struct kvm_split_mode split_info, *sip;
int split, subcore_size, active;
int sub;
@@ -2412,6 +2620,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
int pcpu, thr;
int target_threads;
int controlled_threads;
int trap;
/*
* Remove from the list any threads that have a signal pending
@@ -2426,7 +2635,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
/*
* Initialize *vc.
*/
init_master_vcore(vc);
init_vcore_to_run(vc);
vc->preempt_tb = TB_NIL;
/*
@@ -2463,6 +2672,43 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
if (vc->num_threads < target_threads)
collect_piggybacks(&core_info, target_threads);
/*
* On radix, arrange for TLB flushing if necessary.
* This has to be done before disabling interrupts since
* it uses smp_call_function().
*/
pcpu = smp_processor_id();
if (kvm_is_radix(vc->kvm)) {
for (sub = 0; sub < core_info.n_subcores; ++sub)
for_each_runnable_thread(i, vcpu, core_info.vc[sub])
kvmppc_prepare_radix_vcpu(vcpu, pcpu);
}
/*
* Hard-disable interrupts, and check resched flag and signals.
* If we need to reschedule or deliver a signal, clean up
* and return without going into the guest(s).
*/
local_irq_disable();
hard_irq_disable();
if (lazy_irq_pending() || need_resched() ||
recheck_signals(&core_info)) {
local_irq_enable();
vc->vcore_state = VCORE_INACTIVE;
/* Unlock all except the primary vcore */
for (sub = 1; sub < core_info.n_subcores; ++sub) {
pvc = core_info.vc[sub];
/* Put back on to the preempted vcores list */
kvmppc_vcore_preempt(pvc);
spin_unlock(&pvc->lock);
}
for (i = 0; i < controlled_threads; ++i)
kvmppc_release_hwthread(pcpu + i);
return;
}
kvmppc_clear_host_core(pcpu);
/* Decide on micro-threading (split-core) mode */
subcore_size = threads_per_subcore;
cmd_bit = stat_bit = 0;
@@ -2486,13 +2732,10 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
split_info.ldbar = mfspr(SPRN_LDBAR);
split_info.subcore_size = subcore_size;
for (sub = 0; sub < core_info.n_subcores; ++sub)
split_info.master_vcs[sub] =
list_first_entry(&core_info.vcs[sub],
struct kvmppc_vcore, preempt_list);
split_info.vc[sub] = core_info.vc[sub];
/* order writes to split_info before kvm_split_mode pointer */
smp_wmb();
}
pcpu = smp_processor_id();
for (thr = 0; thr < controlled_threads; ++thr)
paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip;
@@ -2512,32 +2755,29 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
}
}
kvmppc_clear_host_core(pcpu);
/* Start all the threads */
active = 0;
for (sub = 0; sub < core_info.n_subcores; ++sub) {
thr = subcore_thread_map[sub];
thr0_done = false;
active |= 1 << thr;
list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list) {
pvc->pcpu = pcpu + thr;
for_each_runnable_thread(i, vcpu, pvc) {
kvmppc_start_thread(vcpu, pvc);
kvmppc_create_dtl_entry(vcpu, pvc);
trace_kvm_guest_enter(vcpu);
if (!vcpu->arch.ptid)
thr0_done = true;
active |= 1 << (thr + vcpu->arch.ptid);
}
/*
* We need to start the first thread of each subcore
* even if it doesn't have a vcpu.
*/
if (pvc->master_vcore == pvc && !thr0_done)
kvmppc_start_thread(NULL, pvc);
thr += pvc->num_threads;
pvc = core_info.vc[sub];
pvc->pcpu = pcpu + thr;
for_each_runnable_thread(i, vcpu, pvc) {
kvmppc_start_thread(vcpu, pvc);
kvmppc_create_dtl_entry(vcpu, pvc);
trace_kvm_guest_enter(vcpu);
if (!vcpu->arch.ptid)
thr0_done = true;
active |= 1 << (thr + vcpu->arch.ptid);
}
/*
* We need to start the first thread of each subcore
* even if it doesn't have a vcpu.
*/
if (!thr0_done)
kvmppc_start_thread(NULL, pvc);
thr += pvc->num_threads;
}
/*
@@ -2564,17 +2804,27 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
trace_kvmppc_run_core(vc, 0);
for (sub = 0; sub < core_info.n_subcores; ++sub)
list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list)
spin_unlock(&pvc->lock);
spin_unlock(&core_info.vc[sub]->lock);
/*
* Interrupts will be enabled once we get into the guest,
* so tell lockdep that we're about to enable interrupts.
*/
trace_hardirqs_on();
guest_enter();
srcu_idx = srcu_read_lock(&vc->kvm->srcu);
__kvmppc_vcore_entry();
trap = __kvmppc_vcore_entry();
srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
guest_exit();
trace_hardirqs_off();
set_irq_happened(trap);
spin_lock(&vc->lock);
/* prevent other vcpu threads from doing kvmppc_start_thread() now */
vc->vcore_state = VCORE_EXITING;
@@ -2602,6 +2852,10 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
split_info.do_nap = 0;
}
kvmppc_set_host_core(pcpu);
local_irq_enable();
/* Let secondaries go back to the offline loop */
for (i = 0; i < controlled_threads; ++i) {
kvmppc_release_hwthread(pcpu + i);
@@ -2610,18 +2864,15 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
cpumask_clear_cpu(pcpu + i, &vc->kvm->arch.cpu_in_guest);
}
kvmppc_set_host_core(pcpu);
spin_unlock(&vc->lock);
/* make sure updates to secondary vcpu structs are visible now */
smp_mb();
guest_exit();
for (sub = 0; sub < core_info.n_subcores; ++sub)
list_for_each_entry_safe(pvc, vcnext, &core_info.vcs[sub],
preempt_list)
post_guest_process(pvc, pvc == vc);
for (sub = 0; sub < core_info.n_subcores; ++sub) {
pvc = core_info.vc[sub];
post_guest_process(pvc, pvc == vc);
}
spin_lock(&vc->lock);
preempt_enable();
@@ -2666,6 +2917,30 @@ static void shrink_halt_poll_ns(struct kvmppc_vcore *vc)
vc->halt_poll_ns /= halt_poll_ns_shrink;
}
/*
 * Report whether the vcpu's saved XIVE state shows an interrupt
 * pending, i.e. its saved pipr is numerically below its saved cppr.
 * Always false when CONFIG_KVM_XICS is not set or XIVE is not enabled.
 */
static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_KVM_XICS
	if (xive_enabled())
		return vcpu->arch.xive_saved_state.pipr <
			vcpu->arch.xive_saved_state.cppr;
#endif /* CONFIG_KVM_XICS */
	return false;
}
/*
 * Return true if the vcpu has any wakeup condition outstanding:
 * pending exceptions, a prod, a pending doorbell, or a pending XIVE
 * interrupt.
 */
static bool kvmppc_vcpu_woken(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.pending_exceptions ||
	       vcpu->arch.prodded ||
	       kvmppc_doorbell_pending(vcpu) ||
	       xive_interrupt_pending(vcpu);
}
/*
* Check to see if any of the runnable vcpus on the vcore have pending
* exceptions or are no longer ceded
@@ -2676,8 +2951,7 @@ static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc)
int i;
for_each_runnable_thread(i, vcpu, vc) {
if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded ||
vcpu->arch.prodded)
if (!vcpu->arch.ceded || kvmppc_vcpu_woken(vcpu))
return 1;
}
@@ -2819,15 +3093,14 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
*/
if (!signal_pending(current)) {
if (vc->vcore_state == VCORE_PIGGYBACK) {
struct kvmppc_vcore *mvc = vc->master_vcore;
if (spin_trylock(&mvc->lock)) {
if (mvc->vcore_state == VCORE_RUNNING &&
!VCORE_IS_EXITING(mvc)) {
if (spin_trylock(&vc->lock)) {
if (vc->vcore_state == VCORE_RUNNING &&
!VCORE_IS_EXITING(vc)) {
kvmppc_create_dtl_entry(vcpu, vc);
kvmppc_start_thread(vcpu, vc);
trace_kvm_guest_enter(vcpu);
}
spin_unlock(&mvc->lock);
spin_unlock(&vc->lock);
}
} else if (vc->vcore_state == VCORE_RUNNING &&
!VCORE_IS_EXITING(vc)) {
@@ -2863,7 +3136,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
break;
n_ceded = 0;
for_each_runnable_thread(i, v, vc) {
if (!v->arch.pending_exceptions && !v->arch.prodded)
if (!kvmppc_vcpu_woken(v))
n_ceded += v->arch.ceded;
else
v->arch.ceded = 0;
@@ -3518,6 +3791,19 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
if (!cpu_has_feature(CPU_FTR_ARCH_300))
kvm_hv_vm_activated();
/*
* Initialize smt_mode depending on processor.
* POWER8 and earlier have to use "strict" threading, where
* all vCPUs in a vcore have to run on the same (sub)core,
* whereas on POWER9 the threads can each run a different
* guest.
*/
if (!cpu_has_feature(CPU_FTR_ARCH_300))
kvm->arch.smt_mode = threads_per_subcore;
else
kvm->arch.smt_mode = 1;
kvm->arch.emul_smt_mode = 1;
/*
* Create a debugfs directory for the VM
*/
@@ -3947,6 +4233,7 @@ static struct kvmppc_ops kvm_ops_hv = {
#endif
.configure_mmu = kvmhv_configure_mmu,
.get_rmmu_info = kvmhv_get_rmmu_info,
.set_smt_mode = kvmhv_set_smt_mode,
};
static int kvm_init_subcore_bitmap(void)

View File

@@ -307,7 +307,7 @@ void kvmhv_commence_exit(int trap)
return;
for (i = 0; i < MAX_SUBCORES; ++i) {
vc = sip->master_vcs[i];
vc = sip->vc[i];
if (!vc)
break;
do {

View File

@@ -61,13 +61,6 @@ BEGIN_FTR_SECTION
std r3, HSTATE_DABR(r13)
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
/* Hard-disable interrupts */
mfmsr r10
std r10, HSTATE_HOST_MSR(r13)
rldicl r10,r10,48,1
rotldi r10,r10,16
mtmsrd r10,1
/* Save host PMU registers */
BEGIN_FTR_SECTION
/* Work around P8 PMAE bug */
@@ -153,6 +146,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
*
* R1 = host R1
* R2 = host R2
* R3 = trap number on this thread
* R12 = exit handler id
* R13 = PACA
*/

View File

@@ -130,12 +130,28 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
out:
/*
* For guest that supports FWNMI capability, hook the MCE event into
* vcpu structure. We are going to exit the guest with KVM_EXIT_NMI
* exit reason. On our way to exit we will pull this event from vcpu
* structure and print it from thread 0 of the core/subcore.
*
* For guest that does not support FWNMI capability (old QEMU):
* We are now going to enter the guest either through a machine check
* interrupt (for unhandled errors) or will continue from
* current HSRR0 (for handled errors) in guest. Hence
* queue up the event so that we can log it from host console later.
*/
machine_check_queue_event();
if (vcpu->kvm->arch.fwnmi_enabled) {
/*
* Hook up the mce event on to vcpu structure.
* First clear the old event.
*/
memset(&vcpu->arch.mce_evt, 0, sizeof(vcpu->arch.mce_evt));
if (get_mce_event(&mce_evt, MCE_EVENT_RELEASE)) {
vcpu->arch.mce_evt = mce_evt;
}
} else
machine_check_queue_event();
return handled;
}

View File

@@ -45,7 +45,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
#define NAPPING_NOVCPU 2
/* Stack frame offsets for kvmppc_hv_entry */
#define SFS 144
#define SFS 160
#define STACK_SLOT_TRAP (SFS-4)
#define STACK_SLOT_TID (SFS-16)
#define STACK_SLOT_PSSCR (SFS-24)
@@ -54,6 +54,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
#define STACK_SLOT_CIABR (SFS-48)
#define STACK_SLOT_DAWR (SFS-56)
#define STACK_SLOT_DAWRX (SFS-64)
#define STACK_SLOT_HFSCR (SFS-72)
/*
* Call kvmppc_hv_entry in real mode.
@@ -68,6 +69,7 @@ _GLOBAL_TOC(kvmppc_hv_entry_trampoline)
std r0, PPC_LR_STKOFF(r1)
stdu r1, -112(r1)
mfmsr r10
std r10, HSTATE_HOST_MSR(r13)
LOAD_REG_ADDR(r5, kvmppc_call_hv_entry)
li r0,MSR_RI
andc r0,r10,r0
@@ -152,20 +154,21 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
stb r0, HSTATE_HWTHREAD_REQ(r13)
/*
* For external and machine check interrupts, we need
* to call the Linux handler to process the interrupt.
* We do that by jumping to absolute address 0x500 for
* external interrupts, or the machine_check_fwnmi label
* for machine checks (since firmware might have patched
* the vector area at 0x200). The [h]rfid at the end of the
* handler will return to the book3s_hv_interrupts.S code.
* For other interrupts we do the rfid to get back
* to the book3s_hv_interrupts.S code here.
* For external interrupts we need to call the Linux
* handler to process the interrupt. We do that by jumping
* to absolute address 0x500 for external interrupts.
* The [h]rfid at the end of the handler will return to
* the book3s_hv_interrupts.S code. For other interrupts
* we do the rfid to get back to the book3s_hv_interrupts.S
* code here.
*/
ld r8, 112+PPC_LR_STKOFF(r1)
addi r1, r1, 112
ld r7, HSTATE_HOST_MSR(r13)
/* Return the trap number on this thread as the return value */
mr r3, r12
/*
* If we came back from the guest via a relocation-on interrupt,
* we will be in virtual mode at this point, which makes it a
@@ -175,59 +178,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
andi. r0, r0, MSR_IR /* in real mode? */
bne .Lvirt_return
cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK
cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
beq 11f
cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL
beq 15f /* Invoke the H_DOORBELL handler */
cmpwi cr2, r12, BOOK3S_INTERRUPT_HMI
beq cr2, 14f /* HMI check */
/* RFI into the highmem handler, or branch to interrupt handler */
/* RFI into the highmem handler */
mfmsr r6
li r0, MSR_RI
andc r6, r6, r0
mtmsrd r6, 1 /* Clear RI in MSR */
mtsrr0 r8
mtsrr1 r7
beq cr1, 13f /* machine check */
RFI
/* On POWER7, we have external interrupts set to use HSRR0/1 */
11: mtspr SPRN_HSRR0, r8
mtspr SPRN_HSRR1, r7
ba 0x500
13: b machine_check_fwnmi
14: mtspr SPRN_HSRR0, r8
mtspr SPRN_HSRR1, r7
b hmi_exception_after_realmode
15: mtspr SPRN_HSRR0, r8
mtspr SPRN_HSRR1, r7
ba 0xe80
/* Virtual-mode return - can't get here for HMI or machine check */
/* Virtual-mode return */
.Lvirt_return:
cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
beq 16f
cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL
beq 17f
andi. r0, r7, MSR_EE /* were interrupts hard-enabled? */
beq 18f
mtmsrd r7, 1 /* if so then re-enable them */
18: mtlr r8
mtlr r8
blr
16: mtspr SPRN_HSRR0, r8 /* jump to reloc-on external vector */
mtspr SPRN_HSRR1, r7
b exc_virt_0x4500_hardware_interrupt
17: mtspr SPRN_HSRR0, r8
mtspr SPRN_HSRR1, r7
b exc_virt_0x4e80_h_doorbell
kvmppc_primary_no_guest:
/* We handle this much like a ceded vcpu */
/* put the HDEC into the DEC, since HDEC interrupts don't wake us */
@@ -769,6 +733,8 @@ BEGIN_FTR_SECTION
std r6, STACK_SLOT_PSSCR(r1)
std r7, STACK_SLOT_PID(r1)
std r8, STACK_SLOT_IAMR(r1)
mfspr r5, SPRN_HFSCR
std r5, STACK_SLOT_HFSCR(r1)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
BEGIN_FTR_SECTION
mfspr r5, SPRN_CIABR
@@ -920,8 +886,10 @@ FTR_SECTION_ELSE
ld r5, VCPU_TID(r4)
ld r6, VCPU_PSSCR(r4)
oris r6, r6, PSSCR_EC@h /* This makes stop trap to HV */
ld r7, VCPU_HFSCR(r4)
mtspr SPRN_TIDR, r5
mtspr SPRN_PSSCR, r6
mtspr SPRN_HFSCR, r7
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
8:
@@ -936,7 +904,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
mftb r7
subf r3,r7,r8
mtspr SPRN_DEC,r3
stw r3,VCPU_DEC(r4)
std r3,VCPU_DEC(r4)
ld r5, VCPU_SPRG0(r4)
ld r6, VCPU_SPRG1(r4)
@@ -1048,7 +1016,13 @@ kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */
li r0, BOOK3S_INTERRUPT_EXTERNAL
bne cr1, 12f
mfspr r0, SPRN_DEC
cmpwi r0, 0
BEGIN_FTR_SECTION
/* On POWER9 check whether the guest has large decrementer enabled */
andis. r8, r8, LPCR_LD@h
bne 15f
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
extsw r0, r0
15: cmpdi r0, 0
li r0, BOOK3S_INTERRUPT_DECREMENTER
bge 5f
@@ -1058,6 +1032,23 @@ kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */
mr r9, r4
bl kvmppc_msr_interrupt
5:
BEGIN_FTR_SECTION
b fast_guest_return
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
/* On POWER9, check for pending doorbell requests */
lbz r0, VCPU_DBELL_REQ(r4)
cmpwi r0, 0
beq fast_guest_return
ld r5, HSTATE_KVM_VCORE(r13)
/* Set DPDES register so the CPU will take a doorbell interrupt */
li r0, 1
mtspr SPRN_DPDES, r0
std r0, VCORE_DPDES(r5)
/* Make sure other cpus see vcore->dpdes set before dbell req clear */
lwsync
/* Clear the pending doorbell request */
li r0, 0
stb r0, VCPU_DBELL_REQ(r4)
/*
* Required state:
@@ -1232,6 +1223,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
stw r12,VCPU_TRAP(r9)
/*
* Now that we have saved away SRR0/1 and HSRR0/1,
* interrupts are recoverable in principle, so set MSR_RI.
* This becomes important for relocation-on interrupts from
* the guest, which we can get in radix mode on POWER9.
*/
li r0, MSR_RI
mtmsrd r0, 1
#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
addi r3, r9, VCPU_TB_RMINTR
mr r4, r9
@@ -1288,6 +1288,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
beq 4f
b guest_exit_cont
3:
/* If it's a hypervisor facility unavailable interrupt, save HFSCR */
cmpwi r12, BOOK3S_INTERRUPT_H_FAC_UNAVAIL
bne 14f
mfspr r3, SPRN_HFSCR
std r3, VCPU_HFSCR(r9)
b guest_exit_cont
14:
/* External interrupt ? */
cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
bne+ guest_exit_cont
@@ -1475,12 +1482,18 @@ mc_cont:
mtspr SPRN_SPURR,r4
/* Save DEC */
ld r3, HSTATE_KVM_VCORE(r13)
mfspr r5,SPRN_DEC
mftb r6
/* On P9, if the guest has large decr enabled, don't sign extend */
BEGIN_FTR_SECTION
ld r4, VCORE_LPCR(r3)
andis. r4, r4, LPCR_LD@h
bne 16f
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
extsw r5,r5
add r5,r5,r6
16: add r5,r5,r6
/* r5 is a guest timebase value here, convert to host TB */
ld r3,HSTATE_KVM_VCORE(r13)
ld r4,VCORE_TB_OFFSET(r3)
subf r5,r4,r5
std r5,VCPU_DEC_EXPIRES(r9)
@@ -1525,6 +1538,9 @@ FTR_SECTION_ELSE
rldicl r6, r6, 4, 50 /* r6 &= PSSCR_GUEST_VIS */
rotldi r6, r6, 60
std r6, VCPU_PSSCR(r9)
/* Restore host HFSCR value */
ld r7, STACK_SLOT_HFSCR(r1)
mtspr SPRN_HFSCR, r7
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
/*
* Restore various registers to 0, where non-zero values
@@ -2402,8 +2418,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
mfspr r3, SPRN_DEC
mfspr r4, SPRN_HDEC
mftb r5
BEGIN_FTR_SECTION
/* On P9 check whether the guest has large decrementer mode enabled */
ld r6, HSTATE_KVM_VCORE(r13)
ld r6, VCORE_LPCR(r6)
andis. r6, r6, LPCR_LD@h
bne 68f
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
extsw r3, r3
EXTEND_HDEC(r4)
68: EXTEND_HDEC(r4)
cmpd r3, r4
ble 67f
mtspr SPRN_DEC, r4
@@ -2589,22 +2612,32 @@ machine_check_realmode:
ld r9, HSTATE_KVM_VCPU(r13)
li r12, BOOK3S_INTERRUPT_MACHINE_CHECK
/*
* Deliver unhandled/fatal (e.g. UE) MCE errors to guest through
* machine check interrupt (set HSRR0 to 0x200). And for handled
* errors (no-fatal), just go back to guest execution with current
* HSRR0 instead of exiting guest. This new approach will inject
* machine check to guest for fatal error causing guest to crash.
*
* The old code used to return to host for unhandled errors which
* was causing guest to hang with soft lockups inside guest and
* makes it difficult to recover guest instance.
* For the guest that is FWNMI capable, deliver all the MCE errors
* (handled/unhandled) by exiting the guest with KVM_EXIT_NMI exit
* reason. This new approach injects machine check errors in guest
* address space to guest with additional information in the form
* of RTAS event, thus enabling guest kernel to suitably handle
* such errors.
*
* For the guest that is not FWNMI capable (old QEMU) fallback
* to old behaviour for backward compatibility:
* Deliver unhandled/fatal (e.g. UE) MCE errors to the guest
* through a machine check interrupt (set HSRR0 to 0x200).
* For handled (non-fatal) errors, just go back to guest execution
* with the current HSRR0.
* If we receive a machine check with MSR(RI=0), then deliver it to
* the guest as a machine check, causing the guest to crash.
*/
ld r11, VCPU_MSR(r9)
rldicl. r0, r11, 64-MSR_HV_LG, 63 /* check if it happened in HV mode */
bne mc_cont /* if so, exit to host */
/* Check if guest is capable of handling NMI exit */
ld r10, VCPU_KVM(r9)
lbz r10, KVM_FWNMI(r10)
cmpdi r10, 1 /* FWNMI capable? */
beq mc_cont /* if so, exit with KVM_EXIT_NMI. */
/* if not, fall through for backward compatibility. */
andi. r10, r11, MSR_RI /* check for unrecoverable exception */
beq 1f /* Deliver a machine check to guest */
ld r10, VCPU_PC(r9)

View File

@@ -1257,8 +1257,8 @@ static void xive_pre_save_scan(struct kvmppc_xive *xive)
if (!xc)
continue;
for (j = 0; j < KVMPPC_XIVE_Q_COUNT; j++) {
if (xc->queues[i].qpage)
xive_pre_save_queue(xive, &xc->queues[i]);
if (xc->queues[j].qpage)
xive_pre_save_queue(xive, &xc->queues[j]);
}
}

View File

@@ -687,7 +687,7 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
kvmppc_core_check_exceptions(vcpu);
if (vcpu->requests) {
if (kvm_request_pending(vcpu)) {
/* Exception delivery raised request; start over */
return 1;
}

View File

@@ -39,7 +39,7 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
unsigned long dec_nsec;
unsigned long long dec_time;
pr_debug("mtDEC: %x\n", vcpu->arch.dec);
pr_debug("mtDEC: %lx\n", vcpu->arch.dec);
hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
#ifdef CONFIG_PPC_BOOK3S
@@ -109,7 +109,7 @@ static int kvmppc_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
case SPRN_TBWU: break;
case SPRN_DEC:
vcpu->arch.dec = spr_val;
vcpu->arch.dec = (u32) spr_val;
kvmppc_emulate_dec(vcpu);
break;

View File

@@ -55,8 +55,7 @@ EXPORT_SYMBOL_GPL(kvmppc_pr_ops);
int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
{
return !!(v->arch.pending_exceptions) ||
v->requests;
return !!(v->arch.pending_exceptions) || kvm_request_pending(v);
}
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
@@ -108,7 +107,7 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
*/
smp_mb();
if (vcpu->requests) {
if (kvm_request_pending(vcpu)) {
/* Make sure we process requests preemptable */
local_irq_enable();
trace_kvm_check_requests(vcpu);
@@ -554,13 +553,28 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
case KVM_CAP_PPC_SMT:
r = 0;
if (hv_enabled) {
if (kvm) {
if (kvm->arch.emul_smt_mode > 1)
r = kvm->arch.emul_smt_mode;
else
r = kvm->arch.smt_mode;
} else if (hv_enabled) {
if (cpu_has_feature(CPU_FTR_ARCH_300))
r = 1;
else
r = threads_per_subcore;
}
break;
case KVM_CAP_PPC_SMT_POSSIBLE:
r = 1;
if (hv_enabled) {
if (!cpu_has_feature(CPU_FTR_ARCH_300))
r = ((threads_per_subcore << 1) - 1);
else
/* P9 can emulate dbells, so allow any mode */
r = 8 | 4 | 2 | 1;
}
break;
case KVM_CAP_PPC_RMA:
r = 0;
break;
@@ -618,6 +632,11 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
/* Disable this on POWER9 until code handles new HPTE format */
r = !!hv_enabled && !cpu_has_feature(CPU_FTR_ARCH_300);
break;
#endif
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
case KVM_CAP_PPC_FWNMI:
r = hv_enabled;
break;
#endif
case KVM_CAP_PPC_HTM:
r = cpu_has_feature(CPU_FTR_TM_COMP) &&
@@ -1538,6 +1557,15 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
break;
}
#endif /* CONFIG_KVM_XICS */
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
case KVM_CAP_PPC_FWNMI:
r = -EINVAL;
if (!is_kvmppc_hv_enabled(vcpu->kvm))
break;
r = 0;
vcpu->kvm->arch.fwnmi_enabled = true;
break;
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
default:
r = -EINVAL;
break;
@@ -1712,6 +1740,15 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
r = 0;
break;
}
case KVM_CAP_PPC_SMT: {
unsigned long mode = cap->args[0];
unsigned long flags = cap->args[1];
r = -EINVAL;
if (kvm->arch.kvm_ops->set_smt_mode)
r = kvm->arch.kvm_ops->set_smt_mode(kvm, mode, flags);
break;
}
#endif
default:
r = -EINVAL;