avic.c

  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Kernel-based Virtual Machine driver for Linux
  4. *
  5. * AMD SVM support
  6. *
  7. * Copyright (C) 2006 Qumranet, Inc.
  8. * Copyright 2010 Red Hat, Inc. and/or its affiliates.
  9. *
  10. * Authors:
  11. * Yaniv Kamay <[email protected]>
  12. * Avi Kivity <[email protected]>
  13. */
  14. #define pr_fmt(fmt) "SVM: " fmt
  15. #include <linux/kvm_types.h>
  16. #include <linux/hashtable.h>
  17. #include <linux/amd-iommu.h>
  18. #include <linux/kvm_host.h>
  19. #include <asm/irq_remapping.h>
  20. #include "trace.h"
  21. #include "lapic.h"
  22. #include "x86.h"
  23. #include "irq.h"
  24. #include "svm.h"
  25. /*
  26. * Encode the arbitrary VM ID and the vCPU's default APIC ID, i.e. the vCPU ID,
  27. * into the GATag so that KVM can retrieve the correct vCPU from a GALog entry
  28. * if an interrupt can't be delivered, e.g. because the vCPU isn't running.
  29. *
  30. * For the vCPU ID, use however many bits are currently allowed for the max
  31. * guest physical APIC ID (limited by the size of the physical ID table), and
  32. * use whatever bits remain to assign arbitrary AVIC IDs to VMs. Note, the
  33. * size of the GATag is defined by hardware (32 bits), but is an opaque value
  34. * as far as hardware is concerned.
  35. */
  36. #define AVIC_VCPU_ID_MASK AVIC_PHYSICAL_MAX_INDEX_MASK
  37. #define AVIC_VM_ID_SHIFT HWEIGHT32(AVIC_PHYSICAL_MAX_INDEX_MASK)
  38. #define AVIC_VM_ID_MASK (GENMASK(31, AVIC_VM_ID_SHIFT) >> AVIC_VM_ID_SHIFT)
  39. #define AVIC_GATAG(x, y) (((x & AVIC_VM_ID_MASK) << AVIC_VM_ID_SHIFT) | \
  40. (y & AVIC_VCPU_ID_MASK))
  41. #define AVIC_GATAG_TO_VMID(x) ((x >> AVIC_VM_ID_SHIFT) & AVIC_VM_ID_MASK)
  42. #define AVIC_GATAG_TO_VCPUID(x) (x & AVIC_VCPU_ID_MASK)
  43. static_assert(AVIC_GATAG(AVIC_VM_ID_MASK, AVIC_VCPU_ID_MASK) == -1u);
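/*
 * Illustrative example (not part of the driver): assuming, for the sake of
 * illustration, that AVIC_PHYSICAL_MAX_INDEX_MASK is the 9-bit mask 0x1ff
 * (it is defined elsewhere, in svm.h), the encoding above behaves as:
 *
 *   AVIC_GATAG(0x5, 0x21)       == (0x5 << 9) | 0x21 == 0xa21
 *   AVIC_GATAG_TO_VMID(0xa21)   == 0x5
 *   AVIC_GATAG_TO_VCPUID(0xa21) == 0x21
 *
 * i.e. the vCPU ID occupies the low bits and the VM ID the remaining high
 * bits, which is what avic_ga_log_notifier() relies on to recover the
 * (VM, vCPU) pair from a GALog entry.
 */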
  44. static bool force_avic;
  45. module_param_unsafe(force_avic, bool, 0444);
  46. /* Note:
  47. * This hash table is used to map VM_ID to a struct kvm_svm,
  48. * when handling an AMD IOMMU GALOG notification to schedule in
  49. * a particular vCPU.
  50. */
  51. #define SVM_VM_DATA_HASH_BITS 8
  52. static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS);
  53. static u32 next_vm_id = 0;
  54. static bool next_vm_id_wrapped = 0;
  55. static DEFINE_SPINLOCK(svm_vm_data_hash_lock);
  56. enum avic_modes avic_mode;
  57. /*
  58. * This is a wrapper of struct amd_iommu_ir_data.
  59. */
  60. struct amd_svm_iommu_ir {
  61. struct list_head node; /* Used by SVM for per-vcpu ir_list */
  62. void *data; /* Storing pointer to struct amd_ir_data */
  63. };
  64. static void avic_activate_vmcb(struct vcpu_svm *svm)
  65. {
  66. struct vmcb *vmcb = svm->vmcb01.ptr;
  67. vmcb->control.int_ctl &= ~(AVIC_ENABLE_MASK | X2APIC_MODE_MASK);
  68. vmcb->control.avic_physical_id &= ~AVIC_PHYSICAL_MAX_INDEX_MASK;
  69. vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
  70. /* Note:
  71. * KVM can support hybrid-AVIC mode, where KVM emulates x2APIC
  72. * MSR accesses, while interrupt injection to a running vCPU
  73. * can be achieved using the AVIC doorbell. The AVIC hardware still
  74. * accelerates MMIO accesses, but this does not cause any harm
  75. * as the guest is not supposed to access xAPIC MMIO when it uses x2APIC.
  76. */
  77. if (apic_x2apic_mode(svm->vcpu.arch.apic) &&
  78. avic_mode == AVIC_MODE_X2) {
  79. vmcb->control.int_ctl |= X2APIC_MODE_MASK;
  80. vmcb->control.avic_physical_id |= X2AVIC_MAX_PHYSICAL_ID;
  81. /* Disabling MSR intercept for x2APIC registers */
  82. svm_set_x2apic_msr_interception(svm, false);
  83. } else {
  84. /*
  85. * Flush the TLB, the guest may have inserted a non-APIC
  86. * mapping into the TLB while AVIC was disabled.
  87. */
  88. kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, &svm->vcpu);
  89. /* For xAVIC and hybrid-xAVIC modes */
  90. vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID;
  91. /* Enabling MSR intercept for x2APIC registers */
  92. svm_set_x2apic_msr_interception(svm, true);
  93. }
  94. }
  95. static void avic_deactivate_vmcb(struct vcpu_svm *svm)
  96. {
  97. struct vmcb *vmcb = svm->vmcb01.ptr;
  98. vmcb->control.int_ctl &= ~(AVIC_ENABLE_MASK | X2APIC_MODE_MASK);
  99. vmcb->control.avic_physical_id &= ~AVIC_PHYSICAL_MAX_INDEX_MASK;
  100. /*
  101. * If running nested and the guest uses its own MSR bitmap, there
  102. * is no need to update L0's msr bitmap
  103. */
  104. if (is_guest_mode(&svm->vcpu) &&
  105. vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT))
  106. return;
  107. /* Enabling MSR intercept for x2APIC registers */
  108. svm_set_x2apic_msr_interception(svm, true);
  109. }
  110. /* Note:
  111. * This function is called from the IOMMU driver to notify
  112. * SVM to schedule in a particular vCPU of a particular VM.
  113. */
  114. int avic_ga_log_notifier(u32 ga_tag)
  115. {
  116. unsigned long flags;
  117. struct kvm_svm *kvm_svm;
  118. struct kvm_vcpu *vcpu = NULL;
  119. u32 vm_id = AVIC_GATAG_TO_VMID(ga_tag);
  120. u32 vcpu_id = AVIC_GATAG_TO_VCPUID(ga_tag);
  121. pr_debug("SVM: %s: vm_id=%#x, vcpu_id=%#x\n", __func__, vm_id, vcpu_id);
  122. trace_kvm_avic_ga_log(vm_id, vcpu_id);
  123. spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
  124. hash_for_each_possible(svm_vm_data_hash, kvm_svm, hnode, vm_id) {
  125. if (kvm_svm->avic_vm_id != vm_id)
  126. continue;
  127. vcpu = kvm_get_vcpu_by_id(&kvm_svm->kvm, vcpu_id);
  128. break;
  129. }
  130. spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
  131. /* Note:
  132. * At this point, the IOMMU should have already set the pending
  133. * bit in the vAPIC backing page. So, we just need to schedule
  134. * in the vcpu.
  135. */
  136. if (vcpu)
  137. kvm_vcpu_wake_up(vcpu);
  138. return 0;
  139. }
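/*
 * Illustrative usage sketch (hedged, not part of the driver): the notifier
 * above is handed to the AMD IOMMU driver during hardware setup, see
 * avic_hardware_setup() at the bottom of this file:
 *
 *   amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
 *
 * When a posted interrupt cannot be delivered because the target vCPU is
 * not running, the IOMMU logs the GATag and the driver eventually invokes
 * avic_ga_log_notifier(ga_tag), which wakes the matching vCPU.
 */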
  140. void avic_vm_destroy(struct kvm *kvm)
  141. {
  142. unsigned long flags;
  143. struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
  144. if (!enable_apicv)
  145. return;
  146. if (kvm_svm->avic_logical_id_table_page)
  147. __free_page(kvm_svm->avic_logical_id_table_page);
  148. if (kvm_svm->avic_physical_id_table_page)
  149. __free_page(kvm_svm->avic_physical_id_table_page);
  150. spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
  151. hash_del(&kvm_svm->hnode);
  152. spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
  153. }
  154. int avic_vm_init(struct kvm *kvm)
  155. {
  156. unsigned long flags;
  157. int err = -ENOMEM;
  158. struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
  159. struct kvm_svm *k2;
  160. struct page *p_page;
  161. struct page *l_page;
  162. u32 vm_id;
  163. if (!enable_apicv)
  164. return 0;
  165. /* Allocating physical APIC ID table (4KB) */
  166. p_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
  167. if (!p_page)
  168. goto free_avic;
  169. kvm_svm->avic_physical_id_table_page = p_page;
  170. /* Allocating logical APIC ID table (4KB) */
  171. l_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
  172. if (!l_page)
  173. goto free_avic;
  174. kvm_svm->avic_logical_id_table_page = l_page;
  175. spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
  176. again:
  177. vm_id = next_vm_id = (next_vm_id + 1) & AVIC_VM_ID_MASK;
  178. if (vm_id == 0) { /* id is 1-based, zero is not okay */
  179. next_vm_id_wrapped = 1;
  180. goto again;
  181. }
  182. /* Is it still in use? Only possible if wrapped at least once */
  183. if (next_vm_id_wrapped) {
  184. hash_for_each_possible(svm_vm_data_hash, k2, hnode, vm_id) {
  185. if (k2->avic_vm_id == vm_id)
  186. goto again;
  187. }
  188. }
  189. kvm_svm->avic_vm_id = vm_id;
  190. hash_add(svm_vm_data_hash, &kvm_svm->hnode, kvm_svm->avic_vm_id);
  191. spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
  192. return 0;
  193. free_avic:
  194. avic_vm_destroy(kvm);
  195. return err;
  196. }
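/*
 * Illustrative note (not part of the driver): VM IDs are handed out from a
 * simple global counter, e.g. the first three VMs get IDs 1, 2 and 3. Only
 * once next_vm_id wraps around AVIC_VM_ID_MASK does the hash-table lookup
 * above kick in, to skip IDs still owned by live VMs.
 */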
  197. void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb)
  198. {
  199. struct kvm_svm *kvm_svm = to_kvm_svm(svm->vcpu.kvm);
  200. phys_addr_t bpa = __sme_set(page_to_phys(svm->avic_backing_page));
  201. phys_addr_t lpa = __sme_set(page_to_phys(kvm_svm->avic_logical_id_table_page));
  202. phys_addr_t ppa = __sme_set(page_to_phys(kvm_svm->avic_physical_id_table_page));
  203. vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK;
  204. vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK;
  205. vmcb->control.avic_physical_id = ppa & AVIC_HPA_MASK;
  206. vmcb->control.avic_vapic_bar = APIC_DEFAULT_PHYS_BASE & VMCB_AVIC_APIC_BAR_MASK;
  207. if (kvm_apicv_activated(svm->vcpu.kvm))
  208. avic_activate_vmcb(svm);
  209. else
  210. avic_deactivate_vmcb(svm);
  211. }
  212. static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
  213. unsigned int index)
  214. {
  215. u64 *avic_physical_id_table;
  216. struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
  217. if ((avic_mode == AVIC_MODE_X1 && index > AVIC_MAX_PHYSICAL_ID) ||
  218. (avic_mode == AVIC_MODE_X2 && index > X2AVIC_MAX_PHYSICAL_ID))
  219. return NULL;
  220. avic_physical_id_table = page_address(kvm_svm->avic_physical_id_table_page);
  221. return &avic_physical_id_table[index];
  222. }
  223. /*
  224. * Note:
  225. * AVIC hardware walks the nested page table to check permissions,
  226. * but does not use the SPA address specified in the leaf page
  227. * table entry since it uses the address in the AVIC_BACKING_PAGE pointer
  228. * field of the VMCB. Therefore, we set up the
  229. * APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (4KB) here.
  230. */
  231. static int avic_alloc_access_page(struct kvm *kvm)
  232. {
  233. void __user *ret;
  234. int r = 0;
  235. mutex_lock(&kvm->slots_lock);
  236. if (kvm->arch.apic_access_memslot_enabled)
  237. goto out;
  238. ret = __x86_set_memory_region(kvm,
  239. APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
  240. APIC_DEFAULT_PHYS_BASE,
  241. PAGE_SIZE);
  242. if (IS_ERR(ret)) {
  243. r = PTR_ERR(ret);
  244. goto out;
  245. }
  246. kvm->arch.apic_access_memslot_enabled = true;
  247. out:
  248. mutex_unlock(&kvm->slots_lock);
  249. return r;
  250. }
  251. static int avic_init_backing_page(struct kvm_vcpu *vcpu)
  252. {
  253. u64 *entry, new_entry;
  254. int id = vcpu->vcpu_id;
  255. struct vcpu_svm *svm = to_svm(vcpu);
  256. if ((avic_mode == AVIC_MODE_X1 && id > AVIC_MAX_PHYSICAL_ID) ||
  257. (avic_mode == AVIC_MODE_X2 && id > X2AVIC_MAX_PHYSICAL_ID))
  258. return -EINVAL;
  259. if (!vcpu->arch.apic->regs)
  260. return -EINVAL;
  261. if (kvm_apicv_activated(vcpu->kvm)) {
  262. int ret;
  263. ret = avic_alloc_access_page(vcpu->kvm);
  264. if (ret)
  265. return ret;
  266. }
  267. svm->avic_backing_page = virt_to_page(vcpu->arch.apic->regs);
  268. /* Setting AVIC backing page address in the phy APIC ID table */
  269. entry = avic_get_physical_id_entry(vcpu, id);
  270. if (!entry)
  271. return -EINVAL;
  272. new_entry = __sme_set((page_to_phys(svm->avic_backing_page) &
  273. AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) |
  274. AVIC_PHYSICAL_ID_ENTRY_VALID_MASK);
  275. WRITE_ONCE(*entry, new_entry);
  276. svm->avic_physical_id_cache = entry;
  277. return 0;
  278. }
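/*
 * Illustrative example (hedged, hypothetical address): if the vAPIC backing
 * page for vCPU 3 were at host physical address 0x12345000, the physical ID
 * table entry written above would conceptually be
 *
 *   entry[3] = 0x12345000 | AVIC_PHYSICAL_ID_ENTRY_VALID_MASK;
 *
 * (plus the C-bit via __sme_set() when SME is active). The IsRunning bit and
 * the host physical APIC ID are filled in later by avic_vcpu_load().
 */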
  279. void avic_ring_doorbell(struct kvm_vcpu *vcpu)
  280. {
  281. /*
  282. * Note, the vCPU could get migrated to a different pCPU at any point,
  283. * which could result in signalling the wrong/previous pCPU. But if
  284. * that happens the vCPU is guaranteed to do a VMRUN (after being
  285. * migrated) and thus will process pending interrupts, i.e. a doorbell
  286. * is not needed (and the spurious one is harmless).
  287. */
  288. int cpu = READ_ONCE(vcpu->cpu);
  289. if (cpu != get_cpu()) {
  290. wrmsrl(MSR_AMD64_SVM_AVIC_DOORBELL, kvm_cpu_get_apicid(cpu));
  291. trace_kvm_avic_doorbell(vcpu->vcpu_id, kvm_cpu_get_apicid(cpu));
  292. }
  293. put_cpu();
  294. }
  295. /*
  296. * A fast-path version of avic_kick_target_vcpus(), which attempts to match
  297. * destination APIC ID to vCPU without looping through all vCPUs.
  298. */
  299. static int avic_kick_target_vcpus_fast(struct kvm *kvm, struct kvm_lapic *source,
  300. u32 icrl, u32 icrh, u32 index)
  301. {
  302. u32 l1_physical_id, dest;
  303. struct kvm_vcpu *target_vcpu;
  304. int dest_mode = icrl & APIC_DEST_MASK;
  305. int shorthand = icrl & APIC_SHORT_MASK;
  306. struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
  307. if (shorthand != APIC_DEST_NOSHORT)
  308. return -EINVAL;
  309. if (apic_x2apic_mode(source))
  310. dest = icrh;
  311. else
  312. dest = GET_XAPIC_DEST_FIELD(icrh);
  313. if (dest_mode == APIC_DEST_PHYSICAL) {
  314. /* broadcast destination, use slow path */
  315. if (apic_x2apic_mode(source) && dest == X2APIC_BROADCAST)
  316. return -EINVAL;
  317. if (!apic_x2apic_mode(source) && dest == APIC_BROADCAST)
  318. return -EINVAL;
  319. l1_physical_id = dest;
  320. if (WARN_ON_ONCE(l1_physical_id != index))
  321. return -EINVAL;
  322. } else {
  323. u32 bitmap, cluster;
  324. int logid_index;
  325. if (apic_x2apic_mode(source)) {
  326. /* 16 bit dest mask, 16 bit cluster id */
  327. bitmap = dest & 0xFFFF0000;
  328. cluster = (dest >> 16) << 4;
  329. } else if (kvm_lapic_get_reg(source, APIC_DFR) == APIC_DFR_FLAT) {
  330. /* 8 bit dest mask */
  331. bitmap = dest;
  332. cluster = 0;
  333. } else {
  334. /* 4 bit dest mask, 4 bit cluster id */
  335. bitmap = dest & 0xF;
  336. cluster = (dest >> 4) << 2;
  337. }
  338. if (unlikely(!bitmap))
  339. /* guest bug: nobody to send the logical interrupt to */
  340. return 0;
  341. if (!is_power_of_2(bitmap))
  342. /* multiple logical destinations, use slow path */
  343. return -EINVAL;
  344. logid_index = cluster + __ffs(bitmap);
  345. if (!apic_x2apic_mode(source)) {
  346. u32 *avic_logical_id_table =
  347. page_address(kvm_svm->avic_logical_id_table_page);
  348. u32 logid_entry = avic_logical_id_table[logid_index];
  349. if (WARN_ON_ONCE(index != logid_index))
  350. return -EINVAL;
  351. /* guest bug: non-existent/reserved logical destination */
  352. if (unlikely(!(logid_entry & AVIC_LOGICAL_ID_ENTRY_VALID_MASK)))
  353. return 0;
  354. l1_physical_id = logid_entry &
  355. AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
  356. } else {
  357. /*
  358. * For x2APIC logical mode, cannot leverage the index.
  359. * Instead, calculate physical ID from logical ID in ICRH.
  360. */
  361. int cluster = (icrh & 0xffff0000) >> 16;
  362. int apic = ffs(icrh & 0xffff) - 1;
  363. /*
  364. * If the x2APIC logical ID sub-field (i.e. icrh[15:0])
  365. * contains anything but a single bit, we cannot use the
  366. * fast path, because it is limited to a single vCPU.
  367. */
  368. if (apic < 0 || icrh != (1 << apic))
  369. return -EINVAL;
  370. l1_physical_id = (cluster << 4) + apic;
  371. }
  372. }
  373. target_vcpu = kvm_get_vcpu_by_id(kvm, l1_physical_id);
  374. if (unlikely(!target_vcpu))
  375. /* guest bug: non-existent vCPU is the target of this IPI */
  376. return 0;
  377. target_vcpu->arch.apic->irr_pending = true;
  378. svm_complete_interrupt_delivery(target_vcpu,
  379. icrl & APIC_MODE_MASK,
  380. icrl & APIC_INT_LEVELTRIG,
  381. icrl & APIC_VECTOR_MASK);
  382. return 0;
  383. }
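/*
 * Worked example for the xAPIC cluster-mode decode above (illustrative):
 * dest = 0x21 gives bitmap = 0x1 and cluster = (0x2 << 2) = 8, so
 * logid_index = 8 + __ffs(0x1) = 8, i.e. the first APIC of cluster 2.
 * A dest with more than one bit set in its mask fails is_power_of_2() and
 * falls back to the slow path.
 */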
  384. static void avic_kick_target_vcpus(struct kvm *kvm, struct kvm_lapic *source,
  385. u32 icrl, u32 icrh, u32 index)
  386. {
  387. unsigned long i;
  388. struct kvm_vcpu *vcpu;
  389. if (!avic_kick_target_vcpus_fast(kvm, source, icrl, icrh, index))
  390. return;
  391. trace_kvm_avic_kick_vcpu_slowpath(icrh, icrl, index);
  392. /*
  393. * Wake any target vCPUs that are blocking, i.e. waiting for a wake
  394. * event. There's no need to signal doorbells, as hardware has handled
  395. * vCPUs that were in guest at the time of the IPI, and vCPUs that have
  396. * since entered the guest will have processed pending IRQs at VMRUN.
  397. */
  398. kvm_for_each_vcpu(i, vcpu, kvm) {
  399. u32 dest;
  400. if (apic_x2apic_mode(vcpu->arch.apic))
  401. dest = icrh;
  402. else
  403. dest = GET_XAPIC_DEST_FIELD(icrh);
  404. if (kvm_apic_match_dest(vcpu, source, icrl & APIC_SHORT_MASK,
  405. dest, icrl & APIC_DEST_MASK)) {
  406. vcpu->arch.apic->irr_pending = true;
  407. svm_complete_interrupt_delivery(vcpu,
  408. icrl & APIC_MODE_MASK,
  409. icrl & APIC_INT_LEVELTRIG,
  410. icrl & APIC_VECTOR_MASK);
  411. }
  412. }
  413. }
  414. int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu)
  415. {
  416. struct vcpu_svm *svm = to_svm(vcpu);
  417. u32 icrh = svm->vmcb->control.exit_info_1 >> 32;
  418. u32 icrl = svm->vmcb->control.exit_info_1;
  419. u32 id = svm->vmcb->control.exit_info_2 >> 32;
  420. u32 index = svm->vmcb->control.exit_info_2 & 0x1FF;
  421. struct kvm_lapic *apic = vcpu->arch.apic;
  422. trace_kvm_avic_incomplete_ipi(vcpu->vcpu_id, icrh, icrl, id, index);
  423. switch (id) {
  424. case AVIC_IPI_FAILURE_INVALID_TARGET:
  425. case AVIC_IPI_FAILURE_INVALID_INT_TYPE:
  426. /*
  427. * Emulate IPIs that are not handled by AVIC hardware, which
  428. * only virtualizes Fixed, Edge-Triggered INTRs, and falls over
  429. * if _any_ targets are invalid, e.g. if the logical mode mask
  430. * is a superset of running vCPUs.
  431. *
  432. * The exit is a trap, e.g. ICR holds the correct value and RIP
  433. * has been advanced, KVM is responsible only for emulating the
  434. * IPI. Sadly, hardware may sometimes leave the BUSY flag set,
  435. * in which case KVM needs to emulate the ICR write as well in
  436. * order to clear the BUSY flag.
  437. */
  438. if (icrl & APIC_ICR_BUSY)
  439. kvm_apic_write_nodecode(vcpu, APIC_ICR);
  440. else
  441. kvm_apic_send_ipi(apic, icrl, icrh);
  442. break;
  443. case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING:
  444. /*
  445. * At this point, we expect that the AVIC HW has already
  446. * set the appropriate IRR bits on the valid target
  447. * vcpus. So, we just need to kick the appropriate vcpu.
  448. */
  449. avic_kick_target_vcpus(vcpu->kvm, apic, icrl, icrh, index);
  450. break;
  451. case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
  452. WARN_ONCE(1, "Invalid backing page\n");
  453. break;
  454. case AVIC_IPI_FAILURE_INVALID_IPI_VECTOR:
  455. /* Invalid IPI with vector < 16 */
  456. break;
  457. default:
  458. vcpu_unimpl(vcpu, "Unknown avic incomplete IPI interception\n");
  459. }
  460. return 1;
  461. }
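/*
 * Illustrative note on the decode above (hedged): exit_info_1 carries the
 * guest's ICR write (ICRH in bits 63:32, ICRL in bits 31:0), while
 * exit_info_2 carries the failure reason (bits 63:32, one of the
 * AVIC_IPI_FAILURE_* values switched on above) and a 9-bit physical ID
 * table index (bits 8:0) that the fast kick path uses as a cross-check.
 */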
  462. unsigned long avic_vcpu_get_apicv_inhibit_reasons(struct kvm_vcpu *vcpu)
  463. {
  464. if (is_guest_mode(vcpu))
  465. return APICV_INHIBIT_REASON_NESTED;
  466. return 0;
  467. }
  468. static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat)
  469. {
  470. struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
  471. int index;
  472. u32 *logical_apic_id_table;
  473. int dlid = GET_APIC_LOGICAL_ID(ldr);
  474. if (!dlid)
  475. return NULL;
  476. if (flat) { /* flat */
  477. index = ffs(dlid) - 1;
  478. if (index > 7)
  479. return NULL;
  480. } else { /* cluster */
  481. int cluster = (dlid & 0xf0) >> 4;
  482. int apic = ffs(dlid & 0x0f) - 1;
  483. if ((apic < 0) || (apic > 7) ||
  484. (cluster >= 0xf))
  485. return NULL;
  486. index = (cluster << 2) + apic;
  487. }
  488. logical_apic_id_table = (u32 *) page_address(kvm_svm->avic_logical_id_table_page);
  489. return &logical_apic_id_table[index];
  490. }
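/*
 * Worked example for the index computation above (illustrative): in flat
 * mode, dlid = 0x08 gives index = ffs(0x08) - 1 = 3. In cluster mode,
 * dlid = 0x21 splits into cluster 2 and apic 0, giving
 * index = (2 << 2) + 0 = 8, i.e. the table is indexed as cluster*4 + apic,
 * matching the xAPIC logical destination format.
 */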
  491. static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr)
  492. {
  493. bool flat;
  494. u32 *entry, new_entry;
  495. flat = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR) == APIC_DFR_FLAT;
  496. entry = avic_get_logical_id_entry(vcpu, ldr, flat);
  497. if (!entry)
  498. return -EINVAL;
  499. new_entry = READ_ONCE(*entry);
  500. new_entry &= ~AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
  501. new_entry |= (g_physical_id & AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK);
  502. new_entry |= AVIC_LOGICAL_ID_ENTRY_VALID_MASK;
  503. WRITE_ONCE(*entry, new_entry);
  504. return 0;
  505. }
  506. static void avic_invalidate_logical_id_entry(struct kvm_vcpu *vcpu)
  507. {
  508. struct vcpu_svm *svm = to_svm(vcpu);
  509. bool flat = svm->dfr_reg == APIC_DFR_FLAT;
  510. u32 *entry;
  511. /* Note: x2AVIC does not use logical APIC ID table */
  512. if (apic_x2apic_mode(vcpu->arch.apic))
  513. return;
  514. entry = avic_get_logical_id_entry(vcpu, svm->ldr_reg, flat);
  515. if (entry)
  516. clear_bit(AVIC_LOGICAL_ID_ENTRY_VALID_BIT, (unsigned long *)entry);
  517. }
  518. static int avic_handle_ldr_update(struct kvm_vcpu *vcpu)
  519. {
  520. int ret = 0;
  521. struct vcpu_svm *svm = to_svm(vcpu);
  522. u32 ldr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LDR);
  523. u32 id = kvm_xapic_id(vcpu->arch.apic);
  524. /* AVIC does not support LDR update for x2APIC */
  525. if (apic_x2apic_mode(vcpu->arch.apic))
  526. return 0;
  527. if (ldr == svm->ldr_reg)
  528. return 0;
  529. avic_invalidate_logical_id_entry(vcpu);
  530. if (ldr)
  531. ret = avic_ldr_write(vcpu, id, ldr);
  532. if (!ret)
  533. svm->ldr_reg = ldr;
  534. return ret;
  535. }
  536. static void avic_handle_dfr_update(struct kvm_vcpu *vcpu)
  537. {
  538. struct vcpu_svm *svm = to_svm(vcpu);
  539. u32 dfr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR);
  540. if (svm->dfr_reg == dfr)
  541. return;
  542. avic_invalidate_logical_id_entry(vcpu);
  543. svm->dfr_reg = dfr;
  544. }
  545. static int avic_unaccel_trap_write(struct kvm_vcpu *vcpu)
  546. {
  547. u32 offset = to_svm(vcpu)->vmcb->control.exit_info_1 &
  548. AVIC_UNACCEL_ACCESS_OFFSET_MASK;
  549. switch (offset) {
  550. case APIC_LDR:
  551. if (avic_handle_ldr_update(vcpu))
  552. return 0;
  553. break;
  554. case APIC_DFR:
  555. avic_handle_dfr_update(vcpu);
  556. break;
  557. default:
  558. break;
  559. }
  560. kvm_apic_write_nodecode(vcpu, offset);
  561. return 1;
  562. }
  563. static bool is_avic_unaccelerated_access_trap(u32 offset)
  564. {
  565. bool ret = false;
  566. switch (offset) {
  567. case APIC_ID:
  568. case APIC_EOI:
  569. case APIC_RRR:
  570. case APIC_LDR:
  571. case APIC_DFR:
  572. case APIC_SPIV:
  573. case APIC_ESR:
  574. case APIC_ICR:
  575. case APIC_LVTT:
  576. case APIC_LVTTHMR:
  577. case APIC_LVTPC:
  578. case APIC_LVT0:
  579. case APIC_LVT1:
  580. case APIC_LVTERR:
  581. case APIC_TMICT:
  582. case APIC_TDCR:
  583. ret = true;
  584. break;
  585. default:
  586. break;
  587. }
  588. return ret;
  589. }
  590. int avic_unaccelerated_access_interception(struct kvm_vcpu *vcpu)
  591. {
  592. struct vcpu_svm *svm = to_svm(vcpu);
  593. int ret = 0;
  594. u32 offset = svm->vmcb->control.exit_info_1 &
  595. AVIC_UNACCEL_ACCESS_OFFSET_MASK;
  596. u32 vector = svm->vmcb->control.exit_info_2 &
  597. AVIC_UNACCEL_ACCESS_VECTOR_MASK;
  598. bool write = (svm->vmcb->control.exit_info_1 >> 32) &
  599. AVIC_UNACCEL_ACCESS_WRITE_MASK;
  600. bool trap = is_avic_unaccelerated_access_trap(offset);
  601. trace_kvm_avic_unaccelerated_access(vcpu->vcpu_id, offset,
  602. trap, write, vector);
  603. if (trap) {
  604. /* Handling Trap */
  605. WARN_ONCE(!write, "svm: Handling trap read.\n");
  606. ret = avic_unaccel_trap_write(vcpu);
  607. } else {
  608. /* Handling Fault */
  609. ret = kvm_emulate_instruction(vcpu, 0);
  610. }
  611. return ret;
  612. }
  613. int avic_init_vcpu(struct vcpu_svm *svm)
  614. {
  615. int ret;
  616. struct kvm_vcpu *vcpu = &svm->vcpu;
  617. if (!enable_apicv || !irqchip_in_kernel(vcpu->kvm))
  618. return 0;
  619. ret = avic_init_backing_page(vcpu);
  620. if (ret)
  621. return ret;
  622. INIT_LIST_HEAD(&svm->ir_list);
  623. spin_lock_init(&svm->ir_list_lock);
  624. svm->dfr_reg = APIC_DFR_FLAT;
  625. return ret;
  626. }
  627. void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu)
  628. {
  629. avic_handle_dfr_update(vcpu);
  630. avic_handle_ldr_update(vcpu);
  631. }
  632. static int avic_set_pi_irte_mode(struct kvm_vcpu *vcpu, bool activate)
  633. {
  634. int ret = 0;
  635. unsigned long flags;
  636. struct amd_svm_iommu_ir *ir;
  637. struct vcpu_svm *svm = to_svm(vcpu);
  638. if (!kvm_arch_has_assigned_device(vcpu->kvm))
  639. return 0;
  640. /*
  641. * Here, we go through the per-vcpu ir_list to update all existing
  642. * interrupt remapping table entries targeting this vcpu.
  643. */
  644. spin_lock_irqsave(&svm->ir_list_lock, flags);
  645. if (list_empty(&svm->ir_list))
  646. goto out;
  647. list_for_each_entry(ir, &svm->ir_list, node) {
  648. if (activate)
  649. ret = amd_iommu_activate_guest_mode(ir->data);
  650. else
  651. ret = amd_iommu_deactivate_guest_mode(ir->data);
  652. if (ret)
  653. break;
  654. }
  655. out:
  656. spin_unlock_irqrestore(&svm->ir_list_lock, flags);
  657. return ret;
  658. }
  659. static void svm_ir_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
  660. {
  661. unsigned long flags;
  662. struct amd_svm_iommu_ir *cur;
  663. spin_lock_irqsave(&svm->ir_list_lock, flags);
  664. list_for_each_entry(cur, &svm->ir_list, node) {
  665. if (cur->data != pi->ir_data)
  666. continue;
  667. list_del(&cur->node);
  668. kfree(cur);
  669. break;
  670. }
  671. spin_unlock_irqrestore(&svm->ir_list_lock, flags);
  672. }
  673. static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
  674. {
  675. int ret = 0;
  676. unsigned long flags;
  677. struct amd_svm_iommu_ir *ir;
  678. u64 entry;
  679. /**
  680. * In some cases, the existing irte is updated and re-set,
  681. * so we need to check here if it's already been added
  682. * to the ir_list.
  683. */
  684. if (pi->ir_data && (pi->prev_ga_tag != 0)) {
  685. struct kvm *kvm = svm->vcpu.kvm;
  686. u32 vcpu_id = AVIC_GATAG_TO_VCPUID(pi->prev_ga_tag);
  687. struct kvm_vcpu *prev_vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
  688. struct vcpu_svm *prev_svm;
  689. if (!prev_vcpu) {
  690. ret = -EINVAL;
  691. goto out;
  692. }
  693. prev_svm = to_svm(prev_vcpu);
  694. svm_ir_list_del(prev_svm, pi);
  695. }
  696. /**
  697. * Allocating a new amd_svm_iommu_ir entry, which will be
  698. * added to the per-vcpu ir_list.
  699. */
  700. ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL_ACCOUNT);
  701. if (!ir) {
  702. ret = -ENOMEM;
  703. goto out;
  704. }
  705. ir->data = pi->ir_data;
  706. spin_lock_irqsave(&svm->ir_list_lock, flags);
  707. /*
  708. * Update the target pCPU for IOMMU doorbells if the vCPU is running.
  709. * If the vCPU is NOT running, i.e. is blocking or scheduled out, KVM
  710. * will update the pCPU info when the vCPU is awakened and/or scheduled in.
  711. * See also avic_vcpu_load().
  712. */
  713. entry = READ_ONCE(*(svm->avic_physical_id_cache));
  714. if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
  715. amd_iommu_update_ga(entry & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK,
  716. true, pi->ir_data);
  717. list_add(&ir->node, &svm->ir_list);
  718. spin_unlock_irqrestore(&svm->ir_list_lock, flags);
  719. out:
  720. return ret;
  721. }
  722. /*
  723. * Note:
  724. * The HW cannot support posting multicast/broadcast
  725. * interrupts to a vCPU. So, we still use legacy interrupt
  726. * remapping for these kinds of interrupts.
  727. *
  728. * For lowest-priority interrupts, we only support
  729. * those with a single CPU as the destination, e.g. the user
  730. * configures the interrupts via /proc/irq or uses
  731. * irqbalance to make the interrupts single-CPU.
  732. */
  733. static int
  734. get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
  735. struct vcpu_data *vcpu_info, struct vcpu_svm **svm)
  736. {
  737. struct kvm_lapic_irq irq;
  738. struct kvm_vcpu *vcpu = NULL;
  739. kvm_set_msi_irq(kvm, e, &irq);
  740. if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
  741. !kvm_irq_is_postable(&irq)) {
  742. pr_debug("SVM: %s: use legacy intr remap mode for irq %u\n",
  743. __func__, irq.vector);
  744. return -1;
  745. }
  746. pr_debug("SVM: %s: use GA mode for irq %u\n", __func__,
  747. irq.vector);
  748. *svm = to_svm(vcpu);
  749. vcpu_info->pi_desc_addr = __sme_set(page_to_phys((*svm)->avic_backing_page));
  750. vcpu_info->vector = irq.vector;
  751. return 0;
  752. }
  753. /*
  754. * avic_pi_update_irte - set IRTE for Posted-Interrupts
  755. *
  756. * @kvm: kvm
  757. * @host_irq: host irq of the interrupt
  758. * @guest_irq: gsi of the interrupt
  759. * @set: set or unset PI
  760. * returns 0 on success, < 0 on failure
  761. */
  762. int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
  763. uint32_t guest_irq, bool set)
  764. {
  765. struct kvm_kernel_irq_routing_entry *e;
  766. struct kvm_irq_routing_table *irq_rt;
  767. int idx, ret = 0;
  768. if (!kvm_arch_has_assigned_device(kvm) ||
  769. !irq_remapping_cap(IRQ_POSTING_CAP))
  770. return 0;
  771. pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n",
  772. __func__, host_irq, guest_irq, set);
  773. idx = srcu_read_lock(&kvm->irq_srcu);
  774. irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
  775. if (guest_irq >= irq_rt->nr_rt_entries ||
  776. hlist_empty(&irq_rt->map[guest_irq])) {
  777. pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n",
  778. guest_irq, irq_rt->nr_rt_entries);
  779. goto out;
  780. }
  781. hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
  782. struct vcpu_data vcpu_info;
  783. struct vcpu_svm *svm = NULL;
  784. if (e->type != KVM_IRQ_ROUTING_MSI)
  785. continue;
  786. /**
  787. * Here, we set up legacy mode in the following cases:
  788. * 1. When the interrupt cannot be targeted to a specific vcpu.
  789. * 2. Unsetting posted interrupt.
  790. * 3. APIC virtualization is disabled for the vcpu.
  791. * 4. IRQ has incompatible delivery mode (SMI, INIT, etc)
  792. */
  793. if (!get_pi_vcpu_info(kvm, e, &vcpu_info, &svm) && set &&
  794. kvm_vcpu_apicv_active(&svm->vcpu)) {
  795. struct amd_iommu_pi_data pi;
  796. /* Try to enable guest_mode in IRTE */
  797. pi.base = __sme_set(page_to_phys(svm->avic_backing_page) &
  798. AVIC_HPA_MASK);
  799. pi.ga_tag = AVIC_GATAG(to_kvm_svm(kvm)->avic_vm_id,
  800. svm->vcpu.vcpu_id);
  801. pi.is_guest_mode = true;
  802. pi.vcpu_data = &vcpu_info;
  803. ret = irq_set_vcpu_affinity(host_irq, &pi);
  804. /**
  805. * Here, we have successfully set up vcpu affinity in
  806. * IOMMU guest mode. Now, we need to store the posted
  807. * interrupt information in a per-vcpu ir_list so that
  808. * we can reference it directly when we update vcpu
  809. * scheduling information in the IOMMU irte.
  810. */
  811. if (!ret && pi.is_guest_mode)
  812. svm_ir_list_add(svm, &pi);
  813. } else {
  814. /* Use legacy mode in IRTE */
  815. struct amd_iommu_pi_data pi;
  816. /**
  817. * Here, pi is used to:
  818. * - Tell IOMMU to use legacy mode for this interrupt.
  819. * - Retrieve ga_tag of prior interrupt remapping data.
  820. */
  821. pi.prev_ga_tag = 0;
  822. pi.is_guest_mode = false;
  823. ret = irq_set_vcpu_affinity(host_irq, &pi);
  824. /**
  825. * Check if the posted interrupt was previously
  826. * set up in guest_mode by checking if the ga_tag
  827. * was cached. If so, we need to clean up the per-vcpu
  828. * ir_list.
  829. */
  830. if (!ret && pi.prev_ga_tag) {
  831. int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag);
  832. struct kvm_vcpu *vcpu;
  833. vcpu = kvm_get_vcpu_by_id(kvm, id);
  834. if (vcpu)
  835. svm_ir_list_del(to_svm(vcpu), &pi);
  836. }
  837. }
  838. if (!ret && svm) {
  839. trace_kvm_pi_irte_update(host_irq, svm->vcpu.vcpu_id,
  840. e->gsi, vcpu_info.vector,
  841. vcpu_info.pi_desc_addr, set);
  842. }
  843. if (ret < 0) {
  844. pr_err("%s: failed to update PI IRTE\n", __func__);
  845. goto out;
  846. }
  847. }
  848. ret = 0;
  849. out:
  850. srcu_read_unlock(&kvm->irq_srcu, idx);
  851. return ret;
  852. }
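/*
 * Illustrative call-flow sketch (hedged, based on how this callback is
 * typically wired up rather than on code in this file): common KVM x86 code
 * invokes the vendor pi_update_irte hook when an assigned device's MSI
 * routing changes or an irqbypass producer is (de)registered, roughly:
 *
 *   kvm_arch_irq_bypass_add_producer() -> avic_pi_update_irte(kvm, irq, gsi, true);
 *   kvm_arch_irq_bypass_del_producer() -> avic_pi_update_irte(kvm, irq, gsi, false);
 *
 * with "set" deciding whether the IRTE is switched into IOMMU guest mode
 * (posted) or back to legacy remapped delivery.
 */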
  853. bool avic_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason)
  854. {
  855. ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
  856. BIT(APICV_INHIBIT_REASON_ABSENT) |
  857. BIT(APICV_INHIBIT_REASON_HYPERV) |
  858. BIT(APICV_INHIBIT_REASON_NESTED) |
  859. BIT(APICV_INHIBIT_REASON_IRQWIN) |
  860. BIT(APICV_INHIBIT_REASON_PIT_REINJ) |
  861. BIT(APICV_INHIBIT_REASON_BLOCKIRQ) |
  862. BIT(APICV_INHIBIT_REASON_SEV) |
  863. BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) |
  864. BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED);
  865. return supported & BIT(reason);
  866. }
  867. static inline int
  868. avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
  869. {
  870. int ret = 0;
  871. struct amd_svm_iommu_ir *ir;
  872. struct vcpu_svm *svm = to_svm(vcpu);
  873. lockdep_assert_held(&svm->ir_list_lock);
  874. if (!kvm_arch_has_assigned_device(vcpu->kvm))
  875. return 0;
  876. /*
  877. * Here, we go through the per-vcpu ir_list to update all existing
  878. * interrupt remapping table entries targeting this vcpu.
  879. */
  880. if (list_empty(&svm->ir_list))
  881. return 0;
  882. list_for_each_entry(ir, &svm->ir_list, node) {
  883. ret = amd_iommu_update_ga(cpu, r, ir->data);
  884. if (ret)
  885. return ret;
  886. }
  887. return 0;
  888. }
  889. void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
  890. {
  891. u64 entry;
  892. int h_physical_id = kvm_cpu_get_apicid(cpu);
  893. struct vcpu_svm *svm = to_svm(vcpu);
  894. unsigned long flags;
  895. lockdep_assert_preemption_disabled();
  896. if (WARN_ON(h_physical_id & ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
  897. return;
  898. /*
  899. * No need to update anything if the vCPU is blocking, i.e. if the vCPU
  900. * is being scheduled in after being preempted. The CPU entries in the
  901. * Physical APIC table and IRTE are consumed iff IsRun{ning} is '1'.
  902. * If the vCPU was migrated, its new CPU value will be stuffed when the
  903. * vCPU unblocks.
  904. */
  905. if (kvm_vcpu_is_blocking(vcpu))
  906. return;
  907. /*
  908. * Grab the per-vCPU interrupt remapping lock even if the VM doesn't
  909. * _currently_ have assigned devices, as that can change. Holding
  910. * ir_list_lock ensures that either svm_ir_list_add() will consume
  911. * up-to-date entry information, or that this task will wait until
  912. * svm_ir_list_add() completes to set the new target pCPU.
  913. */
  914. spin_lock_irqsave(&svm->ir_list_lock, flags);
  915. entry = READ_ONCE(*(svm->avic_physical_id_cache));
  916. entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
  917. entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK);
  918. entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
  919. WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
  920. avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, true);
  921. spin_unlock_irqrestore(&svm->ir_list_lock, flags);
  922. }
  923. void avic_vcpu_put(struct kvm_vcpu *vcpu)
  924. {
  925. u64 entry;
  926. struct vcpu_svm *svm = to_svm(vcpu);
  927. unsigned long flags;
  928. lockdep_assert_preemption_disabled();
  929. /*
  930. * Note, reading the Physical ID entry outside of ir_list_lock is safe
  931. * as only the pCPU that has loaded (or is loading) the vCPU is allowed
  932. * to modify the entry, and preemption is disabled. I.e. the vCPU
  933. * can't be scheduled out and thus avic_vcpu_{put,load}() can't run
  934. * recursively.
  935. */
  936. entry = READ_ONCE(*(svm->avic_physical_id_cache));
  937. /* Nothing to do if IsRunning == '0' due to vCPU blocking. */
  938. if (!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK))
  939. return;
  940. /*
  941. * Take and hold the per-vCPU interrupt remapping lock while updating
  942. * the Physical ID entry even though the lock doesn't protect against
  943. * multiple writers (see above). Holding ir_list_lock ensures that
  944. * either svm_ir_list_add() will consume up-to-date entry information,
  945. * or that this task will wait until svm_ir_list_add() completes to
  946. * mark the vCPU as not running.
  947. */
  948. spin_lock_irqsave(&svm->ir_list_lock, flags);
  949. avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
  950. entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
  951. WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
  952. spin_unlock_irqrestore(&svm->ir_list_lock, flags);
  953. }
  954. void avic_refresh_virtual_apic_mode(struct kvm_vcpu *vcpu)
  955. {
  956. struct vcpu_svm *svm = to_svm(vcpu);
  957. struct vmcb *vmcb = svm->vmcb01.ptr;
  958. if (!lapic_in_kernel(vcpu) || avic_mode == AVIC_MODE_NONE)
  959. return;
  960. if (!enable_apicv)
  961. return;
  962. if (kvm_vcpu_apicv_active(vcpu)) {
  963. /**
  964. * During AVIC temporary deactivation, guest could update
  965. * APIC ID, DFR and LDR registers, which would not be trapped
  966. * by avic_unaccelerated_access_interception(). In this case,
  967. * we need to check and update the AVIC logical APIC ID table
  968. * accordingly before re-activating.
  969. */
  970. avic_apicv_post_state_restore(vcpu);
  971. avic_activate_vmcb(svm);
  972. } else {
  973. avic_deactivate_vmcb(svm);
  974. }
  975. vmcb_mark_dirty(vmcb, VMCB_AVIC);
  976. }
  977. void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
  978. {
  979. bool activated = kvm_vcpu_apicv_active(vcpu);
  980. if (!enable_apicv)
  981. return;
  982. avic_refresh_virtual_apic_mode(vcpu);
  983. if (activated)
  984. avic_vcpu_load(vcpu, vcpu->cpu);
  985. else
  986. avic_vcpu_put(vcpu);
  987. avic_set_pi_irte_mode(vcpu, activated);
  988. }
  989. void avic_vcpu_blocking(struct kvm_vcpu *vcpu)
  990. {
  991. if (!kvm_vcpu_apicv_active(vcpu))
  992. return;
  993. /*
  994. * Unload the AVIC when the vCPU is about to block, _before_
  995. * the vCPU actually blocks.
  996. *
  997. * Any IRQs that arrive before IsRunning=0 will not cause an
  998. * incomplete IPI vmexit on the source, therefore vIRR will also
  999. * be checked by kvm_vcpu_check_block() before blocking. The
  1000. * memory barrier implicit in set_current_state orders writing
  1001. * IsRunning=0 before reading the vIRR. The processor needs a
  1002. * matching memory barrier on interrupt delivery between writing
  1003. * IRR and reading IsRunning; the lack of this barrier might be
  1004. * the cause of errata #1235.
  1005. */
  1006. avic_vcpu_put(vcpu);
  1007. }
  1008. void avic_vcpu_unblocking(struct kvm_vcpu *vcpu)
  1009. {
  1010. if (!kvm_vcpu_apicv_active(vcpu))
  1011. return;
  1012. avic_vcpu_load(vcpu, vcpu->cpu);
  1013. }
  1014. /*
  1015. * Note:
  1016. * - The module param avic enables both xAPIC and x2APIC modes.
  1017. * - The hypervisor can support both xAVIC and x2AVIC in the same guest.
  1018. * - The mode can be switched at run-time.
  1019. */
  1020. bool avic_hardware_setup(struct kvm_x86_ops *x86_ops)
  1021. {
  1022. if (!npt_enabled)
  1023. return false;
  1024. if (boot_cpu_has(X86_FEATURE_AVIC)) {
  1025. avic_mode = AVIC_MODE_X1;
  1026. pr_info("AVIC enabled\n");
  1027. } else if (force_avic) {
  1028. /*
  1029. * Some older systems do not advertise AVIC support.
  1030. * See the Revision Guide for the specific AMD processor for more detail.
  1031. */
  1032. avic_mode = AVIC_MODE_X1;
  1033. pr_warn("AVIC is not supported in CPUID but force enabled");
  1034. pr_warn("Your system might crash and burn");
  1035. }
  1036. /* AVIC is a prerequisite for x2AVIC. */
  1037. if (boot_cpu_has(X86_FEATURE_X2AVIC)) {
  1038. if (avic_mode == AVIC_MODE_X1) {
  1039. avic_mode = AVIC_MODE_X2;
  1040. pr_info("x2AVIC enabled\n");
  1041. } else {
  1042. pr_warn(FW_BUG "Cannot support x2AVIC because AVIC is disabled");
  1043. pr_warn(FW_BUG "Try enabling AVIC using the force_avic option");
  1044. }
  1045. }
  1046. if (avic_mode != AVIC_MODE_NONE)
  1047. amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
  1048. return !!avic_mode;
  1049. }
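/*
 * Illustrative usage sketch (hedged, assuming the wiring used by recent
 * kernels; the exact line in svm.c may differ): svm_hardware_setup() is
 * expected to call this and derive the global enable_apicv from the result,
 * along the lines of
 *
 *   enable_apicv = avic = avic && avic_hardware_setup(&svm_x86_ops);
 *
 * so that AVIC (and, when available, x2AVIC) is only enabled when NPT is on
 * and the CPU either advertises the feature or force_avic is set.
 */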