pmu.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine -- Performance Monitoring Unit support
 *
 * Copyright 2015 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Avi Kivity   <[email protected]>
 *   Gleb Natapov <[email protected]>
 *   Wei Huang    <[email protected]>
 */

#include <linux/types.h>
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
#include <linux/bsearch.h>
#include <linux/sort.h>
#include <asm/perf_event.h>
#include <asm/cpu_device_id.h>
#include "x86.h"
#include "cpuid.h"
#include "lapic.h"
#include "pmu.h"

/* This is enough to filter the vast majority of currently defined events. */
#define KVM_PMU_EVENT_FILTER_MAX_EVENTS 300

struct x86_pmu_capability __read_mostly kvm_pmu_cap;
EXPORT_SYMBOL_GPL(kvm_pmu_cap);

static const struct x86_cpu_id vmx_icl_pebs_cpu[] = {
        X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, NULL),
        X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, NULL),
        {}
};
/* NOTE:
 * - Each perf counter is defined as "struct kvm_pmc";
 * - There are two types of perf counters: general purpose (gp) and fixed.
 *   gp counters are stored in gp_counters[] and fixed counters are stored
 *   in fixed_counters[] respectively. Both of them are part of "struct
 *   kvm_pmu";
 * - pmu.c understands the difference between gp counters and fixed counters.
 *   However, AMD doesn't support fixed counters;
 * - There are three types of index used to access perf counters (PMC):
 *   1. MSR (named msr): For example Intel has MSR_IA32_PERFCTRn and AMD
 *      has MSR_K7_PERFCTRn and, for families 15H and later,
 *      MSR_F15H_PERF_CTRn, where MSR_F15H_PERF_CTR[0-3] are
 *      aliased to MSR_K7_PERFCTRn.
 *   2. MSR Index (named idx): This is normally used by the RDPMC instruction.
 *      For instance, the AMD RDPMC instruction uses 0000_0003h in ECX to
 *      access C001_0007h (MSR_K7_PERFCTR3). Intel has a similar mechanism,
 *      except that it also supports fixed counters. idx can be used as an
 *      index into the gp and fixed counters.
 *   3. Global PMC Index (named pmc): pmc is an index specific to PMU
 *      code. Each pmc, stored in the kvm_pmc.idx field, is unique across
 *      all perf counters (both gp and fixed). The mapping relationship
 *      between pmc and perf counters is as follows:
 *      * Intel: [0 .. KVM_INTEL_PMC_MAX_GENERIC-1] <=> gp counters
 *               [INTEL_PMC_IDX_FIXED .. INTEL_PMC_IDX_FIXED + 2] <=> fixed
 *      * AMD:   [0 .. AMD64_NUM_COUNTERS-1] and, for families 15H
 *               and later, [0 .. AMD64_NUM_COUNTERS_CORE-1] <=> gp counters
 */
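
/*
 * Example (INTEL_PMC_IDX_FIXED is 32): on an Intel vPMU with 8 gp counters
 * and 3 fixed counters, pmc 0..7 map to the gp counters and pmc 32..34 map
 * to fixed counters 0..2.
 */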
static struct kvm_pmu_ops kvm_pmu_ops __read_mostly;

#define KVM_X86_PMU_OP(func) \
        DEFINE_STATIC_CALL_NULL(kvm_x86_pmu_##func, \
                                *(((struct kvm_pmu_ops *)0)->func));
#define KVM_X86_PMU_OP_OPTIONAL KVM_X86_PMU_OP
#include <asm/kvm-x86-pmu-ops.h>

void kvm_pmu_ops_update(const struct kvm_pmu_ops *pmu_ops)
{
        memcpy(&kvm_pmu_ops, pmu_ops, sizeof(kvm_pmu_ops));

#define __KVM_X86_PMU_OP(func) \
        static_call_update(kvm_x86_pmu_##func, kvm_pmu_ops.func);
#define KVM_X86_PMU_OP(func) \
        WARN_ON(!kvm_pmu_ops.func); __KVM_X86_PMU_OP(func)
#define KVM_X86_PMU_OP_OPTIONAL __KVM_X86_PMU_OP
#include <asm/kvm-x86-pmu-ops.h>
#undef __KVM_X86_PMU_OP
}

static inline bool pmc_is_enabled(struct kvm_pmc *pmc)
{
        return static_call(kvm_x86_pmu_pmc_is_enabled)(pmc);
}
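
/*
 * Deferred PMI delivery: runs from IRQ-work context so that a PMI raised
 * while handling an NMI can still wake the target vCPU and deliver the
 * interrupt through the local APIC.
 */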
static void kvm_pmi_trigger_fn(struct irq_work *irq_work)
{
        struct kvm_pmu *pmu = container_of(irq_work, struct kvm_pmu, irq_work);
        struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);

        kvm_pmu_deliver_pmi(vcpu);
}

static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi)
{
        struct kvm_pmu *pmu = pmc_to_pmu(pmc);
        bool skip_pmi = false;

        /* Ignore counters that have been reprogrammed already. */
        if (test_and_set_bit(pmc->idx, pmu->reprogram_pmi))
                return;

        if (pmc->perf_event && pmc->perf_event->attr.precise_ip) {
                if (!in_pmi) {
                        /*
                         * TODO: KVM is currently _choosing_ to not generate records
                         * for emulated instructions, avoiding BUFFER_OVF PMI when
                         * there are no records. Strictly speaking, it should be done
                         * as well in the right context to improve sampling accuracy.
                         */
                        skip_pmi = true;
                } else {
                        /* Indicate PEBS overflow PMI to guest. */
                        skip_pmi = __test_and_set_bit(GLOBAL_STATUS_BUFFER_OVF_BIT,
                                                      (unsigned long *)&pmu->global_status);
                }
        } else {
                __set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
        }

        kvm_make_request(KVM_REQ_PMU, pmc->vcpu);

        if (!pmc->intr || skip_pmi)
                return;

        /*
         * Inject PMI. If the vcpu was in guest mode during the NMI, the PMI
         * can be injected on the next guest-mode entry. Otherwise we can't
         * be sure that the vcpu wasn't executing a hlt instruction at the
         * time of the vmexit and is not going to re-enter guest mode until
         * woken up. So we should wake it, but this is impossible from
         * NMI context. Do it from irq work instead.
         */
        if (in_pmi && !kvm_handling_nmi_from_guest(pmc->vcpu))
                irq_work_queue(&pmc_to_pmu(pmc)->irq_work);
        else
                kvm_make_request(KVM_REQ_PMI, pmc->vcpu);
}
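
/* Callback invoked by perf when the host perf_event backing a vPMC overflows. */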
static void kvm_perf_overflow(struct perf_event *perf_event,
                              struct perf_sample_data *data,
                              struct pt_regs *regs)
{
        struct kvm_pmc *pmc = perf_event->overflow_handler_context;

        __kvm_perf_overflow(pmc, true);
}
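
/*
 * Create the host perf_event backing this vPMC, configured to count only
 * guest execution.  Failure is logged (ratelimited) and leaves the vPMC
 * without a backing event.
 */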
static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
                                  u64 config, bool exclude_user,
                                  bool exclude_kernel, bool intr)
{
        struct kvm_pmu *pmu = pmc_to_pmu(pmc);
        struct perf_event *event;
        struct perf_event_attr attr = {
                .type = type,
                .size = sizeof(attr),
                .pinned = true,
                .exclude_idle = true,
                .exclude_host = 1,
                .exclude_user = exclude_user,
                .exclude_kernel = exclude_kernel,
                .config = config,
        };
        bool pebs = test_bit(pmc->idx, (unsigned long *)&pmu->pebs_enable);

        attr.sample_period = get_sample_period(pmc, pmc->counter);

        if ((attr.config & HSW_IN_TX_CHECKPOINTED) &&
            guest_cpuid_is_intel(pmc->vcpu)) {
                /*
                 * HSW_IN_TX_CHECKPOINTED is not supported with nonzero
                 * period. Just clear the sample period so at least
                 * allocating the counter doesn't fail.
                 */
                attr.sample_period = 0;
        }
        if (pebs) {
                /*
                 * A non-zero precision level turns the ordinary guest event
                 * into a guest PEBS event and triggers the host PEBS PMI
                 * handler to determine whether the PEBS overflow PMI comes
                 * from the host counters or the guest.
                 *
                 * For most PEBS hardware events, the difference in the software
                 * precision levels of guest and host PEBS events will not affect
                 * the accuracy of the PEBS profiling result, because the "event IP"
                 * in the PEBS record is calibrated on the guest side.
                 *
                 * On Icelake everything is fine. Other hardware (GLC+, TNT+) that
                 * could possibly care here is unsupported and needs changes.
                 */
                attr.precise_ip = 1;
                if (x86_match_cpu(vmx_icl_pebs_cpu) && pmc->idx == 32)
                        attr.precise_ip = 3;
        }

        event = perf_event_create_kernel_counter(&attr, -1, current,
                                                 kvm_perf_overflow, pmc);
        if (IS_ERR(event)) {
                pr_debug_ratelimited("kvm_pmu: event creation failed %ld for pmc->idx = %d\n",
                                     PTR_ERR(event), pmc->idx);
                return;
        }

        pmc->perf_event = event;
        pmc_to_pmu(pmc)->event_count++;
        clear_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi);
        pmc->is_paused = false;
        pmc->intr = intr || pebs;
}
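
/* Stop the backing perf_event and fold its count into the vPMC's counter. */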
static void pmc_pause_counter(struct kvm_pmc *pmc)
{
        u64 counter = pmc->counter;

        if (!pmc->perf_event || pmc->is_paused)
                return;

        /* update counter, reset event value to avoid redundant accumulation */
        counter += perf_event_pause(pmc->perf_event, true);
        pmc->counter = counter & pmc_bitmask(pmc);
        pmc->is_paused = true;
}
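
/*
 * Try to re-enable the existing perf_event instead of recreating it.
 * Returns false if the event can't be reused, e.g. the new sample period
 * is rejected by perf or the PEBS configuration has changed.
 */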
static bool pmc_resume_counter(struct kvm_pmc *pmc)
{
        if (!pmc->perf_event)
                return false;

        /* recalibrate sample period and check if it's accepted by perf core */
        if (perf_event_period(pmc->perf_event,
                              get_sample_period(pmc, pmc->counter)))
                return false;

        if (test_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->pebs_enable) !=
            (!!pmc->perf_event->attr.precise_ip))
                return false;

        /* reuse perf_event to serve as pmc_reprogram_counter() does */
        perf_event_enable(pmc->perf_event);
        pmc->is_paused = false;

        clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi);
        return true;
}
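
/* Comparator used to sort and bsearch the PMU event filter's event list. */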
static int cmp_u64(const void *pa, const void *pb)
{
        u64 a = *(u64 *)pa;
        u64 b = *(u64 *)pb;

        return (a > b) - (a < b);
}
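
/*
 * Returns true if the event programmed into @pmc is permitted, i.e. the
 * hardware supports it and the userspace-provided PMU event filter (if any)
 * allows it.  GP counters are matched by event select and unit mask, fixed
 * counters by their index in the fixed counter bitmap.
 */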
static bool check_pmu_event_filter(struct kvm_pmc *pmc)
{
        struct kvm_pmu_event_filter *filter;
        struct kvm *kvm = pmc->vcpu->kvm;
        bool allow_event = true;
        __u64 key;
        int idx;

        if (!static_call(kvm_x86_pmu_hw_event_available)(pmc))
                return false;

        filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu);
        if (!filter)
                goto out;

        if (pmc_is_gp(pmc)) {
                key = pmc->eventsel & AMD64_RAW_EVENT_MASK_NB;
                if (bsearch(&key, filter->events, filter->nevents,
                            sizeof(__u64), cmp_u64))
                        allow_event = filter->action == KVM_PMU_EVENT_ALLOW;
                else
                        allow_event = filter->action == KVM_PMU_EVENT_DENY;
        } else {
                idx = pmc->idx - INTEL_PMC_IDX_FIXED;
                if (filter->action == KVM_PMU_EVENT_DENY &&
                    test_bit(idx, (ulong *)&filter->fixed_counter_bitmap))
                        allow_event = false;
                if (filter->action == KVM_PMU_EVENT_ALLOW &&
                    !test_bit(idx, (ulong *)&filter->fixed_counter_bitmap))
                        allow_event = false;
        }

out:
        return allow_event;
}
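
/*
 * Reconfigure the host perf_event for a vPMC after its guest-visible
 * configuration (eventsel or fixed counter control) has changed.  The
 * existing perf_event is reused when possible; otherwise it is released
 * and recreated with the new configuration.
 */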
void reprogram_counter(struct kvm_pmc *pmc)
{
        struct kvm_pmu *pmu = pmc_to_pmu(pmc);
        u64 eventsel = pmc->eventsel;
        u64 new_config = eventsel;
        u8 fixed_ctr_ctrl;

        pmc_pause_counter(pmc);

        if (!pmc_speculative_in_use(pmc) || !pmc_is_enabled(pmc))
                return;

        if (!check_pmu_event_filter(pmc))
                return;

        if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
                printk_once("kvm pmu: pin control bit is ignored\n");

        if (pmc_is_fixed(pmc)) {
                fixed_ctr_ctrl = fixed_ctrl_field(pmu->fixed_ctr_ctrl,
                                                  pmc->idx - INTEL_PMC_IDX_FIXED);
                if (fixed_ctr_ctrl & 0x1)
                        eventsel |= ARCH_PERFMON_EVENTSEL_OS;
                if (fixed_ctr_ctrl & 0x2)
                        eventsel |= ARCH_PERFMON_EVENTSEL_USR;
                if (fixed_ctr_ctrl & 0x8)
                        eventsel |= ARCH_PERFMON_EVENTSEL_INT;
                new_config = (u64)fixed_ctr_ctrl;
        }

        if (pmc->current_config == new_config && pmc_resume_counter(pmc))
                return;

        pmc_release_perf_event(pmc);

        pmc->current_config = new_config;
        pmc_reprogram_counter(pmc, PERF_TYPE_RAW,
                              (eventsel & pmu->raw_event_mask),
                              !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
                              !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
                              eventsel & ARCH_PERFMON_EVENTSEL_INT);
}
EXPORT_SYMBOL_GPL(reprogram_counter);
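
/*
 * Process deferred counter reprogramming requested via KVM_REQ_PMU, then
 * perform deferred cleanup of unused perf_events if needed.
 */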
void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
{
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        int bit;

        for_each_set_bit(bit, pmu->reprogram_pmi, X86_PMC_IDX_MAX) {
                struct kvm_pmc *pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, bit);

                if (unlikely(!pmc || !pmc->perf_event)) {
                        clear_bit(bit, pmu->reprogram_pmi);
                        continue;
                }

                reprogram_counter(pmc);
        }

        /*
         * Unused perf_events are only released if the corresponding MSRs
         * weren't accessed during the last vCPU time slice. kvm_arch_sched_in
         * triggers KVM_REQ_PMU if cleanup is needed.
         */
        if (unlikely(pmu->need_cleanup))
                kvm_pmu_cleanup(vcpu);
}

/* check if idx is a valid index to access PMU */
bool kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
{
        return static_call(kvm_x86_pmu_is_valid_rdpmc_ecx)(vcpu, idx);
}

bool is_vmware_backdoor_pmc(u32 pmc_idx)
{
        switch (pmc_idx) {
        case VMWARE_BACKDOOR_PMC_HOST_TSC:
        case VMWARE_BACKDOOR_PMC_REAL_TIME:
        case VMWARE_BACKDOOR_PMC_APPARENT_TIME:
                return true;
        }
        return false;
}

static int kvm_pmu_rdpmc_vmware(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
{
        u64 ctr_val;

        switch (idx) {
        case VMWARE_BACKDOOR_PMC_HOST_TSC:
                ctr_val = rdtsc();
                break;
        case VMWARE_BACKDOOR_PMC_REAL_TIME:
                ctr_val = ktime_get_boottime_ns();
                break;
        case VMWARE_BACKDOOR_PMC_APPARENT_TIME:
                ctr_val = ktime_get_boottime_ns() +
                        vcpu->kvm->arch.kvmclock_offset;
                break;
        default:
                return 1;
        }

        *data = ctr_val;
        return 0;
}
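
/*
 * Emulate the RDPMC instruction.  Returns 0 on success with *data filled in,
 * or 1 if the index is invalid or the access is not permitted (protected
 * mode, CPL != 0, and CR4.PCE clear), in which case the caller injects #GP.
 */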
int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
{
        bool fast_mode = idx & (1u << 31);
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        struct kvm_pmc *pmc;
        u64 mask = fast_mode ? ~0u : ~0ull;

        if (!pmu->version)
                return 1;

        if (is_vmware_backdoor_pmc(idx))
                return kvm_pmu_rdpmc_vmware(vcpu, idx, data);

        pmc = static_call(kvm_x86_pmu_rdpmc_ecx_to_pmc)(vcpu, idx, &mask);
        if (!pmc)
                return 1;

        if (!(kvm_read_cr4_bits(vcpu, X86_CR4_PCE)) &&
            (static_call(kvm_x86_get_cpl)(vcpu) != 0) &&
            (kvm_read_cr0_bits(vcpu, X86_CR0_PE)))
                return 1;

        *data = pmc_read_counter(pmc) & mask;
        return 0;
}

void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu)
{
        if (lapic_in_kernel(vcpu)) {
                static_call_cond(kvm_x86_pmu_deliver_pmi)(vcpu);
                kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTPC);
        }
}

bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
{
        return static_call(kvm_x86_pmu_msr_idx_to_pmc)(vcpu, msr) ||
                static_call(kvm_x86_pmu_is_valid_msr)(vcpu, msr);
}

static void kvm_pmu_mark_pmc_in_use(struct kvm_vcpu *vcpu, u32 msr)
{
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        struct kvm_pmc *pmc = static_call(kvm_x86_pmu_msr_idx_to_pmc)(vcpu, msr);

        if (pmc)
                __set_bit(pmc->idx, pmu->pmc_in_use);
}

int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
        return static_call(kvm_x86_pmu_get_msr)(vcpu, msr_info);
}

int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
        kvm_pmu_mark_pmc_in_use(vcpu, msr_info->index);
        return static_call(kvm_x86_pmu_set_msr)(vcpu, msr_info);
}

/* Refresh PMU settings. This function is generally called when the
 * underlying settings change (such as the guest's PMU-related CPUID),
 * which should happen rarely.
 */
void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
{
        static_call(kvm_x86_pmu_refresh)(vcpu);
}

void kvm_pmu_reset(struct kvm_vcpu *vcpu)
{
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);

        irq_work_sync(&pmu->irq_work);
        static_call(kvm_x86_pmu_reset)(vcpu);
}

void kvm_pmu_init(struct kvm_vcpu *vcpu)
{
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);

        memset(pmu, 0, sizeof(*pmu));
        static_call(kvm_x86_pmu_init)(vcpu);
        init_irq_work(&pmu->irq_work, kvm_pmi_trigger_fn);
        pmu->event_count = 0;
        pmu->need_cleanup = false;
        kvm_pmu_refresh(vcpu);
}

/* Release perf_events for vPMCs that have been unused for a full time slice. */
void kvm_pmu_cleanup(struct kvm_vcpu *vcpu)
{
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        struct kvm_pmc *pmc = NULL;
        DECLARE_BITMAP(bitmask, X86_PMC_IDX_MAX);
        int i;

        pmu->need_cleanup = false;

        bitmap_andnot(bitmask, pmu->all_valid_pmc_idx,
                      pmu->pmc_in_use, X86_PMC_IDX_MAX);

        for_each_set_bit(i, bitmask, X86_PMC_IDX_MAX) {
                pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, i);

                if (pmc && pmc->perf_event && !pmc_speculative_in_use(pmc))
                        pmc_stop_counter(pmc);
        }

        static_call_cond(kvm_x86_pmu_cleanup)(vcpu);

        bitmap_zero(pmu->pmc_in_use, X86_PMC_IDX_MAX);
}

void kvm_pmu_destroy(struct kvm_vcpu *vcpu)
{
        kvm_pmu_reset(vcpu);
}
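
/* Emulate one increment of the counter, raising an overflow if it wraps. */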
static void kvm_pmu_incr_counter(struct kvm_pmc *pmc)
{
        u64 prev_count;

        prev_count = pmc->counter;
        pmc->counter = (pmc->counter + 1) & pmc_bitmask(pmc);

        reprogram_counter(pmc);
        if (pmc->counter < prev_count)
                __kvm_perf_overflow(pmc, false);
}

static inline bool eventsel_match_perf_hw_id(struct kvm_pmc *pmc,
                                             unsigned int perf_hw_id)
{
        return !((pmc->eventsel ^ perf_get_hw_event_config(perf_hw_id)) &
                AMD64_RAW_EVENT_MASK_NB);
}
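
/*
 * Check whether the current guest privilege level is one the vPMC is
 * configured to count: OS vs. USR for gp counters, the corresponding
 * enable bits in the fixed counter control field for fixed counters.
 */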
static inline bool cpl_is_matched(struct kvm_pmc *pmc)
{
        bool select_os, select_user;
        u64 config = pmc->current_config;

        if (pmc_is_gp(pmc)) {
                select_os = config & ARCH_PERFMON_EVENTSEL_OS;
                select_user = config & ARCH_PERFMON_EVENTSEL_USR;
        } else {
                select_os = config & 0x1;
                select_user = config & 0x2;
        }

        return (static_call(kvm_x86_get_cpl)(pmc->vcpu) == 0) ? select_os : select_user;
}
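
/*
 * Increment every in-use vPMC that is programmed to count @perf_hw_id at
 * the current privilege level; used to account for events that KVM emulates
 * and the hardware counters therefore do not observe.
 */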
void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id)
{
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        struct kvm_pmc *pmc;
        int i;

        for_each_set_bit(i, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX) {
                pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, i);

                if (!pmc || !pmc_is_enabled(pmc) || !pmc_speculative_in_use(pmc))
                        continue;

                /* Ignore checks for edge detect, pin control, invert and CMASK bits */
                if (eventsel_match_perf_hw_id(pmc, perf_hw_id) && cpl_is_matched(pmc))
                        kvm_pmu_incr_counter(pmc);
        }
}
EXPORT_SYMBOL_GPL(kvm_pmu_trigger_event);
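
/*
 * Handle the KVM_SET_PMU_EVENT_FILTER ioctl: validate and copy the
 * userspace-supplied filter, sort its event list for bsearch(), and publish
 * it under kvm->lock, waiting out an SRCU grace period so that no vCPU can
 * still see the old filter before it is freed.
 */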
int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
{
        struct kvm_pmu_event_filter tmp, *filter;
        size_t size;
        int r;

        if (copy_from_user(&tmp, argp, sizeof(tmp)))
                return -EFAULT;

        if (tmp.action != KVM_PMU_EVENT_ALLOW &&
            tmp.action != KVM_PMU_EVENT_DENY)
                return -EINVAL;

        if (tmp.flags != 0)
                return -EINVAL;

        if (tmp.nevents > KVM_PMU_EVENT_FILTER_MAX_EVENTS)
                return -E2BIG;

        size = struct_size(filter, events, tmp.nevents);
        filter = kmalloc(size, GFP_KERNEL_ACCOUNT);
        if (!filter)
                return -ENOMEM;

        r = -EFAULT;
        if (copy_from_user(filter, argp, size))
                goto cleanup;

        /* Ensure nevents can't be changed between the user copies. */
        *filter = tmp;

        /*
         * Sort the in-kernel list so that we can search it with bsearch.
         */
        sort(&filter->events, filter->nevents, sizeof(__u64), cmp_u64, NULL);

        mutex_lock(&kvm->lock);
        filter = rcu_replace_pointer(kvm->arch.pmu_event_filter, filter,
                                     mutex_is_locked(&kvm->lock));
        mutex_unlock(&kvm->lock);

        synchronize_srcu_expedited(&kvm->srcu);
        r = 0;
cleanup:
        kfree(filter);
        return r;
}