pmu_intel.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * KVM PMU support for Intel CPUs
 *
 * Copyright 2011 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Avi Kivity   <[email protected]>
 *   Gleb Natapov <[email protected]>
 */
#include <linux/types.h>
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
#include <asm/perf_event.h>
#include "x86.h"
#include "cpuid.h"
#include "lapic.h"
#include "nested.h"
#include "pmu.h"

#define MSR_PMC_FULL_WIDTH_BIT (MSR_IA32_PMC0 - MSR_IA32_PERFCTR0)

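/*
 * Each entry maps an Intel architectural event ({event_select, unit_mask})
 * to the corresponding perf generic hardware event.
 */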
static struct kvm_event_hw_type_mapping intel_arch_events[] = {
	[0] = { 0x3c, 0x00, PERF_COUNT_HW_CPU_CYCLES },
	[1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS },
	[2] = { 0x3c, 0x01, PERF_COUNT_HW_BUS_CYCLES },
	[3] = { 0x2e, 0x4f, PERF_COUNT_HW_CACHE_REFERENCES },
	[4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES },
	[5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
	[6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES },
	/* The above index must match CPUID 0x0A.EBX bit vector */
	[7] = { 0x00, 0x03, PERF_COUNT_HW_REF_CPU_CYCLES },
};

/* mapping between fixed pmc index and intel_arch_events array */
static int fixed_pmc_events[] = {1, 0, 7};

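/*
 * Reprogram the fixed counters whose 4-bit control field in
 * IA32_FIXED_CTR_CTRL changed with this write.
 */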
static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
{
	struct kvm_pmc *pmc;
	u8 old_fixed_ctr_ctrl = pmu->fixed_ctr_ctrl;
	int i;

	pmu->fixed_ctr_ctrl = data;
	for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
		u8 new_ctrl = fixed_ctrl_field(data, i);
		u8 old_ctrl = fixed_ctrl_field(old_fixed_ctr_ctrl, i);

		if (old_ctrl == new_ctrl)
			continue;

		pmc = get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR0 + i);

		__set_bit(INTEL_PMC_IDX_FIXED + i, pmu->pmc_in_use);
		reprogram_counter(pmc);
	}
}

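/*
 * Translate a global PMC index to a kvm_pmc: indices below
 * INTEL_PMC_IDX_FIXED are general-purpose counters, the rest are fixed.
 */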
static struct kvm_pmc *intel_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
{
	if (pmc_idx < INTEL_PMC_IDX_FIXED) {
		return get_gp_pmc(pmu, MSR_P6_EVNTSEL0 + pmc_idx,
				  MSR_P6_EVNTSEL0);
	} else {
		u32 idx = pmc_idx - INTEL_PMC_IDX_FIXED;

		return get_fixed_pmc(pmu, idx + MSR_CORE_PERF_FIXED_CTR0);
	}
}

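/* Reprogram every counter whose bit is set in @diff. */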
static void reprogram_counters(struct kvm_pmu *pmu, u64 diff)
{
	int bit;
	struct kvm_pmc *pmc;

	for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX) {
		pmc = intel_pmc_idx_to_pmc(pmu, bit);
		if (pmc)
			reprogram_counter(pmc);
	}
}

static bool intel_hw_event_available(struct kvm_pmc *pmc)
{
	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
	u8 event_select = pmc->eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
	u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
	int i;

	for (i = 0; i < ARRAY_SIZE(intel_arch_events); i++) {
		if (intel_arch_events[i].eventsel != event_select ||
		    intel_arch_events[i].unit_mask != unit_mask)
			continue;

		/* Reject events that CPUID reports as not available. */
		if ((i < 7) && !(pmu->available_event_types & (1 << i)))
			return false;

		break;
	}

	return true;
}

/* Check if a PMC is enabled by comparing it with global_ctrl bits. */
static bool intel_pmc_is_enabled(struct kvm_pmc *pmc)
{
	struct kvm_pmu *pmu = pmc_to_pmu(pmc);

	if (!intel_pmu_has_perf_global_ctrl(pmu))
		return true;

	return test_bit(pmc->idx, (unsigned long *)&pmu->global_ctrl);
}

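/*
 * RDPMC index encoding: bit 30 of ECX selects the fixed-counter bank,
 * the low bits select the counter within the bank.
 */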
static bool intel_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	bool fixed = idx & (1u << 30);

	idx &= ~(3u << 30);

	return fixed ? idx < pmu->nr_arch_fixed_counters
		     : idx < pmu->nr_arch_gp_counters;
}

static struct kvm_pmc *intel_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
					      unsigned int idx, u64 *mask)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	bool fixed = idx & (1u << 30);
	struct kvm_pmc *counters;
	unsigned int num_counters;

	idx &= ~(3u << 30);
	if (fixed) {
		counters = pmu->fixed_counters;
		num_counters = pmu->nr_arch_fixed_counters;
	} else {
		counters = pmu->gp_counters;
		num_counters = pmu->nr_arch_gp_counters;
	}
	if (idx >= num_counters)
		return NULL;
	*mask &= pmu->counter_bitmask[fixed ? KVM_PMC_FIXED : KVM_PMC_GP];
	return &counters[array_index_nospec(idx, num_counters)];
}

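/*
 * The guest's IA32_PERF_CAPABILITIES is visible only if CPUID enumerates
 * PDCM; otherwise report it as zero.
 */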
static inline u64 vcpu_get_perf_capabilities(struct kvm_vcpu *vcpu)
{
	if (!guest_cpuid_has(vcpu, X86_FEATURE_PDCM))
		return 0;

	return vcpu->arch.perf_capabilities;
}

static inline bool fw_writes_is_enabled(struct kvm_vcpu *vcpu)
{
	return (vcpu_get_perf_capabilities(vcpu) & PMU_CAP_FW_WRITES) != 0;
}

static inline struct kvm_pmc *get_fw_gp_pmc(struct kvm_pmu *pmu, u32 msr)
{
	if (!fw_writes_is_enabled(pmu_to_vcpu(pmu)))
		return NULL;

	return get_gp_pmc(pmu, msr, MSR_IA32_PMC0);
}

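/*
 * LBR MSR accesses are valid only if the vCPU has LBRs enabled; the valid
 * set is MSR_LBR_SELECT, MSR_LBR_TOS and the from/to/info record ranges.
 */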
static bool intel_pmu_is_valid_lbr_msr(struct kvm_vcpu *vcpu, u32 index)
{
	struct x86_pmu_lbr *records = vcpu_to_lbr_records(vcpu);
	bool ret = false;

	if (!intel_pmu_lbr_is_enabled(vcpu))
		return ret;

	ret = (index == MSR_LBR_SELECT) || (index == MSR_LBR_TOS) ||
		(index >= records->from && index < records->from + records->nr) ||
		(index >= records->to && index < records->to + records->nr);

	if (!ret && records->info)
		ret = (index >= records->info && index < records->info + records->nr);

	return ret;
}

static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	u64 perf_capabilities;
	int ret;

	switch (msr) {
	case MSR_CORE_PERF_FIXED_CTR_CTRL:
	case MSR_CORE_PERF_GLOBAL_STATUS:
	case MSR_CORE_PERF_GLOBAL_CTRL:
	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
		return intel_pmu_has_perf_global_ctrl(pmu);
	case MSR_IA32_PEBS_ENABLE:
		ret = vcpu_get_perf_capabilities(vcpu) & PERF_CAP_PEBS_FORMAT;
		break;
	case MSR_IA32_DS_AREA:
		ret = guest_cpuid_has(vcpu, X86_FEATURE_DS);
		break;
	case MSR_PEBS_DATA_CFG:
		perf_capabilities = vcpu_get_perf_capabilities(vcpu);
		ret = (perf_capabilities & PERF_CAP_PEBS_BASELINE) &&
			((perf_capabilities & PERF_CAP_PEBS_FORMAT) > 3);
		break;
	default:
		ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0) ||
			get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0) ||
			get_fixed_pmc(pmu, msr) || get_fw_gp_pmc(pmu, msr) ||
			intel_pmu_is_valid_lbr_msr(vcpu, msr);
		break;
	}

	return ret;
}

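/* Map an MSR index to the fixed, eventsel or GP counter PMC it addresses. */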
static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;

	pmc = get_fixed_pmc(pmu, msr);
	pmc = pmc ? pmc : get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0);
	pmc = pmc ? pmc : get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0);

	return pmc;
}

static inline void intel_pmu_release_guest_lbr_event(struct kvm_vcpu *vcpu)
{
	struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);

	if (lbr_desc->event) {
		perf_event_release_kernel(lbr_desc->event);
		lbr_desc->event = NULL;
		vcpu_to_pmu(vcpu)->event_count--;
	}
}

int intel_pmu_create_guest_lbr_event(struct kvm_vcpu *vcpu)
{
	struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct perf_event *event;

	/*
	 * The perf_event_attr is constructed with the minimum needed to work:
	 * - set 'pinned = true' to make it task pinned so that if another
	 *   cpu-pinned event reclaims LBR, event->oncpu is set to -1;
	 * - set '.exclude_host = true' to record guest branch behavior;
	 *
	 * - set '.config = INTEL_FIXED_VLBR_EVENT' so that host perf
	 *   schedules the event on a fake counter rather than a real HW one;
	 *   see is_guest_lbr_event() and __intel_get_event_constraints();
	 *
	 * - set 'sample_type = PERF_SAMPLE_BRANCH_STACK' and
	 *   'branch_sample_type = PERF_SAMPLE_BRANCH_CALL_STACK |
	 *   PERF_SAMPLE_BRANCH_USER' to configure it as an LBR callstack
	 *   event, which lets KVM save/restore guest LBR records across
	 *   host context switches and avoids considerable overhead;
	 *   see branch_user_callstack() and intel_pmu_lbr_sched_task();
	 */
	struct perf_event_attr attr = {
		.type = PERF_TYPE_RAW,
		.size = sizeof(attr),
		.config = INTEL_FIXED_VLBR_EVENT,
		.sample_type = PERF_SAMPLE_BRANCH_STACK,
		.pinned = true,
		.exclude_host = true,
		.branch_sample_type = PERF_SAMPLE_BRANCH_CALL_STACK |
					PERF_SAMPLE_BRANCH_USER,
	};

	if (unlikely(lbr_desc->event)) {
		__set_bit(INTEL_PMC_IDX_FIXED_VLBR, pmu->pmc_in_use);
		return 0;
	}

	event = perf_event_create_kernel_counter(&attr, -1,
						 current, NULL, NULL);
	if (IS_ERR(event)) {
		pr_debug_ratelimited("%s: failed %ld\n",
					__func__, PTR_ERR(event));
		return PTR_ERR(event);
	}
	lbr_desc->event = event;
	pmu->event_count++;
	__set_bit(INTEL_PMC_IDX_FIXED_VLBR, pmu->pmc_in_use);
	return 0;
}

/*
 * It's safe for the guest to read or write the LBR MSRs even when they
 * are not passed through, since the host restores or resets the LBR MSR
 * contents whenever the guest LBR event is scheduled in.
 */
static bool intel_pmu_handle_lbr_msrs_access(struct kvm_vcpu *vcpu,
					     struct msr_data *msr_info, bool read)
{
	struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
	u32 index = msr_info->index;

	if (!intel_pmu_is_valid_lbr_msr(vcpu, index))
		return false;

	if (!lbr_desc->event && intel_pmu_create_guest_lbr_event(vcpu) < 0)
		goto dummy;

	/*
	 * Disable irqs to ensure the LBR feature doesn't get reclaimed by the
	 * host at the time the value is read from the MSR, which would let a
	 * host LBR value leak to the guest. If LBR has been reclaimed,
	 * return 0 on guest reads.
	 */
	local_irq_disable();
	if (lbr_desc->event->state == PERF_EVENT_STATE_ACTIVE) {
		if (read)
			rdmsrl(index, msr_info->data);
		else
			wrmsrl(index, msr_info->data);
		__set_bit(INTEL_PMC_IDX_FIXED_VLBR, vcpu_to_pmu(vcpu)->pmc_in_use);
		local_irq_enable();
		return true;
	}
	clear_bit(INTEL_PMC_IDX_FIXED_VLBR, vcpu_to_pmu(vcpu)->pmc_in_use);
	local_irq_enable();

dummy:
	if (read)
		msr_info->data = 0;
	return true;
}

static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	u32 msr = msr_info->index;

	switch (msr) {
	case MSR_CORE_PERF_FIXED_CTR_CTRL:
		msr_info->data = pmu->fixed_ctr_ctrl;
		return 0;
	case MSR_CORE_PERF_GLOBAL_STATUS:
		msr_info->data = pmu->global_status;
		return 0;
	case MSR_CORE_PERF_GLOBAL_CTRL:
		msr_info->data = pmu->global_ctrl;
		return 0;
	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
		msr_info->data = 0;
		return 0;
	case MSR_IA32_PEBS_ENABLE:
		msr_info->data = pmu->pebs_enable;
		return 0;
	case MSR_IA32_DS_AREA:
		msr_info->data = pmu->ds_area;
		return 0;
	case MSR_PEBS_DATA_CFG:
		msr_info->data = pmu->pebs_data_cfg;
		return 0;
	default:
		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
		    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
			u64 val = pmc_read_counter(pmc);
			msr_info->data =
				val & pmu->counter_bitmask[KVM_PMC_GP];
			return 0;
		} else if ((pmc = get_fixed_pmc(pmu, msr))) {
			u64 val = pmc_read_counter(pmc);
			msr_info->data =
				val & pmu->counter_bitmask[KVM_PMC_FIXED];
			return 0;
		} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
			msr_info->data = pmc->eventsel;
			return 0;
		} else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, true))
			return 0;
	}

	return 1;
}

static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	u32 msr = msr_info->index;
	u64 data = msr_info->data;
	u64 reserved_bits, diff;

	switch (msr) {
	case MSR_CORE_PERF_FIXED_CTR_CTRL:
		if (pmu->fixed_ctr_ctrl == data)
			return 0;
		if (!(data & pmu->fixed_ctr_ctrl_mask)) {
			reprogram_fixed_counters(pmu, data);
			return 0;
		}
		break;
	case MSR_CORE_PERF_GLOBAL_STATUS:
		if (msr_info->host_initiated) {
			pmu->global_status = data;
			return 0;
		}
		break; /* RO MSR */
	case MSR_CORE_PERF_GLOBAL_CTRL:
		if (pmu->global_ctrl == data)
			return 0;
		if (kvm_valid_perf_global_ctrl(pmu, data)) {
			diff = pmu->global_ctrl ^ data;
			pmu->global_ctrl = data;
			reprogram_counters(pmu, diff);
			return 0;
		}
		break;
	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
		if (!(data & pmu->global_ovf_ctrl_mask)) {
			if (!msr_info->host_initiated)
				pmu->global_status &= ~data;
			return 0;
		}
		break;
	case MSR_IA32_PEBS_ENABLE:
		if (pmu->pebs_enable == data)
			return 0;
		if (!(data & pmu->pebs_enable_mask)) {
			diff = pmu->pebs_enable ^ data;
			pmu->pebs_enable = data;
			reprogram_counters(pmu, diff);
			return 0;
		}
		break;
	case MSR_IA32_DS_AREA:
		if (msr_info->host_initiated && data && !guest_cpuid_has(vcpu, X86_FEATURE_DS))
			return 1;
		if (is_noncanonical_address(data, vcpu))
			return 1;
		pmu->ds_area = data;
		return 0;
	case MSR_PEBS_DATA_CFG:
		if (pmu->pebs_data_cfg == data)
			return 0;
		if (!(data & pmu->pebs_data_cfg_mask)) {
			pmu->pebs_data_cfg = data;
			return 0;
		}
		break;
	default:
		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
		    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
			if ((msr & MSR_PMC_FULL_WIDTH_BIT) &&
			    (data & ~pmu->counter_bitmask[KVM_PMC_GP]))
				return 1;
			if (!msr_info->host_initiated &&
			    !(msr & MSR_PMC_FULL_WIDTH_BIT))
				data = (s64)(s32)data;
			pmc_write_counter(pmc, data);
			pmc_update_sample_period(pmc);
			return 0;
		} else if ((pmc = get_fixed_pmc(pmu, msr))) {
			pmc_write_counter(pmc, data);
			pmc_update_sample_period(pmc);
			return 0;
		} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
			if (data == pmc->eventsel)
				return 0;
			reserved_bits = pmu->reserved_bits;
			if ((pmc->idx == 2) &&
			    (pmu->raw_event_mask & HSW_IN_TX_CHECKPOINTED))
				reserved_bits ^= HSW_IN_TX_CHECKPOINTED;
			if (!(data & reserved_bits)) {
				pmc->eventsel = data;
				reprogram_counter(pmc);
				return 0;
			}
		} else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, false))
			return 0;
	}

	return 1;
}

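/*
 * Seed each fixed counter's eventsel with the architectural event it
 * counts, via the fixed_pmc_events mapping into intel_arch_events[].
 */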
static void setup_fixed_pmc_eventsel(struct kvm_pmu *pmu)
{
	size_t size = ARRAY_SIZE(fixed_pmc_events);
	struct kvm_pmc *pmc;
	u32 event;
	int i;

	for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
		pmc = &pmu->fixed_counters[i];
		event = fixed_pmc_events[array_index_nospec(i, size)];
		pmc->eventsel = (intel_arch_events[event].unit_mask << 8) |
			intel_arch_events[event].eventsel;
	}
}

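/*
 * Rebuild the vCPU's PMU state from guest CPUID leaf 0xA and
 * IA32_PERF_CAPABILITIES: counter counts and widths, reserved-bit masks,
 * and optional LBR and PEBS support.
 */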
static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
	struct kvm_cpuid_entry2 *entry;
	union cpuid10_eax eax;
	union cpuid10_edx edx;
	u64 perf_capabilities;
	u64 counter_mask;
	int i;

	pmu->nr_arch_gp_counters = 0;
	pmu->nr_arch_fixed_counters = 0;
	pmu->counter_bitmask[KVM_PMC_GP] = 0;
	pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
	pmu->version = 0;
	pmu->reserved_bits = 0xffffffff00200000ull;
	pmu->raw_event_mask = X86_RAW_EVENT_MASK;
	pmu->global_ctrl_mask = ~0ull;
	pmu->global_ovf_ctrl_mask = ~0ull;
	pmu->fixed_ctr_ctrl_mask = ~0ull;
	pmu->pebs_enable_mask = ~0ull;
	pmu->pebs_data_cfg_mask = ~0ull;

	entry = kvm_find_cpuid_entry(vcpu, 0xa);
	if (!entry || !vcpu->kvm->arch.enable_pmu)
		return;
	eax.full = entry->eax;
	edx.full = entry->edx;

	pmu->version = eax.split.version_id;
	if (!pmu->version)
		return;

	pmu->nr_arch_gp_counters = min_t(int, eax.split.num_counters,
					 kvm_pmu_cap.num_counters_gp);
	eax.split.bit_width = min_t(int, eax.split.bit_width,
				    kvm_pmu_cap.bit_width_gp);
	pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << eax.split.bit_width) - 1;
	eax.split.mask_length = min_t(int, eax.split.mask_length,
				      kvm_pmu_cap.events_mask_len);
	pmu->available_event_types = ~entry->ebx &
					((1ull << eax.split.mask_length) - 1);

	if (pmu->version == 1) {
		pmu->nr_arch_fixed_counters = 0;
	} else {
		pmu->nr_arch_fixed_counters =
			min3(ARRAY_SIZE(fixed_pmc_events),
			     (size_t)edx.split.num_counters_fixed,
			     (size_t)kvm_pmu_cap.num_counters_fixed);
		edx.split.bit_width_fixed = min_t(int, edx.split.bit_width_fixed,
						  kvm_pmu_cap.bit_width_fixed);
		pmu->counter_bitmask[KVM_PMC_FIXED] =
			((u64)1 << edx.split.bit_width_fixed) - 1;
		setup_fixed_pmc_eventsel(pmu);
	}

	for (i = 0; i < pmu->nr_arch_fixed_counters; i++)
		pmu->fixed_ctr_ctrl_mask &= ~(0xbull << (i * 4));
	counter_mask = ~(((1ull << pmu->nr_arch_gp_counters) - 1) |
		(((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED));
	pmu->global_ctrl_mask = counter_mask;
	pmu->global_ovf_ctrl_mask = pmu->global_ctrl_mask
			& ~(MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF |
			    MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD);
	if (vmx_pt_mode_is_host_guest())
		pmu->global_ovf_ctrl_mask &=
				~MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI;

	entry = kvm_find_cpuid_entry_index(vcpu, 7, 0);
	if (entry &&
	    (boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM)) &&
	    (entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM))) {
		pmu->reserved_bits ^= HSW_IN_TX;
		pmu->raw_event_mask |= (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED);
	}

	bitmap_set(pmu->all_valid_pmc_idx,
		0, pmu->nr_arch_gp_counters);
	bitmap_set(pmu->all_valid_pmc_idx,
		INTEL_PMC_MAX_GENERIC, pmu->nr_arch_fixed_counters);

	perf_capabilities = vcpu_get_perf_capabilities(vcpu);
	if (cpuid_model_is_consistent(vcpu) &&
	    (perf_capabilities & PMU_CAP_LBR_FMT))
		x86_perf_get_lbr(&lbr_desc->records);
	else
		lbr_desc->records.nr = 0;

	if (lbr_desc->records.nr)
		bitmap_set(pmu->all_valid_pmc_idx, INTEL_PMC_IDX_FIXED_VLBR, 1);

	if (perf_capabilities & PERF_CAP_PEBS_FORMAT) {
		if (perf_capabilities & PERF_CAP_PEBS_BASELINE) {
			pmu->pebs_enable_mask = counter_mask;
			pmu->reserved_bits &= ~ICL_EVENTSEL_ADAPTIVE;
			for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
				pmu->fixed_ctr_ctrl_mask &=
					~(1ULL << (INTEL_PMC_IDX_FIXED + i * 4));
			}
			pmu->pebs_data_cfg_mask = ~0xff00000full;
		} else {
			pmu->pebs_enable_mask =
				~((1ull << pmu->nr_arch_gp_counters) - 1);
		}
	}
}

static void intel_pmu_init(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);

	for (i = 0; i < KVM_INTEL_PMC_MAX_GENERIC; i++) {
		pmu->gp_counters[i].type = KVM_PMC_GP;
		pmu->gp_counters[i].vcpu = vcpu;
		pmu->gp_counters[i].idx = i;
		pmu->gp_counters[i].current_config = 0;
	}

	for (i = 0; i < KVM_PMC_MAX_FIXED; i++) {
		pmu->fixed_counters[i].type = KVM_PMC_FIXED;
		pmu->fixed_counters[i].vcpu = vcpu;
		pmu->fixed_counters[i].idx = i + INTEL_PMC_IDX_FIXED;
		pmu->fixed_counters[i].current_config = 0;
	}

	vcpu->arch.perf_capabilities = kvm_caps.supported_perf_cap;
	lbr_desc->records.nr = 0;
	lbr_desc->event = NULL;
	lbr_desc->msr_passthrough = false;
}

static void intel_pmu_reset(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc = NULL;
	int i;

	for (i = 0; i < KVM_INTEL_PMC_MAX_GENERIC; i++) {
		pmc = &pmu->gp_counters[i];

		pmc_stop_counter(pmc);
		pmc->counter = pmc->eventsel = 0;
	}

	for (i = 0; i < KVM_PMC_MAX_FIXED; i++) {
		pmc = &pmu->fixed_counters[i];

		pmc_stop_counter(pmc);
		pmc->counter = 0;
	}

	pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status = 0;

	intel_pmu_release_guest_lbr_event(vcpu);
}

/*
 * Emulate LBR_On_PMI behavior for 1 < pmu.version < 4.
 *
 * If Freeze_LBR_On_PMI = 1, the LBR is frozen on PMI and KVM emulates
 * this by clearing the LBR bit (bit 0) in IA32_DEBUGCTL.
 *
 * The guest needs to re-enable LBR to resume branch recording.
 */
static void intel_pmu_legacy_freezing_lbrs_on_pmi(struct kvm_vcpu *vcpu)
{
	u64 data = vmcs_read64(GUEST_IA32_DEBUGCTL);

	if (data & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI) {
		data &= ~DEBUGCTLMSR_LBR;
		vmcs_write64(GUEST_IA32_DEBUGCTL, data);
	}
}

static void intel_pmu_deliver_pmi(struct kvm_vcpu *vcpu)
{
	u8 version = vcpu_to_pmu(vcpu)->version;

	if (!intel_pmu_lbr_is_enabled(vcpu))
		return;

	if (version > 1 && version < 4)
		intel_pmu_legacy_freezing_lbrs_on_pmi(vcpu);
}

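/*
 * Toggle MSR interception for all LBR MSRs: the from/to/info record
 * ranges plus MSR_LBR_SELECT and MSR_LBR_TOS.
 */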
static void vmx_update_intercept_for_lbr_msrs(struct kvm_vcpu *vcpu, bool set)
{
	struct x86_pmu_lbr *lbr = vcpu_to_lbr_records(vcpu);
	int i;

	for (i = 0; i < lbr->nr; i++) {
		vmx_set_intercept_for_msr(vcpu, lbr->from + i, MSR_TYPE_RW, set);
		vmx_set_intercept_for_msr(vcpu, lbr->to + i, MSR_TYPE_RW, set);
		if (lbr->info)
			vmx_set_intercept_for_msr(vcpu, lbr->info + i, MSR_TYPE_RW, set);
	}

	vmx_set_intercept_for_msr(vcpu, MSR_LBR_SELECT, MSR_TYPE_RW, set);
	vmx_set_intercept_for_msr(vcpu, MSR_LBR_TOS, MSR_TYPE_RW, set);
}

static inline void vmx_disable_lbr_msrs_passthrough(struct kvm_vcpu *vcpu)
{
	struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);

	if (!lbr_desc->msr_passthrough)
		return;

	vmx_update_intercept_for_lbr_msrs(vcpu, true);
	lbr_desc->msr_passthrough = false;
}

static inline void vmx_enable_lbr_msrs_passthrough(struct kvm_vcpu *vcpu)
{
	struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);

	if (lbr_desc->msr_passthrough)
		return;

	vmx_update_intercept_for_lbr_msrs(vcpu, false);
	lbr_desc->msr_passthrough = true;
}

/*
 * Higher priority host perf events (e.g. cpu pinned) could reclaim the
 * pmu resources (e.g. LBR) that were assigned to the guest. This is
 * usually done via ipi calls (more details in perf_install_in_context).
 *
 * Before entering non-root mode (with irqs disabled here), double-check
 * that the pmu features exposed to the guest have not been reclaimed by
 * higher priority host events. Otherwise, disallow the vcpu's access to
 * the reclaimed features.
 */
void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);

	if (!lbr_desc->event) {
		vmx_disable_lbr_msrs_passthrough(vcpu);
		if (vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR)
			goto warn;
		if (test_bit(INTEL_PMC_IDX_FIXED_VLBR, pmu->pmc_in_use))
			goto warn;
		return;
	}

	if (lbr_desc->event->state < PERF_EVENT_STATE_ACTIVE) {
		vmx_disable_lbr_msrs_passthrough(vcpu);
		__clear_bit(INTEL_PMC_IDX_FIXED_VLBR, pmu->pmc_in_use);
		goto warn;
	} else
		vmx_enable_lbr_msrs_passthrough(vcpu);

	return;

warn:
	pr_warn_ratelimited("kvm: vcpu-%d: fail to passthrough LBR.\n",
		vcpu->vcpu_id);
}

static void intel_pmu_cleanup(struct kvm_vcpu *vcpu)
{
	if (!(vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR))
		intel_pmu_release_guest_lbr_event(vcpu);
}

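/*
 * Collect into host_cross_mapped_mask the host hardware counter indices
 * that back enabled guest PMCs at a different index than the guest's.
 */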
void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu)
{
	struct kvm_pmc *pmc = NULL;
	int bit, hw_idx;

	for_each_set_bit(bit, (unsigned long *)&pmu->global_ctrl,
			 X86_PMC_IDX_MAX) {
		pmc = intel_pmc_idx_to_pmc(pmu, bit);

		if (!pmc || !pmc_speculative_in_use(pmc) ||
		    !intel_pmc_is_enabled(pmc) || !pmc->perf_event)
			continue;

		/*
		 * A negative index indicates the event isn't mapped to a
		 * physical counter in the host, e.g. due to contention.
		 */
		hw_idx = pmc->perf_event->hw.idx;
		if (hw_idx != pmc->idx && hw_idx > -1)
			pmu->host_cross_mapped_mask |= BIT_ULL(hw_idx);
	}
}

struct kvm_pmu_ops intel_pmu_ops __initdata = {
	.hw_event_available = intel_hw_event_available,
	.pmc_is_enabled = intel_pmc_is_enabled,
	.pmc_idx_to_pmc = intel_pmc_idx_to_pmc,
	.rdpmc_ecx_to_pmc = intel_rdpmc_ecx_to_pmc,
	.msr_idx_to_pmc = intel_msr_idx_to_pmc,
	.is_valid_rdpmc_ecx = intel_is_valid_rdpmc_ecx,
	.is_valid_msr = intel_is_valid_msr,
	.get_msr = intel_pmu_get_msr,
	.set_msr = intel_pmu_set_msr,
	.refresh = intel_pmu_refresh,
	.init = intel_pmu_init,
	.reset = intel_pmu_reset,
	.deliver_pmi = intel_pmu_deliver_pmi,
	.cleanup = intel_pmu_cleanup,
};