// SPDX-License-Identifier: GPL-2.0
#include <linux/perf_event.h>
#include <linux/types.h>

#include <asm/perf_event.h>
#include <asm/msr.h>

#include "../perf_event.h"

/*
 * Intel LBR_SELECT bits
 * Intel Vol3a, April 2011, Section 16.7 Table 16-10
 *
 * Hardware branch filter (not available on all CPUs)
 */
#define LBR_KERNEL_BIT		0 /* do not capture at ring0 */
#define LBR_USER_BIT		1 /* do not capture at ring > 0 */
#define LBR_JCC_BIT		2 /* do not capture conditional branches */
#define LBR_REL_CALL_BIT	3 /* do not capture relative calls */
#define LBR_IND_CALL_BIT	4 /* do not capture indirect calls */
#define LBR_RETURN_BIT		5 /* do not capture near returns */
#define LBR_IND_JMP_BIT		6 /* do not capture indirect jumps */
#define LBR_REL_JMP_BIT		7 /* do not capture relative jumps */
#define LBR_FAR_BIT		8 /* do not capture far branches */
#define LBR_CALL_STACK_BIT	9 /* enable call stack */

/*
 * The following bit only exists in Linux; we mask it out before writing it to
 * the actual MSR, but it helps the constraint perf code understand that this
 * is a separate configuration.
 */
#define LBR_NO_INFO_BIT	63 /* don't read LBR_INFO. */

#define LBR_KERNEL	(1 << LBR_KERNEL_BIT)
#define LBR_USER	(1 << LBR_USER_BIT)
#define LBR_JCC		(1 << LBR_JCC_BIT)
#define LBR_REL_CALL	(1 << LBR_REL_CALL_BIT)
#define LBR_IND_CALL	(1 << LBR_IND_CALL_BIT)
#define LBR_RETURN	(1 << LBR_RETURN_BIT)
#define LBR_REL_JMP	(1 << LBR_REL_JMP_BIT)
#define LBR_IND_JMP	(1 << LBR_IND_JMP_BIT)
#define LBR_FAR		(1 << LBR_FAR_BIT)
#define LBR_CALL_STACK	(1 << LBR_CALL_STACK_BIT)
#define LBR_NO_INFO	(1ULL << LBR_NO_INFO_BIT)

#define LBR_PLM		(LBR_KERNEL | LBR_USER)

#define LBR_SEL_MASK	0x3ff	/* valid bits in LBR_SELECT */
#define LBR_NOT_SUPP	-1	/* LBR filter not supported */
#define LBR_IGN		0	/* ignored */

#define LBR_ANY		 \
	(LBR_JCC	|\
	 LBR_REL_CALL	|\
	 LBR_IND_CALL	|\
	 LBR_RETURN	|\
	 LBR_REL_JMP	|\
	 LBR_IND_JMP	|\
	 LBR_FAR)

#define LBR_FROM_FLAG_MISPRED	BIT_ULL(63)
#define LBR_FROM_FLAG_IN_TX	BIT_ULL(62)
#define LBR_FROM_FLAG_ABORT	BIT_ULL(61)

#define LBR_FROM_SIGNEXT_2MSB	(BIT_ULL(60) | BIT_ULL(59))

/*
 * Intel LBR_CTL bits
 *
 * Hardware branch filter for Arch LBR
 */
#define ARCH_LBR_KERNEL_BIT		1  /* capture at ring0 */
#define ARCH_LBR_USER_BIT		2  /* capture at ring > 0 */
#define ARCH_LBR_CALL_STACK_BIT		3  /* enable call stack */
#define ARCH_LBR_JCC_BIT		16 /* capture conditional branches */
#define ARCH_LBR_REL_JMP_BIT		17 /* capture relative jumps */
#define ARCH_LBR_IND_JMP_BIT		18 /* capture indirect jumps */
#define ARCH_LBR_REL_CALL_BIT		19 /* capture relative calls */
#define ARCH_LBR_IND_CALL_BIT		20 /* capture indirect calls */
#define ARCH_LBR_RETURN_BIT		21 /* capture near returns */
#define ARCH_LBR_OTHER_BRANCH_BIT	22 /* capture other branches */

#define ARCH_LBR_KERNEL			(1ULL << ARCH_LBR_KERNEL_BIT)
#define ARCH_LBR_USER			(1ULL << ARCH_LBR_USER_BIT)
#define ARCH_LBR_CALL_STACK		(1ULL << ARCH_LBR_CALL_STACK_BIT)
#define ARCH_LBR_JCC			(1ULL << ARCH_LBR_JCC_BIT)
#define ARCH_LBR_REL_JMP		(1ULL << ARCH_LBR_REL_JMP_BIT)
#define ARCH_LBR_IND_JMP		(1ULL << ARCH_LBR_IND_JMP_BIT)
#define ARCH_LBR_REL_CALL		(1ULL << ARCH_LBR_REL_CALL_BIT)
#define ARCH_LBR_IND_CALL		(1ULL << ARCH_LBR_IND_CALL_BIT)
#define ARCH_LBR_RETURN			(1ULL << ARCH_LBR_RETURN_BIT)
#define ARCH_LBR_OTHER_BRANCH		(1ULL << ARCH_LBR_OTHER_BRANCH_BIT)

#define ARCH_LBR_ANY			 \
	(ARCH_LBR_JCC			|\
	 ARCH_LBR_REL_JMP		|\
	 ARCH_LBR_IND_JMP		|\
	 ARCH_LBR_REL_CALL		|\
	 ARCH_LBR_IND_CALL		|\
	 ARCH_LBR_RETURN		|\
	 ARCH_LBR_OTHER_BRANCH)

#define ARCH_LBR_CTL_MASK		0x7f000e

static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);

static __always_inline bool is_lbr_call_stack_bit_set(u64 config)
{
	if (static_cpu_has(X86_FEATURE_ARCH_LBR))
		return !!(config & ARCH_LBR_CALL_STACK);

	return !!(config & LBR_CALL_STACK);
}

/*
 * We only support LBR implementations that have FREEZE_LBRS_ON_PMI;
 * otherwise it becomes nearly impossible to get a reliable stack.
 */
static void __intel_pmu_lbr_enable(bool pmi)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	u64 debugctl, lbr_select = 0, orig_debugctl;

	/*
	 * No need to unfreeze manually, as v4 can do that as part
	 * of the GLOBAL_STATUS ack.
	 */
	if (pmi && x86_pmu.version >= 4)
		return;

	/*
	 * No need to reprogram LBR_SELECT in a PMI, as it
	 * did not change.
	 */
	if (cpuc->lbr_sel)
		lbr_select = cpuc->lbr_sel->config & x86_pmu.lbr_sel_mask;
	if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && !pmi && cpuc->lbr_sel)
		wrmsrl(MSR_LBR_SELECT, lbr_select);

	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
	orig_debugctl = debugctl;

	if (!static_cpu_has(X86_FEATURE_ARCH_LBR))
		debugctl |= DEBUGCTLMSR_LBR;
	/*
	 * LBR callstack does not work well with FREEZE_LBRS_ON_PMI.
	 * If FREEZE_LBRS_ON_PMI is set, PMI near call/return instructions
	 * may cause superfluous increase/decrease of LBR_TOS.
	 */
	if (is_lbr_call_stack_bit_set(lbr_select))
		debugctl &= ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
	else
		debugctl |= DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;

	if (orig_debugctl != debugctl)
		wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);

	if (static_cpu_has(X86_FEATURE_ARCH_LBR))
		wrmsrl(MSR_ARCH_LBR_CTL, lbr_select | ARCH_LBR_CTL_LBREN);
}

void intel_pmu_lbr_reset_32(void)
{
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++)
		wrmsrl(x86_pmu.lbr_from + i, 0);
}

void intel_pmu_lbr_reset_64(void)
{
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		wrmsrl(x86_pmu.lbr_from + i, 0);
		wrmsrl(x86_pmu.lbr_to + i, 0);
		if (x86_pmu.lbr_has_info)
			wrmsrl(x86_pmu.lbr_info + i, 0);
	}
}

static void intel_pmu_arch_lbr_reset(void)
{
	/* Write to ARCH_LBR_DEPTH MSR, all LBR entries are reset to 0 */
	wrmsrl(MSR_ARCH_LBR_DEPTH, x86_pmu.lbr_nr);
}

void intel_pmu_lbr_reset(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (!x86_pmu.lbr_nr)
		return;

	x86_pmu.lbr_reset();

	cpuc->last_task_ctx = NULL;
	cpuc->last_log_id = 0;

	if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && cpuc->lbr_select)
		wrmsrl(MSR_LBR_SELECT, 0);
}

/*
 * TOS = most recently recorded branch
 */
static inline u64 intel_pmu_lbr_tos(void)
{
	u64 tos;

	rdmsrl(x86_pmu.lbr_tos, tos);
	return tos;
}
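
/*
 * Illustrative sketch (not part of the upstream code, kept as a reading aid):
 * the model-specific LBR MSRs form a ring buffer indexed relative to TOS, and
 * the save/restore/read paths below walk it newest-to-oldest:
 *
 *	unsigned int mask = x86_pmu.lbr_nr - 1;		// e.g. 0xf for 16 LBRs
 *	u64 tos = intel_pmu_lbr_tos();			// most recent entry
 *
 *	for (i = 0; i < x86_pmu.lbr_nr; i++) {
 *		unsigned int lbr_idx = (tos - i) & mask;	// wraps around
 *		// i == 0 is the most recently recorded branch
 *	}
 */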

enum {
	LBR_NONE,
	LBR_VALID,
};

/*
 * For format LBR_FORMAT_EIP_FLAGS2, bits 61:62 in MSR_LAST_BRANCH_FROM_x
 * are the TSX flags when TSX is supported, but when TSX is not supported
 * they have no consistent behavior:
 *
 *   - For wrmsr(), bits 61:62 are considered part of the sign extension.
 *   - For HW updates (branch captures) bits 61:62 are always OFF and are not
 *     part of the sign extension.
 *
 * Therefore, if:
 *
 *   1) LBR has format LBR_FORMAT_EIP_FLAGS2
 *   2) CPU has no TSX support enabled
 *
 * ... then any value passed to wrmsr() must be sign extended to 63 bits and any
 * value from rdmsr() must be converted to have a 61-bit sign extension,
 * ignoring the TSX flags.
 */
static inline bool lbr_from_signext_quirk_needed(void)
{
	bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) ||
			   boot_cpu_has(X86_FEATURE_RTM);

	return !tsx_support;
}

static DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key);

/* If quirk is enabled, ensure sign extension is 63 bits: */
inline u64 lbr_from_signext_quirk_wr(u64 val)
{
	if (static_branch_unlikely(&lbr_from_quirk_key)) {
		/*
		 * Sign extend into bits 61:62 while preserving bit 63.
		 *
		 * Quirk is enabled when TSX is disabled. Therefore TSX bits
		 * in val are always OFF and must be changed to be sign
		 * extension bits. Since bits 59:60 are guaranteed to be
		 * part of the sign extension bits, we can just copy them
		 * to 61:62.
		 */
		val |= (LBR_FROM_SIGNEXT_2MSB & val) << 2;
	}
	return val;
}
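
/*
 * Worked example (illustrative only, assuming the quirk is active): a saved
 * kernel-space FROM value has the TSX bits 61:62 clear, e.g. with MISPRED set:
 *
 *	val                                = 0x9fff880012345678
 *	LBR_FROM_SIGNEXT_2MSB & val        = 0x1800000000000000  // bits 59:60
 *	(LBR_FROM_SIGNEXT_2MSB & val) << 2 = 0x6000000000000000  // bits 61:62
 *	val | ...                          = 0xffff880012345678
 *
 * which recreates the 63-bit sign extension that wrmsr() expects on non-TSX
 * parts, while leaving bit 63 (MISPRED) untouched.
 */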

/*
 * If quirk is needed, ensure sign extension is 61 bits:
 */
static u64 lbr_from_signext_quirk_rd(u64 val)
{
	if (static_branch_unlikely(&lbr_from_quirk_key)) {
		/*
		 * Quirk is on when TSX is not enabled. Therefore TSX
		 * flags must be read as OFF.
		 */
		val &= ~(LBR_FROM_FLAG_IN_TX | LBR_FROM_FLAG_ABORT);
	}
	return val;
}

static __always_inline void wrlbr_from(unsigned int idx, u64 val)
{
	val = lbr_from_signext_quirk_wr(val);
	wrmsrl(x86_pmu.lbr_from + idx, val);
}

static __always_inline void wrlbr_to(unsigned int idx, u64 val)
{
	wrmsrl(x86_pmu.lbr_to + idx, val);
}

static __always_inline void wrlbr_info(unsigned int idx, u64 val)
{
	wrmsrl(x86_pmu.lbr_info + idx, val);
}

static __always_inline u64 rdlbr_from(unsigned int idx, struct lbr_entry *lbr)
{
	u64 val;

	if (lbr)
		return lbr->from;

	rdmsrl(x86_pmu.lbr_from + idx, val);

	return lbr_from_signext_quirk_rd(val);
}

static __always_inline u64 rdlbr_to(unsigned int idx, struct lbr_entry *lbr)
{
	u64 val;

	if (lbr)
		return lbr->to;

	rdmsrl(x86_pmu.lbr_to + idx, val);

	return val;
}

static __always_inline u64 rdlbr_info(unsigned int idx, struct lbr_entry *lbr)
{
	u64 val;

	if (lbr)
		return lbr->info;

	rdmsrl(x86_pmu.lbr_info + idx, val);

	return val;
}

static inline void
wrlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info)
{
	wrlbr_from(idx, lbr->from);
	wrlbr_to(idx, lbr->to);
	if (need_info)
		wrlbr_info(idx, lbr->info);
}

static inline bool
rdlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info)
{
	u64 from = rdlbr_from(idx, NULL);

	/* Don't read invalid entry */
	if (!from)
		return false;

	lbr->from = from;
	lbr->to = rdlbr_to(idx, NULL);
	if (need_info)
		lbr->info = rdlbr_info(idx, NULL);

	return true;
}

void intel_pmu_lbr_restore(void *ctx)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct x86_perf_task_context *task_ctx = ctx;
	bool need_info = x86_pmu.lbr_has_info;
	u64 tos = task_ctx->tos;
	unsigned lbr_idx, mask;
	int i;

	mask = x86_pmu.lbr_nr - 1;
	for (i = 0; i < task_ctx->valid_lbrs; i++) {
		lbr_idx = (tos - i) & mask;
		wrlbr_all(&task_ctx->lbr[i], lbr_idx, need_info);
	}

	for (; i < x86_pmu.lbr_nr; i++) {
		lbr_idx = (tos - i) & mask;
		wrlbr_from(lbr_idx, 0);
		wrlbr_to(lbr_idx, 0);
		if (need_info)
			wrlbr_info(lbr_idx, 0);
	}

	wrmsrl(x86_pmu.lbr_tos, tos);

	if (cpuc->lbr_select)
		wrmsrl(MSR_LBR_SELECT, task_ctx->lbr_sel);
}

static void intel_pmu_arch_lbr_restore(void *ctx)
{
	struct x86_perf_task_context_arch_lbr *task_ctx = ctx;
	struct lbr_entry *entries = task_ctx->entries;
	int i;

	/* Fast reset the LBRs before restore if the call stack is not full. */
	if (!entries[x86_pmu.lbr_nr - 1].from)
		intel_pmu_arch_lbr_reset();

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		if (!entries[i].from)
			break;
		wrlbr_all(&entries[i], i, true);
	}
}

/*
 * Restore the Architecture LBR state from the xsave area in the perf
 * context data for the task via the XRSTORS instruction.
 */
static void intel_pmu_arch_lbr_xrstors(void *ctx)
{
	struct x86_perf_task_context_arch_lbr_xsave *task_ctx = ctx;

	xrstors(&task_ctx->xsave, XFEATURE_MASK_LBR);
}

static __always_inline bool lbr_is_reset_in_cstate(void *ctx)
{
	if (static_cpu_has(X86_FEATURE_ARCH_LBR))
		return x86_pmu.lbr_deep_c_reset && !rdlbr_from(0, NULL);

	return !rdlbr_from(((struct x86_perf_task_context *)ctx)->tos, NULL);
}

static void __intel_pmu_lbr_restore(void *ctx)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (task_context_opt(ctx)->lbr_callstack_users == 0 ||
	    task_context_opt(ctx)->lbr_stack_state == LBR_NONE) {
		intel_pmu_lbr_reset();
		return;
	}

	/*
	 * Do not restore the LBR registers if:
	 * - no one else touched them, and
	 * - they were not cleared in a deep C-state.
	 */
	if ((ctx == cpuc->last_task_ctx) &&
	    (task_context_opt(ctx)->log_id == cpuc->last_log_id) &&
	    !lbr_is_reset_in_cstate(ctx)) {
		task_context_opt(ctx)->lbr_stack_state = LBR_NONE;
		return;
	}

	x86_pmu.lbr_restore(ctx);

	task_context_opt(ctx)->lbr_stack_state = LBR_NONE;
}

void intel_pmu_lbr_save(void *ctx)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct x86_perf_task_context *task_ctx = ctx;
	bool need_info = x86_pmu.lbr_has_info;
	unsigned lbr_idx, mask;
	u64 tos;
	int i;

	mask = x86_pmu.lbr_nr - 1;
	tos = intel_pmu_lbr_tos();
	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		lbr_idx = (tos - i) & mask;
		if (!rdlbr_all(&task_ctx->lbr[i], lbr_idx, need_info))
			break;
	}
	task_ctx->valid_lbrs = i;
	task_ctx->tos = tos;

	if (cpuc->lbr_select)
		rdmsrl(MSR_LBR_SELECT, task_ctx->lbr_sel);
}

static void intel_pmu_arch_lbr_save(void *ctx)
{
	struct x86_perf_task_context_arch_lbr *task_ctx = ctx;
	struct lbr_entry *entries = task_ctx->entries;
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		if (!rdlbr_all(&entries[i], i, true))
			break;
	}

	/* LBR call stack is not full. Reset is required in restore. */
	if (i < x86_pmu.lbr_nr)
		entries[x86_pmu.lbr_nr - 1].from = 0;
}

/*
 * Save the Architecture LBR state to the xsave area in the perf
 * context data for the task via the XSAVES instruction.
 */
static void intel_pmu_arch_lbr_xsaves(void *ctx)
{
	struct x86_perf_task_context_arch_lbr_xsave *task_ctx = ctx;

	xsaves(&task_ctx->xsave, XFEATURE_MASK_LBR);
}

static void __intel_pmu_lbr_save(void *ctx)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (task_context_opt(ctx)->lbr_callstack_users == 0) {
		task_context_opt(ctx)->lbr_stack_state = LBR_NONE;
		return;
	}

	x86_pmu.lbr_save(ctx);

	task_context_opt(ctx)->lbr_stack_state = LBR_VALID;

	cpuc->last_task_ctx = ctx;
	cpuc->last_log_id = ++task_context_opt(ctx)->log_id;
}

void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev,
				 struct perf_event_context *next)
{
	void *prev_ctx_data, *next_ctx_data;

	swap(prev->task_ctx_data, next->task_ctx_data);

	/*
	 * Architecture specific synchronization makes sense in
	 * case both prev->task_ctx_data and next->task_ctx_data
	 * pointers are allocated.
	 */
	prev_ctx_data = next->task_ctx_data;
	next_ctx_data = prev->task_ctx_data;

	if (!prev_ctx_data || !next_ctx_data)
		return;

	swap(task_context_opt(prev_ctx_data)->lbr_callstack_users,
	     task_context_opt(next_ctx_data)->lbr_callstack_users);
}

void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	void *task_ctx;

	if (!cpuc->lbr_users)
		return;

	/*
	 * If the LBR callstack feature is enabled and the stack was saved when
	 * the task was scheduled out, restore the stack. Otherwise flush
	 * the LBR stack.
	 */
	task_ctx = ctx ? ctx->task_ctx_data : NULL;
	if (task_ctx) {
		if (sched_in)
			__intel_pmu_lbr_restore(task_ctx);
		else
			__intel_pmu_lbr_save(task_ctx);
		return;
	}

	/*
	 * Since a context switch can flip the address space and LBR entries
	 * are not tagged with an identifier, we need to wipe the LBR, even for
	 * per-cpu events. You simply cannot resolve the branches from the old
	 * address space.
	 */
	if (sched_in)
		intel_pmu_lbr_reset();
}

static inline bool branch_user_callstack(unsigned br_sel)
{
	return (br_sel & X86_BR_USER) && (br_sel & X86_BR_CALL_STACK);
}

void intel_pmu_lbr_add(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (!x86_pmu.lbr_nr)
		return;

	if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT)
		cpuc->lbr_select = 1;

	cpuc->br_sel = event->hw.branch_reg.reg;

	if (branch_user_callstack(cpuc->br_sel) && event->ctx->task_ctx_data)
		task_context_opt(event->ctx->task_ctx_data)->lbr_callstack_users++;

	/*
	 * Request pmu::sched_task() callback, which will fire inside the
	 * regular perf event scheduling, so that call will:
	 *
	 *  - restore or wipe; when LBR-callstack,
	 *  - wipe; otherwise,
	 *
	 * when this is from __perf_event_task_sched_in().
	 *
	 * However, if this is from perf_install_in_context(), no such callback
	 * will follow and we'll need to reset the LBR here if this is the
	 * first LBR event.
	 *
	 * The problem is, we cannot tell these cases apart... but we can
	 * exclude the biggest chunk of cases by looking at
	 * event->total_time_running. An event that has accrued runtime cannot
	 * be 'new'. Conversely, a new event can get installed through the
	 * context switch path for the first time.
	 */
	if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0)
		cpuc->lbr_pebs_users++;
	perf_sched_cb_inc(event->ctx->pmu);
	if (!cpuc->lbr_users++ && !event->total_time_running)
		intel_pmu_lbr_reset();
}

void release_lbr_buffers(void)
{
	struct kmem_cache *kmem_cache;
	struct cpu_hw_events *cpuc;
	int cpu;

	if (!static_cpu_has(X86_FEATURE_ARCH_LBR))
		return;

	for_each_possible_cpu(cpu) {
		cpuc = per_cpu_ptr(&cpu_hw_events, cpu);
		kmem_cache = x86_get_pmu(cpu)->task_ctx_cache;
		if (kmem_cache && cpuc->lbr_xsave) {
			kmem_cache_free(kmem_cache, cpuc->lbr_xsave);
			cpuc->lbr_xsave = NULL;
		}
	}
}

void reserve_lbr_buffers(void)
{
	struct kmem_cache *kmem_cache;
	struct cpu_hw_events *cpuc;
	int cpu;

	if (!static_cpu_has(X86_FEATURE_ARCH_LBR))
		return;

	for_each_possible_cpu(cpu) {
		cpuc = per_cpu_ptr(&cpu_hw_events, cpu);
		kmem_cache = x86_get_pmu(cpu)->task_ctx_cache;
		if (!kmem_cache || cpuc->lbr_xsave)
			continue;

		cpuc->lbr_xsave = kmem_cache_alloc_node(kmem_cache,
							GFP_KERNEL | __GFP_ZERO,
							cpu_to_node(cpu));
	}
}

void intel_pmu_lbr_del(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (!x86_pmu.lbr_nr)
		return;

	if (branch_user_callstack(cpuc->br_sel) &&
	    event->ctx->task_ctx_data)
		task_context_opt(event->ctx->task_ctx_data)->lbr_callstack_users--;

	if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT)
		cpuc->lbr_select = 0;

	if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0)
		cpuc->lbr_pebs_users--;
	cpuc->lbr_users--;
	WARN_ON_ONCE(cpuc->lbr_users < 0);
	WARN_ON_ONCE(cpuc->lbr_pebs_users < 0);
	perf_sched_cb_dec(event->ctx->pmu);
}

static inline bool vlbr_exclude_host(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	return test_bit(INTEL_PMC_IDX_FIXED_VLBR,
			(unsigned long *)&cpuc->intel_ctrl_guest_mask);
}

void intel_pmu_lbr_enable_all(bool pmi)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (cpuc->lbr_users && !vlbr_exclude_host())
		__intel_pmu_lbr_enable(pmi);
}

void intel_pmu_lbr_disable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (cpuc->lbr_users && !vlbr_exclude_host()) {
		if (static_cpu_has(X86_FEATURE_ARCH_LBR))
			return __intel_pmu_arch_lbr_disable();

		__intel_pmu_lbr_disable();
	}
}

void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
{
	unsigned long mask = x86_pmu.lbr_nr - 1;
	struct perf_branch_entry *br = cpuc->lbr_entries;
	u64 tos = intel_pmu_lbr_tos();
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		unsigned long lbr_idx = (tos - i) & mask;
		union {
			struct {
				u32 from;
				u32 to;
			};
			u64 lbr;
		} msr_lastbranch;

		rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);

		perf_clear_branch_entry_bitfields(br);
		br->from = msr_lastbranch.from;
		br->to = msr_lastbranch.to;
		br++;
	}
	cpuc->lbr_stack.nr = i;
	cpuc->lbr_stack.hw_idx = tos;
}

/*
 * Due to the lack of segmentation in Linux, the effective address (offset)
 * is the same as the linear address, allowing us to merge the LIP and EIP
 * LBR formats.
 */
void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
{
	bool need_info = false, call_stack = false;
	unsigned long mask = x86_pmu.lbr_nr - 1;
	struct perf_branch_entry *br = cpuc->lbr_entries;
	u64 tos = intel_pmu_lbr_tos();
	int i;
	int out = 0;
	int num = x86_pmu.lbr_nr;

	if (cpuc->lbr_sel) {
		need_info = !(cpuc->lbr_sel->config & LBR_NO_INFO);
		if (cpuc->lbr_sel->config & LBR_CALL_STACK)
			call_stack = true;
	}

	for (i = 0; i < num; i++) {
		unsigned long lbr_idx = (tos - i) & mask;
		u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
		u16 cycles = 0;

		from = rdlbr_from(lbr_idx, NULL);
		to = rdlbr_to(lbr_idx, NULL);

		/*
		 * Read LBR call stack entries
		 * until invalid entry (0s) is detected.
		 */
		if (call_stack && !from)
			break;

		if (x86_pmu.lbr_has_info) {
			if (need_info) {
				u64 info;

				info = rdlbr_info(lbr_idx, NULL);
				mis = !!(info & LBR_INFO_MISPRED);
				pred = !mis;
				cycles = (info & LBR_INFO_CYCLES);
				if (x86_pmu.lbr_has_tsx) {
					in_tx = !!(info & LBR_INFO_IN_TX);
					abort = !!(info & LBR_INFO_ABORT);
				}
			}
		} else {
			int skip = 0;

			if (x86_pmu.lbr_from_flags) {
				mis = !!(from & LBR_FROM_FLAG_MISPRED);
				pred = !mis;
				skip = 1;
			}
			if (x86_pmu.lbr_has_tsx) {
				in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
				abort = !!(from & LBR_FROM_FLAG_ABORT);
				skip = 3;
			}
			from = (u64)((((s64)from) << skip) >> skip);

			if (x86_pmu.lbr_to_cycles) {
				cycles = ((to >> 48) & LBR_INFO_CYCLES);
				to = (u64)((((s64)to) << 16) >> 16);
			}
		}

		/*
		 * Some CPUs report duplicated abort records,
		 * with the second entry not having an abort bit set.
		 * Skip them here. This loop runs backwards,
		 * so we need to undo the previous record.
		 * If the abort just happened outside the window
		 * the extra entry cannot be removed.
		 */
		if (abort && x86_pmu.lbr_double_abort && out > 0)
			out--;

		perf_clear_branch_entry_bitfields(br+out);
		br[out].from = from;
		br[out].to = to;
		br[out].mispred = mis;
		br[out].predicted = pred;
		br[out].in_tx = in_tx;
		br[out].abort = abort;
		br[out].cycles = cycles;
		out++;
	}
	cpuc->lbr_stack.nr = out;
	cpuc->lbr_stack.hw_idx = tos;
}
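
/*
 * Illustrative note (not part of the upstream code): on pre-LBR_INFO formats
 * the flag bits live in the top of the FROM value itself, and the shift pair
 * above strips them while re-extending the sign from the remaining top bit.
 * With skip = 3 (MISPRED/IN_TX/ABORT in bits 63:61), for example:
 *
 *	from                         = 0x9fff880012345678  // MISPRED set
 *	(u64)(((s64)from << 3) >> 3) = 0xffff880012345678  // canonical again
 *
 * The same trick with a shift of 16 recovers the TO address when the cycle
 * count is packed into bits 63:48 (LBR_FORMAT_TIME).
 */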

static DEFINE_STATIC_KEY_FALSE(x86_lbr_mispred);
static DEFINE_STATIC_KEY_FALSE(x86_lbr_cycles);
static DEFINE_STATIC_KEY_FALSE(x86_lbr_type);

static __always_inline int get_lbr_br_type(u64 info)
{
	int type = 0;

	if (static_branch_likely(&x86_lbr_type))
		type = (info & LBR_INFO_BR_TYPE) >> LBR_INFO_BR_TYPE_OFFSET;

	return type;
}

static __always_inline bool get_lbr_mispred(u64 info)
{
	bool mispred = 0;

	if (static_branch_likely(&x86_lbr_mispred))
		mispred = !!(info & LBR_INFO_MISPRED);

	return mispred;
}

static __always_inline u16 get_lbr_cycles(u64 info)
{
	u16 cycles = info & LBR_INFO_CYCLES;

	if (static_cpu_has(X86_FEATURE_ARCH_LBR) &&
	    (!static_branch_likely(&x86_lbr_cycles) ||
	     !(info & LBR_INFO_CYC_CNT_VALID)))
		cycles = 0;

	return cycles;
}

static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
				struct lbr_entry *entries)
{
	struct perf_branch_entry *e;
	struct lbr_entry *lbr;
	u64 from, to, info;
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		lbr = entries ? &entries[i] : NULL;
		e = &cpuc->lbr_entries[i];

		from = rdlbr_from(i, lbr);
		/*
		 * Read LBR entries until invalid entry (0s) is detected.
		 */
		if (!from)
			break;

		to = rdlbr_to(i, lbr);
		info = rdlbr_info(i, lbr);

		perf_clear_branch_entry_bitfields(e);
		e->from = from;
		e->to = to;
		e->mispred = get_lbr_mispred(info);
		e->predicted = !e->mispred;
		e->in_tx = !!(info & LBR_INFO_IN_TX);
		e->abort = !!(info & LBR_INFO_ABORT);
		e->cycles = get_lbr_cycles(info);
		e->type = get_lbr_br_type(info);
	}

	cpuc->lbr_stack.nr = i;
}

static void intel_pmu_arch_lbr_read(struct cpu_hw_events *cpuc)
{
	intel_pmu_store_lbr(cpuc, NULL);
}

static void intel_pmu_arch_lbr_read_xsave(struct cpu_hw_events *cpuc)
{
	struct x86_perf_task_context_arch_lbr_xsave *xsave = cpuc->lbr_xsave;

	if (!xsave) {
		intel_pmu_store_lbr(cpuc, NULL);
		return;
	}
	xsaves(&xsave->xsave, XFEATURE_MASK_LBR);

	intel_pmu_store_lbr(cpuc, xsave->lbr.entries);
}

void intel_pmu_lbr_read(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	/*
	 * Don't read when all LBRs users are using adaptive PEBS.
	 *
	 * This could be smarter and actually check the event,
	 * but this simple approach seems to work for now.
	 */
	if (!cpuc->lbr_users || vlbr_exclude_host() ||
	    cpuc->lbr_users == cpuc->lbr_pebs_users)
		return;

	x86_pmu.lbr_read(cpuc);

	intel_pmu_lbr_filter(cpuc);
}

/*
 * SW filter is used:
 * - in case there is no HW filter
 * - in case the HW filter has errata or limitations
 */
static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
{
	u64 br_type = event->attr.branch_sample_type;
	int mask = 0;

	if (br_type & PERF_SAMPLE_BRANCH_USER)
		mask |= X86_BR_USER;

	if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
		mask |= X86_BR_KERNEL;

	/* we ignore BRANCH_HV here */

	if (br_type & PERF_SAMPLE_BRANCH_ANY)
		mask |= X86_BR_ANY;

	if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
		mask |= X86_BR_ANY_CALL;

	if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
		mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;

	if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
		mask |= X86_BR_IND_CALL;

	if (br_type & PERF_SAMPLE_BRANCH_ABORT_TX)
		mask |= X86_BR_ABORT;

	if (br_type & PERF_SAMPLE_BRANCH_IN_TX)
		mask |= X86_BR_IN_TX;

	if (br_type & PERF_SAMPLE_BRANCH_NO_TX)
		mask |= X86_BR_NO_TX;

	if (br_type & PERF_SAMPLE_BRANCH_COND)
		mask |= X86_BR_JCC;

	if (br_type & PERF_SAMPLE_BRANCH_CALL_STACK) {
		if (!x86_pmu_has_lbr_callstack())
			return -EOPNOTSUPP;
		if (mask & ~(X86_BR_USER | X86_BR_KERNEL))
			return -EINVAL;
		mask |= X86_BR_CALL | X86_BR_IND_CALL | X86_BR_RET |
			X86_BR_CALL_STACK;
	}

	if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
		mask |= X86_BR_IND_JMP;

	if (br_type & PERF_SAMPLE_BRANCH_CALL)
		mask |= X86_BR_CALL | X86_BR_ZERO_CALL;

	if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE)
		mask |= X86_BR_TYPE_SAVE;

	/*
	 * Stash the actual user request into reg; it may
	 * be used by fixup code for some CPUs.
	 */
	event->hw.branch_reg.reg = mask;

	return 0;
}

/*
 * setup the HW LBR filter
 * Used only when available, may not be enough to disambiguate
 * all branches, may need the help of the SW filter
 */
static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
{
	struct hw_perf_event_extra *reg;
	u64 br_type = event->attr.branch_sample_type;
	u64 mask = 0, v;
	int i;

	for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) {
		if (!(br_type & (1ULL << i)))
			continue;

		v = x86_pmu.lbr_sel_map[i];
		if (v == LBR_NOT_SUPP)
			return -EOPNOTSUPP;

		if (v != LBR_IGN)
			mask |= v;
	}

	reg = &event->hw.branch_reg;
	reg->idx = EXTRA_REG_LBR;

	if (static_cpu_has(X86_FEATURE_ARCH_LBR)) {
		reg->config = mask;

		/*
		 * The Arch LBR HW can retrieve the common branch types
		 * from the LBR_INFO. It doesn't require the high overhead
		 * SW disassemble.
		 * Enable the branch type by default for the Arch LBR.
		 */
		reg->reg |= X86_BR_TYPE_SAVE;
		return 0;
	}

	/*
	 * The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate
	 * in suppress mode. So LBR_SELECT should be set to
	 * (~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK)
	 * But the 10th bit LBR_CALL_STACK does not operate
	 * in suppress mode.
	 */
	reg->config = mask ^ (x86_pmu.lbr_sel_mask & ~LBR_CALL_STACK);

	if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
	    (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&
	    x86_pmu.lbr_has_info)
		reg->config |= LBR_NO_INFO;

	return 0;
}
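
/*
 * Worked example (illustrative only, using the hsw_lbr_sel_map defined
 * below): a request for PERF_SAMPLE_BRANCH_ANY | PERF_SAMPLE_BRANCH_USER
 * collects mask = LBR_ANY | LBR_USER = 0x1fe.  Because the low 9 bits of
 * LBR_SELECT are suppress bits, the XOR with (LBR_SEL_MASK & ~LBR_CALL_STACK)
 * = 0x1ff inverts them:
 *
 *	reg->config = 0x1fe ^ 0x1ff = 0x1 = LBR_KERNEL
 *
 * i.e. only ring-0 capture is suppressed; everything requested stays enabled.
 */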

int intel_pmu_setup_lbr_filter(struct perf_event *event)
{
	int ret = 0;

	/*
	 * no LBR on this PMU
	 */
	if (!x86_pmu.lbr_nr)
		return -EOPNOTSUPP;

	/*
	 * setup SW LBR filter
	 */
	ret = intel_pmu_setup_sw_lbr_filter(event);
	if (ret)
		return ret;

	/*
	 * setup HW LBR filter, if any
	 */
	if (x86_pmu.lbr_sel_map)
		ret = intel_pmu_setup_hw_lbr_filter(event);

	return ret;
}

enum {
	ARCH_LBR_BR_TYPE_JCC			= 0,
	ARCH_LBR_BR_TYPE_NEAR_IND_JMP		= 1,
	ARCH_LBR_BR_TYPE_NEAR_REL_JMP		= 2,
	ARCH_LBR_BR_TYPE_NEAR_IND_CALL		= 3,
	ARCH_LBR_BR_TYPE_NEAR_REL_CALL		= 4,
	ARCH_LBR_BR_TYPE_NEAR_RET		= 5,
	ARCH_LBR_BR_TYPE_KNOWN_MAX		= ARCH_LBR_BR_TYPE_NEAR_RET,

	ARCH_LBR_BR_TYPE_MAP_MAX		= 16,
};

static const int arch_lbr_br_type_map[ARCH_LBR_BR_TYPE_MAP_MAX] = {
	[ARCH_LBR_BR_TYPE_JCC]			= X86_BR_JCC,
	[ARCH_LBR_BR_TYPE_NEAR_IND_JMP]		= X86_BR_IND_JMP,
	[ARCH_LBR_BR_TYPE_NEAR_REL_JMP]		= X86_BR_JMP,
	[ARCH_LBR_BR_TYPE_NEAR_IND_CALL]	= X86_BR_IND_CALL,
	[ARCH_LBR_BR_TYPE_NEAR_REL_CALL]	= X86_BR_CALL,
	[ARCH_LBR_BR_TYPE_NEAR_RET]		= X86_BR_RET,
};

/*
 * implement actual branch filter based on user demand.
 * Hardware may not exactly satisfy that request, thus
 * we need to inspect opcodes. Mismatched branches are
 * discarded. Therefore, the number of branches returned
 * in PERF_SAMPLE_BRANCH_STACK sample may vary.
 */
static void
intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
{
	u64 from, to;
	int br_sel = cpuc->br_sel;
	int i, j, type, to_plm;
	bool compress = false;

	/* if sampling all branches, then nothing to filter */
	if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
	    ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
		return;

	for (i = 0; i < cpuc->lbr_stack.nr; i++) {

		from = cpuc->lbr_entries[i].from;
		to = cpuc->lbr_entries[i].to;
		type = cpuc->lbr_entries[i].type;

		/*
		 * Parse the branch type recorded in the LBR_x_INFO MSR.
		 * OTHER_BRANCH decoding is not supported for now;
		 * that branch type still relies on software decoding.
		 */
		if (static_cpu_has(X86_FEATURE_ARCH_LBR) &&
		    type <= ARCH_LBR_BR_TYPE_KNOWN_MAX) {
			to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
			type = arch_lbr_br_type_map[type] | to_plm;
		} else
			type = branch_type(from, to, cpuc->lbr_entries[i].abort);
		if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) {
			if (cpuc->lbr_entries[i].in_tx)
				type |= X86_BR_IN_TX;
			else
				type |= X86_BR_NO_TX;
		}

		/* if type does not correspond, then discard */
		if (type == X86_BR_NONE || (br_sel & type) != type) {
			cpuc->lbr_entries[i].from = 0;
			compress = true;
		}

		if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE)
			cpuc->lbr_entries[i].type = common_branch_type(type);
	}

	if (!compress)
		return;

	/* remove all entries with from=0 */
	for (i = 0; i < cpuc->lbr_stack.nr; ) {
		if (!cpuc->lbr_entries[i].from) {
			j = i;
			while (++j < cpuc->lbr_stack.nr)
				cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
			cpuc->lbr_stack.nr--;
			if (!cpuc->lbr_entries[i].from)
				continue;
		}
		i++;
	}
}

void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	/* Cannot get TOS for large PEBS and Arch LBR */
	if (static_cpu_has(X86_FEATURE_ARCH_LBR) ||
	    (cpuc->n_pebs == cpuc->n_large_pebs))
		cpuc->lbr_stack.hw_idx = -1ULL;
	else
		cpuc->lbr_stack.hw_idx = intel_pmu_lbr_tos();

	intel_pmu_store_lbr(cpuc, lbr);
	intel_pmu_lbr_filter(cpuc);
}

/*
 * Map interface branch filters onto LBR filters
 */
static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGN,
	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_REL_JMP
						| LBR_IND_JMP | LBR_FAR,
	/*
	 * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
	 */
	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] =
	 LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
	/*
	 * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
	 */
	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]	= LBR_IND_CALL | LBR_IND_JMP,
	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= LBR_JCC,
	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]	= LBR_IND_JMP,
};
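
/*
 * Illustrative note (not part of the upstream table): because of the NHM/WSM
 * erratum above, a PERF_SAMPLE_BRANCH_ANY_CALL request is programmed into
 * MSR_LBR_SELECT as
 *
 *	LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR
 *
 * so the hardware also records plain jumps.  The software pass in
 * intel_pmu_lbr_filter() then drops the records whose decoded type does not
 * match the user's X86_BR_ANY_CALL selection.
 */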

static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGN,
	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_FAR,
	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL
						| LBR_FAR,
	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]	= LBR_IND_CALL,
	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= LBR_JCC,
	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]	= LBR_IND_JMP,
	[PERF_SAMPLE_BRANCH_CALL_SHIFT]		= LBR_REL_CALL,
};

static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGN,
	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_FAR,
	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL
						| LBR_FAR,
	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]	= LBR_IND_CALL,
	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= LBR_JCC,
	[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL
						| LBR_RETURN | LBR_CALL_STACK,
	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]	= LBR_IND_JMP,
	[PERF_SAMPLE_BRANCH_CALL_SHIFT]		= LBR_REL_CALL,
};

static int arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= ARCH_LBR_ANY,
	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= ARCH_LBR_USER,
	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= ARCH_LBR_KERNEL,
	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGN,
	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= ARCH_LBR_RETURN |
						  ARCH_LBR_OTHER_BRANCH,
	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]	= ARCH_LBR_REL_CALL |
						  ARCH_LBR_IND_CALL |
						  ARCH_LBR_OTHER_BRANCH,
	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]	= ARCH_LBR_IND_CALL,
	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= ARCH_LBR_JCC,
	[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT]	= ARCH_LBR_REL_CALL |
						  ARCH_LBR_IND_CALL |
						  ARCH_LBR_RETURN |
						  ARCH_LBR_CALL_STACK,
	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]	= ARCH_LBR_IND_JMP,
	[PERF_SAMPLE_BRANCH_CALL_SHIFT]		= ARCH_LBR_REL_CALL,
};

/* core */
void __init intel_pmu_lbr_init_core(void)
{
	x86_pmu.lbr_nr = 4;
	x86_pmu.lbr_tos = MSR_LBR_TOS;
	x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
	x86_pmu.lbr_to = MSR_LBR_CORE_TO;

	/*
	 * SW branch filter usage:
	 * - compensate for lack of HW filter
	 */
}

/* nehalem/westmere */
void __init intel_pmu_lbr_init_nhm(void)
{
	x86_pmu.lbr_nr = 16;
	x86_pmu.lbr_tos = MSR_LBR_TOS;
	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
	x86_pmu.lbr_to = MSR_LBR_NHM_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map = nhm_lbr_sel_map;

	/*
	 * SW branch filter usage:
	 * - workaround LBR_SEL errata (see above)
	 * - support syscall, sysret capture.
	 *   That requires LBR_FAR, but that means far
	 *   jmps need to be filtered out.
	 */
}

/* sandy bridge */
void __init intel_pmu_lbr_init_snb(void)
{
	x86_pmu.lbr_nr = 16;
	x86_pmu.lbr_tos = MSR_LBR_TOS;
	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
	x86_pmu.lbr_to = MSR_LBR_NHM_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map = snb_lbr_sel_map;

	/*
	 * SW branch filter usage:
	 * - support syscall, sysret capture.
	 *   That requires LBR_FAR, but that means far
	 *   jmps need to be filtered out.
	 */
}

static inline struct kmem_cache *
create_lbr_kmem_cache(size_t size, size_t align)
{
	return kmem_cache_create("x86_lbr", size, align, 0, NULL);
}

/* haswell */
void intel_pmu_lbr_init_hsw(void)
{
	size_t size = sizeof(struct x86_perf_task_context);

	x86_pmu.lbr_nr = 16;
	x86_pmu.lbr_tos = MSR_LBR_TOS;
	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
	x86_pmu.lbr_to = MSR_LBR_NHM_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map = hsw_lbr_sel_map;

	x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0);
}

/* skylake */
__init void intel_pmu_lbr_init_skl(void)
{
	size_t size = sizeof(struct x86_perf_task_context);

	x86_pmu.lbr_nr = 32;
	x86_pmu.lbr_tos = MSR_LBR_TOS;
	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
	x86_pmu.lbr_to = MSR_LBR_NHM_TO;
	x86_pmu.lbr_info = MSR_LBR_INFO_0;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map = hsw_lbr_sel_map;

	x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0);

	/*
	 * SW branch filter usage:
	 * - support syscall, sysret capture.
	 *   That requires LBR_FAR, but that means far
	 *   jmps need to be filtered out.
	 */
}

/* atom */
void __init intel_pmu_lbr_init_atom(void)
{
	/*
	 * Only models starting at stepping 10 seem to have an
	 * operational LBR which can freeze on PMU interrupt.
	 */
	if (boot_cpu_data.x86_model == 28
	    && boot_cpu_data.x86_stepping < 10) {
		pr_cont("LBR disabled due to erratum");
		return;
	}

	x86_pmu.lbr_nr = 8;
	x86_pmu.lbr_tos = MSR_LBR_TOS;
	x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
	x86_pmu.lbr_to = MSR_LBR_CORE_TO;

	/*
	 * SW branch filter usage:
	 * - compensate for lack of HW filter
	 */
}

/* slm */
void __init intel_pmu_lbr_init_slm(void)
{
	x86_pmu.lbr_nr = 8;
	x86_pmu.lbr_tos = MSR_LBR_TOS;
	x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
	x86_pmu.lbr_to = MSR_LBR_CORE_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map = nhm_lbr_sel_map;

	/*
	 * SW branch filter usage:
	 * - compensate for lack of HW filter
	 */
	pr_cont("8-deep LBR, ");
}

/* Knights Landing */
void intel_pmu_lbr_init_knl(void)
{
	x86_pmu.lbr_nr = 8;
	x86_pmu.lbr_tos = MSR_LBR_TOS;
	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
	x86_pmu.lbr_to = MSR_LBR_NHM_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map = snb_lbr_sel_map;

	/* Knights Landing does have MISPREDICT bit */
	if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_LIP)
		x86_pmu.intel_cap.lbr_format = LBR_FORMAT_EIP_FLAGS;
}

void intel_pmu_lbr_init(void)
{
	switch (x86_pmu.intel_cap.lbr_format) {
	case LBR_FORMAT_EIP_FLAGS2:
		x86_pmu.lbr_has_tsx = 1;
		x86_pmu.lbr_from_flags = 1;
		if (lbr_from_signext_quirk_needed())
			static_branch_enable(&lbr_from_quirk_key);
		break;

	case LBR_FORMAT_EIP_FLAGS:
		x86_pmu.lbr_from_flags = 1;
		break;

	case LBR_FORMAT_INFO:
		x86_pmu.lbr_has_tsx = 1;
		fallthrough;
	case LBR_FORMAT_INFO2:
		x86_pmu.lbr_has_info = 1;
		break;

	case LBR_FORMAT_TIME:
		x86_pmu.lbr_from_flags = 1;
		x86_pmu.lbr_to_cycles = 1;
		break;
	}

	if (x86_pmu.lbr_has_info) {
		/*
		 * Only used in combination with baseline pebs.
		 */
		static_branch_enable(&x86_lbr_mispred);
		static_branch_enable(&x86_lbr_cycles);
	}
}

/*
 * LBR state size is variable based on the max number of registers.
 * This calculates the expected state size, which should match
 * what the hardware enumerates for the size of XFEATURE_LBR.
 */
static inline unsigned int get_lbr_state_size(void)
{
	return sizeof(struct arch_lbr_state) +
	       x86_pmu.lbr_nr * sizeof(struct lbr_entry);
}

static bool is_arch_lbr_xsave_available(void)
{
	if (!boot_cpu_has(X86_FEATURE_XSAVES))
		return false;

	/*
	 * Check the LBR state with the corresponding software structure.
	 * Disable LBR XSAVES support if the size doesn't match.
	 */
	if (xfeature_size(XFEATURE_LBR) == 0)
		return false;

	if (WARN_ON(xfeature_size(XFEATURE_LBR) != get_lbr_state_size()))
		return false;

	return true;
}

void __init intel_pmu_arch_lbr_init(void)
{
	struct pmu *pmu = x86_get_pmu(smp_processor_id());
	union cpuid28_eax eax;
	union cpuid28_ebx ebx;
	union cpuid28_ecx ecx;
	unsigned int unused_edx;
	bool arch_lbr_xsave;
	size_t size;
	u64 lbr_nr;

	/* Arch LBR Capabilities */
	cpuid(28, &eax.full, &ebx.full, &ecx.full, &unused_edx);

	lbr_nr = fls(eax.split.lbr_depth_mask) * 8;
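	/*
	 * Note (added for clarity, not in the upstream source): CPUID leaf
	 * 0x1c reports the supported LBR depths as a bit mask in EAX[7:0],
	 * where bit n set means a depth of 8 * (n + 1) is supported, so
	 * fls() * 8 selects the deepest depth the hardware offers.
	 */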
	if (!lbr_nr)
		goto clear_arch_lbr;

	/* Apply the max depth of Arch LBR */
	if (wrmsrl_safe(MSR_ARCH_LBR_DEPTH, lbr_nr))
		goto clear_arch_lbr;

	x86_pmu.lbr_depth_mask = eax.split.lbr_depth_mask;
	x86_pmu.lbr_deep_c_reset = eax.split.lbr_deep_c_reset;
	x86_pmu.lbr_lip = eax.split.lbr_lip;
	x86_pmu.lbr_cpl = ebx.split.lbr_cpl;
	x86_pmu.lbr_filter = ebx.split.lbr_filter;
	x86_pmu.lbr_call_stack = ebx.split.lbr_call_stack;
	x86_pmu.lbr_mispred = ecx.split.lbr_mispred;
	x86_pmu.lbr_timed_lbr = ecx.split.lbr_timed_lbr;
	x86_pmu.lbr_br_type = ecx.split.lbr_br_type;
	x86_pmu.lbr_nr = lbr_nr;

	if (x86_pmu.lbr_mispred)
		static_branch_enable(&x86_lbr_mispred);
	if (x86_pmu.lbr_timed_lbr)
		static_branch_enable(&x86_lbr_cycles);
	if (x86_pmu.lbr_br_type)
		static_branch_enable(&x86_lbr_type);

	arch_lbr_xsave = is_arch_lbr_xsave_available();
	if (arch_lbr_xsave) {
		size = sizeof(struct x86_perf_task_context_arch_lbr_xsave) +
		       get_lbr_state_size();
		pmu->task_ctx_cache = create_lbr_kmem_cache(size,
							    XSAVE_ALIGNMENT);
	}

	if (!pmu->task_ctx_cache) {
		arch_lbr_xsave = false;

		size = sizeof(struct x86_perf_task_context_arch_lbr) +
		       lbr_nr * sizeof(struct lbr_entry);
		pmu->task_ctx_cache = create_lbr_kmem_cache(size, 0);
	}

	x86_pmu.lbr_from = MSR_ARCH_LBR_FROM_0;
	x86_pmu.lbr_to = MSR_ARCH_LBR_TO_0;
	x86_pmu.lbr_info = MSR_ARCH_LBR_INFO_0;

	/* LBR callstack requires both CPL and Branch Filtering support */
	if (!x86_pmu.lbr_cpl ||
	    !x86_pmu.lbr_filter ||
	    !x86_pmu.lbr_call_stack)
		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = LBR_NOT_SUPP;

	if (!x86_pmu.lbr_cpl) {
		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_NOT_SUPP;
		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_NOT_SUPP;
	} else if (!x86_pmu.lbr_filter) {
		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_NOT_SUPP;
		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_NOT_SUPP;
		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_NOT_SUPP;
		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_NOT_SUPP;
		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_NOT_SUPP;
		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_NOT_SUPP;
		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_NOT_SUPP;
	}

	x86_pmu.lbr_ctl_mask = ARCH_LBR_CTL_MASK;
	x86_pmu.lbr_ctl_map = arch_lbr_ctl_map;

	if (!x86_pmu.lbr_cpl && !x86_pmu.lbr_filter)
		x86_pmu.lbr_ctl_map = NULL;

	x86_pmu.lbr_reset = intel_pmu_arch_lbr_reset;
	if (arch_lbr_xsave) {
		x86_pmu.lbr_save = intel_pmu_arch_lbr_xsaves;
		x86_pmu.lbr_restore = intel_pmu_arch_lbr_xrstors;
		x86_pmu.lbr_read = intel_pmu_arch_lbr_read_xsave;
		pr_cont("XSAVE ");
	} else {
		x86_pmu.lbr_save = intel_pmu_arch_lbr_save;
		x86_pmu.lbr_restore = intel_pmu_arch_lbr_restore;
		x86_pmu.lbr_read = intel_pmu_arch_lbr_read;
	}

	pr_cont("Architectural LBR, ");

	return;

clear_arch_lbr:
	setup_clear_cpu_cap(X86_FEATURE_ARCH_LBR);
}

/**
 * x86_perf_get_lbr - get the LBR records information
 *
 * @lbr: the caller's memory to store the LBR records information
 */
void x86_perf_get_lbr(struct x86_pmu_lbr *lbr)
{
	int lbr_fmt = x86_pmu.intel_cap.lbr_format;

	lbr->nr = x86_pmu.lbr_nr;
	lbr->from = x86_pmu.lbr_from;
	lbr->to = x86_pmu.lbr_to;
	lbr->info = (lbr_fmt == LBR_FORMAT_INFO) ? x86_pmu.lbr_info : 0;
}
EXPORT_SYMBOL_GPL(x86_perf_get_lbr);

struct event_constraint vlbr_constraint =
	__EVENT_CONSTRAINT(INTEL_FIXED_VLBR_EVENT, (1ULL << INTEL_PMC_IDX_FIXED_VLBR),
			  FIXED_EVENT_FLAGS, 1, 0, PERF_X86_EVENT_LBR_SELECT);