lbr.c

// SPDX-License-Identifier: GPL-2.0
#include <linux/perf_event.h>
#include <asm/perf_event.h>

#include "../perf_event.h"

/* LBR Branch Select valid bits */
#define LBR_SELECT_MASK		0x1ff

/*
 * LBR Branch Select filter bits which, when set, ensure that the
 * corresponding type of branches are not recorded
 */
#define LBR_SELECT_KERNEL		0	/* Branches ending in CPL = 0 */
#define LBR_SELECT_USER			1	/* Branches ending in CPL > 0 */
#define LBR_SELECT_JCC			2	/* Conditional branches */
#define LBR_SELECT_CALL_NEAR_REL	3	/* Near relative calls */
#define LBR_SELECT_CALL_NEAR_IND	4	/* Indirect relative calls */
#define LBR_SELECT_RET_NEAR		5	/* Near returns */
#define LBR_SELECT_JMP_NEAR_IND		6	/* Near indirect jumps (excl. calls and returns) */
#define LBR_SELECT_JMP_NEAR_REL		7	/* Near relative jumps (excl. calls) */
#define LBR_SELECT_FAR_BRANCH		8	/* Far branches */

#define LBR_KERNEL	BIT(LBR_SELECT_KERNEL)
#define LBR_USER	BIT(LBR_SELECT_USER)
#define LBR_JCC		BIT(LBR_SELECT_JCC)
#define LBR_REL_CALL	BIT(LBR_SELECT_CALL_NEAR_REL)
#define LBR_IND_CALL	BIT(LBR_SELECT_CALL_NEAR_IND)
#define LBR_RETURN	BIT(LBR_SELECT_RET_NEAR)
#define LBR_REL_JMP	BIT(LBR_SELECT_JMP_NEAR_REL)
#define LBR_IND_JMP	BIT(LBR_SELECT_JMP_NEAR_IND)
#define LBR_FAR		BIT(LBR_SELECT_FAR_BRANCH)

#define LBR_NOT_SUPP	-1	/* unsupported filter */
#define LBR_IGNORE	0

#define LBR_ANY		\
	(LBR_JCC | LBR_REL_CALL | LBR_IND_CALL | LBR_RETURN |	\
	 LBR_REL_JMP | LBR_IND_JMP | LBR_FAR)

struct branch_entry {
	union {
		struct {
			u64	ip:58;
			u64	ip_sign_ext:5;
			u64	mispredict:1;
		} split;
		u64	full;
	} from;

	union {
		struct {
			u64	ip:58;
			u64	ip_sign_ext:3;
			u64	reserved:1;
			u64	spec:1;
			u64	valid:1;
		} split;
		u64	full;
	} to;
};
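
/*
 * Each branch record occupies a pair of MSRs starting at
 * MSR_AMD_SAMP_BR_FROM: the "from" address sits at the even offset
 * (idx * 2) and the "to" address at the odd offset (idx * 2 + 1).
 */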
static __always_inline void amd_pmu_lbr_set_from(unsigned int idx, u64 val)
{
	wrmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2, val);
}

static __always_inline void amd_pmu_lbr_set_to(unsigned int idx, u64 val)
{
	wrmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val);
}

static __always_inline u64 amd_pmu_lbr_get_from(unsigned int idx)
{
	u64 val;

	rdmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2, val);

	return val;
}

static __always_inline u64 amd_pmu_lbr_get_to(unsigned int idx)
{
	u64 val;

	rdmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val);

	return val;
}
static __always_inline u64 sign_ext_branch_ip(u64 ip)
{
	u32 shift = 64 - boot_cpu_data.x86_virt_bits;

	return (u64)(((s64)ip << shift) >> shift);
}
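
/*
 * Software filtering pass: decode each recorded branch, drop entries whose
 * type does not match the requested branch selection mask and compact the
 * remaining records. This catches branch types that the hardware LBR_SELECT
 * filter cannot express.
 */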
static void amd_pmu_lbr_filter(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	int br_sel = cpuc->br_sel, offset, type, i, j;
	bool compress = false;
	bool fused_only = false;
	u64 from, to;

	/* If sampling all branches, there is nothing to filter */
	if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
	    ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
		fused_only = true;

	for (i = 0; i < cpuc->lbr_stack.nr; i++) {
		from = cpuc->lbr_entries[i].from;
		to = cpuc->lbr_entries[i].to;
		type = branch_type_fused(from, to, 0, &offset);

		/*
		 * Adjust the branch from address in case of instruction
		 * fusion where it points to an instruction preceding the
		 * actual branch
		 */
		if (offset) {
			cpuc->lbr_entries[i].from += offset;
			if (fused_only)
				continue;
		}

		/* If type does not correspond, then discard */
		if (type == X86_BR_NONE || (br_sel & type) != type) {
			cpuc->lbr_entries[i].from = 0;	/* mark invalid */
			compress = true;
		}

		if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE)
			cpuc->lbr_entries[i].type = common_branch_type(type);
	}

	if (!compress)
		return;

	/* Remove all invalid entries */
	for (i = 0; i < cpuc->lbr_stack.nr; ) {
		if (!cpuc->lbr_entries[i].from) {
			j = i;
			while (++j < cpuc->lbr_stack.nr)
				cpuc->lbr_entries[j - 1] = cpuc->lbr_entries[j];
			cpuc->lbr_stack.nr--;
			if (!cpuc->lbr_entries[i].from)
				continue;
		}
		i++;
	}
}
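
/*
 * Maps the (valid << 1) | spec encoding of a "to" record, as computed in
 * amd_pmu_lbr_read() below, to the corresponding perf branch speculation
 * outcome.
 */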
static const int lbr_spec_map[PERF_BR_SPEC_MAX] = {
	PERF_BR_SPEC_NA,
	PERF_BR_SPEC_WRONG_PATH,
	PERF_BR_NON_SPEC_CORRECT_PATH,
	PERF_BR_SPEC_CORRECT_PATH,
};

void amd_pmu_lbr_read(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct perf_branch_entry *br = cpuc->lbr_entries;
	struct branch_entry entry;
	int out = 0, idx, i;

	if (!cpuc->lbr_users)
		return;

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		entry.from.full = amd_pmu_lbr_get_from(i);
		entry.to.full = amd_pmu_lbr_get_to(i);

		/*
		 * Check if a branch has been logged; if valid = 0, spec = 0
		 * then no branch was recorded
		 */
		if (!entry.to.split.valid && !entry.to.split.spec)
			continue;

		perf_clear_branch_entry_bitfields(br + out);

		br[out].from = sign_ext_branch_ip(entry.from.split.ip);
		br[out].to = sign_ext_branch_ip(entry.to.split.ip);
		br[out].mispred = entry.from.split.mispredict;
		br[out].predicted = !br[out].mispred;

		/*
		 * Set branch speculation information using the status of
		 * the valid and spec bits.
		 *
		 * When valid = 0, spec = 0, no branch was recorded and the
		 * entry is discarded as seen above.
		 *
		 * When valid = 0, spec = 1, the recorded branch was
		 * speculative but took the wrong path.
		 *
		 * When valid = 1, spec = 0, the recorded branch was
		 * non-speculative but took the correct path.
		 *
		 * When valid = 1, spec = 1, the recorded branch was
		 * speculative and took the correct path.
		 */
		idx = (entry.to.split.valid << 1) | entry.to.split.spec;
		br[out].spec = lbr_spec_map[idx];
		out++;
	}

	cpuc->lbr_stack.nr = out;

	/*
	 * Internal register renaming ensures that LBR From[0] and
	 * LBR To[0] always represent the TOS
	 */
	cpuc->lbr_stack.hw_idx = 0;

	/* Perform further software filtering */
	amd_pmu_lbr_filter();
}
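
/*
 * Translates perf_branch_sample_type bits into LBR_SELECT filter bits.
 * Branch classes the hardware filter cannot express are marked
 * LBR_NOT_SUPP and rejected in amd_pmu_lbr_setup_filter().
 */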
static const int lbr_select_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGNORE,
	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL | LBR_FAR,
	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_FAR,
	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]	= LBR_IND_CALL,
	[PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT]	= LBR_NOT_SUPP,
	[PERF_SAMPLE_BRANCH_IN_TX_SHIFT]	= LBR_NOT_SUPP,
	[PERF_SAMPLE_BRANCH_NO_TX_SHIFT]	= LBR_NOT_SUPP,
	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= LBR_JCC,
	[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT]	= LBR_NOT_SUPP,
	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]	= LBR_IND_JMP,
	[PERF_SAMPLE_BRANCH_CALL_SHIFT]		= LBR_REL_CALL,
	[PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT]	= LBR_NOT_SUPP,
	[PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT]	= LBR_NOT_SUPP,
};

static int amd_pmu_lbr_setup_filter(struct perf_event *event)
{
	struct hw_perf_event_extra *reg = &event->hw.branch_reg;
	u64 br_type = event->attr.branch_sample_type;
	u64 mask = 0, v;
	int i;

	/* No LBR support */
	if (!x86_pmu.lbr_nr)
		return -EOPNOTSUPP;

	if (br_type & PERF_SAMPLE_BRANCH_USER)
		mask |= X86_BR_USER;

	if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
		mask |= X86_BR_KERNEL;

	/* Ignore BRANCH_HV here */

	if (br_type & PERF_SAMPLE_BRANCH_ANY)
		mask |= X86_BR_ANY;

	if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
		mask |= X86_BR_ANY_CALL;

	if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
		mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;

	if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
		mask |= X86_BR_IND_CALL;

	if (br_type & PERF_SAMPLE_BRANCH_COND)
		mask |= X86_BR_JCC;

	if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
		mask |= X86_BR_IND_JMP;

	if (br_type & PERF_SAMPLE_BRANCH_CALL)
		mask |= X86_BR_CALL | X86_BR_ZERO_CALL;

	if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE)
		mask |= X86_BR_TYPE_SAVE;

	reg->reg = mask;
	mask = 0;

	for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) {
		if (!(br_type & BIT_ULL(i)))
			continue;

		v = lbr_select_map[i];
		if (v == LBR_NOT_SUPP)
			return -EOPNOTSUPP;

		if (v != LBR_IGNORE)
			mask |= v;
	}

	/* Filter bits operate in suppress mode */
	reg->config = mask ^ LBR_SELECT_MASK;

	return 0;
}
int amd_pmu_lbr_hw_config(struct perf_event *event)
{
	int ret = 0;

	/* LBR is not recommended in counting mode */
	if (!is_sampling_event(event))
		return -EINVAL;

	ret = amd_pmu_lbr_setup_filter(event);
	if (!ret)
		event->attach_state |= PERF_ATTACH_SCHED_CB;

	return ret;
}

void amd_pmu_lbr_reset(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	int i;

	if (!x86_pmu.lbr_nr)
		return;

	/* Reset all branch records individually */
	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		amd_pmu_lbr_set_from(i, 0);
		amd_pmu_lbr_set_to(i, 0);
	}

	cpuc->last_task_ctx = NULL;
	cpuc->last_log_id = 0;
	wrmsrl(MSR_AMD64_LBR_SELECT, 0);
}

void amd_pmu_lbr_add(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct hw_perf_event_extra *reg = &event->hw.branch_reg;

	if (!x86_pmu.lbr_nr)
		return;

	if (has_branch_stack(event)) {
		cpuc->lbr_select = 1;
		cpuc->lbr_sel->config = reg->config;
		cpuc->br_sel = reg->reg;
	}

	perf_sched_cb_inc(event->ctx->pmu);

	if (!cpuc->lbr_users++ && !event->total_time_running)
		amd_pmu_lbr_reset();
}

void amd_pmu_lbr_del(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (!x86_pmu.lbr_nr)
		return;

	if (has_branch_stack(event))
		cpuc->lbr_select = 0;

	cpuc->lbr_users--;
	WARN_ON_ONCE(cpuc->lbr_users < 0);
	perf_sched_cb_dec(event->ctx->pmu);
}
void amd_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	/*
	 * A context switch can flip the address space and LBR entries are
	 * not tagged with an identifier. Hence, branches cannot be resolved
	 * from the old address space and the LBR records should be wiped.
	 */
	if (cpuc->lbr_users && sched_in)
		amd_pmu_lbr_reset();
}
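
/*
 * DBG_EXTN_CFG_LBRV2EN turns branch recording on, while
 * DEBUGCTLMSR_FREEZE_LBRS_ON_PMI stops the hardware from overwriting the
 * records once a counter overflow interrupt fires, so the stack read by
 * the handler matches the sample.
 */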
void amd_pmu_lbr_enable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	u64 lbr_select, dbg_ctl, dbg_extn_cfg;

	if (!cpuc->lbr_users || !x86_pmu.lbr_nr)
		return;

	/* Set hardware branch filter */
	if (cpuc->lbr_select) {
		lbr_select = cpuc->lbr_sel->config & LBR_SELECT_MASK;
		wrmsrl(MSR_AMD64_LBR_SELECT, lbr_select);
	}

	rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
	rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);

	wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
	wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN);
}

void amd_pmu_lbr_disable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	u64 dbg_ctl, dbg_extn_cfg;

	if (!cpuc->lbr_users || !x86_pmu.lbr_nr)
		return;

	rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
	rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);

	wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN);
	wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
}
__init int amd_pmu_lbr_init(void)
{
	union cpuid_0x80000022_ebx ebx;

	if (x86_pmu.version < 2 || !boot_cpu_has(X86_FEATURE_AMD_LBR_V2))
		return -EOPNOTSUPP;

	/* Set number of entries */
	ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
	x86_pmu.lbr_nr = ebx.split.lbr_v2_stack_sz;

	pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);

	return 0;
}