perf_event.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Xtensa Performance Monitor Module driver
  4. * See Tensilica Debug User's Guide for PMU registers documentation.
  5. *
  6. * Copyright (C) 2015 Cadence Design Systems Inc.
  7. */
  8. #include <linux/interrupt.h>
  9. #include <linux/irqdomain.h>
  10. #include <linux/module.h>
  11. #include <linux/of.h>
  12. #include <linux/perf_event.h>
  13. #include <linux/platform_device.h>
  14. #include <asm/core.h>
  15. #include <asm/processor.h>
  16. #include <asm/stacktrace.h>
  17. #define XTENSA_HWVERSION_RG_2015_0 260000
  18. #if XCHAL_HW_MIN_VERSION >= XTENSA_HWVERSION_RG_2015_0
  19. #define XTENSA_PMU_ERI_BASE 0x00101000
  20. #else
  21. #define XTENSA_PMU_ERI_BASE 0x00001000
  22. #endif
  23. /* Global control/status for all perf counters */
  24. #define XTENSA_PMU_PMG XTENSA_PMU_ERI_BASE
  25. /* Perf counter values */
  26. #define XTENSA_PMU_PM(i) (XTENSA_PMU_ERI_BASE + 0x80 + (i) * 4)
  27. /* Perf counter control registers */
  28. #define XTENSA_PMU_PMCTRL(i) (XTENSA_PMU_ERI_BASE + 0x100 + (i) * 4)
  29. /* Perf counter status registers */
  30. #define XTENSA_PMU_PMSTAT(i) (XTENSA_PMU_ERI_BASE + 0x180 + (i) * 4)
  31. #define XTENSA_PMU_PMG_PMEN 0x1
  32. #define XTENSA_PMU_COUNTER_MASK 0xffffffffULL
  33. #define XTENSA_PMU_COUNTER_MAX 0x7fffffff
  34. #define XTENSA_PMU_PMCTRL_INTEN 0x00000001
  35. #define XTENSA_PMU_PMCTRL_KRNLCNT 0x00000008
  36. #define XTENSA_PMU_PMCTRL_TRACELEVEL 0x000000f0
  37. #define XTENSA_PMU_PMCTRL_SELECT_SHIFT 8
  38. #define XTENSA_PMU_PMCTRL_SELECT 0x00001f00
  39. #define XTENSA_PMU_PMCTRL_MASK_SHIFT 16
  40. #define XTENSA_PMU_PMCTRL_MASK 0xffff0000
  41. #define XTENSA_PMU_MASK(select, mask) \
  42. (((select) << XTENSA_PMU_PMCTRL_SELECT_SHIFT) | \
  43. ((mask) << XTENSA_PMU_PMCTRL_MASK_SHIFT) | \
  44. XTENSA_PMU_PMCTRL_TRACELEVEL | \
  45. XTENSA_PMU_PMCTRL_INTEN)
  46. #define XTENSA_PMU_PMSTAT_OVFL 0x00000001
  47. #define XTENSA_PMU_PMSTAT_INTASRT 0x00000010
  48. struct xtensa_pmu_events {
  49. /* Array of events currently on this core */
  50. struct perf_event *event[XCHAL_NUM_PERF_COUNTERS];
  51. /* Bitmap of used hardware counters */
  52. unsigned long used_mask[BITS_TO_LONGS(XCHAL_NUM_PERF_COUNTERS)];
  53. };
  54. static DEFINE_PER_CPU(struct xtensa_pmu_events, xtensa_pmu_events);
  55. static const u32 xtensa_hw_ctl[] = {
  56. [PERF_COUNT_HW_CPU_CYCLES] = XTENSA_PMU_MASK(0, 0x1),
  57. [PERF_COUNT_HW_INSTRUCTIONS] = XTENSA_PMU_MASK(2, 0xffff),
  58. [PERF_COUNT_HW_CACHE_REFERENCES] = XTENSA_PMU_MASK(10, 0x1),
  59. [PERF_COUNT_HW_CACHE_MISSES] = XTENSA_PMU_MASK(12, 0x1),
  60. /* Taken and non-taken branches + taken loop ends */
  61. [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XTENSA_PMU_MASK(2, 0x490),
  62. /* Instruction-related + other global stall cycles */
  63. [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = XTENSA_PMU_MASK(4, 0x1ff),
  64. /* Data-related global stall cycles */
  65. [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = XTENSA_PMU_MASK(3, 0x1ff),
  66. };
  67. #define C(_x) PERF_COUNT_HW_CACHE_##_x
  68. static const u32 xtensa_cache_ctl[][C(OP_MAX)][C(RESULT_MAX)] = {
  69. [C(L1D)] = {
  70. [C(OP_READ)] = {
  71. [C(RESULT_ACCESS)] = XTENSA_PMU_MASK(10, 0x1),
  72. [C(RESULT_MISS)] = XTENSA_PMU_MASK(10, 0x2),
  73. },
  74. [C(OP_WRITE)] = {
  75. [C(RESULT_ACCESS)] = XTENSA_PMU_MASK(11, 0x1),
  76. [C(RESULT_MISS)] = XTENSA_PMU_MASK(11, 0x2),
  77. },
  78. },
  79. [C(L1I)] = {
  80. [C(OP_READ)] = {
  81. [C(RESULT_ACCESS)] = XTENSA_PMU_MASK(8, 0x1),
  82. [C(RESULT_MISS)] = XTENSA_PMU_MASK(8, 0x2),
  83. },
  84. },
  85. [C(DTLB)] = {
  86. [C(OP_READ)] = {
  87. [C(RESULT_ACCESS)] = XTENSA_PMU_MASK(9, 0x1),
  88. [C(RESULT_MISS)] = XTENSA_PMU_MASK(9, 0x8),
  89. },
  90. },
  91. [C(ITLB)] = {
  92. [C(OP_READ)] = {
  93. [C(RESULT_ACCESS)] = XTENSA_PMU_MASK(7, 0x1),
  94. [C(RESULT_MISS)] = XTENSA_PMU_MASK(7, 0x8),
  95. },
  96. },
  97. };
  98. static int xtensa_pmu_cache_event(u64 config)
  99. {
  100. unsigned int cache_type, cache_op, cache_result;
  101. int ret;
  102. cache_type = (config >> 0) & 0xff;
  103. cache_op = (config >> 8) & 0xff;
  104. cache_result = (config >> 16) & 0xff;
  105. if (cache_type >= ARRAY_SIZE(xtensa_cache_ctl) ||
  106. cache_op >= C(OP_MAX) ||
  107. cache_result >= C(RESULT_MAX))
  108. return -EINVAL;
  109. ret = xtensa_cache_ctl[cache_type][cache_op][cache_result];
  110. if (ret == 0)
  111. return -EINVAL;
  112. return ret;
  113. }
  114. static inline uint32_t xtensa_pmu_read_counter(int idx)
  115. {
  116. return get_er(XTENSA_PMU_PM(idx));
  117. }
  118. static inline void xtensa_pmu_write_counter(int idx, uint32_t v)
  119. {
  120. set_er(v, XTENSA_PMU_PM(idx));
  121. }
  122. static void xtensa_perf_event_update(struct perf_event *event,
  123. struct hw_perf_event *hwc, int idx)
  124. {
  125. uint64_t prev_raw_count, new_raw_count;
  126. int64_t delta;
  127. do {
  128. prev_raw_count = local64_read(&hwc->prev_count);
  129. new_raw_count = xtensa_pmu_read_counter(event->hw.idx);
  130. } while (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
  131. new_raw_count) != prev_raw_count);
  132. delta = (new_raw_count - prev_raw_count) & XTENSA_PMU_COUNTER_MASK;
  133. local64_add(delta, &event->count);
  134. local64_sub(delta, &hwc->period_left);
  135. }
  136. static bool xtensa_perf_event_set_period(struct perf_event *event,
  137. struct hw_perf_event *hwc, int idx)
  138. {
  139. bool rc = false;
  140. s64 left;
  141. if (!is_sampling_event(event)) {
  142. left = XTENSA_PMU_COUNTER_MAX;
  143. } else {
  144. s64 period = hwc->sample_period;
  145. left = local64_read(&hwc->period_left);
  146. if (left <= -period) {
  147. left = period;
  148. local64_set(&hwc->period_left, left);
  149. hwc->last_period = period;
  150. rc = true;
  151. } else if (left <= 0) {
  152. left += period;
  153. local64_set(&hwc->period_left, left);
  154. hwc->last_period = period;
  155. rc = true;
  156. }
  157. if (left > XTENSA_PMU_COUNTER_MAX)
  158. left = XTENSA_PMU_COUNTER_MAX;
  159. }
  160. local64_set(&hwc->prev_count, -left);
  161. xtensa_pmu_write_counter(idx, -left);
  162. perf_event_update_userpage(event);
  163. return rc;
  164. }
  165. static void xtensa_pmu_enable(struct pmu *pmu)
  166. {
  167. set_er(get_er(XTENSA_PMU_PMG) | XTENSA_PMU_PMG_PMEN, XTENSA_PMU_PMG);
  168. }
  169. static void xtensa_pmu_disable(struct pmu *pmu)
  170. {
  171. set_er(get_er(XTENSA_PMU_PMG) & ~XTENSA_PMU_PMG_PMEN, XTENSA_PMU_PMG);
  172. }
  173. static int xtensa_pmu_event_init(struct perf_event *event)
  174. {
  175. int ret;
  176. switch (event->attr.type) {
  177. case PERF_TYPE_HARDWARE:
  178. if (event->attr.config >= ARRAY_SIZE(xtensa_hw_ctl) ||
  179. xtensa_hw_ctl[event->attr.config] == 0)
  180. return -EINVAL;
  181. event->hw.config = xtensa_hw_ctl[event->attr.config];
  182. return 0;
  183. case PERF_TYPE_HW_CACHE:
  184. ret = xtensa_pmu_cache_event(event->attr.config);
  185. if (ret < 0)
  186. return ret;
  187. event->hw.config = ret;
  188. return 0;
  189. case PERF_TYPE_RAW:
  190. /* Not 'previous counter' select */
  191. if ((event->attr.config & XTENSA_PMU_PMCTRL_SELECT) ==
  192. (1 << XTENSA_PMU_PMCTRL_SELECT_SHIFT))
  193. return -EINVAL;
  194. event->hw.config = (event->attr.config &
  195. (XTENSA_PMU_PMCTRL_KRNLCNT |
  196. XTENSA_PMU_PMCTRL_TRACELEVEL |
  197. XTENSA_PMU_PMCTRL_SELECT |
  198. XTENSA_PMU_PMCTRL_MASK)) |
  199. XTENSA_PMU_PMCTRL_INTEN;
  200. return 0;
  201. default:
  202. return -ENOENT;
  203. }
  204. }
  205. /*
  206. * Starts/Stops a counter present on the PMU. The PMI handler
  207. * should stop the counter when perf_event_overflow() returns
  208. * !0. ->start() will be used to continue.
  209. */
  210. static void xtensa_pmu_start(struct perf_event *event, int flags)
  211. {
  212. struct hw_perf_event *hwc = &event->hw;
  213. int idx = hwc->idx;
  214. if (WARN_ON_ONCE(idx == -1))
  215. return;
  216. if (flags & PERF_EF_RELOAD) {
  217. WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
  218. xtensa_perf_event_set_period(event, hwc, idx);
  219. }
  220. hwc->state = 0;
  221. set_er(hwc->config, XTENSA_PMU_PMCTRL(idx));
  222. }
  223. static void xtensa_pmu_stop(struct perf_event *event, int flags)
  224. {
  225. struct hw_perf_event *hwc = &event->hw;
  226. int idx = hwc->idx;
  227. if (!(hwc->state & PERF_HES_STOPPED)) {
  228. set_er(0, XTENSA_PMU_PMCTRL(idx));
  229. set_er(get_er(XTENSA_PMU_PMSTAT(idx)),
  230. XTENSA_PMU_PMSTAT(idx));
  231. hwc->state |= PERF_HES_STOPPED;
  232. }
  233. if ((flags & PERF_EF_UPDATE) &&
  234. !(event->hw.state & PERF_HES_UPTODATE)) {
  235. xtensa_perf_event_update(event, &event->hw, idx);
  236. event->hw.state |= PERF_HES_UPTODATE;
  237. }
  238. }
  239. /*
  240. * Adds/Removes a counter to/from the PMU, can be done inside
  241. * a transaction, see the ->*_txn() methods.
  242. */
  243. static int xtensa_pmu_add(struct perf_event *event, int flags)
  244. {
  245. struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
  246. struct hw_perf_event *hwc = &event->hw;
  247. int idx = hwc->idx;
  248. if (__test_and_set_bit(idx, ev->used_mask)) {
  249. idx = find_first_zero_bit(ev->used_mask,
  250. XCHAL_NUM_PERF_COUNTERS);
  251. if (idx == XCHAL_NUM_PERF_COUNTERS)
  252. return -EAGAIN;
  253. __set_bit(idx, ev->used_mask);
  254. hwc->idx = idx;
  255. }
  256. ev->event[idx] = event;
  257. hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
  258. if (flags & PERF_EF_START)
  259. xtensa_pmu_start(event, PERF_EF_RELOAD);
  260. perf_event_update_userpage(event);
  261. return 0;
  262. }
  263. static void xtensa_pmu_del(struct perf_event *event, int flags)
  264. {
  265. struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
  266. xtensa_pmu_stop(event, PERF_EF_UPDATE);
  267. __clear_bit(event->hw.idx, ev->used_mask);
  268. perf_event_update_userpage(event);
  269. }
  270. static void xtensa_pmu_read(struct perf_event *event)
  271. {
  272. xtensa_perf_event_update(event, &event->hw, event->hw.idx);
  273. }
  274. static int callchain_trace(struct stackframe *frame, void *data)
  275. {
  276. struct perf_callchain_entry_ctx *entry = data;
  277. perf_callchain_store(entry, frame->pc);
  278. return 0;
  279. }
  280. void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
  281. struct pt_regs *regs)
  282. {
  283. xtensa_backtrace_kernel(regs, entry->max_stack,
  284. callchain_trace, NULL, entry);
  285. }
  286. void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
  287. struct pt_regs *regs)
  288. {
  289. xtensa_backtrace_user(regs, entry->max_stack,
  290. callchain_trace, entry);
  291. }
  292. void perf_event_print_debug(void)
  293. {
  294. unsigned long flags;
  295. unsigned i;
  296. local_irq_save(flags);
  297. pr_info("CPU#%d: PMG: 0x%08lx\n", smp_processor_id(),
  298. get_er(XTENSA_PMU_PMG));
  299. for (i = 0; i < XCHAL_NUM_PERF_COUNTERS; ++i)
  300. pr_info("PM%d: 0x%08lx, PMCTRL%d: 0x%08lx, PMSTAT%d: 0x%08lx\n",
  301. i, get_er(XTENSA_PMU_PM(i)),
  302. i, get_er(XTENSA_PMU_PMCTRL(i)),
  303. i, get_er(XTENSA_PMU_PMSTAT(i)));
  304. local_irq_restore(flags);
  305. }
  306. irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id)
  307. {
  308. irqreturn_t rc = IRQ_NONE;
  309. struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
  310. unsigned i;
  311. for_each_set_bit(i, ev->used_mask, XCHAL_NUM_PERF_COUNTERS) {
  312. uint32_t v = get_er(XTENSA_PMU_PMSTAT(i));
  313. struct perf_event *event = ev->event[i];
  314. struct hw_perf_event *hwc = &event->hw;
  315. u64 last_period;
  316. if (!(v & XTENSA_PMU_PMSTAT_OVFL))
  317. continue;
  318. set_er(v, XTENSA_PMU_PMSTAT(i));
  319. xtensa_perf_event_update(event, hwc, i);
  320. last_period = hwc->last_period;
  321. if (xtensa_perf_event_set_period(event, hwc, i)) {
  322. struct perf_sample_data data;
  323. struct pt_regs *regs = get_irq_regs();
  324. perf_sample_data_init(&data, 0, last_period);
  325. if (perf_event_overflow(event, &data, regs))
  326. xtensa_pmu_stop(event, 0);
  327. }
  328. rc = IRQ_HANDLED;
  329. }
  330. return rc;
  331. }
  332. static struct pmu xtensa_pmu = {
  333. .pmu_enable = xtensa_pmu_enable,
  334. .pmu_disable = xtensa_pmu_disable,
  335. .event_init = xtensa_pmu_event_init,
  336. .add = xtensa_pmu_add,
  337. .del = xtensa_pmu_del,
  338. .start = xtensa_pmu_start,
  339. .stop = xtensa_pmu_stop,
  340. .read = xtensa_pmu_read,
  341. };
  342. static int xtensa_pmu_setup(unsigned int cpu)
  343. {
  344. unsigned i;
  345. set_er(0, XTENSA_PMU_PMG);
  346. for (i = 0; i < XCHAL_NUM_PERF_COUNTERS; ++i) {
  347. set_er(0, XTENSA_PMU_PMCTRL(i));
  348. set_er(get_er(XTENSA_PMU_PMSTAT(i)), XTENSA_PMU_PMSTAT(i));
  349. }
  350. return 0;
  351. }
  352. static int __init xtensa_pmu_init(void)
  353. {
  354. int ret;
  355. int irq = irq_create_mapping(NULL, XCHAL_PROFILING_INTERRUPT);
  356. ret = cpuhp_setup_state(CPUHP_AP_PERF_XTENSA_STARTING,
  357. "perf/xtensa:starting", xtensa_pmu_setup,
  358. NULL);
  359. if (ret) {
  360. pr_err("xtensa_pmu: failed to register CPU-hotplug.\n");
  361. return ret;
  362. }
  363. #if XTENSA_FAKE_NMI
  364. enable_irq(irq);
  365. #else
  366. ret = request_irq(irq, xtensa_pmu_irq_handler, IRQF_PERCPU,
  367. "pmu", NULL);
  368. if (ret < 0)
  369. return ret;
  370. #endif
  371. ret = perf_pmu_register(&xtensa_pmu, "cpu", PERF_TYPE_RAW);
  372. if (ret)
  373. free_irq(irq, NULL);
  374. return ret;
  375. }
  376. early_initcall(xtensa_pmu_init);