// SPDX-License-Identifier: GPL-2.0+
//
// Linux performance counter support for ARC CPUs.
// This code is inspired by the perf support of various other architectures.
//
// Copyright (C) 2013-2018 Synopsys, Inc. (www.synopsys.com)

#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>
#include <asm/arcregs.h>
#include <asm/stacktrace.h>

/* HW holds 8 symbols + one for null terminator */
#define ARCPMU_EVENT_NAME_LEN	9

/*
 * Some ARC pct quirks:
 *
 * PERF_COUNT_HW_STALLED_CYCLES_BACKEND
 * PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
 *	The ARC 700 can either measure stalls per pipeline stage, or all stalls
 *	combined; for now we assign all stalls to STALLED_CYCLES_BACKEND
 *	and all pipeline flushes (e.g. caused by mispredicts, etc.) to
 *	STALLED_CYCLES_FRONTEND.
 *
 *	We could start multiple performance counters and combine everything
 *	afterwards, but that makes it complicated.
 *
 *	Note that I$ cache misses aren't counted by either of the two!
 */

/*
 * ARC PCT has hardware conditions with fixed "names" but variable "indexes"
 * (based on a specific RTL build).
 * Below is the static map between perf generic/arc specific event_id and
 * h/w condition names.
 * At the time of probe, we loop through each index and find its name to
 * complete the mapping of perf event_id to h/w index, as the latter is
 * needed to actually program the counter.
 */
static const char * const arc_pmu_ev_hw_map[] = {
	/* count cycles */
	[PERF_COUNT_HW_CPU_CYCLES] = "crun",
	[PERF_COUNT_HW_REF_CPU_CYCLES] = "crun",
	[PERF_COUNT_HW_BUS_CYCLES] = "crun",

	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = "bflush",
	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = "bstall",

	/* counts condition */
	[PERF_COUNT_HW_INSTRUCTIONS] = "iall",
	/* All jump instructions that are taken */
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmptak",
#ifdef CONFIG_ISA_ARCV2
	[PERF_COUNT_HW_BRANCH_MISSES] = "bpmp",
#else
	[PERF_COUNT_ARC_BPOK] = "bpok",		  /* NP-NT, PT-T, PNT-NT */
	[PERF_COUNT_HW_BRANCH_MISSES] = "bpfail", /* NP-T, PT-NT, PNT-T */
#endif
	[PERF_COUNT_ARC_LDC] = "imemrdc",	/* Instr: mem read cached */
	[PERF_COUNT_ARC_STC] = "imemwrc",	/* Instr: mem write cached */
	[PERF_COUNT_ARC_DCLM] = "dclm",		/* D-cache Load Miss */
	[PERF_COUNT_ARC_DCSM] = "dcsm",		/* D-cache Store Miss */
	[PERF_COUNT_ARC_ICM] = "icm",		/* I-cache Miss */
	[PERF_COUNT_ARC_EDTLB] = "edtlb",	/* D-TLB Miss */
	[PERF_COUNT_ARC_EITLB] = "eitlb",	/* I-TLB Miss */

	[PERF_COUNT_HW_CACHE_REFERENCES] = "imemrdc",	/* Instr: mem read cached */
	[PERF_COUNT_HW_CACHE_MISSES] = "dclm",		/* D-cache Load Miss */
};

#define C(_x)			PERF_COUNT_HW_CACHE_##_x
#define CACHE_OP_UNSUPPORTED	0xffff

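/*
 * Generic perf cache events (type, op, result) mapped onto the ARC hardware
 * conditions above; combinations the hardware cannot count are marked
 * CACHE_OP_UNSUPPORTED and rejected at event init time.
 */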
static const unsigned int arc_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
	[C(L1D)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = PERF_COUNT_ARC_LDC,
			[C(RESULT_MISS)] = PERF_COUNT_ARC_DCLM,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = PERF_COUNT_ARC_STC,
			[C(RESULT_MISS)] = PERF_COUNT_ARC_DCSM,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
	[C(L1I)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = PERF_COUNT_HW_INSTRUCTIONS,
			[C(RESULT_MISS)] = PERF_COUNT_ARC_ICM,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
	[C(LL)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
	[C(DTLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = PERF_COUNT_ARC_LDC,
			[C(RESULT_MISS)] = PERF_COUNT_ARC_EDTLB,
		},
		/* DTLB LD/ST Miss not segregated by h/w */
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
	[C(ITLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = PERF_COUNT_ARC_EITLB,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
	[C(BPU)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
			[C(RESULT_MISS)] = PERF_COUNT_HW_BRANCH_MISSES,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
	[C(NODE)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
};

enum arc_pmu_attr_groups {
	ARCPMU_ATTR_GR_EVENTS,
	ARCPMU_ATTR_GR_FORMATS,
	ARCPMU_NR_ATTR_GR
};

struct arc_pmu_raw_event_entry {
	char name[ARCPMU_EVENT_NAME_LEN];
};

struct arc_pmu {
	struct pmu	pmu;
	unsigned int	irq;
	int		n_counters;
	int		n_events;
	u64		max_period;
	int		ev_hw_idx[PERF_COUNT_ARC_HW_MAX];

	struct arc_pmu_raw_event_entry	*raw_entry;
	struct attribute		**attrs;
	struct perf_pmu_events_attr	*attr;
	const struct attribute_group	*attr_groups[ARCPMU_NR_ATTR_GR + 1];
};

struct arc_pmu_cpu {
	/*
	 * A 1 bit for an index indicates that the counter is being used for
	 * an event. A 0 means that the counter can be used.
	 */
	unsigned long	used_mask[BITS_TO_LONGS(ARC_PERF_MAX_COUNTERS)];

	/*
	 * The events that are active on the PMU for the given index.
	 */
	struct perf_event *act_counter[ARC_PERF_MAX_COUNTERS];
};

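/*
 * Cookie passed to the stack unwinder callback: tracks how many frames have
 * been recorded so far and carries the perf callchain entry being filled in.
 */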
struct arc_callchain_trace {
	int depth;
	void *perf_stuff;
};

static int callchain_trace(unsigned int addr, void *data)
{
	struct arc_callchain_trace *ctrl = data;
	struct perf_callchain_entry_ctx *entry = ctrl->perf_stuff;

	perf_callchain_store(entry, addr);

	if (ctrl->depth++ < 3)
		return 0;

	return -1;
}

void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
			   struct pt_regs *regs)
{
	struct arc_callchain_trace ctrl = {
		.depth = 0,
		.perf_stuff = entry,
	};

	arc_unwind_core(NULL, regs, callchain_trace, &ctrl);
}

void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
			 struct pt_regs *regs)
{
	/*
	 * The user stack can't be unwound trivially with the kernel dwarf
	 * unwinder, so for now just record the user PC.
	 */
	perf_callchain_store(entry, instruction_pointer(regs));
}

static struct arc_pmu *arc_pmu;
static DEFINE_PER_CPU(struct arc_pmu_cpu, arc_pmu_cpu);

/* read counter #idx; note that counter# != event# on ARC! */
static u64 arc_pmu_read_counter(int idx)
{
	u32 tmp;
	u64 result;

	/*
	 * ARC supports making 'snapshots' of the counters, so we don't
	 * need to care about counters wrapping to 0 underneath our feet
	 */
	write_aux_reg(ARC_REG_PCT_INDEX, idx);
	tmp = read_aux_reg(ARC_REG_PCT_CONTROL);
	write_aux_reg(ARC_REG_PCT_CONTROL, tmp | ARC_REG_PCT_CONTROL_SN);
	result = (u64) (read_aux_reg(ARC_REG_PCT_SNAPH)) << 32;
	result |= read_aux_reg(ARC_REG_PCT_SNAPL);

	return result;
}

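/*
 * Fold the delta between the current hardware count and the last recorded
 * snapshot into the generic perf bookkeeping (event count and period_left).
 */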
static void arc_perf_event_update(struct perf_event *event,
				  struct hw_perf_event *hwc, int idx)
{
	u64 prev_raw_count = local64_read(&hwc->prev_count);
	u64 new_raw_count = arc_pmu_read_counter(idx);
	s64 delta = new_raw_count - prev_raw_count;

	/*
	 * We aren't afraid of hwc->prev_count changing beneath our feet
	 * because there's no way for this function to be re-entered for the
	 * same event.
	 */
	local64_set(&hwc->prev_count, new_raw_count);
	local64_add(delta, &event->count);
	local64_sub(delta, &hwc->period_left);
}

static void arc_pmu_read(struct perf_event *event)
{
	arc_perf_event_update(event, &event->hw, event->hw.idx);
}

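/*
 * Decode a PERF_TYPE_HW_CACHE config word (type | op << 8 | result << 16)
 * into an ARC hardware condition index via arc_pmu_cache_map[].
 */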
static int arc_pmu_cache_event(u64 config)
{
	unsigned int cache_type, cache_op, cache_result;
	int ret;

	cache_type = (config >> 0) & 0xff;
	cache_op = (config >> 8) & 0xff;
	cache_result = (config >> 16) & 0xff;

	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
		return -EINVAL;
	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
		return -EINVAL;
	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return -EINVAL;

	ret = arc_pmu_cache_map[cache_type][cache_op][cache_result];

	if (ret == CACHE_OP_UNSUPPORTED)
		return -ENOENT;

	pr_debug("init cache event: type/op/result %d/%d/%d with h/w %d \'%s\'\n",
		 cache_type, cache_op, cache_result, ret,
		 arc_pmu_ev_hw_map[ret]);

	return ret;
}

/* initializes hw_perf_event structure if event is supported */
static int arc_pmu_event_init(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	int ret;

	if (!is_sampling_event(event)) {
		hwc->sample_period = arc_pmu->max_period;
		hwc->last_period = hwc->sample_period;
		local64_set(&hwc->period_left, hwc->sample_period);
	}

	hwc->config = 0;

	if (is_isa_arcv2()) {
		/* "exclude user" means "count only kernel" */
		if (event->attr.exclude_user)
			hwc->config |= ARC_REG_PCT_CONFIG_KERN;

		/* "exclude kernel" means "count only user" */
		if (event->attr.exclude_kernel)
			hwc->config |= ARC_REG_PCT_CONFIG_USER;
	}

	switch (event->attr.type) {
	case PERF_TYPE_HARDWARE:
		if (event->attr.config >= PERF_COUNT_HW_MAX)
			return -ENOENT;
		if (arc_pmu->ev_hw_idx[event->attr.config] < 0)
			return -ENOENT;
		hwc->config |= arc_pmu->ev_hw_idx[event->attr.config];
		pr_debug("init event %d with h/w %08x \'%s\'\n",
			 (int)event->attr.config, (int)hwc->config,
			 arc_pmu_ev_hw_map[event->attr.config]);
		return 0;

	case PERF_TYPE_HW_CACHE:
		ret = arc_pmu_cache_event(event->attr.config);
		if (ret < 0)
			return ret;
		hwc->config |= arc_pmu->ev_hw_idx[ret];
		pr_debug("init cache event with h/w %08x \'%s\'\n",
			 (int)hwc->config, arc_pmu_ev_hw_map[ret]);
		return 0;

	case PERF_TYPE_RAW:
		if (event->attr.config >= arc_pmu->n_events)
			return -ENOENT;

		hwc->config |= event->attr.config;
		pr_debug("init raw event with idx %lld \'%s\'\n",
			 event->attr.config,
			 arc_pmu->raw_entry[event->attr.config].name);

		return 0;

	default:
		return -ENOENT;
	}
}

/* starts all counters */
static void arc_pmu_enable(struct pmu *pmu)
{
	u32 tmp;

	tmp = read_aux_reg(ARC_REG_PCT_CONTROL);
	write_aux_reg(ARC_REG_PCT_CONTROL, (tmp & 0xffff0000) | 0x1);
}

/* stops all counters */
static void arc_pmu_disable(struct pmu *pmu)
{
	u32 tmp;

	tmp = read_aux_reg(ARC_REG_PCT_CONTROL);
	write_aux_reg(ARC_REG_PCT_CONTROL, (tmp & 0xffff0000) | 0x0);
}

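/*
 * Preload the counter with (max_period - left) so that it reaches the
 * overflow threshold (programmed in arc_pmu_add() for sampling events)
 * after 'period_left' more events. Returns non-zero if the period had
 * already elapsed.
 */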
static int arc_pmu_event_set_period(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	s64 left = local64_read(&hwc->period_left);
	s64 period = hwc->sample_period;
	int idx = hwc->idx;
	int overflow = 0;
	u64 value;

	if (unlikely(left <= -period)) {
		/* left underflowed by more than period. */
		left = period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		overflow = 1;
	} else if (unlikely(left <= 0)) {
		/* left underflowed by less than period. */
		left += period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		overflow = 1;
	}

	if (left > arc_pmu->max_period)
		left = arc_pmu->max_period;

	value = arc_pmu->max_period - left;
	local64_set(&hwc->prev_count, value);

	/* Select counter */
	write_aux_reg(ARC_REG_PCT_INDEX, idx);

	/* Write value */
	write_aux_reg(ARC_REG_PCT_COUNTL, lower_32_bits(value));
	write_aux_reg(ARC_REG_PCT_COUNTH, upper_32_bits(value));

	perf_event_update_userpage(event);

	return overflow;
}

/*
 * Assigns hardware counter to hardware condition.
 * Note that there is no separate start/stop mechanism;
 * stopping is achieved by assigning the 'never' condition
 */
static void arc_pmu_start(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	if (WARN_ON_ONCE(idx == -1))
		return;

	if (flags & PERF_EF_RELOAD)
		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));

	hwc->state = 0;

	arc_pmu_event_set_period(event);

	/* Enable interrupt for this counter */
	if (is_sampling_event(event))
		write_aux_reg(ARC_REG_PCT_INT_CTRL,
			      read_aux_reg(ARC_REG_PCT_INT_CTRL) | BIT(idx));

	/* enable ARC pmu here */
	write_aux_reg(ARC_REG_PCT_INDEX, idx);		/* counter # */
	write_aux_reg(ARC_REG_PCT_CONFIG, hwc->config);	/* condition */
}

static void arc_pmu_stop(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	/* Disable interrupt for this counter */
	if (is_sampling_event(event)) {
		/*
		 * Reset the interrupt flag by writing a 1. This is required
		 * to make sure no pending interrupt is left behind.
		 */
		write_aux_reg(ARC_REG_PCT_INT_ACT, BIT(idx));
		write_aux_reg(ARC_REG_PCT_INT_CTRL,
			      read_aux_reg(ARC_REG_PCT_INT_CTRL) & ~BIT(idx));
	}

	if (!(event->hw.state & PERF_HES_STOPPED)) {
		/* stop hw counter here */
		write_aux_reg(ARC_REG_PCT_INDEX, idx);

		/* condition code #0 is always "never" */
		write_aux_reg(ARC_REG_PCT_CONFIG, 0);

		event->hw.state |= PERF_HES_STOPPED;
	}

	if ((flags & PERF_EF_UPDATE) &&
	    !(event->hw.state & PERF_HES_UPTODATE)) {
		arc_perf_event_update(event, &event->hw, idx);
		event->hw.state |= PERF_HES_UPTODATE;
	}
}

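/*
 * Remove the event from the PMU: stop it with a final count update, release
 * the hardware counter index and drop the per-cpu active-event reference.
 */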
static void arc_pmu_del(struct perf_event *event, int flags)
{
	struct arc_pmu_cpu *pmu_cpu = this_cpu_ptr(&arc_pmu_cpu);

	arc_pmu_stop(event, PERF_EF_UPDATE);
	__clear_bit(event->hw.idx, pmu_cpu->used_mask);

	pmu_cpu->act_counter[event->hw.idx] = 0;

	perf_event_update_userpage(event);
}

/* allocate hardware counter and optionally start counting */
static int arc_pmu_add(struct perf_event *event, int flags)
{
	struct arc_pmu_cpu *pmu_cpu = this_cpu_ptr(&arc_pmu_cpu);
	struct hw_perf_event *hwc = &event->hw;
	int idx;

	idx = ffz(pmu_cpu->used_mask[0]);
	if (idx == arc_pmu->n_counters)
		return -EAGAIN;

	__set_bit(idx, pmu_cpu->used_mask);
	hwc->idx = idx;

	write_aux_reg(ARC_REG_PCT_INDEX, idx);

	pmu_cpu->act_counter[idx] = event;

	if (is_sampling_event(event)) {
		/* Mimic full counter overflow as other arches do */
		write_aux_reg(ARC_REG_PCT_INT_CNTL,
			      lower_32_bits(arc_pmu->max_period));
		write_aux_reg(ARC_REG_PCT_INT_CNTH,
			      upper_32_bits(arc_pmu->max_period));
	}

	write_aux_reg(ARC_REG_PCT_CONFIG, 0);
	write_aux_reg(ARC_REG_PCT_COUNTL, 0);
	write_aux_reg(ARC_REG_PCT_COUNTH, 0);
	local64_set(&hwc->prev_count, 0);

	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
	if (flags & PERF_EF_START)
		arc_pmu_start(event, PERF_EF_RELOAD);

	perf_event_update_userpage(event);

	return 0;
}

#ifdef CONFIG_ISA_ARCV2
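/*
 * Counter overflow interrupt handler (ARCv2 only): with all counters briefly
 * disabled, acknowledge each pending overflow, re-enable its interrupt,
 * update the event, reprogram the period and push a sample to the perf core.
 */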
static irqreturn_t arc_pmu_intr(int irq, void *dev)
{
	struct perf_sample_data data;
	struct arc_pmu_cpu *pmu_cpu = this_cpu_ptr(&arc_pmu_cpu);
	struct pt_regs *regs;
	unsigned int active_ints;
	int idx;

	arc_pmu_disable(&arc_pmu->pmu);

	active_ints = read_aux_reg(ARC_REG_PCT_INT_ACT);
	if (!active_ints)
		goto done;

	regs = get_irq_regs();

	do {
		struct perf_event *event;
		struct hw_perf_event *hwc;

		idx = __ffs(active_ints);

		/* Reset the interrupt flag by writing a 1 */
		write_aux_reg(ARC_REG_PCT_INT_ACT, BIT(idx));

		/*
		 * When the "interrupt active" bit is reset, the corresponding
		 * "interrupt enable" bit gets automatically cleared as well.
		 * Now we need to re-enable the interrupt for the counter.
		 */
		write_aux_reg(ARC_REG_PCT_INT_CTRL,
			      read_aux_reg(ARC_REG_PCT_INT_CTRL) | BIT(idx));

		event = pmu_cpu->act_counter[idx];
		hwc = &event->hw;

		WARN_ON_ONCE(hwc->idx != idx);

		arc_perf_event_update(event, &event->hw, event->hw.idx);
		perf_sample_data_init(&data, 0, hwc->last_period);
		if (arc_pmu_event_set_period(event)) {
			if (perf_event_overflow(event, &data, regs))
				arc_pmu_stop(event, 0);
		}

		active_ints &= ~BIT(idx);
	} while (active_ints);

done:
	arc_pmu_enable(&arc_pmu->pmu);

	return IRQ_HANDLED;
}
#else

static irqreturn_t arc_pmu_intr(int irq, void *dev)
{
	return IRQ_NONE;
}

#endif /* CONFIG_ISA_ARCV2 */

static void arc_cpu_pmu_irq_init(void *data)
{
	int irq = *(int *)data;

	enable_percpu_irq(irq, IRQ_TYPE_NONE);

	/* Clear all pending interrupt flags */
	write_aux_reg(ARC_REG_PCT_INT_ACT, 0xffffffff);
}

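/*
 * Raw hardware conditions are exposed through the "event" config field
 * declared below and the per-condition sysfs attributes generated at probe
 * time, so (assuming the usual perf tool syntax) something like
 * "perf stat -e arc_pct/crun/" should select the "crun" condition directly.
 */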
/* Event field occupies the bottom 15 bits of our config field */
PMU_FORMAT_ATTR(event, "config:0-14");
static struct attribute *arc_pmu_format_attrs[] = {
	&format_attr_event.attr,
	NULL,
};

static struct attribute_group arc_pmu_format_attr_gr = {
	.name = "format",
	.attrs = arc_pmu_format_attrs,
};

static ssize_t arc_pmu_events_sysfs_show(struct device *dev,
					 struct device_attribute *attr,
					 char *page)
{
	struct perf_pmu_events_attr *pmu_attr;

	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
	return sprintf(page, "event=0x%04llx\n", pmu_attr->id);
}

/*
 * We don't add attrs here as we don't have a pre-defined list of perf events.
 * We will generate and add attrs dynamically in probe() after we read the HW
 * configuration.
 */
static struct attribute_group arc_pmu_events_attr_gr = {
	.name = "events",
};

static void arc_pmu_add_raw_event_attr(int j, char *str)
{
	memmove(arc_pmu->raw_entry[j].name, str, ARCPMU_EVENT_NAME_LEN - 1);
	arc_pmu->attr[j].attr.attr.name = arc_pmu->raw_entry[j].name;
	arc_pmu->attr[j].attr.attr.mode = VERIFY_OCTAL_PERMISSIONS(0444);
	arc_pmu->attr[j].attr.show = arc_pmu_events_sysfs_show;
	arc_pmu->attr[j].id = j;
	arc_pmu->attrs[j] = &(arc_pmu->attr[j].attr.attr);
}

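/*
 * Allocate the dynamically sized per-condition tables: the sysfs event
 * attributes, the NULL-terminated attribute pointer array and the raw event
 * name storage, one slot per countable hardware condition.
 */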
static int arc_pmu_raw_alloc(struct device *dev)
{
	arc_pmu->attr = devm_kmalloc_array(dev, arc_pmu->n_events + 1,
		sizeof(*arc_pmu->attr), GFP_KERNEL | __GFP_ZERO);
	if (!arc_pmu->attr)
		return -ENOMEM;

	arc_pmu->attrs = devm_kmalloc_array(dev, arc_pmu->n_events + 1,
		sizeof(*arc_pmu->attrs), GFP_KERNEL | __GFP_ZERO);
	if (!arc_pmu->attrs)
		return -ENOMEM;

	arc_pmu->raw_entry = devm_kmalloc_array(dev, arc_pmu->n_events,
		sizeof(*arc_pmu->raw_entry), GFP_KERNEL | __GFP_ZERO);
	if (!arc_pmu->raw_entry)
		return -ENOMEM;

	return 0;
}

static inline bool event_in_hw_event_map(int i, char *name)
{
	if (!arc_pmu_ev_hw_map[i])
		return false;

	if (!strlen(arc_pmu_ev_hw_map[i]))
		return false;

	if (strcmp(arc_pmu_ev_hw_map[i], name))
		return false;

	return true;
}

static void arc_pmu_map_hw_event(int j, char *str)
{
	int i;

	/* See if HW condition has been mapped to a perf event_id */
	for (i = 0; i < ARRAY_SIZE(arc_pmu_ev_hw_map); i++) {
		if (event_in_hw_event_map(i, str)) {
			pr_debug("mapping perf event %2d to h/w event \'%8s\' (idx %d)\n",
				 i, str, j);
			arc_pmu->ev_hw_idx[i] = j;
		}
	}
}

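/*
 * Probe: read the PCT and CC build configuration registers to discover how
 * many counters and countable conditions this core has, build the
 * event_id -> h/w index map and the raw event sysfs attributes, optionally
 * wire up the per-cpu overflow interrupt, and register the PMU as "arc_pct".
 */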
static int arc_pmu_device_probe(struct platform_device *pdev)
{
	struct arc_reg_pct_build pct_bcr;
	struct arc_reg_cc_build cc_bcr;
	int i, has_interrupts, irq = -1;
	int counter_size;	/* in bits */

	union cc_name {
		struct {
			u32 word0, word1;
			char sentinel;
		} indiv;
		char str[ARCPMU_EVENT_NAME_LEN];
	} cc_name;

	READ_BCR(ARC_REG_PCT_BUILD, pct_bcr);
	if (!pct_bcr.v) {
		pr_err("This core does not have performance counters!\n");
		return -ENODEV;
	}
	BUILD_BUG_ON(ARC_PERF_MAX_COUNTERS > 32);
	if (WARN_ON(pct_bcr.c > ARC_PERF_MAX_COUNTERS))
		return -EINVAL;

	READ_BCR(ARC_REG_CC_BUILD, cc_bcr);
	if (WARN(!cc_bcr.v, "Counters exist but no countable conditions?"))
		return -EINVAL;

	arc_pmu = devm_kzalloc(&pdev->dev, sizeof(struct arc_pmu), GFP_KERNEL);
	if (!arc_pmu)
		return -ENOMEM;

	arc_pmu->n_events = cc_bcr.c;

	if (arc_pmu_raw_alloc(&pdev->dev))
		return -ENOMEM;

	has_interrupts = is_isa_arcv2() ? pct_bcr.i : 0;

	arc_pmu->n_counters = pct_bcr.c;
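	/*
	 * Counters are 32 bits wide plus 16 bits per unit of pct_bcr.s;
	 * cap the sampling period at half of the full counter range.
	 */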
	counter_size = 32 + (pct_bcr.s << 4);

	arc_pmu->max_period = (1ULL << counter_size) / 2 - 1ULL;

	pr_info("ARC perf\t: %d counters (%d bits), %d conditions%s\n",
		arc_pmu->n_counters, counter_size, cc_bcr.c,
		has_interrupts ? ", [overflow IRQ support]" : "");

	cc_name.str[ARCPMU_EVENT_NAME_LEN - 1] = 0;
	for (i = 0; i < PERF_COUNT_ARC_HW_MAX; i++)
		arc_pmu->ev_hw_idx[i] = -1;

	/* loop thru all available h/w condition indexes */
	for (i = 0; i < cc_bcr.c; i++) {
		write_aux_reg(ARC_REG_CC_INDEX, i);
		cc_name.indiv.word0 = le32_to_cpu(read_aux_reg(ARC_REG_CC_NAME0));
		cc_name.indiv.word1 = le32_to_cpu(read_aux_reg(ARC_REG_CC_NAME1));

		arc_pmu_map_hw_event(i, cc_name.str);
		arc_pmu_add_raw_event_attr(i, cc_name.str);
	}

	arc_pmu_events_attr_gr.attrs = arc_pmu->attrs;
	arc_pmu->attr_groups[ARCPMU_ATTR_GR_EVENTS] = &arc_pmu_events_attr_gr;
	arc_pmu->attr_groups[ARCPMU_ATTR_GR_FORMATS] = &arc_pmu_format_attr_gr;

	arc_pmu->pmu = (struct pmu) {
		.pmu_enable	= arc_pmu_enable,
		.pmu_disable	= arc_pmu_disable,
		.event_init	= arc_pmu_event_init,
		.add		= arc_pmu_add,
		.del		= arc_pmu_del,
		.start		= arc_pmu_start,
		.stop		= arc_pmu_stop,
		.read		= arc_pmu_read,
		.attr_groups	= arc_pmu->attr_groups,
	};

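	/*
	 * If the hardware supports overflow interrupts, request a per-cpu IRQ
	 * and enable it on every CPU; otherwise mark the PMU as incapable of
	 * sampling (PERF_PMU_CAP_NO_INTERRUPT).
	 */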
	if (has_interrupts) {
		irq = platform_get_irq(pdev, 0);
		if (irq >= 0) {
			int ret;

			arc_pmu->irq = irq;

			/* intc map function ensures irq_set_percpu_devid() called */
			ret = request_percpu_irq(irq, arc_pmu_intr, "ARC perf counters",
						 this_cpu_ptr(&arc_pmu_cpu));

			if (!ret)
				on_each_cpu(arc_cpu_pmu_irq_init, &irq, 1);
			else
				irq = -1;
		}
	}

	if (irq == -1)
		arc_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;

	/*
	 * The perf parser doesn't really like the '-' symbol in event names,
	 * so let's use '_' in the arc pct name, as it becomes the kernel PMU
	 * event prefix.
	 */
	return perf_pmu_register(&arc_pmu->pmu, "arc_pct", PERF_TYPE_RAW);
}

static const struct of_device_id arc_pmu_match[] = {
	{ .compatible = "snps,arc700-pct" },
	{ .compatible = "snps,archs-pct" },
	{},
};
MODULE_DEVICE_TABLE(of, arc_pmu_match);

static struct platform_driver arc_pmu_driver = {
	.driver	= {
		.name		= "arc-pct",
		.of_match_table = of_match_ptr(arc_pmu_match),
	},
	.probe		= arc_pmu_device_probe,
};

module_platform_driver(arc_pmu_driver);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Mischa Jonker <[email protected]>");
MODULE_DESCRIPTION("ARC PMU driver");