cache-l2x0-pmu.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * L220/L310 cache controller support
 *
 * Copyright (C) 2016 ARM Limited
 */
#include <linux/errno.h>
#include <linux/hrtimer.h>
#include <linux/io.h>
#include <linux/list.h>
#include <linux/perf_event.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/types.h>

#include <asm/hardware/cache-l2x0.h>

#define PMU_NR_COUNTERS 2

static void __iomem *l2x0_base;
static struct pmu *l2x0_pmu;
static cpumask_t pmu_cpu;

static const char *l2x0_name;

static ktime_t l2x0_pmu_poll_period;
static struct hrtimer l2x0_pmu_hrtimer;

/*
 * The L220/PL310 PMU has two equivalent counters, Counter1 and Counter0.
 * Registers controlling these are laid out in pairs, in descending order, i.e.
 * the register for Counter1 comes first, followed by the register for
 * Counter0.
 * We ensure that idx 0 -> Counter0, and idx 1 -> Counter1.
 */
static struct perf_event *events[PMU_NR_COUNTERS];

/* Find an unused counter */
static int l2x0_pmu_find_idx(void)
{
        int i;

        for (i = 0; i < PMU_NR_COUNTERS; i++) {
                if (!events[i])
                        return i;
        }

        return -1;
}

/* How many counters are allocated? */
static int l2x0_pmu_num_active_counters(void)
{
        int i, cnt = 0;

        for (i = 0; i < PMU_NR_COUNTERS; i++) {
                if (events[i])
                        cnt++;
        }

        return cnt;
}

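/*
 * Per the layout comment above, Counter1's registers sit 4 bytes below
 * Counter0's, so the accessors below address counter idx at
 * base + CNT0 offset - 4 * idx: idx 0 hits the Counter0 register and
 * idx 1 hits the Counter1 register.
 */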
static void l2x0_pmu_counter_config_write(int idx, u32 val)
{
        writel_relaxed(val, l2x0_base + L2X0_EVENT_CNT0_CFG - 4 * idx);
}

static u32 l2x0_pmu_counter_read(int idx)
{
        return readl_relaxed(l2x0_base + L2X0_EVENT_CNT0_VAL - 4 * idx);
}

static void l2x0_pmu_counter_write(int idx, u32 val)
{
        writel_relaxed(val, l2x0_base + L2X0_EVENT_CNT0_VAL - 4 * idx);
}

static void __l2x0_pmu_enable(void)
{
        u32 val = readl_relaxed(l2x0_base + L2X0_EVENT_CNT_CTRL);
        val |= L2X0_EVENT_CNT_CTRL_ENABLE;
        writel_relaxed(val, l2x0_base + L2X0_EVENT_CNT_CTRL);
}

static void __l2x0_pmu_disable(void)
{
        u32 val = readl_relaxed(l2x0_base + L2X0_EVENT_CNT_CTRL);
        val &= ~L2X0_EVENT_CNT_CTRL_ENABLE;
        writel_relaxed(val, l2x0_base + L2X0_EVENT_CNT_CTRL);
}

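/*
 * The perf core brackets scheduling operations with pmu_disable()/pmu_enable();
 * only toggle the global enable bit when at least one counter is allocated, so
 * the hardware is left untouched while the PMU is idle.
 */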
static void l2x0_pmu_enable(struct pmu *pmu)
{
        if (l2x0_pmu_num_active_counters() == 0)
                return;

        __l2x0_pmu_enable();
}

static void l2x0_pmu_disable(struct pmu *pmu)
{
        if (l2x0_pmu_num_active_counters() == 0)
                return;

        __l2x0_pmu_disable();
}

static void warn_if_saturated(u32 count)
{
        if (count != 0xffffffff)
                return;
        pr_warn_ratelimited("L2X0 counter saturated. Poll period too long\n");
}

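/*
 * Read the hardware counter and fold the delta since the last read into
 * event->count. The xchg loop retries until the value swapped into
 * prev_count matches the snapshot the delta is computed against, so a
 * racing update cannot cause the same delta to be accounted twice.
 */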
static void l2x0_pmu_event_read(struct perf_event *event)
{
        struct hw_perf_event *hw = &event->hw;
        u64 prev_count, new_count, mask;

        do {
                prev_count = local64_read(&hw->prev_count);
                new_count = l2x0_pmu_counter_read(hw->idx);
        } while (local64_xchg(&hw->prev_count, new_count) != prev_count);

        mask = GENMASK_ULL(31, 0);
        local64_add((new_count - prev_count) & mask, &event->count);

        warn_if_saturated(new_count);
}

static void l2x0_pmu_event_configure(struct perf_event *event)
{
        struct hw_perf_event *hw = &event->hw;

        /*
         * The L2X0 counters saturate at 0xffffffff rather than wrapping, so we
         * will *always* lose some number of events when a counter saturates,
         * and have no way of detecting how many were lost.
         *
         * To minimize the impact of this, we try to maximize the period by
         * always starting counters at zero. To ensure that group ratios are
         * representative, we poll periodically to avoid counters saturating.
         * See l2x0_pmu_poll().
         */
        local64_set(&hw->prev_count, 0);
        l2x0_pmu_counter_write(hw->idx, 0);
}

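/*
 * hrtimer callback: briefly pause counting, accumulate and re-zero every
 * active counter, then resume counting and re-arm the timer for the next
 * poll period.
 */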
static enum hrtimer_restart l2x0_pmu_poll(struct hrtimer *hrtimer)
{
        unsigned long flags;
        int i;

        local_irq_save(flags);
        __l2x0_pmu_disable();

        for (i = 0; i < PMU_NR_COUNTERS; i++) {
                struct perf_event *event = events[i];

                if (!event)
                        continue;

                l2x0_pmu_event_read(event);
                l2x0_pmu_event_configure(event);
        }

        __l2x0_pmu_enable();
        local_irq_restore(flags);

        hrtimer_forward_now(hrtimer, l2x0_pmu_poll_period);
        return HRTIMER_RESTART;
}

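/*
 * Program the event source into the counter's config register. The counter
 * overflow interrupt is left disabled: this driver polls via the hrtimer
 * instead of using the counter interrupt (see l2x0_pmu_init()).
 */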
static void __l2x0_pmu_event_enable(int idx, u32 event)
{
        u32 val;

        val = event << L2X0_EVENT_CNT_CFG_SRC_SHIFT;
        val |= L2X0_EVENT_CNT_CFG_INT_DISABLED;
        l2x0_pmu_counter_config_write(idx, val);
}

static void l2x0_pmu_event_start(struct perf_event *event, int flags)
{
        struct hw_perf_event *hw = &event->hw;

        if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
                return;

        if (flags & PERF_EF_RELOAD) {
                WARN_ON_ONCE(!(hw->state & PERF_HES_UPTODATE));
                l2x0_pmu_event_configure(event);
        }

        hw->state = 0;

        __l2x0_pmu_event_enable(hw->idx, hw->config_base);
}

static void __l2x0_pmu_event_disable(int idx)
{
        u32 val;

        val = L2X0_EVENT_CNT_CFG_SRC_DISABLED << L2X0_EVENT_CNT_CFG_SRC_SHIFT;
        val |= L2X0_EVENT_CNT_CFG_INT_DISABLED;
        l2x0_pmu_counter_config_write(idx, val);
}

static void l2x0_pmu_event_stop(struct perf_event *event, int flags)
{
        struct hw_perf_event *hw = &event->hw;

        if (WARN_ON_ONCE(event->hw.state & PERF_HES_STOPPED))
                return;

        __l2x0_pmu_event_disable(hw->idx);

        hw->state |= PERF_HES_STOPPED;

        if (flags & PERF_EF_UPDATE) {
                l2x0_pmu_event_read(event);
                hw->state |= PERF_HES_UPTODATE;
        }
}

static int l2x0_pmu_event_add(struct perf_event *event, int flags)
{
        struct hw_perf_event *hw = &event->hw;
        int idx = l2x0_pmu_find_idx();

        if (idx == -1)
                return -EAGAIN;

        /*
         * Pin the timer, so that the overflows are handled by the chosen
         * event->cpu (this is the same one as presented in "cpumask"
         * attribute).
         */
        if (l2x0_pmu_num_active_counters() == 0)
                hrtimer_start(&l2x0_pmu_hrtimer, l2x0_pmu_poll_period,
                              HRTIMER_MODE_REL_PINNED);

        events[idx] = event;
        hw->idx = idx;

        l2x0_pmu_event_configure(event);

        hw->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;

        if (flags & PERF_EF_START)
                l2x0_pmu_event_start(event, 0);

        return 0;
}

static void l2x0_pmu_event_del(struct perf_event *event, int flags)
{
        struct hw_perf_event *hw = &event->hw;

        l2x0_pmu_event_stop(event, PERF_EF_UPDATE);

        events[hw->idx] = NULL;
        hw->idx = -1;

        if (l2x0_pmu_num_active_counters() == 0)
                hrtimer_cancel(&l2x0_pmu_hrtimer);
}

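/*
 * A group can only be scheduled if all of its hardware (l2x0) events fit in
 * the two counters at once; software events in the group are allowed and do
 * not consume a counter.
 */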
static bool l2x0_pmu_group_is_valid(struct perf_event *event)
{
        struct pmu *pmu = event->pmu;
        struct perf_event *leader = event->group_leader;
        struct perf_event *sibling;
        int num_hw = 0;

        if (leader->pmu == pmu)
                num_hw++;
        else if (!is_software_event(leader))
                return false;

        for_each_sibling_event(sibling, leader) {
                if (sibling->pmu == pmu)
                        num_hw++;
                else if (!is_software_event(sibling))
                        return false;
        }

        return num_hw <= PMU_NR_COUNTERS;
}

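/*
 * The cache PMU counts controller-wide activity: sampling and per-task use
 * are rejected (the driver takes no counter interrupt to drive sampling, and
 * L2 activity cannot be attributed to a task), a CPU context is required, and
 * the event is forced onto the CPU advertised in the "cpumask" attribute.
 */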
static int l2x0_pmu_event_init(struct perf_event *event)
{
        struct hw_perf_event *hw = &event->hw;

        if (event->attr.type != l2x0_pmu->type)
                return -ENOENT;

        if (is_sampling_event(event) ||
            event->attach_state & PERF_ATTACH_TASK)
                return -EINVAL;

        if (event->cpu < 0)
                return -EINVAL;

        if (event->attr.config & ~L2X0_EVENT_CNT_CFG_SRC_MASK)
                return -EINVAL;

        hw->config_base = event->attr.config;

        if (!l2x0_pmu_group_is_valid(event))
                return -EINVAL;

        event->cpu = cpumask_first(&pmu_cpu);

        return 0;
}

struct l2x0_event_attribute {
        struct device_attribute attr;
        unsigned int config;
        bool pl310_only;
};

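/*
 * Build a named sysfs event attribute from an anonymous compound literal: the
 * one-element array gives each event a static l2x0_event_attribute carrying
 * its config value and a flag for events that only exist on the PL310, and
 * the macro evaluates to a pointer to the embedded struct attribute.
 */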
#define L2X0_EVENT_ATTR(_name, _config, _pl310_only)                        \
        (&((struct l2x0_event_attribute[]) {{                               \
                .attr = __ATTR(_name, S_IRUGO, l2x0_pmu_event_show, NULL),  \
                .config = _config,                                          \
                .pl310_only = _pl310_only,                                  \
        }})[0].attr.attr)

#define L220_PLUS_EVENT_ATTR(_name, _config)                                \
        L2X0_EVENT_ATTR(_name, _config, false)

#define PL310_EVENT_ATTR(_name, _config)                                    \
        L2X0_EVENT_ATTR(_name, _config, true)

static ssize_t l2x0_pmu_event_show(struct device *dev,
                                   struct device_attribute *attr, char *buf)
{
        struct l2x0_event_attribute *lattr;

        lattr = container_of(attr, typeof(*lattr), attr);
        return snprintf(buf, PAGE_SIZE, "config=0x%x\n", lattr->config);
}

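/*
 * Hide the PL310-only events on an L220: visibility is decided by comparing
 * against the PMU name chosen in l2x0_pmu_register().
 */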
static umode_t l2x0_pmu_event_attr_is_visible(struct kobject *kobj,
                                              struct attribute *attr,
                                              int unused)
{
        struct device *dev = kobj_to_dev(kobj);
        struct pmu *pmu = dev_get_drvdata(dev);
        struct l2x0_event_attribute *lattr;

        lattr = container_of(attr, typeof(*lattr), attr.attr);

        if (!lattr->pl310_only || strcmp("l2c_310", pmu->name) == 0)
                return attr->mode;

        return 0;
}

static struct attribute *l2x0_pmu_event_attrs[] = {
        L220_PLUS_EVENT_ATTR(co, 0x1),
        L220_PLUS_EVENT_ATTR(drhit, 0x2),
        L220_PLUS_EVENT_ATTR(drreq, 0x3),
        L220_PLUS_EVENT_ATTR(dwhit, 0x4),
        L220_PLUS_EVENT_ATTR(dwreq, 0x5),
        L220_PLUS_EVENT_ATTR(dwtreq, 0x6),
        L220_PLUS_EVENT_ATTR(irhit, 0x7),
        L220_PLUS_EVENT_ATTR(irreq, 0x8),
        L220_PLUS_EVENT_ATTR(wa, 0x9),
        PL310_EVENT_ATTR(ipfalloc, 0xa),
        PL310_EVENT_ATTR(epfhit, 0xb),
        PL310_EVENT_ATTR(epfalloc, 0xc),
        PL310_EVENT_ATTR(srrcvd, 0xd),
        PL310_EVENT_ATTR(srconf, 0xe),
        PL310_EVENT_ATTR(epfrcvd, 0xf),
        NULL
};

static struct attribute_group l2x0_pmu_event_attrs_group = {
        .name = "events",
        .attrs = l2x0_pmu_event_attrs,
        .is_visible = l2x0_pmu_event_attr_is_visible,
};

static ssize_t l2x0_pmu_cpumask_show(struct device *dev,
                                     struct device_attribute *attr, char *buf)
{
        return cpumap_print_to_pagebuf(true, buf, &pmu_cpu);
}

static struct device_attribute l2x0_pmu_cpumask_attr =
                __ATTR(cpumask, S_IRUGO, l2x0_pmu_cpumask_show, NULL);

static struct attribute *l2x0_pmu_cpumask_attrs[] = {
        &l2x0_pmu_cpumask_attr.attr,
        NULL,
};

static struct attribute_group l2x0_pmu_cpumask_attr_group = {
        .attrs = l2x0_pmu_cpumask_attrs,
};

static const struct attribute_group *l2x0_pmu_attr_groups[] = {
        &l2x0_pmu_event_attrs_group,
        &l2x0_pmu_cpumask_attr_group,
        NULL,
};

static void l2x0_pmu_reset(void)
{
        int i;

        __l2x0_pmu_disable();

        for (i = 0; i < PMU_NR_COUNTERS; i++)
                __l2x0_pmu_event_disable(i);
}

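/*
 * CPU hotplug: if the CPU going offline is the one handling the PMU, migrate
 * the perf context to another online CPU and update the exported cpumask so
 * userspace keeps opening events on the right CPU.
 */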
static int l2x0_pmu_offline_cpu(unsigned int cpu)
{
        unsigned int target;

        if (!cpumask_test_and_clear_cpu(cpu, &pmu_cpu))
                return 0;

        target = cpumask_any_but(cpu_online_mask, cpu);
        if (target >= nr_cpu_ids)
                return 0;

        perf_pmu_migrate_context(l2x0_pmu, cpu, target);
        cpumask_set_cpu(target, &pmu_cpu);

        return 0;
}

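/*
 * Suspend stops each active counter with a final PERF_EF_UPDATE read so no
 * counts are lost; resume re-initialises the PMU (the hardware state may not
 * survive suspend) and restarts the counters from zero via PERF_EF_RELOAD.
 */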
void l2x0_pmu_suspend(void)
{
        int i;

        if (!l2x0_pmu)
                return;

        l2x0_pmu_disable(l2x0_pmu);

        for (i = 0; i < PMU_NR_COUNTERS; i++) {
                if (events[i])
                        l2x0_pmu_event_stop(events[i], PERF_EF_UPDATE);
        }
}

void l2x0_pmu_resume(void)
{
        int i;

        if (!l2x0_pmu)
                return;

        l2x0_pmu_reset();

        for (i = 0; i < PMU_NR_COUNTERS; i++) {
                if (events[i])
                        l2x0_pmu_event_start(events[i], PERF_EF_RELOAD);
        }

        l2x0_pmu_enable(l2x0_pmu);
}

void __init l2x0_pmu_register(void __iomem *base, u32 part)
{
        /*
         * Determine whether we support the PMU, and choose the name for sysfs.
         * This is also used by l2x0_pmu_event_attr_is_visible to determine
         * which events to display, as the PL310 PMU supports a superset of
         * L220 events.
         *
         * The L210 PMU has a different programmer's interface, and is not
         * supported by this driver.
         *
         * We must defer registering the PMU until the perf subsystem is up and
         * running, so just stash the name and base, and leave that to another
         * initcall.
         */
        switch (part & L2X0_CACHE_ID_PART_MASK) {
        case L2X0_CACHE_ID_PART_L220:
                l2x0_name = "l2c_220";
                break;
        case L2X0_CACHE_ID_PART_L310:
                l2x0_name = "l2c_310";
                break;
        default:
                return;
        }

        l2x0_base = base;
}

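/*
 * Deferred registration: runs at device_initcall time, once the perf
 * subsystem is up. Allocates the struct pmu, resets the hardware counters,
 * sets up the one-second polling hrtimer, nominates CPU 0 as the initial
 * owner of the PMU, and wires up the hotplug callback before registering
 * with perf.
 */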
static __init int l2x0_pmu_init(void)
{
        int ret;

        if (!l2x0_base)
                return 0;

        l2x0_pmu = kzalloc(sizeof(*l2x0_pmu), GFP_KERNEL);
        if (!l2x0_pmu) {
                pr_warn("Unable to allocate L2x0 PMU\n");
                return -ENOMEM;
        }

        *l2x0_pmu = (struct pmu) {
                .task_ctx_nr = perf_invalid_context,
                .pmu_enable = l2x0_pmu_enable,
                .pmu_disable = l2x0_pmu_disable,
                .read = l2x0_pmu_event_read,
                .start = l2x0_pmu_event_start,
                .stop = l2x0_pmu_event_stop,
                .add = l2x0_pmu_event_add,
                .del = l2x0_pmu_event_del,
                .event_init = l2x0_pmu_event_init,
                .attr_groups = l2x0_pmu_attr_groups,
                .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
        };

        l2x0_pmu_reset();

        /*
         * We always use a hrtimer rather than an interrupt.
         * See comments in l2x0_pmu_event_configure and l2x0_pmu_poll.
         *
         * Polling once a second allows the counters to fill up to 1/128th on a
         * quad-core test chip with cores clocked at 400MHz. Hopefully this
         * leaves sufficient headroom to avoid overflow on production silicon
         * at higher frequencies.
         */
        l2x0_pmu_poll_period = ms_to_ktime(1000);
        hrtimer_init(&l2x0_pmu_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        l2x0_pmu_hrtimer.function = l2x0_pmu_poll;

        cpumask_set_cpu(0, &pmu_cpu);

        ret = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ARM_L2X0_ONLINE,
                                        "perf/arm/l2x0:online", NULL,
                                        l2x0_pmu_offline_cpu);
        if (ret)
                goto out_pmu;

        ret = perf_pmu_register(l2x0_pmu, l2x0_name, -1);
        if (ret)
                goto out_cpuhp;

        return 0;

out_cpuhp:
        cpuhp_remove_state_nocalls(CPUHP_AP_PERF_ARM_L2X0_ONLINE);
out_pmu:
        kfree(l2x0_pmu);
        l2x0_pmu = NULL;
        return ret;
}
device_initcall(l2x0_pmu_init);