// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * nd_perf.c: NVDIMM Device Performance Monitoring Unit support
 *
 * Perf interface to expose nvdimm performance stats.
 *
 * Copyright (C) 2021 IBM Corporation
 */

#define pr_fmt(fmt) "nvdimm_pmu: " fmt

#include <linux/nd.h>
#include <linux/platform_device.h>

#define EVENT(_name, _code)	enum{_name = _code}
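/*
 * For instance, EVENT(CTL_RES_CNT, 0x1) below expands to:
 *
 *	enum{CTL_RES_CNT = 0x1};
 *
 * so each event name becomes a compile-time constant carrying its
 * event code, usable both in the attribute tables below and by the
 * arch code that decodes perf_event_attr.config.
 */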
/*
 * NVDIMM event codes.
 */

/* Controller Reset Count */
EVENT(CTL_RES_CNT,	0x1);
/* Controller Reset Elapsed Time */
EVENT(CTL_RES_TM,	0x2);
/* Power-on Seconds */
EVENT(POWERON_SECS,	0x3);
/* Life Remaining */
EVENT(MEM_LIFE,		0x4);
/* Critical Resource Utilization */
EVENT(CRI_RES_UTIL,	0x5);
/* Host Load Count */
EVENT(HOST_L_CNT,	0x6);
/* Host Store Count */
EVENT(HOST_S_CNT,	0x7);
/* Host Store Duration */
EVENT(HOST_S_DUR,	0x8);
/* Host Load Duration */
EVENT(HOST_L_DUR,	0x9);
/* Media Read Count */
EVENT(MED_R_CNT,	0xa);
/* Media Write Count */
EVENT(MED_W_CNT,	0xb);
/* Media Read Duration */
EVENT(MED_R_DUR,	0xc);
/* Media Write Duration */
EVENT(MED_W_DUR,	0xd);
/* Cache Read Hit Count */
EVENT(CACHE_RH_CNT,	0xe);
/* Cache Write Hit Count */
EVENT(CACHE_WH_CNT,	0xf);
/* Fast Write Count */
EVENT(FAST_W_CNT,	0x10);
NVDIMM_EVENT_ATTR(ctl_res_cnt,	CTL_RES_CNT);
NVDIMM_EVENT_ATTR(ctl_res_tm,	CTL_RES_TM);
NVDIMM_EVENT_ATTR(poweron_secs,	POWERON_SECS);
NVDIMM_EVENT_ATTR(mem_life,	MEM_LIFE);
NVDIMM_EVENT_ATTR(cri_res_util,	CRI_RES_UTIL);
NVDIMM_EVENT_ATTR(host_l_cnt,	HOST_L_CNT);
NVDIMM_EVENT_ATTR(host_s_cnt,	HOST_S_CNT);
NVDIMM_EVENT_ATTR(host_s_dur,	HOST_S_DUR);
NVDIMM_EVENT_ATTR(host_l_dur,	HOST_L_DUR);
NVDIMM_EVENT_ATTR(med_r_cnt,	MED_R_CNT);
NVDIMM_EVENT_ATTR(med_w_cnt,	MED_W_CNT);
NVDIMM_EVENT_ATTR(med_r_dur,	MED_R_DUR);
NVDIMM_EVENT_ATTR(med_w_dur,	MED_W_DUR);
NVDIMM_EVENT_ATTR(cache_rh_cnt,	CACHE_RH_CNT);
NVDIMM_EVENT_ATTR(cache_wh_cnt,	CACHE_WH_CNT);
NVDIMM_EVENT_ATTR(fast_w_cnt,	FAST_W_CNT);
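/*
 * NVDIMM_EVENT_ATTR() and NVDIMM_EVENT_PTR() are helpers from
 * <linux/nd.h>; roughly (paraphrased, see the header for the exact
 * definitions):
 *
 *	#define NVDIMM_EVENT_ATTR(_name, _id) \
 *		PMU_EVENT_ATTR_ID(_name, nvdimm_events_sysfs_show, _id)
 *	#define NVDIMM_EVENT_PTR(_id)	(&event_attr_##_id.attr.attr)
 *
 * i.e. each NVDIMM_EVENT_ATTR() use above declares a
 * struct perf_pmu_events_attr named event_attr_<ID> whose show()
 * routine is nvdimm_events_sysfs_show(), and NVDIMM_EVENT_PTR()
 * fetches the embedded struct attribute for the table below.
 */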
static struct attribute *nvdimm_events_attr[] = {
	NVDIMM_EVENT_PTR(CTL_RES_CNT),
	NVDIMM_EVENT_PTR(CTL_RES_TM),
	NVDIMM_EVENT_PTR(POWERON_SECS),
	NVDIMM_EVENT_PTR(MEM_LIFE),
	NVDIMM_EVENT_PTR(CRI_RES_UTIL),
	NVDIMM_EVENT_PTR(HOST_L_CNT),
	NVDIMM_EVENT_PTR(HOST_S_CNT),
	NVDIMM_EVENT_PTR(HOST_S_DUR),
	NVDIMM_EVENT_PTR(HOST_L_DUR),
	NVDIMM_EVENT_PTR(MED_R_CNT),
	NVDIMM_EVENT_PTR(MED_W_CNT),
	NVDIMM_EVENT_PTR(MED_R_DUR),
	NVDIMM_EVENT_PTR(MED_W_DUR),
	NVDIMM_EVENT_PTR(CACHE_RH_CNT),
	NVDIMM_EVENT_PTR(CACHE_WH_CNT),
	NVDIMM_EVENT_PTR(FAST_W_CNT),
	NULL,
};

static struct attribute_group nvdimm_pmu_events_group = {
	.name = "events",
	.attrs = nvdimm_events_attr,
};
PMU_FORMAT_ATTR(event, "config:0-4");

static struct attribute *nvdimm_pmu_format_attr[] = {
	&format_attr_event.attr,
	NULL,
};

static struct attribute_group nvdimm_pmu_format_group = {
	.name = "format",
	.attrs = nvdimm_pmu_format_attr,
};
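/*
 * "config:0-4" tells the perf tool that bits 0-4 of
 * perf_event_attr.config carry the event code, which is wide enough
 * for the codes 0x1-0x10 defined above.  With the events and format
 * groups in place, an event can be opened by name or by raw code,
 * e.g. (the PMU instance name "nmem0" here is illustrative):
 *
 *	perf stat -e nmem0/ctl_res_cnt/
 *	perf stat -e nmem0/event=0x1/
 */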
ssize_t nvdimm_events_sysfs_show(struct device *dev,
				 struct device_attribute *attr, char *page)
{
	struct perf_pmu_events_attr *pmu_attr;

	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);

	return sprintf(page, "event=0x%02llx\n", pmu_attr->id);
}
static ssize_t nvdimm_pmu_cpumask_show(struct device *dev,
				       struct device_attribute *attr,
				       char *buf)
{
	struct pmu *pmu = dev_get_drvdata(dev);
	struct nvdimm_pmu *nd_pmu;

	nd_pmu = container_of(pmu, struct nvdimm_pmu, pmu);

	return cpumap_print_to_pagebuf(true, buf, cpumask_of(nd_pmu->cpu));
}
static int nvdimm_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
{
	struct nvdimm_pmu *nd_pmu;
	u32 target;
	int nodeid;
	const struct cpumask *cpumask;

	nd_pmu = hlist_entry_safe(node, struct nvdimm_pmu, node);

	/* Clear it, in case the given cpu is set in nd_pmu->arch_cpumask */
	cpumask_test_and_clear_cpu(cpu, &nd_pmu->arch_cpumask);

	/*
	 * If the given cpu is not the current designated cpu for
	 * counter access, just return.
	 */
	if (cpu != nd_pmu->cpu)
		return 0;

	/* Check for any active cpu in nd_pmu->arch_cpumask */
	target = cpumask_any(&nd_pmu->arch_cpumask);

	/*
	 * In case we don't have any active cpu in nd_pmu->arch_cpumask,
	 * check the given cpu's numa node list.
	 */
	if (target >= nr_cpu_ids) {
		nodeid = cpu_to_node(cpu);
		cpumask = cpumask_of_node(nodeid);
		target = cpumask_any_but(cpumask, cpu);
	}
	nd_pmu->cpu = target;

	/* Migrate nvdimm pmu events to the new target cpu if valid */
	if (target < nr_cpu_ids)
		perf_pmu_migrate_context(&nd_pmu->pmu, cpu, target);

	return 0;
}
static int nvdimm_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
{
	struct nvdimm_pmu *nd_pmu;

	nd_pmu = hlist_entry_safe(node, struct nvdimm_pmu, node);

	if (nd_pmu->cpu >= nr_cpu_ids)
		nd_pmu->cpu = cpu;

	return 0;
}
static int create_cpumask_attr_group(struct nvdimm_pmu *nd_pmu)
{
	struct perf_pmu_events_attr *pmu_events_attr;
	struct attribute **attrs_group;
	struct attribute_group *nvdimm_pmu_cpumask_group;

	pmu_events_attr = kzalloc(sizeof(*pmu_events_attr), GFP_KERNEL);
	if (!pmu_events_attr)
		return -ENOMEM;

	attrs_group = kzalloc(2 * sizeof(struct attribute *), GFP_KERNEL);
	if (!attrs_group) {
		kfree(pmu_events_attr);
		return -ENOMEM;
	}

	/* Allocate memory for cpumask attribute group */
	nvdimm_pmu_cpumask_group = kzalloc(sizeof(*nvdimm_pmu_cpumask_group),
					   GFP_KERNEL);
	if (!nvdimm_pmu_cpumask_group) {
		kfree(pmu_events_attr);
		kfree(attrs_group);
		return -ENOMEM;
	}

	sysfs_attr_init(&pmu_events_attr->attr.attr);
	pmu_events_attr->attr.attr.name = "cpumask";
	pmu_events_attr->attr.attr.mode = 0444;
	pmu_events_attr->attr.show = nvdimm_pmu_cpumask_show;
	attrs_group[0] = &pmu_events_attr->attr.attr;
	attrs_group[1] = NULL;

	nvdimm_pmu_cpumask_group->attrs = attrs_group;
	nd_pmu->pmu.attr_groups[NVDIMM_PMU_CPUMASK_ATTR] = nvdimm_pmu_cpumask_group;

	return 0;
}
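/*
 * The group built above appears in sysfs as a read-only "cpumask"
 * file reporting the designated counter-access cpu, e.g. (path
 * assuming a PMU instance named "nmem0"):
 *
 *	$ cat /sys/bus/event_source/devices/nmem0/cpumask
 *	0
 *
 * The allocations made here are undone in
 * nvdimm_pmu_free_hotplug_memory().
 */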
static int nvdimm_pmu_cpu_hotplug_init(struct nvdimm_pmu *nd_pmu)
{
	int nodeid, rc;
	const struct cpumask *cpumask;

	/*
	 * In case of the cpu hotplug feature, arch-specific code can
	 * provide the required cpumask which is used to pick the
	 * designated cpu for counter access.
	 * Check for any active cpu in nd_pmu->arch_cpumask.
	 */
	if (!cpumask_empty(&nd_pmu->arch_cpumask)) {
		nd_pmu->cpu = cpumask_any(&nd_pmu->arch_cpumask);
	} else {
		/* Pick an active cpu from the cpumask of the device numa node. */
		nodeid = dev_to_node(nd_pmu->dev);
		cpumask = cpumask_of_node(nodeid);
		nd_pmu->cpu = cpumask_any(cpumask);
	}

	rc = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "perf/nvdimm:online",
				     nvdimm_pmu_cpu_online, nvdimm_pmu_cpu_offline);
	if (rc < 0)
		return rc;

	nd_pmu->cpuhp_state = rc;

	/* Register the pmu instance for cpu hotplug */
	rc = cpuhp_state_add_instance_nocalls(nd_pmu->cpuhp_state, &nd_pmu->node);
	if (rc) {
		cpuhp_remove_multi_state(nd_pmu->cpuhp_state);
		return rc;
	}

	/* Create the cpumask attribute group */
	rc = create_cpumask_attr_group(nd_pmu);
	if (rc) {
		cpuhp_state_remove_instance_nocalls(nd_pmu->cpuhp_state, &nd_pmu->node);
		cpuhp_remove_multi_state(nd_pmu->cpuhp_state);
		return rc;
	}

	return 0;
}
static void nvdimm_pmu_free_hotplug_memory(struct nvdimm_pmu *nd_pmu)
{
	cpuhp_state_remove_instance_nocalls(nd_pmu->cpuhp_state, &nd_pmu->node);
	cpuhp_remove_multi_state(nd_pmu->cpuhp_state);

	if (nd_pmu->pmu.attr_groups[NVDIMM_PMU_CPUMASK_ATTR])
		kfree(nd_pmu->pmu.attr_groups[NVDIMM_PMU_CPUMASK_ATTR]->attrs);
	kfree(nd_pmu->pmu.attr_groups[NVDIMM_PMU_CPUMASK_ATTR]);
}
int register_nvdimm_pmu(struct nvdimm_pmu *nd_pmu, struct platform_device *pdev)
{
	int rc;

	if (!nd_pmu || !pdev)
		return -EINVAL;

	/* event functions like add/del/read/event_init and pmu name should not be NULL */
	if (WARN_ON_ONCE(!(nd_pmu->pmu.event_init && nd_pmu->pmu.add &&
			   nd_pmu->pmu.del && nd_pmu->pmu.read && nd_pmu->pmu.name)))
		return -EINVAL;

	nd_pmu->pmu.attr_groups = kzalloc((NVDIMM_PMU_NULL_ATTR + 1) *
					  sizeof(struct attribute_group *), GFP_KERNEL);
	if (!nd_pmu->pmu.attr_groups)
		return -ENOMEM;

	/*
	 * Add platform_device->dev pointer to nvdimm_pmu to access
	 * device data in events functions.
	 */
	nd_pmu->dev = &pdev->dev;

	/* Fill attribute groups for the nvdimm pmu device */
	nd_pmu->pmu.attr_groups[NVDIMM_PMU_FORMAT_ATTR] = &nvdimm_pmu_format_group;
	nd_pmu->pmu.attr_groups[NVDIMM_PMU_EVENT_ATTR] = &nvdimm_pmu_events_group;
	nd_pmu->pmu.attr_groups[NVDIMM_PMU_NULL_ATTR] = NULL;

	/* Fill attribute group for cpumask */
	rc = nvdimm_pmu_cpu_hotplug_init(nd_pmu);
	if (rc) {
		pr_info("cpu hotplug feature failed for device: %s\n", nd_pmu->pmu.name);
		kfree(nd_pmu->pmu.attr_groups);
		return rc;
	}

	rc = perf_pmu_register(&nd_pmu->pmu, nd_pmu->pmu.name, -1);
	if (rc) {
		nvdimm_pmu_free_hotplug_memory(nd_pmu);
		kfree(nd_pmu->pmu.attr_groups);
		return rc;
	}

	pr_info("%s NVDIMM performance monitor support registered\n",
		nd_pmu->pmu.name);

	return 0;
}
EXPORT_SYMBOL_GPL(register_nvdimm_pmu);
void unregister_nvdimm_pmu(struct nvdimm_pmu *nd_pmu)
{
	perf_pmu_unregister(&nd_pmu->pmu);
	nvdimm_pmu_free_hotplug_memory(nd_pmu);
	kfree(nd_pmu->pmu.attr_groups);
	kfree(nd_pmu);
}
EXPORT_SYMBOL_GPL(unregister_nvdimm_pmu);
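/*
 * Usage from a device driver, as a minimal sketch (the probe function
 * and callback names are hypothetical; register_nvdimm_pmu() above
 * requires event_init/add/del/read and a pmu name to be set):
 *
 *	static int my_nvdimm_probe(struct platform_device *pdev)
 *	{
 *		struct nvdimm_pmu *nd_pmu;
 *		int rc;
 *
 *		nd_pmu = kzalloc(sizeof(*nd_pmu), GFP_KERNEL);
 *		if (!nd_pmu)
 *			return -ENOMEM;
 *
 *		nd_pmu->pmu.task_ctx_nr	= perf_invalid_context;
 *		nd_pmu->pmu.name	= "nmem_example";
 *		nd_pmu->pmu.event_init	= my_event_init;
 *		nd_pmu->pmu.add		= my_event_add;
 *		nd_pmu->pmu.del		= my_event_del;
 *		nd_pmu->pmu.read	= my_event_read;
 *
 *		rc = register_nvdimm_pmu(nd_pmu, pdev);
 *		if (rc)
 *			kfree(nd_pmu);
 *		return rc;
 *	}
 *
 * Teardown pairs with unregister_nvdimm_pmu(), which also frees the
 * nvdimm_pmu structure itself.
 */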