imc-pmu.c

  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. * In-Memory Collection (IMC) Performance Monitor counter support.
  4. *
  5. * Copyright (C) 2017 Madhavan Srinivasan, IBM Corporation.
  6. * (C) 2017 Anju T Sudhakar, IBM Corporation.
  7. * (C) 2017 Hemant K Shaw, IBM Corporation.
  8. */
  9. #include <linux/of.h>
  10. #include <linux/perf_event.h>
  11. #include <linux/slab.h>
  12. #include <asm/opal.h>
  13. #include <asm/imc-pmu.h>
  14. #include <asm/cputhreads.h>
  15. #include <asm/smp.h>
  16. #include <linux/string.h>
  17. #include <linux/spinlock.h>
  18. /* Nest IMC data structures and variables */
  19. /*
  20. * Used to avoid races in counting the nest-pmu units during hotplug
  21. * register and unregister
  22. */
  23. static DEFINE_MUTEX(nest_init_lock);
  24. static DEFINE_PER_CPU(struct imc_pmu_ref *, local_nest_imc_refc);
  25. static struct imc_pmu **per_nest_pmu_arr;
  26. static cpumask_t nest_imc_cpumask;
  27. static struct imc_pmu_ref *nest_imc_refc;
  28. static int nest_pmus;
  29. /* Core IMC data structures and variables */
  30. static cpumask_t core_imc_cpumask;
  31. static struct imc_pmu_ref *core_imc_refc;
  32. static struct imc_pmu *core_imc_pmu;
  33. /* Thread IMC data structures and variables */
  34. static DEFINE_PER_CPU(u64 *, thread_imc_mem);
  35. static struct imc_pmu *thread_imc_pmu;
  36. static int thread_imc_mem_size;
  37. /* Trace IMC data structures */
  38. static DEFINE_PER_CPU(u64 *, trace_imc_mem);
  39. static struct imc_pmu_ref *trace_imc_refc;
  40. static int trace_imc_mem_size;
  41. /*
  42. * Global data structure used to avoid races between thread,
  43. * core and trace-imc
  44. */
  45. static struct imc_pmu_ref imc_global_refc = {
  46. .lock = __SPIN_LOCK_UNLOCKED(imc_global_refc.lock),
  47. .id = 0,
  48. .refc = 0,
  49. };
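/*
 * Only one of the core/thread/trace IMC domains can be active at a
 * time; "id" records which domain currently owns the counters and
 * "refc" counts how many events of that domain are in use.
 */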
  50. static struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
  51. {
  52. return container_of(event->pmu, struct imc_pmu, pmu);
  53. }
  54. PMU_FORMAT_ATTR(event, "config:0-61");
  55. PMU_FORMAT_ATTR(offset, "config:0-31");
  56. PMU_FORMAT_ATTR(rvalue, "config:32");
  57. PMU_FORMAT_ATTR(mode, "config:33-40");
  58. static struct attribute *imc_format_attrs[] = {
  59. &format_attr_event.attr,
  60. &format_attr_offset.attr,
  61. &format_attr_rvalue.attr,
  62. &format_attr_mode.attr,
  63. NULL,
  64. };
  65. static const struct attribute_group imc_format_group = {
  66. .name = "format",
  67. .attrs = imc_format_attrs,
  68. };
  69. /* Format attribute for imc trace-mode */
  70. PMU_FORMAT_ATTR(cpmc_reserved, "config:0-19");
  71. PMU_FORMAT_ATTR(cpmc_event, "config:20-27");
  72. PMU_FORMAT_ATTR(cpmc_samplesel, "config:28-29");
  73. PMU_FORMAT_ATTR(cpmc_load, "config:30-61");
  74. static struct attribute *trace_imc_format_attrs[] = {
  75. &format_attr_event.attr,
  76. &format_attr_cpmc_reserved.attr,
  77. &format_attr_cpmc_event.attr,
  78. &format_attr_cpmc_samplesel.attr,
  79. &format_attr_cpmc_load.attr,
  80. NULL,
  81. };
  82. static const struct attribute_group trace_imc_format_group = {
  83. .name = "format",
  84. .attrs = trace_imc_format_attrs,
  85. };
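/*
 * In trace mode the cpmc_* fields above are encoded in
 * event->attr.config and are intended to select the CPMC counter,
 * sample selection and load value used while sampling. Illustrative
 * usage (the available event names come from the device tree):
 *   perf record -e trace_imc/trace_cycles/ -C 0 -- sleep 1
 */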
  86. /* Get the cpumask printed to a buffer "buf" */
  87. static ssize_t imc_pmu_cpumask_get_attr(struct device *dev,
  88. struct device_attribute *attr,
  89. char *buf)
  90. {
  91. struct pmu *pmu = dev_get_drvdata(dev);
  92. struct imc_pmu *imc_pmu = container_of(pmu, struct imc_pmu, pmu);
  93. cpumask_t *active_mask;
  94. switch (imc_pmu->domain) {
  95. case IMC_DOMAIN_NEST:
  96. active_mask = &nest_imc_cpumask;
  97. break;
  98. case IMC_DOMAIN_CORE:
  99. active_mask = &core_imc_cpumask;
  100. break;
  101. default:
  102. return 0;
  103. }
  104. return cpumap_print_to_pagebuf(true, buf, active_mask);
  105. }
  106. static DEVICE_ATTR(cpumask, S_IRUGO, imc_pmu_cpumask_get_attr, NULL);
  107. static struct attribute *imc_pmu_cpumask_attrs[] = {
  108. &dev_attr_cpumask.attr,
  109. NULL,
  110. };
  111. static const struct attribute_group imc_pmu_cpumask_attr_group = {
  112. .attrs = imc_pmu_cpumask_attrs,
  113. };
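/*
 * The cpumask attribute is exported with the PMU in sysfs (e.g. under
 * /sys/bus/event_source/devices/<pmu>/cpumask) so that perf knows
 * which CPU to open nest/core events on.
 */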
  114. /* device_str_attr_create : Populate event "name" and string "str" in attribute */
  115. static struct attribute *device_str_attr_create(const char *name, const char *str)
  116. {
  117. struct perf_pmu_events_attr *attr;
  118. attr = kzalloc(sizeof(*attr), GFP_KERNEL);
  119. if (!attr)
  120. return NULL;
  121. sysfs_attr_init(&attr->attr.attr);
  122. attr->event_str = str;
  123. attr->attr.attr.name = name;
  124. attr->attr.attr.mode = 0444;
  125. attr->attr.show = perf_event_sysfs_show;
  126. return &attr->attr.attr;
  127. }
  128. static int imc_parse_event(struct device_node *np, const char *scale,
  129. const char *unit, const char *prefix,
  130. u32 base, struct imc_events *event)
  131. {
  132. const char *s;
  133. u32 reg;
  134. if (of_property_read_u32(np, "reg", &reg))
  135. goto error;
  136. /* Add the base_reg value to the "reg" */
  137. event->value = base + reg;
  138. if (of_property_read_string(np, "event-name", &s))
  139. goto error;
  140. event->name = kasprintf(GFP_KERNEL, "%s%s", prefix, s);
  141. if (!event->name)
  142. goto error;
  143. if (of_property_read_string(np, "scale", &s))
  144. s = scale;
  145. if (s) {
  146. event->scale = kstrdup(s, GFP_KERNEL);
  147. if (!event->scale)
  148. goto error;
  149. }
  150. if (of_property_read_string(np, "unit", &s))
  151. s = unit;
  152. if (s) {
  153. event->unit = kstrdup(s, GFP_KERNEL);
  154. if (!event->unit)
  155. goto error;
  156. }
  157. return 0;
  158. error:
  159. kfree(event->unit);
  160. kfree(event->scale);
  161. kfree(event->name);
  162. return -EINVAL;
  163. }
  164. /*
  165. * imc_free_events: Function to cleanup the events list, having
  166. * "nr_entries".
  167. */
  168. static void imc_free_events(struct imc_events *events, int nr_entries)
  169. {
  170. int i;
  171. /* Nothing to clean, return */
  172. if (!events)
  173. return;
  174. for (i = 0; i < nr_entries; i++) {
  175. kfree(events[i].unit);
  176. kfree(events[i].scale);
  177. kfree(events[i].name);
  178. }
  179. kfree(events);
  180. }
  181. /*
  182. * update_events_in_group: Update the "events" information in an attr_group
  183. * and assign the attr_group to the pmu "pmu".
  184. */
  185. static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu)
  186. {
  187. struct attribute_group *attr_group;
  188. struct attribute **attrs, *dev_str;
  189. struct device_node *np, *pmu_events;
  190. u32 handle, base_reg;
  191. int i = 0, j = 0, ct, ret;
  192. const char *prefix, *g_scale, *g_unit;
  193. const char *ev_val_str, *ev_scale_str, *ev_unit_str;
  194. if (!of_property_read_u32(node, "events", &handle))
  195. pmu_events = of_find_node_by_phandle(handle);
  196. else
  197. return 0;
  198. /* Did not find any node with a given phandle */
  199. if (!pmu_events)
  200. return 0;
  201. /* Get a count of number of child nodes */
  202. ct = of_get_child_count(pmu_events);
  203. /* Get the event prefix */
  204. if (of_property_read_string(node, "events-prefix", &prefix)) {
  205. of_node_put(pmu_events);
  206. return 0;
  207. }
  208. /* Get a global unit and scale data if available */
  209. if (of_property_read_string(node, "scale", &g_scale))
  210. g_scale = NULL;
  211. if (of_property_read_string(node, "unit", &g_unit))
  212. g_unit = NULL;
  213. /* "reg" property gives out the base offset of the counters data */
  214. of_property_read_u32(node, "reg", &base_reg);
  215. /* Allocate memory for the events */
  216. pmu->events = kcalloc(ct, sizeof(struct imc_events), GFP_KERNEL);
  217. if (!pmu->events) {
  218. of_node_put(pmu_events);
  219. return -ENOMEM;
  220. }
  221. ct = 0;
  222. /* Parse the events and update the struct */
  223. for_each_child_of_node(pmu_events, np) {
  224. ret = imc_parse_event(np, g_scale, g_unit, prefix, base_reg, &pmu->events[ct]);
  225. if (!ret)
  226. ct++;
  227. }
  228. of_node_put(pmu_events);
  229. /* Allocate memory for attribute group */
  230. attr_group = kzalloc(sizeof(*attr_group), GFP_KERNEL);
  231. if (!attr_group) {
  232. imc_free_events(pmu->events, ct);
  233. return -ENOMEM;
  234. }
  235. /*
  236. * Allocate memory for attributes.
  237. * Since we have count of events for this pmu, we also allocate
  238. * memory for the scale and unit attribute for now.
  239. * "ct" has the total event structs added from the events-parent node.
  240. * So allocate three times the "ct" (this includes event, event_scale and
  241. * event_unit).
  242. */
  243. attrs = kcalloc(((ct * 3) + 1), sizeof(struct attribute *), GFP_KERNEL);
  244. if (!attrs) {
  245. kfree(attr_group);
  246. imc_free_events(pmu->events, ct);
  247. return -ENOMEM;
  248. }
  249. attr_group->name = "events";
  250. attr_group->attrs = attrs;
  251. do {
  252. ev_val_str = kasprintf(GFP_KERNEL, "event=0x%x", pmu->events[i].value);
  253. dev_str = device_str_attr_create(pmu->events[i].name, ev_val_str);
  254. if (!dev_str)
  255. continue;
  256. attrs[j++] = dev_str;
  257. if (pmu->events[i].scale) {
  258. ev_scale_str = kasprintf(GFP_KERNEL, "%s.scale", pmu->events[i].name);
  259. dev_str = device_str_attr_create(ev_scale_str, pmu->events[i].scale);
  260. if (!dev_str)
  261. continue;
  262. attrs[j++] = dev_str;
  263. }
  264. if (pmu->events[i].unit) {
  265. ev_unit_str = kasprintf(GFP_KERNEL, "%s.unit", pmu->events[i].name);
  266. dev_str = device_str_attr_create(ev_unit_str, pmu->events[i].unit);
  267. if (!dev_str)
  268. continue;
  269. attrs[j++] = dev_str;
  270. }
  271. } while (++i < ct);
  272. /* Save the event attribute */
  273. pmu->attr_groups[IMC_EVENT_ATTR] = attr_group;
  274. return 0;
  275. }
  276. /* get_nest_pmu_ref: Return the imc_pmu_ref struct for the given node */
  277. static struct imc_pmu_ref *get_nest_pmu_ref(int cpu)
  278. {
  279. return per_cpu(local_nest_imc_refc, cpu);
  280. }
  281. static void nest_change_cpu_context(int old_cpu, int new_cpu)
  282. {
  283. struct imc_pmu **pn = per_nest_pmu_arr;
  284. if (old_cpu < 0 || new_cpu < 0)
  285. return;
  286. while (*pn) {
  287. perf_pmu_migrate_context(&(*pn)->pmu, old_cpu, new_cpu);
  288. pn++;
  289. }
  290. }
  291. static int ppc_nest_imc_cpu_offline(unsigned int cpu)
  292. {
  293. int nid, target = -1;
  294. const struct cpumask *l_cpumask;
  295. struct imc_pmu_ref *ref;
  296. /*
  297. * Check in the designated list for this cpu. Don't bother
  298. * if it is not one of them.
  299. */
  300. if (!cpumask_test_and_clear_cpu(cpu, &nest_imc_cpumask))
  301. return 0;
  302. /*
  303. * Check whether nest_imc is registered. We could end up here if the
  304. * cpuhotplug callback registration fails. i.e, callback invokes the
  305. * offline path for all successfully registered nodes. At this stage,
  306. * nest_imc pmu will not be registered and we should return here.
  307. *
  308. * We return with a zero since this is not an offline failure. And
  309. * cpuhp_setup_state() returns the actual failure reason to the caller,
  310. * which in turn will call the cleanup routine.
  311. */
  312. if (!nest_pmus)
  313. return 0;
  314. /*
  315. * Now that this cpu is one of the designated cpus,
  316. * find another cpu that is a) online and b) in the same chip.
  317. */
  318. nid = cpu_to_node(cpu);
  319. l_cpumask = cpumask_of_node(nid);
  320. target = cpumask_last(l_cpumask);
  321. /*
  322. * If this (target) is the last cpu in the cpumask for this chip,
  323. * check for any possible online cpu in the chip.
  324. */
  325. if (unlikely(target == cpu))
  326. target = cpumask_any_but(l_cpumask, cpu);
  327. /*
  328. * Update the cpumask with the target cpu and
  329. * migrate the context if needed
  330. */
  331. if (target >= 0 && target < nr_cpu_ids) {
  332. cpumask_set_cpu(target, &nest_imc_cpumask);
  333. nest_change_cpu_context(cpu, target);
  334. } else {
  335. opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
  336. get_hard_smp_processor_id(cpu));
  337. /*
  338. * If this is the last cpu in this chip then, skip the reference
  339. * count lock and make the reference count on this chip zero.
  340. */
  341. ref = get_nest_pmu_ref(cpu);
  342. if (!ref)
  343. return -EINVAL;
  344. ref->refc = 0;
  345. }
  346. return 0;
  347. }
  348. static int ppc_nest_imc_cpu_online(unsigned int cpu)
  349. {
  350. const struct cpumask *l_cpumask;
  351. static struct cpumask tmp_mask;
  352. int res;
  353. /* Get the cpumask of this node */
  354. l_cpumask = cpumask_of_node(cpu_to_node(cpu));
  355. /*
  356. * If this is not the first online CPU on this node, then
  357. * just return.
  358. */
  359. if (cpumask_and(&tmp_mask, l_cpumask, &nest_imc_cpumask))
  360. return 0;
  361. /*
  362. * If this is the first online cpu on this node,
  363. * disable the nest counters by making an OPAL call.
  364. */
  365. res = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
  366. get_hard_smp_processor_id(cpu));
  367. if (res)
  368. return res;
  369. /* Make this CPU the designated target for counter collection */
  370. cpumask_set_cpu(cpu, &nest_imc_cpumask);
  371. return 0;
  372. }
  373. static int nest_pmu_cpumask_init(void)
  374. {
  375. return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE,
  376. "perf/powerpc/imc:online",
  377. ppc_nest_imc_cpu_online,
  378. ppc_nest_imc_cpu_offline);
  379. }
  380. static void nest_imc_counters_release(struct perf_event *event)
  381. {
  382. int rc, node_id;
  383. struct imc_pmu_ref *ref;
  384. if (event->cpu < 0)
  385. return;
  386. node_id = cpu_to_node(event->cpu);
  387. /*
  388. * See if we need to disable the nest PMU.
  389. * If no events are currently in use, then we have to take a
  390. * lock to ensure that we don't race with another task doing
  391. * enable or disable the nest counters.
  392. */
  393. ref = get_nest_pmu_ref(event->cpu);
  394. if (!ref)
  395. return;
  396. /* Take the lock for this node and then decrement the reference count */
  397. spin_lock(&ref->lock);
  398. if (ref->refc == 0) {
  399. /*
  400. * This is true when a perf session is started and then all
  401. * cpus in a given node are offlined.
  402. *
  403. * In the cpuhotplug offline path, ppc_nest_imc_cpu_offline()
  404. * sets ref->refc to zero if the cpu going offline is the last
  405. * cpu in a given node, and makes an OPAL call to disable the
  406. * engine in that node.
  407. *
  408. */
  409. spin_unlock(&ref->lock);
  410. return;
  411. }
  412. ref->refc--;
  413. if (ref->refc == 0) {
  414. rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
  415. get_hard_smp_processor_id(event->cpu));
  416. if (rc) {
  417. spin_unlock(&ref->lock);
  418. pr_err("nest-imc: Unable to stop the counters for core %d\n", node_id);
  419. return;
  420. }
  421. } else if (ref->refc < 0) {
  422. WARN(1, "nest-imc: Invalid event reference count\n");
  423. ref->refc = 0;
  424. }
  425. spin_unlock(&ref->lock);
  426. }
  427. static int nest_imc_event_init(struct perf_event *event)
  428. {
  429. int chip_id, rc, node_id;
  430. u32 l_config, config = event->attr.config;
  431. struct imc_mem_info *pcni;
  432. struct imc_pmu *pmu;
  433. struct imc_pmu_ref *ref;
  434. bool flag = false;
  435. if (event->attr.type != event->pmu->type)
  436. return -ENOENT;
  437. /* Sampling not supported */
  438. if (event->hw.sample_period)
  439. return -EINVAL;
  440. if (event->cpu < 0)
  441. return -EINVAL;
  442. pmu = imc_event_to_pmu(event);
  443. /* Sanity check for config (event offset) */
  444. if ((config & IMC_EVENT_OFFSET_MASK) > pmu->counter_mem_size)
  445. return -EINVAL;
  446. /*
  447. * Nest HW counter memory resides in a per-chip reserve-memory (HOMER).
  448. * Get the base memory address for this cpu.
  449. */
  450. chip_id = cpu_to_chip_id(event->cpu);
  451. /* Return, if chip_id is not valid */
  452. if (chip_id < 0)
  453. return -ENODEV;
  454. pcni = pmu->mem_info;
  455. do {
  456. if (pcni->id == chip_id) {
  457. flag = true;
  458. break;
  459. }
  460. pcni++;
  461. } while (pcni->vbase != 0);
  462. if (!flag)
  463. return -ENODEV;
  464. /*
  465. * Add the event offset to the base address.
  466. */
  467. l_config = config & IMC_EVENT_OFFSET_MASK;
  468. event->hw.event_base = (u64)pcni->vbase + l_config;
  469. node_id = cpu_to_node(event->cpu);
  470. /*
  471. * Get the imc_pmu_ref struct for this node.
  472. * Take the lock and then increment the count of nest pmu events inited.
  473. */
  474. ref = get_nest_pmu_ref(event->cpu);
  475. if (!ref)
  476. return -EINVAL;
  477. spin_lock(&ref->lock);
  478. if (ref->refc == 0) {
  479. rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_NEST,
  480. get_hard_smp_processor_id(event->cpu));
  481. if (rc) {
  482. spin_unlock(&ref->lock);
  483. pr_err("nest-imc: Unable to start the counters for node %d\n",
  484. node_id);
  485. return rc;
  486. }
  487. }
  488. ++ref->refc;
  489. spin_unlock(&ref->lock);
  490. event->destroy = nest_imc_counters_release;
  491. return 0;
  492. }
  493. /*
  494. * core_imc_mem_init : Initializes memory for the current core.
  495. *
  496. * Uses alloc_pages_node() and uses the returned address as an argument to
  497. * an opal call to configure the pdbar. The address sent as an argument is
  498. * converted to physical address before the opal call is made. This is the
  499. * base address at which the core imc counters are populated.
  500. */
  501. static int core_imc_mem_init(int cpu, int size)
  502. {
  503. int nid, rc = 0, core_id = (cpu / threads_per_core);
  504. struct imc_mem_info *mem_info;
  505. struct page *page;
  506. /*
  507. * alloc_pages_node() will allocate memory for core in the
  508. * local node only.
  509. */
  510. nid = cpu_to_node(cpu);
  511. mem_info = &core_imc_pmu->mem_info[core_id];
  512. mem_info->id = core_id;
  513. /* We need only vbase for core counters */
  514. page = alloc_pages_node(nid,
  515. GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
  516. __GFP_NOWARN, get_order(size));
  517. if (!page)
  518. return -ENOMEM;
  519. mem_info->vbase = page_address(page);
  520. core_imc_refc[core_id].id = core_id;
  521. spin_lock_init(&core_imc_refc[core_id].lock);
  522. rc = opal_imc_counters_init(OPAL_IMC_COUNTERS_CORE,
  523. __pa((void *)mem_info->vbase),
  524. get_hard_smp_processor_id(cpu));
  525. if (rc) {
  526. free_pages((u64)mem_info->vbase, get_order(size));
  527. mem_info->vbase = NULL;
  528. }
  529. return rc;
  530. }
  531. static bool is_core_imc_mem_inited(int cpu)
  532. {
  533. struct imc_mem_info *mem_info;
  534. int core_id = (cpu / threads_per_core);
  535. mem_info = &core_imc_pmu->mem_info[core_id];
  536. if (!mem_info->vbase)
  537. return false;
  538. return true;
  539. }
  540. static int ppc_core_imc_cpu_online(unsigned int cpu)
  541. {
  542. const struct cpumask *l_cpumask;
  543. static struct cpumask tmp_mask;
  544. int ret = 0;
  545. /* Get the cpumask for this core */
  546. l_cpumask = cpu_sibling_mask(cpu);
  547. /* If a cpu for this core is already set, then, don't do anything */
  548. if (cpumask_and(&tmp_mask, l_cpumask, &core_imc_cpumask))
  549. return 0;
  550. if (!is_core_imc_mem_inited(cpu)) {
  551. ret = core_imc_mem_init(cpu, core_imc_pmu->counter_mem_size);
  552. if (ret) {
  553. pr_info("core_imc memory allocation for cpu %d failed\n", cpu);
  554. return ret;
  555. }
  556. }
  557. /* set the cpu in the mask */
  558. cpumask_set_cpu(cpu, &core_imc_cpumask);
  559. return 0;
  560. }
  561. static int ppc_core_imc_cpu_offline(unsigned int cpu)
  562. {
  563. unsigned int core_id;
  564. int ncpu;
  565. struct imc_pmu_ref *ref;
  566. /*
  567. * clear this cpu out of the mask, if not present in the mask,
  568. * don't bother doing anything.
  569. */
  570. if (!cpumask_test_and_clear_cpu(cpu, &core_imc_cpumask))
  571. return 0;
  572. /*
  573. * Check whether core_imc is registered. We could end up here
  574. * if the cpuhotplug callback registration fails. i.e, callback
  575. * invokes the offline path for all successfully registered cpus.
  576. * At this stage, core_imc pmu will not be registered and we
  577. * should return here.
  578. *
  579. * We return with a zero since this is not an offline failure.
  580. * And cpuhp_setup_state() returns the actual failure reason
  581. * to the caller, which in turn will call the cleanup routine.
  582. */
  583. if (!core_imc_pmu->pmu.event_init)
  584. return 0;
  585. /* Find any online cpu in that core except the current "cpu" */
  586. ncpu = cpumask_last(cpu_sibling_mask(cpu));
  587. if (unlikely(ncpu == cpu))
  588. ncpu = cpumask_any_but(cpu_sibling_mask(cpu), cpu);
  589. if (ncpu >= 0 && ncpu < nr_cpu_ids) {
  590. cpumask_set_cpu(ncpu, &core_imc_cpumask);
  591. perf_pmu_migrate_context(&core_imc_pmu->pmu, cpu, ncpu);
  592. } else {
  593. /*
  594. * If this is the last cpu in this core then skip taking reference
  595. * count lock for this core and directly zero "refc" for this core.
  596. */
  597. opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
  598. get_hard_smp_processor_id(cpu));
  599. core_id = cpu / threads_per_core;
  600. ref = &core_imc_refc[core_id];
  601. if (!ref)
  602. return -EINVAL;
  603. ref->refc = 0;
  604. /*
  605. * Reduce the global reference count, if this is the
  606. * last cpu in this core and core-imc event running
  607. * in this cpu.
  608. */
  609. spin_lock(&imc_global_refc.lock);
  610. if (imc_global_refc.id == IMC_DOMAIN_CORE)
  611. imc_global_refc.refc--;
  612. spin_unlock(&imc_global_refc.lock);
  613. }
  614. return 0;
  615. }
  616. static int core_imc_pmu_cpumask_init(void)
  617. {
  618. return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE,
  619. "perf/powerpc/imc_core:online",
  620. ppc_core_imc_cpu_online,
  621. ppc_core_imc_cpu_offline);
  622. }
  623. static void reset_global_refc(struct perf_event *event)
  624. {
  625. spin_lock(&imc_global_refc.lock);
  626. imc_global_refc.refc--;
  627. /*
  628. * If no other thread is running any
  629. * event for this domain(thread/core/trace),
  630. * set the global id to zero.
  631. */
  632. if (imc_global_refc.refc <= 0) {
  633. imc_global_refc.refc = 0;
  634. imc_global_refc.id = 0;
  635. }
  636. spin_unlock(&imc_global_refc.lock);
  637. }
  638. static void core_imc_counters_release(struct perf_event *event)
  639. {
  640. int rc, core_id;
  641. struct imc_pmu_ref *ref;
  642. if (event->cpu < 0)
  643. return;
  644. /*
  645. * See if we need to disable the IMC PMU.
  646. * If no events are currently in use, then we have to take a
  647. * lock to ensure that we don't race with another task doing
  648. * enable or disable the core counters.
  649. */
  650. core_id = event->cpu / threads_per_core;
  651. /* Take the lock and decrement the reference count for this core */
  652. ref = &core_imc_refc[core_id];
  653. if (!ref)
  654. return;
  655. spin_lock(&ref->lock);
  656. if (ref->refc == 0) {
  657. /*
  658. * This is true when a perf session is started and then all
  659. * cpus in a given core are offlined.
  660. *
  661. * In the cpuhotplug offline path, ppc_core_imc_cpu_offline()
  662. * sets ref->refc to zero if the cpu going offline is the last
  663. * cpu in a given core, and makes an OPAL call to disable the
  664. * engine in that core.
  665. *
  666. */
  667. spin_unlock(&ref->lock);
  668. return;
  669. }
  670. ref->refc--;
  671. if (ref->refc == 0) {
  672. rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
  673. get_hard_smp_processor_id(event->cpu));
  674. if (rc) {
  675. spin_unlock(&ref->lock);
  676. pr_err("IMC: Unable to stop the counters for core %d\n", core_id);
  677. return;
  678. }
  679. } else if (ref->refc < 0) {
  680. WARN(1, "core-imc: Invalid event reference count\n");
  681. ref->refc = 0;
  682. }
  683. spin_unlock(&ref->lock);
  684. reset_global_refc(event);
  685. }
  686. static int core_imc_event_init(struct perf_event *event)
  687. {
  688. int core_id, rc;
  689. u64 config = event->attr.config;
  690. struct imc_mem_info *pcmi;
  691. struct imc_pmu *pmu;
  692. struct imc_pmu_ref *ref;
  693. if (event->attr.type != event->pmu->type)
  694. return -ENOENT;
  695. /* Sampling not supported */
  696. if (event->hw.sample_period)
  697. return -EINVAL;
  698. if (event->cpu < 0)
  699. return -EINVAL;
  700. event->hw.idx = -1;
  701. pmu = imc_event_to_pmu(event);
  702. /* Sanity check for config (event offset) */
  703. if (((config & IMC_EVENT_OFFSET_MASK) > pmu->counter_mem_size))
  704. return -EINVAL;
  705. if (!is_core_imc_mem_inited(event->cpu))
  706. return -ENODEV;
  707. core_id = event->cpu / threads_per_core;
  708. pcmi = &core_imc_pmu->mem_info[core_id];
  709. if ((!pcmi->vbase))
  710. return -ENODEV;
  711. ref = &core_imc_refc[core_id];
  712. if (!ref)
  713. return -EINVAL;
  714. /*
  715. * Core pmu units are enabled only when they are in use.
  716. * See if this is triggered for the first time.
  717. * If yes, take the lock and enable the core counters.
  718. * If not, just increment the count in the core_imc_refc struct.
  719. */
  720. spin_lock(&ref->lock);
  721. if (ref->refc == 0) {
  722. rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE,
  723. get_hard_smp_processor_id(event->cpu));
  724. if (rc) {
  725. spin_unlock(&ref->lock);
  726. pr_err("core-imc: Unable to start the counters for core %d\n",
  727. core_id);
  728. return rc;
  729. }
  730. }
  731. ++ref->refc;
  732. spin_unlock(&ref->lock);
  733. /*
  734. * Since the system can run IMC in either accumulation mode or
  735. * trace mode at a time, core-imc events are allowed only if no
  736. * other trace/thread imc events are enabled/monitored.
  737. *
  738. * Take the global lock, and check the refc.id
  739. * to know whether any other trace/thread imc
  740. * events are running.
  741. */
  742. spin_lock(&imc_global_refc.lock);
  743. if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_CORE) {
  744. /*
  745. * No other trace/thread imc events are running in
  746. * the system, so set the refc.id to core-imc.
  747. */
  748. imc_global_refc.id = IMC_DOMAIN_CORE;
  749. imc_global_refc.refc++;
  750. } else {
  751. spin_unlock(&imc_global_refc.lock);
  752. return -EBUSY;
  753. }
  754. spin_unlock(&imc_global_refc.lock);
  755. event->hw.event_base = (u64)pcmi->vbase + (config & IMC_EVENT_OFFSET_MASK);
  756. event->destroy = core_imc_counters_release;
  757. return 0;
  758. }
  759. /*
  760. * Allocates a page of memory for each of the online cpus, and loads
  761. * LDBAR with 0.
  762. * The physical base address of the page allocated for a cpu will be
  763. * written to the LDBAR for that cpu, when the thread-imc event
  764. * is added.
  765. *
  766. * LDBAR Register Layout:
  767. *
  768. *  0          4         8         12        16        20        24        28
  769. * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
  770. *   | |       [   ]    [  Counter Address [8:50]
  771. *   | * Mode    |
  772. *   |           * PB Scope
  773. *   * Enable/Disable
  774. *
  775. *  32        36        40        44        48        52        56        60
  776. * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
  777. *           Counter Address [8:50]        ]
  778. *
  779. */
  780. static int thread_imc_mem_alloc(int cpu_id, int size)
  781. {
  782. u64 *local_mem = per_cpu(thread_imc_mem, cpu_id);
  783. int nid = cpu_to_node(cpu_id);
  784. if (!local_mem) {
  785. struct page *page;
  786. /*
  787. * This case could happen only once at start, since we don't
  788. * free the memory in the cpu offline path.
  789. */
  790. page = alloc_pages_node(nid,
  791. GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
  792. __GFP_NOWARN, get_order(size));
  793. if (!page)
  794. return -ENOMEM;
  795. local_mem = page_address(page);
  796. per_cpu(thread_imc_mem, cpu_id) = local_mem;
  797. }
  798. mtspr(SPRN_LDBAR, 0);
  799. return 0;
  800. }
  801. static int ppc_thread_imc_cpu_online(unsigned int cpu)
  802. {
  803. return thread_imc_mem_alloc(cpu, thread_imc_mem_size);
  804. }
  805. static int ppc_thread_imc_cpu_offline(unsigned int cpu)
  806. {
  807. /*
  808. * Set bit 0 of LDBAR to zero.
  809. *
  810. * If bit 0 of LDBAR is unset, it will stop posting
  811. * the counter data to memory.
  812. * For thread-imc, bit 0 of LDBAR will be set to 1 in the
  813. * event_add function. So reset this bit here, to stop the updates
  814. * to memory in the cpu_offline path.
  815. */
  816. mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63))));
  817. /* Reduce the refc if thread-imc event running on this cpu */
  818. spin_lock(&imc_global_refc.lock);
  819. if (imc_global_refc.id == IMC_DOMAIN_THREAD)
  820. imc_global_refc.refc--;
  821. spin_unlock(&imc_global_refc.lock);
  822. return 0;
  823. }
  824. static int thread_imc_cpu_init(void)
  825. {
  826. return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE,
  827. "perf/powerpc/imc_thread:online",
  828. ppc_thread_imc_cpu_online,
  829. ppc_thread_imc_cpu_offline);
  830. }
  831. static int thread_imc_event_init(struct perf_event *event)
  832. {
  833. u32 config = event->attr.config;
  834. struct task_struct *target;
  835. struct imc_pmu *pmu;
  836. if (event->attr.type != event->pmu->type)
  837. return -ENOENT;
  838. if (!perfmon_capable())
  839. return -EACCES;
  840. /* Sampling not supported */
  841. if (event->hw.sample_period)
  842. return -EINVAL;
  843. event->hw.idx = -1;
  844. pmu = imc_event_to_pmu(event);
  845. /* Sanity check for config offset */
  846. if (((config & IMC_EVENT_OFFSET_MASK) > pmu->counter_mem_size))
  847. return -EINVAL;
  848. target = event->hw.target;
  849. if (!target)
  850. return -EINVAL;
  851. spin_lock(&imc_global_refc.lock);
  852. /*
  853. * Check if any other trace/core imc events are running in the
  854. * system, if not set the global id to thread-imc.
  855. */
  856. if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_THREAD) {
  857. imc_global_refc.id = IMC_DOMAIN_THREAD;
  858. imc_global_refc.refc++;
  859. } else {
  860. spin_unlock(&imc_global_refc.lock);
  861. return -EBUSY;
  862. }
  863. spin_unlock(&imc_global_refc.lock);
  864. event->pmu->task_ctx_nr = perf_sw_context;
  865. event->destroy = reset_global_refc;
  866. return 0;
  867. }
  868. static bool is_thread_imc_pmu(struct perf_event *event)
  869. {
  870. if (!strncmp(event->pmu->name, "thread_imc", strlen("thread_imc")))
  871. return true;
  872. return false;
  873. }
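/*
 * get_event_base_addr: thread-imc counters live in the per-cpu page
 * pointed to by thread_imc_mem, at the offset encoded in
 * event->attr.config; nest/core-imc events use the hw.event_base
 * address computed at event_init time.
 */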
  874. static u64 * get_event_base_addr(struct perf_event *event)
  875. {
  876. u64 addr;
  877. if (is_thread_imc_pmu(event)) {
  878. addr = (u64)per_cpu(thread_imc_mem, smp_processor_id());
  879. return (u64 *)(addr + (event->attr.config & IMC_EVENT_OFFSET_MASK));
  880. }
  881. return (u64 *)event->hw.event_base;
  882. }
  883. static void thread_imc_pmu_start_txn(struct pmu *pmu,
  884. unsigned int txn_flags)
  885. {
  886. if (txn_flags & ~PERF_PMU_TXN_ADD)
  887. return;
  888. perf_pmu_disable(pmu);
  889. }
  890. static void thread_imc_pmu_cancel_txn(struct pmu *pmu)
  891. {
  892. perf_pmu_enable(pmu);
  893. }
  894. static int thread_imc_pmu_commit_txn(struct pmu *pmu)
  895. {
  896. perf_pmu_enable(pmu);
  897. return 0;
  898. }
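/*
 * The txn callbacks above only bracket group insertion with
 * perf_pmu_disable()/perf_pmu_enable(); the commit step always
 * succeeds here, since these free-running counters impose no
 * scheduling constraints.
 */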
  899. static u64 imc_read_counter(struct perf_event *event)
  900. {
  901. u64 *addr, data;
  902. /*
  903. * In-Memory Collection (IMC) counters are free flowing counters.
  904. * So we take a snapshot of the counter value on enable and save it
  905. * to calculate the delta at later stage to present the event counter
  906. * value.
  907. */
  908. addr = get_event_base_addr(event);
  909. data = be64_to_cpu(READ_ONCE(*addr));
  910. local64_set(&event->hw.prev_count, data);
  911. return data;
  912. }
  913. static void imc_event_update(struct perf_event *event)
  914. {
  915. u64 counter_prev, counter_new, final_count;
  916. counter_prev = local64_read(&event->hw.prev_count);
  917. counter_new = imc_read_counter(event);
  918. final_count = counter_new - counter_prev;
  919. /* Update the delta to the event count */
  920. local64_add(final_count, &event->count);
  921. }
  922. static void imc_event_start(struct perf_event *event, int flags)
  923. {
  924. /*
  925. * In Memory Counters are free flowing counters. HW or the microcode
  926. * keeps adding to the counter offset in memory. To get event
  927. * counter value, we snapshot the value here and we calculate
  928. * delta at later point.
  929. */
  930. imc_read_counter(event);
  931. }
  932. static void imc_event_stop(struct perf_event *event, int flags)
  933. {
  934. /*
  935. * Take a snapshot and calculate the delta and update
  936. * the event counter values.
  937. */
  938. imc_event_update(event);
  939. }
  940. static int imc_event_add(struct perf_event *event, int flags)
  941. {
  942. if (flags & PERF_EF_START)
  943. imc_event_start(event, flags);
  944. return 0;
  945. }
  946. static int thread_imc_event_add(struct perf_event *event, int flags)
  947. {
  948. int core_id;
  949. struct imc_pmu_ref *ref;
  950. u64 ldbar_value, *local_mem = per_cpu(thread_imc_mem, smp_processor_id());
  951. if (flags & PERF_EF_START)
  952. imc_event_start(event, flags);
  953. if (!is_core_imc_mem_inited(smp_processor_id()))
  954. return -EINVAL;
  955. core_id = smp_processor_id() / threads_per_core;
  956. ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | THREAD_IMC_ENABLE;
  957. mtspr(SPRN_LDBAR, ldbar_value);
  958. /*
  959. * imc pmus are enabled only when they are in use.
  960. * See if this is triggered for the first time.
  961. * If yes, take the lock and enable the counters.
  962. * If not, just increment the count in ref count struct.
  963. */
  964. ref = &core_imc_refc[core_id];
  965. if (!ref)
  966. return -EINVAL;
  967. spin_lock(&ref->lock);
  968. if (ref->refc == 0) {
  969. if (opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE,
  970. get_hard_smp_processor_id(smp_processor_id()))) {
  971. spin_unlock(&ref->lock);
  972. pr_err("thread-imc: Unable to start the counter\
  973. for core %d\n", core_id);
  974. return -EINVAL;
  975. }
  976. }
  977. ++ref->refc;
  978. spin_unlock(&ref->lock);
  979. return 0;
  980. }
  981. static void thread_imc_event_del(struct perf_event *event, int flags)
  982. {
  983. int core_id;
  984. struct imc_pmu_ref *ref;
  985. core_id = smp_processor_id() / threads_per_core;
  986. ref = &core_imc_refc[core_id];
  987. if (!ref) {
  988. pr_debug("imc: Failed to get event reference count\n");
  989. return;
  990. }
  991. spin_lock(&ref->lock);
  992. ref->refc--;
  993. if (ref->refc == 0) {
  994. if (opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
  995. get_hard_smp_processor_id(smp_processor_id()))) {
  996. spin_unlock(&ref->lock);
  997. pr_err("thread-imc: Unable to stop the counters\
  998. for core %d\n", core_id);
  999. return;
  1000. }
  1001. } else if (ref->refc < 0) {
  1002. ref->refc = 0;
  1003. }
  1004. spin_unlock(&ref->lock);
  1005. /* Set bit 0 of LDBAR to zero, to stop posting updates to memory */
  1006. mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63))));
  1007. /*
  1008. * Take a snapshot and calculate the delta and update
  1009. * the event counter values.
  1010. */
  1011. imc_event_update(event);
  1012. }
  1013. /*
  1014. * Allocate a page of memory for each cpu, and load LDBAR with 0.
  1015. */
  1016. static int trace_imc_mem_alloc(int cpu_id, int size)
  1017. {
  1018. u64 *local_mem = per_cpu(trace_imc_mem, cpu_id);
  1019. int phys_id = cpu_to_node(cpu_id), rc = 0;
  1020. int core_id = (cpu_id / threads_per_core);
  1021. if (!local_mem) {
  1022. struct page *page;
  1023. page = alloc_pages_node(phys_id,
  1024. GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
  1025. __GFP_NOWARN, get_order(size));
  1026. if (!page)
  1027. return -ENOMEM;
  1028. local_mem = page_address(page);
  1029. per_cpu(trace_imc_mem, cpu_id) = local_mem;
  1030. /* Initialise the counters for trace mode */
  1031. rc = opal_imc_counters_init(OPAL_IMC_COUNTERS_TRACE, __pa((void *)local_mem),
  1032. get_hard_smp_processor_id(cpu_id));
  1033. if (rc) {
  1034. pr_info("IMC:opal init failed for trace imc\n");
  1035. return rc;
  1036. }
  1037. }
  1038. trace_imc_refc[core_id].id = core_id;
  1039. spin_lock_init(&trace_imc_refc[core_id].lock);
  1040. mtspr(SPRN_LDBAR, 0);
  1041. return 0;
  1042. }
  1043. static int ppc_trace_imc_cpu_online(unsigned int cpu)
  1044. {
  1045. return trace_imc_mem_alloc(cpu, trace_imc_mem_size);
  1046. }
  1047. static int ppc_trace_imc_cpu_offline(unsigned int cpu)
  1048. {
  1049. /*
  1050. * No need to set bit 0 of LDBAR to zero, as
  1051. * it is set to zero for imc trace-mode
  1052. *
  1053. * Reduce the refc if any trace-imc event running
  1054. * on this cpu.
  1055. */
  1056. spin_lock(&imc_global_refc.lock);
  1057. if (imc_global_refc.id == IMC_DOMAIN_TRACE)
  1058. imc_global_refc.refc--;
  1059. spin_unlock(&imc_global_refc.lock);
  1060. return 0;
  1061. }
  1062. static int trace_imc_cpu_init(void)
  1063. {
  1064. return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE,
  1065. "perf/powerpc/imc_trace:online",
  1066. ppc_trace_imc_cpu_online,
  1067. ppc_trace_imc_cpu_offline);
  1068. }
  1069. static u64 get_trace_imc_event_base_addr(void)
  1070. {
  1071. return (u64)per_cpu(trace_imc_mem, smp_processor_id());
  1072. }
  1073. /*
  1074. * Function to parse trace-imc data obtained
  1075. * and to prepare the perf sample.
  1076. */
  1077. static int trace_imc_prepare_sample(struct trace_imc_data *mem,
  1078. struct perf_sample_data *data,
  1079. u64 *prev_tb,
  1080. struct perf_event_header *header,
  1081. struct perf_event *event)
  1082. {
  1083. /* Sanity checks for a valid record */
  1084. if (be64_to_cpu(READ_ONCE(mem->tb1)) > *prev_tb)
  1085. *prev_tb = be64_to_cpu(READ_ONCE(mem->tb1));
  1086. else
  1087. return -EINVAL;
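/*
 * tb1 and tb2 both carry timebase bits written by the microcode for
 * this record; a mismatch likely means the record was still being
 * updated when we read it, so treat it as invalid.
 */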
  1088. if ((be64_to_cpu(READ_ONCE(mem->tb1)) & IMC_TRACE_RECORD_TB1_MASK) !=
  1089. be64_to_cpu(READ_ONCE(mem->tb2)))
  1090. return -EINVAL;
  1091. /* Prepare perf sample */
  1092. data->ip = be64_to_cpu(READ_ONCE(mem->ip));
  1093. data->period = event->hw.last_period;
  1094. header->type = PERF_RECORD_SAMPLE;
  1095. header->size = sizeof(*header) + event->header_size;
  1096. header->misc = 0;
  1097. if (cpu_has_feature(CPU_FTR_ARCH_31)) {
  1098. switch (IMC_TRACE_RECORD_VAL_HVPR(be64_to_cpu(READ_ONCE(mem->val)))) {
  1099. case 0: /* when MSR HV and PR not set in the trace-record */
  1100. header->misc |= PERF_RECORD_MISC_GUEST_KERNEL;
  1101. break;
  1102. case 1: /* MSR HV is 0 and PR is 1 */
  1103. header->misc |= PERF_RECORD_MISC_GUEST_USER;
  1104. break;
  1105. case 2: /* MSR HV is 1 and PR is 0 */
  1106. header->misc |= PERF_RECORD_MISC_KERNEL;
  1107. break;
  1108. case 3: /* MSR HV is 1 and PR is 1 */
  1109. header->misc |= PERF_RECORD_MISC_USER;
  1110. break;
  1111. default:
  1112. pr_info("IMC: Unable to set the flag based on MSR bits\n");
  1113. break;
  1114. }
  1115. } else {
  1116. if (is_kernel_addr(data->ip))
  1117. header->misc |= PERF_RECORD_MISC_KERNEL;
  1118. else
  1119. header->misc |= PERF_RECORD_MISC_USER;
  1120. }
  1121. perf_event_header__init_id(header, data, event);
  1122. return 0;
  1123. }
  1124. static void dump_trace_imc_data(struct perf_event *event)
  1125. {
  1126. struct trace_imc_data *mem;
  1127. int i, ret;
  1128. u64 prev_tb = 0;
  1129. mem = (struct trace_imc_data *)get_trace_imc_event_base_addr();
  1130. for (i = 0; i < (trace_imc_mem_size / sizeof(struct trace_imc_data));
  1131. i++, mem++) {
  1132. struct perf_sample_data data;
  1133. struct perf_event_header header;
  1134. ret = trace_imc_prepare_sample(mem, &data, &prev_tb, &header, event);
  1135. if (ret) /* Exit, if not a valid record */
  1136. break;
  1137. else {
  1138. /* If this is a valid record, create the sample */
  1139. struct perf_output_handle handle;
  1140. if (perf_output_begin(&handle, &data, event, header.size))
  1141. return;
  1142. perf_output_sample(&handle, &header, &data, event);
  1143. perf_output_end(&handle);
  1144. }
  1145. }
  1146. }
  1147. static int trace_imc_event_add(struct perf_event *event, int flags)
  1148. {
  1149. int core_id = smp_processor_id() / threads_per_core;
  1150. struct imc_pmu_ref *ref = NULL;
  1151. u64 local_mem, ldbar_value;
  1152. /* Set trace-imc bit in ldbar and load ldbar with per-thread memory address */
  1153. local_mem = get_trace_imc_event_base_addr();
  1154. ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | TRACE_IMC_ENABLE;
  1155. /* trace-imc reference count */
  1156. if (trace_imc_refc)
  1157. ref = &trace_imc_refc[core_id];
  1158. if (!ref) {
  1159. pr_debug("imc: Failed to get the event reference count\n");
  1160. return -EINVAL;
  1161. }
  1162. mtspr(SPRN_LDBAR, ldbar_value);
  1163. spin_lock(&ref->lock);
  1164. if (ref->refc == 0) {
  1165. if (opal_imc_counters_start(OPAL_IMC_COUNTERS_TRACE,
  1166. get_hard_smp_processor_id(smp_processor_id()))) {
  1167. spin_unlock(&ref->lock);
  1168. pr_err("trace-imc: Unable to start the counters for core %d\n", core_id);
  1169. return -EINVAL;
  1170. }
  1171. }
  1172. ++ref->refc;
  1173. spin_unlock(&ref->lock);
  1174. return 0;
  1175. }
  1176. static void trace_imc_event_read(struct perf_event *event)
  1177. {
  1178. return;
  1179. }
  1180. static void trace_imc_event_stop(struct perf_event *event, int flags)
  1181. {
  1182. u64 local_mem = get_trace_imc_event_base_addr();
  1183. dump_trace_imc_data(event);
  1184. memset((void *)local_mem, 0, sizeof(u64));
  1185. }
  1186. static void trace_imc_event_start(struct perf_event *event, int flags)
  1187. {
  1188. return;
  1189. }
  1190. static void trace_imc_event_del(struct perf_event *event, int flags)
  1191. {
  1192. int core_id = smp_processor_id() / threads_per_core;
  1193. struct imc_pmu_ref *ref = NULL;
  1194. if (trace_imc_refc)
  1195. ref = &trace_imc_refc[core_id];
  1196. if (!ref) {
  1197. pr_debug("imc: Failed to get event reference count\n");
  1198. return;
  1199. }
  1200. spin_lock(&ref->lock);
  1201. ref->refc--;
  1202. if (ref->refc == 0) {
  1203. if (opal_imc_counters_stop(OPAL_IMC_COUNTERS_TRACE,
  1204. get_hard_smp_processor_id(smp_processor_id()))) {
  1205. spin_unlock(&ref->lock);
  1206. pr_err("trace-imc: Unable to stop the counters for core %d\n", core_id);
  1207. return;
  1208. }
  1209. } else if (ref->refc < 0) {
  1210. ref->refc = 0;
  1211. }
  1212. spin_unlock(&ref->lock);
  1213. trace_imc_event_stop(event, flags);
  1214. }
  1215. static int trace_imc_event_init(struct perf_event *event)
  1216. {
  1217. if (event->attr.type != event->pmu->type)
  1218. return -ENOENT;
  1219. if (!perfmon_capable())
  1220. return -EACCES;
  1221. /* Return if this is a counting event */
  1222. if (event->attr.sample_period == 0)
  1223. return -ENOENT;
  1224. /*
  1225. * Take the global lock, and make sure
  1226. * no other thread is running any core/thread imc
  1227. * events
  1228. */
  1229. spin_lock(&imc_global_refc.lock);
  1230. if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_TRACE) {
  1231. /*
  1232. * No core/thread imc events are running in the
  1233. * system, so set the refc.id to trace-imc.
  1234. */
  1235. imc_global_refc.id = IMC_DOMAIN_TRACE;
  1236. imc_global_refc.refc++;
  1237. } else {
  1238. spin_unlock(&imc_global_refc.lock);
  1239. return -EBUSY;
  1240. }
  1241. spin_unlock(&imc_global_refc.lock);
  1242. event->hw.idx = -1;
  1243. /*
  1244. * There can only be a single PMU for perf_hw_context events which is assigned to
  1245. * core PMU. Hence use "perf_sw_context" for trace_imc.
  1246. */
  1247. event->pmu->task_ctx_nr = perf_sw_context;
  1248. event->destroy = reset_global_refc;
  1249. return 0;
  1250. }
  1251. /* update_pmu_ops : Populate the appropriate operations for "pmu" */
  1252. static int update_pmu_ops(struct imc_pmu *pmu)
  1253. {
  1254. pmu->pmu.task_ctx_nr = perf_invalid_context;
  1255. pmu->pmu.add = imc_event_add;
  1256. pmu->pmu.del = imc_event_stop;
  1257. pmu->pmu.start = imc_event_start;
  1258. pmu->pmu.stop = imc_event_stop;
  1259. pmu->pmu.read = imc_event_update;
  1260. pmu->pmu.attr_groups = pmu->attr_groups;
  1261. pmu->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
  1262. pmu->attr_groups[IMC_FORMAT_ATTR] = &imc_format_group;
  1263. switch (pmu->domain) {
  1264. case IMC_DOMAIN_NEST:
  1265. pmu->pmu.event_init = nest_imc_event_init;
  1266. pmu->attr_groups[IMC_CPUMASK_ATTR] = &imc_pmu_cpumask_attr_group;
  1267. break;
  1268. case IMC_DOMAIN_CORE:
  1269. pmu->pmu.event_init = core_imc_event_init;
  1270. pmu->attr_groups[IMC_CPUMASK_ATTR] = &imc_pmu_cpumask_attr_group;
  1271. break;
  1272. case IMC_DOMAIN_THREAD:
  1273. pmu->pmu.event_init = thread_imc_event_init;
  1274. pmu->pmu.add = thread_imc_event_add;
  1275. pmu->pmu.del = thread_imc_event_del;
  1276. pmu->pmu.start_txn = thread_imc_pmu_start_txn;
  1277. pmu->pmu.cancel_txn = thread_imc_pmu_cancel_txn;
  1278. pmu->pmu.commit_txn = thread_imc_pmu_commit_txn;
  1279. break;
  1280. case IMC_DOMAIN_TRACE:
  1281. pmu->pmu.event_init = trace_imc_event_init;
  1282. pmu->pmu.add = trace_imc_event_add;
  1283. pmu->pmu.del = trace_imc_event_del;
  1284. pmu->pmu.start = trace_imc_event_start;
  1285. pmu->pmu.stop = trace_imc_event_stop;
  1286. pmu->pmu.read = trace_imc_event_read;
  1287. pmu->attr_groups[IMC_FORMAT_ATTR] = &trace_imc_format_group;
  1288. break;
  1289. default:
  1290. break;
  1291. }
  1292. return 0;
  1293. }
  1294. /* init_nest_pmu_ref: Initialize the imc_pmu_ref struct for all the nodes */
  1295. static int init_nest_pmu_ref(void)
  1296. {
  1297. int nid, i, cpu;
  1298. nest_imc_refc = kcalloc(num_possible_nodes(), sizeof(*nest_imc_refc),
  1299. GFP_KERNEL);
  1300. if (!nest_imc_refc)
  1301. return -ENOMEM;
  1302. i = 0;
  1303. for_each_node(nid) {
  1304. /*
  1305. * Take the lock to avoid races while tracking the number of
  1306. * sessions using the chip's nest pmu units.
  1307. */
  1308. spin_lock_init(&nest_imc_refc[i].lock);
  1309. /*
  1310. * Loop to init the "id" with the node_id. Variable "i" is initialized
  1311. * to 0 and used as an index into the array. "i" will not go off the
  1312. * end of the array since "for_each_node" loops over "N_POSSIBLE"
  1313. * nodes only.
  1314. */
  1315. nest_imc_refc[i++].id = nid;
  1316. }
  1317. /*
  1318. * Loop to init the per_cpu "local_nest_imc_refc" with the proper
  1319. * "nest_imc_refc" entry. This makes get_nest_pmu_ref() a lot simpler.
  1320. */
  1321. for_each_possible_cpu(cpu) {
  1322. nid = cpu_to_node(cpu);
  1323. for (i = 0; i < num_possible_nodes(); i++) {
  1324. if (nest_imc_refc[i].id == nid) {
  1325. per_cpu(local_nest_imc_refc, cpu) = &nest_imc_refc[i];
  1326. break;
  1327. }
  1328. }
  1329. }
  1330. return 0;
  1331. }
  1332. static void cleanup_all_core_imc_memory(void)
  1333. {
  1334. int i, nr_cores = DIV_ROUND_UP(num_possible_cpus(), threads_per_core);
  1335. struct imc_mem_info *ptr = core_imc_pmu->mem_info;
  1336. int size = core_imc_pmu->counter_mem_size;
  1337. /* mem_info will never be NULL */
  1338. for (i = 0; i < nr_cores; i++) {
  1339. if (ptr[i].vbase)
  1340. free_pages((u64)ptr[i].vbase, get_order(size));
  1341. }
  1342. kfree(ptr);
  1343. kfree(core_imc_refc);
  1344. }
  1345. static void thread_imc_ldbar_disable(void *dummy)
  1346. {
  1347. /*
  1348. * By setting 0th bit of LDBAR to zero, we disable thread-imc
  1349. * updates to memory.
  1350. */
  1351. mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63))));
  1352. }
  1353. void thread_imc_disable(void)
  1354. {
  1355. on_each_cpu(thread_imc_ldbar_disable, NULL, 1);
  1356. }
  1357. static void cleanup_all_thread_imc_memory(void)
  1358. {
  1359. int i, order = get_order(thread_imc_mem_size);
  1360. for_each_online_cpu(i) {
  1361. if (per_cpu(thread_imc_mem, i))
  1362. free_pages((u64)per_cpu(thread_imc_mem, i), order);
  1363. }
  1364. }
  1365. static void cleanup_all_trace_imc_memory(void)
  1366. {
  1367. int i, order = get_order(trace_imc_mem_size);
  1368. for_each_online_cpu(i) {
  1369. if (per_cpu(trace_imc_mem, i))
  1370. free_pages((u64)per_cpu(trace_imc_mem, i), order);
  1371. }
  1372. kfree(trace_imc_refc);
  1373. }
  1374. /* Function to free the attr_groups which are dynamically allocated */
  1375. static void imc_common_mem_free(struct imc_pmu *pmu_ptr)
  1376. {
  1377. if (pmu_ptr->attr_groups[IMC_EVENT_ATTR])
  1378. kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
  1379. kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
  1380. }
  1381. /*
  1382. * Common function to unregister cpu hotplug callback and
  1383. * free the memory.
  1384. * TODO: Need to handle pmu unregistering, which will be
  1385. * done in a follow-up series.
  1386. */
  1387. static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
  1388. {
  1389. if (pmu_ptr->domain == IMC_DOMAIN_NEST) {
  1390. mutex_lock(&nest_init_lock);
  1391. if (nest_pmus == 1) {
  1392. cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE);
  1393. kfree(nest_imc_refc);
  1394. kfree(per_nest_pmu_arr);
  1395. per_nest_pmu_arr = NULL;
  1396. }
  1397. if (nest_pmus > 0)
  1398. nest_pmus--;
  1399. mutex_unlock(&nest_init_lock);
  1400. }
  1401. /* Free core_imc memory */
  1402. if (pmu_ptr->domain == IMC_DOMAIN_CORE) {
  1403. cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE);
  1404. cleanup_all_core_imc_memory();
  1405. }
  1406. /* Free thread_imc memory */
  1407. if (pmu_ptr->domain == IMC_DOMAIN_THREAD) {
  1408. cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE);
  1409. cleanup_all_thread_imc_memory();
  1410. }
  1411. if (pmu_ptr->domain == IMC_DOMAIN_TRACE) {
  1412. cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE);
  1413. cleanup_all_trace_imc_memory();
  1414. }
  1415. }
  1416. /*
  1417. * Function to unregister thread-imc if core-imc
  1418. * is not registered.
  1419. */
  1420. void unregister_thread_imc(void)
  1421. {
  1422. imc_common_cpuhp_mem_free(thread_imc_pmu);
  1423. imc_common_mem_free(thread_imc_pmu);
  1424. perf_pmu_unregister(&thread_imc_pmu->pmu);
  1425. }
  1426. /*
  1427. * imc_mem_init : Function to support memory allocation for the given imc pmu.
  1428. */
  1429. static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent,
  1430. int pmu_index)
  1431. {
  1432. const char *s;
  1433. int nr_cores, cpu, res = -ENOMEM;
  1434. if (of_property_read_string(parent, "name", &s))
  1435. return -ENODEV;
  1436. switch (pmu_ptr->domain) {
  1437. case IMC_DOMAIN_NEST:
  1438. /* Update the pmu name */
  1439. pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s_imc", "nest_", s);
  1440. if (!pmu_ptr->pmu.name)
  1441. goto err;
  1442. /* Needed for hotplug/migration */
  1443. if (!per_nest_pmu_arr) {
  1444. per_nest_pmu_arr = kcalloc(get_max_nest_dev() + 1,
  1445. sizeof(struct imc_pmu *),
  1446. GFP_KERNEL);
  1447. if (!per_nest_pmu_arr)
  1448. goto err;
  1449. }
  1450. per_nest_pmu_arr[pmu_index] = pmu_ptr;
  1451. break;
  1452. case IMC_DOMAIN_CORE:
  1453. /* Update the pmu name */
  1454. pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s", s, "_imc");
  1455. if (!pmu_ptr->pmu.name)
  1456. goto err;
  1457. nr_cores = DIV_ROUND_UP(num_possible_cpus(), threads_per_core);
  1458. pmu_ptr->mem_info = kcalloc(nr_cores, sizeof(struct imc_mem_info),
  1459. GFP_KERNEL);
  1460. if (!pmu_ptr->mem_info)
  1461. goto err;
  1462. core_imc_refc = kcalloc(nr_cores, sizeof(struct imc_pmu_ref),
  1463. GFP_KERNEL);
  1464. if (!core_imc_refc) {
  1465. kfree(pmu_ptr->mem_info);
  1466. goto err;
  1467. }
  1468. core_imc_pmu = pmu_ptr;
  1469. break;
  1470. case IMC_DOMAIN_THREAD:
  1471. /* Update the pmu name */
  1472. pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s", s, "_imc");
  1473. if (!pmu_ptr->pmu.name)
  1474. goto err;
  1475. thread_imc_mem_size = pmu_ptr->counter_mem_size;
  1476. for_each_online_cpu(cpu) {
  1477. res = thread_imc_mem_alloc(cpu, pmu_ptr->counter_mem_size);
  1478. if (res) {
  1479. cleanup_all_thread_imc_memory();
  1480. goto err;
  1481. }
  1482. }
  1483. thread_imc_pmu = pmu_ptr;
  1484. break;
  1485. case IMC_DOMAIN_TRACE:
  1486. /* Update the pmu name */
  1487. pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s", s, "_imc");
  1488. if (!pmu_ptr->pmu.name)
  1489. return -ENOMEM;
  1490. nr_cores = DIV_ROUND_UP(num_possible_cpus(), threads_per_core);
  1491. trace_imc_refc = kcalloc(nr_cores, sizeof(struct imc_pmu_ref),
  1492. GFP_KERNEL);
  1493. if (!trace_imc_refc)
  1494. return -ENOMEM;
  1495. trace_imc_mem_size = pmu_ptr->counter_mem_size;
  1496. for_each_online_cpu(cpu) {
  1497. res = trace_imc_mem_alloc(cpu, trace_imc_mem_size);
  1498. if (res) {
  1499. cleanup_all_trace_imc_memory();
  1500. goto err;
  1501. }
  1502. }
  1503. break;
  1504. default:
  1505. return -EINVAL;
  1506. }
  1507. return 0;
  1508. err:
  1509. return res;
  1510. }
  1511. /*
  1512. * init_imc_pmu : Setup and register the IMC pmu device.
  1513. *
  1514. * @parent: Device tree unit node
  1515. * @pmu_ptr: memory allocated for this pmu
  1516. * @pmu_idx: Count of nest pmus registered
  1517. *
  1518. * init_imc_pmu() sets up the pmu cpumask and registers a cpu hotplug callback.
  1519. * Handles failure cases and frees memory accordingly.
  1520. */
  1521. int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_idx)
  1522. {
  1523. int ret;
  1524. ret = imc_mem_init(pmu_ptr, parent, pmu_idx);
  1525. if (ret)
  1526. goto err_free_mem;
  1527. switch (pmu_ptr->domain) {
  1528. case IMC_DOMAIN_NEST:
  1529. /*
  1530. * Nest imc pmu needs only one cpu per chip; we initialize the
  1531. * cpumask for the first nest imc pmu and use the same for the
  1532. * rest. To handle the cpuhotplug callback unregister, we track
  1533. * the number of nest pmus in "nest_pmus".
  1534. */
  1535. mutex_lock(&nest_init_lock);
  1536. if (nest_pmus == 0) {
  1537. ret = init_nest_pmu_ref();
  1538. if (ret) {
  1539. mutex_unlock(&nest_init_lock);
  1540. kfree(per_nest_pmu_arr);
  1541. per_nest_pmu_arr = NULL;
  1542. goto err_free_mem;
  1543. }
  1544. /* Register for cpu hotplug notification. */
  1545. ret = nest_pmu_cpumask_init();
  1546. if (ret) {
  1547. mutex_unlock(&nest_init_lock);
  1548. kfree(nest_imc_refc);
  1549. kfree(per_nest_pmu_arr);
  1550. per_nest_pmu_arr = NULL;
  1551. goto err_free_mem;
  1552. }
  1553. }
  1554. nest_pmus++;
  1555. mutex_unlock(&nest_init_lock);
  1556. break;
  1557. case IMC_DOMAIN_CORE:
  1558. ret = core_imc_pmu_cpumask_init();
  1559. if (ret) {
  1560. cleanup_all_core_imc_memory();
  1561. goto err_free_mem;
  1562. }
  1563. break;
  1564. case IMC_DOMAIN_THREAD:
  1565. ret = thread_imc_cpu_init();
  1566. if (ret) {
  1567. cleanup_all_thread_imc_memory();
  1568. goto err_free_mem;
  1569. }
  1570. break;
  1571. case IMC_DOMAIN_TRACE:
  1572. ret = trace_imc_cpu_init();
  1573. if (ret) {
  1574. cleanup_all_trace_imc_memory();
  1575. goto err_free_mem;
  1576. }
  1577. break;
  1578. default:
  1579. return -EINVAL; /* Unknown domain */
  1580. }
  1581. ret = update_events_in_group(parent, pmu_ptr);
  1582. if (ret)
  1583. goto err_free_cpuhp_mem;
  1584. ret = update_pmu_ops(pmu_ptr);
  1585. if (ret)
  1586. goto err_free_cpuhp_mem;
  1587. ret = perf_pmu_register(&pmu_ptr->pmu, pmu_ptr->pmu.name, -1);
  1588. if (ret)
  1589. goto err_free_cpuhp_mem;
  1590. pr_debug("%s performance monitor hardware support registered\n",
  1591. pmu_ptr->pmu.name);
  1592. return 0;
  1593. err_free_cpuhp_mem:
  1594. imc_common_cpuhp_mem_free(pmu_ptr);
  1595. err_free_mem:
  1596. imc_common_mem_free(pmu_ptr);
  1597. return ret;
  1598. }