perf_cpum_cf.c

  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Performance event support for s390x - CPU-measurement Counter Facility
  4. *
  5. * Copyright IBM Corp. 2012, 2021
  6. * Author(s): Hendrik Brueckner <[email protected]>
  7. * Thomas Richter <[email protected]>
  8. */
  9. #define KMSG_COMPONENT "cpum_cf"
  10. #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  11. #include <linux/kernel.h>
  12. #include <linux/kernel_stat.h>
  13. #include <linux/percpu.h>
  14. #include <linux/notifier.h>
  15. #include <linux/init.h>
  16. #include <linux/export.h>
  17. #include <linux/miscdevice.h>
  18. #include <asm/cpu_mcf.h>
  19. #include <asm/hwctrset.h>
  20. #include <asm/debug.h>
  21. static unsigned int cfdiag_cpu_speed; /* CPU speed for CF_DIAG trailer */
  22. static debug_info_t *cf_dbg;
  23. #define CF_DIAG_CTRSET_DEF 0xfeef /* Counter set header mark */
  24. /* interval in seconds */
  25. /* Counter sets are stored as data stream in a page sized memory buffer and
  26. * exported to user space via raw data attached to the event sample data.
  27. * Each counter set starts with an eight byte header consisting of:
  28. * - a two byte eye catcher (0xfeef)
  29. * - a two byte counter set number
  30. * - a two byte counter set size (indicates the number of counters in this set)
  31. * - a two byte reserved value (must be zero) to make the header the same
  32. * size as a counter value.
  33. * All counter values are eight bytes in size.
  34. *
  35. * All counter sets are followed by a 64 byte trailer.
  36. * The trailer consists of a:
  37. * - a flag field indicating valid fields when the corresponding bit is set
  38. * - the counter facility first and second version number
  39. * - the CPU speed if nonzero
  40. * - the time stamp the counter sets have been collected
  41. * - the time of day (TOD) base value
  42. * - the machine type.
  43. *
  44. * The counter sets are saved when the process is prepared to be executed on a
  45. * CPU and saved again when the process is going to be removed from a CPU.
  46. * The difference of both counter sets is calculated and stored in the event
  47. * sample data area.
  48. */
  49. struct cf_ctrset_entry { /* CPU-M CF counter set entry (8 byte) */
  50. unsigned int def:16; /* 0-15 Data Entry Format */
  51. unsigned int set:16; /* 16-31 Counter set identifier */
  52. unsigned int ctr:16; /* 32-47 Number of stored counters */
  53. unsigned int res1:16; /* 48-63 Reserved */
  54. };
  55. struct cf_trailer_entry { /* CPU-M CF_DIAG trailer (64 byte) */
  56. /* 0 - 7 */
  57. union {
  58. struct {
  59. unsigned int clock_base:1; /* TOD clock base set */
  60. unsigned int speed:1; /* CPU speed set */
  61. /* Measurement alerts */
  62. unsigned int mtda:1; /* Loss of MT ctr. data alert */
  63. unsigned int caca:1; /* Counter auth. change alert */
  64. unsigned int lcda:1; /* Loss of counter data alert */
  65. };
  66. unsigned long flags; /* 0-63 All indicators */
  67. };
  68. /* 8 - 15 */
  69. unsigned int cfvn:16; /* 64-79 Ctr First Version */
  70. unsigned int csvn:16; /* 80-95 Ctr Second Version */
  71. unsigned int cpu_speed:32; /* 96-127 CPU speed */
  72. /* 16 - 23 */
  73. unsigned long timestamp; /* 128-191 Timestamp (TOD) */
  74. /* 24 - 55 */
  75. union {
  76. struct {
  77. unsigned long progusage1;
  78. unsigned long progusage2;
  79. unsigned long progusage3;
  80. unsigned long tod_base;
  81. };
  82. unsigned long progusage[4];
  83. };
  84. /* 56 - 63 */
  85. unsigned int mach_type:16; /* Machine type */
  86. unsigned int res1:16; /* Reserved */
  87. unsigned int res2:32; /* Reserved */
  88. };
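/* Illustrative sketch (not part of this driver): one way a consumer of the
 * raw sample data could walk the counter set stream described above, using
 * only the cf_ctrset_entry/cf_trailer_entry layouts and CF_DIAG_CTRSET_DEF
 * from this file. The helper name and the assumption that the buffer was
 * produced by cfdiag_getctr() are illustrative only.
 *
 *	static void walk_ctrset_stream(const void *buf, size_t len)
 *	{
 *		size_t offset = 0;
 *
 *		while (offset + sizeof(struct cf_trailer_entry) <= len) {
 *			const struct cf_ctrset_entry *hdr = buf + offset;
 *			const u64 *ctrs = (const u64 *)(hdr + 1);
 *
 *			if (hdr->def != CF_DIAG_CTRSET_DEF)
 *				break;
 *			pr_debug("set %u: %u counters, first value %llu\n",
 *				 hdr->set, hdr->ctr, hdr->ctr ? ctrs[0] : 0ULL);
 *			offset += sizeof(*hdr) + hdr->ctr * sizeof(u64);
 *		}
 *	}
 */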
  89. /* Create the trailer data at the end of a page. */
  90. static void cfdiag_trailer(struct cf_trailer_entry *te)
  91. {
  92. struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
  93. struct cpuid cpuid;
  94. te->cfvn = cpuhw->info.cfvn; /* Counter version numbers */
  95. te->csvn = cpuhw->info.csvn;
  96. get_cpu_id(&cpuid); /* Machine type */
  97. te->mach_type = cpuid.machine;
  98. te->cpu_speed = cfdiag_cpu_speed;
  99. if (te->cpu_speed)
  100. te->speed = 1;
  101. te->clock_base = 1; /* Save clock base */
  102. te->tod_base = tod_clock_base.tod;
  103. te->timestamp = get_tod_clock_fast();
  104. }
  105. /* Read a counter set. The counter set number determines the counter set and
  106. * the CPUM-CF first and second version numbers determine the number of
  107. * available counters in each counter set.
  108. * Each counter set starts with a header containing the counter set number and
  109. * the number of eight byte counters.
  110. *
  111. * The function returns the number of bytes occupied by this counter set
  112. * including the header.
  113. * If there is no counter in the counter set, this counter set is useless and
  114. * zero is returned in this case.
  115. *
  116. * Note that the counter sets may not be enabled or active and the stcctm
  117. * instruction might return error 3. Depending on the error_ok value this is OK,
  118. * for example when called from the cpumf_pmu_start() call back function.
  119. */
  120. static size_t cfdiag_getctrset(struct cf_ctrset_entry *ctrdata, int ctrset,
  121. size_t room, bool error_ok)
  122. {
  123. struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
  124. size_t ctrset_size, need = 0;
  125. int rc = 3; /* Assume write failure */
  126. ctrdata->def = CF_DIAG_CTRSET_DEF;
  127. ctrdata->set = ctrset;
  128. ctrdata->res1 = 0;
  129. ctrset_size = cpum_cf_ctrset_size(ctrset, &cpuhw->info);
  130. if (ctrset_size) { /* Save data */
  131. need = ctrset_size * sizeof(u64) + sizeof(*ctrdata);
  132. if (need <= room) {
  133. rc = ctr_stcctm(ctrset, ctrset_size,
  134. (u64 *)(ctrdata + 1));
  135. }
  136. if (rc != 3 || error_ok)
  137. ctrdata->ctr = ctrset_size;
  138. else
  139. need = 0;
  140. }
  141. debug_sprintf_event(cf_dbg, 3,
  142. "%s ctrset %d ctrset_size %zu cfvn %d csvn %d"
  143. " need %zd rc %d\n", __func__, ctrset, ctrset_size,
  144. cpuhw->info.cfvn, cpuhw->info.csvn, need, rc);
  145. return need;
  146. }
  147. static const u64 cpumf_ctr_ctl[CPUMF_CTR_SET_MAX] = {
  148. [CPUMF_CTR_SET_BASIC] = 0x02,
  149. [CPUMF_CTR_SET_USER] = 0x04,
  150. [CPUMF_CTR_SET_CRYPTO] = 0x08,
  151. [CPUMF_CTR_SET_EXT] = 0x01,
  152. [CPUMF_CTR_SET_MT_DIAG] = 0x20,
  153. };
  154. /* Read out all counter sets and save them in the provided data buffer.
  155. * The last 64 bytes hold an artificial trailer entry.
  156. */
  157. static size_t cfdiag_getctr(void *data, size_t sz, unsigned long auth,
  158. bool error_ok)
  159. {
  160. struct cf_trailer_entry *trailer;
  161. size_t offset = 0, done;
  162. int i;
  163. memset(data, 0, sz);
  164. sz -= sizeof(*trailer); /* Always room for trailer */
  165. for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
  166. struct cf_ctrset_entry *ctrdata = data + offset;
  167. if (!(auth & cpumf_ctr_ctl[i]))
  168. continue; /* Counter set not authorized */
  169. done = cfdiag_getctrset(ctrdata, i, sz - offset, error_ok);
  170. offset += done;
  171. }
  172. trailer = data + offset;
  173. cfdiag_trailer(trailer);
  174. return offset + sizeof(*trailer);
  175. }
  176. /* Calculate the difference for each counter in a counter set. */
  177. static void cfdiag_diffctrset(u64 *pstart, u64 *pstop, int counters)
  178. {
  179. for (; --counters >= 0; ++pstart, ++pstop)
  180. if (*pstop >= *pstart)
  181. *pstop -= *pstart;
  182. else
  183. *pstop = *pstart - *pstop + 1;
  184. }
  185. /* Scan the counter sets and calculate the difference of each counter
  186. * in each set. The result is the increment of each counter during the
  187. * period the counter set has been activated.
  188. *
  189. * Return true on success.
  190. */
  191. static int cfdiag_diffctr(struct cpu_cf_events *cpuhw, unsigned long auth)
  192. {
  193. struct cf_trailer_entry *trailer_start, *trailer_stop;
  194. struct cf_ctrset_entry *ctrstart, *ctrstop;
  195. size_t offset = 0;
  196. auth &= (1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1;
  197. do {
  198. ctrstart = (struct cf_ctrset_entry *)(cpuhw->start + offset);
  199. ctrstop = (struct cf_ctrset_entry *)(cpuhw->stop + offset);
  200. if (memcmp(ctrstop, ctrstart, sizeof(*ctrstop))) {
  201. pr_err_once("cpum_cf_diag counter set compare error "
  202. "in set %i\n", ctrstart->set);
  203. return 0;
  204. }
  205. auth &= ~cpumf_ctr_ctl[ctrstart->set];
  206. if (ctrstart->def == CF_DIAG_CTRSET_DEF) {
  207. cfdiag_diffctrset((u64 *)(ctrstart + 1),
  208. (u64 *)(ctrstop + 1), ctrstart->ctr);
  209. offset += ctrstart->ctr * sizeof(u64) +
  210. sizeof(*ctrstart);
  211. }
  212. } while (ctrstart->def && auth);
  213. /* Save time_stamp from start of event in stop's trailer */
  214. trailer_start = (struct cf_trailer_entry *)(cpuhw->start + offset);
  215. trailer_stop = (struct cf_trailer_entry *)(cpuhw->stop + offset);
  216. trailer_stop->progusage[0] = trailer_start->timestamp;
  217. return 1;
  218. }
  219. static enum cpumf_ctr_set get_counter_set(u64 event)
  220. {
  221. int set = CPUMF_CTR_SET_MAX;
  222. if (event < 32)
  223. set = CPUMF_CTR_SET_BASIC;
  224. else if (event < 64)
  225. set = CPUMF_CTR_SET_USER;
  226. else if (event < 128)
  227. set = CPUMF_CTR_SET_CRYPTO;
  228. else if (event < 288)
  229. set = CPUMF_CTR_SET_EXT;
  230. else if (event >= 448 && event < 496)
  231. set = CPUMF_CTR_SET_MT_DIAG;
  232. return set;
  233. }
  234. static int validate_ctr_version(const struct hw_perf_event *hwc,
  235. enum cpumf_ctr_set set)
  236. {
  237. struct cpu_cf_events *cpuhw;
  238. int err = 0;
  239. u16 mtdiag_ctl;
  240. cpuhw = &get_cpu_var(cpu_cf_events);
  241. /* check required version for counter sets */
  242. switch (set) {
  243. case CPUMF_CTR_SET_BASIC:
  244. case CPUMF_CTR_SET_USER:
  245. if (cpuhw->info.cfvn < 1)
  246. err = -EOPNOTSUPP;
  247. break;
  248. case CPUMF_CTR_SET_CRYPTO:
  249. if ((cpuhw->info.csvn >= 1 && cpuhw->info.csvn <= 5 &&
  250. hwc->config > 79) ||
  251. (cpuhw->info.csvn >= 6 && hwc->config > 83))
  252. err = -EOPNOTSUPP;
  253. break;
  254. case CPUMF_CTR_SET_EXT:
  255. if (cpuhw->info.csvn < 1)
  256. err = -EOPNOTSUPP;
  257. if ((cpuhw->info.csvn == 1 && hwc->config > 159) ||
  258. (cpuhw->info.csvn == 2 && hwc->config > 175) ||
  259. (cpuhw->info.csvn >= 3 && cpuhw->info.csvn <= 5
  260. && hwc->config > 255) ||
  261. (cpuhw->info.csvn >= 6 && hwc->config > 287))
  262. err = -EOPNOTSUPP;
  263. break;
  264. case CPUMF_CTR_SET_MT_DIAG:
  265. if (cpuhw->info.csvn <= 3)
  266. err = -EOPNOTSUPP;
  267. /*
  268. * MT-diagnostic counters are read-only. The counter set
  269. * is automatically enabled and activated on all CPUs with
  270. * multithreading (SMT). Deactivation of multithreading
  271. * also disables the counter set. State changes are ignored
  272. * by lcctl(). Because Linux controls SMT enablement through
  273. * a kernel parameter only, the counter set is either disabled
  274. * or enabled and active.
  275. *
  276. * Thus, the counters can only be used if SMT is on and the
  277. * counter set is enabled and active.
  278. */
  279. mtdiag_ctl = cpumf_ctr_ctl[CPUMF_CTR_SET_MT_DIAG];
  280. if (!((cpuhw->info.auth_ctl & mtdiag_ctl) &&
  281. (cpuhw->info.enable_ctl & mtdiag_ctl) &&
  282. (cpuhw->info.act_ctl & mtdiag_ctl)))
  283. err = -EOPNOTSUPP;
  284. break;
  285. case CPUMF_CTR_SET_MAX:
  286. err = -EOPNOTSUPP;
  287. }
  288. put_cpu_var(cpu_cf_events);
  289. return err;
  290. }
  291. static int validate_ctr_auth(const struct hw_perf_event *hwc)
  292. {
  293. struct cpu_cf_events *cpuhw;
  294. int err = 0;
  295. cpuhw = &get_cpu_var(cpu_cf_events);
  296. /* Check authorization for cpu counter sets.
  297. * If the particular CPU counter set is not authorized,
  298. * return with -ENOENT in order to fall back to other
  299. * PMUs that might satisfy the event request.
  300. */
  301. if (!(hwc->config_base & cpuhw->info.auth_ctl))
  302. err = -ENOENT;
  303. put_cpu_var(cpu_cf_events);
  304. return err;
  305. }
  306. /*
  307. * Change the CPUMF state to active.
  308. * Enable and activate the CPU-counter sets according
  309. * to the per-cpu control state.
  310. */
  311. static void cpumf_pmu_enable(struct pmu *pmu)
  312. {
  313. struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
  314. int err;
  315. if (cpuhw->flags & PMU_F_ENABLED)
  316. return;
  317. err = lcctl(cpuhw->state | cpuhw->dev_state);
  318. if (err) {
  319. pr_err("Enabling the performance measuring unit "
  320. "failed with rc=%x\n", err);
  321. return;
  322. }
  323. cpuhw->flags |= PMU_F_ENABLED;
  324. }
  325. /*
  326. * Change the CPUMF state to inactive.
  327. * Disable and enable (inactive) the CPU-counter sets according
  328. * to the per-cpu control state.
  329. */
  330. static void cpumf_pmu_disable(struct pmu *pmu)
  331. {
  332. struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
  333. int err;
  334. u64 inactive;
  335. if (!(cpuhw->flags & PMU_F_ENABLED))
  336. return;
  337. inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1);
  338. inactive |= cpuhw->dev_state;
  339. err = lcctl(inactive);
  340. if (err) {
  341. pr_err("Disabling the performance measuring unit "
  342. "failed with rc=%x\n", err);
  343. return;
  344. }
  345. cpuhw->flags &= ~PMU_F_ENABLED;
  346. }
  347. /* Number of perf events counting hardware events */
  348. static atomic_t num_events = ATOMIC_INIT(0);
  349. /* Used to avoid races in calling reserve/release_cpumf_hardware */
  350. static DEFINE_MUTEX(pmc_reserve_mutex);
  351. /* Release the PMU if event is the last perf event */
  352. static void hw_perf_event_destroy(struct perf_event *event)
  353. {
  354. if (!atomic_add_unless(&num_events, -1, 1)) {
  355. mutex_lock(&pmc_reserve_mutex);
  356. if (atomic_dec_return(&num_events) == 0)
  357. __kernel_cpumcf_end();
  358. mutex_unlock(&pmc_reserve_mutex);
  359. }
  360. }
  361. /* CPUMF <-> perf event mappings for kernel+userspace (basic set) */
  362. static const int cpumf_generic_events_basic[] = {
  363. [PERF_COUNT_HW_CPU_CYCLES] = 0,
  364. [PERF_COUNT_HW_INSTRUCTIONS] = 1,
  365. [PERF_COUNT_HW_CACHE_REFERENCES] = -1,
  366. [PERF_COUNT_HW_CACHE_MISSES] = -1,
  367. [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1,
  368. [PERF_COUNT_HW_BRANCH_MISSES] = -1,
  369. [PERF_COUNT_HW_BUS_CYCLES] = -1,
  370. };
  371. /* CPUMF <-> perf event mappings for userspace (problem-state set) */
  372. static const int cpumf_generic_events_user[] = {
  373. [PERF_COUNT_HW_CPU_CYCLES] = 32,
  374. [PERF_COUNT_HW_INSTRUCTIONS] = 33,
  375. [PERF_COUNT_HW_CACHE_REFERENCES] = -1,
  376. [PERF_COUNT_HW_CACHE_MISSES] = -1,
  377. [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1,
  378. [PERF_COUNT_HW_BRANCH_MISSES] = -1,
  379. [PERF_COUNT_HW_BUS_CYCLES] = -1,
  380. };
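/* Illustrative user space sketch (not part of this file), assuming the usual
 * linux/perf_event.h and syscall(2) interfaces; do_some_work() stands for the
 * measured workload. With exclude_kernel set, __hw_perf_event_init() below
 * maps PERF_COUNT_HW_CPU_CYCLES to problem-state counter 32 via
 * cpumf_generic_events_user[]; with both exclude bits clear it maps to basic
 * counter 0 via cpumf_generic_events_basic[].
 *
 *	struct perf_event_attr attr = {
 *		.type = PERF_TYPE_HARDWARE,
 *		.size = sizeof(attr),
 *		.config = PERF_COUNT_HW_CPU_CYCLES,
 *		.exclude_kernel = 1,
 *		.disabled = 1,
 *	};
 *	unsigned long long count;
 *	int fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
 *
 *	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
 *	do_some_work();
 *	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
 *	read(fd, &count, sizeof(count));
 */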
  381. static void cpumf_hw_inuse(void)
  382. {
  383. mutex_lock(&pmc_reserve_mutex);
  384. if (atomic_inc_return(&num_events) == 1)
  385. __kernel_cpumcf_begin();
  386. mutex_unlock(&pmc_reserve_mutex);
  387. }
  388. static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
  389. {
  390. struct perf_event_attr *attr = &event->attr;
  391. struct hw_perf_event *hwc = &event->hw;
  392. enum cpumf_ctr_set set;
  393. int err = 0;
  394. u64 ev;
  395. switch (type) {
  396. case PERF_TYPE_RAW:
  397. /* Raw events are used to access counters directly,
  398. * hence do not permit excludes */
  399. if (attr->exclude_kernel || attr->exclude_user ||
  400. attr->exclude_hv)
  401. return -EOPNOTSUPP;
  402. ev = attr->config;
  403. break;
  404. case PERF_TYPE_HARDWARE:
  405. if (is_sampling_event(event)) /* No sampling support */
  406. return -ENOENT;
  407. ev = attr->config;
  408. /* Count user space (problem-state) only */
  409. if (!attr->exclude_user && attr->exclude_kernel) {
  410. if (ev >= ARRAY_SIZE(cpumf_generic_events_user))
  411. return -EOPNOTSUPP;
  412. ev = cpumf_generic_events_user[ev];
  413. /* No support for kernel space counters only */
  414. } else if (!attr->exclude_kernel && attr->exclude_user) {
  415. return -EOPNOTSUPP;
  416. } else { /* Count user and kernel space */
  417. if (ev >= ARRAY_SIZE(cpumf_generic_events_basic))
  418. return -EOPNOTSUPP;
  419. ev = cpumf_generic_events_basic[ev];
  420. }
  421. break;
  422. default:
  423. return -ENOENT;
  424. }
  425. if (ev == -1)
  426. return -ENOENT;
  427. if (ev > PERF_CPUM_CF_MAX_CTR)
  428. return -ENOENT;
  429. /* Obtain the counter set to which the specified counter belongs */
  430. set = get_counter_set(ev);
  431. switch (set) {
  432. case CPUMF_CTR_SET_BASIC:
  433. case CPUMF_CTR_SET_USER:
  434. case CPUMF_CTR_SET_CRYPTO:
  435. case CPUMF_CTR_SET_EXT:
  436. case CPUMF_CTR_SET_MT_DIAG:
  437. /*
  438. * Use the hardware perf event structure to store the
  439. * counter number in the 'config' member and the counter
  440. * set number in the 'config_base' as bit mask.
  441. * It is later used to enable/disable the counter(s).
  442. */
  443. hwc->config = ev;
  444. hwc->config_base = cpumf_ctr_ctl[set];
  445. break;
  446. case CPUMF_CTR_SET_MAX:
  447. /* The counter could not be associated to a counter set */
  448. return -EINVAL;
  449. }
  450. /* Initialize for using the CPU-measurement counter facility */
  451. cpumf_hw_inuse();
  452. event->destroy = hw_perf_event_destroy;
  453. /* Finally, validate version and authorization of the counter set */
  454. err = validate_ctr_auth(hwc);
  455. if (!err)
  456. err = validate_ctr_version(hwc, set);
  457. return err;
  458. }
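/* Worked example of the mapping above: raw event 0x20 (decimal 32, the
 * problem-state cycle counter) satisfies "event < 64" in get_counter_set()
 * and therefore belongs to CPUMF_CTR_SET_USER, so __hw_perf_event_init()
 * stores hwc->config = 32 and hwc->config_base =
 * cpumf_ctr_ctl[CPUMF_CTR_SET_USER] (0x04), the control bit later passed to
 * ctr_set_enable()/ctr_set_start().
 */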
  459. /* Events CPU_CYCLES and INSTRUCTIONS can be submitted with two different
  460. * attribute::type values:
  461. * - PERF_TYPE_HARDWARE:
  462. * - pmu->type:
  463. * Handle both types of invocation identically. They address the same hardware.
  464. * The result is different when event modifiers exclude_kernel and/or
  465. * exclude_user are also set.
  466. */
  467. static int cpumf_pmu_event_type(struct perf_event *event)
  468. {
  469. u64 ev = event->attr.config;
  470. if (cpumf_generic_events_basic[PERF_COUNT_HW_CPU_CYCLES] == ev ||
  471. cpumf_generic_events_basic[PERF_COUNT_HW_INSTRUCTIONS] == ev ||
  472. cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev ||
  473. cpumf_generic_events_user[PERF_COUNT_HW_INSTRUCTIONS] == ev)
  474. return PERF_TYPE_HARDWARE;
  475. return PERF_TYPE_RAW;
  476. }
  477. static int cpumf_pmu_event_init(struct perf_event *event)
  478. {
  479. unsigned int type = event->attr.type;
  480. int err;
  481. if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW)
  482. err = __hw_perf_event_init(event, type);
  483. else if (event->pmu->type == type)
  484. /* Registered as unknown PMU */
  485. err = __hw_perf_event_init(event, cpumf_pmu_event_type(event));
  486. else
  487. return -ENOENT;
  488. if (unlikely(err) && event->destroy)
  489. event->destroy(event);
  490. return err;
  491. }
  492. static int hw_perf_event_reset(struct perf_event *event)
  493. {
  494. u64 prev, new;
  495. int err;
  496. do {
  497. prev = local64_read(&event->hw.prev_count);
  498. err = ecctr(event->hw.config, &new);
  499. if (err) {
  500. if (err != 3)
  501. break;
  502. /* The counter is not (yet) available. This
  503. * might happen if the counter set to which
  504. * this counter belongs is in the disabled
  505. * state.
  506. */
  507. new = 0;
  508. }
  509. } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);
  510. return err;
  511. }
  512. static void hw_perf_event_update(struct perf_event *event)
  513. {
  514. u64 prev, new, delta;
  515. int err;
  516. do {
  517. prev = local64_read(&event->hw.prev_count);
  518. err = ecctr(event->hw.config, &new);
  519. if (err)
  520. return;
  521. } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);
  522. delta = (prev <= new) ? new - prev
  523. : (-1ULL - prev) + new + 1; /* overflow */
  524. local64_add(delta, &event->count);
  525. }
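/* Worked example for the wraparound branch above: with
 * prev = 0xfffffffffffffffe and new = 1 the delta is
 * (-1ULL - prev) + new + 1 = 1 + 1 + 1 = 3, matching the three increments
 * the counter made while its raw value wrapped past 2^64 - 1.
 */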
  526. static void cpumf_pmu_read(struct perf_event *event)
  527. {
  528. if (event->hw.state & PERF_HES_STOPPED)
  529. return;
  530. hw_perf_event_update(event);
  531. }
  532. static void cpumf_pmu_start(struct perf_event *event, int flags)
  533. {
  534. struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
  535. struct hw_perf_event *hwc = &event->hw;
  536. int i;
  537. if (!(hwc->state & PERF_HES_STOPPED))
  538. return;
  539. hwc->state = 0;
  540. /* (Re-)enable and activate the counter set */
  541. ctr_set_enable(&cpuhw->state, hwc->config_base);
  542. ctr_set_start(&cpuhw->state, hwc->config_base);
  543. /* The counter set to which this counter belongs can already be active.
  544. * Because all counters in a set are active, the event->hw.prev_count
  545. * needs to be synchronized. At this point, the counter set can be in
  546. * the inactive or disabled state.
  547. */
  548. if (hwc->config == PERF_EVENT_CPUM_CF_DIAG) {
  549. cpuhw->usedss = cfdiag_getctr(cpuhw->start,
  550. sizeof(cpuhw->start),
  551. hwc->config_base, true);
  552. } else {
  553. hw_perf_event_reset(event);
  554. }
  555. /* Increment refcount for counter sets */
  556. for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i)
  557. if ((hwc->config_base & cpumf_ctr_ctl[i]))
  558. atomic_inc(&cpuhw->ctr_set[i]);
  559. }
  560. /* Create perf event sample with the counter sets as raw data. The sample
  561. * is then pushed to the event subsystem and the function checks for
  562. * possible event overflows. If an event overflow occurs, the PMU is
  563. * stopped.
  564. *
  565. * Return non-zero if an event overflow occurred.
  566. */
  567. static int cfdiag_push_sample(struct perf_event *event,
  568. struct cpu_cf_events *cpuhw)
  569. {
  570. struct perf_sample_data data;
  571. struct perf_raw_record raw;
  572. struct pt_regs regs;
  573. int overflow;
  574. /* Setup perf sample */
  575. perf_sample_data_init(&data, 0, event->hw.last_period);
  576. memset(&regs, 0, sizeof(regs));
  577. memset(&raw, 0, sizeof(raw));
  578. if (event->attr.sample_type & PERF_SAMPLE_CPU)
  579. data.cpu_entry.cpu = event->cpu;
  580. if (event->attr.sample_type & PERF_SAMPLE_RAW) {
  581. raw.frag.size = cpuhw->usedss;
  582. raw.frag.data = cpuhw->stop;
  583. raw.size = raw.frag.size;
  584. data.raw = &raw;
  585. data.sample_flags |= PERF_SAMPLE_RAW;
  586. }
  587. overflow = perf_event_overflow(event, &data, &regs);
  588. debug_sprintf_event(cf_dbg, 3,
  589. "%s event %#llx sample_type %#llx raw %d ov %d\n",
  590. __func__, event->hw.config,
  591. event->attr.sample_type, raw.size, overflow);
  592. if (overflow)
  593. event->pmu->stop(event, 0);
  594. perf_event_update_userpage(event);
  595. return overflow;
  596. }
  597. static void cpumf_pmu_stop(struct perf_event *event, int flags)
  598. {
  599. struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
  600. struct hw_perf_event *hwc = &event->hw;
  601. int i;
  602. if (!(hwc->state & PERF_HES_STOPPED)) {
  603. /* Decrement reference count for this counter set and if this
  604. * is the last used counter in the set, clear activation
  605. * control and set the counter set state to inactive.
  606. */
  607. for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
  608. if (!(hwc->config_base & cpumf_ctr_ctl[i]))
  609. continue;
  610. if (!atomic_dec_return(&cpuhw->ctr_set[i]))
  611. ctr_set_stop(&cpuhw->state, cpumf_ctr_ctl[i]);
  612. }
  613. hwc->state |= PERF_HES_STOPPED;
  614. }
  615. if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
  616. if (hwc->config == PERF_EVENT_CPUM_CF_DIAG) {
  617. local64_inc(&event->count);
  618. cpuhw->usedss = cfdiag_getctr(cpuhw->stop,
  619. sizeof(cpuhw->stop),
  620. event->hw.config_base,
  621. false);
  622. if (cfdiag_diffctr(cpuhw, event->hw.config_base))
  623. cfdiag_push_sample(event, cpuhw);
  624. } else if (cpuhw->flags & PMU_F_RESERVED) {
  625. /* Only update when PMU not hotplugged off */
  626. hw_perf_event_update(event);
  627. }
  628. hwc->state |= PERF_HES_UPTODATE;
  629. }
  630. }
  631. static int cpumf_pmu_add(struct perf_event *event, int flags)
  632. {
  633. struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
  634. ctr_set_enable(&cpuhw->state, event->hw.config_base);
  635. event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
  636. if (flags & PERF_EF_START)
  637. cpumf_pmu_start(event, PERF_EF_RELOAD);
  638. return 0;
  639. }
  640. static void cpumf_pmu_del(struct perf_event *event, int flags)
  641. {
  642. struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
  643. int i;
  644. cpumf_pmu_stop(event, PERF_EF_UPDATE);
  645. /* Check if any counter in the counter set is still used. If not used,
  646. * change the counter set to the disabled state. This also clears the
  647. * content of all counters in the set.
  648. *
  649. * When a new perf event has been added but not yet started, this can
  650. * clear the enable control and reset all counters in a set. Therefore,
  651. * cpumf_pmu_start() always has to reenable a counter set.
  652. */
  653. for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i)
  654. if (!atomic_read(&cpuhw->ctr_set[i]))
  655. ctr_set_disable(&cpuhw->state, cpumf_ctr_ctl[i]);
  656. }
  657. /* Performance monitoring unit for s390x */
  658. static struct pmu cpumf_pmu = {
  659. .task_ctx_nr = perf_sw_context,
  660. .capabilities = PERF_PMU_CAP_NO_INTERRUPT,
  661. .pmu_enable = cpumf_pmu_enable,
  662. .pmu_disable = cpumf_pmu_disable,
  663. .event_init = cpumf_pmu_event_init,
  664. .add = cpumf_pmu_add,
  665. .del = cpumf_pmu_del,
  666. .start = cpumf_pmu_start,
  667. .stop = cpumf_pmu_stop,
  668. .read = cpumf_pmu_read,
  669. };
  670. static int cfset_init(void);
  671. static int __init cpumf_pmu_init(void)
  672. {
  673. int rc;
  674. if (!kernel_cpumcf_avail())
  675. return -ENODEV;
  676. /* Setup s390dbf facility */
  677. cf_dbg = debug_register(KMSG_COMPONENT, 2, 1, 128);
  678. if (!cf_dbg) {
  679. pr_err("Registration of s390dbf(cpum_cf) failed\n");
  680. return -ENOMEM;
  681. }
  682. debug_register_view(cf_dbg, &debug_sprintf_view);
  683. cpumf_pmu.attr_groups = cpumf_cf_event_group();
  684. rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", -1);
  685. if (rc) {
  686. debug_unregister_view(cf_dbg, &debug_sprintf_view);
  687. debug_unregister(cf_dbg);
  688. pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc);
  689. } else if (stccm_avail()) { /* Setup counter set device */
  690. cfset_init();
  691. }
  692. return rc;
  693. }
  694. /* Support for the CPU Measurement Facility counter set extraction using
  695. * device /dev/hwctr. This allows user space programs to extract complete
  696. * counter sets via normal file operations.
  697. */
  698. static atomic_t cfset_opencnt = ATOMIC_INIT(0); /* Access count */
  699. static DEFINE_MUTEX(cfset_ctrset_mutex);/* Synchronize access to hardware */
  700. struct cfset_call_on_cpu_parm { /* Parm struct for smp_call_on_cpu */
  701. unsigned int sets; /* Counter set bit mask */
  702. atomic_t cpus_ack; /* # CPUs successfully executed func */
  703. };
  704. static struct cfset_session { /* CPUs and counter set bit mask */
  705. struct list_head head; /* Head of list of active processes */
  706. } cfset_session = {
  707. .head = LIST_HEAD_INIT(cfset_session.head)
  708. };
  709. struct cfset_request { /* CPUs and counter set bit mask */
  710. unsigned long ctrset; /* Bit mask of counter set to read */
  711. cpumask_t mask; /* CPU mask to read from */
  712. struct list_head node; /* Chain to cfset_session.head */
  713. };
  714. static void cfset_session_init(void)
  715. {
  716. INIT_LIST_HEAD(&cfset_session.head);
  717. }
  718. /* Remove current request from global bookkeeping. Maintain a counter set bit
  719. * mask on a per CPU basis.
  720. * Done in process context under mutex protection.
  721. */
  722. static void cfset_session_del(struct cfset_request *p)
  723. {
  724. list_del(&p->node);
  725. }
  726. /* Add current request to global bookkeeping. Maintain a counter set bit mask
  727. * on a per CPU basis.
  728. * Done in process context under mutex protection.
  729. */
  730. static void cfset_session_add(struct cfset_request *p)
  731. {
  732. list_add(&p->node, &cfset_session.head);
  733. }
  734. /* The /dev/hwctr device access uses PMU_F_IN_USE to mark that the device access
  735. * path is currently in use.
  736. * The cpu_cf_events::dev_state is used to denote counter sets in use by this
  737. * interface. It is always or'ed in. If this interface is not active, its
  738. * value is zero and no additional counter sets will be included.
  739. *
  740. * The cpu_cf_events::state is used by the perf_event_open SVC and remains
  741. * unchanged.
  742. *
  743. * perf_pmu_enable() and perf_pmu_disable() and their call backs
  744. * cpumf_pmu_enable() and cpumf_pmu_disable() are called by the
  745. * performance measurement subsystem to enable the per-process
  746. * CPU Measurement counter facility.
  747. * The XXX_enable() and XXX_disable() functions are used to turn off the
  748. * x86 performance monitoring interrupt (PMI) during scheduling.
  749. * s390 uses these calls to temporarily stop and resume the active CPU
  750. * counter sets during scheduling.
  751. *
  752. * We do allow concurrent access of perf_event_open() SVC and /dev/hwctr
  753. * device access. The perf_event_open() SVC interface makes a lot of effort
  754. * to only run the counters while the calling process is actively scheduled
  755. * to run.
  756. * When /dev/hwctr interface is also used at the same time, the counter sets
  757. * will keep running, even when the process is scheduled off a CPU.
  758. * However this is not a problem and does not lead to wrong counter values
  759. * for the perf_event_open() SVC. The current counter value will be recorded
  760. * during schedule-in. At schedule-out time the current counter value is
  761. * extracted again and the delta is calculated and added to the event.
  762. */
  763. /* Stop all counter sets via ioctl interface */
  764. static void cfset_ioctl_off(void *parm)
  765. {
  766. struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
  767. struct cfset_call_on_cpu_parm *p = parm;
  768. int rc;
  769. /* Check if any counter set is used by /dev/hwctr */
  770. for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc)
  771. if ((p->sets & cpumf_ctr_ctl[rc])) {
  772. if (!atomic_dec_return(&cpuhw->ctr_set[rc])) {
  773. ctr_set_disable(&cpuhw->dev_state,
  774. cpumf_ctr_ctl[rc]);
  775. ctr_set_stop(&cpuhw->dev_state,
  776. cpumf_ctr_ctl[rc]);
  777. }
  778. }
  779. /* Keep perf_event_open counter sets */
  780. rc = lcctl(cpuhw->dev_state | cpuhw->state);
  781. if (rc)
  782. pr_err("Counter set stop %#llx of /dev/%s failed rc=%i\n",
  783. cpuhw->state, S390_HWCTR_DEVICE, rc);
  784. if (!cpuhw->dev_state)
  785. cpuhw->flags &= ~PMU_F_IN_USE;
  786. debug_sprintf_event(cf_dbg, 4, "%s rc %d state %#llx dev_state %#llx\n",
  787. __func__, rc, cpuhw->state, cpuhw->dev_state);
  788. }
  789. /* Start counter sets on particular CPU */
  790. static void cfset_ioctl_on(void *parm)
  791. {
  792. struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
  793. struct cfset_call_on_cpu_parm *p = parm;
  794. int rc;
  795. cpuhw->flags |= PMU_F_IN_USE;
  796. ctr_set_enable(&cpuhw->dev_state, p->sets);
  797. ctr_set_start(&cpuhw->dev_state, p->sets);
  798. for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc)
  799. if ((p->sets & cpumf_ctr_ctl[rc]))
  800. atomic_inc(&cpuhw->ctr_set[rc]);
  801. rc = lcctl(cpuhw->dev_state | cpuhw->state); /* Start counter sets */
  802. if (!rc)
  803. atomic_inc(&p->cpus_ack);
  804. else
  805. pr_err("Counter set start %#llx of /dev/%s failed rc=%i\n",
  806. cpuhw->dev_state | cpuhw->state, S390_HWCTR_DEVICE, rc);
  807. debug_sprintf_event(cf_dbg, 4, "%s rc %d state %#llx dev_state %#llx\n",
  808. __func__, rc, cpuhw->state, cpuhw->dev_state);
  809. }
  810. static void cfset_release_cpu(void *p)
  811. {
  812. struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
  813. int rc;
  814. debug_sprintf_event(cf_dbg, 4, "%s state %#llx dev_state %#llx\n",
  815. __func__, cpuhw->state, cpuhw->dev_state);
  816. cpuhw->dev_state = 0;
  817. rc = lcctl(cpuhw->state); /* Keep perf_event_open counter sets */
  818. if (rc)
  819. pr_err("Counter set release %#llx of /dev/%s failed rc=%i\n",
  820. cpuhw->state, S390_HWCTR_DEVICE, rc);
  821. }
  822. /* This modifies the process CPU mask to adapt it to the currently online
  823. * CPUs. Offline CPUs cannot be addressed. This call terminates the access
  824. * and is usually followed by close() or a new ioctl(..., START, ...) which
  825. * creates a new request structure.
  826. */
  827. static void cfset_all_stop(struct cfset_request *req)
  828. {
  829. struct cfset_call_on_cpu_parm p = {
  830. .sets = req->ctrset,
  831. };
  832. cpumask_and(&req->mask, &req->mask, cpu_online_mask);
  833. on_each_cpu_mask(&req->mask, cfset_ioctl_off, &p, 1);
  834. }
  835. /* Release function is also called when the application gets terminated without
  836. * doing a proper ioctl(..., S390_HWCTR_STOP, ...) command.
  837. */
  838. static int cfset_release(struct inode *inode, struct file *file)
  839. {
  840. mutex_lock(&cfset_ctrset_mutex);
  841. /* Open followed by close/exit has no private_data */
  842. if (file->private_data) {
  843. cfset_all_stop(file->private_data);
  844. cfset_session_del(file->private_data);
  845. kfree(file->private_data);
  846. file->private_data = NULL;
  847. }
  848. if (!atomic_dec_return(&cfset_opencnt))
  849. on_each_cpu(cfset_release_cpu, NULL, 1);
  850. mutex_unlock(&cfset_ctrset_mutex);
  851. hw_perf_event_destroy(NULL);
  852. return 0;
  853. }
  854. static int cfset_open(struct inode *inode, struct file *file)
  855. {
  856. if (!capable(CAP_SYS_ADMIN))
  857. return -EPERM;
  858. mutex_lock(&cfset_ctrset_mutex);
  859. if (atomic_inc_return(&cfset_opencnt) == 1)
  860. cfset_session_init();
  861. mutex_unlock(&cfset_ctrset_mutex);
  862. cpumf_hw_inuse();
  863. file->private_data = NULL;
  864. /* nonseekable_open() never fails */
  865. return nonseekable_open(inode, file);
  866. }
  867. static int cfset_all_start(struct cfset_request *req)
  868. {
  869. struct cfset_call_on_cpu_parm p = {
  870. .sets = req->ctrset,
  871. .cpus_ack = ATOMIC_INIT(0),
  872. };
  873. cpumask_var_t mask;
  874. int rc = 0;
  875. if (!alloc_cpumask_var(&mask, GFP_KERNEL))
  876. return -ENOMEM;
  877. cpumask_and(mask, &req->mask, cpu_online_mask);
  878. on_each_cpu_mask(mask, cfset_ioctl_on, &p, 1);
  879. if (atomic_read(&p.cpus_ack) != cpumask_weight(mask)) {
  880. on_each_cpu_mask(mask, cfset_ioctl_off, &p, 1);
  881. rc = -EIO;
  882. debug_sprintf_event(cf_dbg, 4, "%s CPUs missing", __func__);
  883. }
  884. free_cpumask_var(mask);
  885. return rc;
  886. }
  887. /* Return the maximum required space for all possible CPUs in case one
  888. * CPU will be onlined during the START, READ, STOP cycles.
  889. * To find out the size of the counter sets, any one CPU will do. They
  890. * all have the same counter sets.
  891. */
  892. static size_t cfset_needspace(unsigned int sets)
  893. {
  894. struct cpu_cf_events *cpuhw = get_cpu_ptr(&cpu_cf_events);
  895. size_t bytes = 0;
  896. int i;
  897. for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
  898. if (!(sets & cpumf_ctr_ctl[i]))
  899. continue;
  900. bytes += cpum_cf_ctrset_size(i, &cpuhw->info) * sizeof(u64) +
  901. sizeof(((struct s390_ctrset_setdata *)0)->set) +
  902. sizeof(((struct s390_ctrset_setdata *)0)->no_cnts);
  903. }
  904. bytes = sizeof(((struct s390_ctrset_read *)0)->no_cpus) + nr_cpu_ids *
  905. (bytes + sizeof(((struct s390_ctrset_cpudata *)0)->cpu_nr) +
  906. sizeof(((struct s390_ctrset_cpudata *)0)->no_sets));
  907. put_cpu_ptr(&cpu_cf_events);
  908. return bytes;
  909. }
  910. static int cfset_all_copy(unsigned long arg, cpumask_t *mask)
  911. {
  912. struct s390_ctrset_read __user *ctrset_read;
  913. unsigned int cpu, cpus, rc;
  914. void __user *uptr;
  915. ctrset_read = (struct s390_ctrset_read __user *)arg;
  916. uptr = ctrset_read->data;
  917. for_each_cpu(cpu, mask) {
  918. struct cpu_cf_events *cpuhw = per_cpu_ptr(&cpu_cf_events, cpu);
  919. struct s390_ctrset_cpudata __user *ctrset_cpudata;
  920. ctrset_cpudata = uptr;
  921. rc = put_user(cpu, &ctrset_cpudata->cpu_nr);
  922. rc |= put_user(cpuhw->sets, &ctrset_cpudata->no_sets);
  923. rc |= copy_to_user(ctrset_cpudata->data, cpuhw->data,
  924. cpuhw->used);
  925. if (rc)
  926. return -EFAULT;
  927. uptr += sizeof(struct s390_ctrset_cpudata) + cpuhw->used;
  928. cond_resched();
  929. }
  930. cpus = cpumask_weight(mask);
  931. if (put_user(cpus, &ctrset_read->no_cpus))
  932. return -EFAULT;
  933. debug_sprintf_event(cf_dbg, 4, "%s copied %ld\n", __func__,
  934. uptr - (void __user *)ctrset_read->data);
  935. return 0;
  936. }
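/* Illustrative user space sketch (not part of this file) of walking a buffer
 * filled by the copy loop above. It uses only the s390_ctrset_read,
 * s390_ctrset_cpudata and s390_ctrset_setdata members referenced in this
 * file; that 'data' and 'cv' are trailing flexible arrays is an assumption
 * based on the pointer arithmetic above, and the variable names are made up.
 *
 *	struct s390_ctrset_read *rd = read_buffer;
 *	void *p = rd->data;
 *	__u64 cpu, set;
 *
 *	for (cpu = 0; cpu < rd->no_cpus; cpu++) {
 *		struct s390_ctrset_cpudata *cd = p;
 *		void *q = cd->data;
 *
 *		for (set = 0; set < cd->no_sets; set++) {
 *			struct s390_ctrset_setdata *sd = q;
 *
 *			q += sizeof(*sd) + sd->no_cnts * sizeof(__u64);
 *		}
 *		p = q;
 *	}
 */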
  937. static size_t cfset_cpuset_read(struct s390_ctrset_setdata *p, int ctrset,
  938. int ctrset_size, size_t room)
  939. {
  940. size_t need = 0;
  941. int rc = -1;
  942. need = sizeof(*p) + sizeof(u64) * ctrset_size;
  943. if (need <= room) {
  944. p->set = cpumf_ctr_ctl[ctrset];
  945. p->no_cnts = ctrset_size;
  946. rc = ctr_stcctm(ctrset, ctrset_size, (u64 *)p->cv);
  947. if (rc == 3) /* Nothing stored */
  948. need = 0;
  949. }
  950. return need;
  951. }
  952. /* Read all counter sets. */
  953. static void cfset_cpu_read(void *parm)
  954. {
  955. struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
  956. struct cfset_call_on_cpu_parm *p = parm;
  957. int set, set_size;
  958. size_t space;
  959. /* No data saved yet */
  960. cpuhw->used = 0;
  961. cpuhw->sets = 0;
  962. memset(cpuhw->data, 0, sizeof(cpuhw->data));
  963. /* Scan the counter sets */
  964. for (set = CPUMF_CTR_SET_BASIC; set < CPUMF_CTR_SET_MAX; ++set) {
  965. struct s390_ctrset_setdata *sp = (void *)cpuhw->data +
  966. cpuhw->used;
  967. if (!(p->sets & cpumf_ctr_ctl[set]))
  968. continue; /* Counter set not in list */
  969. set_size = cpum_cf_ctrset_size(set, &cpuhw->info);
  970. space = sizeof(cpuhw->data) - cpuhw->used;
  971. space = cfset_cpuset_read(sp, set, set_size, space);
  972. if (space) {
  973. cpuhw->used += space;
  974. cpuhw->sets += 1;
  975. }
  976. }
  977. debug_sprintf_event(cf_dbg, 4, "%s sets %d used %zd\n", __func__,
  978. cpuhw->sets, cpuhw->used);
  979. }
  980. static int cfset_all_read(unsigned long arg, struct cfset_request *req)
  981. {
  982. struct cfset_call_on_cpu_parm p;
  983. cpumask_var_t mask;
  984. int rc;
  985. if (!alloc_cpumask_var(&mask, GFP_KERNEL))
  986. return -ENOMEM;
  987. p.sets = req->ctrset;
  988. cpumask_and(mask, &req->mask, cpu_online_mask);
  989. on_each_cpu_mask(mask, cfset_cpu_read, &p, 1);
  990. rc = cfset_all_copy(arg, mask);
  991. free_cpumask_var(mask);
  992. return rc;
  993. }
  994. static long cfset_ioctl_read(unsigned long arg, struct cfset_request *req)
  995. {
  996. struct s390_ctrset_read read;
  997. int ret = -ENODATA;
  998. if (req && req->ctrset) {
  999. if (copy_from_user(&read, (char __user *)arg, sizeof(read)))
  1000. return -EFAULT;
  1001. ret = cfset_all_read(arg, req);
  1002. }
  1003. return ret;
  1004. }
  1005. static long cfset_ioctl_stop(struct file *file)
  1006. {
  1007. struct cfset_request *req = file->private_data;
  1008. int ret = -ENXIO;
  1009. if (req) {
  1010. cfset_all_stop(req);
  1011. cfset_session_del(req);
  1012. kfree(req);
  1013. file->private_data = NULL;
  1014. ret = 0;
  1015. }
  1016. return ret;
  1017. }
  1018. static long cfset_ioctl_start(unsigned long arg, struct file *file)
  1019. {
  1020. struct s390_ctrset_start __user *ustart;
  1021. struct s390_ctrset_start start;
  1022. struct cfset_request *preq;
  1023. void __user *umask;
  1024. unsigned int len;
  1025. int ret = 0;
  1026. size_t need;
  1027. if (file->private_data)
  1028. return -EBUSY;
  1029. ustart = (struct s390_ctrset_start __user *)arg;
  1030. if (copy_from_user(&start, ustart, sizeof(start)))
  1031. return -EFAULT;
  1032. if (start.version != S390_HWCTR_START_VERSION)
  1033. return -EINVAL;
  1034. if (start.counter_sets & ~(cpumf_ctr_ctl[CPUMF_CTR_SET_BASIC] |
  1035. cpumf_ctr_ctl[CPUMF_CTR_SET_USER] |
  1036. cpumf_ctr_ctl[CPUMF_CTR_SET_CRYPTO] |
  1037. cpumf_ctr_ctl[CPUMF_CTR_SET_EXT] |
  1038. cpumf_ctr_ctl[CPUMF_CTR_SET_MT_DIAG]))
  1039. return -EINVAL; /* Invalid counter set */
  1040. if (!start.counter_sets)
  1041. return -EINVAL; /* No counter set at all? */
  1042. preq = kzalloc(sizeof(*preq), GFP_KERNEL);
  1043. if (!preq)
  1044. return -ENOMEM;
  1045. cpumask_clear(&preq->mask);
  1046. len = min_t(u64, start.cpumask_len, cpumask_size());
  1047. umask = (void __user *)start.cpumask;
  1048. if (copy_from_user(&preq->mask, umask, len)) {
  1049. kfree(preq);
  1050. return -EFAULT;
  1051. }
  1052. if (cpumask_empty(&preq->mask)) {
  1053. kfree(preq);
  1054. return -EINVAL;
  1055. }
  1056. need = cfset_needspace(start.counter_sets);
  1057. if (put_user(need, &ustart->data_bytes)) {
  1058. kfree(preq);
  1059. return -EFAULT;
  1060. }
  1061. preq->ctrset = start.counter_sets;
  1062. ret = cfset_all_start(preq);
  1063. if (!ret) {
  1064. cfset_session_add(preq);
  1065. file->private_data = preq;
  1066. debug_sprintf_event(cf_dbg, 4, "%s set %#lx need %ld ret %d\n",
  1067. __func__, preq->ctrset, need, ret);
  1068. } else {
  1069. kfree(preq);
  1070. }
  1071. return ret;
  1072. }
  1073. /* Entry point to the /dev/hwctr device interface.
  1074. * The ioctl system call supports three subcommands:
  1075. * S390_HWCTR_START: Start the specified counter sets on a CPU list. The
  1076. * counter set keeps running until explicitly stopped. Returns the number
  1077. * of bytes needed to store the counter values. If another S390_HWCTR_START
  1078. * ioctl subcommand is called without a previous S390_HWCTR_STOP stop
  1079. * command on the same file descriptor, -EBUSY is returned.
  1080. * S390_HWCTR_READ: Read the counter set values from specified CPU list given
  1081. * with the S390_HWCTR_START command.
  1082. * S390_HWCTR_STOP: Stops the counter sets on the CPU list given with the
  1083. * previous S390_HWCTR_START subcommand.
  1084. */
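/* Illustrative user space sketch (not part of this file) of the subcommand
 * sequence documented above. Structure and ioctl names come from the
 * asm/hwctrset.h definitions used in this file. The basic counter set bit
 * (0x02, matching cpumf_ctr_ctl[CPUMF_CTR_SET_BASIC]), the one-CPU mask
 * selecting CPU 0, treating the cpumask member as a 64-bit user address,
 * and the omitted error handling are assumptions of the sketch.
 *
 *	__u64 cpu_mask = 1;
 *	struct s390_ctrset_start start = {
 *		.version = S390_HWCTR_START_VERSION,
 *		.counter_sets = 0x02,
 *		.cpumask_len = sizeof(cpu_mask),
 *		.cpumask = (__u64)&cpu_mask,
 *	};
 *	int fd = open("/dev/hwctr", O_RDWR);
 *	void *buf;
 *
 *	ioctl(fd, S390_HWCTR_START, &start);
 *	buf = malloc(start.data_bytes);
 *	ioctl(fd, S390_HWCTR_READ, buf);
 *	ioctl(fd, S390_HWCTR_STOP, 0);
 *	close(fd);
 */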
  1085. static long cfset_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
  1086. {
  1087. int ret;
  1088. cpus_read_lock();
  1089. mutex_lock(&cfset_ctrset_mutex);
  1090. switch (cmd) {
  1091. case S390_HWCTR_START:
  1092. ret = cfset_ioctl_start(arg, file);
  1093. break;
  1094. case S390_HWCTR_STOP:
  1095. ret = cfset_ioctl_stop(file);
  1096. break;
  1097. case S390_HWCTR_READ:
  1098. ret = cfset_ioctl_read(arg, file->private_data);
  1099. break;
  1100. default:
  1101. ret = -ENOTTY;
  1102. break;
  1103. }
  1104. mutex_unlock(&cfset_ctrset_mutex);
  1105. cpus_read_unlock();
  1106. return ret;
  1107. }
  1108. static const struct file_operations cfset_fops = {
  1109. .owner = THIS_MODULE,
  1110. .open = cfset_open,
  1111. .release = cfset_release,
  1112. .unlocked_ioctl = cfset_ioctl,
  1113. .compat_ioctl = cfset_ioctl,
  1114. .llseek = no_llseek
  1115. };
  1116. static struct miscdevice cfset_dev = {
  1117. .name = S390_HWCTR_DEVICE,
  1118. .minor = MISC_DYNAMIC_MINOR,
  1119. .fops = &cfset_fops,
  1120. };
  1121. /* Hotplug add of a CPU. Scan through all active processes and add
  1122. * that CPU to the list of CPUs supplied with ioctl(..., START, ...).
  1123. */
  1124. int cfset_online_cpu(unsigned int cpu)
  1125. {
  1126. struct cfset_call_on_cpu_parm p;
  1127. struct cfset_request *rp;
  1128. mutex_lock(&cfset_ctrset_mutex);
  1129. if (!list_empty(&cfset_session.head)) {
  1130. list_for_each_entry(rp, &cfset_session.head, node) {
  1131. p.sets = rp->ctrset;
  1132. cfset_ioctl_on(&p);
  1133. cpumask_set_cpu(cpu, &rp->mask);
  1134. }
  1135. }
  1136. mutex_unlock(&cfset_ctrset_mutex);
  1137. return 0;
  1138. }
  1139. /* Hotplug remove of a CPU. Scan through all active processes and clear
  1140. * that CPU from the list of CPUs supplied with ioctl(..., START, ...).
  1141. */
  1142. int cfset_offline_cpu(unsigned int cpu)
  1143. {
  1144. struct cfset_call_on_cpu_parm p;
  1145. struct cfset_request *rp;
  1146. mutex_lock(&cfset_ctrset_mutex);
  1147. if (!list_empty(&cfset_session.head)) {
  1148. list_for_each_entry(rp, &cfset_session.head, node) {
  1149. p.sets = rp->ctrset;
  1150. cfset_ioctl_off(&p);
  1151. cpumask_clear_cpu(cpu, &rp->mask);
  1152. }
  1153. }
  1154. mutex_unlock(&cfset_ctrset_mutex);
  1155. return 0;
  1156. }
  1157. static void cfdiag_read(struct perf_event *event)
  1158. {
  1159. debug_sprintf_event(cf_dbg, 3, "%s event %#llx count %ld\n", __func__,
  1160. event->attr.config, local64_read(&event->count));
  1161. }
  1162. static int get_authctrsets(void)
  1163. {
  1164. struct cpu_cf_events *cpuhw;
  1165. unsigned long auth = 0;
  1166. enum cpumf_ctr_set i;
  1167. cpuhw = &get_cpu_var(cpu_cf_events);
  1168. for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
  1169. if (cpuhw->info.auth_ctl & cpumf_ctr_ctl[i])
  1170. auth |= cpumf_ctr_ctl[i];
  1171. }
  1172. put_cpu_var(cpu_cf_events);
  1173. return auth;
  1174. }
  1175. /* Setup the event. Test for authorized counter sets and only include counter
  1176. * sets which are authorized at the time of the setup. Including unauthorized
  1177. * counter sets results in a specification exception (and panic).
  1178. */
  1179. static int cfdiag_event_init2(struct perf_event *event)
  1180. {
  1181. struct perf_event_attr *attr = &event->attr;
  1182. int err = 0;
  1183. /* Set sample_period to indicate sampling */
  1184. event->hw.config = attr->config;
  1185. event->hw.sample_period = attr->sample_period;
  1186. local64_set(&event->hw.period_left, event->hw.sample_period);
  1187. local64_set(&event->count, 0);
  1188. event->hw.last_period = event->hw.sample_period;
  1189. /* Add all authorized counter sets to config_base. The
  1190. * hardware init function is either called per-cpu or just once
  1191. * for all CPUs (event->cpu == -1). This depends on whether
  1192. * counting is started for all CPUs or on a per-workload basis where
  1193. * the perf event moves from one CPU to another CPU.
  1194. * Checking the authorization on any CPU is fine as the hardware
  1195. * applies the same authorization settings to all CPUs.
  1196. */
  1197. event->hw.config_base = get_authctrsets();
  1198. /* No authorized counter sets, nothing to count/sample */
  1199. if (!event->hw.config_base)
  1200. err = -EINVAL;
  1201. debug_sprintf_event(cf_dbg, 5, "%s err %d config_base %#lx\n",
  1202. __func__, err, event->hw.config_base);
  1203. return err;
  1204. }
  1205. static int cfdiag_event_init(struct perf_event *event)
  1206. {
  1207. struct perf_event_attr *attr = &event->attr;
  1208. int err = -ENOENT;
  1209. if (event->attr.config != PERF_EVENT_CPUM_CF_DIAG ||
  1210. event->attr.type != event->pmu->type)
  1211. goto out;
  1212. /* Raw events are used to access counters directly,
  1213. * hence do not permit excludes.
  1214. * This event is useless without PERF_SAMPLE_RAW to return counter set
  1215. * values as raw data.
  1216. */
  1217. if (attr->exclude_kernel || attr->exclude_user || attr->exclude_hv ||
  1218. !(attr->sample_type & (PERF_SAMPLE_CPU | PERF_SAMPLE_RAW))) {
  1219. err = -EOPNOTSUPP;
  1220. goto out;
  1221. }
  1222. /* Initialize for using the CPU-measurement counter facility */
  1223. cpumf_hw_inuse();
  1224. event->destroy = hw_perf_event_destroy;
  1225. err = cfdiag_event_init2(event);
  1226. if (unlikely(err))
  1227. event->destroy(event);
  1228. out:
  1229. return err;
  1230. }
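/* Illustrative user space sketch (not part of this file): opening the CF_DIAG
 * event so that the counter sets arrive as PERF_SAMPLE_RAW data, as required
 * above. pmu_type and cf_diag_code are assumed to have been read from the
 * usual sysfs attributes /sys/bus/event_source/devices/cpum_cf_diag/type and
 * /sys/bus/event_source/devices/cpum_cf_diag/events/CF_DIAG; the ring buffer
 * handling needed to consume the samples is omitted.
 *
 *	struct perf_event_attr attr = {
 *		.type = pmu_type,
 *		.size = sizeof(attr),
 *		.config = cf_diag_code,
 *		.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CPU,
 *		.sample_period = 1,
 *		.disabled = 1,
 *	};
 *	int fd = syscall(__NR_perf_event_open, &attr, getpid(), -1, -1, 0);
 */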
  1231. /* Create cf_diag/events/CF_DIAG event sysfs file. This counter is used
  1232. * to collect the complete counter sets for a scheduled process. The target
  1233. * is complete counter sets attached as raw data to the artificial event.
  1234. * This results in complete counter sets available when a process is
  1235. * scheduled. Contains the delta of every counter while the process was
  1236. * running.
  1237. */
  1238. CPUMF_EVENT_ATTR(CF_DIAG, CF_DIAG, PERF_EVENT_CPUM_CF_DIAG);
  1239. static struct attribute *cfdiag_events_attr[] = {
  1240. CPUMF_EVENT_PTR(CF_DIAG, CF_DIAG),
  1241. NULL,
  1242. };
  1243. PMU_FORMAT_ATTR(event, "config:0-63");
  1244. static struct attribute *cfdiag_format_attr[] = {
  1245. &format_attr_event.attr,
  1246. NULL,
  1247. };
  1248. static struct attribute_group cfdiag_events_group = {
  1249. .name = "events",
  1250. .attrs = cfdiag_events_attr,
  1251. };
  1252. static struct attribute_group cfdiag_format_group = {
  1253. .name = "format",
  1254. .attrs = cfdiag_format_attr,
  1255. };
  1256. static const struct attribute_group *cfdiag_attr_groups[] = {
  1257. &cfdiag_events_group,
  1258. &cfdiag_format_group,
  1259. NULL,
  1260. };
  1261. /* Performance monitoring unit for event CF_DIAG. Since this event
  1262. * is also started and stopped via the perf_event_open() system call, use
  1263. * the same event enable/disable call back functions. They do not
  1264. * have a pointer to the perf_event structure as first parameter.
  1265. *
  1266. * The functions XXX_add, XXX_del, XXX_start and XXX_stop are also common.
  1267. * Reuse them and distinguish the event (always first parameter) via
  1268. * 'config' member.
  1269. */
  1270. static struct pmu cf_diag = {
  1271. .task_ctx_nr = perf_sw_context,
  1272. .event_init = cfdiag_event_init,
  1273. .pmu_enable = cpumf_pmu_enable,
  1274. .pmu_disable = cpumf_pmu_disable,
  1275. .add = cpumf_pmu_add,
  1276. .del = cpumf_pmu_del,
  1277. .start = cpumf_pmu_start,
  1278. .stop = cpumf_pmu_stop,
  1279. .read = cfdiag_read,
  1280. .attr_groups = cfdiag_attr_groups
  1281. };
  1282. /* Calculate memory needed to store all counter sets together with header and
  1283. * trailer data. This is independent of the counter set authorization which
  1284. * can vary depending on the configuration.
  1285. */
  1286. static size_t cfdiag_maxsize(struct cpumf_ctr_info *info)
  1287. {
  1288. size_t max_size = sizeof(struct cf_trailer_entry);
  1289. enum cpumf_ctr_set i;
  1290. for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
  1291. size_t size = cpum_cf_ctrset_size(i, info);
  1292. if (size)
  1293. max_size += size * sizeof(u64) +
  1294. sizeof(struct cf_ctrset_entry);
  1295. }
  1296. return max_size;
  1297. }
  1298. /* Get the CPU speed, try sampling facility first and CPU attributes second. */
  1299. static void cfdiag_get_cpu_speed(void)
  1300. {
  1301. unsigned long mhz;
  1302. if (cpum_sf_avail()) { /* Sampling facility first */
  1303. struct hws_qsi_info_block si;
  1304. memset(&si, 0, sizeof(si));
  1305. if (!qsi(&si)) {
  1306. cfdiag_cpu_speed = si.cpu_speed;
  1307. return;
  1308. }
  1309. }
  1310. /* Fallback: extract the static part of the CPU speed. Used in case
  1311. * the CPU Measurement Sampling Facility is turned off.
  1312. */
  1313. mhz = __ecag(ECAG_CPU_ATTRIBUTE, 0);
  1314. if (mhz != -1UL)
  1315. cfdiag_cpu_speed = mhz & 0xffffffff;
  1316. }
  1317. static int cfset_init(void)
  1318. {
  1319. struct cpumf_ctr_info info;
  1320. size_t need;
  1321. int rc;
  1322. if (qctri(&info))
  1323. return -ENODEV;
  1324. cfdiag_get_cpu_speed();
  1325. /* Make sure the counter set data fits into predefined buffer. */
  1326. need = cfdiag_maxsize(&info);
  1327. if (need > sizeof(((struct cpu_cf_events *)0)->start)) {
  1328. pr_err("Insufficient memory for PMU(cpum_cf_diag) need=%zu\n",
  1329. need);
  1330. return -ENOMEM;
  1331. }
  1332. rc = misc_register(&cfset_dev);
  1333. if (rc) {
  1334. pr_err("Registration of /dev/%s failed rc=%i\n",
  1335. cfset_dev.name, rc);
  1336. goto out;
  1337. }
  1338. rc = perf_pmu_register(&cf_diag, "cpum_cf_diag", -1);
  1339. if (rc) {
  1340. misc_deregister(&cfset_dev);
  1341. pr_err("Registration of PMU(cpum_cf_diag) failed with rc=%i\n",
  1342. rc);
  1343. }
  1344. out:
  1345. return rc;
  1346. }
  1347. device_initcall(cpumf_pmu_init);