intel_rapl_common.c 42 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Common code for Intel Running Average Power Limit (RAPL) support.
  4. * Copyright (c) 2019, Intel Corporation.
  5. */
  6. #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  7. #include <linux/kernel.h>
  8. #include <linux/module.h>
  9. #include <linux/list.h>
  10. #include <linux/types.h>
  11. #include <linux/device.h>
  12. #include <linux/slab.h>
  13. #include <linux/log2.h>
  14. #include <linux/bitmap.h>
  15. #include <linux/delay.h>
  16. #include <linux/sysfs.h>
  17. #include <linux/cpu.h>
  18. #include <linux/powercap.h>
  19. #include <linux/suspend.h>
  20. #include <linux/intel_rapl.h>
  21. #include <linux/processor.h>
  22. #include <linux/platform_device.h>
  23. #include <asm/iosf_mbi.h>
  24. #include <asm/cpu_device_id.h>
  25. #include <asm/intel-family.h>
  /*
   * Bitmasks for RAPL MSRs, used by the primitive access functions.
   * Each mask selects one field of a 64-bit register value; the matching
   * shift for every field is listed next to its mask in the rpi[] table.
   */
  #define ENERGY_STATUS_MASK 0xffffffff
  /* Power limit register: PL1 fields in the low half, PL2 fields in the high half */
  #define POWER_LIMIT1_MASK 0x7FFF
  #define POWER_LIMIT1_ENABLE BIT(15)
  #define POWER_LIMIT1_CLAMP BIT(16)
  #define POWER_LIMIT2_MASK (0x7FFFULL<<32)
  #define POWER_LIMIT2_ENABLE BIT_ULL(47)
  #define POWER_LIMIT2_CLAMP BIT_ULL(48)
  /* Lock bit position: bit 63 or bit 31 (see FW_LOCK in rapl_read_data_raw()) */
  #define POWER_HIGH_LOCK BIT_ULL(63)
  #define POWER_LOW_LOCK BIT(31)
  #define POWER_LIMIT4_MASK 0x1FFF
  #define TIME_WINDOW1_MASK (0x7FULL<<17)
  #define TIME_WINDOW2_MASK (0x7FULL<<49)
  /* Unit register fields, decoded by rapl_check_unit_core()/_atom() */
  #define POWER_UNIT_OFFSET 0
  #define POWER_UNIT_MASK 0x0F
  #define ENERGY_UNIT_OFFSET 0x08
  #define ENERGY_UNIT_MASK 0x1F00
  #define TIME_UNIT_OFFSET 0x10
  #define TIME_UNIT_MASK 0xF0000
  /* Fields of the info register (RAPL_DOMAIN_REG_INFO in rpi[]) */
  #define POWER_INFO_MAX_MASK (0x7fffULL<<32)
  #define POWER_INFO_MIN_MASK (0x7fffULL<<16)
  #define POWER_INFO_MAX_TIME_WIN_MASK (0x3fULL<<48)
  #define POWER_INFO_THERMAL_SPEC_MASK 0x7fff
  /* Fields of the perf status and policy registers */
  #define PERF_STATUS_THROTTLE_TIME_MASK 0xffffffff
  #define PP_POLICY_MASK 0x1F
  /*
   * SPR has a different layout for the Psys domain PowerLimit registers.
   * PL1 and PL2 are 17 bits wide instead of 15 bits, so the Enable bits
   * and the TimeWindow fields are shifted up by two bits as a result.
   */
  #define PSYS_POWER_LIMIT1_MASK 0x1FFFF
  #define PSYS_POWER_LIMIT1_ENABLE BIT(17)
  #define PSYS_POWER_LIMIT2_MASK (0x1FFFFULL<<32)
  #define PSYS_POWER_LIMIT2_ENABLE BIT_ULL(49)
  #define PSYS_TIME_WINDOW1_MASK (0x7FULL<<19)
  #define PSYS_TIME_WINDOW2_MASK (0x7FULL<<51)
  /* Non-hardware constants */
  #define RAPL_PRIMITIVE_DERIVED BIT(1) /* not from raw data */
  #define RAPL_PRIMITIVE_DUMMY BIT(2) /* rapl_read_data_raw() rejects primitives carrying this flag */
  #define TIME_WINDOW_MAX_MSEC 40000
  #define TIME_WINDOW_MIN_MSEC 250
  #define ENERGY_UNIT_SCALE 1000 /* scale from driver unit to powercap unit */
  /*
   * Kind of unit conversion rapl_unit_xlate() applies to a primitive's
   * value; each rpi[] entry carries one of these.
   */
  enum unit_type {
  	ARBITRARY_UNIT, /* no translation */
  	POWER_UNIT,	/* scaled by the package power unit */
  	ENERGY_UNIT,	/* scaled by the domain or package energy unit */
  	TIME_UNIT,	/* converted by rapl_defaults->compute_time_window() */
  };
  /* per domain data, some are optional */
  #define NR_RAW_PRIMITIVES (NR_RAPL_PRIMITIVES - 2)
  /* Bits kept in rapl_domain.state */
  #define DOMAIN_STATE_INACTIVE BIT(0)
  #define DOMAIN_STATE_POWER_LIMIT_SET BIT(1)
  #define DOMAIN_STATE_BIOS_LOCKED BIT(2) /* limits are read-only, see set_power_limit() */
  /* Constraint names assigned to rd->rpl[] entries by rapl_init_domains() */
  static const char pl1_name[] = "long_term";
  static const char pl2_name[] = "short_term";
  static const char pl4_name[] = "peak_power";
  /* Map a powercap zone back to the rapl_domain that embeds it */
  #define power_zone_to_rapl_domain(_zone) \
  	container_of(_zone, struct rapl_domain, power_zone)
  /*
   * Defaults and hooks consulted by the common code through the
   * file-scope rapl_defaults pointer declared right after the struct.
   */
  struct rapl_defaults {
  	/* NOTE(review): not referenced in this part of the file - presumably used by a set_floor_freq hook; confirm */
  	u8 floor_freq_reg_addr;
  	/* same signature as rapl_check_unit_core() and rapl_check_unit_atom() */
  	int (*check_unit)(struct rapl_package *rp, int cpu);
  	/* optional; set_domain_enable() calls it only when non-NULL */
  	void (*set_floor_freq)(struct rapl_domain *rd, bool mode);
  	/* TIME_UNIT conversion used by rapl_unit_xlate() */
  	u64 (*compute_time_window)(struct rapl_package *rp, u64 val,
  				   bool to_raw);
  	/* when non-zero, rapl_init_domains() copies these into the domain's own energy unit */
  	unsigned int dram_domain_energy_unit;
  	unsigned int psys_domain_energy_unit;
  	/* when true, prim_fixups() remaps platform-domain primitives to PSYS_* ones */
  	bool spr_psys_bits;
  };
  static struct rapl_defaults *rapl_defaults;
  /* Sideband MBI registers */
  #define IOSF_CPU_POWER_BUDGET_CTL_BYT (0x2)
  #define IOSF_CPU_POWER_BUDGET_CTL_TNG (0xdf)
  /* Set in rapl_package.power_limit_irq once the PLN enable bit has been saved */
  #define PACKAGE_PLN_INT_SAVED BIT(0)
  #define MAX_PRIM_NAME (32)
  /*
   * Per domain data. Used to describe individual knobs such that the access
   * functions can be consolidated into one instead of many inline functions.
   */
  struct rapl_primitive_info {
  	const char *name;		/* stringified primitive name; NULL marks an unusable entry */
  	u64 mask;			/* bits of the register that hold the field */
  	int shift;			/* right-shift that brings the field to bit 0 */
  	enum rapl_domain_reg_id id;	/* index into rd->regs[] of the register to access */
  	enum unit_type unit;		/* conversion applied by rapl_unit_xlate() */
  	u32 flag;			/* RAPL_PRIMITIVE_* flags */
  };
  /* Build one rpi[] entry; the primitive's enum name @p is stringified into .name */
  #define PRIMITIVE_INFO_INIT(p, m, s, i, u, f) { \
  	.name = #p, \
  	.mask = m, \
  	.shift = s, \
  	.id = i, \
  	.unit = u, \
  	.flag = f \
  	}
  /* Forward declarations for helpers that are used before their definition */
  static void rapl_init_domains(struct rapl_package *rp);
  static int rapl_read_data_raw(struct rapl_domain *rd,
  			      enum rapl_primitives prim,
  			      bool xlate, u64 *data);
  static int rapl_write_data_raw(struct rapl_domain *rd,
  			       enum rapl_primitives prim,
  			       unsigned long long value);
  static u64 rapl_unit_xlate(struct rapl_domain *rd,
  			   enum unit_type type, u64 value, int to_raw);
  static void package_power_limit_irq_save(struct rapl_package *rp);
  static LIST_HEAD(rapl_packages); /* guarded by CPU hotplug lock */
  /* Zone names; rapl_init_domains() indexes this array by enum rapl_domain_type */
  static const char *const rapl_domain_names[] = {
  	"package",
  	"core",
  	"uncore",
  	"dram",
  	"psys",
  };
  137. static int get_energy_counter(struct powercap_zone *power_zone,
  138. u64 *energy_raw)
  139. {
  140. struct rapl_domain *rd;
  141. u64 energy_now;
  142. /* prevent CPU hotplug, make sure the RAPL domain does not go
  143. * away while reading the counter.
  144. */
  145. cpus_read_lock();
  146. rd = power_zone_to_rapl_domain(power_zone);
  147. if (!rapl_read_data_raw(rd, ENERGY_COUNTER, true, &energy_now)) {
  148. *energy_raw = energy_now;
  149. cpus_read_unlock();
  150. return 0;
  151. }
  152. cpus_read_unlock();
  153. return -EIO;
  154. }
  155. static int get_max_energy_counter(struct powercap_zone *pcd_dev, u64 *energy)
  156. {
  157. struct rapl_domain *rd = power_zone_to_rapl_domain(pcd_dev);
  158. *energy = rapl_unit_xlate(rd, ENERGY_UNIT, ENERGY_STATUS_MASK, 0);
  159. return 0;
  160. }
  161. static int release_zone(struct powercap_zone *power_zone)
  162. {
  163. struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone);
  164. struct rapl_package *rp = rd->rp;
  165. /* package zone is the last zone of a package, we can free
  166. * memory here since all children has been unregistered.
  167. */
  168. if (rd->id == RAPL_DOMAIN_PACKAGE) {
  169. kfree(rd);
  170. rp->domains = NULL;
  171. }
  172. return 0;
  173. }
  174. static int find_nr_power_limit(struct rapl_domain *rd)
  175. {
  176. int i, nr_pl = 0;
  177. for (i = 0; i < NR_POWER_LIMITS; i++) {
  178. if (rd->rpl[i].name)
  179. nr_pl++;
  180. }
  181. return nr_pl;
  182. }
  183. static int set_domain_enable(struct powercap_zone *power_zone, bool mode)
  184. {
  185. struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone);
  186. if (rd->state & DOMAIN_STATE_BIOS_LOCKED)
  187. return -EACCES;
  188. cpus_read_lock();
  189. rapl_write_data_raw(rd, PL1_ENABLE, mode);
  190. if (rapl_defaults->set_floor_freq)
  191. rapl_defaults->set_floor_freq(rd, mode);
  192. cpus_read_unlock();
  193. return 0;
  194. }
  195. static int get_domain_enable(struct powercap_zone *power_zone, bool *mode)
  196. {
  197. struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone);
  198. u64 val;
  199. if (rd->state & DOMAIN_STATE_BIOS_LOCKED) {
  200. *mode = false;
  201. return 0;
  202. }
  203. cpus_read_lock();
  204. if (rapl_read_data_raw(rd, PL1_ENABLE, true, &val)) {
  205. cpus_read_unlock();
  206. return -EIO;
  207. }
  208. *mode = val;
  209. cpus_read_unlock();
  210. return 0;
  211. }
  /*
   * Per RAPL domain ops, in the order of rapl_domain_type.
   * Every domain type is wired to the same five callbacks defined above.
   */
  static const struct powercap_zone_ops zone_ops[] = {
  	/* RAPL_DOMAIN_PACKAGE */
  	{
  	 .get_energy_uj = get_energy_counter,
  	 .get_max_energy_range_uj = get_max_energy_counter,
  	 .release = release_zone,
  	 .set_enable = set_domain_enable,
  	 .get_enable = get_domain_enable,
  	 },
  	/* RAPL_DOMAIN_PP0 */
  	{
  	 .get_energy_uj = get_energy_counter,
  	 .get_max_energy_range_uj = get_max_energy_counter,
  	 .release = release_zone,
  	 .set_enable = set_domain_enable,
  	 .get_enable = get_domain_enable,
  	 },
  	/* RAPL_DOMAIN_PP1 */
  	{
  	 .get_energy_uj = get_energy_counter,
  	 .get_max_energy_range_uj = get_max_energy_counter,
  	 .release = release_zone,
  	 .set_enable = set_domain_enable,
  	 .get_enable = get_domain_enable,
  	 },
  	/* RAPL_DOMAIN_DRAM */
  	{
  	 .get_energy_uj = get_energy_counter,
  	 .get_max_energy_range_uj = get_max_energy_counter,
  	 .release = release_zone,
  	 .set_enable = set_domain_enable,
  	 .get_enable = get_domain_enable,
  	 },
  	/* RAPL_DOMAIN_PLATFORM */
  	{
  	 .get_energy_uj = get_energy_counter,
  	 .get_max_energy_range_uj = get_max_energy_counter,
  	 .release = release_zone,
  	 .set_enable = set_domain_enable,
  	 .get_enable = get_domain_enable,
  	 },
  };
  255. /*
  256. * Constraint index used by powercap can be different than power limit (PL)
  257. * index in that some PLs maybe missing due to non-existent MSRs. So we
  258. * need to convert here by finding the valid PLs only (name populated).
  259. */
  260. static int contraint_to_pl(struct rapl_domain *rd, int cid)
  261. {
  262. int i, j;
  263. for (i = 0, j = 0; i < NR_POWER_LIMITS; i++) {
  264. if ((rd->rpl[i].name) && j++ == cid) {
  265. pr_debug("%s: index %d\n", __func__, i);
  266. return i;
  267. }
  268. }
  269. pr_err("Cannot find matching power limit for constraint %d\n", cid);
  270. return -EINVAL;
  271. }
  272. static int set_power_limit(struct powercap_zone *power_zone, int cid,
  273. u64 power_limit)
  274. {
  275. struct rapl_domain *rd;
  276. struct rapl_package *rp;
  277. int ret = 0;
  278. int id;
  279. cpus_read_lock();
  280. rd = power_zone_to_rapl_domain(power_zone);
  281. id = contraint_to_pl(rd, cid);
  282. if (id < 0) {
  283. ret = id;
  284. goto set_exit;
  285. }
  286. rp = rd->rp;
  287. if (rd->state & DOMAIN_STATE_BIOS_LOCKED) {
  288. dev_warn(&power_zone->dev,
  289. "%s locked by BIOS, monitoring only\n", rd->name);
  290. ret = -EACCES;
  291. goto set_exit;
  292. }
  293. switch (rd->rpl[id].prim_id) {
  294. case PL1_ENABLE:
  295. rapl_write_data_raw(rd, POWER_LIMIT1, power_limit);
  296. break;
  297. case PL2_ENABLE:
  298. rapl_write_data_raw(rd, POWER_LIMIT2, power_limit);
  299. break;
  300. case PL4_ENABLE:
  301. rapl_write_data_raw(rd, POWER_LIMIT4, power_limit);
  302. break;
  303. default:
  304. ret = -EINVAL;
  305. }
  306. if (!ret)
  307. package_power_limit_irq_save(rp);
  308. set_exit:
  309. cpus_read_unlock();
  310. return ret;
  311. }
  312. static int get_current_power_limit(struct powercap_zone *power_zone, int cid,
  313. u64 *data)
  314. {
  315. struct rapl_domain *rd;
  316. u64 val;
  317. int prim;
  318. int ret = 0;
  319. int id;
  320. cpus_read_lock();
  321. rd = power_zone_to_rapl_domain(power_zone);
  322. id = contraint_to_pl(rd, cid);
  323. if (id < 0) {
  324. ret = id;
  325. goto get_exit;
  326. }
  327. switch (rd->rpl[id].prim_id) {
  328. case PL1_ENABLE:
  329. prim = POWER_LIMIT1;
  330. break;
  331. case PL2_ENABLE:
  332. prim = POWER_LIMIT2;
  333. break;
  334. case PL4_ENABLE:
  335. prim = POWER_LIMIT4;
  336. break;
  337. default:
  338. cpus_read_unlock();
  339. return -EINVAL;
  340. }
  341. if (rapl_read_data_raw(rd, prim, true, &val))
  342. ret = -EIO;
  343. else
  344. *data = val;
  345. get_exit:
  346. cpus_read_unlock();
  347. return ret;
  348. }
  349. static int set_time_window(struct powercap_zone *power_zone, int cid,
  350. u64 window)
  351. {
  352. struct rapl_domain *rd;
  353. int ret = 0;
  354. int id;
  355. cpus_read_lock();
  356. rd = power_zone_to_rapl_domain(power_zone);
  357. id = contraint_to_pl(rd, cid);
  358. if (id < 0) {
  359. ret = id;
  360. goto set_time_exit;
  361. }
  362. switch (rd->rpl[id].prim_id) {
  363. case PL1_ENABLE:
  364. rapl_write_data_raw(rd, TIME_WINDOW1, window);
  365. break;
  366. case PL2_ENABLE:
  367. rapl_write_data_raw(rd, TIME_WINDOW2, window);
  368. break;
  369. default:
  370. ret = -EINVAL;
  371. }
  372. set_time_exit:
  373. cpus_read_unlock();
  374. return ret;
  375. }
  376. static int get_time_window(struct powercap_zone *power_zone, int cid,
  377. u64 *data)
  378. {
  379. struct rapl_domain *rd;
  380. u64 val;
  381. int ret = 0;
  382. int id;
  383. cpus_read_lock();
  384. rd = power_zone_to_rapl_domain(power_zone);
  385. id = contraint_to_pl(rd, cid);
  386. if (id < 0) {
  387. ret = id;
  388. goto get_time_exit;
  389. }
  390. switch (rd->rpl[id].prim_id) {
  391. case PL1_ENABLE:
  392. ret = rapl_read_data_raw(rd, TIME_WINDOW1, true, &val);
  393. break;
  394. case PL2_ENABLE:
  395. ret = rapl_read_data_raw(rd, TIME_WINDOW2, true, &val);
  396. break;
  397. case PL4_ENABLE:
  398. /*
  399. * Time window parameter is not applicable for PL4 entry
  400. * so assigining '0' as default value.
  401. */
  402. val = 0;
  403. break;
  404. default:
  405. cpus_read_unlock();
  406. return -EINVAL;
  407. }
  408. if (!ret)
  409. *data = val;
  410. get_time_exit:
  411. cpus_read_unlock();
  412. return ret;
  413. }
  414. static const char *get_constraint_name(struct powercap_zone *power_zone,
  415. int cid)
  416. {
  417. struct rapl_domain *rd;
  418. int id;
  419. rd = power_zone_to_rapl_domain(power_zone);
  420. id = contraint_to_pl(rd, cid);
  421. if (id >= 0)
  422. return rd->rpl[id].name;
  423. return NULL;
  424. }
  425. static int get_max_power(struct powercap_zone *power_zone, int id, u64 *data)
  426. {
  427. struct rapl_domain *rd;
  428. u64 val;
  429. int prim;
  430. int ret = 0;
  431. cpus_read_lock();
  432. rd = power_zone_to_rapl_domain(power_zone);
  433. switch (rd->rpl[id].prim_id) {
  434. case PL1_ENABLE:
  435. prim = THERMAL_SPEC_POWER;
  436. break;
  437. case PL2_ENABLE:
  438. prim = MAX_POWER;
  439. break;
  440. case PL4_ENABLE:
  441. prim = MAX_POWER;
  442. break;
  443. default:
  444. cpus_read_unlock();
  445. return -EINVAL;
  446. }
  447. if (rapl_read_data_raw(rd, prim, true, &val))
  448. ret = -EIO;
  449. else
  450. *data = val;
  451. /* As a generalization rule, PL4 would be around two times PL2. */
  452. if (rd->rpl[id].prim_id == PL4_ENABLE)
  453. *data = *data * 2;
  454. cpus_read_unlock();
  455. return ret;
  456. }
  /* Constraint (power limit) callbacks handed to the powercap framework */
  static const struct powercap_zone_constraint_ops constraint_ops = {
  	.set_power_limit_uw = set_power_limit,
  	.get_power_limit_uw = get_current_power_limit,
  	.set_time_window_us = set_time_window,
  	.get_time_window_us = get_time_window,
  	.get_max_power_uw = get_max_power,
  	.get_name = get_constraint_name,
  };
  465. /* called after domain detection and package level data are set */
  466. static void rapl_init_domains(struct rapl_package *rp)
  467. {
  468. enum rapl_domain_type i;
  469. enum rapl_domain_reg_id j;
  470. struct rapl_domain *rd = rp->domains;
  471. for (i = 0; i < RAPL_DOMAIN_MAX; i++) {
  472. unsigned int mask = rp->domain_map & (1 << i);
  473. if (!mask)
  474. continue;
  475. rd->rp = rp;
  476. if (i == RAPL_DOMAIN_PLATFORM && rp->id > 0) {
  477. snprintf(rd->name, RAPL_DOMAIN_NAME_LENGTH, "psys-%d",
  478. topology_physical_package_id(rp->lead_cpu));
  479. } else
  480. snprintf(rd->name, RAPL_DOMAIN_NAME_LENGTH, "%s",
  481. rapl_domain_names[i]);
  482. rd->id = i;
  483. rd->rpl[0].prim_id = PL1_ENABLE;
  484. rd->rpl[0].name = pl1_name;
  485. /*
  486. * The PL2 power domain is applicable for limits two
  487. * and limits three
  488. */
  489. if (rp->priv->limits[i] >= 2) {
  490. rd->rpl[1].prim_id = PL2_ENABLE;
  491. rd->rpl[1].name = pl2_name;
  492. }
  493. /* Enable PL4 domain if the total power limits are three */
  494. if (rp->priv->limits[i] == 3) {
  495. rd->rpl[2].prim_id = PL4_ENABLE;
  496. rd->rpl[2].name = pl4_name;
  497. }
  498. for (j = 0; j < RAPL_DOMAIN_REG_MAX; j++)
  499. rd->regs[j] = rp->priv->regs[i][j];
  500. switch (i) {
  501. case RAPL_DOMAIN_DRAM:
  502. rd->domain_energy_unit =
  503. rapl_defaults->dram_domain_energy_unit;
  504. if (rd->domain_energy_unit)
  505. pr_info("DRAM domain energy unit %dpj\n",
  506. rd->domain_energy_unit);
  507. break;
  508. case RAPL_DOMAIN_PLATFORM:
  509. rd->domain_energy_unit =
  510. rapl_defaults->psys_domain_energy_unit;
  511. if (rd->domain_energy_unit)
  512. pr_info("Platform domain energy unit %dpj\n",
  513. rd->domain_energy_unit);
  514. break;
  515. default:
  516. break;
  517. }
  518. rd++;
  519. }
  520. }
  521. static u64 rapl_unit_xlate(struct rapl_domain *rd, enum unit_type type,
  522. u64 value, int to_raw)
  523. {
  524. u64 units = 1;
  525. struct rapl_package *rp = rd->rp;
  526. u64 scale = 1;
  527. switch (type) {
  528. case POWER_UNIT:
  529. units = rp->power_unit;
  530. break;
  531. case ENERGY_UNIT:
  532. scale = ENERGY_UNIT_SCALE;
  533. /* per domain unit takes precedence */
  534. if (rd->domain_energy_unit)
  535. units = rd->domain_energy_unit;
  536. else
  537. units = rp->energy_unit;
  538. break;
  539. case TIME_UNIT:
  540. return rapl_defaults->compute_time_window(rp, value, to_raw);
  541. case ARBITRARY_UNIT:
  542. default:
  543. return value;
  544. }
  545. if (to_raw)
  546. return div64_u64(value, units) * scale;
  547. value *= units;
  548. return div64_u64(value, scale);
  549. }
  /*
   * Primitive descriptor table, in the order of enum rapl_primitives:
   * rapl_read_data_raw() and rapl_write_data_raw() index it directly with
   * the primitive id. The last entry has a NULL name.
   */
  static struct rapl_primitive_info rpi[] = {
  	/* name, mask, shift, register index, unit type, flags */
  	PRIMITIVE_INFO_INIT(ENERGY_COUNTER, ENERGY_STATUS_MASK, 0,
  			    RAPL_DOMAIN_REG_STATUS, ENERGY_UNIT, 0),
  	PRIMITIVE_INFO_INIT(POWER_LIMIT1, POWER_LIMIT1_MASK, 0,
  			    RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
  	PRIMITIVE_INFO_INIT(POWER_LIMIT2, POWER_LIMIT2_MASK, 32,
  			    RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
  	PRIMITIVE_INFO_INIT(POWER_LIMIT4, POWER_LIMIT4_MASK, 0,
  			    RAPL_DOMAIN_REG_PL4, POWER_UNIT, 0),
  	PRIMITIVE_INFO_INIT(FW_LOCK, POWER_LOW_LOCK, 31,
  			    RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
  	PRIMITIVE_INFO_INIT(PL1_ENABLE, POWER_LIMIT1_ENABLE, 15,
  			    RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
  	PRIMITIVE_INFO_INIT(PL1_CLAMP, POWER_LIMIT1_CLAMP, 16,
  			    RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
  	PRIMITIVE_INFO_INIT(PL2_ENABLE, POWER_LIMIT2_ENABLE, 47,
  			    RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
  	PRIMITIVE_INFO_INIT(PL2_CLAMP, POWER_LIMIT2_CLAMP, 48,
  			    RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
  	/* NOTE(review): PL4_ENABLE reuses the PL4 limit mask rather than a dedicated enable bit - confirm this is intended */
  	PRIMITIVE_INFO_INIT(PL4_ENABLE, POWER_LIMIT4_MASK, 0,
  			    RAPL_DOMAIN_REG_PL4, ARBITRARY_UNIT, 0),
  	PRIMITIVE_INFO_INIT(TIME_WINDOW1, TIME_WINDOW1_MASK, 17,
  			    RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
  	PRIMITIVE_INFO_INIT(TIME_WINDOW2, TIME_WINDOW2_MASK, 49,
  			    RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
  	PRIMITIVE_INFO_INIT(THERMAL_SPEC_POWER, POWER_INFO_THERMAL_SPEC_MASK,
  			    0, RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
  	PRIMITIVE_INFO_INIT(MAX_POWER, POWER_INFO_MAX_MASK, 32,
  			    RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
  	PRIMITIVE_INFO_INIT(MIN_POWER, POWER_INFO_MIN_MASK, 16,
  			    RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
  	PRIMITIVE_INFO_INIT(MAX_TIME_WINDOW, POWER_INFO_MAX_TIME_WIN_MASK, 48,
  			    RAPL_DOMAIN_REG_INFO, TIME_UNIT, 0),
  	PRIMITIVE_INFO_INIT(THROTTLED_TIME, PERF_STATUS_THROTTLE_TIME_MASK, 0,
  			    RAPL_DOMAIN_REG_PERF, TIME_UNIT, 0),
  	PRIMITIVE_INFO_INIT(PRIORITY_LEVEL, PP_POLICY_MASK, 0,
  			    RAPL_DOMAIN_REG_POLICY, ARBITRARY_UNIT, 0),
  	/* Psys variants selected by prim_fixups() when spr_psys_bits is set */
  	PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT1, PSYS_POWER_LIMIT1_MASK, 0,
  			    RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
  	PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT2, PSYS_POWER_LIMIT2_MASK, 32,
  			    RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
  	PRIMITIVE_INFO_INIT(PSYS_PL1_ENABLE, PSYS_POWER_LIMIT1_ENABLE, 17,
  			    RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
  	PRIMITIVE_INFO_INIT(PSYS_PL2_ENABLE, PSYS_POWER_LIMIT2_ENABLE, 49,
  			    RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
  	PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW1, PSYS_TIME_WINDOW1_MASK, 19,
  			    RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
  	PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW2, PSYS_TIME_WINDOW2_MASK, 51,
  			    RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
  	/* non-hardware */
  	PRIMITIVE_INFO_INIT(AVERAGE_POWER, 0, 0, 0, POWER_UNIT,
  			    RAPL_PRIMITIVE_DERIVED),
  	{NULL, 0, 0, 0},
  };
  606. static enum rapl_primitives
  607. prim_fixups(struct rapl_domain *rd, enum rapl_primitives prim)
  608. {
  609. if (!rapl_defaults->spr_psys_bits)
  610. return prim;
  611. if (rd->id != RAPL_DOMAIN_PLATFORM)
  612. return prim;
  613. switch (prim) {
  614. case POWER_LIMIT1:
  615. return PSYS_POWER_LIMIT1;
  616. case POWER_LIMIT2:
  617. return PSYS_POWER_LIMIT2;
  618. case PL1_ENABLE:
  619. return PSYS_PL1_ENABLE;
  620. case PL2_ENABLE:
  621. return PSYS_PL2_ENABLE;
  622. case TIME_WINDOW1:
  623. return PSYS_TIME_WINDOW1;
  624. case TIME_WINDOW2:
  625. return PSYS_TIME_WINDOW2;
  626. default:
  627. return prim;
  628. }
  629. }
  630. /* Read primitive data based on its related struct rapl_primitive_info.
  631. * if xlate flag is set, return translated data based on data units, i.e.
  632. * time, energy, and power.
  633. * RAPL MSRs are non-architectual and are laid out not consistently across
  634. * domains. Here we use primitive info to allow writing consolidated access
  635. * functions.
  636. * For a given primitive, it is processed by MSR mask and shift. Unit conversion
  637. * is pre-assigned based on RAPL unit MSRs read at init time.
  638. * 63-------------------------- 31--------------------------- 0
  639. * | xxxxx (mask) |
  640. * | |<- shift ----------------|
  641. * 63-------------------------- 31--------------------------- 0
  642. */
  643. static int rapl_read_data_raw(struct rapl_domain *rd,
  644. enum rapl_primitives prim, bool xlate, u64 *data)
  645. {
  646. u64 value;
  647. enum rapl_primitives prim_fixed = prim_fixups(rd, prim);
  648. struct rapl_primitive_info *rp = &rpi[prim_fixed];
  649. struct reg_action ra;
  650. int cpu;
  651. if (!rp->name || rp->flag & RAPL_PRIMITIVE_DUMMY)
  652. return -EINVAL;
  653. ra.reg = rd->regs[rp->id];
  654. if (!ra.reg)
  655. return -EINVAL;
  656. cpu = rd->rp->lead_cpu;
  657. /* domain with 2 limits has different bit */
  658. if (prim == FW_LOCK && rd->rp->priv->limits[rd->id] == 2) {
  659. rp->mask = POWER_HIGH_LOCK;
  660. rp->shift = 63;
  661. }
  662. /* non-hardware data are collected by the polling thread */
  663. if (rp->flag & RAPL_PRIMITIVE_DERIVED) {
  664. *data = rd->rdd.primitives[prim];
  665. return 0;
  666. }
  667. ra.mask = rp->mask;
  668. if (rd->rp->priv->read_raw(cpu, &ra)) {
  669. pr_debug("failed to read reg 0x%llx on cpu %d\n", ra.reg, cpu);
  670. return -EIO;
  671. }
  672. value = ra.value >> rp->shift;
  673. if (xlate)
  674. *data = rapl_unit_xlate(rd, rp->unit, value, 0);
  675. else
  676. *data = value;
  677. return 0;
  678. }
  679. /* Similar use of primitive info in the read counterpart */
  680. static int rapl_write_data_raw(struct rapl_domain *rd,
  681. enum rapl_primitives prim,
  682. unsigned long long value)
  683. {
  684. enum rapl_primitives prim_fixed = prim_fixups(rd, prim);
  685. struct rapl_primitive_info *rp = &rpi[prim_fixed];
  686. int cpu;
  687. u64 bits;
  688. struct reg_action ra;
  689. int ret;
  690. cpu = rd->rp->lead_cpu;
  691. bits = rapl_unit_xlate(rd, rp->unit, value, 1);
  692. bits <<= rp->shift;
  693. bits &= rp->mask;
  694. memset(&ra, 0, sizeof(ra));
  695. ra.reg = rd->regs[rp->id];
  696. ra.mask = rp->mask;
  697. ra.value = bits;
  698. ret = rd->rp->priv->write_raw(cpu, &ra);
  699. return ret;
  700. }
  701. /*
  702. * Raw RAPL data stored in MSRs are in certain scales. We need to
  703. * convert them into standard units based on the units reported in
  704. * the RAPL unit MSRs. This is specific to CPUs as the method to
  705. * calculate units differ on different CPUs.
  706. * We convert the units to below format based on CPUs.
  707. * i.e.
  708. * energy unit: picoJoules : Represented in picoJoules by default
  709. * power unit : microWatts : Represented in milliWatts by default
  710. * time unit : microseconds: Represented in seconds by default
  711. */
  712. static int rapl_check_unit_core(struct rapl_package *rp, int cpu)
  713. {
  714. struct reg_action ra;
  715. u32 value;
  716. ra.reg = rp->priv->reg_unit;
  717. ra.mask = ~0;
  718. if (rp->priv->read_raw(cpu, &ra)) {
  719. pr_err("Failed to read power unit REG 0x%llx on CPU %d, exit.\n",
  720. rp->priv->reg_unit, cpu);
  721. return -ENODEV;
  722. }
  723. value = (ra.value & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET;
  724. rp->energy_unit = ENERGY_UNIT_SCALE * 1000000 / (1 << value);
  725. value = (ra.value & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET;
  726. rp->power_unit = 1000000 / (1 << value);
  727. value = (ra.value & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET;
  728. rp->time_unit = 1000000 / (1 << value);
  729. pr_debug("Core CPU %s energy=%dpJ, time=%dus, power=%duW\n",
  730. rp->name, rp->energy_unit, rp->time_unit, rp->power_unit);
  731. return 0;
  732. }
  733. static int rapl_check_unit_atom(struct rapl_package *rp, int cpu)
  734. {
  735. struct reg_action ra;
  736. u32 value;
  737. ra.reg = rp->priv->reg_unit;
  738. ra.mask = ~0;
  739. if (rp->priv->read_raw(cpu, &ra)) {
  740. pr_err("Failed to read power unit REG 0x%llx on CPU %d, exit.\n",
  741. rp->priv->reg_unit, cpu);
  742. return -ENODEV;
  743. }
  744. value = (ra.value & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET;
  745. rp->energy_unit = ENERGY_UNIT_SCALE * 1 << value;
  746. value = (ra.value & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET;
  747. rp->power_unit = (1 << value) * 1000;
  748. value = (ra.value & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET;
  749. rp->time_unit = 1000000 / (1 << value);
  750. pr_debug("Atom %s energy=%dpJ, time=%dus, power=%duW\n",
  751. rp->name, rp->energy_unit, rp->time_unit, rp->power_unit);
  752. return 0;
  753. }
  754. static void power_limit_irq_save_cpu(void *info)
  755. {
  756. u32 l, h = 0;
  757. struct rapl_package *rp = (struct rapl_package *)info;
  758. /* save the state of PLN irq mask bit before disabling it */
  759. rdmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, &l, &h);
  760. if (!(rp->power_limit_irq & PACKAGE_PLN_INT_SAVED)) {
  761. rp->power_limit_irq = l & PACKAGE_THERM_INT_PLN_ENABLE;
  762. rp->power_limit_irq |= PACKAGE_PLN_INT_SAVED;
  763. }
  764. l &= ~PACKAGE_THERM_INT_PLN_ENABLE;
  765. wrmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
  766. }
  767. /* REVISIT:
  768. * When package power limit is set artificially low by RAPL, LVT
  769. * thermal interrupt for package power limit should be ignored
  770. * since we are not really exceeding the real limit. The intention
  771. * is to avoid excessive interrupts while we are trying to save power.
  772. * A useful feature might be routing the package_power_limit interrupt
  773. * to userspace via eventfd. once we have a usecase, this is simple
  774. * to do by adding an atomic notifier.
  775. */
  776. static void package_power_limit_irq_save(struct rapl_package *rp)
  777. {
  778. if (!boot_cpu_has(X86_FEATURE_PTS) || !boot_cpu_has(X86_FEATURE_PLN))
  779. return;
  780. smp_call_function_single(rp->lead_cpu, power_limit_irq_save_cpu, rp, 1);
  781. }
  782. /*
  783. * Restore per package power limit interrupt enable state. Called from cpu
  784. * hotplug code on package removal.
  785. */
  786. static void package_power_limit_irq_restore(struct rapl_package *rp)
  787. {
  788. u32 l, h;
  789. if (!boot_cpu_has(X86_FEATURE_PTS) || !boot_cpu_has(X86_FEATURE_PLN))
  790. return;
  791. /* irq enable state not saved, nothing to restore */
  792. if (!(rp->power_limit_irq & PACKAGE_PLN_INT_SAVED))
  793. return;
  794. rdmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, &l, &h);
  795. if (rp->power_limit_irq & PACKAGE_THERM_INT_PLN_ENABLE)
  796. l |= PACKAGE_THERM_INT_PLN_ENABLE;
  797. else
  798. l &= ~PACKAGE_THERM_INT_PLN_ENABLE;
  799. wrmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
  800. }
  801. static void set_floor_freq_default(struct rapl_domain *rd, bool mode)
  802. {
  803. int nr_powerlimit = find_nr_power_limit(rd);
  804. /* always enable clamp such that p-state can go below OS requested
  805. * range. power capping priority over guranteed frequency.
  806. */
  807. rapl_write_data_raw(rd, PL1_CLAMP, mode);
  808. /* some domains have pl2 */
  809. if (nr_powerlimit > 1) {
  810. rapl_write_data_raw(rd, PL2_ENABLE, mode);
  811. rapl_write_data_raw(rd, PL2_CLAMP, mode);
  812. }
  813. }
  814. static void set_floor_freq_atom(struct rapl_domain *rd, bool enable)
  815. {
  816. static u32 power_ctrl_orig_val;
  817. u32 mdata;
  818. if (!rapl_defaults->floor_freq_reg_addr) {
  819. pr_err("Invalid floor frequency config register\n");
  820. return;
  821. }
  822. if (!power_ctrl_orig_val)
  823. iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_CR_READ,
  824. rapl_defaults->floor_freq_reg_addr,
  825. &power_ctrl_orig_val);
  826. mdata = power_ctrl_orig_val;
  827. if (enable) {
  828. mdata &= ~(0x7f << 8);
  829. mdata |= 1 << 8;
  830. }
  831. iosf_mbi_write(BT_MBI_UNIT_PMC, MBI_CR_WRITE,
  832. rapl_defaults->floor_freq_reg_addr, mdata);
  833. }
  834. static u64 rapl_compute_time_window_core(struct rapl_package *rp, u64 value,
  835. bool to_raw)
  836. {
  837. u64 f, y; /* fraction and exp. used for time unit */
  838. /*
  839. * Special processing based on 2^Y*(1+F/4), refer
  840. * to Intel Software Developer's manual Vol.3B: CH 14.9.3.
  841. */
  842. if (!to_raw) {
  843. f = (value & 0x60) >> 5;
  844. y = value & 0x1f;
  845. value = (1 << y) * (4 + f) * rp->time_unit / 4;
  846. } else {
  847. if (value < rp->time_unit)
  848. return 0;
  849. do_div(value, rp->time_unit);
  850. y = ilog2(value);
  851. f = div64_u64(4 * (value - (1 << y)), 1 << y);
  852. value = (y & 0x1f) | ((f & 0x3) << 5);
  853. }
  854. return value;
  855. }
  856. static u64 rapl_compute_time_window_atom(struct rapl_package *rp, u64 value,
  857. bool to_raw)
  858. {
  859. /*
  860. * Atom time unit encoding is straight forward val * time_unit,
  861. * where time_unit is default to 1 sec. Never 0.
  862. */
  863. if (!to_raw)
  864. return (value) ? value * rp->time_unit : rp->time_unit;
  865. value = div64_u64(value, rp->time_unit);
  866. return value;
  867. }
/*
 * Per-model default callbacks and parameters. Each table below is referenced
 * from rapl_ids[] as the driver_data of the CPU models it applies to.
 */

/* Core-style unit decoding and time window encoding, no floor freq register. */
static const struct rapl_defaults rapl_defaults_core = {
	.floor_freq_reg_addr = 0,
	.check_unit = rapl_check_unit_core,
	.set_floor_freq = set_floor_freq_default,
	.compute_time_window = rapl_compute_time_window_core,
};

/*
 * Same callbacks as the core defaults plus a fixed DRAM domain energy unit.
 * NOTE(review): the unit of 15300 is not visible here - confirm at its user.
 */
static const struct rapl_defaults rapl_defaults_hsw_server = {
	.check_unit = rapl_check_unit_core,
	.set_floor_freq = set_floor_freq_default,
	.compute_time_window = rapl_compute_time_window_core,
	.dram_domain_energy_unit = 15300,
};

/*
 * Same callbacks as the core defaults plus a fixed psys domain energy unit
 * and the spr_psys_bits flag.
 */
static const struct rapl_defaults rapl_defaults_spr_server = {
	.check_unit = rapl_check_unit_core,
	.set_floor_freq = set_floor_freq_default,
	.compute_time_window = rapl_compute_time_window_core,
	.psys_domain_energy_unit = 1000000000,
	.spr_psys_bits = true,
};

/* Atom callbacks; floor frequency controlled via the BYT power budget register. */
static const struct rapl_defaults rapl_defaults_byt = {
	.floor_freq_reg_addr = IOSF_CPU_POWER_BUDGET_CTL_BYT,
	.check_unit = rapl_check_unit_atom,
	.set_floor_freq = set_floor_freq_atom,
	.compute_time_window = rapl_compute_time_window_atom,
};

/* Atom callbacks; floor frequency controlled via the TNG power budget register. */
static const struct rapl_defaults rapl_defaults_tng = {
	.floor_freq_reg_addr = IOSF_CPU_POWER_BUDGET_CTL_TNG,
	.check_unit = rapl_check_unit_atom,
	.set_floor_freq = set_floor_freq_atom,
	.compute_time_window = rapl_compute_time_window_atom,
};

/* Atom unit/time window handling without a floor frequency callback. */
static const struct rapl_defaults rapl_defaults_ann = {
	.floor_freq_reg_addr = 0,
	.check_unit = rapl_check_unit_atom,
	.set_floor_freq = NULL,
	.compute_time_window = rapl_compute_time_window_atom,
};

/* Atom unit/time window handling without a floor frequency callback. */
static const struct rapl_defaults rapl_defaults_cht = {
	.floor_freq_reg_addr = 0,
	.check_unit = rapl_check_unit_atom,
	.set_floor_freq = NULL,
	.compute_time_window = rapl_compute_time_window_atom,
};

/* Only unit detection is provided; every other member stays zero/NULL. */
static const struct rapl_defaults rapl_defaults_amd = {
	.check_unit = rapl_check_unit_core,
};
/*
 * Supported CPUs. Each entry pairs a CPU model (or vendor/family) with the
 * rapl_defaults table to use for it; rapl_init() matches the running CPU
 * against this table and takes the defaults from the entry's driver_data.
 * The list is terminated by an empty entry.
 */
static const struct x86_cpu_id rapl_ids[] __initconst = {
	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE,		&rapl_defaults_core),
	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X,	&rapl_defaults_core),

	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE,		&rapl_defaults_core),
	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X,		&rapl_defaults_core),

	X86_MATCH_INTEL_FAM6_MODEL(HASWELL,		&rapl_defaults_core),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L,		&rapl_defaults_core),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G,		&rapl_defaults_core),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X,		&rapl_defaults_hsw_server),

	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL,		&rapl_defaults_core),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G,		&rapl_defaults_core),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D,		&rapl_defaults_core),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X,		&rapl_defaults_hsw_server),

	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE,		&rapl_defaults_core),
	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L,		&rapl_defaults_core),
	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X,		&rapl_defaults_hsw_server),
	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L,		&rapl_defaults_core),
	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE,		&rapl_defaults_core),
	X86_MATCH_INTEL_FAM6_MODEL(CANNONLAKE_L,	&rapl_defaults_core),
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L,		&rapl_defaults_core),
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE,		&rapl_defaults_core),
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_NNPI,	&rapl_defaults_core),
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,		&rapl_defaults_hsw_server),
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,		&rapl_defaults_hsw_server),
	X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L,		&rapl_defaults_core),
	X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE,		&rapl_defaults_core),
	X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L,		&rapl_defaults_core),
	X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE,		&rapl_defaults_core),
	X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE,		&rapl_defaults_core),
	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,		&rapl_defaults_core),
	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,		&rapl_defaults_core),
	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N,		&rapl_defaults_core),
	X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE,		&rapl_defaults_core),
	X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P,	&rapl_defaults_core),
	X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S,	&rapl_defaults_core),
	X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,	&rapl_defaults_spr_server),
	X86_MATCH_INTEL_FAM6_MODEL(LAKEFIELD,		&rapl_defaults_core),

	/* These four Atom models use the Atom-specific defaults. */
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT,	&rapl_defaults_byt),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT,	&rapl_defaults_cht),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID,	&rapl_defaults_tng),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT_MID,	&rapl_defaults_ann),
	/* The remaining Atom models use the core defaults. */
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT,	&rapl_defaults_core),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS,	&rapl_defaults_core),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D,	&rapl_defaults_core),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT,	&rapl_defaults_core),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,	&rapl_defaults_core),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L,	&rapl_defaults_core),

	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,	&rapl_defaults_hsw_server),
	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM,	&rapl_defaults_hsw_server),

	/* AMD and Hygon families are matched by vendor/family only. */
	X86_MATCH_VENDOR_FAM(AMD, 0x17, &rapl_defaults_amd),
	X86_MATCH_VENDOR_FAM(AMD, 0x19, &rapl_defaults_amd),
	X86_MATCH_VENDOR_FAM(HYGON, 0x18, &rapl_defaults_amd),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, rapl_ids);
  969. /* Read once for all raw primitive data for domains */
  970. static void rapl_update_domain_data(struct rapl_package *rp)
  971. {
  972. int dmn, prim;
  973. u64 val;
  974. for (dmn = 0; dmn < rp->nr_domains; dmn++) {
  975. pr_debug("update %s domain %s data\n", rp->name,
  976. rp->domains[dmn].name);
  977. /* exclude non-raw primitives */
  978. for (prim = 0; prim < NR_RAW_PRIMITIVES; prim++) {
  979. if (!rapl_read_data_raw(&rp->domains[dmn], prim,
  980. rpi[prim].unit, &val))
  981. rp->domains[dmn].rdd.primitives[prim] = val;
  982. }
  983. }
  984. }
  985. static int rapl_package_register_powercap(struct rapl_package *rp)
  986. {
  987. struct rapl_domain *rd;
  988. struct powercap_zone *power_zone = NULL;
  989. int nr_pl, ret;
  990. /* Update the domain data of the new package */
  991. rapl_update_domain_data(rp);
  992. /* first we register package domain as the parent zone */
  993. for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) {
  994. if (rd->id == RAPL_DOMAIN_PACKAGE) {
  995. nr_pl = find_nr_power_limit(rd);
  996. pr_debug("register package domain %s\n", rp->name);
  997. power_zone = powercap_register_zone(&rd->power_zone,
  998. rp->priv->control_type, rp->name,
  999. NULL, &zone_ops[rd->id], nr_pl,
  1000. &constraint_ops);
  1001. if (IS_ERR(power_zone)) {
  1002. pr_debug("failed to register power zone %s\n",
  1003. rp->name);
  1004. return PTR_ERR(power_zone);
  1005. }
  1006. /* track parent zone in per package/socket data */
  1007. rp->power_zone = power_zone;
  1008. /* done, only one package domain per socket */
  1009. break;
  1010. }
  1011. }
  1012. if (!power_zone) {
  1013. pr_err("no package domain found, unknown topology!\n");
  1014. return -ENODEV;
  1015. }
  1016. /* now register domains as children of the socket/package */
  1017. for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) {
  1018. struct powercap_zone *parent = rp->power_zone;
  1019. if (rd->id == RAPL_DOMAIN_PACKAGE)
  1020. continue;
  1021. if (rd->id == RAPL_DOMAIN_PLATFORM)
  1022. parent = NULL;
  1023. /* number of power limits per domain varies */
  1024. nr_pl = find_nr_power_limit(rd);
  1025. power_zone = powercap_register_zone(&rd->power_zone,
  1026. rp->priv->control_type,
  1027. rd->name, parent,
  1028. &zone_ops[rd->id], nr_pl,
  1029. &constraint_ops);
  1030. if (IS_ERR(power_zone)) {
  1031. pr_debug("failed to register power_zone, %s:%s\n",
  1032. rp->name, rd->name);
  1033. ret = PTR_ERR(power_zone);
  1034. goto err_cleanup;
  1035. }
  1036. }
  1037. return 0;
  1038. err_cleanup:
  1039. /*
  1040. * Clean up previously initialized domains within the package if we
  1041. * failed after the first domain setup.
  1042. */
  1043. while (--rd >= rp->domains) {
  1044. pr_debug("unregister %s domain %s\n", rp->name, rd->name);
  1045. powercap_unregister_zone(rp->priv->control_type,
  1046. &rd->power_zone);
  1047. }
  1048. return ret;
  1049. }
  1050. static int rapl_check_domain(int cpu, int domain, struct rapl_package *rp)
  1051. {
  1052. struct reg_action ra;
  1053. switch (domain) {
  1054. case RAPL_DOMAIN_PACKAGE:
  1055. case RAPL_DOMAIN_PP0:
  1056. case RAPL_DOMAIN_PP1:
  1057. case RAPL_DOMAIN_DRAM:
  1058. case RAPL_DOMAIN_PLATFORM:
  1059. ra.reg = rp->priv->regs[domain][RAPL_DOMAIN_REG_STATUS];
  1060. break;
  1061. default:
  1062. pr_err("invalid domain id %d\n", domain);
  1063. return -EINVAL;
  1064. }
  1065. /* make sure domain counters are available and contains non-zero
  1066. * values, otherwise skip it.
  1067. */
  1068. ra.mask = ENERGY_STATUS_MASK;
  1069. if (rp->priv->read_raw(cpu, &ra) || !ra.value)
  1070. return -ENODEV;
  1071. return 0;
  1072. }
  1073. /*
  1074. * Check if power limits are available. Two cases when they are not available:
  1075. * 1. Locked by BIOS, in this case we still provide read-only access so that
  1076. * users can see what limit is set by the BIOS.
  1077. * 2. Some CPUs make some domains monitoring only which means PLx MSRs may not
  1078. * exist at all. In this case, we do not show the constraints in powercap.
  1079. *
  1080. * Called after domains are detected and initialized.
  1081. */
  1082. static void rapl_detect_powerlimit(struct rapl_domain *rd)
  1083. {
  1084. u64 val64;
  1085. int i;
  1086. /* check if the domain is locked by BIOS, ignore if MSR doesn't exist */
  1087. if (!rapl_read_data_raw(rd, FW_LOCK, false, &val64)) {
  1088. if (val64) {
  1089. pr_info("RAPL %s domain %s locked by BIOS\n",
  1090. rd->rp->name, rd->name);
  1091. rd->state |= DOMAIN_STATE_BIOS_LOCKED;
  1092. }
  1093. }
  1094. /* check if power limit MSR exists, otherwise domain is monitoring only */
  1095. for (i = 0; i < NR_POWER_LIMITS; i++) {
  1096. int prim = rd->rpl[i].prim_id;
  1097. if (rapl_read_data_raw(rd, prim, false, &val64))
  1098. rd->rpl[i].name = NULL;
  1099. }
  1100. }
  1101. /* Detect active and valid domains for the given CPU, caller must
  1102. * ensure the CPU belongs to the targeted package and CPU hotlug is disabled.
  1103. */
  1104. static int rapl_detect_domains(struct rapl_package *rp, int cpu)
  1105. {
  1106. struct rapl_domain *rd;
  1107. int i;
  1108. for (i = 0; i < RAPL_DOMAIN_MAX; i++) {
  1109. /* use physical package id to read counters */
  1110. if (!rapl_check_domain(cpu, i, rp)) {
  1111. rp->domain_map |= 1 << i;
  1112. pr_info("Found RAPL domain %s\n", rapl_domain_names[i]);
  1113. }
  1114. }
  1115. rp->nr_domains = bitmap_weight(&rp->domain_map, RAPL_DOMAIN_MAX);
  1116. if (!rp->nr_domains) {
  1117. pr_debug("no valid rapl domains found in %s\n", rp->name);
  1118. return -ENODEV;
  1119. }
  1120. pr_debug("found %d domains on %s\n", rp->nr_domains, rp->name);
  1121. rp->domains = kcalloc(rp->nr_domains + 1, sizeof(struct rapl_domain),
  1122. GFP_KERNEL);
  1123. if (!rp->domains)
  1124. return -ENOMEM;
  1125. rapl_init_domains(rp);
  1126. for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++)
  1127. rapl_detect_powerlimit(rd);
  1128. return 0;
  1129. }
  1130. /* called from CPU hotplug notifier, hotplug lock held */
  1131. void rapl_remove_package(struct rapl_package *rp)
  1132. {
  1133. struct rapl_domain *rd, *rd_package = NULL;
  1134. package_power_limit_irq_restore(rp);
  1135. for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) {
  1136. rapl_write_data_raw(rd, PL1_ENABLE, 0);
  1137. rapl_write_data_raw(rd, PL1_CLAMP, 0);
  1138. if (find_nr_power_limit(rd) > 1) {
  1139. rapl_write_data_raw(rd, PL2_ENABLE, 0);
  1140. rapl_write_data_raw(rd, PL2_CLAMP, 0);
  1141. rapl_write_data_raw(rd, PL4_ENABLE, 0);
  1142. }
  1143. if (rd->id == RAPL_DOMAIN_PACKAGE) {
  1144. rd_package = rd;
  1145. continue;
  1146. }
  1147. pr_debug("remove package, undo power limit on %s: %s\n",
  1148. rp->name, rd->name);
  1149. powercap_unregister_zone(rp->priv->control_type,
  1150. &rd->power_zone);
  1151. }
  1152. /* do parent zone last */
  1153. powercap_unregister_zone(rp->priv->control_type,
  1154. &rd_package->power_zone);
  1155. list_del(&rp->plist);
  1156. kfree(rp);
  1157. }
  1158. EXPORT_SYMBOL_GPL(rapl_remove_package);
  1159. /* caller to ensure CPU hotplug lock is held */
  1160. struct rapl_package *rapl_find_package_domain(int cpu, struct rapl_if_priv *priv)
  1161. {
  1162. int id = topology_logical_die_id(cpu);
  1163. struct rapl_package *rp;
  1164. list_for_each_entry(rp, &rapl_packages, plist) {
  1165. if (rp->id == id
  1166. && rp->priv->control_type == priv->control_type)
  1167. return rp;
  1168. }
  1169. return NULL;
  1170. }
  1171. EXPORT_SYMBOL_GPL(rapl_find_package_domain);
  1172. /* called from CPU hotplug notifier, hotplug lock held */
  1173. struct rapl_package *rapl_add_package(int cpu, struct rapl_if_priv *priv)
  1174. {
  1175. int id = topology_logical_die_id(cpu);
  1176. struct rapl_package *rp;
  1177. int ret;
  1178. if (!rapl_defaults)
  1179. return ERR_PTR(-ENODEV);
  1180. rp = kzalloc(sizeof(struct rapl_package), GFP_KERNEL);
  1181. if (!rp)
  1182. return ERR_PTR(-ENOMEM);
  1183. /* add the new package to the list */
  1184. rp->id = id;
  1185. rp->lead_cpu = cpu;
  1186. rp->priv = priv;
  1187. if (topology_max_die_per_package() > 1)
  1188. snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH,
  1189. "package-%d-die-%d",
  1190. topology_physical_package_id(cpu), topology_die_id(cpu));
  1191. else
  1192. snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d",
  1193. topology_physical_package_id(cpu));
  1194. /* check if the package contains valid domains */
  1195. if (rapl_detect_domains(rp, cpu) || rapl_defaults->check_unit(rp, cpu)) {
  1196. ret = -ENODEV;
  1197. goto err_free_package;
  1198. }
  1199. ret = rapl_package_register_powercap(rp);
  1200. if (!ret) {
  1201. INIT_LIST_HEAD(&rp->plist);
  1202. list_add(&rp->plist, &rapl_packages);
  1203. return rp;
  1204. }
  1205. err_free_package:
  1206. kfree(rp->domains);
  1207. kfree(rp);
  1208. return ERR_PTR(ret);
  1209. }
  1210. EXPORT_SYMBOL_GPL(rapl_add_package);
  1211. static void power_limit_state_save(void)
  1212. {
  1213. struct rapl_package *rp;
  1214. struct rapl_domain *rd;
  1215. int nr_pl, ret, i;
  1216. cpus_read_lock();
  1217. list_for_each_entry(rp, &rapl_packages, plist) {
  1218. if (!rp->power_zone)
  1219. continue;
  1220. rd = power_zone_to_rapl_domain(rp->power_zone);
  1221. nr_pl = find_nr_power_limit(rd);
  1222. for (i = 0; i < nr_pl; i++) {
  1223. switch (rd->rpl[i].prim_id) {
  1224. case PL1_ENABLE:
  1225. ret = rapl_read_data_raw(rd,
  1226. POWER_LIMIT1, true,
  1227. &rd->rpl[i].last_power_limit);
  1228. if (ret)
  1229. rd->rpl[i].last_power_limit = 0;
  1230. break;
  1231. case PL2_ENABLE:
  1232. ret = rapl_read_data_raw(rd,
  1233. POWER_LIMIT2, true,
  1234. &rd->rpl[i].last_power_limit);
  1235. if (ret)
  1236. rd->rpl[i].last_power_limit = 0;
  1237. break;
  1238. case PL4_ENABLE:
  1239. ret = rapl_read_data_raw(rd,
  1240. POWER_LIMIT4, true,
  1241. &rd->rpl[i].last_power_limit);
  1242. if (ret)
  1243. rd->rpl[i].last_power_limit = 0;
  1244. break;
  1245. }
  1246. }
  1247. }
  1248. cpus_read_unlock();
  1249. }
  1250. static void power_limit_state_restore(void)
  1251. {
  1252. struct rapl_package *rp;
  1253. struct rapl_domain *rd;
  1254. int nr_pl, i;
  1255. cpus_read_lock();
  1256. list_for_each_entry(rp, &rapl_packages, plist) {
  1257. if (!rp->power_zone)
  1258. continue;
  1259. rd = power_zone_to_rapl_domain(rp->power_zone);
  1260. nr_pl = find_nr_power_limit(rd);
  1261. for (i = 0; i < nr_pl; i++) {
  1262. switch (rd->rpl[i].prim_id) {
  1263. case PL1_ENABLE:
  1264. if (rd->rpl[i].last_power_limit)
  1265. rapl_write_data_raw(rd, POWER_LIMIT1,
  1266. rd->rpl[i].last_power_limit);
  1267. break;
  1268. case PL2_ENABLE:
  1269. if (rd->rpl[i].last_power_limit)
  1270. rapl_write_data_raw(rd, POWER_LIMIT2,
  1271. rd->rpl[i].last_power_limit);
  1272. break;
  1273. case PL4_ENABLE:
  1274. if (rd->rpl[i].last_power_limit)
  1275. rapl_write_data_raw(rd, POWER_LIMIT4,
  1276. rd->rpl[i].last_power_limit);
  1277. break;
  1278. }
  1279. }
  1280. }
  1281. cpus_read_unlock();
  1282. }
  1283. static int rapl_pm_callback(struct notifier_block *nb,
  1284. unsigned long mode, void *_unused)
  1285. {
  1286. switch (mode) {
  1287. case PM_SUSPEND_PREPARE:
  1288. power_limit_state_save();
  1289. break;
  1290. case PM_POST_SUSPEND:
  1291. power_limit_state_restore();
  1292. break;
  1293. }
  1294. return NOTIFY_OK;
  1295. }
/* PM notifier that routes notifications to rapl_pm_callback(). */
static struct notifier_block rapl_pm_notifier = {
	.notifier_call = rapl_pm_callback,
};
  1299. static struct platform_device *rapl_msr_platdev;
  1300. static int __init rapl_init(void)
  1301. {
  1302. const struct x86_cpu_id *id;
  1303. int ret;
  1304. id = x86_match_cpu(rapl_ids);
  1305. if (!id) {
  1306. pr_err("driver does not support CPU family %d model %d\n",
  1307. boot_cpu_data.x86, boot_cpu_data.x86_model);
  1308. return -ENODEV;
  1309. }
  1310. rapl_defaults = (struct rapl_defaults *)id->driver_data;
  1311. ret = register_pm_notifier(&rapl_pm_notifier);
  1312. if (ret)
  1313. return ret;
  1314. rapl_msr_platdev = platform_device_alloc("intel_rapl_msr", 0);
  1315. if (!rapl_msr_platdev) {
  1316. ret = -ENOMEM;
  1317. goto end;
  1318. }
  1319. ret = platform_device_add(rapl_msr_platdev);
  1320. if (ret)
  1321. platform_device_put(rapl_msr_platdev);
  1322. end:
  1323. if (ret)
  1324. unregister_pm_notifier(&rapl_pm_notifier);
  1325. return ret;
  1326. }
/*
 * Driver exit: undo rapl_init() in reverse order - unregister the
 * "intel_rapl_msr" platform device first, then the PM notifier.
 */
static void __exit rapl_exit(void)
{
	platform_device_unregister(rapl_msr_platdev);
	unregister_pm_notifier(&rapl_pm_notifier);
}
/* rapl_init() is hooked up as an fs_initcall, rapl_exit() as the module exit. */
fs_initcall(rapl_init);
module_exit(rapl_exit);

/* Module metadata. */
MODULE_DESCRIPTION("Intel Runtime Average Power Limit (RAPL) common code");
MODULE_AUTHOR("Jacob Pan <[email protected]>");
MODULE_LICENSE("GPL v2");