therm_throt.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * Thermal throttle event support code (such as syslog messaging and rate
 * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c).
 *
 * This allows consistent reporting of CPU thermal throttle events.
 *
 * Maintains a counter in /sys that keeps track of the number of thermal
 * events, such that the user knows how bad the thermal problem might be
 * (since the logging to syslog is rate limited).
 *
 * Author: Dmitriy Zavin ([email protected])
 *
 * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
 * Inspired by Ross Biro's and Al Borchers' counter code.
 */
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/cpu.h>

#include <asm/processor.h>
#include <asm/thermal.h>
#include <asm/traps.h>
#include <asm/apic.h>
#include <asm/irq.h>
#include <asm/msr.h>

#include "intel_hfi.h"
#include "thermal_interrupt.h"

/* How long to wait between reporting thermal events */
#define CHECK_INTERVAL          (300 * HZ)

#define THERMAL_THROTTLING_EVENT        0
#define POWER_LIMIT_EVENT               1

/**
 * struct _thermal_state - Represent the current thermal event state
 * @next_check:             Stores the next timestamp, when it is allowed
 *                          to log the next warning message.
 * @last_interrupt_time:    Stores the timestamp for the last threshold
 *                          high event.
 * @therm_work:             Delayed workqueue structure
 * @count:                  Stores the current running count for thermal
 *                          or power threshold interrupts.
 * @last_count:             Stores the previous running count for thermal
 *                          or power threshold interrupts.
 * @max_time_ms:            This shows the maximum amount of time CPU was
 *                          in throttled state for a single thermal
 *                          threshold high to low state.
 * @total_time_ms:          This is a cumulative time during which CPU was
 *                          in the throttled state.
 * @rate_control_active:    Set when a throttling message is logged.
 *                          This is used for the purpose of rate-control.
 * @new_event:              Stores the last high/low status of the
 *                          THERM_STATUS_PROCHOT or
 *                          THERM_STATUS_POWER_LIMIT.
 * @level:                  Stores whether this _thermal_state instance is
 *                          for a CORE level or for PACKAGE level.
 * @sample_index:           Index for storing the next sample in the buffer
 *                          temp_samples[].
 * @sample_count:           Total number of samples collected in the buffer
 *                          temp_samples[].
 * @average:                The last moving average of temperature samples
 * @baseline_temp:          Temperature at which thermal threshold high
 *                          interrupt was generated.
 * @temp_samples:           Storage for temperature samples to calculate
 *                          moving average.
 *
 * This structure is used to represent data related to thermal state for a CPU.
 * There is a separate storage for core and package level for each CPU.
 */
struct _thermal_state {
        u64                     next_check;
        u64                     last_interrupt_time;
        struct delayed_work     therm_work;
        unsigned long           count;
        unsigned long           last_count;
        unsigned long           max_time_ms;
        unsigned long           total_time_ms;
        bool                    rate_control_active;
        bool                    new_event;
        u8                      level;
        u8                      sample_index;
        u8                      sample_count;
        u8                      average;
        u8                      baseline_temp;
        u8                      temp_samples[3];
};

struct thermal_state {
        struct _thermal_state core_throttle;
        struct _thermal_state core_power_limit;
        struct _thermal_state package_throttle;
        struct _thermal_state package_power_limit;
        struct _thermal_state core_thresh0;
        struct _thermal_state core_thresh1;
        struct _thermal_state pkg_thresh0;
        struct _thermal_state pkg_thresh1;
};
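
/*
 * Note on the layout above: the first four instances feed the
 * throttling/power-limit event accounting in therm_throt_process(), while
 * the *_thresh0/*_thresh1 instances are only used to rate-limit the
 * external threshold notifications in thresh_event_valid().
 */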

/* Callback to handle core threshold interrupts */
int (*platform_thermal_notify)(__u64 msr_val);
EXPORT_SYMBOL(platform_thermal_notify);

/* Callback to handle package threshold interrupts */
int (*platform_thermal_package_notify)(__u64 msr_val);
EXPORT_SYMBOL_GPL(platform_thermal_package_notify);

/* Callback support of rate control: return true if the
 * callback implements its own rate control */
bool (*platform_thermal_package_rate_control)(void);
EXPORT_SYMBOL_GPL(platform_thermal_package_rate_control);
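
/*
 * A minimal sketch of how a platform driver is expected to hook these
 * callbacks (the x86_pkg_temp_thermal driver does something along these
 * lines; the local handler names here are illustrative, not the driver's
 * actual symbols):
 *
 *      static int pkg_thermal_notify(u64 msr_val) { ... }
 *      static bool pkg_thermal_rate_control(void) { return true; }
 *
 *      platform_thermal_package_notify = pkg_thermal_notify;
 *      platform_thermal_package_rate_control = pkg_thermal_rate_control;
 */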

static DEFINE_PER_CPU(struct thermal_state, thermal_state);

static atomic_t therm_throt_en = ATOMIC_INIT(0);

static u32 lvtthmr_init __read_mostly;

#ifdef CONFIG_SYSFS
#define define_therm_throt_device_one_ro(_name)                         \
        static DEVICE_ATTR(_name, 0444,                                 \
                           therm_throt_device_show_##_name,             \
                           NULL)                                        \

#define define_therm_throt_device_show_func(event, name)                \
                                                                        \
static ssize_t therm_throt_device_show_##event##_##name(                \
                        struct device *dev,                             \
                        struct device_attribute *attr,                  \
                        char *buf)                                      \
{                                                                       \
        unsigned int cpu = dev->id;                                     \
        ssize_t ret;                                                    \
                                                                        \
        preempt_disable();      /* CPU hotplug */                       \
        if (cpu_online(cpu)) {                                          \
                ret = sprintf(buf, "%lu\n",                             \
                              per_cpu(thermal_state, cpu).event.name);  \
        } else                                                          \
                ret = 0;                                                \
        preempt_enable();                                               \
                                                                        \
        return ret;                                                     \
}
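
/*
 * For example, define_therm_throt_device_show_func(core_throttle, count)
 * expands to a show function named
 * therm_throt_device_show_core_throttle_count() that prints
 * per_cpu(thermal_state, cpu).core_throttle.count, and
 * define_therm_throt_device_one_ro(core_throttle_count) wraps it in a
 * read-only (0444) device attribute, dev_attr_core_throttle_count.
 */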

define_therm_throt_device_show_func(core_throttle, count);
define_therm_throt_device_one_ro(core_throttle_count);

define_therm_throt_device_show_func(core_power_limit, count);
define_therm_throt_device_one_ro(core_power_limit_count);

define_therm_throt_device_show_func(package_throttle, count);
define_therm_throt_device_one_ro(package_throttle_count);

define_therm_throt_device_show_func(package_power_limit, count);
define_therm_throt_device_one_ro(package_power_limit_count);

define_therm_throt_device_show_func(core_throttle, max_time_ms);
define_therm_throt_device_one_ro(core_throttle_max_time_ms);

define_therm_throt_device_show_func(package_throttle, max_time_ms);
define_therm_throt_device_one_ro(package_throttle_max_time_ms);

define_therm_throt_device_show_func(core_throttle, total_time_ms);
define_therm_throt_device_one_ro(core_throttle_total_time_ms);

define_therm_throt_device_show_func(package_throttle, total_time_ms);
define_therm_throt_device_one_ro(package_throttle_total_time_ms);
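
/*
 * The attributes defined above surface under
 * /sys/devices/system/cpu/cpuN/thermal_throttle/, e.g.
 * /sys/devices/system/cpu/cpu0/thermal_throttle/core_throttle_count.
 * Only the core_throttle_* files are registered unconditionally (see
 * thermal_throttle_attrs[] below); the package and power-limit files are
 * added per CPU in thermal_throttle_add_dev() when the hardware supports
 * them.
 */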

static struct attribute *thermal_throttle_attrs[] = {
        &dev_attr_core_throttle_count.attr,
        &dev_attr_core_throttle_max_time_ms.attr,
        &dev_attr_core_throttle_total_time_ms.attr,
        NULL
};

static const struct attribute_group thermal_attr_group = {
        .attrs  = thermal_throttle_attrs,
        .name   = "thermal_throttle"
};
#endif /* CONFIG_SYSFS */

#define CORE_LEVEL      0
#define PACKAGE_LEVEL   1

#define THERM_THROT_POLL_INTERVAL       HZ
#define THERM_STATUS_PROCHOT_LOG        BIT(1)

#define THERM_STATUS_CLEAR_CORE_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11) | BIT(13) | BIT(15))
#define THERM_STATUS_CLEAR_PKG_MASK  (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11))

static void clear_therm_status_log(int level)
{
        int msr;
        u64 mask, msr_val;

        if (level == CORE_LEVEL) {
                msr  = MSR_IA32_THERM_STATUS;
                mask = THERM_STATUS_CLEAR_CORE_MASK;
        } else {
                msr  = MSR_IA32_PACKAGE_THERM_STATUS;
                mask = THERM_STATUS_CLEAR_PKG_MASK;
        }
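
        /*
         * The log bits in these status MSRs are sticky; per the Intel SDM
         * they are cleared by writing 0 (writing 1 has no effect). Keeping
         * the other currently-set log bits as 1 in the written value thus
         * preserves them, while PROCHOT_LOG alone gets cleared.
         */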
        rdmsrl(msr, msr_val);
        msr_val &= mask;
        wrmsrl(msr, msr_val & ~THERM_STATUS_PROCHOT_LOG);
}

static void get_therm_status(int level, bool *proc_hot, u8 *temp)
{
        int msr;
        u64 msr_val;

        if (level == CORE_LEVEL)
                msr = MSR_IA32_THERM_STATUS;
        else
                msr = MSR_IA32_PACKAGE_THERM_STATUS;

        rdmsrl(msr, msr_val);
        if (msr_val & THERM_STATUS_PROCHOT_LOG)
                *proc_hot = true;
        else
                *proc_hot = false;
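
        /*
         * Bits 22:16 of the status MSR are the Digital Readout: the
         * temperature in degrees C *below* the TCC activation point, so a
         * smaller value means a hotter part (hence the "lower means
         * hotter" comparisons elsewhere in this file).
         */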
        *temp = (msr_val >> 16) & 0x7F;
}
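
/*
 * Polling worker, re-armed every THERM_THROT_POLL_INTERVAL (one second)
 * while a throttling episode is active: it keeps a three-sample moving
 * average of the temperature readout and warns (rate-limited by
 * CHECK_INTERVAL) when the average moves closer to PROCHOT, i.e. when the
 * new average readout drops below the previous one.
 */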
static void __maybe_unused throttle_active_work(struct work_struct *work)
{
        struct _thermal_state *state = container_of(to_delayed_work(work),
                                                struct _thermal_state, therm_work);
        unsigned int i, avg, this_cpu = smp_processor_id();
        u64 now = get_jiffies_64();
        bool hot;
        u8 temp;

        get_therm_status(state->level, &hot, &temp);
        /* temperature value is offset from the max so lower means hotter */
        if (!hot && temp > state->baseline_temp) {
                if (state->rate_control_active)
                        pr_info("CPU%d: %s temperature/speed normal (total events = %lu)\n",
                                this_cpu,
                                state->level == CORE_LEVEL ? "Core" : "Package",
                                state->count);

                state->rate_control_active = false;
                return;
        }

        if (time_before64(now, state->next_check) &&
                          state->rate_control_active)
                goto re_arm;

        state->next_check = now + CHECK_INTERVAL;

        if (state->count != state->last_count) {
                /* There was one new thermal interrupt */
                state->last_count = state->count;
                state->average = 0;
                state->sample_count = 0;
                state->sample_index = 0;
        }

        state->temp_samples[state->sample_index] = temp;
        state->sample_count++;
        state->sample_index = (state->sample_index + 1) % ARRAY_SIZE(state->temp_samples);
        if (state->sample_count < ARRAY_SIZE(state->temp_samples))
                goto re_arm;

        avg = 0;
        for (i = 0; i < ARRAY_SIZE(state->temp_samples); ++i)
                avg += state->temp_samples[i];

        avg /= ARRAY_SIZE(state->temp_samples);

        if (state->average > avg) {
                pr_warn("CPU%d: %s temperature is above threshold, cpu clock is throttled (total events = %lu)\n",
                        this_cpu,
                        state->level == CORE_LEVEL ? "Core" : "Package",
                        state->count);
                state->rate_control_active = true;
        }

        state->average = avg;

re_arm:
        clear_therm_status_log(state->level);
        schedule_delayed_work_on(this_cpu, &state->therm_work, THERM_THROT_POLL_INTERVAL);
}

/**
 * therm_throt_process - Process thermal throttling event from interrupt
 * @new_event: Whether the condition is currently active, since the thermal
 *             interrupt normally fires both when the thermal event begins
 *             and once it has ended.
 * @event:     THERMAL_THROTTLING_EVENT or POWER_LIMIT_EVENT.
 * @level:     CORE_LEVEL or PACKAGE_LEVEL.
 *
 * This function is called by the thermal interrupt after the
 * IRQ has been acknowledged.
 *
 * It will take care of rate limiting and printing messages to the syslog.
 */
static void therm_throt_process(bool new_event, int event, int level)
{
        struct _thermal_state *state;
        unsigned int this_cpu = smp_processor_id();
        bool old_event;
        u64 now;
        struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);

        now = get_jiffies_64();
        if (level == CORE_LEVEL) {
                if (event == THERMAL_THROTTLING_EVENT)
                        state = &pstate->core_throttle;
                else if (event == POWER_LIMIT_EVENT)
                        state = &pstate->core_power_limit;
                else
                        return;
        } else if (level == PACKAGE_LEVEL) {
                if (event == THERMAL_THROTTLING_EVENT)
                        state = &pstate->package_throttle;
                else if (event == POWER_LIMIT_EVENT)
                        state = &pstate->package_power_limit;
                else
                        return;
        } else
                return;

        old_event = state->new_event;
        state->new_event = new_event;

        if (new_event)
                state->count++;

        if (event != THERMAL_THROTTLING_EVENT)
                return;

        if (new_event && !state->last_interrupt_time) {
                bool hot;
                u8 temp;

                get_therm_status(state->level, &hot, &temp);
                /*
                 * Ignore short temperature spikes as the system is not close
                 * to PROCHOT. A 10C offset is large enough to ignore; it is
                 * already dropped from the high threshold temperature.
                 */
                if (temp > 10)
                        return;

                state->baseline_temp = temp;
                state->last_interrupt_time = now;
                schedule_delayed_work_on(this_cpu, &state->therm_work, THERM_THROT_POLL_INTERVAL);
        } else if (old_event && state->last_interrupt_time) {
                unsigned long throttle_time;

                throttle_time = jiffies_delta_to_msecs(now - state->last_interrupt_time);
                if (throttle_time > state->max_time_ms)
                        state->max_time_ms = throttle_time;
                state->total_time_ms += throttle_time;
                state->last_interrupt_time = 0;
        }
}

static int thresh_event_valid(int level, int event)
{
        struct _thermal_state *state;
        unsigned int this_cpu = smp_processor_id();
        struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);
        u64 now = get_jiffies_64();

        if (level == PACKAGE_LEVEL)
                state = (event == 0) ? &pstate->pkg_thresh0 :
                                       &pstate->pkg_thresh1;
        else
                state = (event == 0) ? &pstate->core_thresh0 :
                                       &pstate->core_thresh1;

        if (time_before64(now, state->next_check))
                return 0;

        state->next_check = now + CHECK_INTERVAL;

        return 1;
}

static bool int_pln_enable;
static int __init int_pln_enable_setup(char *s)
{
        int_pln_enable = true;

        return 1;
}
__setup("int_pln_enable", int_pln_enable_setup);
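
/*
 * Usage: power-limit notification interrupts are off by default; booting
 * with "int_pln_enable" on the kernel command line turns them (and the
 * corresponding *_power_limit_count sysfs files) on.
 */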

#ifdef CONFIG_SYSFS
/* Add/Remove thermal_throttle interface for CPU device: */
static int thermal_throttle_add_dev(struct device *dev, unsigned int cpu)
{
        int err;
        struct cpuinfo_x86 *c = &cpu_data(cpu);

        err = sysfs_create_group(&dev->kobj, &thermal_attr_group);
        if (err)
                return err;

        if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable) {
                err = sysfs_add_file_to_group(&dev->kobj,
                                              &dev_attr_core_power_limit_count.attr,
                                              thermal_attr_group.name);
                if (err)
                        goto del_group;
        }

        if (cpu_has(c, X86_FEATURE_PTS)) {
                err = sysfs_add_file_to_group(&dev->kobj,
                                              &dev_attr_package_throttle_count.attr,
                                              thermal_attr_group.name);
                if (err)
                        goto del_group;

                err = sysfs_add_file_to_group(&dev->kobj,
                                              &dev_attr_package_throttle_max_time_ms.attr,
                                              thermal_attr_group.name);
                if (err)
                        goto del_group;

                err = sysfs_add_file_to_group(&dev->kobj,
                                              &dev_attr_package_throttle_total_time_ms.attr,
                                              thermal_attr_group.name);
                if (err)
                        goto del_group;

                if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable) {
                        err = sysfs_add_file_to_group(&dev->kobj,
                                        &dev_attr_package_power_limit_count.attr,
                                        thermal_attr_group.name);
                        if (err)
                                goto del_group;
                }
        }

        return 0;

del_group:
        sysfs_remove_group(&dev->kobj, &thermal_attr_group);

        return err;
}

static void thermal_throttle_remove_dev(struct device *dev)
{
        sysfs_remove_group(&dev->kobj, &thermal_attr_group);
}

/* Get notified when a cpu comes on/off. Be hotplug friendly. */
static int thermal_throttle_online(unsigned int cpu)
{
        struct thermal_state *state = &per_cpu(thermal_state, cpu);
        struct device *dev = get_cpu_device(cpu);
        u32 l;

        state->package_throttle.level = PACKAGE_LEVEL;
        state->core_throttle.level = CORE_LEVEL;

        INIT_DELAYED_WORK(&state->package_throttle.therm_work, throttle_active_work);
        INIT_DELAYED_WORK(&state->core_throttle.therm_work, throttle_active_work);

        /*
         * The first CPU coming online will enable the HFI. Usually this causes
         * hardware to issue an HFI thermal interrupt. Such interrupt will reach
         * the CPU once we enable the thermal vector in the local APIC.
         */
        intel_hfi_online(cpu);

        /* Unmask the thermal vector after the above workqueues are initialized. */
        l = apic_read(APIC_LVTTHMR);
        apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);

        return thermal_throttle_add_dev(dev, cpu);
}

static int thermal_throttle_offline(unsigned int cpu)
{
        struct thermal_state *state = &per_cpu(thermal_state, cpu);
        struct device *dev = get_cpu_device(cpu);
        u32 l;

        /* Mask the thermal vector before draining any pending work */
        l = apic_read(APIC_LVTTHMR);
        apic_write(APIC_LVTTHMR, l | APIC_LVT_MASKED);

        intel_hfi_offline(cpu);

        cancel_delayed_work_sync(&state->package_throttle.therm_work);
        cancel_delayed_work_sync(&state->core_throttle.therm_work);

        state->package_throttle.rate_control_active = false;
        state->core_throttle.rate_control_active = false;

        thermal_throttle_remove_dev(dev);

        return 0;
}

static __init int thermal_throttle_init_device(void)
{
        int ret;

        if (!atomic_read(&therm_throt_en))
                return 0;

        intel_hfi_init();

        ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/therm:online",
                                thermal_throttle_online,
                                thermal_throttle_offline);
        return ret < 0 ? ret : 0;
}
device_initcall(thermal_throttle_init_device);
#endif /* CONFIG_SYSFS */

static void notify_package_thresholds(__u64 msr_val)
{
        bool notify_thres_0 = false;
        bool notify_thres_1 = false;

        if (!platform_thermal_package_notify)
                return;

        /* lower threshold check */
        if (msr_val & THERM_LOG_THRESHOLD0)
                notify_thres_0 = true;
        /* higher threshold check */
        if (msr_val & THERM_LOG_THRESHOLD1)
                notify_thres_1 = true;

        if (!notify_thres_0 && !notify_thres_1)
                return;

        if (platform_thermal_package_rate_control &&
            platform_thermal_package_rate_control()) {
                /* Rate control is implemented in callback */
                platform_thermal_package_notify(msr_val);
                return;
        }

        /* lower threshold reached */
        if (notify_thres_0 && thresh_event_valid(PACKAGE_LEVEL, 0))
                platform_thermal_package_notify(msr_val);
        /* higher threshold reached */
        if (notify_thres_1 && thresh_event_valid(PACKAGE_LEVEL, 1))
                platform_thermal_package_notify(msr_val);
}

static void notify_thresholds(__u64 msr_val)
{
        /* check whether the interrupt handler is defined;
         * otherwise simply return
         */
        if (!platform_thermal_notify)
                return;

        /* lower threshold reached */
        if ((msr_val & THERM_LOG_THRESHOLD0) &&
            thresh_event_valid(CORE_LEVEL, 0))
                platform_thermal_notify(msr_val);
        /* higher threshold reached */
        if ((msr_val & THERM_LOG_THRESHOLD1) &&
            thresh_event_valid(CORE_LEVEL, 1))
                platform_thermal_notify(msr_val);
}

void __weak notify_hwp_interrupt(void)
{
        wrmsrl_safe(MSR_HWP_STATUS, 0);
}

/* Thermal transition interrupt handler */
void intel_thermal_interrupt(void)
{
        __u64 msr_val;

        if (static_cpu_has(X86_FEATURE_HWP))
                notify_hwp_interrupt();

        rdmsrl(MSR_IA32_THERM_STATUS, msr_val);

        /* Check for violation of core thermal thresholds */
        notify_thresholds(msr_val);

        therm_throt_process(msr_val & THERM_STATUS_PROCHOT,
                            THERMAL_THROTTLING_EVENT,
                            CORE_LEVEL);

        if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable)
                therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,
                                    POWER_LIMIT_EVENT,
                                    CORE_LEVEL);

        if (this_cpu_has(X86_FEATURE_PTS)) {
                rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
                /* check violations of package thermal thresholds */
                notify_package_thresholds(msr_val);
                therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT,
                                    THERMAL_THROTTLING_EVENT,
                                    PACKAGE_LEVEL);
                if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable)
                        therm_throt_process(msr_val &
                                        PACKAGE_THERM_STATUS_POWER_LIMIT,
                                        POWER_LIMIT_EVENT,
                                        PACKAGE_LEVEL);

                if (this_cpu_has(X86_FEATURE_HFI))
                        intel_hfi_process_event(msr_val &
                                                PACKAGE_THERM_STATUS_HFI_UPDATED);
        }
}

/* Thermal monitoring depends on APIC, ACPI and clock modulation */
static int intel_thermal_supported(struct cpuinfo_x86 *c)
{
        if (!boot_cpu_has(X86_FEATURE_APIC))
                return 0;
        if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
                return 0;
        return 1;
}

bool x86_thermal_enabled(void)
{
        return atomic_read(&therm_throt_en);
}

void __init therm_lvt_init(void)
{
        /*
         * This function is only called on the boot CPU. Save the initial
         * thermal LVT value on the BSP and use that value later to restore
         * the thermal LVT entries that the BIOS programmed on the APs.
         */
        if (intel_thermal_supported(&boot_cpu_data))
                lvtthmr_init = apic_read(APIC_LVTTHMR);
}

void intel_init_thermal(struct cpuinfo_x86 *c)
{
        unsigned int cpu = smp_processor_id();
        int tm2 = 0;
        u32 l, h;

        if (!intel_thermal_supported(c))
                return;

        /*
         * First check if it's enabled already, in which case there might
         * be some SMM goo which handles it, so we can't even put a handler
         * since it might be delivered via SMI already:
         */
        rdmsr(MSR_IA32_MISC_ENABLE, l, h);

        h = lvtthmr_init;
        /*
         * The initial value of thermal LVT entries on all APs always reads
         * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI
         * sequence to them and LVT registers are reset to 0s except for
         * the mask bits which are set to 1s when APs receive INIT IPI.
         * If BIOS takes over the thermal interrupt and sets its interrupt
         * delivery mode to SMI (not fixed), it restores the value that the
         * BIOS has programmed on AP based on BSP's info we saved since BIOS
         * is always setting the same value for all threads/cores.
         */
        if ((h & APIC_DM_FIXED_MASK) != APIC_DM_FIXED)
                apic_write(APIC_LVTTHMR, lvtthmr_init);

        if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
                if (system_state == SYSTEM_BOOTING)
                        pr_debug("CPU%d: Thermal monitoring handled by SMI\n", cpu);
                return;
        }

        /* early Pentium M models use different method for enabling TM2 */
        if (cpu_has(c, X86_FEATURE_TM2)) {
                if (c->x86 == 6 && (c->x86_model == 9 || c->x86_model == 13)) {
                        rdmsr(MSR_THERM2_CTL, l, h);
                        if (l & MSR_THERM2_CTL_TM_SELECT)
                                tm2 = 1;
                } else if (l & MSR_IA32_MISC_ENABLE_TM2)
                        tm2 = 1;
        }

        /* We'll mask the thermal vector in the lapic till we're ready: */
        h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
        apic_write(APIC_LVTTHMR, h);

        rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
        if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable)
                wrmsr(MSR_IA32_THERM_INTERRUPT,
                      (l | (THERM_INT_LOW_ENABLE
                        | THERM_INT_HIGH_ENABLE)) & ~THERM_INT_PLN_ENABLE, h);
        else if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
                wrmsr(MSR_IA32_THERM_INTERRUPT,
                      l | (THERM_INT_LOW_ENABLE
                        | THERM_INT_HIGH_ENABLE | THERM_INT_PLN_ENABLE), h);
        else
                wrmsr(MSR_IA32_THERM_INTERRUPT,
                      l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h);

        if (cpu_has(c, X86_FEATURE_PTS)) {
                rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
                if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable)
                        wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
                              (l | (PACKAGE_THERM_INT_LOW_ENABLE
                                | PACKAGE_THERM_INT_HIGH_ENABLE))
                                & ~PACKAGE_THERM_INT_PLN_ENABLE, h);
                else if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
                        wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
                              l | (PACKAGE_THERM_INT_LOW_ENABLE
                                | PACKAGE_THERM_INT_HIGH_ENABLE
                                | PACKAGE_THERM_INT_PLN_ENABLE), h);
                else
                        wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
                              l | (PACKAGE_THERM_INT_LOW_ENABLE
                                | PACKAGE_THERM_INT_HIGH_ENABLE), h);

                if (cpu_has(c, X86_FEATURE_HFI)) {
                        rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
                        wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
                              l | PACKAGE_THERM_INT_HFI_ENABLE, h);
                }
        }

        rdmsr(MSR_IA32_MISC_ENABLE, l, h);
        wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);

        pr_info_once("CPU0: Thermal monitoring enabled (%s)\n",
                     tm2 ? "TM2" : "TM1");

        /* enable thermal throttle processing */
        atomic_set(&therm_throt_en, 1);
}