dev-mcelog.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * /dev/mcelog driver
 *
 * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs.
 * Rest from unknown author(s).
 * 2004 Andi Kleen. Rewrote most of it.
 * Copyright 2008 Intel Corporation
 * Author: Andi Kleen
 */

#include <linux/miscdevice.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/poll.h>

#include "internal.h"

static BLOCKING_NOTIFIER_HEAD(mce_injector_chain);

static DEFINE_MUTEX(mce_chrdev_read_mutex);

static char mce_helper[128];
static char *mce_helper_argv[2] = { mce_helper, NULL };
/*
 * Lockless MCE logging infrastructure.
 * This avoids deadlocks on printk locks without having to break them.
 * It also keeps MCEs separate from ordinary kernel messages to avoid
 * bogus bug reports.
 */
static struct mce_log_buffer *mcelog;

static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);

static int dev_mce_log(struct notifier_block *nb, unsigned long val,
		       void *data)
{
	struct mce *mce = (struct mce *)data;
	unsigned int entry;

	if (mce->kflags & MCE_HANDLED_CEC)
		return NOTIFY_DONE;

	mutex_lock(&mce_chrdev_read_mutex);

	entry = mcelog->next;

	/*
	 * When the buffer fills up, discard new entries. Assume that the
	 * earlier errors are the more interesting ones:
	 */
	if (entry >= mcelog->len) {
		set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog->flags);
		goto unlock;
	}

	mcelog->next = entry + 1;

	memcpy(mcelog->entry + entry, mce, sizeof(struct mce));
	mcelog->entry[entry].finished = 1;
	mcelog->entry[entry].kflags = 0;

	/* wake processes polling /dev/mcelog */
	wake_up_interruptible(&mce_chrdev_wait);

unlock:
	mutex_unlock(&mce_chrdev_read_mutex);

	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
		mce->kflags |= MCE_HANDLED_MCELOG;

	return NOTIFY_OK;
}

static struct notifier_block dev_mcelog_nb = {
	.notifier_call	= dev_mce_log,
	.priority	= MCE_PRIO_MCELOG,
};
static void mce_do_trigger(struct work_struct *work)
{
	call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT);
}

static DECLARE_WORK(mce_trigger_work, mce_do_trigger);

void mce_work_trigger(void)
{
	if (mce_helper[0])
		schedule_work(&mce_trigger_work);
}

static ssize_t
show_trigger(struct device *s, struct device_attribute *attr, char *buf)
{
	strcpy(buf, mce_helper);
	strcat(buf, "\n");
	return strlen(mce_helper) + 1;
}

static ssize_t set_trigger(struct device *s, struct device_attribute *attr,
			   const char *buf, size_t siz)
{
	char *p;

	strncpy(mce_helper, buf, sizeof(mce_helper));
	mce_helper[sizeof(mce_helper)-1] = 0;
	p = strchr(mce_helper, '\n');

	if (p)
		*p = 0;

	return strlen(mce_helper) + !!p;
}

DEVICE_ATTR(trigger, 0644, show_trigger, set_trigger);
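
/*
 * Illustrative sketch, not part of this driver: the trigger attribute is
 * exposed through sysfs, so a privileged process can point it at a helper
 * program that is spawned (via the workqueue above) whenever a new MCE is
 * logged. The sysfs path assumes the machinecheck device hierarchy set up
 * by the MCE core; the helper name is hypothetical:
 *
 *	int fd = open("/sys/devices/system/machinecheck/machinecheck0/trigger",
 *		      O_WRONLY);
 *	if (fd >= 0) {
 *		write(fd, "/usr/sbin/mce-handler\n", 22);
 *		close(fd);
 *	}
 */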
/*
 * mce_chrdev: Character device /dev/mcelog to read and clear the MCE log.
 */
static DEFINE_SPINLOCK(mce_chrdev_state_lock);
static int mce_chrdev_open_count;	/* #times opened */
static int mce_chrdev_open_exclu;	/* already open exclusive? */

static int mce_chrdev_open(struct inode *inode, struct file *file)
{
	spin_lock(&mce_chrdev_state_lock);

	if (mce_chrdev_open_exclu ||
	    (mce_chrdev_open_count && (file->f_flags & O_EXCL))) {
		spin_unlock(&mce_chrdev_state_lock);

		return -EBUSY;
	}

	if (file->f_flags & O_EXCL)
		mce_chrdev_open_exclu = 1;
	mce_chrdev_open_count++;

	spin_unlock(&mce_chrdev_state_lock);

	return nonseekable_open(inode, file);
}

static int mce_chrdev_release(struct inode *inode, struct file *file)
{
	spin_lock(&mce_chrdev_state_lock);
	mce_chrdev_open_count--;
	mce_chrdev_open_exclu = 0;
	spin_unlock(&mce_chrdev_state_lock);

	return 0;
}
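
/*
 * Illustrative sketch, not part of this driver: O_EXCL lets a daemon claim
 * the log for itself. While it holds the device, any further open fails:
 *
 *	int fd = open("/dev/mcelog", O_RDONLY | O_EXCL);
 *	if (fd < 0 && errno == EBUSY) {
 *		// another reader (e.g. an mcelog daemon) already has it open
 *	}
 */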
static int mce_apei_read_done;

/* Collect MCE record of previous boot in persistent storage via APEI ERST. */
static int __mce_read_apei(char __user **ubuf, size_t usize)
{
	int rc;
	u64 record_id;
	struct mce m;

	if (usize < sizeof(struct mce))
		return -EINVAL;

	rc = apei_read_mce(&m, &record_id);
	/* Error or no more MCE record */
	if (rc <= 0) {
		mce_apei_read_done = 1;
		/*
		 * When ERST is disabled, mce_chrdev_read() should return
		 * "no record" instead of "no device."
		 */
		if (rc == -ENODEV)
			return 0;
		return rc;
	}
	rc = -EFAULT;
	if (copy_to_user(*ubuf, &m, sizeof(struct mce)))
		return rc;
	/*
	 * Ideally the record would be cleared only after /sbin/mcelog has
	 * flushed it to disk or sent it over the network, but there is no
	 * interface for that yet, so clear it now to avoid duplication.
	 */
	rc = apei_clear_mce(record_id);
	if (rc) {
		mce_apei_read_done = 1;
		return rc;
	}
	*ubuf += sizeof(struct mce);

	return 0;
}
static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
			       size_t usize, loff_t *off)
{
	char __user *buf = ubuf;
	unsigned next;
	int i, err;

	mutex_lock(&mce_chrdev_read_mutex);

	if (!mce_apei_read_done) {
		err = __mce_read_apei(&buf, usize);
		if (err || buf != ubuf)
			goto out;
	}

	/* Only supports full reads right now */
	err = -EINVAL;
	if (*off != 0 || usize < mcelog->len * sizeof(struct mce))
		goto out;

	next = mcelog->next;
	err = 0;

	for (i = 0; i < next; i++) {
		struct mce *m = &mcelog->entry[i];

		err |= copy_to_user(buf, m, sizeof(*m));
		buf += sizeof(*m);
	}

	memset(mcelog->entry, 0, next * sizeof(struct mce));
	mcelog->next = 0;

	if (err)
		err = -EFAULT;

out:
	mutex_unlock(&mce_chrdev_read_mutex);

	return err ? err : buf - ubuf;
}
static __poll_t mce_chrdev_poll(struct file *file, poll_table *wait)
{
	poll_wait(file, &mce_chrdev_wait, wait);
	if (READ_ONCE(mcelog->next))
		return EPOLLIN | EPOLLRDNORM;
	if (!mce_apei_read_done && apei_check_mce())
		return EPOLLIN | EPOLLRDNORM;
	return 0;
}
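
/*
 * Illustrative sketch, not part of this driver: a reader can sleep in
 * poll(2) until dev_mce_log() queues a record and wakes mce_chrdev_wait:
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *	if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN)) {
 *		// new MCE records (or buffered APEI records) are readable
 *	}
 */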
static long mce_chrdev_ioctl(struct file *f, unsigned int cmd,
			     unsigned long arg)
{
	int __user *p = (int __user *)arg;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	switch (cmd) {
	case MCE_GET_RECORD_LEN:
		return put_user(sizeof(struct mce), p);
	case MCE_GET_LOG_LEN:
		return put_user(mcelog->len, p);
	case MCE_GETCLEAR_FLAGS: {
		unsigned flags;

		do {
			flags = mcelog->flags;
		} while (cmpxchg(&mcelog->flags, flags, 0) != flags);

		return put_user(flags, p);
	}
	default:
		return -ENOTTY;
	}
}
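
/*
 * Illustrative sketch, not part of this driver: since the read side only
 * supports full reads, a consumer first queries the record and log lengths
 * through the ioctls above, then drains the whole buffer in one read:
 *
 *	int rlen, llen;
 *	ioctl(fd, MCE_GET_RECORD_LEN, &rlen);	// sizeof(struct mce)
 *	ioctl(fd, MCE_GET_LOG_LEN, &llen);	// number of log entries
 *	void *buf = malloc((size_t)rlen * llen);
 *	ssize_t n = read(fd, buf, (size_t)rlen * llen);
 *	// on success, n / rlen records were returned and the log was cleared
 */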
void mce_register_injector_chain(struct notifier_block *nb)
{
	blocking_notifier_chain_register(&mce_injector_chain, nb);
}
EXPORT_SYMBOL_GPL(mce_register_injector_chain);

void mce_unregister_injector_chain(struct notifier_block *nb)
{
	blocking_notifier_chain_unregister(&mce_injector_chain, nb);
}
EXPORT_SYMBOL_GPL(mce_unregister_injector_chain);
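
/*
 * Illustrative sketch, not part of this file: an injection backend (such as
 * the mce-inject module) consumes records written to /dev/mcelog by
 * registering on this chain. Names below are hypothetical:
 *
 *	static int my_inject(struct notifier_block *nb, unsigned long val,
 *			     void *data)
 *	{
 *		struct mce *m = data;
 *		// arrange for the error described by *m to be raised
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block my_nb = { .notifier_call = my_inject };
 *	mce_register_injector_chain(&my_nb);
 */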
static ssize_t mce_chrdev_write(struct file *filp, const char __user *ubuf,
				size_t usize, loff_t *off)
{
	struct mce m;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	/*
	 * There are some cases where real MSR reads could slip
	 * through.
	 */
	if (!boot_cpu_has(X86_FEATURE_MCE) || !boot_cpu_has(X86_FEATURE_MCA))
		return -EIO;

	if ((unsigned long)usize > sizeof(struct mce))
		usize = sizeof(struct mce);
	if (copy_from_user(&m, ubuf, usize))
		return -EFAULT;

	if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu))
		return -EINVAL;

	/*
	 * Need to give user space some time to set everything up,
	 * so do it a jiffy or two later everywhere.
	 */
	schedule_timeout(2);

	blocking_notifier_call_chain(&mce_injector_chain, 0, &m);

	return usize;
}
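
/*
 * Illustrative sketch, not part of this driver: with an injection backend
 * registered on the chain above, a privileged process can inject a fake
 * MCE by writing a struct mce. Field values here are made up:
 *
 *	struct mce m = { 0 };
 *	m.status = MCI_STATUS_VAL | MCI_STATUS_UC;	// valid, uncorrected
 *	m.bank = 1;
 *	m.extcpu = 0;					// must be an online CPU
 *	write(fd, &m, sizeof(m));
 */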
static const struct file_operations mce_chrdev_ops = {
	.open			= mce_chrdev_open,
	.release		= mce_chrdev_release,
	.read			= mce_chrdev_read,
	.write			= mce_chrdev_write,
	.poll			= mce_chrdev_poll,
	.unlocked_ioctl		= mce_chrdev_ioctl,
	.compat_ioctl		= compat_ptr_ioctl,
	.llseek			= no_llseek,
};

static struct miscdevice mce_chrdev_device = {
	MISC_MCELOG_MINOR,
	"mcelog",
	&mce_chrdev_ops,
};

static __init int dev_mcelog_init_device(void)
{
	int mce_log_len;
	int err;

	mce_log_len = max(MCE_LOG_MIN_LEN, num_online_cpus());
	mcelog = kzalloc(struct_size(mcelog, entry, mce_log_len), GFP_KERNEL);
	if (!mcelog)
		return -ENOMEM;

	memcpy(mcelog->signature, MCE_LOG_SIGNATURE, sizeof(mcelog->signature));
	mcelog->len = mce_log_len;
	mcelog->recordlen = sizeof(struct mce);

	/* register character device /dev/mcelog */
	err = misc_register(&mce_chrdev_device);
	if (err) {
		if (err == -EBUSY)
			/* Xen dom0 might have registered the device already. */
			pr_info("Unable to init device /dev/mcelog, already registered\n");
		else
			pr_err("Unable to init device /dev/mcelog (rc: %d)\n", err);

		kfree(mcelog);
		return err;
	}

	mce_register_decode_chain(&dev_mcelog_nb);
	return 0;
}
device_initcall_sync(dev_mcelog_init_device);