devcoredump.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright(c) 2014 Intel Mobile Communications GmbH
 * Copyright(c) 2015 Intel Deutschland GmbH
 *
 * Author: Johannes Berg <[email protected]>
 */
#include <linux/module.h>
#include <linux/device.h>
#include <linux/devcoredump.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/workqueue.h>

static struct class devcd_class;

/* global disable flag, for security purposes */
static bool devcd_disabled;

/* if data isn't read by userspace after 5 minutes then delete it */
#define DEVCD_TIMEOUT   (HZ * 60 * 5)
struct devcd_entry {
        struct device devcd_dev;
        void *data;
        size_t datalen;
        /*
         * Here, the mutex is required to serialize the calls to the del_wk
         * work between user and kernel space, which happens when devcd is
         * added with device_add() and that sends a uevent to user space.
         * User space reads the uevent and calls devcd_data_write(), which
         * tries to modify a work that devcoredump has not even
         * initialized/queued yet:
         *
         *      cpu0(X)                         cpu1(Y)
         *
         *      dev_coredump()  uevent sent to user space
         *        device_add()  =============> user space process Y reads the
         *                                     uevent and writes to the devcd
         *                                     fd, which results in
         *                                     devcd_data_write()
         *                                       mod_delayed_work()
         *                                         try_to_grab_pending()
         *                                           del_timer()
         *                                             debug_assert_init()
         *        INIT_DELAYED_WORK()
         *        schedule_delayed_work()
         *
         * Also, the mutex alone would not be enough to avoid scheduling the
         * del_wk work after it has been flushed by a call to devcd_free(),
         * as below:
         *
         *      disabled_store()
         *        devcd_free()
         *          mutex_lock()              devcd_data_write()
         *          flush_delayed_work()
         *          mutex_unlock()
         *                                      mutex_lock()
         *                                      mod_delayed_work()
         *                                      mutex_unlock()
         *
         * So the delete_work flag is required as well.
         */
        struct mutex mutex;
        bool delete_work;
        struct module *owner;
        ssize_t (*read)(char *buffer, loff_t offset, size_t count,
                        void *data, size_t datalen);
        void (*free)(void *data);
        struct delayed_work del_wk;
        struct device *failing_dev;
};
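
/*
 * Summary of the locking rules described above: every path that arms,
 * re-arms or flushes del_wk must hold devcd->mutex and respect the
 * delete_work flag, so the work can never be (re)scheduled once
 * devcd_free() has flushed it. devcd_data_write(), devcd_free() and
 * dev_coredumpm() below all follow this pattern.
 */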
static struct devcd_entry *dev_to_devcd(struct device *dev)
{
        return container_of(dev, struct devcd_entry, devcd_dev);
}

static void devcd_dev_release(struct device *dev)
{
        struct devcd_entry *devcd = dev_to_devcd(dev);

        devcd->free(devcd->data);
        module_put(devcd->owner);

        /*
         * This seems racy, but there is no notifier or similar on a
         * struct device to learn when it goes away, so delete the link
         * here if the failing device's sysfs entry still exists.
         */
        if (devcd->failing_dev->kobj.sd)
                sysfs_delete_link(&devcd->failing_dev->kobj, &dev->kobj,
                                  "devcoredump");

        put_device(devcd->failing_dev);
        kfree(devcd);
}

static void devcd_del(struct work_struct *wk)
{
        struct devcd_entry *devcd;

        devcd = container_of(wk, struct devcd_entry, del_wk.work);

        device_del(&devcd->devcd_dev);
        put_device(&devcd->devcd_dev);
}
static ssize_t devcd_data_read(struct file *filp, struct kobject *kobj,
                               struct bin_attribute *bin_attr,
                               char *buffer, loff_t offset, size_t count)
{
        struct device *dev = kobj_to_dev(kobj);
        struct devcd_entry *devcd = dev_to_devcd(dev);

        return devcd->read(buffer, offset, count, devcd->data, devcd->datalen);
}

static ssize_t devcd_data_write(struct file *filp, struct kobject *kobj,
                                struct bin_attribute *bin_attr,
                                char *buffer, loff_t offset, size_t count)
{
        struct device *dev = kobj_to_dev(kobj);
        struct devcd_entry *devcd = dev_to_devcd(dev);

        mutex_lock(&devcd->mutex);
        if (!devcd->delete_work) {
                devcd->delete_work = true;
                /* Any write means the data is no longer needed: delete now. */
                mod_delayed_work(system_wq, &devcd->del_wk, 0);
        }
        mutex_unlock(&devcd->mutex);

        return count;
}

static struct bin_attribute devcd_attr_data = {
        .attr = { .name = "data", .mode = S_IRUSR | S_IWUSR, },
        .size = 0,
        .read = devcd_data_read,
        .write = devcd_data_write,
};

static struct bin_attribute *devcd_dev_bin_attrs[] = {
        &devcd_attr_data, NULL,
};

static const struct attribute_group devcd_dev_group = {
        .bin_attrs = devcd_dev_bin_attrs,
};

static const struct attribute_group *devcd_dev_groups[] = {
        &devcd_dev_group, NULL,
};
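
/*
 * Hypothetical userspace sketch (not part of this file): each pending
 * dump shows up as /sys/class/devcoredump/devcdN/data. Reading the file
 * returns the dump contents via devcd_data_read(); writing anything to
 * it triggers devcd_data_write(), which frees the dump immediately
 * instead of waiting for DEVCD_TIMEOUT. The path and function name
 * below are illustrative only.
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static void read_and_discard_dump(const char *path)
{
        char buf[4096];
        ssize_t n;
        int fd = open(path, O_RDWR); /* e.g. "/sys/class/devcoredump/devcd1/data" */

        if (fd < 0)
                return;
        while ((n = read(fd, buf, sizeof(buf))) > 0)
                fwrite(buf, 1, n, stdout);      /* consume the dump */
        write(fd, "1", 1);                      /* any write discards it */
        close(fd);
}
#endif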
static int devcd_free(struct device *dev, void *data)
{
        struct devcd_entry *devcd = dev_to_devcd(dev);

        mutex_lock(&devcd->mutex);
        if (!devcd->delete_work)
                devcd->delete_work = true;

        flush_delayed_work(&devcd->del_wk);
        mutex_unlock(&devcd->mutex);

        return 0;
}

static ssize_t disabled_show(struct class *class, struct class_attribute *attr,
                             char *buf)
{
        return sysfs_emit(buf, "%d\n", devcd_disabled);
}
/*
 *      disabled_store()                        worker()
 *        class_for_each_device(&devcd_class,
 *                              NULL, NULL, devcd_free)
 *          ...
 *          while ((dev = class_dev_iter_next(&iter)))
 *                                                devcd_del()
 *                                                  device_del()
 *                                                  put_device() <- last reference
 *            error = fn(dev, data)               devcd_dev_release()
 *              devcd_free(dev, data)               kfree(devcd)
 *                mutex_lock(&devcd->mutex);
 *
 * In the diagram above, disabled_store() looks like it could race with a
 * concurrently running devcd_del() and hit a use-after-free by acquiring
 * devcd->mutex after put_device() has dropped the last reference and
 * devcd_dev_release() has kfree()d the devcd memory. However, this cannot
 * happen: fn(dev, data) runs while the class iterator holds its own
 * reference to the device via its klist_node, so the reference dropped in
 * devcd_del() is never the last one.
 */
static ssize_t disabled_store(struct class *class, struct class_attribute *attr,
                              const char *buf, size_t count)
{
        long tmp = simple_strtol(buf, NULL, 10);

        /*
         * This essentially makes the attribute write-once, since you can't
         * go back to not having it disabled. This is intentional, it serves
         * as a system lockdown feature.
         */
        if (tmp != 1)
                return -EINVAL;

        devcd_disabled = true;

        class_for_each_device(&devcd_class, NULL, NULL, devcd_free);

        return count;
}
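
/*
 * Hypothetical lockdown sketch (not part of this file): writing "1" to
 * the class attribute disables devcoredump for the rest of the boot and
 * frees all pending dumps. The path below follows from the class name;
 * the helper is illustrative only.
 */
#if 0
#include <fcntl.h>
#include <unistd.h>

static void lock_down_devcoredump(void)
{
        int fd = open("/sys/class/devcoredump/disabled", O_WRONLY);

        if (fd >= 0) {
                write(fd, "1", 1);      /* write-once: cannot be re-enabled */
                close(fd);
        }
}
#endif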
static CLASS_ATTR_RW(disabled);

static struct attribute *devcd_class_attrs[] = {
        &class_attr_disabled.attr,
        NULL,
};
ATTRIBUTE_GROUPS(devcd_class);

static struct class devcd_class = {
        .name           = "devcoredump",
        .owner          = THIS_MODULE,
        .dev_release    = devcd_dev_release,
        .dev_groups     = devcd_dev_groups,
        .class_groups   = devcd_class_groups,
};
static ssize_t devcd_readv(char *buffer, loff_t offset, size_t count,
                           void *data, size_t datalen)
{
        return memory_read_from_buffer(buffer, count, &offset, data, datalen);
}

static void devcd_freev(void *data)
{
        vfree(data);
}
/**
 * dev_coredumpv - create device coredump with vmalloc data
 * @dev: the struct device for the crashed device
 * @data: vmalloc data containing the device coredump
 * @datalen: length of the data
 * @gfp: allocation flags
 *
 * This function takes ownership of the vmalloc'ed data and will free
 * it when it is no longer used. See dev_coredumpm() for more information.
 */
void dev_coredumpv(struct device *dev, void *data, size_t datalen,
                   gfp_t gfp)
{
        dev_coredumpm(dev, NULL, data, datalen, gfp, devcd_readv, devcd_freev);
}
EXPORT_SYMBOL_GPL(dev_coredumpv);
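
/*
 * Hypothetical driver-side sketch (not part of this file): snapshot the
 * device state into a vmalloc() buffer and hand it over. Ownership of
 * the buffer passes to devcoredump, which vfree()s it once the dump is
 * read or times out. Function name and size are illustrative only;
 * a real caller needs <linux/vmalloc.h>.
 */
#if 0
static void my_driver_report_crash(struct device *dev)
{
        size_t len = 16 * 1024;         /* illustrative dump size */
        void *buf = vmalloc(len);

        if (!buf)
                return;
        /* ... fill buf with firmware/device state ... */
        dev_coredumpv(dev, buf, len, GFP_KERNEL); /* do not touch buf afterwards */
}
#endif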
static int devcd_match_failing(struct device *dev, const void *failing)
{
        struct devcd_entry *devcd = dev_to_devcd(dev);

        return devcd->failing_dev == failing;
}
/**
 * devcd_free_sgtable - free all the memory of the given scatterlist table
 * (i.e. both pages and scatterlist instances)
 * NOTE: if two tables are allocated with devcd_alloc_sgtable and then
 * chained using the sg_chain function, then this function should be called
 * only once on the chained table
 * @data: pointer to sg_table to free
 */
static void devcd_free_sgtable(void *data)
{
        _devcd_free_sgtable(data);
}
/**
 * devcd_read_from_sgtable - copy data from sg_table to a given buffer
 * and return the number of bytes read
 * @buffer: the buffer to copy the data to
 * @buf_len: the length of the buffer
 * @data: the scatterlist table to copy from
 * @offset: start copying from @offset bytes from the head of the data
 *      in the given scatterlist
 * @data_len: the length of the data in the sg_table
 */
static ssize_t devcd_read_from_sgtable(char *buffer, loff_t offset,
                                       size_t buf_len, void *data,
                                       size_t data_len)
{
        struct scatterlist *table = data;

        if (offset > data_len)
                return -EINVAL;

        if (offset + buf_len > data_len)
                buf_len = data_len - offset;

        return sg_pcopy_to_buffer(table, sg_nents(table), buffer, buf_len,
                                  offset);
}
/**
 * dev_coredumpm - create device coredump with read/free methods
 * @dev: the struct device for the crashed device
 * @owner: the module that contains the read/free functions, use %THIS_MODULE
 * @data: data cookie for the @read/@free functions
 * @datalen: length of the data
 * @gfp: allocation flags
 * @read: function to read from the given buffer
 * @free: function to free the given buffer
 *
 * Creates a new device coredump for the given device. If a previous one hasn't
 * been read yet, the new coredump is discarded. The data lifetime is determined
 * by the device coredump framework and when it is no longer needed the @free
 * function will be called to free the data.
 */
void dev_coredumpm(struct device *dev, struct module *owner,
                   void *data, size_t datalen, gfp_t gfp,
                   ssize_t (*read)(char *buffer, loff_t offset, size_t count,
                                   void *data, size_t datalen),
                   void (*free)(void *data))
{
        static atomic_t devcd_count = ATOMIC_INIT(0);
        struct devcd_entry *devcd;
        struct device *existing;

        if (devcd_disabled)
                goto free;

        existing = class_find_device(&devcd_class, NULL, dev,
                                     devcd_match_failing);
        if (existing) {
                put_device(existing);
                goto free;
        }

        if (!try_module_get(owner))
                goto free;

        devcd = kzalloc(sizeof(*devcd), gfp);
        if (!devcd)
                goto put_module;

        devcd->owner = owner;
        devcd->data = data;
        devcd->datalen = datalen;
        devcd->read = read;
        devcd->free = free;
        devcd->failing_dev = get_device(dev);
        devcd->delete_work = false;

        mutex_init(&devcd->mutex);
        device_initialize(&devcd->devcd_dev);

        dev_set_name(&devcd->devcd_dev, "devcd%d",
                     atomic_inc_return(&devcd_count));
        devcd->devcd_dev.class = &devcd_class;

        mutex_lock(&devcd->mutex);
        dev_set_uevent_suppress(&devcd->devcd_dev, true);
        if (device_add(&devcd->devcd_dev))
                goto put_device;

        /*
         * These should normally not fail, but there is no problem
         * continuing without the links, so just warn instead of
         * failing.
         */
        if (sysfs_create_link(&devcd->devcd_dev.kobj, &dev->kobj,
                              "failing_device") ||
            sysfs_create_link(&dev->kobj, &devcd->devcd_dev.kobj,
                              "devcoredump"))
                dev_warn(dev, "devcoredump create_link failed\n");

        dev_set_uevent_suppress(&devcd->devcd_dev, false);
        kobject_uevent(&devcd->devcd_dev.kobj, KOBJ_ADD);
        INIT_DELAYED_WORK(&devcd->del_wk, devcd_del);
        schedule_delayed_work(&devcd->del_wk, DEVCD_TIMEOUT);
        mutex_unlock(&devcd->mutex);
        return;
 put_device:
        put_device(&devcd->devcd_dev);
        mutex_unlock(&devcd->mutex);
 put_module:
        module_put(owner);
 free:
        free(data);
}
EXPORT_SYMBOL_GPL(dev_coredumpm);
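
/*
 * Hypothetical driver-side sketch (not part of this file): using
 * dev_coredumpm() with custom read/free callbacks so the dump can live
 * in a driver-defined structure. The struct, names and flow are
 * illustrative only.
 */
#if 0
struct my_dump {
        void *buf;
        size_t len;
};

static ssize_t my_dump_read(char *buffer, loff_t offset, size_t count,
                            void *data, size_t datalen)
{
        struct my_dump *dump = data;

        return memory_read_from_buffer(buffer, count, &offset,
                                       dump->buf, dump->len);
}

static void my_dump_free(void *data)
{
        struct my_dump *dump = data;

        kfree(dump->buf);
        kfree(dump);
}

static void my_driver_report_crash(struct device *dev, struct my_dump *dump)
{
        /* devcoredump now owns dump; my_dump_free() runs when it is released */
        dev_coredumpm(dev, THIS_MODULE, dump, dump->len, GFP_KERNEL,
                      my_dump_read, my_dump_free);
}
#endif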
/**
 * dev_coredumpsg - create device coredump that uses scatterlist as data
 * parameter
 * @dev: the struct device for the crashed device
 * @table: the dump data
 * @datalen: length of the data
 * @gfp: allocation flags
 *
 * Creates a new device coredump for the given device. If a previous one hasn't
 * been read yet, the new coredump is discarded. The data lifetime is determined
 * by the device coredump framework and when it is no longer needed
 * it will free the data.
 */
void dev_coredumpsg(struct device *dev, struct scatterlist *table,
                    size_t datalen, gfp_t gfp)
{
        dev_coredumpm(dev, NULL, table, datalen, gfp, devcd_read_from_sgtable,
                      devcd_free_sgtable);
}
EXPORT_SYMBOL_GPL(dev_coredumpsg);
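
/*
 * Hypothetical driver-side sketch (not part of this file): building a
 * scatterlist for dev_coredumpsg(). Since the framework releases the
 * dump through _devcd_free_sgtable() (see <linux/devcoredump.h>), which
 * __free_page()s each entry's page and kfree()s the scatterlist array
 * itself, the table should be built from alloc_page() pages and a
 * kmalloc'ed array, as below. Assumes <linux/scatterlist.h>; the
 * function name is illustrative, and the error path is only sketched.
 */
#if 0
static void my_driver_dump_sg(struct device *dev, size_t datalen)
{
        unsigned int nents = DIV_ROUND_UP(datalen, PAGE_SIZE);
        struct scatterlist *table, *sg;
        int i;

        table = kcalloc(nents, sizeof(*table), GFP_KERNEL);
        if (!table)
                return;
        sg_init_table(table, nents);

        for_each_sg(table, sg, nents, i) {
                struct page *page = alloc_page(GFP_KERNEL);

                if (!page)
                        goto err; /* must also free pages allocated so far */
                sg_set_page(sg, page, PAGE_SIZE, 0);
                /* ... copy the next chunk of dump data to page_address(page) ... */
        }

        /* table and pages are now owned by devcoredump */
        dev_coredumpsg(dev, table, datalen, GFP_KERNEL);
        return;
err:
        kfree(table);
}
#endif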
static int __init devcoredump_init(void)
{
        return class_register(&devcd_class);
}
__initcall(devcoredump_init);

static void __exit devcoredump_exit(void)
{
        class_for_each_device(&devcd_class, NULL, NULL, devcd_free);
        class_unregister(&devcd_class);
}
__exitcall(devcoredump_exit);