edac_device.h 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365
  1. /*
  2. * Defines, structures, APIs for edac_device
  3. *
  4. * (C) 2007 Linux Networx (http://lnxi.com)
  5. * This file may be distributed under the terms of the
  6. * GNU General Public License.
  7. *
  8. * Written by Thayne Harbaugh
  9. * Based on work by Dan Hollis <goemon at anime dot net> and others.
  10. * http://www.anime.net/~goemon/linux-ecc/
  11. *
  12. * NMI handling support added by
  13. * Dave Peterson <[email protected]> <[email protected]>
  14. *
  15. * Refactored for multi-source files:
  16. * Doug Thompson <[email protected]>
  17. *
  18. * Please look at Documentation/driver-api/edac.rst for more info about
  19. * EDAC core structs and functions.
  20. */
  21. #ifndef _EDAC_DEVICE_H_
  22. #define _EDAC_DEVICE_H_
  23. #include <linux/completion.h>
  24. #include <linux/device.h>
  25. #include <linux/edac.h>
  26. #include <linux/kobject.h>
  27. #include <linux/list.h>
  28. #include <linux/types.h>
  29. #include <linux/sysfs.h>
  30. #include <linux/workqueue.h>
  /*
   * The following are the structures to provide for a generic
   * or abstract 'edac_device'. This set of structures, and the
   * code that implements the APIs for them, provides for
   * registering EDAC type devices which are NOT standard memory:
   *
   *      - CPU caches (L1 and L2)
   *      - DMA engines
   *      - Core CPU switches
   *      - Fabric switch units
   *      - PCIe interface controllers
   *      - other EDAC/ECC type devices that can be monitored for
   *        errors, etc.
   *
   * It allows for a 2 level set of hierarchy. For example:
   *
   * cache could be composed of L1, L2 and L3 levels of cache.
   * Each CPU core would have its own L1 cache, while sharing
   * L2 and maybe L3 caches.
   *
   * View them arranged, via the sysfs presentation:
   * /sys/devices/system/edac/..
   *
   *      mc/             <existing memory device directory>
   *      cpu/cpu0/..     <L1 and L2 block directory>
   *              /L1-cache/ce_count
   *                       /ue_count
   *              /L2-cache/ce_count
   *                       /ue_count
   *      cpu/cpu1/..     <L1 and L2 block directory>
   *              /L1-cache/ce_count
   *                       /ue_count
   *              /L2-cache/ce_count
   *                       /ue_count
   *      ...
   *
   * The L1 and L2 directories would be "edac_device_block's".
   */
  69. struct edac_device_counter {
  70. u32 ue_count;
  71. u32 ce_count;
  72. };
  /* Forward declarations for types referenced before they are defined. */
  struct edac_device_block;
  struct edac_device_ctl_info;
  76. /* edac_dev_sysfs_attribute structure
  77. * used for driver sysfs attributes in mem_ctl_info
  78. * for extra controls and attributes:
  79. * like high level error Injection controls
  80. */
  81. struct edac_dev_sysfs_attribute {
  82. struct attribute attr;
  83. ssize_t (*show)(struct edac_device_ctl_info *, char *);
  84. ssize_t (*store)(struct edac_device_ctl_info *, const char *, size_t);
  85. };
  86. /* edac_dev_sysfs_block_attribute structure
  87. *
  88. * used in leaf 'block' nodes for adding controls/attributes
  89. *
  90. * each block in each instance of the containing control structure
  91. * can have an array of the following. The show and store functions
  92. * will be filled in with the show/store function in the
  93. * low level driver.
  94. *
  95. * The 'value' field will be the actual value field used for
  96. * counting
  97. */
  98. struct edac_dev_sysfs_block_attribute {
  99. struct attribute attr;
  100. ssize_t (*show)(struct kobject *, struct attribute *, char *);
  101. ssize_t (*store)(struct kobject *, struct attribute *,
  102. const char *, size_t);
  103. struct edac_device_block *block;
  104. unsigned int value;
  105. };
  106. /* device block control structure */
  107. struct edac_device_block {
  108. struct edac_device_instance *instance; /* Up Pointer */
  109. char name[EDAC_DEVICE_NAME_LEN + 1];
  110. struct edac_device_counter counters; /* basic UE and CE counters */
  111. int nr_attribs; /* how many attributes */
  112. /* this block's attributes, could be NULL */
  113. struct edac_dev_sysfs_block_attribute *block_attributes;
  114. /* edac sysfs device control */
  115. struct kobject kobj;
  116. };
  117. /* device instance control structure */
  118. struct edac_device_instance {
  119. struct edac_device_ctl_info *ctl; /* Up pointer */
  120. char name[EDAC_DEVICE_NAME_LEN + 4];
  121. struct edac_device_counter counters; /* instance counters */
  122. u32 nr_blocks; /* how many blocks */
  123. struct edac_device_block *blocks; /* block array */
  124. /* edac sysfs device control */
  125. struct kobject kobj;
  126. };
  127. /*
  128. * Abstract edac_device control info structure
  129. *
  130. */
  131. struct edac_device_ctl_info {
  132. /* for global list of edac_device_ctl_info structs */
  133. struct list_head link;
  134. struct module *owner; /* Module owner of this control struct */
  135. int dev_idx;
  136. /* Per instance controls for this edac_device */
  137. int log_ue; /* boolean for logging UEs */
  138. int log_ce; /* boolean for logging CEs */
  139. int panic_on_ue; /* boolean for panic'ing on an UE */
  140. unsigned poll_msec; /* number of milliseconds to poll interval */
  141. unsigned long delay; /* number of jiffies for poll_msec */
  142. /* Additional top controller level attributes, but specified
  143. * by the low level driver.
  144. *
  145. * Set by the low level driver to provide attributes at the
  146. * controller level, same level as 'ue_count' and 'ce_count' above.
  147. * An array of structures, NULL terminated
  148. *
  149. * If attributes are desired, then set to array of attributes
  150. * If no attributes are desired, leave NULL
  151. */
  152. struct edac_dev_sysfs_attribute *sysfs_attributes;
  153. /* pointer to main 'edac' subsys in sysfs */
  154. struct bus_type *edac_subsys;
  155. /* the internal state of this controller instance */
  156. int op_state;
  157. /* work struct for this instance */
  158. struct delayed_work work;
  159. /* pointer to edac polling checking routine:
  160. * If NOT NULL: points to polling check routine
  161. * If NULL: Then assumes INTERRUPT operation, where
  162. * MC driver will receive events
  163. */
  164. void (*edac_check) (struct edac_device_ctl_info * edac_dev);
  165. struct device *dev; /* pointer to device structure */
  166. const char *mod_name; /* module name */
  167. const char *ctl_name; /* edac controller name */
  168. const char *dev_name; /* pci/platform/etc... name */
  169. void *pvt_info; /* pointer to 'private driver' info */
  170. unsigned long start_time; /* edac_device load start time (jiffies) */
  171. struct completion removal_complete;
  172. /* sysfs top name under 'edac' directory
  173. * and instance name:
  174. * cpu/cpu0/...
  175. * cpu/cpu1/...
  176. * cpu/cpu2/...
  177. * ...
  178. */
  179. char name[EDAC_DEVICE_NAME_LEN + 1];
  180. /* Number of instances supported on this control structure
  181. * and the array of those instances
  182. */
  183. u32 nr_instances;
  184. struct edac_device_instance *instances;
  185. struct edac_device_block *blocks;
  186. struct edac_dev_sysfs_block_attribute *attribs;
  187. /* Event counters for the this whole EDAC Device */
  188. struct edac_device_counter counters;
  189. /* edac sysfs device control for the 'name'
  190. * device this structure controls
  191. */
  192. struct kobject kobj;
  193. };
  /*
   * Get from a pointer to the embedded 'work' member back to the beginning
   * of the control structure that contains it.
   */
  #define to_edac_mem_ctl_work(w)                                 \
          container_of((w), struct mem_ctl_info, work)

  #define to_edac_device_ctl_work(w)                              \
          container_of((w), struct edac_device_ctl_info, work)
  /*
   * edac_device_alloc_ctl_info() and edac_device_free_ctl_info() are the
   * alloc() and free() functions for the 'edac_device' control info
   * structure. A MC driver will allocate one of these for each edac_device
   * it is going to control/register with the EDAC CORE.
   */
  struct edac_device_ctl_info *
  edac_device_alloc_ctl_info(unsigned int sizeof_private,
                             char *edac_device_name,
                             unsigned int nr_instances,
                             char *edac_block_name,
                             unsigned int nr_blocks,
                             unsigned int offset_value,
                             struct edac_dev_sysfs_block_attribute *block_attributes,
                             unsigned int nr_attribs,
                             int device_index);
  /*
   * Accepted offset values:
   *      -1      no offset value
   *       0      zero-based block numbers
   *       1      1-based block numbers
   *       other  other-based block numbers
   */
  #define BLOCK_OFFSET_VALUE_OFF ((unsigned int)-1)
  /* free() counterpart of edac_device_alloc_ctl_info(). */
  void edac_device_free_ctl_info(struct edac_device_ctl_info *ctl_info);
  /**
   * edac_device_add_device - Add an edac_device to the global list
   * @edac_dev: pointer to the edac_device structure to be added to the list
   *
   * Inserts the @edac_dev structure into the edac_device global list and
   * creates the sysfs entries associated with that structure.
   *
   * Return: 0 on success, or an error code on failure.
   */
  int edac_device_add_device(struct edac_device_ctl_info *edac_dev);
  /**
   * edac_device_del_device - Remove an edac_device from the global list
   * @dev: pointer to the struct &device representing the edac device
   *       structure to remove
   *
   * Removes the sysfs entries for the specified edac_device structure and
   * then removes the edac_device structure from the global list.
   *
   * Return: pointer to the removed edac_device structure, or %NULL if the
   * device was not found.
   */
  struct edac_device_ctl_info *edac_device_del_device(struct device *dev);
  /**
   * edac_device_handle_ce_count - Log correctable errors.
   * @edac_dev: pointer to struct &edac_device_ctl_info
   * @count:    number of errors to log
   * @inst_nr:  number of the instance where the CE error happened
   * @block_nr: number of the block where the CE error happened
   * @msg:      message to be printed
   */
  void edac_device_handle_ce_count(struct edac_device_ctl_info *edac_dev,
                                   unsigned int count, int inst_nr,
                                   int block_nr, const char *msg);
  /**
   * edac_device_handle_ue_count - Log uncorrectable errors.
   * @edac_dev: pointer to struct &edac_device_ctl_info
   * @count:    number of errors to log
   * @inst_nr:  number of the instance where the UE error happened
   * @block_nr: number of the block where the UE error happened
   * @msg:      message to be printed
   */
  void edac_device_handle_ue_count(struct edac_device_ctl_info *edac_dev,
                                   unsigned int count, int inst_nr,
                                   int block_nr, const char *msg);
  /**
   * edac_device_handle_ce() - Log a single correctable error
   * @edac_dev: pointer to struct &edac_device_ctl_info
   * @inst_nr:  number of the instance where the CE error happened
   * @block_nr: number of the block where the CE error happened
   * @msg:      message to be printed
   *
   * Calls edac_device_handle_ce_count() with a count of 1.
   */
  static inline void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
                                           int inst_nr, int block_nr,
                                           const char *msg)
  {
          const unsigned int single_error = 1;

          edac_device_handle_ce_count(edac_dev, single_error, inst_nr,
                                      block_nr, msg);
  }
  /**
   * edac_device_handle_ue() - Log a single uncorrectable error
   * @edac_dev: pointer to struct &edac_device_ctl_info
   * @inst_nr:  number of the instance where the UE error happened
   * @block_nr: number of the block where the UE error happened
   * @msg:      message to be printed
   *
   * Calls edac_device_handle_ue_count() with a count of 1.
   */
  static inline void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
                                           int inst_nr, int block_nr,
                                           const char *msg)
  {
          const unsigned int single_error = 1;

          edac_device_handle_ue_count(edac_dev, single_error, inst_nr,
                                      block_nr, msg);
  }
  /**
   * edac_device_alloc_index() - Allocate a unique device index number
   *
   * Return: the allocated index number.
   */
  int edac_device_alloc_index(void);

  /* NOTE(review): declaration only; the array is defined outside this header. */
  extern const char *edac_layer_name[];
  305. /* Free the actual struct */
  306. static inline void __edac_device_free_ctl_info(struct edac_device_ctl_info *ci)
  307. {
  308. if (ci) {
  309. kfree(ci->pvt_info);
  310. kfree(ci->attribs);
  311. kfree(ci->blocks);
  312. kfree(ci->instances);
  313. kfree(ci);
  314. }
  315. }
  316. #endif