eeh.h 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461
  1. /* SPDX-License-Identifier: GPL-2.0-or-later */
  2. /*
  3. * Copyright (C) 2001 Dave Engebretsen & Todd Inglett IBM Corporation.
  4. * Copyright 2001-2012 IBM Corporation.
  5. */
  6. #ifndef _POWERPC_EEH_H
  7. #define _POWERPC_EEH_H
  8. #ifdef __KERNEL__
  9. #include <linux/init.h>
  10. #include <linux/list.h>
  11. #include <linux/string.h>
  12. #include <linux/time.h>
  13. #include <linux/atomic.h>
  14. #include <uapi/asm/eeh.h>
  15. struct pci_dev;
  16. struct pci_bus;
  17. struct pci_dn;
  18. #ifdef CONFIG_EEH
  /*
   * EEH subsystem flags.
   *
   * Bit values for the global eeh_subsystem_flags word; eeh_enabled()
   * tests EEH_ENABLED and EEH_FORCE_DISABLED. Bit 4 is not assigned here.
   */
  #define EEH_ENABLED             (1 << 0)        /* EEH enabled */
  #define EEH_FORCE_DISABLED      (1 << 1)        /* EEH disabled */
  #define EEH_PROBE_MODE_DEV      (1 << 2)        /* From PCI device */
  #define EEH_PROBE_MODE_DEVTREE  (1 << 3)        /* From device tree */
  #define EEH_ENABLE_IO_FOR_LOG   (1 << 5)        /* Enable IO for log */
  #define EEH_EARLY_DUMP_LOG      (1 << 6)        /* Dump log immediately */
  /*
   * PE reset delays, in milliseconds.
   *
   * The PCI specification has a reset hold time of 100 ms; 250 ms is
   * used here. The PCI bus settlement time is specified as 1.5 s;
   * 1.8 s is used here.
   */
  #define EEH_PE_RST_HOLD_TIME    250
  #define EEH_PE_RST_SETTLE_TIME  1800
  /*
   * The struct is used to trace PE related EEH functionality.
   * In theory, one instance of the struct is created for each
   * particular PE. By nature, PEs are related to each other, so
   * the struct has to reflect that hierarchy in order to easily
   * pick up the affected PEs when one particular PE has EEH errors.
   *
   * Also, one particular PE might be composed of a PCI device, a PCI
   * bus and its subordinate components. The struct also needs to carry
   * that information. Furthermore, one particular PE is only meaningful
   * in the corresponding PHB. Therefore, the root PEs should be created
   * against existing PHBs in one-to-one fashion.
   */
  /* PE type bits (eeh_pe.type is documented as "PE type: PHB/Bus/Device"). */
  #define EEH_PE_INVALID          (1 << 0)        /* Invalid */
  #define EEH_PE_PHB              (1 << 1)        /* PHB PE */
  #define EEH_PE_DEVICE           (1 << 2)        /* Device PE */
  #define EEH_PE_BUS              (1 << 3)        /* Bus PE */
  #define EEH_PE_VF               (1 << 4)        /* VF PE */

  /*
   * PE state bits. They reuse bit positions 0-3 of the type bits above,
   * so the two sets cannot live in the same field; presumably these are
   * kept in eeh_pe.state (TODO: confirm against the users).
   */
  #define EEH_PE_ISOLATED         (1 << 0)        /* Isolated PE */
  #define EEH_PE_RECOVERING       (1 << 1)        /* Recovering PE */
  #define EEH_PE_CFG_BLOCKED      (1 << 2)        /* Block config access */
  #define EEH_PE_RESET            (1 << 3)        /* PE reset in progress */

  #define EEH_PE_KEEP             (1 << 8)        /* Keep PE on hotplug */
  #define EEH_PE_CFG_RESTRICTED   (1 << 9)        /* Block config on error */
  #define EEH_PE_REMOVED          (1 << 10)       /* Removed permanently */
  #define EEH_PE_PRI_BUS          (1 << 11)       /* Cached primary bus */
  62. struct eeh_pe {
  63. int type; /* PE type: PHB/Bus/Device */
  64. int state; /* PE EEH dependent mode */
  65. int addr; /* PE configuration address */
  66. struct pci_controller *phb; /* Associated PHB */
  67. struct pci_bus *bus; /* Top PCI bus for bus PE */
  68. int check_count; /* Times of ignored error */
  69. int freeze_count; /* Times of froze up */
  70. time64_t tstamp; /* Time on first-time freeze */
  71. int false_positives; /* Times of reported #ff's */
  72. atomic_t pass_dev_cnt; /* Count of passed through devs */
  73. struct eeh_pe *parent; /* Parent PE */
  74. void *data; /* PE auxillary data */
  75. struct list_head child_list; /* List of PEs below this PE */
  76. struct list_head child; /* Memb. child_list/eeh_phb_pe */
  77. struct list_head edevs; /* List of eeh_dev in this PE */
  78. #ifdef CONFIG_STACKTRACE
  79. /*
  80. * Saved stack trace. When we find a PE freeze in eeh_dev_check_failure
  81. * the stack trace is saved here so we can print it in the recovery
  82. * thread if it turns out to due to a real problem rather than
  83. * a hot-remove.
  84. *
  85. * A max of 64 entries might be overkill, but it also might not be.
  86. */
  87. unsigned long stack_trace[64];
  88. int trace_entries;
  89. #endif /* CONFIG_STACKTRACE */
  90. };
  /*
   * eeh_pe_for_each_dev - iterate over the eeh_dev entries of a PE.
   * @pe:   pointer expression yielding the struct eeh_pe to walk
   * @edev: struct eeh_dev * loop cursor
   * @tmp:  second struct eeh_dev * cursor required by
   *        list_for_each_entry_safe()
   *
   * @pe is parenthesized so that any pointer expression (for example
   * "base + 1" or "*slot") binds correctly before "->edevs".
   */
  #define eeh_pe_for_each_dev(pe, edev, tmp) \
          list_for_each_entry_safe(edev, tmp, &(pe)->edevs, entry)

  /*
   * eeh_for_each_pe - walk PEs starting from @root.
   * @root: starting PE; evaluated again on every iteration
   * @pe:   struct eeh_pe * loop cursor (must be an lvalue)
   *
   * @pe starts at @root and is advanced with eeh_pe_next(@pe, @root)
   * until that yields NULL.
   */
  #define eeh_for_each_pe(root, pe) \
          for (pe = (root); pe; pe = eeh_pe_next(pe, (root)))
  95. static inline bool eeh_pe_passed(struct eeh_pe *pe)
  96. {
  97. return pe ? !!atomic_read(&pe->pass_dev_cnt) : false;
  98. }
  /*
   * The struct is used to trace EEH state for the associated
   * PCI device node or PCI device. In future, it might
   * represent a PE as well, so that the EEH devices form
   * another tree besides the currently existing tree of PCI
   * buses and PCI devices.
   */
  /*
   * EEH device flag bits; presumably stored in eeh_dev.mode
   * (TODO: confirm against the users). Bits 5-7 are not assigned here.
   */
  #define EEH_DEV_BRIDGE          (1 << 0)        /* PCI bridge */
  #define EEH_DEV_ROOT_PORT       (1 << 1)        /* PCIe root port */
  #define EEH_DEV_DS_PORT         (1 << 2)        /* Downstream port */
  #define EEH_DEV_IRQ_DISABLED    (1 << 3)        /* Interrupt disabled */
  #define EEH_DEV_DISCONNECTED    (1 << 4)        /* Removing from PE */

  #define EEH_DEV_NO_HANDLER      (1 << 8)        /* No error handler */
  #define EEH_DEV_SYSFS           (1 << 9)        /* Sysfs created */
  #define EEH_DEV_REMOVED         (1 << 10)       /* Removed permanently */
  114. struct eeh_dev {
  115. int mode; /* EEH mode */
  116. int bdfn; /* bdfn of device (for cfg ops) */
  117. struct pci_controller *controller;
  118. int pe_config_addr; /* PE config address */
  119. u32 config_space[16]; /* Saved PCI config space */
  120. int pcix_cap; /* Saved PCIx capability */
  121. int pcie_cap; /* Saved PCIe capability */
  122. int aer_cap; /* Saved AER capability */
  123. int af_cap; /* Saved AF capability */
  124. struct eeh_pe *pe; /* Associated PE */
  125. struct list_head entry; /* Membership in eeh_pe.edevs */
  126. struct list_head rmv_entry; /* Membership in rmv_list */
  127. struct pci_dn *pdn; /* Associated PCI device node */
  128. struct pci_dev *pdev; /* Associated PCI device */
  129. bool in_error; /* Error flag for edev */
  130. /* VF specific properties */
  131. struct pci_dev *physfn; /* Associated SRIOV PF */
  132. int vf_index; /* Index of this VF */
  133. };
  /*
   * EEH_EDEV_PRINT - pr_<level>() with a per-device prefix.
   *
   * The message is prefixed with
   *   "PCI <global_number>:<bus>:<slot>.<func>#<pe_config_addr>: EEH: "
   * built from @edev's controller and bdfn. When @edev has no PE attached
   * the PE address field is printed as ffff.
   *
   * "fmt" must be a simple literal string: it is pasted onto the prefix
   * by string-literal concatenation. @edev is evaluated several times.
   */
  #define EEH_EDEV_PRINT(level, edev, fmt, ...) \
          pr_##level("PCI %04x:%02x:%02x.%x#%04x: EEH: " fmt, \
                     (edev)->controller->global_number, \
                     PCI_BUSNO((edev)->bdfn), \
                     PCI_SLOT((edev)->bdfn), \
                     PCI_FUNC((edev)->bdfn), \
                     ((edev)->pe ? (edev)->pe_config_addr : 0xffff), \
                     ##__VA_ARGS__)

  /* Per-level shorthands for EEH_EDEV_PRINT(). */
  #define eeh_edev_dbg(edev, fmt, ...) \
          EEH_EDEV_PRINT(debug, (edev), fmt, ##__VA_ARGS__)
  #define eeh_edev_info(edev, fmt, ...) \
          EEH_EDEV_PRINT(info, (edev), fmt, ##__VA_ARGS__)
  #define eeh_edev_warn(edev, fmt, ...) \
          EEH_EDEV_PRINT(warn, (edev), fmt, ##__VA_ARGS__)
  #define eeh_edev_err(edev, fmt, ...) \
          EEH_EDEV_PRINT(err, (edev), fmt, ##__VA_ARGS__)
  144. static inline struct pci_dn *eeh_dev_to_pdn(struct eeh_dev *edev)
  145. {
  146. return edev ? edev->pdn : NULL;
  147. }
  148. static inline struct pci_dev *eeh_dev_to_pci_dev(struct eeh_dev *edev)
  149. {
  150. return edev ? edev->pdev : NULL;
  151. }
  152. static inline struct eeh_pe *eeh_dev_to_pe(struct eeh_dev* edev)
  153. {
  154. return edev ? edev->pe : NULL;
  155. }
  /* Return values from eeh_ops::next_error */
  enum {
          EEH_NEXT_ERR_NONE       = 0,
          EEH_NEXT_ERR_INF        = 1,
          EEH_NEXT_ERR_FROZEN_PE  = 2,
          EEH_NEXT_ERR_FENCED_PHB = 3,
          EEH_NEXT_ERR_DEAD_PHB   = 4,
          EEH_NEXT_ERR_DEAD_IOC   = 5
  };
  /*
   * The struct is used to trace the registered EEH operation
   * callback functions. Those callback functions are heavily
   * platform dependent, which means the platform should register
   * its own EEH operation callback functions before any further
   * EEH operations.
   */
  /* Option codes (EEH_OPT_*) */
  #define EEH_OPT_DISABLE         0       /* EEH disable */
  #define EEH_OPT_ENABLE          1       /* EEH enable */
  #define EEH_OPT_THAW_MMIO       2       /* MMIO enable */
  #define EEH_OPT_THAW_DMA        3       /* DMA enable */
  #define EEH_OPT_FREEZE_PE       4       /* Freeze PE */

  /* State bits (EEH_STATE_*); eeh_state_active() tests the two ACTIVE bits */
  #define EEH_STATE_UNAVAILABLE   (1 << 0)        /* State unavailable */
  #define EEH_STATE_NOT_SUPPORT   (1 << 1)        /* EEH not supported */
  #define EEH_STATE_RESET_ACTIVE  (1 << 2)        /* Active reset */
  #define EEH_STATE_MMIO_ACTIVE   (1 << 3)        /* Active MMIO */
  #define EEH_STATE_DMA_ACTIVE    (1 << 4)        /* Active DMA */
  #define EEH_STATE_MMIO_ENABLED  (1 << 5)        /* MMIO enabled */
  #define EEH_STATE_DMA_ENABLED   (1 << 6)        /* DMA enabled */

  /* Reset codes (EEH_RESET_*); the value 2 is not assigned here */
  #define EEH_RESET_DEACTIVATE    0       /* Deactivate the PE reset */
  #define EEH_RESET_HOT           1       /* Hot reset */
  #define EEH_RESET_FUNDAMENTAL   3       /* Fundamental reset */

  /* Error log kinds (EEH_LOG_*) */
  #define EEH_LOG_TEMP            1       /* EEH temporary error log */
  #define EEH_LOG_PERM            2       /* EEH permanent error log */
  189. struct eeh_ops {
  190. char *name;
  191. struct eeh_dev *(*probe)(struct pci_dev *pdev);
  192. int (*set_option)(struct eeh_pe *pe, int option);
  193. int (*get_state)(struct eeh_pe *pe, int *delay);
  194. int (*reset)(struct eeh_pe *pe, int option);
  195. int (*get_log)(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len);
  196. int (*configure_bridge)(struct eeh_pe *pe);
  197. int (*err_inject)(struct eeh_pe *pe, int type, int func,
  198. unsigned long addr, unsigned long mask);
  199. int (*read_config)(struct eeh_dev *edev, int where, int size, u32 *val);
  200. int (*write_config)(struct eeh_dev *edev, int where, int size, u32 val);
  201. int (*next_error)(struct eeh_pe **pe);
  202. int (*restore_config)(struct eeh_dev *edev);
  203. int (*notify_resume)(struct eeh_dev *edev);
  204. };
  205. extern int eeh_subsystem_flags;
  206. extern u32 eeh_max_freezes;
  207. extern bool eeh_debugfs_no_recover;
  208. extern struct eeh_ops *eeh_ops;
  209. extern raw_spinlock_t confirm_error_lock;
  210. static inline void eeh_add_flag(int flag)
  211. {
  212. eeh_subsystem_flags |= flag;
  213. }
  214. static inline void eeh_clear_flag(int flag)
  215. {
  216. eeh_subsystem_flags &= ~flag;
  217. }
  218. static inline bool eeh_has_flag(int flag)
  219. {
  220. return !!(eeh_subsystem_flags & flag);
  221. }
  222. static inline bool eeh_enabled(void)
  223. {
  224. return eeh_has_flag(EEH_ENABLED) && !eeh_has_flag(EEH_FORCE_DISABLED);
  225. }
  226. static inline void eeh_serialize_lock(unsigned long *flags)
  227. {
  228. raw_spin_lock_irqsave(&confirm_error_lock, *flags);
  229. }
  230. static inline void eeh_serialize_unlock(unsigned long flags)
  231. {
  232. raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
  233. }
  234. static inline bool eeh_state_active(int state)
  235. {
  236. return (state & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE))
  237. == (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
  238. }
  239. typedef void (*eeh_edev_traverse_func)(struct eeh_dev *edev, void *flag);
  240. typedef void *(*eeh_pe_traverse_func)(struct eeh_pe *pe, void *flag);
  241. void eeh_set_pe_aux_size(int size);
  242. int eeh_phb_pe_create(struct pci_controller *phb);
  243. int eeh_wait_state(struct eeh_pe *pe, int max_wait);
  244. struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb);
  245. struct eeh_pe *eeh_pe_next(struct eeh_pe *pe, struct eeh_pe *root);
  246. struct eeh_pe *eeh_pe_get(struct pci_controller *phb, int pe_no);
  247. int eeh_pe_tree_insert(struct eeh_dev *edev, struct eeh_pe *new_pe_parent);
  248. int eeh_pe_tree_remove(struct eeh_dev *edev);
  249. void eeh_pe_update_time_stamp(struct eeh_pe *pe);
  250. void *eeh_pe_traverse(struct eeh_pe *root,
  251. eeh_pe_traverse_func fn, void *flag);
  252. void eeh_pe_dev_traverse(struct eeh_pe *root,
  253. eeh_edev_traverse_func fn, void *flag);
  254. void eeh_pe_restore_bars(struct eeh_pe *pe);
  255. const char *eeh_pe_loc_get(struct eeh_pe *pe);
  256. struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe);
  257. void eeh_show_enabled(void);
  258. int __init eeh_init(struct eeh_ops *ops);
  259. int eeh_check_failure(const volatile void __iomem *token);
  260. int eeh_dev_check_failure(struct eeh_dev *edev);
  261. void eeh_addr_cache_init(void);
  262. void eeh_probe_device(struct pci_dev *pdev);
  263. void eeh_remove_device(struct pci_dev *);
  264. int eeh_unfreeze_pe(struct eeh_pe *pe);
  265. int eeh_pe_reset_and_recover(struct eeh_pe *pe);
  266. int eeh_dev_open(struct pci_dev *pdev);
  267. void eeh_dev_release(struct pci_dev *pdev);
  268. struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group *group);
  269. int eeh_pe_set_option(struct eeh_pe *pe, int option);
  270. int eeh_pe_get_state(struct eeh_pe *pe);
  271. int eeh_pe_reset(struct eeh_pe *pe, int option, bool include_passed);
  272. int eeh_pe_configure(struct eeh_pe *pe);
  273. int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
  274. unsigned long addr, unsigned long mask);
  /**
   * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure.
   * @val:  value that was read
   * @type: integer type of @val (u8, u16, u32 or u64 in this header)
   *
   * Yields true when @val has every bit of @type set and EEH is enabled.
   * In that case the caller relays to eeh_check_failure(), which does
   * further tests out of line.
   */
  #define EEH_POSSIBLE_ERROR(val, type) \
          ((val) == (type)~0 && eeh_enabled())
  /*
   * Reads from a device which has been isolated by EEH will return
   * all 1s. This macro gives an all-1s value of the given size (in
   * bytes: 1, 2, or 4) for comparing with the result of a read.
   * Other sizes are not supported: the shift count would be out of
   * range for the 32-bit operand.
   */
  #define EEH_IO_ERROR_VALUE(size) (~0U >> (8 * (4 - (size))))
  288. #else /* !CONFIG_EEH */
  289. static inline bool eeh_enabled(void)
  290. {
  291. return false;
  292. }
  293. static inline void eeh_show_enabled(void) { }
  294. static inline int eeh_check_failure(const volatile void __iomem *token)
  295. {
  296. return 0;
  297. }
  298. #define eeh_dev_check_failure(x) (0)
  299. static inline void eeh_addr_cache_init(void) { }
  300. static inline void eeh_probe_device(struct pci_dev *dev) { }
  301. static inline void eeh_remove_device(struct pci_dev *dev) { }
  302. #define EEH_POSSIBLE_ERROR(val, type) (0)
  303. #define EEH_IO_ERROR_VALUE(size) (-1UL)
  304. static inline int eeh_phb_pe_create(struct pci_controller *phb) { return 0; }
  305. #endif /* CONFIG_EEH */
  #if defined(CONFIG_EEH) && defined(CONFIG_PPC_PSERIES)
  /* Only declared when both EEH and pSeries support are configured. */
  void pseries_eeh_init_edev_recursive(struct pci_dn *pdn);
  #endif /* CONFIG_EEH && CONFIG_PPC_PSERIES */
  309. #ifdef CONFIG_PPC64
  310. /*
  311. * MMIO read/write operations with EEH support.
  312. */
  313. static inline u8 eeh_readb(const volatile void __iomem *addr)
  314. {
  315. u8 val = in_8(addr);
  316. if (EEH_POSSIBLE_ERROR(val, u8))
  317. eeh_check_failure(addr);
  318. return val;
  319. }
  320. static inline u16 eeh_readw(const volatile void __iomem *addr)
  321. {
  322. u16 val = in_le16(addr);
  323. if (EEH_POSSIBLE_ERROR(val, u16))
  324. eeh_check_failure(addr);
  325. return val;
  326. }
  327. static inline u32 eeh_readl(const volatile void __iomem *addr)
  328. {
  329. u32 val = in_le32(addr);
  330. if (EEH_POSSIBLE_ERROR(val, u32))
  331. eeh_check_failure(addr);
  332. return val;
  333. }
  334. static inline u64 eeh_readq(const volatile void __iomem *addr)
  335. {
  336. u64 val = in_le64(addr);
  337. if (EEH_POSSIBLE_ERROR(val, u64))
  338. eeh_check_failure(addr);
  339. return val;
  340. }
  341. static inline u16 eeh_readw_be(const volatile void __iomem *addr)
  342. {
  343. u16 val = in_be16(addr);
  344. if (EEH_POSSIBLE_ERROR(val, u16))
  345. eeh_check_failure(addr);
  346. return val;
  347. }
  348. static inline u32 eeh_readl_be(const volatile void __iomem *addr)
  349. {
  350. u32 val = in_be32(addr);
  351. if (EEH_POSSIBLE_ERROR(val, u32))
  352. eeh_check_failure(addr);
  353. return val;
  354. }
  355. static inline u64 eeh_readq_be(const volatile void __iomem *addr)
  356. {
  357. u64 val = in_be64(addr);
  358. if (EEH_POSSIBLE_ERROR(val, u64))
  359. eeh_check_failure(addr);
  360. return val;
  361. }
  362. static inline void eeh_memcpy_fromio(void *dest, const
  363. volatile void __iomem *src,
  364. unsigned long n)
  365. {
  366. _memcpy_fromio(dest, src, n);
  367. /* Look for ffff's here at dest[n]. Assume that at least 4 bytes
  368. * were copied. Check all four bytes.
  369. */
  370. if (n >= 4 && EEH_POSSIBLE_ERROR(*((u32 *)(dest + n - 4)), u32))
  371. eeh_check_failure(src);
  372. }
  373. /* in-string eeh macros */
  374. static inline void eeh_readsb(const volatile void __iomem *addr, void * buf,
  375. int ns)
  376. {
  377. _insb(addr, buf, ns);
  378. if (EEH_POSSIBLE_ERROR((*(((u8*)buf)+ns-1)), u8))
  379. eeh_check_failure(addr);
  380. }
  381. static inline void eeh_readsw(const volatile void __iomem *addr, void * buf,
  382. int ns)
  383. {
  384. _insw(addr, buf, ns);
  385. if (EEH_POSSIBLE_ERROR((*(((u16*)buf)+ns-1)), u16))
  386. eeh_check_failure(addr);
  387. }
  388. static inline void eeh_readsl(const volatile void __iomem *addr, void * buf,
  389. int nl)
  390. {
  391. _insl(addr, buf, nl);
  392. if (EEH_POSSIBLE_ERROR((*(((u32*)buf)+nl-1)), u32))
  393. eeh_check_failure(addr);
  394. }
  395. void __init eeh_cache_debugfs_init(void);
  396. #endif /* CONFIG_PPC64 */
  397. #endif /* __KERNEL__ */
  398. #endif /* _POWERPC_EEH_H */