xen-mca.h 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392
  1. /* SPDX-License-Identifier: MIT */
  2. /******************************************************************************
  3. * arch-x86/mca.h
  4. * Guest OS machine check interface to x86 Xen.
  5. *
  6. * Contributed by Advanced Micro Devices, Inc.
  7. * Author: Christoph Egger <[email protected]>
  8. *
  9. * Updated by Intel Corporation
  10. * Author: Liu, Jinsong <[email protected]>
  11. *
  12. * Permission is hereby granted, free of charge, to any person obtaining a copy
  13. * of this software and associated documentation files (the "Software"), to
  14. * deal in the Software without restriction, including without limitation the
  15. * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  16. * sell copies of the Software, and to permit persons to whom the Software is
  17. * furnished to do so, subject to the following conditions:
  18. *
  19. * The above copyright notice and this permission notice shall be included in
  20. * all copies or substantial portions of the Software.
  21. *
  22. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  23. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  24. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  25. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  26. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  27. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  28. * DEALINGS IN THE SOFTWARE.
  29. */
  30. #ifndef __XEN_PUBLIC_ARCH_X86_MCA_H__
  31. #define __XEN_PUBLIC_ARCH_X86_MCA_H__
  32. /* Hypercall */
  33. #define __HYPERVISOR_mca __HYPERVISOR_arch_0
  34. #define XEN_MCA_INTERFACE_VERSION 0x01ecc003
  35. /* IN: Dom0 calls hypercall to retrieve nonurgent error log entry */
  36. #define XEN_MC_NONURGENT 0x1
  37. /* IN: Dom0 calls hypercall to retrieve urgent error log entry */
  38. #define XEN_MC_URGENT 0x2
  39. /* IN: Dom0 acknowledges previosly-fetched error log entry */
  40. #define XEN_MC_ACK 0x4
  41. /* OUT: All is ok */
  42. #define XEN_MC_OK 0x0
  43. /* OUT: Domain could not fetch data. */
  44. #define XEN_MC_FETCHFAILED 0x1
  45. /* OUT: There was no machine check data to fetch. */
  46. #define XEN_MC_NODATA 0x2
  47. #ifndef __ASSEMBLY__
  48. /* vIRQ injected to Dom0 */
  49. #define VIRQ_MCA VIRQ_ARCH_0
  50. /*
  51. * mc_info entry types
  52. * mca machine check info are recorded in mc_info entries.
  53. * when fetch mca info, it can use MC_TYPE_... to distinguish
  54. * different mca info.
  55. */
  56. #define MC_TYPE_GLOBAL 0
  57. #define MC_TYPE_BANK 1
  58. #define MC_TYPE_EXTENDED 2
  59. #define MC_TYPE_RECOVERY 3
  60. struct mcinfo_common {
  61. uint16_t type; /* structure type */
  62. uint16_t size; /* size of this struct in bytes */
  63. };
  64. #define MC_FLAG_CORRECTABLE (1 << 0)
  65. #define MC_FLAG_UNCORRECTABLE (1 << 1)
  66. #define MC_FLAG_RECOVERABLE (1 << 2)
  67. #define MC_FLAG_POLLED (1 << 3)
  68. #define MC_FLAG_RESET (1 << 4)
  69. #define MC_FLAG_CMCI (1 << 5)
  70. #define MC_FLAG_MCE (1 << 6)
  71. /* contains x86 global mc information */
  72. struct mcinfo_global {
  73. struct mcinfo_common common;
  74. uint16_t mc_domid; /* running domain at the time in error */
  75. uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */
  76. uint32_t mc_socketid; /* physical socket of the physical core */
  77. uint16_t mc_coreid; /* physical impacted core */
  78. uint16_t mc_core_threadid; /* core thread of physical core */
  79. uint32_t mc_apicid;
  80. uint32_t mc_flags;
  81. uint64_t mc_gstatus; /* global status */
  82. };
  83. /* contains x86 bank mc information */
  84. struct mcinfo_bank {
  85. struct mcinfo_common common;
  86. uint16_t mc_bank; /* bank nr */
  87. uint16_t mc_domid; /* domain referenced by mc_addr if valid */
  88. uint64_t mc_status; /* bank status */
  89. uint64_t mc_addr; /* bank address */
  90. uint64_t mc_misc;
  91. uint64_t mc_ctrl2;
  92. uint64_t mc_tsc;
  93. };
  94. struct mcinfo_msr {
  95. uint64_t reg; /* MSR */
  96. uint64_t value; /* MSR value */
  97. };
  98. /* contains mc information from other or additional mc MSRs */
  99. struct mcinfo_extended {
  100. struct mcinfo_common common;
  101. uint32_t mc_msrs; /* Number of msr with valid values. */
  102. /*
  103. * Currently Intel extended MSR (32/64) include all gp registers
  104. * and E(R)FLAGS, E(R)IP, E(R)MISC, up to 11/19 of them might be
  105. * useful at present. So expand this array to 16/32 to leave room.
  106. */
  107. struct mcinfo_msr mc_msr[sizeof(void *) * 4];
  108. };
  109. /* Recovery Action flags. Giving recovery result information to DOM0 */
  110. /* Xen takes successful recovery action, the error is recovered */
  111. #define REC_ACTION_RECOVERED (0x1 << 0)
  112. /* No action is performed by XEN */
  113. #define REC_ACTION_NONE (0x1 << 1)
  114. /* It's possible DOM0 might take action ownership in some case */
  115. #define REC_ACTION_NEED_RESET (0x1 << 2)
  116. /*
  117. * Different Recovery Action types, if the action is performed successfully,
  118. * REC_ACTION_RECOVERED flag will be returned.
  119. */
  120. /* Page Offline Action */
  121. #define MC_ACTION_PAGE_OFFLINE (0x1 << 0)
  122. /* CPU offline Action */
  123. #define MC_ACTION_CPU_OFFLINE (0x1 << 1)
  124. /* L3 cache disable Action */
  125. #define MC_ACTION_CACHE_SHRINK (0x1 << 2)
  126. /*
  127. * Below interface used between XEN/DOM0 for passing XEN's recovery action
  128. * information to DOM0.
  129. */
  130. struct page_offline_action {
  131. /* Params for passing the offlined page number to DOM0 */
  132. uint64_t mfn;
  133. uint64_t status;
  134. };
  135. struct cpu_offline_action {
  136. /* Params for passing the identity of the offlined CPU to DOM0 */
  137. uint32_t mc_socketid;
  138. uint16_t mc_coreid;
  139. uint16_t mc_core_threadid;
  140. };
  141. #define MAX_UNION_SIZE 16
  142. struct mcinfo_recovery {
  143. struct mcinfo_common common;
  144. uint16_t mc_bank; /* bank nr */
  145. uint8_t action_flags;
  146. uint8_t action_types;
  147. union {
  148. struct page_offline_action page_retire;
  149. struct cpu_offline_action cpu_offline;
  150. uint8_t pad[MAX_UNION_SIZE];
  151. } action_info;
  152. };
  153. #define MCINFO_MAXSIZE 768
  154. struct mc_info {
  155. /* Number of mcinfo_* entries in mi_data */
  156. uint32_t mi_nentries;
  157. uint32_t flags;
  158. uint64_t mi_data[(MCINFO_MAXSIZE - 1) / 8];
  159. };
  160. DEFINE_GUEST_HANDLE_STRUCT(mc_info);
  161. #define __MC_MSR_ARRAYSIZE 8
  162. #define __MC_NMSRS 1
  163. #define MC_NCAPS 7
  164. struct mcinfo_logical_cpu {
  165. uint32_t mc_cpunr;
  166. uint32_t mc_chipid;
  167. uint16_t mc_coreid;
  168. uint16_t mc_threadid;
  169. uint32_t mc_apicid;
  170. uint32_t mc_clusterid;
  171. uint32_t mc_ncores;
  172. uint32_t mc_ncores_active;
  173. uint32_t mc_nthreads;
  174. uint32_t mc_cpuid_level;
  175. uint32_t mc_family;
  176. uint32_t mc_vendor;
  177. uint32_t mc_model;
  178. uint32_t mc_step;
  179. char mc_vendorid[16];
  180. char mc_brandid[64];
  181. uint32_t mc_cpu_caps[MC_NCAPS];
  182. uint32_t mc_cache_size;
  183. uint32_t mc_cache_alignment;
  184. uint32_t mc_nmsrvals;
  185. struct mcinfo_msr mc_msrvalues[__MC_MSR_ARRAYSIZE];
  186. };
  187. DEFINE_GUEST_HANDLE_STRUCT(mcinfo_logical_cpu);
  188. /*
  189. * Prototype:
  190. * uint32_t x86_mcinfo_nentries(struct mc_info *mi);
  191. */
  192. #define x86_mcinfo_nentries(_mi) \
  193. ((_mi)->mi_nentries)
  194. /*
  195. * Prototype:
  196. * struct mcinfo_common *x86_mcinfo_first(struct mc_info *mi);
  197. */
  198. #define x86_mcinfo_first(_mi) \
  199. ((struct mcinfo_common *)(_mi)->mi_data)
  200. /*
  201. * Prototype:
  202. * struct mcinfo_common *x86_mcinfo_next(struct mcinfo_common *mic);
  203. */
  204. #define x86_mcinfo_next(_mic) \
  205. ((struct mcinfo_common *)((uint8_t *)(_mic) + (_mic)->size))
  206. /*
  207. * Prototype:
  208. * void x86_mcinfo_lookup(void *ret, struct mc_info *mi, uint16_t type);
  209. */
  210. static inline void x86_mcinfo_lookup(struct mcinfo_common **ret,
  211. struct mc_info *mi, uint16_t type)
  212. {
  213. uint32_t i;
  214. struct mcinfo_common *mic;
  215. bool found = 0;
  216. if (!ret || !mi)
  217. return;
  218. mic = x86_mcinfo_first(mi);
  219. for (i = 0; i < x86_mcinfo_nentries(mi); i++) {
  220. if (mic->type == type) {
  221. found = 1;
  222. break;
  223. }
  224. mic = x86_mcinfo_next(mic);
  225. }
  226. *ret = found ? mic : NULL;
  227. }
  228. /*
  229. * Fetch machine check data from hypervisor.
  230. */
  231. #define XEN_MC_fetch 1
  232. struct xen_mc_fetch {
  233. /*
  234. * IN: XEN_MC_NONURGENT, XEN_MC_URGENT,
  235. * XEN_MC_ACK if ack'king an earlier fetch
  236. * OUT: XEN_MC_OK, XEN_MC_FETCHAILED, XEN_MC_NODATA
  237. */
  238. uint32_t flags;
  239. uint32_t _pad0;
  240. /* OUT: id for ack, IN: id we are ack'ing */
  241. uint64_t fetch_id;
  242. /* OUT variables. */
  243. GUEST_HANDLE(mc_info) data;
  244. };
  245. DEFINE_GUEST_HANDLE_STRUCT(xen_mc_fetch);
  246. /*
  247. * This tells the hypervisor to notify a DomU about the machine check error
  248. */
  249. #define XEN_MC_notifydomain 2
  250. struct xen_mc_notifydomain {
  251. /* IN variables */
  252. uint16_t mc_domid; /* The unprivileged domain to notify */
  253. uint16_t mc_vcpuid; /* The vcpu in mc_domid to notify */
  254. /* IN/OUT variables */
  255. uint32_t flags;
  256. };
  257. DEFINE_GUEST_HANDLE_STRUCT(xen_mc_notifydomain);
  258. #define XEN_MC_physcpuinfo 3
  259. struct xen_mc_physcpuinfo {
  260. /* IN/OUT */
  261. uint32_t ncpus;
  262. uint32_t _pad0;
  263. /* OUT */
  264. GUEST_HANDLE(mcinfo_logical_cpu) info;
  265. };
  266. #define XEN_MC_msrinject 4
  267. #define MC_MSRINJ_MAXMSRS 8
  268. struct xen_mc_msrinject {
  269. /* IN */
  270. uint32_t mcinj_cpunr; /* target processor id */
  271. uint32_t mcinj_flags; /* see MC_MSRINJ_F_* below */
  272. uint32_t mcinj_count; /* 0 .. count-1 in array are valid */
  273. uint32_t _pad0;
  274. struct mcinfo_msr mcinj_msr[MC_MSRINJ_MAXMSRS];
  275. };
  276. /* Flags for mcinj_flags above; bits 16-31 are reserved */
  277. #define MC_MSRINJ_F_INTERPOSE 0x1
  278. #define XEN_MC_mceinject 5
  279. struct xen_mc_mceinject {
  280. unsigned int mceinj_cpunr; /* target processor id */
  281. };
  282. struct xen_mc {
  283. uint32_t cmd;
  284. uint32_t interface_version; /* XEN_MCA_INTERFACE_VERSION */
  285. union {
  286. struct xen_mc_fetch mc_fetch;
  287. struct xen_mc_notifydomain mc_notifydomain;
  288. struct xen_mc_physcpuinfo mc_physcpuinfo;
  289. struct xen_mc_msrinject mc_msrinject;
  290. struct xen_mc_mceinject mc_mceinject;
  291. } u;
  292. };
  293. DEFINE_GUEST_HANDLE_STRUCT(xen_mc);
  294. /*
  295. * Fields are zero when not available. Also, this struct is shared with
  296. * userspace mcelog and thus must keep existing fields at current offsets.
  297. * Only add new fields to the end of the structure
  298. */
  299. struct xen_mce {
  300. __u64 status;
  301. __u64 misc;
  302. __u64 addr;
  303. __u64 mcgstatus;
  304. __u64 ip;
  305. __u64 tsc; /* cpu time stamp counter */
  306. __u64 time; /* wall time_t when error was detected */
  307. __u8 cpuvendor; /* cpu vendor as encoded in system.h */
  308. __u8 inject_flags; /* software inject flags */
  309. __u16 pad;
  310. __u32 cpuid; /* CPUID 1 EAX */
  311. __u8 cs; /* code segment */
  312. __u8 bank; /* machine check bank */
  313. __u8 cpu; /* cpu number; obsolete; use extcpu now */
  314. __u8 finished; /* entry is valid */
  315. __u32 extcpu; /* linux cpu number that detected the error */
  316. __u32 socketid; /* CPU socket ID */
  317. __u32 apicid; /* CPU initial apic ID */
  318. __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */
  319. __u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */
  320. __u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */
  321. __u64 ppin; /* Protected Processor Inventory Number */
  322. };
  323. /*
  324. * This structure contains all data related to the MCE log. Also
  325. * carries a signature to make it easier to find from external
  326. * debugging tools. Each entry is only valid when its finished flag
  327. * is set.
  328. */
  329. #define XEN_MCE_LOG_LEN 32
  330. struct xen_mce_log {
  331. char signature[12]; /* "MACHINECHECK" */
  332. unsigned len; /* = XEN_MCE_LOG_LEN */
  333. unsigned next;
  334. unsigned flags;
  335. unsigned recordlen; /* length of struct xen_mce */
  336. struct xen_mce entry[XEN_MCE_LOG_LEN];
  337. };
  338. #define XEN_MCE_OVERFLOW 0 /* bit 0 in flags means overflow */
  339. #define XEN_MCE_LOG_SIGNATURE "MACHINECHECK"
  340. #define MCE_GET_RECORD_LEN _IOR('M', 1, int)
  341. #define MCE_GET_LOG_LEN _IOR('M', 2, int)
  342. #define MCE_GETCLEAR_FLAGS _IOR('M', 3, int)
  343. #endif /* __ASSEMBLY__ */
  344. #endif /* __XEN_PUBLIC_ARCH_X86_MCA_H__ */