crdump.c 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258
  1. /*
  2. * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
  3. *
  4. * This software is available to you under a choice of one of two
  5. * licenses. You may choose to be licensed under the terms of the GNU
  6. * General Public License (GPL) Version 2, available from the file
  7. * COPYING in the main directory of this source tree, or the
  8. * OpenIB.org BSD license below:
  9. *
  10. * Redistribution and use in source and binary forms, with or
  11. * without modification, are permitted provided that the following
  12. * conditions are met:
  13. *
  14. * - Redistributions of source code must retain the above
  15. * copyright notice, this list of conditions and the following
  16. * disclaimer.
  17. *
  18. * - Redistributions in binary form must reproduce the above
  19. * copyright notice, this list of conditions and the following
  20. * disclaimer in the documentation and/or other materials
  21. * provided with the distribution.
  22. *
  23. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26. * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27. * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28. * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29. * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30. * SOFTWARE.
  31. */
  32. #include "mlx4.h"
  /* Not referenced anywhere in the visible part of this file. */
  #define BAD_ACCESS 0xBADACCE5
  /*
   * Number of bytes read from the FW health buffer when it is snapshotted.
   * Also used as the offset, relative to the start of the health buffer, of
   * the word written to block/unblock volatile cr-space accesses.
   */
  #define HEALTH_BUFFER_SIZE 0x40
  /*
   * Bit 6, byte-swapped with swab32() so it can be applied directly to the
   * raw values passed to/returned by readl()/writel().
   */
  #define CR_ENABLE_BIT swab32(BIT(6))
  /* Byte offset, from the start of the mapped cr-space, of the word holding
   * CR_ENABLE_BIT.
   */
  #define CR_ENABLE_BIT_OFFSET 0xF3F04
  /*
   * Passed to devl_region_create() for both regions (presumably the maximum
   * number of snapshots devlink keeps per region -- confirm against the
   * devlink API).
   */
  #define MAX_NUM_OF_DUMPS_TO_STORE (8)
  /* devlink region names. */
  #define REGION_CR_SPACE "cr-space"
  #define REGION_FW_HEALTH "fw-health"
  /* Same names as strings, used in this file's log messages. */
  static const char * const region_cr_space_str = REGION_CR_SPACE;
  static const char * const region_fw_health_str = REGION_FW_HEALTH;
  42. static const struct devlink_region_ops region_cr_space_ops = {
  43. .name = REGION_CR_SPACE,
  44. .destructor = &kvfree,
  45. };
  46. static const struct devlink_region_ops region_fw_health_ops = {
  47. .name = REGION_FW_HEALTH,
  48. .destructor = &kvfree,
  49. };
  /*
   * Set by crdump_enable_crspace_access() to whether CR_ENABLE_BIT was found
   * set before the dump, and read by crdump_disable_crspace_access() to decide
   * whether the bit must be set again afterwards.
   *
   * This is a single file-scope flag, so it is shared by every device that
   * goes through this file.
   * NOTE(review): "enbale" is a misspelling of "enable"; renaming requires
   * updating every user of the variable in one change.
   */
  static bool crdump_enbale_bit_set;
  52. static void crdump_enable_crspace_access(struct mlx4_dev *dev,
  53. u8 __iomem *cr_space)
  54. {
  55. /* Get current enable bit value */
  56. crdump_enbale_bit_set =
  57. readl(cr_space + CR_ENABLE_BIT_OFFSET) & CR_ENABLE_BIT;
  58. /* Enable FW CR filter (set bit6 to 0) */
  59. if (crdump_enbale_bit_set)
  60. writel(readl(cr_space + CR_ENABLE_BIT_OFFSET) & ~CR_ENABLE_BIT,
  61. cr_space + CR_ENABLE_BIT_OFFSET);
  62. /* Enable block volatile crspace accesses */
  63. writel(swab32(1), cr_space + dev->caps.health_buffer_addrs +
  64. HEALTH_BUFFER_SIZE);
  65. }
  66. static void crdump_disable_crspace_access(struct mlx4_dev *dev,
  67. u8 __iomem *cr_space)
  68. {
  69. /* Disable block volatile crspace accesses */
  70. writel(0, cr_space + dev->caps.health_buffer_addrs +
  71. HEALTH_BUFFER_SIZE);
  72. /* Restore FW CR filter value (set bit6 to original value) */
  73. if (crdump_enbale_bit_set)
  74. writel(readl(cr_space + CR_ENABLE_BIT_OFFSET) | CR_ENABLE_BIT,
  75. cr_space + CR_ENABLE_BIT_OFFSET);
  76. }
  77. static void mlx4_crdump_collect_crspace(struct mlx4_dev *dev,
  78. u8 __iomem *cr_space,
  79. u32 id)
  80. {
  81. struct mlx4_fw_crdump *crdump = &dev->persist->crdump;
  82. struct pci_dev *pdev = dev->persist->pdev;
  83. unsigned long cr_res_size;
  84. u8 *crspace_data;
  85. int offset;
  86. int err;
  87. if (!crdump->region_crspace) {
  88. mlx4_err(dev, "crdump: cr-space region is NULL\n");
  89. return;
  90. }
  91. /* Try to collect CR space */
  92. cr_res_size = pci_resource_len(pdev, 0);
  93. crspace_data = kvmalloc(cr_res_size, GFP_KERNEL);
  94. if (crspace_data) {
  95. for (offset = 0; offset < cr_res_size; offset += 4)
  96. *(u32 *)(crspace_data + offset) =
  97. readl(cr_space + offset);
  98. err = devlink_region_snapshot_create(crdump->region_crspace,
  99. crspace_data, id);
  100. if (err) {
  101. kvfree(crspace_data);
  102. mlx4_warn(dev, "crdump: devlink create %s snapshot id %d err %d\n",
  103. region_cr_space_str, id, err);
  104. } else {
  105. mlx4_info(dev, "crdump: added snapshot %d to devlink region %s\n",
  106. id, region_cr_space_str);
  107. }
  108. } else {
  109. mlx4_err(dev, "crdump: Failed to allocate crspace buffer\n");
  110. }
  111. }
  112. static void mlx4_crdump_collect_fw_health(struct mlx4_dev *dev,
  113. u8 __iomem *cr_space,
  114. u32 id)
  115. {
  116. struct mlx4_fw_crdump *crdump = &dev->persist->crdump;
  117. u8 *health_data;
  118. int offset;
  119. int err;
  120. if (!crdump->region_fw_health) {
  121. mlx4_err(dev, "crdump: fw-health region is NULL\n");
  122. return;
  123. }
  124. /* Try to collect health buffer */
  125. health_data = kvmalloc(HEALTH_BUFFER_SIZE, GFP_KERNEL);
  126. if (health_data) {
  127. u8 __iomem *health_buf_start =
  128. cr_space + dev->caps.health_buffer_addrs;
  129. for (offset = 0; offset < HEALTH_BUFFER_SIZE; offset += 4)
  130. *(u32 *)(health_data + offset) =
  131. readl(health_buf_start + offset);
  132. err = devlink_region_snapshot_create(crdump->region_fw_health,
  133. health_data, id);
  134. if (err) {
  135. kvfree(health_data);
  136. mlx4_warn(dev, "crdump: devlink create %s snapshot id %d err %d\n",
  137. region_fw_health_str, id, err);
  138. } else {
  139. mlx4_info(dev, "crdump: added snapshot %d to devlink region %s\n",
  140. id, region_fw_health_str);
  141. }
  142. } else {
  143. mlx4_err(dev, "crdump: Failed to allocate health buffer\n");
  144. }
  145. }
  146. int mlx4_crdump_collect(struct mlx4_dev *dev)
  147. {
  148. struct devlink *devlink = priv_to_devlink(mlx4_priv(dev));
  149. struct mlx4_fw_crdump *crdump = &dev->persist->crdump;
  150. struct pci_dev *pdev = dev->persist->pdev;
  151. unsigned long cr_res_size;
  152. u8 __iomem *cr_space;
  153. int err;
  154. u32 id;
  155. if (!dev->caps.health_buffer_addrs) {
  156. mlx4_info(dev, "crdump: FW doesn't support health buffer access, skipping\n");
  157. return 0;
  158. }
  159. if (!crdump->snapshot_enable) {
  160. mlx4_info(dev, "crdump: devlink snapshot disabled, skipping\n");
  161. return 0;
  162. }
  163. cr_res_size = pci_resource_len(pdev, 0);
  164. cr_space = ioremap(pci_resource_start(pdev, 0), cr_res_size);
  165. if (!cr_space) {
  166. mlx4_err(dev, "crdump: Failed to map pci cr region\n");
  167. return -ENODEV;
  168. }
  169. /* Get the available snapshot ID for the dumps */
  170. err = devlink_region_snapshot_id_get(devlink, &id);
  171. if (err) {
  172. mlx4_err(dev, "crdump: devlink get snapshot id err %d\n", err);
  173. iounmap(cr_space);
  174. return err;
  175. }
  176. crdump_enable_crspace_access(dev, cr_space);
  177. /* Try to capture dumps */
  178. mlx4_crdump_collect_crspace(dev, cr_space, id);
  179. mlx4_crdump_collect_fw_health(dev, cr_space, id);
  180. /* Release reference on the snapshot id */
  181. devlink_region_snapshot_id_put(devlink, id);
  182. crdump_disable_crspace_access(dev, cr_space);
  183. iounmap(cr_space);
  184. return 0;
  185. }
  186. int mlx4_crdump_init(struct mlx4_dev *dev)
  187. {
  188. struct devlink *devlink = priv_to_devlink(mlx4_priv(dev));
  189. struct mlx4_fw_crdump *crdump = &dev->persist->crdump;
  190. struct pci_dev *pdev = dev->persist->pdev;
  191. crdump->snapshot_enable = false;
  192. /* Create cr-space region */
  193. crdump->region_crspace =
  194. devl_region_create(devlink,
  195. &region_cr_space_ops,
  196. MAX_NUM_OF_DUMPS_TO_STORE,
  197. pci_resource_len(pdev, 0));
  198. if (IS_ERR(crdump->region_crspace))
  199. mlx4_warn(dev, "crdump: create devlink region %s err %ld\n",
  200. region_cr_space_str,
  201. PTR_ERR(crdump->region_crspace));
  202. /* Create fw-health region */
  203. crdump->region_fw_health =
  204. devl_region_create(devlink,
  205. &region_fw_health_ops,
  206. MAX_NUM_OF_DUMPS_TO_STORE,
  207. HEALTH_BUFFER_SIZE);
  208. if (IS_ERR(crdump->region_fw_health))
  209. mlx4_warn(dev, "crdump: create devlink region %s err %ld\n",
  210. region_fw_health_str,
  211. PTR_ERR(crdump->region_fw_health));
  212. return 0;
  213. }
  214. void mlx4_crdump_end(struct mlx4_dev *dev)
  215. {
  216. struct mlx4_fw_crdump *crdump = &dev->persist->crdump;
  217. devl_region_destroy(crdump->region_fw_health);
  218. devl_region_destroy(crdump->region_crspace);
  219. }