cong.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463
  1. /*
  2. * Copyright (c) 2013-2017, Mellanox Technologies. All rights reserved.
  3. *
  4. * This software is available to you under a choice of one of two
  5. * licenses. You may choose to be licensed under the terms of the GNU
  6. * General Public License (GPL) Version 2, available from the file
  7. * COPYING in the main directory of this source tree, or the
  8. * OpenIB.org BSD license below:
  9. *
  10. * Redistribution and use in source and binary forms, with or
  11. * without modification, are permitted provided that the following
  12. * conditions are met:
  13. *
  14. * - Redistributions of source code must retain the above
  15. * copyright notice, this list of conditions and the following
  16. * disclaimer.
  17. *
  18. * - Redistributions in binary form must reproduce the above
  19. * copyright notice, this list of conditions and the following
  20. * disclaimer in the documentation and/or other materials
  21. * provided with the distribution.
  22. *
  23. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26. * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27. * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28. * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29. * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30. * SOFTWARE.
  31. */
  32. #include <linux/debugfs.h>
  33. #include "mlx5_ib.h"
  34. #include "cmd.h"
  35. enum mlx5_ib_cong_node_type {
  36. MLX5_IB_RROCE_ECN_RP = 1,
  37. MLX5_IB_RROCE_ECN_NP = 2,
  38. };
  39. static const char * const mlx5_ib_dbg_cc_name[] = {
  40. "rp_clamp_tgt_rate",
  41. "rp_clamp_tgt_rate_ati",
  42. "rp_time_reset",
  43. "rp_byte_reset",
  44. "rp_threshold",
  45. "rp_ai_rate",
  46. "rp_max_rate",
  47. "rp_hai_rate",
  48. "rp_min_dec_fac",
  49. "rp_min_rate",
  50. "rp_rate_to_set_on_first_cnp",
  51. "rp_dce_tcp_g",
  52. "rp_dce_tcp_rtt",
  53. "rp_rate_reduce_monitor_period",
  54. "rp_initial_alpha_value",
  55. "rp_gd",
  56. "np_min_time_between_cnps",
  57. "np_cnp_dscp",
  58. "np_cnp_prio_mode",
  59. "np_cnp_prio",
  60. };
  61. #define MLX5_IB_RP_CLAMP_TGT_RATE_ATTR BIT(1)
  62. #define MLX5_IB_RP_CLAMP_TGT_RATE_ATI_ATTR BIT(2)
  63. #define MLX5_IB_RP_TIME_RESET_ATTR BIT(3)
  64. #define MLX5_IB_RP_BYTE_RESET_ATTR BIT(4)
  65. #define MLX5_IB_RP_THRESHOLD_ATTR BIT(5)
  66. #define MLX5_IB_RP_MAX_RATE_ATTR BIT(6)
  67. #define MLX5_IB_RP_AI_RATE_ATTR BIT(7)
  68. #define MLX5_IB_RP_HAI_RATE_ATTR BIT(8)
  69. #define MLX5_IB_RP_MIN_DEC_FAC_ATTR BIT(9)
  70. #define MLX5_IB_RP_MIN_RATE_ATTR BIT(10)
  71. #define MLX5_IB_RP_RATE_TO_SET_ON_FIRST_CNP_ATTR BIT(11)
  72. #define MLX5_IB_RP_DCE_TCP_G_ATTR BIT(12)
  73. #define MLX5_IB_RP_DCE_TCP_RTT_ATTR BIT(13)
  74. #define MLX5_IB_RP_RATE_REDUCE_MONITOR_PERIOD_ATTR BIT(14)
  75. #define MLX5_IB_RP_INITIAL_ALPHA_VALUE_ATTR BIT(15)
  76. #define MLX5_IB_RP_GD_ATTR BIT(16)
  77. #define MLX5_IB_NP_MIN_TIME_BETWEEN_CNPS_ATTR BIT(2)
  78. #define MLX5_IB_NP_CNP_DSCP_ATTR BIT(3)
  79. #define MLX5_IB_NP_CNP_PRIO_MODE_ATTR BIT(4)
  80. static enum mlx5_ib_cong_node_type
  81. mlx5_ib_param_to_node(enum mlx5_ib_dbg_cc_types param_offset)
  82. {
  83. if (param_offset >= MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE &&
  84. param_offset <= MLX5_IB_DBG_CC_RP_GD)
  85. return MLX5_IB_RROCE_ECN_RP;
  86. else
  87. return MLX5_IB_RROCE_ECN_NP;
  88. }
  89. static u32 mlx5_get_cc_param_val(void *field, int offset)
  90. {
  91. switch (offset) {
  92. case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE:
  93. return MLX5_GET(cong_control_r_roce_ecn_rp, field,
  94. clamp_tgt_rate);
  95. case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE_ATI:
  96. return MLX5_GET(cong_control_r_roce_ecn_rp, field,
  97. clamp_tgt_rate_after_time_inc);
  98. case MLX5_IB_DBG_CC_RP_TIME_RESET:
  99. return MLX5_GET(cong_control_r_roce_ecn_rp, field,
  100. rpg_time_reset);
  101. case MLX5_IB_DBG_CC_RP_BYTE_RESET:
  102. return MLX5_GET(cong_control_r_roce_ecn_rp, field,
  103. rpg_byte_reset);
  104. case MLX5_IB_DBG_CC_RP_THRESHOLD:
  105. return MLX5_GET(cong_control_r_roce_ecn_rp, field,
  106. rpg_threshold);
  107. case MLX5_IB_DBG_CC_RP_AI_RATE:
  108. return MLX5_GET(cong_control_r_roce_ecn_rp, field,
  109. rpg_ai_rate);
  110. case MLX5_IB_DBG_CC_RP_MAX_RATE:
  111. return MLX5_GET(cong_control_r_roce_ecn_rp, field,
  112. rpg_max_rate);
  113. case MLX5_IB_DBG_CC_RP_HAI_RATE:
  114. return MLX5_GET(cong_control_r_roce_ecn_rp, field,
  115. rpg_hai_rate);
  116. case MLX5_IB_DBG_CC_RP_MIN_DEC_FAC:
  117. return MLX5_GET(cong_control_r_roce_ecn_rp, field,
  118. rpg_min_dec_fac);
  119. case MLX5_IB_DBG_CC_RP_MIN_RATE:
  120. return MLX5_GET(cong_control_r_roce_ecn_rp, field,
  121. rpg_min_rate);
  122. case MLX5_IB_DBG_CC_RP_RATE_TO_SET_ON_FIRST_CNP:
  123. return MLX5_GET(cong_control_r_roce_ecn_rp, field,
  124. rate_to_set_on_first_cnp);
  125. case MLX5_IB_DBG_CC_RP_DCE_TCP_G:
  126. return MLX5_GET(cong_control_r_roce_ecn_rp, field,
  127. dce_tcp_g);
  128. case MLX5_IB_DBG_CC_RP_DCE_TCP_RTT:
  129. return MLX5_GET(cong_control_r_roce_ecn_rp, field,
  130. dce_tcp_rtt);
  131. case MLX5_IB_DBG_CC_RP_RATE_REDUCE_MONITOR_PERIOD:
  132. return MLX5_GET(cong_control_r_roce_ecn_rp, field,
  133. rate_reduce_monitor_period);
  134. case MLX5_IB_DBG_CC_RP_INITIAL_ALPHA_VALUE:
  135. return MLX5_GET(cong_control_r_roce_ecn_rp, field,
  136. initial_alpha_value);
  137. case MLX5_IB_DBG_CC_RP_GD:
  138. return MLX5_GET(cong_control_r_roce_ecn_rp, field,
  139. rpg_gd);
  140. case MLX5_IB_DBG_CC_NP_MIN_TIME_BETWEEN_CNPS:
  141. return MLX5_GET(cong_control_r_roce_ecn_np, field,
  142. min_time_between_cnps);
  143. case MLX5_IB_DBG_CC_NP_CNP_DSCP:
  144. return MLX5_GET(cong_control_r_roce_ecn_np, field,
  145. cnp_dscp);
  146. case MLX5_IB_DBG_CC_NP_CNP_PRIO_MODE:
  147. return MLX5_GET(cong_control_r_roce_ecn_np, field,
  148. cnp_prio_mode);
  149. case MLX5_IB_DBG_CC_NP_CNP_PRIO:
  150. return MLX5_GET(cong_control_r_roce_ecn_np, field,
  151. cnp_802p_prio);
  152. default:
  153. return 0;
  154. }
  155. }
  156. static void mlx5_ib_set_cc_param_mask_val(void *field, int offset,
  157. u32 var, u32 *attr_mask)
  158. {
  159. switch (offset) {
  160. case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE:
  161. *attr_mask |= MLX5_IB_RP_CLAMP_TGT_RATE_ATTR;
  162. MLX5_SET(cong_control_r_roce_ecn_rp, field,
  163. clamp_tgt_rate, var);
  164. break;
  165. case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE_ATI:
  166. *attr_mask |= MLX5_IB_RP_CLAMP_TGT_RATE_ATI_ATTR;
  167. MLX5_SET(cong_control_r_roce_ecn_rp, field,
  168. clamp_tgt_rate_after_time_inc, var);
  169. break;
  170. case MLX5_IB_DBG_CC_RP_TIME_RESET:
  171. *attr_mask |= MLX5_IB_RP_TIME_RESET_ATTR;
  172. MLX5_SET(cong_control_r_roce_ecn_rp, field,
  173. rpg_time_reset, var);
  174. break;
  175. case MLX5_IB_DBG_CC_RP_BYTE_RESET:
  176. *attr_mask |= MLX5_IB_RP_BYTE_RESET_ATTR;
  177. MLX5_SET(cong_control_r_roce_ecn_rp, field,
  178. rpg_byte_reset, var);
  179. break;
  180. case MLX5_IB_DBG_CC_RP_THRESHOLD:
  181. *attr_mask |= MLX5_IB_RP_THRESHOLD_ATTR;
  182. MLX5_SET(cong_control_r_roce_ecn_rp, field,
  183. rpg_threshold, var);
  184. break;
  185. case MLX5_IB_DBG_CC_RP_AI_RATE:
  186. *attr_mask |= MLX5_IB_RP_AI_RATE_ATTR;
  187. MLX5_SET(cong_control_r_roce_ecn_rp, field,
  188. rpg_ai_rate, var);
  189. break;
  190. case MLX5_IB_DBG_CC_RP_MAX_RATE:
  191. *attr_mask |= MLX5_IB_RP_MAX_RATE_ATTR;
  192. MLX5_SET(cong_control_r_roce_ecn_rp, field,
  193. rpg_max_rate, var);
  194. break;
  195. case MLX5_IB_DBG_CC_RP_HAI_RATE:
  196. *attr_mask |= MLX5_IB_RP_HAI_RATE_ATTR;
  197. MLX5_SET(cong_control_r_roce_ecn_rp, field,
  198. rpg_hai_rate, var);
  199. break;
  200. case MLX5_IB_DBG_CC_RP_MIN_DEC_FAC:
  201. *attr_mask |= MLX5_IB_RP_MIN_DEC_FAC_ATTR;
  202. MLX5_SET(cong_control_r_roce_ecn_rp, field,
  203. rpg_min_dec_fac, var);
  204. break;
  205. case MLX5_IB_DBG_CC_RP_MIN_RATE:
  206. *attr_mask |= MLX5_IB_RP_MIN_RATE_ATTR;
  207. MLX5_SET(cong_control_r_roce_ecn_rp, field,
  208. rpg_min_rate, var);
  209. break;
  210. case MLX5_IB_DBG_CC_RP_RATE_TO_SET_ON_FIRST_CNP:
  211. *attr_mask |= MLX5_IB_RP_RATE_TO_SET_ON_FIRST_CNP_ATTR;
  212. MLX5_SET(cong_control_r_roce_ecn_rp, field,
  213. rate_to_set_on_first_cnp, var);
  214. break;
  215. case MLX5_IB_DBG_CC_RP_DCE_TCP_G:
  216. *attr_mask |= MLX5_IB_RP_DCE_TCP_G_ATTR;
  217. MLX5_SET(cong_control_r_roce_ecn_rp, field,
  218. dce_tcp_g, var);
  219. break;
  220. case MLX5_IB_DBG_CC_RP_DCE_TCP_RTT:
  221. *attr_mask |= MLX5_IB_RP_DCE_TCP_RTT_ATTR;
  222. MLX5_SET(cong_control_r_roce_ecn_rp, field,
  223. dce_tcp_rtt, var);
  224. break;
  225. case MLX5_IB_DBG_CC_RP_RATE_REDUCE_MONITOR_PERIOD:
  226. *attr_mask |= MLX5_IB_RP_RATE_REDUCE_MONITOR_PERIOD_ATTR;
  227. MLX5_SET(cong_control_r_roce_ecn_rp, field,
  228. rate_reduce_monitor_period, var);
  229. break;
  230. case MLX5_IB_DBG_CC_RP_INITIAL_ALPHA_VALUE:
  231. *attr_mask |= MLX5_IB_RP_INITIAL_ALPHA_VALUE_ATTR;
  232. MLX5_SET(cong_control_r_roce_ecn_rp, field,
  233. initial_alpha_value, var);
  234. break;
  235. case MLX5_IB_DBG_CC_RP_GD:
  236. *attr_mask |= MLX5_IB_RP_GD_ATTR;
  237. MLX5_SET(cong_control_r_roce_ecn_rp, field,
  238. rpg_gd, var);
  239. break;
  240. case MLX5_IB_DBG_CC_NP_MIN_TIME_BETWEEN_CNPS:
  241. *attr_mask |= MLX5_IB_NP_MIN_TIME_BETWEEN_CNPS_ATTR;
  242. MLX5_SET(cong_control_r_roce_ecn_np, field,
  243. min_time_between_cnps, var);
  244. break;
  245. case MLX5_IB_DBG_CC_NP_CNP_DSCP:
  246. *attr_mask |= MLX5_IB_NP_CNP_DSCP_ATTR;
  247. MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_dscp, var);
  248. break;
  249. case MLX5_IB_DBG_CC_NP_CNP_PRIO_MODE:
  250. *attr_mask |= MLX5_IB_NP_CNP_PRIO_MODE_ATTR;
  251. MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_prio_mode, var);
  252. break;
  253. case MLX5_IB_DBG_CC_NP_CNP_PRIO:
  254. *attr_mask |= MLX5_IB_NP_CNP_PRIO_MODE_ATTR;
  255. MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_prio_mode, 0);
  256. MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_802p_prio, var);
  257. break;
  258. }
  259. }
  260. static int mlx5_ib_get_cc_params(struct mlx5_ib_dev *dev, u32 port_num,
  261. int offset, u32 *var)
  262. {
  263. int outlen = MLX5_ST_SZ_BYTES(query_cong_params_out);
  264. void *out;
  265. void *field;
  266. int err;
  267. enum mlx5_ib_cong_node_type node;
  268. struct mlx5_core_dev *mdev;
  269. /* Takes a 1-based port number */
  270. mdev = mlx5_ib_get_native_port_mdev(dev, port_num + 1, NULL);
  271. if (!mdev)
  272. return -ENODEV;
  273. out = kvzalloc(outlen, GFP_KERNEL);
  274. if (!out) {
  275. err = -ENOMEM;
  276. goto alloc_err;
  277. }
  278. node = mlx5_ib_param_to_node(offset);
  279. err = mlx5_cmd_query_cong_params(mdev, node, out);
  280. if (err)
  281. goto free;
  282. field = MLX5_ADDR_OF(query_cong_params_out, out, congestion_parameters);
  283. *var = mlx5_get_cc_param_val(field, offset);
  284. free:
  285. kvfree(out);
  286. alloc_err:
  287. mlx5_ib_put_native_port_mdev(dev, port_num + 1);
  288. return err;
  289. }
  290. static int mlx5_ib_set_cc_params(struct mlx5_ib_dev *dev, u32 port_num,
  291. int offset, u32 var)
  292. {
  293. int inlen = MLX5_ST_SZ_BYTES(modify_cong_params_in);
  294. void *in;
  295. void *field;
  296. enum mlx5_ib_cong_node_type node;
  297. struct mlx5_core_dev *mdev;
  298. u32 attr_mask = 0;
  299. int err;
  300. /* Takes a 1-based port number */
  301. mdev = mlx5_ib_get_native_port_mdev(dev, port_num + 1, NULL);
  302. if (!mdev)
  303. return -ENODEV;
  304. in = kvzalloc(inlen, GFP_KERNEL);
  305. if (!in) {
  306. err = -ENOMEM;
  307. goto alloc_err;
  308. }
  309. MLX5_SET(modify_cong_params_in, in, opcode,
  310. MLX5_CMD_OP_MODIFY_CONG_PARAMS);
  311. node = mlx5_ib_param_to_node(offset);
  312. MLX5_SET(modify_cong_params_in, in, cong_protocol, node);
  313. field = MLX5_ADDR_OF(modify_cong_params_in, in, congestion_parameters);
  314. mlx5_ib_set_cc_param_mask_val(field, offset, var, &attr_mask);
  315. field = MLX5_ADDR_OF(modify_cong_params_in, in, field_select);
  316. MLX5_SET(field_select_r_roce_rp, field, field_select_r_roce_rp,
  317. attr_mask);
  318. err = mlx5_cmd_exec_in(dev->mdev, modify_cong_params, in);
  319. kvfree(in);
  320. alloc_err:
  321. mlx5_ib_put_native_port_mdev(dev, port_num + 1);
  322. return err;
  323. }
  324. static ssize_t set_param(struct file *filp, const char __user *buf,
  325. size_t count, loff_t *pos)
  326. {
  327. struct mlx5_ib_dbg_param *param = filp->private_data;
  328. int offset = param->offset;
  329. char lbuf[11] = { };
  330. u32 var;
  331. int ret;
  332. if (count > sizeof(lbuf))
  333. return -EINVAL;
  334. if (copy_from_user(lbuf, buf, count))
  335. return -EFAULT;
  336. lbuf[sizeof(lbuf) - 1] = '\0';
  337. if (kstrtou32(lbuf, 0, &var))
  338. return -EINVAL;
  339. ret = mlx5_ib_set_cc_params(param->dev, param->port_num, offset, var);
  340. return ret ? ret : count;
  341. }
  342. static ssize_t get_param(struct file *filp, char __user *buf, size_t count,
  343. loff_t *pos)
  344. {
  345. struct mlx5_ib_dbg_param *param = filp->private_data;
  346. int offset = param->offset;
  347. u32 var = 0;
  348. int ret;
  349. char lbuf[11];
  350. ret = mlx5_ib_get_cc_params(param->dev, param->port_num, offset, &var);
  351. if (ret)
  352. return ret;
  353. ret = snprintf(lbuf, sizeof(lbuf), "%d\n", var);
  354. if (ret < 0)
  355. return ret;
  356. return simple_read_from_buffer(buf, count, pos, lbuf, ret);
  357. }
  358. static const struct file_operations dbg_cc_fops = {
  359. .owner = THIS_MODULE,
  360. .open = simple_open,
  361. .write = set_param,
  362. .read = get_param,
  363. };
  364. void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u32 port_num)
  365. {
  366. if (!mlx5_debugfs_root ||
  367. !dev->port[port_num].dbg_cc_params ||
  368. !dev->port[port_num].dbg_cc_params->root)
  369. return;
  370. debugfs_remove_recursive(dev->port[port_num].dbg_cc_params->root);
  371. kfree(dev->port[port_num].dbg_cc_params);
  372. dev->port[port_num].dbg_cc_params = NULL;
  373. }
  374. void mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u32 port_num)
  375. {
  376. struct mlx5_ib_dbg_cc_params *dbg_cc_params;
  377. struct mlx5_core_dev *mdev;
  378. int i;
  379. if (!mlx5_debugfs_root)
  380. return;
  381. /* Takes a 1-based port number */
  382. mdev = mlx5_ib_get_native_port_mdev(dev, port_num + 1, NULL);
  383. if (!mdev)
  384. return;
  385. if (!MLX5_CAP_GEN(mdev, cc_query_allowed) ||
  386. !MLX5_CAP_GEN(mdev, cc_modify_allowed))
  387. goto put_mdev;
  388. dbg_cc_params = kzalloc(sizeof(*dbg_cc_params), GFP_KERNEL);
  389. if (!dbg_cc_params)
  390. goto err;
  391. dev->port[port_num].dbg_cc_params = dbg_cc_params;
  392. dbg_cc_params->root = debugfs_create_dir("cc_params", mlx5_debugfs_get_dev_root(mdev));
  393. for (i = 0; i < MLX5_IB_DBG_CC_MAX; i++) {
  394. dbg_cc_params->params[i].offset = i;
  395. dbg_cc_params->params[i].dev = dev;
  396. dbg_cc_params->params[i].port_num = port_num;
  397. dbg_cc_params->params[i].dentry =
  398. debugfs_create_file(mlx5_ib_dbg_cc_name[i],
  399. 0600, dbg_cc_params->root,
  400. &dbg_cc_params->params[i],
  401. &dbg_cc_fops);
  402. }
  403. put_mdev:
  404. mlx5_ib_put_native_port_mdev(dev, port_num + 1);
  405. return;
  406. err:
  407. mlx5_ib_warn(dev, "cong debugfs failure\n");
  408. mlx5_ib_cleanup_cong_debugfs(dev, port_num);
  409. mlx5_ib_put_native_port_mdev(dev, port_num + 1);
  410. /*
  411. * We don't want to fail driver if debugfs failed to initialize,
  412. * so we are not forwarding error to the user.
  413. */
  414. return;
  415. }