// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause

/* Authors: Cheng Xu <[email protected]> */
/*          Kai Shen <[email protected]> */
/* Copyright (c) 2020-2022, Alibaba Group. */

#include <linux/module.h>
#include <net/addrconf.h>
#include <rdma/erdma-abi.h>

#include "erdma.h"
#include "erdma_cm.h"
#include "erdma_verbs.h"

MODULE_AUTHOR("Cheng Xu <[email protected]>");
MODULE_DESCRIPTION("Alibaba elasticRDMA adapter driver");
MODULE_LICENSE("Dual BSD/GPL");

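/*
 * Netdev notifier: mirror link-state and MTU changes of the bound net_device
 * into the ib_device (port ACTIVE/ERR events, MTU updates). All other netdev
 * events are ignored.
 */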
static int erdma_netdev_event(struct notifier_block *nb, unsigned long event,
                              void *arg)
{
        struct net_device *netdev = netdev_notifier_info_to_dev(arg);
        struct erdma_dev *dev = container_of(nb, struct erdma_dev, netdev_nb);

        if (dev->netdev == NULL || dev->netdev != netdev)
                goto done;

        switch (event) {
        case NETDEV_UP:
                dev->state = IB_PORT_ACTIVE;
                erdma_port_event(dev, IB_EVENT_PORT_ACTIVE);
                break;
        case NETDEV_DOWN:
                dev->state = IB_PORT_DOWN;
                erdma_port_event(dev, IB_EVENT_PORT_ERR);
                break;
        case NETDEV_CHANGEMTU:
                if (dev->mtu != netdev->mtu) {
                        erdma_set_mtu(dev, netdev->mtu);
                        dev->mtu = netdev->mtu;
                }
                break;
        case NETDEV_REGISTER:
        case NETDEV_UNREGISTER:
        case NETDEV_CHANGEADDR:
        case NETDEV_GOING_DOWN:
        case NETDEV_CHANGE:
        default:
                break;
        }

done:
        return NOTIFY_OK;
}

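/*
 * Look up the net_device whose permanent MAC address matches the adapter MAC
 * read from the BAR registers and bind it to the ib_device. Returns
 * -EPROBE_DEFER when no matching netdev has shown up yet.
 */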
static int erdma_enum_and_get_netdev(struct erdma_dev *dev)
{
        struct net_device *netdev;
        int ret = -EPROBE_DEFER;

        /* Already bound to a net_device, so we skip. */
        if (dev->netdev)
                return 0;

        rtnl_lock();
        for_each_netdev(&init_net, netdev) {
                /*
                 * In erdma, the paired netdev and ibdev should have the same
                 * MAC address. erdma can get the value from its PCIe BAR
                 * registers. Since erdma cannot get the paired netdev
                 * reference directly, we traverse the netdev list here to
                 * find the paired netdev.
                 */
                if (ether_addr_equal_unaligned(netdev->perm_addr,
                                               dev->attrs.peer_addr)) {
                        ret = ib_device_set_netdev(&dev->ibdev, netdev, 1);
                        if (ret) {
                                rtnl_unlock();
                                ibdev_warn(&dev->ibdev,
                                           "failed (%d) to link netdev", ret);
                                return ret;
                        }

                        dev->netdev = netdev;
                        break;
                }
        }

        rtnl_unlock();

        return ret;
}

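/*
 * Bind the paired netdev, derive the node GUID from its MAC address, register
 * the ib_device with the RDMA core and install the netdev notifier.
 */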
static int erdma_device_register(struct erdma_dev *dev)
{
        struct ib_device *ibdev = &dev->ibdev;
        int ret;

        ret = erdma_enum_and_get_netdev(dev);
        if (ret)
                return ret;

        dev->mtu = dev->netdev->mtu;
        addrconf_addr_eui48((u8 *)&ibdev->node_guid, dev->netdev->dev_addr);

        ret = ib_register_device(ibdev, "erdma_%d", &dev->pdev->dev);
        if (ret) {
                dev_err(&dev->pdev->dev,
                        "ib_register_device failed: ret = %d\n", ret);
                return ret;
        }

        dev->netdev_nb.notifier_call = erdma_netdev_event;
        ret = register_netdevice_notifier(&dev->netdev_nb);
        if (ret) {
                ibdev_err(&dev->ibdev, "failed to register notifier.\n");
                ib_unregister_device(ibdev);
        }

        return ret;
}

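/* The common MSI-X vector is shared by the command queue and the AEQ. */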
static irqreturn_t erdma_comm_irq_handler(int irq, void *data)
{
        struct erdma_dev *dev = data;

        erdma_cmdq_completion_handler(&dev->cmdq);
        erdma_aeq_event_handler(dev);

        return IRQ_HANDLED;
}

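/*
 * Split the doorbell resources derived from the hardware group count between
 * type-0 (whole-page) and type-1 (per-entry) direct-WQE doorbells. Direct WQE
 * is disabled when the device exposes fewer than 4 groups.
 */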
static void erdma_dwqe_resource_init(struct erdma_dev *dev)
{
        int total_pages, type0, type1;

        dev->attrs.grp_num = erdma_reg_read32(dev, ERDMA_REGS_GRP_NUM_REG);

        if (dev->attrs.grp_num < 4)
                dev->attrs.disable_dwqe = true;
        else
                dev->attrs.disable_dwqe = false;

        /* One page contains 4 groups. */
        total_pages = dev->attrs.grp_num * 4;

        if (dev->attrs.grp_num >= ERDMA_DWQE_MAX_GRP_CNT) {
                dev->attrs.grp_num = ERDMA_DWQE_MAX_GRP_CNT;
                type0 = ERDMA_DWQE_TYPE0_CNT;
                type1 = ERDMA_DWQE_TYPE1_CNT / ERDMA_DWQE_TYPE1_CNT_PER_PAGE;
        } else {
                type1 = total_pages / 3;
                type0 = total_pages - type1 - 1;
        }

        dev->attrs.dwqe_pages = type0;
        dev->attrs.dwqe_entries = type1 * ERDMA_DWQE_TYPE1_CNT_PER_PAGE;
}

static int erdma_request_vectors(struct erdma_dev *dev)
{
        int expect_irq_num = min(num_possible_cpus() + 1, ERDMA_NUM_MSIX_VEC);
        int ret;

        ret = pci_alloc_irq_vectors(dev->pdev, 1, expect_irq_num, PCI_IRQ_MSIX);
        if (ret < 0) {
                dev_err(&dev->pdev->dev, "request irq vectors failed(%d)\n",
                        ret);
                return ret;
        }

        dev->attrs.irq_num = ret;

        return 0;
}

static int erdma_comm_irq_init(struct erdma_dev *dev)
{
        snprintf(dev->comm_irq.name, ERDMA_IRQNAME_SIZE, "erdma-common@pci:%s",
                 pci_name(dev->pdev));
        dev->comm_irq.msix_vector =
                pci_irq_vector(dev->pdev, ERDMA_MSIX_VECTOR_CMDQ);

        cpumask_set_cpu(cpumask_first(cpumask_of_pcibus(dev->pdev->bus)),
                        &dev->comm_irq.affinity_hint_mask);
        irq_set_affinity_hint(dev->comm_irq.msix_vector,
                              &dev->comm_irq.affinity_hint_mask);

        return request_irq(dev->comm_irq.msix_vector, erdma_comm_irq_handler, 0,
                           dev->comm_irq.name, dev);
}

static void erdma_comm_irq_uninit(struct erdma_dev *dev)
{
        irq_set_affinity_hint(dev->comm_irq.msix_vector, NULL);
        free_irq(dev->comm_irq.msix_vector, dev);
}

static int erdma_device_init(struct erdma_dev *dev, struct pci_dev *pdev)
{
        int ret;

        erdma_dwqe_resource_init(dev);

        ret = dma_set_mask_and_coherent(&pdev->dev,
                                        DMA_BIT_MASK(ERDMA_PCI_WIDTH));
        if (ret)
                return ret;

        dma_set_max_seg_size(&pdev->dev, UINT_MAX);

        return 0;
}

static void erdma_device_uninit(struct erdma_dev *dev)
{
        u32 ctrl = FIELD_PREP(ERDMA_REG_DEV_CTRL_RESET_MASK, 1);

        erdma_reg_write32(dev, ERDMA_REGS_DEV_CTRL_REG, ctrl);
}

static const struct pci_device_id erdma_pci_tbl[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_ALIBABA, 0x107f) },
        {}
};

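/*
 * Low-level bring-up: enable the PCI function, map the function BAR, allocate
 * MSI-X vectors and initialize the common IRQ, AEQ, command queue and CEQs.
 * The error labels undo these steps in reverse order.
 */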
static int erdma_probe_dev(struct pci_dev *pdev)
{
        struct erdma_dev *dev;
        int bars, err;
        u32 version;

        err = pci_enable_device(pdev);
        if (err) {
                dev_err(&pdev->dev, "pci_enable_device failed(%d)\n", err);
                return err;
        }

        pci_set_master(pdev);

        dev = ib_alloc_device(erdma_dev, ibdev);
        if (!dev) {
                dev_err(&pdev->dev, "ib_alloc_device failed\n");
                err = -ENOMEM;
                goto err_disable_device;
        }

        pci_set_drvdata(pdev, dev);
        dev->pdev = pdev;
        dev->attrs.numa_node = dev_to_node(&pdev->dev);

        bars = pci_select_bars(pdev, IORESOURCE_MEM);
        err = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME);
        if (bars != ERDMA_BAR_MASK || err) {
                err = err ? err : -EINVAL;
                goto err_ib_device_release;
        }

        dev->func_bar_addr = pci_resource_start(pdev, ERDMA_FUNC_BAR);
        dev->func_bar_len = pci_resource_len(pdev, ERDMA_FUNC_BAR);

        dev->func_bar =
                devm_ioremap(&pdev->dev, dev->func_bar_addr, dev->func_bar_len);
        if (!dev->func_bar) {
                dev_err(&pdev->dev, "devm_ioremap failed.\n");
                err = -EFAULT;
                goto err_release_bars;
        }

        version = erdma_reg_read32(dev, ERDMA_REGS_VERSION_REG);
        if (version == 0) {
                /* A version of zero means a non-functional function. */
                err = -ENODEV;
                goto err_iounmap_func_bar;
        }

        err = erdma_device_init(dev, pdev);
        if (err)
                goto err_iounmap_func_bar;

        err = erdma_request_vectors(dev);
        if (err)
                goto err_iounmap_func_bar;

        err = erdma_comm_irq_init(dev);
        if (err)
                goto err_free_vectors;

        err = erdma_aeq_init(dev);
        if (err)
                goto err_uninit_comm_irq;

        err = erdma_cmdq_init(dev);
        if (err)
                goto err_uninit_aeq;

        err = erdma_ceqs_init(dev);
        if (err)
                goto err_uninit_cmdq;

        erdma_finish_cmdq_init(dev);

        return 0;

err_uninit_cmdq:
        erdma_device_uninit(dev);
        erdma_cmdq_destroy(dev);

err_uninit_aeq:
        erdma_aeq_destroy(dev);

err_uninit_comm_irq:
        erdma_comm_irq_uninit(dev);

err_free_vectors:
        pci_free_irq_vectors(dev->pdev);

err_iounmap_func_bar:
        devm_iounmap(&pdev->dev, dev->func_bar);

err_release_bars:
        pci_release_selected_regions(pdev, bars);

err_ib_device_release:
        ib_dealloc_device(&dev->ibdev);

err_disable_device:
        pci_disable_device(pdev);

        return err;
}

static void erdma_remove_dev(struct pci_dev *pdev)
{
        struct erdma_dev *dev = pci_get_drvdata(pdev);

        erdma_ceqs_uninit(dev);
        erdma_device_uninit(dev);
        erdma_cmdq_destroy(dev);
        erdma_aeq_destroy(dev);
        erdma_comm_irq_uninit(dev);
        pci_free_irq_vectors(dev->pdev);

        devm_iounmap(&pdev->dev, dev->func_bar);
        pci_release_selected_regions(pdev, ERDMA_BAR_MASK);

        ib_dealloc_device(&dev->ibdev);

        pci_disable_device(pdev);
}

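/*
 * Device attributes come from firmware: a QUERY_DEVICE command returns two
 * 64-bit capability words decoded via ERDMA_GET_CAP(), and QUERY_FW_INFO
 * returns the firmware version.
 */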
#define ERDMA_GET_CAP(name, cap) FIELD_GET(ERDMA_CMD_DEV_CAP_##name##_MASK, cap)

static int erdma_dev_attrs_init(struct erdma_dev *dev)
{
        int err;
        u64 req_hdr, cap0, cap1;

        erdma_cmdq_build_reqhdr(&req_hdr, CMDQ_SUBMOD_RDMA,
                                CMDQ_OPCODE_QUERY_DEVICE);

        err = erdma_post_cmd_wait(&dev->cmdq, &req_hdr, sizeof(req_hdr), &cap0,
                                  &cap1);
        if (err)
                return err;

        dev->attrs.max_cqe = 1 << ERDMA_GET_CAP(MAX_CQE, cap0);
        dev->attrs.max_mr_size = 1ULL << ERDMA_GET_CAP(MAX_MR_SIZE, cap0);
        dev->attrs.max_mw = 1 << ERDMA_GET_CAP(MAX_MW, cap1);
        dev->attrs.max_recv_wr = 1 << ERDMA_GET_CAP(MAX_RECV_WR, cap0);
        dev->attrs.local_dma_key = ERDMA_GET_CAP(DMA_LOCAL_KEY, cap1);
        dev->attrs.cc = ERDMA_GET_CAP(DEFAULT_CC, cap1);
        dev->attrs.max_qp = ERDMA_NQP_PER_QBLOCK * ERDMA_GET_CAP(QBLOCK, cap1);
        dev->attrs.max_mr = dev->attrs.max_qp << 1;
        dev->attrs.max_cq = dev->attrs.max_qp << 1;

        dev->attrs.max_send_wr = ERDMA_MAX_SEND_WR;
        dev->attrs.max_ord = ERDMA_MAX_ORD;
        dev->attrs.max_ird = ERDMA_MAX_IRD;
        dev->attrs.max_send_sge = ERDMA_MAX_SEND_SGE;
        dev->attrs.max_recv_sge = ERDMA_MAX_RECV_SGE;
        dev->attrs.max_sge_rd = ERDMA_MAX_SGE_RD;
        dev->attrs.max_pd = ERDMA_MAX_PD;

        dev->res_cb[ERDMA_RES_TYPE_PD].max_cap = ERDMA_MAX_PD;
        dev->res_cb[ERDMA_RES_TYPE_STAG_IDX].max_cap = dev->attrs.max_mr;

        erdma_cmdq_build_reqhdr(&req_hdr, CMDQ_SUBMOD_COMMON,
                                CMDQ_OPCODE_QUERY_FW_INFO);

        err = erdma_post_cmd_wait(&dev->cmdq, &req_hdr, sizeof(req_hdr), &cap0,
                                  &cap1);
        if (!err)
                dev->attrs.fw_version =
                        FIELD_GET(ERDMA_CMD_INFO0_FW_VER_MASK, cap0);

        return err;
}

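/*
 * Each tracked resource type gets a zeroed allocation bitmap sized to its
 * maximum capacity; allocation starts searching from index 1.
 */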
static int erdma_res_cb_init(struct erdma_dev *dev)
{
        int i, j;

        for (i = 0; i < ERDMA_RES_CNT; i++) {
                dev->res_cb[i].next_alloc_idx = 1;
                spin_lock_init(&dev->res_cb[i].lock);
                dev->res_cb[i].bitmap =
                        bitmap_zalloc(dev->res_cb[i].max_cap, GFP_KERNEL);
                if (!dev->res_cb[i].bitmap)
                        goto err;
        }

        return 0;

err:
        for (j = 0; j < i; j++)
                bitmap_free(dev->res_cb[j].bitmap);

        return -ENOMEM;
}

static void erdma_res_cb_free(struct erdma_dev *dev)
{
        int i;

        for (i = 0; i < ERDMA_RES_CNT; i++)
                bitmap_free(dev->res_cb[i].bitmap);
}

static const struct ib_device_ops erdma_device_ops = {
        .owner = THIS_MODULE,
        .driver_id = RDMA_DRIVER_ERDMA,
        .uverbs_abi_ver = ERDMA_ABI_VERSION,

        .alloc_mr = erdma_ib_alloc_mr,
        .alloc_pd = erdma_alloc_pd,
        .alloc_ucontext = erdma_alloc_ucontext,
        .create_cq = erdma_create_cq,
        .create_qp = erdma_create_qp,
        .dealloc_pd = erdma_dealloc_pd,
        .dealloc_ucontext = erdma_dealloc_ucontext,
        .dereg_mr = erdma_dereg_mr,
        .destroy_cq = erdma_destroy_cq,
        .destroy_qp = erdma_destroy_qp,
        .get_dma_mr = erdma_get_dma_mr,
        .get_port_immutable = erdma_get_port_immutable,
        .iw_accept = erdma_accept,
        .iw_add_ref = erdma_qp_get_ref,
        .iw_connect = erdma_connect,
        .iw_create_listen = erdma_create_listen,
        .iw_destroy_listen = erdma_destroy_listen,
        .iw_get_qp = erdma_get_ibqp,
        .iw_reject = erdma_reject,
        .iw_rem_ref = erdma_qp_put_ref,
        .map_mr_sg = erdma_map_mr_sg,
        .mmap = erdma_mmap,
        .mmap_free = erdma_mmap_free,
        .modify_qp = erdma_modify_qp,
        .post_recv = erdma_post_recv,
        .post_send = erdma_post_send,
        .poll_cq = erdma_poll_cq,
        .query_device = erdma_query_device,
        .query_gid = erdma_query_gid,
        .query_port = erdma_query_port,
        .query_qp = erdma_query_qp,
        .req_notify_cq = erdma_req_notify_cq,
        .reg_user_mr = erdma_reg_user_mr,

        INIT_RDMA_OBJ_SIZE(ib_cq, erdma_cq, ibcq),
        INIT_RDMA_OBJ_SIZE(ib_pd, erdma_pd, ibpd),
        INIT_RDMA_OBJ_SIZE(ib_ucontext, erdma_ucontext, ibucontext),
        INIT_RDMA_OBJ_SIZE(ib_qp, erdma_qp, ibqp),
};

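/*
 * Second probe stage: query device attributes from firmware, set up the verbs
 * bookkeeping (QP/CQ xarrays, resource and doorbell bitmaps), read the
 * adapter MAC from the registers and register the ib_device.
 */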
static int erdma_ib_device_add(struct pci_dev *pdev)
{
        struct erdma_dev *dev = pci_get_drvdata(pdev);
        struct ib_device *ibdev = &dev->ibdev;
        u64 mac;
        int ret;

        ret = erdma_dev_attrs_init(dev);
        if (ret)
                return ret;

        ibdev->node_type = RDMA_NODE_RNIC;
        memcpy(ibdev->node_desc, ERDMA_NODE_DESC, sizeof(ERDMA_NODE_DESC));

        /*
         * Current model (one-to-one device association):
         * One ERDMA device per net_device or, equivalently,
         * per physical port.
         */
        ibdev->phys_port_cnt = 1;
        ibdev->num_comp_vectors = dev->attrs.irq_num - 1;

        ib_set_device_ops(ibdev, &erdma_device_ops);

        INIT_LIST_HEAD(&dev->cep_list);

        spin_lock_init(&dev->lock);
        xa_init_flags(&dev->qp_xa, XA_FLAGS_ALLOC1);
        xa_init_flags(&dev->cq_xa, XA_FLAGS_ALLOC1);
        dev->next_alloc_cqn = 1;
        dev->next_alloc_qpn = 1;

        ret = erdma_res_cb_init(dev);
        if (ret)
                return ret;

        spin_lock_init(&dev->db_bitmap_lock);
        bitmap_zero(dev->sdb_page, ERDMA_DWQE_TYPE0_CNT);
        bitmap_zero(dev->sdb_entry, ERDMA_DWQE_TYPE1_CNT);

        atomic_set(&dev->num_ctx, 0);

        mac = erdma_reg_read32(dev, ERDMA_REGS_NETDEV_MAC_L_REG);
        mac |= (u64)erdma_reg_read32(dev, ERDMA_REGS_NETDEV_MAC_H_REG) << 32;

        u64_to_ether_addr(mac, dev->attrs.peer_addr);

        ret = erdma_device_register(dev);
        if (ret)
                goto err_out;

        return 0;

err_out:
        xa_destroy(&dev->qp_xa);
        xa_destroy(&dev->cq_xa);

        erdma_res_cb_free(dev);
        return ret;
}

static void erdma_ib_device_remove(struct pci_dev *pdev)
{
        struct erdma_dev *dev = pci_get_drvdata(pdev);

        unregister_netdevice_notifier(&dev->netdev_nb);
        ib_unregister_device(&dev->ibdev);

        erdma_res_cb_free(dev);
        xa_destroy(&dev->qp_xa);
        xa_destroy(&dev->cq_xa);
}

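/*
 * Probe runs in two stages: erdma_probe_dev() brings up the PCI function and
 * hardware queues, then erdma_ib_device_add() registers the RDMA device.
 * Removal tears them down in the reverse order.
 */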
static int erdma_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
        int ret;

        ret = erdma_probe_dev(pdev);
        if (ret)
                return ret;

        ret = erdma_ib_device_add(pdev);
        if (ret) {
                erdma_remove_dev(pdev);
                return ret;
        }

        return 0;
}

static void erdma_remove(struct pci_dev *pdev)
{
        erdma_ib_device_remove(pdev);
        erdma_remove_dev(pdev);
}

static struct pci_driver erdma_pci_driver = {
        .name = DRV_MODULE_NAME,
        .id_table = erdma_pci_tbl,
        .probe = erdma_probe,
        .remove = erdma_remove
};

MODULE_DEVICE_TABLE(pci, erdma_pci_tbl);

static __init int erdma_init_module(void)
{
        int ret;

        ret = erdma_cm_init();
        if (ret)
                return ret;

        ret = pci_register_driver(&erdma_pci_driver);
        if (ret)
                erdma_cm_exit();

        return ret;
}

static void __exit erdma_exit_module(void)
{
        pci_unregister_driver(&erdma_pci_driver);
        erdma_cm_exit();
}

module_init(erdma_init_module);
module_exit(erdma_exit_module);