rxe_mcast.c

// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2022 Hewlett Packard Enterprise, Inc. All rights reserved.
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

/*
 * rxe_mcast.c implements driver support for multicast transport.
 * It is based on two data structures: struct rxe_mcg ('mcg') and
 * struct rxe_mca ('mca'). An mcg is allocated the first time a qp
 * is attached to a new mgid. The mcg's are indexed by a red-black
 * tree keyed on the mgid. This data structure is searched for the
 * mcg when a multicast packet is received and when another qp is
 * attached to the same mgid. It is cleaned up when the last qp is
 * detached from the mcg. Each time a qp is attached to an mcg an
 * mca is created. It holds a pointer to the qp and is added to a
 * list of qp's that are attached to the mcg. The qp_list is used
 * to replicate mcast packets in the rxe receive path.
 */
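
/*
 * For reference, the two structures named above are declared in
 * rxe_verbs.h. A minimal sketch of the fields this file relies on
 * (see that header for the authoritative definitions) looks roughly
 * like:
 *
 *	struct rxe_mcg {
 *		struct rb_node node;		(mgid-keyed node in rxe->mcg_tree)
 *		struct kref ref_cnt;		(released via rxe_cleanup_mcg)
 *		struct rxe_dev *rxe;
 *		struct list_head qp_list;	(list of attached mca's)
 *		union ib_gid mgid;
 *		atomic_t qp_num;		(number of qp's attached)
 *	};
 *
 *	struct rxe_mca {
 *		struct list_head qp_list;	(entry on mcg->qp_list)
 *		struct rxe_qp *qp;		(the attached qp)
 *	};
 */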

#include "rxe.h"

/**
 * rxe_mcast_add - add multicast address to rxe device
 * @rxe: rxe device object
 * @mgid: multicast address as a gid
 *
 * Returns 0 on success else an error
 */
static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid)
{
	unsigned char ll_addr[ETH_ALEN];

	ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);

	return dev_mc_add(rxe->ndev, ll_addr);
}

/**
 * rxe_mcast_del - delete multicast address from rxe device
 * @rxe: rxe device object
 * @mgid: multicast address as a gid
 *
 * Returns 0 on success else an error
 */
static int rxe_mcast_del(struct rxe_dev *rxe, union ib_gid *mgid)
{
	unsigned char ll_addr[ETH_ALEN];

	ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);

	return dev_mc_del(rxe->ndev, ll_addr);
}

/**
 * __rxe_insert_mcg - insert an mcg into red-black tree (rxe->mcg_tree)
 * @mcg: mcg object with an embedded red-black tree node
 *
 * Context: caller must hold a reference to mcg and rxe->mcg_lock and
 * is responsible for not adding the same mcg to the tree twice.
 */
static void __rxe_insert_mcg(struct rxe_mcg *mcg)
{
	struct rb_root *tree = &mcg->rxe->mcg_tree;
	struct rb_node **link = &tree->rb_node;
	struct rb_node *node = NULL;
	struct rxe_mcg *tmp;
	int cmp;

	while (*link) {
		node = *link;
		tmp = rb_entry(node, struct rxe_mcg, node);

		cmp = memcmp(&tmp->mgid, &mcg->mgid, sizeof(mcg->mgid));
		if (cmp > 0)
			link = &(*link)->rb_left;
		else
			link = &(*link)->rb_right;
	}

	rb_link_node(&mcg->node, node, link);
	rb_insert_color(&mcg->node, tree);
}

/**
 * __rxe_remove_mcg - remove an mcg from red-black tree holding lock
 * @mcg: mcast group object with an embedded red-black tree node
 *
 * Context: caller must hold a reference to mcg and rxe->mcg_lock
 */
static void __rxe_remove_mcg(struct rxe_mcg *mcg)
{
	rb_erase(&mcg->node, &mcg->rxe->mcg_tree);
}

/**
 * __rxe_lookup_mcg - lookup mcg in rxe->mcg_tree while holding lock
 * @rxe: rxe device object
 * @mgid: multicast IP address
 *
 * Context: caller must hold rxe->mcg_lock
 * Returns: mcg on success (and takes a ref to mcg) else NULL
 */
static struct rxe_mcg *__rxe_lookup_mcg(struct rxe_dev *rxe,
					union ib_gid *mgid)
{
	struct rb_root *tree = &rxe->mcg_tree;
	struct rxe_mcg *mcg;
	struct rb_node *node;
	int cmp;

	node = tree->rb_node;

	while (node) {
		mcg = rb_entry(node, struct rxe_mcg, node);

		cmp = memcmp(&mcg->mgid, mgid, sizeof(*mgid));

		if (cmp > 0)
			node = node->rb_left;
		else if (cmp < 0)
			node = node->rb_right;
		else
			break;
	}

	if (node) {
		kref_get(&mcg->ref_cnt);
		return mcg;
	}

	return NULL;
}

/**
 * rxe_lookup_mcg - look up mcg in red-black tree
 * @rxe: rxe device object
 * @mgid: multicast IP address
 *
 * Returns: mcg if found else NULL
 */
struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
{
	struct rxe_mcg *mcg;

	spin_lock_bh(&rxe->mcg_lock);
	mcg = __rxe_lookup_mcg(rxe, mgid);
	spin_unlock_bh(&rxe->mcg_lock);

	return mcg;
}

/**
 * __rxe_init_mcg - initialize a new mcg
 * @rxe: rxe device
 * @mgid: multicast address as a gid
 * @mcg: new mcg object
 *
 * Context: caller should hold rxe->mcg_lock
 */
static void __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid,
			   struct rxe_mcg *mcg)
{
	kref_init(&mcg->ref_cnt);
	memcpy(&mcg->mgid, mgid, sizeof(mcg->mgid));
	INIT_LIST_HEAD(&mcg->qp_list);
	mcg->rxe = rxe;

	/* caller holds a ref on mcg but that will be
	 * dropped when mcg goes out of scope. We need to take a ref
	 * on the pointer that will be saved in the red-black tree
	 * by __rxe_insert_mcg and used to look up mcg from mgid later.
	 * Inserting mcg makes it visible to outside so this should
	 * be done last after the object is ready.
	 */
	kref_get(&mcg->ref_cnt);
	__rxe_insert_mcg(mcg);
}

/**
 * rxe_get_mcg - lookup or allocate a mcg
 * @rxe: rxe device object
 * @mgid: multicast IP address as a gid
 *
 * Returns: mcg on success else ERR_PTR(error)
 */
static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
{
	struct rxe_mcg *mcg, *tmp;
	int err;

	if (rxe->attr.max_mcast_grp == 0)
		return ERR_PTR(-EINVAL);

	/* check to see if mcg already exists */
	mcg = rxe_lookup_mcg(rxe, mgid);
	if (mcg)
		return mcg;

	/* check to see if we have reached limit */
	if (atomic_inc_return(&rxe->mcg_num) > rxe->attr.max_mcast_grp) {
		err = -ENOMEM;
		goto err_dec;
	}

	/* speculative alloc of new mcg */
	mcg = kzalloc(sizeof(*mcg), GFP_KERNEL);
	if (!mcg) {
		err = -ENOMEM;
		goto err_dec;
	}

	spin_lock_bh(&rxe->mcg_lock);
	/* re-check to see if someone else just added it */
	tmp = __rxe_lookup_mcg(rxe, mgid);
	if (tmp) {
		spin_unlock_bh(&rxe->mcg_lock);
		atomic_dec(&rxe->mcg_num);
		kfree(mcg);
		return tmp;
	}

	__rxe_init_mcg(rxe, mgid, mcg);
	spin_unlock_bh(&rxe->mcg_lock);

	/* add mcast address outside of lock */
	err = rxe_mcast_add(rxe, mgid);
	if (!err)
		return mcg;

	kfree(mcg);
err_dec:
	atomic_dec(&rxe->mcg_num);
	return ERR_PTR(err);
}

/**
 * rxe_cleanup_mcg - cleanup mcg for kref_put
 * @kref: struct kref embedded in mcg
 */
void rxe_cleanup_mcg(struct kref *kref)
{
	struct rxe_mcg *mcg = container_of(kref, typeof(*mcg), ref_cnt);

	kfree(mcg);
}

/**
 * __rxe_destroy_mcg - destroy mcg object holding rxe->mcg_lock
 * @mcg: the mcg object
 *
 * Context: caller is holding rxe->mcg_lock; no qp's are attached to mcg
 */
static void __rxe_destroy_mcg(struct rxe_mcg *mcg)
{
	struct rxe_dev *rxe = mcg->rxe;

	/* remove mcg from red-black tree then drop ref */
	__rxe_remove_mcg(mcg);
	kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);

	atomic_dec(&rxe->mcg_num);
}

/**
 * rxe_destroy_mcg - destroy mcg object
 * @mcg: the mcg object
 *
 * Context: no qp's are attached to mcg
 */
static void rxe_destroy_mcg(struct rxe_mcg *mcg)
{
	/* delete mcast address outside of lock */
	rxe_mcast_del(mcg->rxe, &mcg->mgid);

	spin_lock_bh(&mcg->rxe->mcg_lock);
	__rxe_destroy_mcg(mcg);
	spin_unlock_bh(&mcg->rxe->mcg_lock);
}

/**
 * __rxe_init_mca - initialize a new mca holding lock
 * @qp: qp object
 * @mcg: mcg object
 * @mca: empty space for new mca
 *
 * Context: caller must hold references on qp and mcg and rxe->mcg_lock,
 * and must pass memory for the new mca
 *
 * Returns: 0 on success else an error
 */
static int __rxe_init_mca(struct rxe_qp *qp, struct rxe_mcg *mcg,
			  struct rxe_mca *mca)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	int n;

	n = atomic_inc_return(&rxe->mcg_attach);
	if (n > rxe->attr.max_total_mcast_qp_attach) {
		atomic_dec(&rxe->mcg_attach);
		return -ENOMEM;
	}

	n = atomic_inc_return(&mcg->qp_num);
	if (n > rxe->attr.max_mcast_qp_attach) {
		atomic_dec(&mcg->qp_num);
		atomic_dec(&rxe->mcg_attach);
		return -ENOMEM;
	}

	atomic_inc(&qp->mcg_num);

	rxe_get(qp);
	mca->qp = qp;

	list_add_tail(&mca->qp_list, &mcg->qp_list);

	return 0;
}

/**
 * rxe_attach_mcg - attach qp to mcg if not already attached
 * @mcg: mcg object
 * @qp: qp object
 *
 * Context: caller must hold reference on qp and mcg.
 * Returns: 0 on success else an error
 */
static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
{
	struct rxe_dev *rxe = mcg->rxe;
	struct rxe_mca *mca, *tmp;
	int err;

	/* check to see if the qp is already a member of the group */
	spin_lock_bh(&rxe->mcg_lock);
	list_for_each_entry(mca, &mcg->qp_list, qp_list) {
		if (mca->qp == qp) {
			spin_unlock_bh(&rxe->mcg_lock);
			return 0;
		}
	}
	spin_unlock_bh(&rxe->mcg_lock);

	/* speculative alloc new mca without using GFP_ATOMIC */
	mca = kzalloc(sizeof(*mca), GFP_KERNEL);
	if (!mca)
		return -ENOMEM;

	spin_lock_bh(&rxe->mcg_lock);
	/* re-check to see if someone else just attached qp */
	list_for_each_entry(tmp, &mcg->qp_list, qp_list) {
		if (tmp->qp == qp) {
			kfree(mca);
			err = 0;
			goto out;
		}
	}

	err = __rxe_init_mca(qp, mcg, mca);
	if (err)
		kfree(mca);
out:
	spin_unlock_bh(&rxe->mcg_lock);
	return err;
}

/**
 * __rxe_cleanup_mca - cleanup mca object holding lock
 * @mca: mca object
 * @mcg: mcg object
 *
 * Context: caller must hold a reference to mcg and rxe->mcg_lock
 */
static void __rxe_cleanup_mca(struct rxe_mca *mca, struct rxe_mcg *mcg)
{
	list_del(&mca->qp_list);

	atomic_dec(&mcg->qp_num);
	atomic_dec(&mcg->rxe->mcg_attach);
	atomic_dec(&mca->qp->mcg_num);
	rxe_put(mca->qp);

	kfree(mca);
}

/**
 * rxe_detach_mcg - detach qp from mcg
 * @mcg: mcg object
 * @qp: qp object
 *
 * Returns: 0 on success else an error if qp is not attached.
 */
static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
{
	struct rxe_dev *rxe = mcg->rxe;
	struct rxe_mca *mca, *tmp;

	spin_lock_bh(&rxe->mcg_lock);
	list_for_each_entry_safe(mca, tmp, &mcg->qp_list, qp_list) {
		if (mca->qp == qp) {
			__rxe_cleanup_mca(mca, mcg);

			/* if the number of qp's attached to the
			 * mcast group falls to zero go ahead and
			 * tear it down. This will not free the
			 * object since we are still holding a ref
			 * from the caller
			 */
			if (atomic_read(&mcg->qp_num) <= 0)
				__rxe_destroy_mcg(mcg);

			spin_unlock_bh(&rxe->mcg_lock);
			return 0;
		}
	}

	/* we didn't find the qp on the list */
	spin_unlock_bh(&rxe->mcg_lock);
	return -EINVAL;
}

/**
 * rxe_attach_mcast - attach qp to multicast group (see IBA-11.3.1)
 * @ibqp: (IB) qp object
 * @mgid: multicast IP address
 * @mlid: multicast LID, ignored for RoCEv2 (see IBA-A17.5.6)
 *
 * Returns: 0 on success else an errno
 */
int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibqp->device);
	struct rxe_qp *qp = to_rqp(ibqp);
	struct rxe_mcg *mcg;

	/* takes a ref on mcg if successful */
	mcg = rxe_get_mcg(rxe, mgid);
	if (IS_ERR(mcg))
		return PTR_ERR(mcg);

	err = rxe_attach_mcg(mcg, qp);

	/* if we failed to attach the first qp to mcg tear it down */
	if (atomic_read(&mcg->qp_num) == 0)
		rxe_destroy_mcg(mcg);

	kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);

	return err;
}

/**
 * rxe_detach_mcast - detach qp from multicast group (see IBA-11.3.2)
 * @ibqp: address of (IB) qp object
 * @mgid: multicast IP address
 * @mlid: multicast LID, ignored for RoCEv2 (see IBA-A17.5.6)
 *
 * Returns: 0 on success else an errno
 */
int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
{
	struct rxe_dev *rxe = to_rdev(ibqp->device);
	struct rxe_qp *qp = to_rqp(ibqp);
	struct rxe_mcg *mcg;
	int err;

	mcg = rxe_lookup_mcg(rxe, mgid);
	if (!mcg)
		return -EINVAL;

	err = rxe_detach_mcg(mcg, qp);
	kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);

	return err;
}
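
/*
 * For context (not part of this file): rxe_attach_mcast() and
 * rxe_detach_mcast() are the verbs entry points for this code. They are
 * expected to be wired into the ib core through the device's
 * ib_device_ops table in rxe_verbs.c, roughly as sketched below:
 *
 *	static const struct ib_device_ops rxe_dev_ops = {
 *		...
 *		.attach_mcast = rxe_attach_mcast,
 *		.detach_mcast = rxe_detach_mcast,
 *		...
 *	};
 */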