siw_cm.c

  1. // SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
  2. /* Authors: Bernard Metzler <[email protected]> */
  3. /* Fredy Neeser */
  4. /* Greg Joyce <[email protected]> */
  5. /* Copyright (c) 2008-2019, IBM Corporation */
  6. /* Copyright (c) 2017, Open Grid Computing, Inc. */
  7. #include <linux/errno.h>
  8. #include <linux/types.h>
  9. #include <linux/net.h>
  10. #include <linux/inetdevice.h>
  11. #include <net/addrconf.h>
  12. #include <linux/workqueue.h>
  13. #include <net/sock.h>
  14. #include <net/tcp.h>
  15. #include <linux/inet.h>
  16. #include <linux/tcp.h>
  17. #include <rdma/iw_cm.h>
  18. #include <rdma/ib_verbs.h>
  19. #include <rdma/ib_user_verbs.h>
  20. #include "siw.h"
  21. #include "siw_cm.h"
  22. /*
  23. * Set to any combination of
  24. * MPA_V2_RDMA_NO_RTR, MPA_V2_RDMA_READ_RTR, MPA_V2_RDMA_WRITE_RTR
  25. */
  26. static __be16 rtr_type = MPA_V2_RDMA_READ_RTR | MPA_V2_RDMA_WRITE_RTR;
  27. static const bool relaxed_ird_negotiation = true;
  28. static void siw_cm_llp_state_change(struct sock *s);
  29. static void siw_cm_llp_data_ready(struct sock *s);
  30. static void siw_cm_llp_write_space(struct sock *s);
  31. static void siw_cm_llp_error_report(struct sock *s);
  32. static int siw_cm_upcall(struct siw_cep *cep, enum iw_cm_event_type reason,
  33. int status);
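/*
 * Socket callback management:
 * siw_sk_assign_cm_upcalls() redirects the TCP socket's state change,
 * data ready, write space and error report callbacks to the connection
 * manager. siw_sk_save_upcalls() keeps the original callbacks in the
 * CEP so that siw_sk_restore_upcalls() can reinstall them when the
 * socket is disassociated from the endpoint.
 */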
  34. static void siw_sk_assign_cm_upcalls(struct sock *sk)
  35. {
  36. write_lock_bh(&sk->sk_callback_lock);
  37. sk->sk_state_change = siw_cm_llp_state_change;
  38. sk->sk_data_ready = siw_cm_llp_data_ready;
  39. sk->sk_write_space = siw_cm_llp_write_space;
  40. sk->sk_error_report = siw_cm_llp_error_report;
  41. write_unlock_bh(&sk->sk_callback_lock);
  42. }
  43. static void siw_sk_save_upcalls(struct sock *sk)
  44. {
  45. struct siw_cep *cep = sk_to_cep(sk);
  46. write_lock_bh(&sk->sk_callback_lock);
  47. cep->sk_state_change = sk->sk_state_change;
  48. cep->sk_data_ready = sk->sk_data_ready;
  49. cep->sk_write_space = sk->sk_write_space;
  50. cep->sk_error_report = sk->sk_error_report;
  51. write_unlock_bh(&sk->sk_callback_lock);
  52. }
  53. static void siw_sk_restore_upcalls(struct sock *sk, struct siw_cep *cep)
  54. {
  55. sk->sk_state_change = cep->sk_state_change;
  56. sk->sk_data_ready = cep->sk_data_ready;
  57. sk->sk_write_space = cep->sk_write_space;
  58. sk->sk_error_report = cep->sk_error_report;
  59. sk->sk_user_data = NULL;
  60. }
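/*
 * siw_qp_socket_assoc()
 *
 * Hand socket RX/TX processing over to the QP: record the socket in
 * the QP attributes and install the QP's data ready and write space
 * callbacks.
 */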
  61. static void siw_qp_socket_assoc(struct siw_cep *cep, struct siw_qp *qp)
  62. {
  63. struct socket *s = cep->sock;
  64. struct sock *sk = s->sk;
  65. write_lock_bh(&sk->sk_callback_lock);
  66. qp->attrs.sk = s;
  67. sk->sk_data_ready = siw_qp_llp_data_ready;
  68. sk->sk_write_space = siw_qp_llp_write_space;
  69. write_unlock_bh(&sk->sk_callback_lock);
  70. }
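/*
 * siw_socket_disassoc()
 *
 * Detach a socket from its connection endpoint: restore the original
 * socket callbacks and drop the endpoint reference taken when the
 * socket was associated.
 */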
  71. static void siw_socket_disassoc(struct socket *s)
  72. {
  73. struct sock *sk = s->sk;
  74. struct siw_cep *cep;
  75. if (sk) {
  76. write_lock_bh(&sk->sk_callback_lock);
  77. cep = sk_to_cep(sk);
  78. if (cep) {
  79. siw_sk_restore_upcalls(sk, cep);
  80. siw_cep_put(cep);
  81. } else {
  82. pr_warn("siw: cannot restore sk callbacks: no ep\n");
  83. }
  84. write_unlock_bh(&sk->sk_callback_lock);
  85. } else {
  86. pr_warn("siw: cannot restore sk callbacks: no sk\n");
  87. }
  88. }
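/*
 * siw_rtr_data_ready()
 *
 * Data ready callback installed while waiting for the peer's RTR frame.
 * Feeds incoming data to siw_tcp_rx_data() and, if the first frame was
 * processed successfully, signals connection establishment and moves
 * the socket under full QP control.
 */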
  89. static void siw_rtr_data_ready(struct sock *sk)
  90. {
  91. struct siw_cep *cep;
  92. struct siw_qp *qp = NULL;
  93. read_descriptor_t rd_desc;
  94. read_lock(&sk->sk_callback_lock);
  95. cep = sk_to_cep(sk);
  96. if (!cep) {
  97. WARN(1, "No connection endpoint\n");
  98. goto out;
  99. }
  100. qp = sk_to_qp(sk);
  101. memset(&rd_desc, 0, sizeof(rd_desc));
  102. rd_desc.arg.data = qp;
  103. rd_desc.count = 1;
  104. tcp_read_sock(sk, &rd_desc, siw_tcp_rx_data);
  105. /*
  106. * Check if the first frame was successfully processed.
  107. * If so, signal full connection establishment.
  108. * Failed data processing would have already scheduled
  109. * connection drop.
  110. */
  111. if (!qp->rx_stream.rx_suspend)
  112. siw_cm_upcall(cep, IW_CM_EVENT_ESTABLISHED, 0);
  113. out:
  114. read_unlock(&sk->sk_callback_lock);
  115. if (qp)
  116. siw_qp_socket_assoc(cep, qp);
  117. }
  118. static void siw_sk_assign_rtr_upcalls(struct siw_cep *cep)
  119. {
  120. struct sock *sk = cep->sock->sk;
  121. write_lock_bh(&sk->sk_callback_lock);
  122. sk->sk_data_ready = siw_rtr_data_ready;
  123. sk->sk_write_space = siw_qp_llp_write_space;
  124. write_unlock_bh(&sk->sk_callback_lock);
  125. }
  126. static void siw_cep_socket_assoc(struct siw_cep *cep, struct socket *s)
  127. {
  128. cep->sock = s;
  129. siw_cep_get(cep);
  130. s->sk->sk_user_data = cep;
  131. siw_sk_save_upcalls(s->sk);
  132. siw_sk_assign_cm_upcalls(s->sk);
  133. }
  134. static struct siw_cep *siw_cep_alloc(struct siw_device *sdev)
  135. {
  136. struct siw_cep *cep = kzalloc(sizeof(*cep), GFP_KERNEL);
  137. unsigned long flags;
  138. if (!cep)
  139. return NULL;
  140. INIT_LIST_HEAD(&cep->listenq);
  141. INIT_LIST_HEAD(&cep->devq);
  142. INIT_LIST_HEAD(&cep->work_freelist);
  143. kref_init(&cep->ref);
  144. cep->state = SIW_EPSTATE_IDLE;
  145. init_waitqueue_head(&cep->waitq);
  146. spin_lock_init(&cep->lock);
  147. cep->sdev = sdev;
  148. cep->enhanced_rdma_conn_est = false;
  149. spin_lock_irqsave(&sdev->lock, flags);
  150. list_add_tail(&cep->devq, &sdev->cep_list);
  151. spin_unlock_irqrestore(&sdev->lock, flags);
  152. siw_dbg_cep(cep, "new endpoint\n");
  153. return cep;
  154. }
  155. static void siw_cm_free_work(struct siw_cep *cep)
  156. {
  157. struct list_head *w, *tmp;
  158. struct siw_cm_work *work;
  159. list_for_each_safe(w, tmp, &cep->work_freelist) {
  160. work = list_entry(w, struct siw_cm_work, list);
  161. list_del(&work->list);
  162. kfree(work);
  163. }
  164. }
  165. static void siw_cancel_mpatimer(struct siw_cep *cep)
  166. {
  167. spin_lock_bh(&cep->lock);
  168. if (cep->mpa_timer) {
  169. if (cancel_delayed_work(&cep->mpa_timer->work)) {
  170. siw_cep_put(cep);
  171. kfree(cep->mpa_timer); /* not needed again */
  172. }
  173. cep->mpa_timer = NULL;
  174. }
  175. spin_unlock_bh(&cep->lock);
  176. }
  177. static void siw_put_work(struct siw_cm_work *work)
  178. {
  179. INIT_LIST_HEAD(&work->list);
  180. spin_lock_bh(&work->cep->lock);
  181. list_add(&work->list, &work->cep->work_freelist);
  182. spin_unlock_bh(&work->cep->lock);
  183. }
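/*
 * siw_cep_set_inuse()/siw_cep_set_free()
 *
 * Simple exclusion mechanism serializing access to a CEP: set_inuse()
 * waits until the endpoint is no longer marked busy and then claims it,
 * set_free() releases it and wakes up any waiter.
 */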
  184. static void siw_cep_set_inuse(struct siw_cep *cep)
  185. {
  186. unsigned long flags;
  187. retry:
  188. spin_lock_irqsave(&cep->lock, flags);
  189. if (cep->in_use) {
  190. spin_unlock_irqrestore(&cep->lock, flags);
  191. wait_event_interruptible(cep->waitq, !cep->in_use);
  192. if (signal_pending(current))
  193. flush_signals(current);
  194. goto retry;
  195. } else {
  196. cep->in_use = 1;
  197. spin_unlock_irqrestore(&cep->lock, flags);
  198. }
  199. }
  200. static void siw_cep_set_free(struct siw_cep *cep)
  201. {
  202. unsigned long flags;
  203. spin_lock_irqsave(&cep->lock, flags);
  204. cep->in_use = 0;
  205. spin_unlock_irqrestore(&cep->lock, flags);
  206. wake_up(&cep->waitq);
  207. }
  208. static void __siw_cep_dealloc(struct kref *ref)
  209. {
  210. struct siw_cep *cep = container_of(ref, struct siw_cep, ref);
  211. struct siw_device *sdev = cep->sdev;
  212. unsigned long flags;
  213. WARN_ON(cep->listen_cep);
  214. /* kfree(NULL) is safe */
  215. kfree(cep->mpa.pdata);
  216. spin_lock_bh(&cep->lock);
  217. if (!list_empty(&cep->work_freelist))
  218. siw_cm_free_work(cep);
  219. spin_unlock_bh(&cep->lock);
  220. spin_lock_irqsave(&sdev->lock, flags);
  221. list_del(&cep->devq);
  222. spin_unlock_irqrestore(&sdev->lock, flags);
  223. siw_dbg_cep(cep, "free endpoint\n");
  224. kfree(cep);
  225. }
  226. static struct siw_cm_work *siw_get_work(struct siw_cep *cep)
  227. {
  228. struct siw_cm_work *work = NULL;
  229. spin_lock_bh(&cep->lock);
  230. if (!list_empty(&cep->work_freelist)) {
  231. work = list_entry(cep->work_freelist.next, struct siw_cm_work,
  232. list);
  233. list_del_init(&work->list);
  234. }
  235. spin_unlock_bh(&cep->lock);
  236. return work;
  237. }
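/*
 * siw_cm_alloc_work()
 *
 * Pre-allocate 'num' work elements on the endpoint's free list.
 * Frees any partially allocated list and returns -ENOMEM on failure.
 */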
  238. static int siw_cm_alloc_work(struct siw_cep *cep, int num)
  239. {
  240. struct siw_cm_work *work;
  241. while (num--) {
  242. work = kmalloc(sizeof(*work), GFP_KERNEL);
  243. if (!work) {
  244. if (!(list_empty(&cep->work_freelist)))
  245. siw_cm_free_work(cep);
  246. return -ENOMEM;
  247. }
  248. work->cep = cep;
  249. INIT_LIST_HEAD(&work->list);
  250. list_add(&work->list, &cep->work_freelist);
  251. }
  252. return 0;
  253. }
  254. /*
  255. * siw_cm_upcall()
  256. *
  257. * Upcall to IWCM to inform about async connection events
  258. */
  259. static int siw_cm_upcall(struct siw_cep *cep, enum iw_cm_event_type reason,
  260. int status)
  261. {
  262. struct iw_cm_event event;
  263. struct iw_cm_id *id;
  264. memset(&event, 0, sizeof(event));
  265. event.status = status;
  266. event.event = reason;
  267. if (reason == IW_CM_EVENT_CONNECT_REQUEST) {
  268. event.provider_data = cep;
  269. id = cep->listen_cep->cm_id;
  270. } else {
  271. id = cep->cm_id;
  272. }
  273. /* Signal IRD and ORD */
  274. if (reason == IW_CM_EVENT_ESTABLISHED ||
  275. reason == IW_CM_EVENT_CONNECT_REPLY) {
  276. /* Signal negotiated IRD/ORD values we will use */
  277. event.ird = cep->ird;
  278. event.ord = cep->ord;
  279. } else if (reason == IW_CM_EVENT_CONNECT_REQUEST) {
  280. event.ird = cep->ord;
  281. event.ord = cep->ird;
  282. }
  283. /* Signal private data and address information */
  284. if (reason == IW_CM_EVENT_CONNECT_REQUEST ||
  285. reason == IW_CM_EVENT_CONNECT_REPLY) {
  286. u16 pd_len = be16_to_cpu(cep->mpa.hdr.params.pd_len);
  287. if (pd_len) {
  288. /*
  289. * hand over MPA private data
  290. */
  291. event.private_data_len = pd_len;
  292. event.private_data = cep->mpa.pdata;
  293. /* Hide MPA V2 IRD/ORD control */
  294. if (cep->enhanced_rdma_conn_est) {
  295. event.private_data_len -=
  296. sizeof(struct mpa_v2_data);
  297. event.private_data +=
  298. sizeof(struct mpa_v2_data);
  299. }
  300. }
  301. getname_local(cep->sock, &event.local_addr);
  302. getname_peer(cep->sock, &event.remote_addr);
  303. }
  304. siw_dbg_cep(cep, "[QP %u]: reason=%d, status=%d\n",
  305. cep->qp ? qp_id(cep->qp) : UINT_MAX, reason, status);
  306. return id->event_handler(id, &event);
  307. }
  308. /*
  309. * siw_qp_cm_drop()
  310. *
  311. * Drops established LLP connection if present and not already
  312. * scheduled for dropping. Called from user context, SQ workqueue
  313. * or receive IRQ. Caller signals if socket can be immediately
  314. * closed (basically, if not in IRQ).
  315. */
  316. void siw_qp_cm_drop(struct siw_qp *qp, int schedule)
  317. {
  318. struct siw_cep *cep = qp->cep;
  319. qp->rx_stream.rx_suspend = 1;
  320. qp->tx_ctx.tx_suspend = 1;
  321. if (!qp->cep)
  322. return;
  323. if (schedule) {
  324. siw_cm_queue_work(cep, SIW_CM_WORK_CLOSE_LLP);
  325. } else {
  326. siw_cep_set_inuse(cep);
  327. if (cep->state == SIW_EPSTATE_CLOSED) {
  328. siw_dbg_cep(cep, "already closed\n");
  329. goto out;
  330. }
  331. siw_dbg_cep(cep, "immediate close, state %d\n", cep->state);
  332. if (qp->term_info.valid)
  333. siw_send_terminate(qp);
  334. if (cep->cm_id) {
  335. switch (cep->state) {
  336. case SIW_EPSTATE_AWAIT_MPAREP:
  337. siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
  338. -EINVAL);
  339. break;
  340. case SIW_EPSTATE_RDMA_MODE:
  341. siw_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0);
  342. break;
  343. case SIW_EPSTATE_IDLE:
  344. case SIW_EPSTATE_LISTENING:
  345. case SIW_EPSTATE_CONNECTING:
  346. case SIW_EPSTATE_AWAIT_MPAREQ:
  347. case SIW_EPSTATE_RECVD_MPAREQ:
  348. case SIW_EPSTATE_CLOSED:
  349. default:
  350. break;
  351. }
  352. cep->cm_id->rem_ref(cep->cm_id);
  353. cep->cm_id = NULL;
  354. siw_cep_put(cep);
  355. }
  356. cep->state = SIW_EPSTATE_CLOSED;
  357. if (cep->sock) {
  358. siw_socket_disassoc(cep->sock);
  359. /*
  360. * Immediately close socket
  361. */
  362. sock_release(cep->sock);
  363. cep->sock = NULL;
  364. }
  365. if (cep->qp) {
  366. cep->qp = NULL;
  367. siw_qp_put(qp);
  368. }
  369. out:
  370. siw_cep_set_free(cep);
  371. }
  372. }
  373. void siw_cep_put(struct siw_cep *cep)
  374. {
  375. WARN_ON(kref_read(&cep->ref) < 1);
  376. kref_put(&cep->ref, __siw_cep_dealloc);
  377. }
  378. void siw_cep_get(struct siw_cep *cep)
  379. {
  380. kref_get(&cep->ref);
  381. }
  382. /*
  383. * Expects params->pd_len in host byte order
  384. */
  385. static int siw_send_mpareqrep(struct siw_cep *cep, const void *pdata, u8 pd_len)
  386. {
  387. struct socket *s = cep->sock;
  388. struct mpa_rr *rr = &cep->mpa.hdr;
  389. struct kvec iov[3];
  390. struct msghdr msg;
  391. int rv;
  392. int iovec_num = 0;
  393. int mpa_len;
  394. memset(&msg, 0, sizeof(msg));
  395. iov[iovec_num].iov_base = rr;
  396. iov[iovec_num].iov_len = sizeof(*rr);
  397. mpa_len = sizeof(*rr);
  398. if (cep->enhanced_rdma_conn_est) {
  399. iovec_num++;
  400. iov[iovec_num].iov_base = &cep->mpa.v2_ctrl;
  401. iov[iovec_num].iov_len = sizeof(cep->mpa.v2_ctrl);
  402. mpa_len += sizeof(cep->mpa.v2_ctrl);
  403. }
  404. if (pd_len) {
  405. iovec_num++;
  406. iov[iovec_num].iov_base = (char *)pdata;
  407. iov[iovec_num].iov_len = pd_len;
  408. mpa_len += pd_len;
  409. }
  410. if (cep->enhanced_rdma_conn_est)
  411. pd_len += sizeof(cep->mpa.v2_ctrl);
  412. rr->params.pd_len = cpu_to_be16(pd_len);
  413. rv = kernel_sendmsg(s, &msg, iov, iovec_num + 1, mpa_len);
  414. return rv < 0 ? rv : 0;
  415. }
  416. /*
  417. * Receive MPA Request/Reply header.
  418. *
  419. * Returns 0 if the complete MPA Request/Reply header, including
  420. * any private data, was received. Returns -EAGAIN if the header
  421. * was only partially received, or a negative error code otherwise.
  422. *
  423. * Context: May be called in process context only
  424. */
  425. static int siw_recv_mpa_rr(struct siw_cep *cep)
  426. {
  427. struct mpa_rr *hdr = &cep->mpa.hdr;
  428. struct socket *s = cep->sock;
  429. u16 pd_len;
  430. int rcvd, to_rcv;
  431. if (cep->mpa.bytes_rcvd < sizeof(struct mpa_rr)) {
  432. rcvd = ksock_recv(s, (char *)hdr + cep->mpa.bytes_rcvd,
  433. sizeof(struct mpa_rr) - cep->mpa.bytes_rcvd,
  434. 0);
  435. if (rcvd <= 0)
  436. return -ECONNABORTED;
  437. cep->mpa.bytes_rcvd += rcvd;
  438. if (cep->mpa.bytes_rcvd < sizeof(struct mpa_rr))
  439. return -EAGAIN;
  440. if (be16_to_cpu(hdr->params.pd_len) > MPA_MAX_PRIVDATA)
  441. return -EPROTO;
  442. }
  443. pd_len = be16_to_cpu(hdr->params.pd_len);
  444. /*
  445. * At least the MPA Request/Reply header (frame not including
  446. * private data) has been received.
  447. * Receive (or continue receiving) any private data.
  448. */
  449. to_rcv = pd_len - (cep->mpa.bytes_rcvd - sizeof(struct mpa_rr));
  450. if (!to_rcv) {
  451. /*
  452. * We must have hdr->params.pd_len == 0 and thus received a
  453. * complete MPA Request/Reply frame.
  454. * Check against peer protocol violation.
  455. */
  456. u32 word;
  457. rcvd = ksock_recv(s, (char *)&word, sizeof(word), MSG_DONTWAIT);
  458. if (rcvd == -EAGAIN)
  459. return 0;
  460. if (rcvd == 0) {
  461. siw_dbg_cep(cep, "peer EOF\n");
  462. return -EPIPE;
  463. }
  464. if (rcvd < 0) {
  465. siw_dbg_cep(cep, "error: %d\n", rcvd);
  466. return rcvd;
  467. }
  468. siw_dbg_cep(cep, "peer sent extra data: %d\n", rcvd);
  469. return -EPROTO;
  470. }
  471. /*
  472. * At this point, we must have hdr->params.pd_len != 0.
  473. * A private data buffer gets allocated if hdr->params.pd_len != 0.
  474. */
  475. if (!cep->mpa.pdata) {
  476. cep->mpa.pdata = kmalloc(pd_len + 4, GFP_KERNEL);
  477. if (!cep->mpa.pdata)
  478. return -ENOMEM;
  479. }
  480. rcvd = ksock_recv(
  481. s, cep->mpa.pdata + cep->mpa.bytes_rcvd - sizeof(struct mpa_rr),
  482. to_rcv + 4, MSG_DONTWAIT);
  483. if (rcvd < 0)
  484. return rcvd;
  485. if (rcvd > to_rcv)
  486. return -EPROTO;
  487. cep->mpa.bytes_rcvd += rcvd;
  488. if (to_rcv == rcvd) {
  489. siw_dbg_cep(cep, "%d bytes private data received\n", pd_len);
  490. return 0;
  491. }
  492. return -EAGAIN;
  493. }
  494. /*
  495. * siw_proc_mpareq()
  496. *
  497. * Read MPA Request from socket and signal new connection to IWCM
  498. * on success. Caller must hold the lock on the corresponding listening CEP.
  499. */
  500. static int siw_proc_mpareq(struct siw_cep *cep)
  501. {
  502. struct mpa_rr *req;
  503. int version, rv;
  504. u16 pd_len;
  505. rv = siw_recv_mpa_rr(cep);
  506. if (rv)
  507. return rv;
  508. req = &cep->mpa.hdr;
  509. version = __mpa_rr_revision(req->params.bits);
  510. pd_len = be16_to_cpu(req->params.pd_len);
  511. if (version > MPA_REVISION_2)
  512. /* allow for 0, 1, and 2 only */
  513. return -EPROTO;
  514. if (memcmp(req->key, MPA_KEY_REQ, 16))
  515. return -EPROTO;
  516. /* Prepare for sending MPA reply */
  517. memcpy(req->key, MPA_KEY_REP, 16);
  518. if (version == MPA_REVISION_2 &&
  519. (req->params.bits & MPA_RR_FLAG_ENHANCED)) {
  520. /*
  521. * MPA version 2 must signal IRD/ORD values and P2P mode
  522. * in private data if header flag MPA_RR_FLAG_ENHANCED
  523. * is set.
  524. */
  525. if (pd_len < sizeof(struct mpa_v2_data))
  526. goto reject_conn;
  527. cep->enhanced_rdma_conn_est = true;
  528. }
  529. /* MPA Markers: currently not supported. Marker TX to be added. */
  530. if (req->params.bits & MPA_RR_FLAG_MARKERS)
  531. goto reject_conn;
  532. if (req->params.bits & MPA_RR_FLAG_CRC) {
  533. /*
  534. * RFC 5044, page 27: CRC MUST be used if peer requests it.
  535. * siw specific: reject the connection if the peer requests
  536. * CRC while CRC is locally disabled and the 'mpa_crc_strict'
  537. * module parameter enforces strict matching.
  538. */
  539. if (!mpa_crc_required && mpa_crc_strict)
  540. goto reject_conn;
  541. /* Enable CRC if requested by module parameter */
  542. if (mpa_crc_required)
  543. req->params.bits |= MPA_RR_FLAG_CRC;
  544. }
  545. if (cep->enhanced_rdma_conn_est) {
  546. struct mpa_v2_data *v2 = (struct mpa_v2_data *)cep->mpa.pdata;
  547. /*
  548. * Peer requested ORD becomes requested local IRD,
  549. * peer requested IRD becomes requested local ORD.
  550. * IRD and ORD get limited by global maximum values.
  551. */
  552. cep->ord = ntohs(v2->ird) & MPA_IRD_ORD_MASK;
  553. cep->ord = min(cep->ord, SIW_MAX_ORD_QP);
  554. cep->ird = ntohs(v2->ord) & MPA_IRD_ORD_MASK;
  555. cep->ird = min(cep->ird, SIW_MAX_IRD_QP);
  556. /* May get overwritten by locally negotiated values */
  557. cep->mpa.v2_ctrl.ird = htons(cep->ird);
  558. cep->mpa.v2_ctrl.ord = htons(cep->ord);
  559. /*
  560. * Support for peer sent zero length Write or Read to
  561. * let local side enter RTS. Writes are preferred.
  562. * Sends would require pre-posting a Receive and are
  563. * not supported.
  564. * Propose zero length Write if none of Read and Write
  565. * is indicated.
  566. */
  567. if (v2->ird & MPA_V2_PEER_TO_PEER) {
  568. cep->mpa.v2_ctrl.ird |= MPA_V2_PEER_TO_PEER;
  569. if (v2->ord & MPA_V2_RDMA_WRITE_RTR)
  570. cep->mpa.v2_ctrl.ord |= MPA_V2_RDMA_WRITE_RTR;
  571. else if (v2->ord & MPA_V2_RDMA_READ_RTR)
  572. cep->mpa.v2_ctrl.ord |= MPA_V2_RDMA_READ_RTR;
  573. else
  574. cep->mpa.v2_ctrl.ord |= MPA_V2_RDMA_WRITE_RTR;
  575. }
  576. }
  577. cep->state = SIW_EPSTATE_RECVD_MPAREQ;
  578. /* Keep reference until IWCM accepts/rejects */
  579. siw_cep_get(cep);
  580. rv = siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REQUEST, 0);
  581. if (rv)
  582. siw_cep_put(cep);
  583. return rv;
  584. reject_conn:
  585. siw_dbg_cep(cep, "reject: crc %d:%d:%d, m %d:%d\n",
  586. req->params.bits & MPA_RR_FLAG_CRC ? 1 : 0,
  587. mpa_crc_required, mpa_crc_strict,
  588. req->params.bits & MPA_RR_FLAG_MARKERS ? 1 : 0, 0);
  589. req->params.bits &= ~MPA_RR_FLAG_MARKERS;
  590. req->params.bits |= MPA_RR_FLAG_REJECT;
  591. if (!mpa_crc_required && mpa_crc_strict)
  592. req->params.bits &= ~MPA_RR_FLAG_CRC;
  593. if (pd_len)
  594. kfree(cep->mpa.pdata);
  595. cep->mpa.pdata = NULL;
  596. siw_send_mpareqrep(cep, NULL, 0);
  597. return -EOPNOTSUPP;
  598. }
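/*
 * siw_proc_mpareply()
 *
 * Process the MPA Reply received on an active (client side) connection:
 * validate revision and key, handle reject and CRC/marker options,
 * complete MPA v2 IRD/ORD and RTR negotiation, move the QP to RTS and
 * signal IW_CM_EVENT_CONNECT_REPLY to the IWCM.
 */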
  599. static int siw_proc_mpareply(struct siw_cep *cep)
  600. {
  601. struct siw_qp_attrs qp_attrs;
  602. enum siw_qp_attr_mask qp_attr_mask;
  603. struct siw_qp *qp = cep->qp;
  604. struct mpa_rr *rep;
  605. int rv;
  606. u16 rep_ord;
  607. u16 rep_ird;
  608. bool ird_insufficient = false;
  609. enum mpa_v2_ctrl mpa_p2p_mode = MPA_V2_RDMA_NO_RTR;
  610. rv = siw_recv_mpa_rr(cep);
  611. if (rv)
  612. goto out_err;
  613. siw_cancel_mpatimer(cep);
  614. rep = &cep->mpa.hdr;
  615. if (__mpa_rr_revision(rep->params.bits) > MPA_REVISION_2) {
  616. /* allow for 0, 1, and 2 only */
  617. rv = -EPROTO;
  618. goto out_err;
  619. }
  620. if (memcmp(rep->key, MPA_KEY_REP, 16)) {
  621. siw_init_terminate(qp, TERM_ERROR_LAYER_LLP, LLP_ETYPE_MPA,
  622. LLP_ECODE_INVALID_REQ_RESP, 0);
  623. siw_send_terminate(qp);
  624. rv = -EPROTO;
  625. goto out_err;
  626. }
  627. if (rep->params.bits & MPA_RR_FLAG_REJECT) {
  628. siw_dbg_cep(cep, "got mpa reject\n");
  629. siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -ECONNRESET);
  630. return -ECONNRESET;
  631. }
  632. if (try_gso && rep->params.bits & MPA_RR_FLAG_GSO_EXP) {
  633. siw_dbg_cep(cep, "peer allows GSO on TX\n");
  634. qp->tx_ctx.gso_seg_limit = 0;
  635. }
  636. if ((rep->params.bits & MPA_RR_FLAG_MARKERS) ||
  637. (mpa_crc_required && !(rep->params.bits & MPA_RR_FLAG_CRC)) ||
  638. (mpa_crc_strict && !mpa_crc_required &&
  639. (rep->params.bits & MPA_RR_FLAG_CRC))) {
  640. siw_dbg_cep(cep, "reply unsupp: crc %d:%d:%d, m %d:%d\n",
  641. rep->params.bits & MPA_RR_FLAG_CRC ? 1 : 0,
  642. mpa_crc_required, mpa_crc_strict,
  643. rep->params.bits & MPA_RR_FLAG_MARKERS ? 1 : 0, 0);
  644. siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -ECONNREFUSED);
  645. return -EINVAL;
  646. }
  647. if (cep->enhanced_rdma_conn_est) {
  648. struct mpa_v2_data *v2;
  649. if (__mpa_rr_revision(rep->params.bits) < MPA_REVISION_2 ||
  650. !(rep->params.bits & MPA_RR_FLAG_ENHANCED)) {
  651. /*
  652. * Protocol failure: The responder MUST reply with
  653. * MPA version 2 and MUST set MPA_RR_FLAG_ENHANCED.
  654. */
  655. siw_dbg_cep(cep, "mpa reply error: vers %d, enhcd %d\n",
  656. __mpa_rr_revision(rep->params.bits),
  657. rep->params.bits & MPA_RR_FLAG_ENHANCED ?
  658. 1 :
  659. 0);
  660. siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
  661. -ECONNRESET);
  662. return -EINVAL;
  663. }
  664. v2 = (struct mpa_v2_data *)cep->mpa.pdata;
  665. rep_ird = ntohs(v2->ird) & MPA_IRD_ORD_MASK;
  666. rep_ord = ntohs(v2->ord) & MPA_IRD_ORD_MASK;
  667. if (cep->ird < rep_ord &&
  668. (relaxed_ird_negotiation == false ||
  669. rep_ord > cep->sdev->attrs.max_ird)) {
  670. siw_dbg_cep(cep, "ird %d, rep_ord %d, max_ord %d\n",
  671. cep->ird, rep_ord,
  672. cep->sdev->attrs.max_ord);
  673. ird_insufficient = true;
  674. }
  675. if (cep->ord > rep_ird && relaxed_ird_negotiation == false) {
  676. siw_dbg_cep(cep, "ord %d, rep_ird %d\n", cep->ord,
  677. rep_ird);
  678. ird_insufficient = true;
  679. }
  680. /*
  681. * Always report negotiated peer values to user,
  682. * even if IRD/ORD negotiation failed
  683. */
  684. cep->ird = rep_ord;
  685. cep->ord = rep_ird;
  686. if (ird_insufficient) {
  687. /*
  688. * If the initiator IRD is insufficient for the
  689. * responder ORD, send a TERM.
  690. */
  691. siw_init_terminate(qp, TERM_ERROR_LAYER_LLP,
  692. LLP_ETYPE_MPA,
  693. LLP_ECODE_INSUFFICIENT_IRD, 0);
  694. siw_send_terminate(qp);
  695. rv = -ENOMEM;
  696. goto out_err;
  697. }
  698. if (cep->mpa.v2_ctrl_req.ird & MPA_V2_PEER_TO_PEER)
  699. mpa_p2p_mode =
  700. cep->mpa.v2_ctrl_req.ord &
  701. (MPA_V2_RDMA_WRITE_RTR | MPA_V2_RDMA_READ_RTR);
  702. /*
  703. * Check if we requested P2P mode, and if peer agrees
  704. */
  705. if (mpa_p2p_mode != MPA_V2_RDMA_NO_RTR) {
  706. if ((mpa_p2p_mode & v2->ord) == 0) {
  707. /*
  708. * We requested RTR mode(s), but the peer
  709. * did not pick any mode we support.
  710. */
  711. siw_dbg_cep(cep,
  712. "rtr mode: req %2x, got %2x\n",
  713. mpa_p2p_mode,
  714. v2->ord & (MPA_V2_RDMA_WRITE_RTR |
  715. MPA_V2_RDMA_READ_RTR));
  716. siw_init_terminate(qp, TERM_ERROR_LAYER_LLP,
  717. LLP_ETYPE_MPA,
  718. LLP_ECODE_NO_MATCHING_RTR,
  719. 0);
  720. siw_send_terminate(qp);
  721. rv = -EPROTO;
  722. goto out_err;
  723. }
  724. mpa_p2p_mode = v2->ord & (MPA_V2_RDMA_WRITE_RTR |
  725. MPA_V2_RDMA_READ_RTR);
  726. }
  727. }
  728. memset(&qp_attrs, 0, sizeof(qp_attrs));
  729. if (rep->params.bits & MPA_RR_FLAG_CRC)
  730. qp_attrs.flags = SIW_MPA_CRC;
  731. qp_attrs.irq_size = cep->ird;
  732. qp_attrs.orq_size = cep->ord;
  733. qp_attrs.sk = cep->sock;
  734. qp_attrs.state = SIW_QP_STATE_RTS;
  735. qp_attr_mask = SIW_QP_ATTR_STATE | SIW_QP_ATTR_LLP_HANDLE |
  736. SIW_QP_ATTR_ORD | SIW_QP_ATTR_IRD | SIW_QP_ATTR_MPA;
  737. /* Move socket RX/TX under QP control */
  738. down_write(&qp->state_lock);
  739. if (qp->attrs.state > SIW_QP_STATE_RTR) {
  740. rv = -EINVAL;
  741. up_write(&qp->state_lock);
  742. goto out_err;
  743. }
  744. rv = siw_qp_modify(qp, &qp_attrs, qp_attr_mask);
  745. siw_qp_socket_assoc(cep, qp);
  746. up_write(&qp->state_lock);
  747. /* Send extra RDMA frame to trigger peer RTS if negotiated */
  748. if (mpa_p2p_mode != MPA_V2_RDMA_NO_RTR) {
  749. rv = siw_qp_mpa_rts(qp, mpa_p2p_mode);
  750. if (rv)
  751. goto out_err;
  752. }
  753. if (!rv) {
  754. rv = siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, 0);
  755. if (!rv)
  756. cep->state = SIW_EPSTATE_RDMA_MODE;
  757. return 0;
  758. }
  759. out_err:
  760. if (rv != -EAGAIN)
  761. siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -EINVAL);
  762. return rv;
  763. }
  764. /*
  765. * siw_accept_newconn - accept an incoming pending connection
  766. *
  767. */
  768. static void siw_accept_newconn(struct siw_cep *cep)
  769. {
  770. struct socket *s = cep->sock;
  771. struct socket *new_s = NULL;
  772. struct siw_cep *new_cep = NULL;
  773. int rv = 0; /* debug only. should disappear */
  774. if (cep->state != SIW_EPSTATE_LISTENING)
  775. goto error;
  776. new_cep = siw_cep_alloc(cep->sdev);
  777. if (!new_cep)
  778. goto error;
  779. /*
  780. * 4: Allocate a sufficient number of work elements
  781. * to allow concurrent handling of local + peer close
  782. * events, MPA header processing + MPA timeout.
  783. */
  784. if (siw_cm_alloc_work(new_cep, 4) != 0)
  785. goto error;
  786. /*
  787. * Copy saved socket callbacks from listening CEP
  788. * and assign new socket with new CEP
  789. */
  790. new_cep->sk_state_change = cep->sk_state_change;
  791. new_cep->sk_data_ready = cep->sk_data_ready;
  792. new_cep->sk_write_space = cep->sk_write_space;
  793. new_cep->sk_error_report = cep->sk_error_report;
  794. rv = kernel_accept(s, &new_s, O_NONBLOCK);
  795. if (rv != 0) {
  796. /*
  797. * Connection already aborted by peer..?
  798. */
  799. siw_dbg_cep(cep, "kernel_accept() error: %d\n", rv);
  800. goto error;
  801. }
  802. new_cep->sock = new_s;
  803. siw_cep_get(new_cep);
  804. new_s->sk->sk_user_data = new_cep;
  805. if (siw_tcp_nagle == false)
  806. tcp_sock_set_nodelay(new_s->sk);
  807. new_cep->state = SIW_EPSTATE_AWAIT_MPAREQ;
  808. rv = siw_cm_queue_work(new_cep, SIW_CM_WORK_MPATIMEOUT);
  809. if (rv)
  810. goto error;
  811. /*
  812. * See siw_proc_mpareq() etc. for the use of new_cep->listen_cep.
  813. */
  814. new_cep->listen_cep = cep;
  815. siw_cep_get(cep);
  816. if (atomic_read(&new_s->sk->sk_rmem_alloc)) {
  817. /*
  818. * MPA REQ already queued
  819. */
  820. siw_dbg_cep(cep, "immediate mpa request\n");
  821. siw_cep_set_inuse(new_cep);
  822. rv = siw_proc_mpareq(new_cep);
  823. if (rv != -EAGAIN) {
  824. siw_cep_put(cep);
  825. new_cep->listen_cep = NULL;
  826. if (rv) {
  827. siw_cancel_mpatimer(new_cep);
  828. siw_cep_set_free(new_cep);
  829. goto error;
  830. }
  831. }
  832. siw_cep_set_free(new_cep);
  833. }
  834. return;
  835. error:
  836. if (new_cep)
  837. siw_cep_put(new_cep);
  838. if (new_s) {
  839. siw_socket_disassoc(new_s);
  840. sock_release(new_s);
  841. new_cep->sock = NULL;
  842. }
  843. siw_dbg_cep(cep, "error %d\n", rv);
  844. }
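/*
 * siw_cm_work_handler()
 *
 * Central work handler executed from the siw_cm_wq workqueue.
 * Dispatches connection accept, MPA header processing, LLP/peer close
 * and MPA timeout work items and, where required, tears down the
 * endpoint (QP association, socket and CM id references).
 */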
  845. static void siw_cm_work_handler(struct work_struct *w)
  846. {
  847. struct siw_cm_work *work;
  848. struct siw_cep *cep;
  849. int release_cep = 0, rv = 0;
  850. work = container_of(w, struct siw_cm_work, work.work);
  851. cep = work->cep;
  852. siw_dbg_cep(cep, "[QP %u]: work type: %d, state %d\n",
  853. cep->qp ? qp_id(cep->qp) : UINT_MAX,
  854. work->type, cep->state);
  855. siw_cep_set_inuse(cep);
  856. switch (work->type) {
  857. case SIW_CM_WORK_ACCEPT:
  858. siw_accept_newconn(cep);
  859. break;
  860. case SIW_CM_WORK_READ_MPAHDR:
  861. if (cep->state == SIW_EPSTATE_AWAIT_MPAREQ) {
  862. if (cep->listen_cep) {
  863. siw_cep_set_inuse(cep->listen_cep);
  864. if (cep->listen_cep->state ==
  865. SIW_EPSTATE_LISTENING)
  866. rv = siw_proc_mpareq(cep);
  867. else
  868. rv = -EFAULT;
  869. siw_cep_set_free(cep->listen_cep);
  870. if (rv != -EAGAIN) {
  871. siw_cep_put(cep->listen_cep);
  872. cep->listen_cep = NULL;
  873. if (rv)
  874. siw_cep_put(cep);
  875. }
  876. }
  877. } else if (cep->state == SIW_EPSTATE_AWAIT_MPAREP) {
  878. rv = siw_proc_mpareply(cep);
  879. } else {
  880. /*
  881. * CEP already moved out of MPA handshake.
  882. * Any connection management was already done;
  883. * silently ignore the MPA packet.
  884. */
  885. if (cep->state == SIW_EPSTATE_RDMA_MODE) {
  886. cep->sock->sk->sk_data_ready(cep->sock->sk);
  887. siw_dbg_cep(cep, "already in RDMA mode");
  888. } else {
  889. siw_dbg_cep(cep, "out of state: %d\n",
  890. cep->state);
  891. }
  892. }
  893. if (rv && rv != -EAGAIN)
  894. release_cep = 1;
  895. break;
  896. case SIW_CM_WORK_CLOSE_LLP:
  897. /*
  898. * QP scheduled LLP close
  899. */
  900. if (cep->qp && cep->qp->term_info.valid)
  901. siw_send_terminate(cep->qp);
  902. if (cep->cm_id)
  903. siw_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0);
  904. release_cep = 1;
  905. break;
  906. case SIW_CM_WORK_PEER_CLOSE:
  907. if (cep->cm_id) {
  908. if (cep->state == SIW_EPSTATE_AWAIT_MPAREP) {
  909. /*
  910. * MPA reply not received, but connection drop
  911. */
  912. siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
  913. -ECONNRESET);
  914. } else if (cep->state == SIW_EPSTATE_RDMA_MODE) {
  915. /*
  916. * NOTE: IW_CM_EVENT_DISCONNECT is given just
  917. * to transition IWCM into CLOSING.
  918. */
  919. siw_cm_upcall(cep, IW_CM_EVENT_DISCONNECT, 0);
  920. siw_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0);
  921. }
  922. /*
  923. * for other states there is no connection
  924. * known to the IWCM.
  925. */
  926. } else {
  927. if (cep->state == SIW_EPSTATE_RECVD_MPAREQ) {
  928. /*
  929. * Wait for the ulp/CM to call accept/reject
  930. */
  931. siw_dbg_cep(cep,
  932. "mpa req recvd, wait for ULP\n");
  933. } else if (cep->state == SIW_EPSTATE_AWAIT_MPAREQ) {
  934. /*
  935. * Socket close before MPA request received.
  936. */
  937. if (cep->listen_cep) {
  938. siw_dbg_cep(cep,
  939. "no mpareq: drop listener\n");
  940. siw_cep_put(cep->listen_cep);
  941. cep->listen_cep = NULL;
  942. }
  943. }
  944. }
  945. release_cep = 1;
  946. break;
  947. case SIW_CM_WORK_MPATIMEOUT:
  948. cep->mpa_timer = NULL;
  949. if (cep->state == SIW_EPSTATE_AWAIT_MPAREP) {
  950. /*
  951. * MPA request timed out:
  952. * Hide any partially received private data and signal
  953. * timeout
  954. */
  955. cep->mpa.hdr.params.pd_len = 0;
  956. if (cep->cm_id)
  957. siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
  958. -ETIMEDOUT);
  959. release_cep = 1;
  960. } else if (cep->state == SIW_EPSTATE_AWAIT_MPAREQ) {
  961. /*
  962. * No MPA request received after peer TCP stream setup.
  963. */
  964. if (cep->listen_cep) {
  965. siw_cep_put(cep->listen_cep);
  966. cep->listen_cep = NULL;
  967. }
  968. release_cep = 1;
  969. }
  970. break;
  971. default:
  972. WARN(1, "Undefined CM work type: %d\n", work->type);
  973. }
  974. if (release_cep) {
  975. siw_dbg_cep(cep,
  976. "release: timer=%s, QP[%u]\n",
  977. cep->mpa_timer ? "y" : "n",
  978. cep->qp ? qp_id(cep->qp) : UINT_MAX);
  979. siw_cancel_mpatimer(cep);
  980. cep->state = SIW_EPSTATE_CLOSED;
  981. if (cep->qp) {
  982. struct siw_qp *qp = cep->qp;
  983. /*
  984. * Serialize a potential race with application
  985. * closing the QP and calling siw_qp_cm_drop()
  986. */
  987. siw_qp_get(qp);
  988. siw_cep_set_free(cep);
  989. siw_qp_llp_close(qp);
  990. siw_qp_put(qp);
  991. siw_cep_set_inuse(cep);
  992. cep->qp = NULL;
  993. siw_qp_put(qp);
  994. }
  995. if (cep->sock) {
  996. siw_socket_disassoc(cep->sock);
  997. sock_release(cep->sock);
  998. cep->sock = NULL;
  999. }
  1000. if (cep->cm_id) {
  1001. cep->cm_id->rem_ref(cep->cm_id);
  1002. cep->cm_id = NULL;
  1003. siw_cep_put(cep);
  1004. }
  1005. }
  1006. siw_cep_set_free(cep);
  1007. siw_put_work(work);
  1008. siw_cep_put(cep);
  1009. }
  1010. static struct workqueue_struct *siw_cm_wq;
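/*
 * siw_cm_queue_work()
 *
 * Take a pre-allocated work element from the endpoint's free list and
 * schedule it on siw_cm_wq. MPA timeout work is scheduled delayed,
 * using MPAREQ_TIMEOUT or MPAREP_TIMEOUT depending on the CEP state.
 */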
  1011. int siw_cm_queue_work(struct siw_cep *cep, enum siw_work_type type)
  1012. {
  1013. struct siw_cm_work *work = siw_get_work(cep);
  1014. unsigned long delay = 0;
  1015. if (!work) {
  1016. siw_dbg_cep(cep, "failed with no work available\n");
  1017. return -ENOMEM;
  1018. }
  1019. work->type = type;
  1020. work->cep = cep;
  1021. siw_cep_get(cep);
  1022. INIT_DELAYED_WORK(&work->work, siw_cm_work_handler);
  1023. if (type == SIW_CM_WORK_MPATIMEOUT) {
  1024. cep->mpa_timer = work;
  1025. if (cep->state == SIW_EPSTATE_AWAIT_MPAREP)
  1026. delay = MPAREQ_TIMEOUT;
  1027. else
  1028. delay = MPAREP_TIMEOUT;
  1029. }
  1030. siw_dbg_cep(cep, "[QP %u]: work type: %d, timeout %lu\n",
  1031. cep->qp ? qp_id(cep->qp) : -1, type, delay);
  1032. queue_delayed_work(siw_cm_wq, &work->work, delay);
  1033. return 0;
  1034. }
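/*
 * siw_cm_llp_data_ready()
 *
 * Data ready callback during connection setup: while awaiting an MPA
 * Request or Reply, schedule SIW_CM_WORK_READ_MPAHDR to consume the
 * MPA header from the socket.
 */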
  1035. static void siw_cm_llp_data_ready(struct sock *sk)
  1036. {
  1037. struct siw_cep *cep;
  1038. read_lock(&sk->sk_callback_lock);
  1039. cep = sk_to_cep(sk);
  1040. if (!cep)
  1041. goto out;
  1042. siw_dbg_cep(cep, "cep state: %d, socket state %d\n",
  1043. cep->state, sk->sk_state);
  1044. if (sk->sk_state != TCP_ESTABLISHED)
  1045. goto out;
  1046. switch (cep->state) {
  1047. case SIW_EPSTATE_RDMA_MODE:
  1048. case SIW_EPSTATE_LISTENING:
  1049. break;
  1050. case SIW_EPSTATE_AWAIT_MPAREQ:
  1051. case SIW_EPSTATE_AWAIT_MPAREP:
  1052. siw_cm_queue_work(cep, SIW_CM_WORK_READ_MPAHDR);
  1053. break;
  1054. default:
  1055. siw_dbg_cep(cep, "unexpected data, state %d\n", cep->state);
  1056. break;
  1057. }
  1058. out:
  1059. read_unlock(&sk->sk_callback_lock);
  1060. }
  1061. static void siw_cm_llp_write_space(struct sock *sk)
  1062. {
  1063. struct siw_cep *cep = sk_to_cep(sk);
  1064. if (cep)
  1065. siw_dbg_cep(cep, "state: %d\n", cep->state);
  1066. }
  1067. static void siw_cm_llp_error_report(struct sock *sk)
  1068. {
  1069. struct siw_cep *cep = sk_to_cep(sk);
  1070. if (cep) {
  1071. siw_dbg_cep(cep, "error %d, socket state: %d, cep state: %d\n",
  1072. sk->sk_err, sk->sk_state, cep->state);
  1073. cep->sk_error_report(sk);
  1074. }
  1075. }
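/*
 * siw_cm_llp_state_change()
 *
 * TCP state change callback: a transition to ESTABLISHED on a
 * listening socket schedules connection accept work, a close or
 * close-wait transition schedules peer close handling. The original
 * socket callback is invoked afterwards.
 */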
  1076. static void siw_cm_llp_state_change(struct sock *sk)
  1077. {
  1078. struct siw_cep *cep;
  1079. void (*orig_state_change)(struct sock *s);
  1080. read_lock(&sk->sk_callback_lock);
  1081. cep = sk_to_cep(sk);
  1082. if (!cep) {
  1083. /* endpoint already disassociated */
  1084. read_unlock(&sk->sk_callback_lock);
  1085. return;
  1086. }
  1087. orig_state_change = cep->sk_state_change;
  1088. siw_dbg_cep(cep, "state: %d\n", cep->state);
  1089. switch (sk->sk_state) {
  1090. case TCP_ESTABLISHED:
  1091. /*
  1092. * Handle the accepting socket as a special case:
  1093. * only a new connection is possible here.
  1094. */
  1095. siw_cm_queue_work(cep, SIW_CM_WORK_ACCEPT);
  1096. break;
  1097. case TCP_CLOSE:
  1098. case TCP_CLOSE_WAIT:
  1099. if (cep->qp)
  1100. cep->qp->tx_ctx.tx_suspend = 1;
  1101. siw_cm_queue_work(cep, SIW_CM_WORK_PEER_CLOSE);
  1102. break;
  1103. default:
  1104. siw_dbg_cep(cep, "unexpected socket state %d\n", sk->sk_state);
  1105. }
  1106. read_unlock(&sk->sk_callback_lock);
  1107. orig_state_change(sk);
  1108. }
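/*
 * kernel_bindconnect()
 *
 * Bind the socket to the local address and connect it to the peer in
 * blocking mode, enabling address reuse (and V6ONLY if requested)
 * beforehand.
 */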
  1109. static int kernel_bindconnect(struct socket *s, struct sockaddr *laddr,
  1110. struct sockaddr *raddr, bool afonly)
  1111. {
  1112. int rv, flags = 0;
  1113. size_t size = laddr->sa_family == AF_INET ?
  1114. sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6);
  1115. /*
  1116. * Make address available again asap.
  1117. */
  1118. sock_set_reuseaddr(s->sk);
  1119. if (afonly) {
  1120. rv = ip6_sock_set_v6only(s->sk);
  1121. if (rv)
  1122. return rv;
  1123. }
  1124. rv = s->ops->bind(s, laddr, size);
  1125. if (rv < 0)
  1126. return rv;
  1127. rv = s->ops->connect(s, raddr, size, flags);
  1128. return rv < 0 ? rv : 0;
  1129. }
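/*
 * siw_connect - Initiate an active (client side) iWARP connection
 *
 * Creates and connects a TCP socket for the QP named in @params,
 * associates it with a new connection endpoint, sends the MPA Request
 * (including IRD/ORD and optional peer-to-peer mode for MPA v2) and
 * arms the MPA reply timeout.
 */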
  1130. int siw_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params)
  1131. {
  1132. struct siw_device *sdev = to_siw_dev(id->device);
  1133. struct siw_qp *qp;
  1134. struct siw_cep *cep = NULL;
  1135. struct socket *s = NULL;
  1136. struct sockaddr *laddr = (struct sockaddr *)&id->local_addr,
  1137. *raddr = (struct sockaddr *)&id->remote_addr;
  1138. bool p2p_mode = peer_to_peer, v4 = true;
  1139. u16 pd_len = params->private_data_len;
  1140. int version = mpa_version, rv;
  1141. if (pd_len > MPA_MAX_PRIVDATA)
  1142. return -EINVAL;
  1143. if (params->ird > sdev->attrs.max_ird ||
  1144. params->ord > sdev->attrs.max_ord)
  1145. return -ENOMEM;
  1146. if (laddr->sa_family == AF_INET6)
  1147. v4 = false;
  1148. else if (laddr->sa_family != AF_INET)
  1149. return -EAFNOSUPPORT;
  1150. /*
  1151. * Respect any iwarp port mapping: Use mapped remote address
  1152. * if valid. Local address must not be mapped, since siw
  1153. * uses kernel TCP stack.
  1154. */
  1155. if ((v4 && to_sockaddr_in(id->remote_addr).sin_port != 0) ||
  1156. to_sockaddr_in6(id->remote_addr).sin6_port != 0)
  1157. raddr = (struct sockaddr *)&id->m_remote_addr;
  1158. qp = siw_qp_id2obj(sdev, params->qpn);
  1159. if (!qp) {
  1160. WARN(1, "[QP %u] does not exist\n", params->qpn);
  1161. rv = -EINVAL;
  1162. goto error;
  1163. }
  1164. siw_dbg_qp(qp, "pd_len %d, laddr %pISp, raddr %pISp\n", pd_len, laddr,
  1165. raddr);
  1166. rv = sock_create(v4 ? AF_INET : AF_INET6, SOCK_STREAM, IPPROTO_TCP, &s);
  1167. if (rv < 0)
  1168. goto error;
  1169. /*
  1170. * NOTE: For simplification, connect() is called in blocking
  1171. * mode. Might be reconsidered for async connection setup at
  1172. * TCP level.
  1173. */
  1174. rv = kernel_bindconnect(s, laddr, raddr, id->afonly);
  1175. if (rv != 0) {
  1176. siw_dbg_qp(qp, "kernel_bindconnect: error %d\n", rv);
  1177. goto error;
  1178. }
  1179. if (siw_tcp_nagle == false)
  1180. tcp_sock_set_nodelay(s->sk);
  1181. cep = siw_cep_alloc(sdev);
  1182. if (!cep) {
  1183. rv = -ENOMEM;
  1184. goto error;
  1185. }
  1186. siw_cep_set_inuse(cep);
  1187. /* Associate QP with CEP */
  1188. siw_cep_get(cep);
  1189. qp->cep = cep;
  1190. /* siw_qp_get(qp) already done by QP lookup */
  1191. cep->qp = qp;
  1192. id->add_ref(id);
  1193. cep->cm_id = id;
  1194. /*
  1195. * 4: Allocate a sufficient number of work elements
  1196. * to allow concurrent handling of local + peer close
  1197. * events, MPA header processing + MPA timeout.
  1198. */
  1199. rv = siw_cm_alloc_work(cep, 4);
  1200. if (rv != 0) {
  1201. rv = -ENOMEM;
  1202. goto error;
  1203. }
  1204. cep->ird = params->ird;
  1205. cep->ord = params->ord;
  1206. if (p2p_mode && cep->ord == 0)
  1207. cep->ord = 1;
  1208. cep->state = SIW_EPSTATE_CONNECTING;
  1209. /*
  1210. * Associate CEP with socket
  1211. */
  1212. siw_cep_socket_assoc(cep, s);
  1213. cep->state = SIW_EPSTATE_AWAIT_MPAREP;
  1214. /*
  1215. * Set MPA Request bits: CRC if required, no MPA Markers,
  1216. * MPA Rev. according to module parameter 'mpa_version', Key 'Request'.
  1217. */
  1218. cep->mpa.hdr.params.bits = 0;
  1219. if (version > MPA_REVISION_2) {
  1220. pr_warn("Setting MPA version to %u\n", MPA_REVISION_2);
  1221. version = MPA_REVISION_2;
  1222. /* Adjust also module parameter */
  1223. mpa_version = MPA_REVISION_2;
  1224. }
  1225. __mpa_rr_set_revision(&cep->mpa.hdr.params.bits, version);
  1226. if (try_gso)
  1227. cep->mpa.hdr.params.bits |= MPA_RR_FLAG_GSO_EXP;
  1228. if (mpa_crc_required)
  1229. cep->mpa.hdr.params.bits |= MPA_RR_FLAG_CRC;
  1230. /*
  1231. * If MPA version == 2:
  1232. * o Include ORD and IRD.
  1233. * o Indicate peer-to-peer mode, if required by module
  1234. * parameter 'peer_to_peer'.
  1235. */
  1236. if (version == MPA_REVISION_2) {
  1237. cep->enhanced_rdma_conn_est = true;
  1238. cep->mpa.hdr.params.bits |= MPA_RR_FLAG_ENHANCED;
  1239. cep->mpa.v2_ctrl.ird = htons(cep->ird);
  1240. cep->mpa.v2_ctrl.ord = htons(cep->ord);
  1241. if (p2p_mode) {
  1242. cep->mpa.v2_ctrl.ird |= MPA_V2_PEER_TO_PEER;
  1243. cep->mpa.v2_ctrl.ord |= rtr_type;
  1244. }
  1245. /* Remember own P2P mode requested */
  1246. cep->mpa.v2_ctrl_req.ird = cep->mpa.v2_ctrl.ird;
  1247. cep->mpa.v2_ctrl_req.ord = cep->mpa.v2_ctrl.ord;
  1248. }
  1249. memcpy(cep->mpa.hdr.key, MPA_KEY_REQ, 16);
  1250. rv = siw_send_mpareqrep(cep, params->private_data, pd_len);
  1251. /*
  1252. * Reset private data.
  1253. */
  1254. cep->mpa.hdr.params.pd_len = 0;
  1255. if (rv >= 0) {
  1256. rv = siw_cm_queue_work(cep, SIW_CM_WORK_MPATIMEOUT);
  1257. if (!rv) {
  1258. siw_dbg_cep(cep, "[QP %u]: exit\n", qp_id(qp));
  1259. siw_cep_set_free(cep);
  1260. return 0;
  1261. }
  1262. }
  1263. error:
  1264. siw_dbg(id->device, "failed: %d\n", rv);
  1265. if (cep) {
  1266. siw_socket_disassoc(s);
  1267. sock_release(s);
  1268. cep->sock = NULL;
  1269. cep->qp = NULL;
  1270. cep->cm_id = NULL;
  1271. id->rem_ref(id);
  1272. qp->cep = NULL;
  1273. siw_cep_put(cep);
  1274. cep->state = SIW_EPSTATE_CLOSED;
  1275. siw_cep_set_free(cep);
  1276. siw_cep_put(cep);
  1277. } else if (s) {
  1278. sock_release(s);
  1279. }
  1280. if (qp)
  1281. siw_qp_put(qp);
  1282. return rv;
  1283. }
  1284. /*
  1285. * siw_accept - Let SoftiWARP accept an RDMA connection request
  1286. *
  1287. * @id: New connection management id to be used for accepted
  1288. * connection request
  1289. * @params: Connection parameters provided by ULP for accepting connection
  1290. *
  1291. * Transition QP to RTS state, associate new CM id @id with accepted CEP
  1292. * and get prepared for TCP input by installing socket callbacks.
  1293. * Then send MPA Reply and generate the "connection established" event.
  1294. * Socket callbacks must be installed before sending MPA Reply, because
  1295. * the latter may cause a first RDMA message to arrive from the RDMA Initiator
  1296. * side very quickly, at which time the socket callbacks must be ready.
  1297. */
  1298. int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params)
  1299. {
  1300. struct siw_device *sdev = to_siw_dev(id->device);
  1301. struct siw_cep *cep = (struct siw_cep *)id->provider_data;
  1302. struct siw_qp *qp;
  1303. struct siw_qp_attrs qp_attrs;
  1304. int rv, max_priv_data = MPA_MAX_PRIVDATA;
  1305. bool wait_for_peer_rts = false;
  1306. siw_cep_set_inuse(cep);
  1307. siw_cep_put(cep);
  1308. /* Free lingering inbound private data */
  1309. if (cep->mpa.hdr.params.pd_len) {
  1310. cep->mpa.hdr.params.pd_len = 0;
  1311. kfree(cep->mpa.pdata);
  1312. cep->mpa.pdata = NULL;
  1313. }
  1314. siw_cancel_mpatimer(cep);
  1315. if (cep->state != SIW_EPSTATE_RECVD_MPAREQ) {
  1316. siw_dbg_cep(cep, "out of state\n");
  1317. siw_cep_set_free(cep);
  1318. siw_cep_put(cep);
  1319. return -ECONNRESET;
  1320. }
  1321. qp = siw_qp_id2obj(sdev, params->qpn);
  1322. if (!qp) {
  1323. WARN(1, "[QP %d] does not exist\n", params->qpn);
  1324. siw_cep_set_free(cep);
  1325. siw_cep_put(cep);
  1326. return -EINVAL;
  1327. }
  1328. down_write(&qp->state_lock);
  1329. if (qp->attrs.state > SIW_QP_STATE_RTR) {
  1330. rv = -EINVAL;
  1331. up_write(&qp->state_lock);
  1332. goto error;
  1333. }
  1334. siw_dbg_cep(cep, "[QP %d]\n", params->qpn);
  1335. if (try_gso && cep->mpa.hdr.params.bits & MPA_RR_FLAG_GSO_EXP) {
  1336. siw_dbg_cep(cep, "peer allows GSO on TX\n");
  1337. qp->tx_ctx.gso_seg_limit = 0;
  1338. }
  1339. if (params->ord > sdev->attrs.max_ord ||
  1340. params->ird > sdev->attrs.max_ird) {
  1341. siw_dbg_cep(
  1342. cep,
  1343. "[QP %u]: ord %d (max %d), ird %d (max %d)\n",
  1344. qp_id(qp), params->ord, sdev->attrs.max_ord,
  1345. params->ird, sdev->attrs.max_ird);
  1346. rv = -EINVAL;
  1347. up_write(&qp->state_lock);
  1348. goto error;
  1349. }
  1350. if (cep->enhanced_rdma_conn_est)
  1351. max_priv_data -= sizeof(struct mpa_v2_data);
  1352. if (params->private_data_len > max_priv_data) {
  1353. siw_dbg_cep(
  1354. cep,
  1355. "[QP %u]: private data length: %d (max %d)\n",
  1356. qp_id(qp), params->private_data_len, max_priv_data);
  1357. rv = -EINVAL;
  1358. up_write(&qp->state_lock);
  1359. goto error;
  1360. }
  1361. if (cep->enhanced_rdma_conn_est) {
  1362. if (params->ord > cep->ord) {
  1363. if (relaxed_ird_negotiation) {
  1364. params->ord = cep->ord;
  1365. } else {
  1366. cep->ird = params->ird;
  1367. cep->ord = params->ord;
  1368. rv = -EINVAL;
  1369. up_write(&qp->state_lock);
  1370. goto error;
  1371. }
  1372. }
  1373. if (params->ird < cep->ird) {
  1374. if (relaxed_ird_negotiation &&
  1375. cep->ird <= sdev->attrs.max_ird)
  1376. params->ird = cep->ird;
  1377. else {
  1378. rv = -ENOMEM;
  1379. up_write(&qp->state_lock);
  1380. goto error;
  1381. }
  1382. }
  1383. if (cep->mpa.v2_ctrl.ord &
  1384. (MPA_V2_RDMA_WRITE_RTR | MPA_V2_RDMA_READ_RTR))
  1385. wait_for_peer_rts = true;
  1386. /*
  1387. * Signal back negotiated IRD and ORD values
  1388. */
  1389. cep->mpa.v2_ctrl.ord =
  1390. htons(params->ord & MPA_IRD_ORD_MASK) |
  1391. (cep->mpa.v2_ctrl.ord & ~MPA_V2_MASK_IRD_ORD);
  1392. cep->mpa.v2_ctrl.ird =
  1393. htons(params->ird & MPA_IRD_ORD_MASK) |
  1394. (cep->mpa.v2_ctrl.ird & ~MPA_V2_MASK_IRD_ORD);
  1395. }
  1396. cep->ird = params->ird;
  1397. cep->ord = params->ord;
  1398. cep->cm_id = id;
  1399. id->add_ref(id);
  1400. memset(&qp_attrs, 0, sizeof(qp_attrs));
  1401. qp_attrs.orq_size = cep->ord;
  1402. qp_attrs.irq_size = cep->ird;
  1403. qp_attrs.sk = cep->sock;
  1404. if (cep->mpa.hdr.params.bits & MPA_RR_FLAG_CRC)
  1405. qp_attrs.flags = SIW_MPA_CRC;
  1406. qp_attrs.state = SIW_QP_STATE_RTS;
  1407. siw_dbg_cep(cep, "[QP%u]: moving to rts\n", qp_id(qp));
  1408. /* Associate QP with CEP */
  1409. siw_cep_get(cep);
  1410. qp->cep = cep;
  1411. /* siw_qp_get(qp) already done by QP lookup */
  1412. cep->qp = qp;
  1413. cep->state = SIW_EPSTATE_RDMA_MODE;
  1414. /* Move socket RX/TX under QP control */
  1415. rv = siw_qp_modify(qp, &qp_attrs,
  1416. SIW_QP_ATTR_STATE | SIW_QP_ATTR_LLP_HANDLE |
  1417. SIW_QP_ATTR_ORD | SIW_QP_ATTR_IRD |
  1418. SIW_QP_ATTR_MPA);
  1419. up_write(&qp->state_lock);
  1420. if (rv)
  1421. goto error;
  1422. siw_dbg_cep(cep, "[QP %u]: send mpa reply, %d byte pdata\n",
  1423. qp_id(qp), params->private_data_len);
  1424. rv = siw_send_mpareqrep(cep, params->private_data,
  1425. params->private_data_len);
  1426. if (rv != 0)
  1427. goto error;
  1428. if (wait_for_peer_rts) {
  1429. siw_sk_assign_rtr_upcalls(cep);
  1430. } else {
  1431. siw_qp_socket_assoc(cep, qp);
  1432. rv = siw_cm_upcall(cep, IW_CM_EVENT_ESTABLISHED, 0);
  1433. if (rv)
  1434. goto error;
  1435. }
  1436. siw_cep_set_free(cep);
  1437. return 0;
  1438. error:
  1439. siw_socket_disassoc(cep->sock);
  1440. sock_release(cep->sock);
  1441. cep->sock = NULL;
  1442. cep->state = SIW_EPSTATE_CLOSED;
  1443. if (cep->cm_id) {
  1444. cep->cm_id->rem_ref(id);
  1445. cep->cm_id = NULL;
  1446. }
  1447. if (qp->cep) {
  1448. siw_cep_put(cep);
  1449. qp->cep = NULL;
  1450. }
  1451. cep->qp = NULL;
  1452. siw_qp_put(qp);
  1453. siw_cep_set_free(cep);
  1454. siw_cep_put(cep);
  1455. return rv;
  1456. }
  1457. /*
  1458. * siw_reject()
  1459. *
  1460. * Local connection reject case. Send private data back to peer,
  1461. * close connection and dereference connection id.
  1462. */
  1463. int siw_reject(struct iw_cm_id *id, const void *pdata, u8 pd_len)
  1464. {
  1465. struct siw_cep *cep = (struct siw_cep *)id->provider_data;
  1466. siw_cep_set_inuse(cep);
  1467. siw_cep_put(cep);
  1468. siw_cancel_mpatimer(cep);
  1469. if (cep->state != SIW_EPSTATE_RECVD_MPAREQ) {
  1470. siw_dbg_cep(cep, "out of state\n");
  1471. siw_cep_set_free(cep);
  1472. siw_cep_put(cep); /* put last reference */
  1473. return -ECONNRESET;
  1474. }
  1475. siw_dbg_cep(cep, "cep->state %d, pd_len %d\n", cep->state,
  1476. pd_len);
  1477. if (__mpa_rr_revision(cep->mpa.hdr.params.bits) >= MPA_REVISION_1) {
  1478. cep->mpa.hdr.params.bits |= MPA_RR_FLAG_REJECT; /* reject */
  1479. siw_send_mpareqrep(cep, pdata, pd_len);
  1480. }
  1481. siw_socket_disassoc(cep->sock);
  1482. sock_release(cep->sock);
  1483. cep->sock = NULL;
  1484. cep->state = SIW_EPSTATE_CLOSED;
  1485. siw_cep_set_free(cep);
  1486. siw_cep_put(cep);
  1487. return 0;
  1488. }
  1489. /*
  1490. * siw_create_listen - Create resources for a listener's IWCM ID @id
  1491. *
  1492. * Starts listen on the socket address id->local_addr.
  1493. *
  1494. */
  1495. int siw_create_listen(struct iw_cm_id *id, int backlog)
  1496. {
  1497. struct socket *s;
  1498. struct siw_cep *cep = NULL;
  1499. struct siw_device *sdev = to_siw_dev(id->device);
  1500. int addr_family = id->local_addr.ss_family;
  1501. int rv = 0;
  1502. if (addr_family != AF_INET && addr_family != AF_INET6)
  1503. return -EAFNOSUPPORT;
  1504. rv = sock_create(addr_family, SOCK_STREAM, IPPROTO_TCP, &s);
  1505. if (rv < 0)
  1506. return rv;
  1507. /*
  1508. * Allow binding local port when still in TIME_WAIT from last close.
  1509. */
  1510. sock_set_reuseaddr(s->sk);
  1511. if (addr_family == AF_INET) {
  1512. struct sockaddr_in *laddr = &to_sockaddr_in(id->local_addr);
  1513. /* For wildcard addr, limit binding to current device only */
  1514. if (ipv4_is_zeronet(laddr->sin_addr.s_addr))
  1515. s->sk->sk_bound_dev_if = sdev->netdev->ifindex;
  1516. rv = s->ops->bind(s, (struct sockaddr *)laddr,
  1517. sizeof(struct sockaddr_in));
  1518. } else {
  1519. struct sockaddr_in6 *laddr = &to_sockaddr_in6(id->local_addr);
  1520. if (id->afonly) {
  1521. rv = ip6_sock_set_v6only(s->sk);
  1522. if (rv) {
  1523. siw_dbg(id->device,
  1524. "ip6_sock_set_v6only erro: %d\n", rv);
  1525. goto error;
  1526. }
  1527. }
  1528. /* For wildcard addr, limit binding to current device only */
  1529. if (ipv6_addr_any(&laddr->sin6_addr))
  1530. s->sk->sk_bound_dev_if = sdev->netdev->ifindex;
  1531. rv = s->ops->bind(s, (struct sockaddr *)laddr,
  1532. sizeof(struct sockaddr_in6));
  1533. }
  1534. if (rv) {
  1535. siw_dbg(id->device, "socket bind error: %d\n", rv);
  1536. goto error;
  1537. }
  1538. cep = siw_cep_alloc(sdev);
  1539. if (!cep) {
  1540. rv = -ENOMEM;
  1541. goto error;
  1542. }
  1543. siw_cep_socket_assoc(cep, s);
  1544. rv = siw_cm_alloc_work(cep, backlog);
  1545. if (rv) {
  1546. siw_dbg(id->device,
  1547. "alloc_work error %d, backlog %d\n",
  1548. rv, backlog);
  1549. goto error;
  1550. }
  1551. rv = s->ops->listen(s, backlog);
  1552. if (rv) {
  1553. siw_dbg(id->device, "listen error %d\n", rv);
  1554. goto error;
  1555. }
  1556. cep->cm_id = id;
  1557. id->add_ref(id);
  1558. /*
  1559. * In case of a wildcard rdma_listen on a multi-homed device,
  1560. * a listener's IWCM id is associated with more than one listening CEP.
  1561. *
  1562. * We currently use id->provider_data in three different ways:
  1563. *
  1564. * o For a listener's IWCM id, id->provider_data points to
  1565. * the list_head of the list of listening CEPs.
  1566. * Uses: siw_create_listen(), siw_destroy_listen()
  1567. *
  1568. * o For each accepted passive-side IWCM id, id->provider_data
  1569. * points to the CEP itself. This is a consequence of
  1570. * - siw_cm_upcall() setting event.provider_data = cep and
  1571. * - the IWCM's cm_conn_req_handler() setting provider_data of the
  1572. * new passive-side IWCM id equal to event.provider_data
  1573. * Uses: siw_accept(), siw_reject()
  1574. *
  1575. * o For an active-side IWCM id, id->provider_data is not used at all.
  1576. *
  1577. */
  1578. if (!id->provider_data) {
  1579. id->provider_data =
  1580. kmalloc(sizeof(struct list_head), GFP_KERNEL);
  1581. if (!id->provider_data) {
  1582. rv = -ENOMEM;
  1583. goto error;
  1584. }
  1585. INIT_LIST_HEAD((struct list_head *)id->provider_data);
  1586. }
  1587. list_add_tail(&cep->listenq, (struct list_head *)id->provider_data);
  1588. cep->state = SIW_EPSTATE_LISTENING;
  1589. siw_dbg(id->device, "Listen at laddr %pISp\n", &id->local_addr);
  1590. return 0;
  1591. error:
  1592. siw_dbg(id->device, "failed: %d\n", rv);
  1593. if (cep) {
  1594. siw_cep_set_inuse(cep);
  1595. if (cep->cm_id) {
  1596. cep->cm_id->rem_ref(cep->cm_id);
  1597. cep->cm_id = NULL;
  1598. }
  1599. cep->sock = NULL;
  1600. siw_socket_disassoc(s);
  1601. cep->state = SIW_EPSTATE_CLOSED;
  1602. siw_cep_set_free(cep);
  1603. siw_cep_put(cep);
  1604. }
  1605. sock_release(s);
  1606. return rv;
  1607. }
  1608. static void siw_drop_listeners(struct iw_cm_id *id)
  1609. {
  1610. struct list_head *p, *tmp;
  1611. /*
  1612. * In case of a wildcard rdma_listen on a multi-homed device,
  1613. * a listener's IWCM id is associated with more than one listening CEP.
  1614. */
  1615. list_for_each_safe(p, tmp, (struct list_head *)id->provider_data) {
  1616. struct siw_cep *cep = list_entry(p, struct siw_cep, listenq);
  1617. list_del(p);
  1618. siw_dbg_cep(cep, "drop cep, state %d\n", cep->state);
  1619. siw_cep_set_inuse(cep);
  1620. if (cep->cm_id) {
  1621. cep->cm_id->rem_ref(cep->cm_id);
  1622. cep->cm_id = NULL;
  1623. }
  1624. if (cep->sock) {
  1625. siw_socket_disassoc(cep->sock);
  1626. sock_release(cep->sock);
  1627. cep->sock = NULL;
  1628. }
  1629. cep->state = SIW_EPSTATE_CLOSED;
  1630. siw_cep_set_free(cep);
  1631. siw_cep_put(cep);
  1632. }
  1633. }
  1634. int siw_destroy_listen(struct iw_cm_id *id)
  1635. {
  1636. if (!id->provider_data) {
  1637. siw_dbg(id->device, "no cep(s)\n");
  1638. return 0;
  1639. }
  1640. siw_drop_listeners(id);
  1641. kfree(id->provider_data);
  1642. id->provider_data = NULL;
  1643. return 0;
  1644. }
  1645. int siw_cm_init(void)
  1646. {
  1647. /*
  1648. * Use create_singlethread_workqueue() for strict ordering
  1649. */
  1650. siw_cm_wq = create_singlethread_workqueue("siw_cm_wq");
  1651. if (!siw_cm_wq)
  1652. return -ENOMEM;
  1653. return 0;
  1654. }
  1655. void siw_cm_exit(void)
  1656. {
  1657. if (siw_cm_wq)
  1658. destroy_workqueue(siw_cm_wq);
  1659. }