smc_close.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Shared Memory Communications over RDMA (SMC-R) and RoCE
  4. *
  5. * Socket Closing - normal and abnormal
  6. *
  7. * Copyright IBM Corp. 2016
  8. *
  9. * Author(s): Ursula Braun <[email protected]>
  10. */
  11. #include <linux/workqueue.h>
  12. #include <linux/sched/signal.h>
  13. #include <net/sock.h>
  14. #include <net/tcp.h>
  15. #include "smc.h"
  16. #include "smc_tx.h"
  17. #include "smc_cdc.h"
  18. #include "smc_close.h"
  19. /* release the clcsock that is assigned to the smc_sock */
  20. void smc_clcsock_release(struct smc_sock *smc)
  21. {
  22. struct socket *tcp;
  23. if (smc->listen_smc && current_work() != &smc->smc_listen_work)
  24. cancel_work_sync(&smc->smc_listen_work);
  25. mutex_lock(&smc->clcsock_release_lock);
  26. if (smc->clcsock) {
  27. tcp = smc->clcsock;
  28. smc->clcsock = NULL;
  29. sock_release(tcp);
  30. }
  31. mutex_unlock(&smc->clcsock_release_lock);
  32. }
  33. static void smc_close_cleanup_listen(struct sock *parent)
  34. {
  35. struct sock *sk;
  36. /* Close non-accepted connections */
  37. while ((sk = smc_accept_dequeue(parent, NULL)))
  38. smc_close_non_accepted(sk);
  39. }
/* wait for sndbuf data being transmitted */
static void smc_close_stream_wait(struct smc_sock *smc, long timeout)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct sock *sk = &smc->sk;

	/* zero timeout: caller does not want to wait at all */
	if (!timeout)
		return;

	/* nothing prepared in the sndbuf - nothing to wait for */
	if (!smc_tx_prepared_sends(&smc->conn))
		return;

	/* Send out corked data remaining in sndbuf */
	smc_tx_pending(&smc->conn);

	/* flag lets smc_close_wake_tx_prepared() know we are waiting */
	smc->wait_close_tx_prepared = 1;
	add_wait_queue(sk_sleep(sk), &wait);
	while (!signal_pending(current) && timeout) {
		int rc;

		/* wake when all prepared sends went out, or the connection
		 * was aborted/reset/killed; sk_wait_event() decrements
		 * timeout and returns nonzero when the condition holds
		 */
		rc = sk_wait_event(sk, &timeout,
				   !smc_tx_prepared_sends(&smc->conn) ||
				   READ_ONCE(sk->sk_err) == ECONNABORTED ||
				   READ_ONCE(sk->sk_err) == ECONNRESET ||
				   smc->conn.killed,
				   &wait);
		if (rc)
			break;
	}
	remove_wait_queue(sk_sleep(sk), &wait);
	smc->wait_close_tx_prepared = 0;
}
  67. void smc_close_wake_tx_prepared(struct smc_sock *smc)
  68. {
  69. if (smc->wait_close_tx_prepared)
  70. /* wake up socket closing */
  71. smc->sk.sk_state_change(&smc->sk);
  72. }
  73. static int smc_close_wr(struct smc_connection *conn)
  74. {
  75. conn->local_tx_ctrl.conn_state_flags.peer_done_writing = 1;
  76. return smc_cdc_get_slot_and_msg_send(conn);
  77. }
  78. static int smc_close_final(struct smc_connection *conn)
  79. {
  80. if (atomic_read(&conn->bytes_to_rcv))
  81. conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
  82. else
  83. conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1;
  84. if (conn->killed)
  85. return -EPIPE;
  86. return smc_cdc_get_slot_and_msg_send(conn);
  87. }
  88. int smc_close_abort(struct smc_connection *conn)
  89. {
  90. conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
  91. return smc_cdc_get_slot_and_msg_send(conn);
  92. }
/* Cancel the connection's close_work and tx_work.
 * Called with the socket lock held; the lock is dropped while canceling
 * so an already-running work item can take it and complete.
 */
static void smc_close_cancel_work(struct smc_sock *smc)
{
	struct sock *sk = &smc->sk;

	release_sock(sk);
	/* a queued close_work holds a sock reference (taken by its
	 * scheduler); drop it here if the work was canceled before running
	 */
	if (cancel_work_sync(&smc->conn.close_work))
		sock_put(sk);
	cancel_delayed_work_sync(&smc->conn.tx_work);
	lock_sock(sk);
}
/* terminate smc socket abnormally - active abort
 * link group is terminated, i.e. RDMA communication no longer possible
 */
void smc_close_active_abort(struct smc_sock *smc)
{
	struct sock *sk = &smc->sk;
	bool release_clcsock = false;

	/* propagate the abort to the internal TCP socket as well */
	if (sk->sk_state != SMC_INIT && smc->clcsock && smc->clcsock->sk) {
		sk->sk_err = ECONNABORTED;
		/* NOTE(review): inner re-check looks redundant given the
		 * outer condition - confirm against locking rules
		 */
		if (smc->clcsock && smc->clcsock->sk)
			tcp_abort(smc->clcsock->sk, ECONNABORTED);
	}
	switch (sk->sk_state) {
	case SMC_ACTIVE:
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		sk->sk_state = SMC_PEERABORTWAIT;
		smc_close_cancel_work(smc);
		/* canceling the works dropped the sock lock; if the state
		 * changed meanwhile, someone else handled the transition
		 */
		if (sk->sk_state != SMC_PEERABORTWAIT)
			break;
		sk->sk_state = SMC_CLOSED;
		sock_put(sk); /* (postponed) passive closing */
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
		sk->sk_state = SMC_PEERABORTWAIT;
		smc_close_cancel_work(smc);
		if (sk->sk_state != SMC_PEERABORTWAIT)
			break;
		sk->sk_state = SMC_CLOSED;
		/* connection resources can be freed now */
		smc_conn_free(&smc->conn);
		release_clcsock = true;
		sock_put(sk); /* passive closing */
		break;
	case SMC_PROCESSABORT:
	case SMC_APPFINCLOSEWAIT:
		sk->sk_state = SMC_PEERABORTWAIT;
		smc_close_cancel_work(smc);
		if (sk->sk_state != SMC_PEERABORTWAIT)
			break;
		sk->sk_state = SMC_CLOSED;
		smc_conn_free(&smc->conn);
		release_clcsock = true;
		break;
	case SMC_INIT:
	case SMC_PEERABORTWAIT:
	case SMC_CLOSED:
		/* nothing to abort in these states */
		break;
	}

	smc_sock_set_flag(sk, SOCK_DEAD);
	sk->sk_state_change(sk);

	/* smc_clcsock_release() may sleep; drop the sock lock around it */
	if (release_clcsock) {
		release_sock(sk);
		smc_clcsock_release(smc);
		lock_sock(sk);
	}
}
  160. static inline bool smc_close_sent_any_close(struct smc_connection *conn)
  161. {
  162. return conn->local_tx_ctrl.conn_state_flags.peer_conn_abort ||
  163. conn->local_tx_ctrl.conn_state_flags.peer_conn_closed;
  164. }
/* Perform an active (application-initiated) close.
 * Called with the socket lock held; drives the SMC close state machine
 * according to the current socket state. Returns 0 or a negative errno
 * from sending the close message / shutting down the clcsock.
 */
int smc_close_active(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	struct smc_connection *conn = &smc->conn;
	struct sock *sk = &smc->sk;
	int old_state;
	long timeout;
	int rc = 0;
	int rc1 = 0;

	/* no wait when the task is exiting; otherwise linger time if
	 * SO_LINGER is set, else the default stream-wait timeout
	 */
	timeout = current->flags & PF_EXITING ?
		  0 : sock_flag(sk, SOCK_LINGER) ?
		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;

	old_state = sk->sk_state;
again:
	switch (sk->sk_state) {
	case SMC_INIT:
		/* no connection yet - just close */
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_LISTEN:
		sk->sk_state = SMC_CLOSED;
		sk->sk_state_change(sk); /* wake up accept */
		if (smc->clcsock && smc->clcsock->sk) {
			/* detach our callbacks from the listening clcsock
			 * before shutting it down
			 */
			write_lock_bh(&smc->clcsock->sk->sk_callback_lock);
			smc_clcsock_restore_cb(&smc->clcsock->sk->sk_data_ready,
					       &smc->clcsk_data_ready);
			smc->clcsock->sk->sk_user_data = NULL;
			write_unlock_bh(&smc->clcsock->sk->sk_callback_lock);
			rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
		}
		smc_close_cleanup_listen(sk);
		/* drop the lock so a running tcp_listen_work can finish */
		release_sock(sk);
		flush_work(&smc->tcp_listen_work);
		lock_sock(sk);
		break;
	case SMC_ACTIVE:
		/* give pending sndbuf data a chance to go out first */
		smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state == SMC_ACTIVE) {
			/* send close request */
			rc = smc_close_final(conn);
			sk->sk_state = SMC_PEERCLOSEWAIT1;

			/* actively shutdown clcsock before peer close it,
			 * prevent peer from entering TIME_WAIT state.
			 */
			if (smc->clcsock && smc->clcsock->sk) {
				rc1 = kernel_sock_shutdown(smc->clcsock,
							   SHUT_RDWR);
				rc = rc ? rc : rc1;
			}
		} else {
			/* peer event has changed the state */
			goto again;
		}
		break;
	case SMC_APPFINCLOSEWAIT:
		/* socket already shutdown wr or both (active close) */
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(conn)) {
			/* just shutdown wr done, send close request */
			rc = smc_close_final(conn);
		}
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		/* state may have changed while the lock was dropped */
		if (sk->sk_state != SMC_APPCLOSEWAIT1 &&
		    sk->sk_state != SMC_APPCLOSEWAIT2)
			goto again;
		/* confirm close from peer */
		rc = smc_close_final(conn);
		if (smc_cdc_rxed_any_close(conn)) {
			/* peer has closed the socket already */
			sk->sk_state = SMC_CLOSED;
			sock_put(sk); /* postponed passive closing */
		} else {
			/* peer has just issued a shutdown write */
			sk->sk_state = SMC_PEERFINCLOSEWAIT;
		}
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(conn)) {
			/* just shutdown wr done, send close request */
			rc = smc_close_final(conn);
		}
		/* peer sending PeerConnectionClosed will cause transition */
		break;
	case SMC_PEERFINCLOSEWAIT:
		/* peer sending PeerConnectionClosed will cause transition */
		break;
	case SMC_PROCESSABORT:
		rc = smc_close_abort(conn);
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_PEERABORTWAIT:
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(sk);
	return rc;
}
/* Handle an abort (peer_conn_abort) received from the peer.
 * Called with the socket lock held; transitions the socket state and
 * drops the "passive closing" sock reference where the peer's abort
 * completes the close from the remote side.
 */
static void smc_close_passive_abort_received(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	struct sock *sk = &smc->sk;

	switch (sk->sk_state) {
	case SMC_INIT:
	case SMC_ACTIVE:
	case SMC_APPCLOSEWAIT1:
		/* application still has to act on the abort */
		sk->sk_state = SMC_PROCESSABORT;
		sock_put(sk); /* passive closing */
		break;
	case SMC_APPFINCLOSEWAIT:
		sk->sk_state = SMC_PROCESSABORT;
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(&smc->conn))
			/* just shutdown, but not yet closed locally */
			sk->sk_state = SMC_PROCESSABORT;
		else
			sk->sk_state = SMC_CLOSED;
		sock_put(sk); /* passive closing */
		break;
	case SMC_APPCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
		sk->sk_state = SMC_CLOSED;
		sock_put(sk); /* passive closing */
		break;
	case SMC_PEERABORTWAIT:
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_PROCESSABORT:
		/* nothing to do, add tracing in future patch */
		break;
	}
}
/* Either some kind of closing has been received: peer_conn_closed,
 * peer_conn_abort, or peer_done_writing
 * or the link group of the connection terminates abnormally.
 */
static void smc_close_passive_work(struct work_struct *work)
{
	struct smc_connection *conn = container_of(work,
						   struct smc_connection,
						   close_work);
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	struct smc_cdc_conn_state_flags *rxflags;
	bool release_clcsock = false;
	struct sock *sk = &smc->sk;
	int old_state;

	lock_sock(sk);
	old_state = sk->sk_state;

	rxflags = &conn->local_rx_ctrl.conn_state_flags;
	if (rxflags->peer_conn_abort) {
		/* peer has not received all data */
		smc_close_passive_abort_received(smc);
		/* drop the lock so a running tx_work can finish */
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		goto wakeup;
	}

	switch (sk->sk_state) {
	case SMC_INIT:
		sk->sk_state = SMC_APPCLOSEWAIT1;
		break;
	case SMC_ACTIVE:
		sk->sk_state = SMC_APPCLOSEWAIT1;
		/* postpone sock_put() for passive closing to cover
		 * received SEND_SHUTDOWN as well
		 */
		break;
	case SMC_PEERCLOSEWAIT1:
		if (rxflags->peer_done_writing)
			sk->sk_state = SMC_PEERCLOSEWAIT2;
		fallthrough;
		/* to check for closing */
	case SMC_PEERCLOSEWAIT2:
		if (!smc_cdc_rxed_any_close(conn))
			break;
		if (sock_flag(sk, SOCK_DEAD) &&
		    smc_close_sent_any_close(conn)) {
			/* smc_release has already been called locally */
			sk->sk_state = SMC_CLOSED;
		} else {
			/* just shutdown, but not yet closed locally */
			sk->sk_state = SMC_APPFINCLOSEWAIT;
		}
		sock_put(sk); /* passive closing */
		break;
	case SMC_PEERFINCLOSEWAIT:
		if (smc_cdc_rxed_any_close(conn)) {
			sk->sk_state = SMC_CLOSED;
			sock_put(sk); /* passive closing */
		}
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		/* postpone sock_put() for passive closing to cover
		 * received SEND_SHUTDOWN as well
		 */
		break;
	case SMC_APPFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
	case SMC_PROCESSABORT:
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

wakeup:
	sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */
	sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */

	if (old_state != sk->sk_state) {
		sk->sk_state_change(sk);
		/* once fully closed and no longer referenced by a user
		 * socket, the connection resources can be freed
		 */
		if ((sk->sk_state == SMC_CLOSED) &&
		    (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) {
			smc_conn_free(conn);
			if (smc->clcsock)
				release_clcsock = true;
		}
	}
	release_sock(sk);
	/* smc_clcsock_release() may sleep; must run unlocked */
	if (release_clcsock)
		smc_clcsock_release(smc);
	sock_put(sk); /* sock_hold done by schedulers of close_work */
}
/* Perform a shutdown(SHUT_WR): announce "done writing" to the peer
 * without fully closing the socket.
 * Called with the socket lock held; returns 0 or a negative errno from
 * sending the close-wr message.
 */
int smc_close_shutdown_write(struct smc_sock *smc)
{
	struct smc_connection *conn = &smc->conn;
	struct sock *sk = &smc->sk;
	int old_state;
	long timeout;
	int rc = 0;

	/* no wait when the task is exiting; otherwise linger time if
	 * SO_LINGER is set, else the default stream-wait timeout
	 */
	timeout = current->flags & PF_EXITING ?
		  0 : sock_flag(sk, SOCK_LINGER) ?
		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;

	old_state = sk->sk_state;
again:
	switch (sk->sk_state) {
	case SMC_ACTIVE:
		/* give pending sndbuf data a chance to go out first */
		smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		/* state may have changed while the lock was dropped */
		if (sk->sk_state != SMC_ACTIVE)
			goto again;
		/* send close wr request */
		rc = smc_close_wr(conn);
		sk->sk_state = SMC_PEERCLOSEWAIT1;
		break;
	case SMC_APPCLOSEWAIT1:
		/* passive close */
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state != SMC_APPCLOSEWAIT1)
			goto again;
		/* confirm close from peer */
		rc = smc_close_wr(conn);
		sk->sk_state = SMC_APPCLOSEWAIT2;
		break;
	case SMC_APPCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
	case SMC_APPFINCLOSEWAIT:
	case SMC_PROCESSABORT:
	case SMC_PEERABORTWAIT:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(sk);
	return rc;
}
/* Initialize close properties on connection establishment. */
void smc_close_init(struct smc_sock *smc)
{
	/* close_work runs whenever a closing indication arrives from the
	 * peer or the link group terminates abnormally
	 */
	INIT_WORK(&smc->conn.close_work, smc_close_passive_work);
}