// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause

/* Authors: Cheng Xu <[email protected]> */
/* Kai Shen <[email protected]> */
/* Copyright (c) 2020-2022, Alibaba Group. */

/* Authors: Bernard Metzler <[email protected]> */
/* Fredy Neeser */
/* Greg Joyce <[email protected]> */
/* Copyright (c) 2008-2019, IBM Corporation */
/* Copyright (c) 2017, Open Grid Computing, Inc. */

#include <linux/workqueue.h>

#include "erdma.h"
#include "erdma_cm.h"
#include "erdma_verbs.h"
static struct workqueue_struct *erdma_cm_wq;

static void erdma_cm_llp_state_change(struct sock *sk);
static void erdma_cm_llp_data_ready(struct sock *sk);
static void erdma_cm_llp_error_report(struct sock *sk);

static void erdma_sk_assign_cm_upcalls(struct sock *sk)
{
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_state_change = erdma_cm_llp_state_change;
	sk->sk_data_ready = erdma_cm_llp_data_ready;
	sk->sk_error_report = erdma_cm_llp_error_report;
	write_unlock_bh(&sk->sk_callback_lock);
}

static void erdma_sk_save_upcalls(struct sock *sk)
{
	struct erdma_cep *cep = sk_to_cep(sk);

	write_lock_bh(&sk->sk_callback_lock);
	cep->sk_state_change = sk->sk_state_change;
	cep->sk_data_ready = sk->sk_data_ready;
	cep->sk_error_report = sk->sk_error_report;
	write_unlock_bh(&sk->sk_callback_lock);
}

static void erdma_sk_restore_upcalls(struct sock *sk, struct erdma_cep *cep)
{
	sk->sk_state_change = cep->sk_state_change;
	sk->sk_data_ready = cep->sk_data_ready;
	sk->sk_error_report = cep->sk_error_report;
	sk->sk_user_data = NULL;
}

static void erdma_socket_disassoc(struct socket *s)
{
	struct sock *sk = s->sk;
	struct erdma_cep *cep;

	if (sk) {
		write_lock_bh(&sk->sk_callback_lock);
		cep = sk_to_cep(sk);
		if (cep) {
			erdma_sk_restore_upcalls(sk, cep);
			erdma_cep_put(cep);
		} else {
			WARN_ON_ONCE(1);
		}
		write_unlock_bh(&sk->sk_callback_lock);
	} else {
		WARN_ON_ONCE(1);
	}
}

static void erdma_cep_socket_assoc(struct erdma_cep *cep, struct socket *s)
{
	cep->sock = s;
	erdma_cep_get(cep);
	s->sk->sk_user_data = cep;

	erdma_sk_save_upcalls(s->sk);
	erdma_sk_assign_cm_upcalls(s->sk);
}

static void erdma_disassoc_listen_cep(struct erdma_cep *cep)
{
	if (cep->listen_cep) {
		erdma_cep_put(cep->listen_cep);
		cep->listen_cep = NULL;
	}
}

static struct erdma_cep *erdma_cep_alloc(struct erdma_dev *dev)
{
	struct erdma_cep *cep = kzalloc(sizeof(*cep), GFP_KERNEL);
	unsigned long flags;

	if (!cep)
		return NULL;

	INIT_LIST_HEAD(&cep->listenq);
	INIT_LIST_HEAD(&cep->devq);
	INIT_LIST_HEAD(&cep->work_freelist);

	kref_init(&cep->ref);
	cep->state = ERDMA_EPSTATE_IDLE;
	init_waitqueue_head(&cep->waitq);
	spin_lock_init(&cep->lock);
	cep->dev = dev;

	spin_lock_irqsave(&dev->lock, flags);
	list_add_tail(&cep->devq, &dev->cep_list);
	spin_unlock_irqrestore(&dev->lock, flags);

	return cep;
}

static void erdma_cm_free_work(struct erdma_cep *cep)
{
	struct list_head *w, *tmp;
	struct erdma_cm_work *work;

	list_for_each_safe(w, tmp, &cep->work_freelist) {
		work = list_entry(w, struct erdma_cm_work, list);
		list_del(&work->list);
		kfree(work);
	}
}

static void erdma_cancel_mpatimer(struct erdma_cep *cep)
{
	spin_lock_bh(&cep->lock);
	if (cep->mpa_timer) {
		if (cancel_delayed_work(&cep->mpa_timer->work)) {
			erdma_cep_put(cep);
			kfree(cep->mpa_timer);
		}
		cep->mpa_timer = NULL;
	}
	spin_unlock_bh(&cep->lock);
}

static void erdma_put_work(struct erdma_cm_work *work)
{
	INIT_LIST_HEAD(&work->list);
	spin_lock_bh(&work->cep->lock);
	list_add(&work->list, &work->cep->work_freelist);
	spin_unlock_bh(&work->cep->lock);
}
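
/*
 * Mark a CEP as busy, waiting (interruptibly) until any other user has
 * released it. Paired with erdma_cep_set_free() below.
 */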
static void erdma_cep_set_inuse(struct erdma_cep *cep)
{
	unsigned long flags;

	spin_lock_irqsave(&cep->lock, flags);
	while (cep->in_use) {
		spin_unlock_irqrestore(&cep->lock, flags);
		wait_event_interruptible(cep->waitq, !cep->in_use);
		if (signal_pending(current))
			flush_signals(current);

		spin_lock_irqsave(&cep->lock, flags);
	}

	cep->in_use = 1;
	spin_unlock_irqrestore(&cep->lock, flags);
}

static void erdma_cep_set_free(struct erdma_cep *cep)
{
	unsigned long flags;

	spin_lock_irqsave(&cep->lock, flags);
	cep->in_use = 0;
	spin_unlock_irqrestore(&cep->lock, flags);

	wake_up(&cep->waitq);
}

static void __erdma_cep_dealloc(struct kref *ref)
{
	struct erdma_cep *cep = container_of(ref, struct erdma_cep, ref);
	struct erdma_dev *dev = cep->dev;
	unsigned long flags;

	WARN_ON(cep->listen_cep);

	kfree(cep->private_data);
	kfree(cep->mpa.pdata);

	spin_lock_bh(&cep->lock);
	if (!list_empty(&cep->work_freelist))
		erdma_cm_free_work(cep);
	spin_unlock_bh(&cep->lock);

	spin_lock_irqsave(&dev->lock, flags);
	list_del(&cep->devq);
	spin_unlock_irqrestore(&dev->lock, flags);

	kfree(cep);
}

static struct erdma_cm_work *erdma_get_work(struct erdma_cep *cep)
{
	struct erdma_cm_work *work = NULL;

	spin_lock_bh(&cep->lock);
	if (!list_empty(&cep->work_freelist)) {
		work = list_entry(cep->work_freelist.next, struct erdma_cm_work,
				  list);
		list_del_init(&work->list);
	}

	spin_unlock_bh(&cep->lock);
	return work;
}

static int erdma_cm_alloc_work(struct erdma_cep *cep, int num)
{
	struct erdma_cm_work *work;

	while (num--) {
		work = kmalloc(sizeof(*work), GFP_KERNEL);
		if (!work) {
			if (!(list_empty(&cep->work_freelist)))
				erdma_cm_free_work(cep);
			return -ENOMEM;
		}
		work->cep = cep;
		INIT_LIST_HEAD(&work->list);
		list_add(&work->list, &cep->work_freelist);
	}

	return 0;
}
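
/*
 * Deliver a connection manager event to the IWCM. For CONNECT_REQUEST
 * and CONNECT_REPLY events, the received MPA private data and the
 * local/remote socket addresses are passed along with the event.
 */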
static int erdma_cm_upcall(struct erdma_cep *cep, enum iw_cm_event_type reason,
			   int status)
{
	struct iw_cm_event event;
	struct iw_cm_id *cm_id;

	memset(&event, 0, sizeof(event));
	event.status = status;
	event.event = reason;

	if (reason == IW_CM_EVENT_CONNECT_REQUEST) {
		event.provider_data = cep;
		cm_id = cep->listen_cep->cm_id;

		event.ird = cep->dev->attrs.max_ird;
		event.ord = cep->dev->attrs.max_ord;
	} else {
		cm_id = cep->cm_id;
	}

	if (reason == IW_CM_EVENT_CONNECT_REQUEST ||
	    reason == IW_CM_EVENT_CONNECT_REPLY) {
		u16 pd_len = be16_to_cpu(cep->mpa.hdr.params.pd_len);

		if (pd_len && cep->mpa.pdata) {
			event.private_data_len = pd_len;
			event.private_data = cep->mpa.pdata;
		}

		getname_local(cep->sock, &event.local_addr);
		getname_peer(cep->sock, &event.remote_addr);
	}

	return cm_id->event_handler(cm_id, &event);
}

void erdma_qp_cm_drop(struct erdma_qp *qp)
{
	struct erdma_cep *cep = qp->cep;

	if (!qp->cep)
		return;

	erdma_cep_set_inuse(cep);

	/* already closed. */
	if (cep->state == ERDMA_EPSTATE_CLOSED)
		goto out;

	if (cep->cm_id) {
		switch (cep->state) {
		case ERDMA_EPSTATE_AWAIT_MPAREP:
			erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
					-EINVAL);
			break;
		case ERDMA_EPSTATE_RDMA_MODE:
			erdma_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0);
			break;
		case ERDMA_EPSTATE_IDLE:
		case ERDMA_EPSTATE_LISTENING:
		case ERDMA_EPSTATE_CONNECTING:
		case ERDMA_EPSTATE_AWAIT_MPAREQ:
		case ERDMA_EPSTATE_RECVD_MPAREQ:
		case ERDMA_EPSTATE_CLOSED:
		default:
			break;
		}
		cep->cm_id->rem_ref(cep->cm_id);
		cep->cm_id = NULL;
		erdma_cep_put(cep);
	}
	cep->state = ERDMA_EPSTATE_CLOSED;

	if (cep->sock) {
		erdma_socket_disassoc(cep->sock);
		sock_release(cep->sock);
		cep->sock = NULL;
	}

	if (cep->qp) {
		cep->qp = NULL;
		erdma_qp_put(qp);
	}
out:
	erdma_cep_set_free(cep);
}

void erdma_cep_put(struct erdma_cep *cep)
{
	WARN_ON(kref_read(&cep->ref) < 1);
	kref_put(&cep->ref, __erdma_cep_dealloc);
}

void erdma_cep_get(struct erdma_cep *cep)
{
	kref_get(&cep->ref);
}
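
/*
 * Send an MPA Request/Reply on the connection's TCP socket: the common
 * MPA header, the erdma extension data and, if present, the private data.
 */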
static int erdma_send_mpareqrep(struct erdma_cep *cep, const void *pdata,
				u8 pd_len)
{
	struct socket *s = cep->sock;
	struct mpa_rr *rr = &cep->mpa.hdr;
	struct kvec iov[3];
	struct msghdr msg;
	int iovec_num = 0;
	int ret;
	int mpa_len;

	memset(&msg, 0, sizeof(msg));

	rr->params.pd_len = cpu_to_be16(pd_len);

	iov[iovec_num].iov_base = rr;
	iov[iovec_num].iov_len = sizeof(*rr);
	iovec_num++;
	mpa_len = sizeof(*rr);

	iov[iovec_num].iov_base = &cep->mpa.ext_data;
	iov[iovec_num].iov_len = sizeof(cep->mpa.ext_data);
	iovec_num++;
	mpa_len += sizeof(cep->mpa.ext_data);

	if (pd_len) {
		iov[iovec_num].iov_base = (char *)pdata;
		iov[iovec_num].iov_len = pd_len;
		mpa_len += pd_len;
		iovec_num++;
	}

	ret = kernel_sendmsg(s, &msg, iov, iovec_num, mpa_len);

	return ret < 0 ? ret : 0;
}

static inline int ksock_recv(struct socket *sock, char *buf, size_t size,
			     int flags)
{
	struct kvec iov = { buf, size };
	struct msghdr msg = { .msg_name = NULL, .msg_flags = flags };

	return kernel_recvmsg(sock, &msg, &iov, 1, size, flags);
}

static int __recv_mpa_hdr(struct erdma_cep *cep, int hdr_rcvd, char *hdr,
			  int hdr_size, int *rcvd_out)
{
	struct socket *s = cep->sock;
	int rcvd;

	*rcvd_out = 0;
	if (hdr_rcvd < hdr_size) {
		rcvd = ksock_recv(s, hdr + hdr_rcvd, hdr_size - hdr_rcvd,
				  MSG_DONTWAIT);
		if (rcvd == -EAGAIN)
			return -EAGAIN;

		if (rcvd <= 0)
			return -ECONNABORTED;

		hdr_rcvd += rcvd;
		*rcvd_out = rcvd;

		if (hdr_rcvd < hdr_size)
			return -EAGAIN;
	}

	return 0;
}

static void __mpa_rr_set_revision(__be16 *bits, u8 rev)
{
	*bits = (*bits & ~MPA_RR_MASK_REVISION) |
		(cpu_to_be16(rev) & MPA_RR_MASK_REVISION);
}

static u8 __mpa_rr_revision(__be16 mpa_rr_bits)
{
	__be16 rev = mpa_rr_bits & MPA_RR_MASK_REVISION;

	return (u8)be16_to_cpu(rev);
}

static void __mpa_ext_set_cc(__be32 *bits, u32 cc)
{
	*bits = (*bits & ~MPA_EXT_FLAG_CC) |
		(cpu_to_be32(cc) & MPA_EXT_FLAG_CC);
}

static u8 __mpa_ext_cc(__be32 mpa_ext_bits)
{
	__be32 cc = mpa_ext_bits & MPA_EXT_FLAG_CC;

	return (u8)be32_to_cpu(cc);
}

/*
 * Receive MPA Request/Reply header.
 *
 * Returns 0 if the complete MPA Request/Reply header, including any
 * private data, was received. Returns -EAGAIN if the header was only
 * partially received, or a negative error code otherwise.
 *
 * Context: May be called in process context only.
 */
static int erdma_recv_mpa_rr(struct erdma_cep *cep)
{
	struct mpa_rr *hdr = &cep->mpa.hdr;
	struct socket *s = cep->sock;
	u16 pd_len;
	int rcvd, to_rcv, ret, pd_rcvd;

	if (cep->mpa.bytes_rcvd < sizeof(struct mpa_rr)) {
		ret = __recv_mpa_hdr(cep, cep->mpa.bytes_rcvd,
				     (char *)&cep->mpa.hdr,
				     sizeof(struct mpa_rr), &rcvd);
		cep->mpa.bytes_rcvd += rcvd;
		if (ret)
			return ret;
	}

	if (be16_to_cpu(hdr->params.pd_len) > MPA_MAX_PRIVDATA ||
	    __mpa_rr_revision(hdr->params.bits) != MPA_REVISION_EXT_1)
		return -EPROTO;

	if (cep->mpa.bytes_rcvd - sizeof(struct mpa_rr) <
	    sizeof(struct erdma_mpa_ext)) {
		ret = __recv_mpa_hdr(
			cep, cep->mpa.bytes_rcvd - sizeof(struct mpa_rr),
			(char *)&cep->mpa.ext_data,
			sizeof(struct erdma_mpa_ext), &rcvd);
		cep->mpa.bytes_rcvd += rcvd;
		if (ret)
			return ret;
	}

	pd_len = be16_to_cpu(hdr->params.pd_len);
	pd_rcvd = cep->mpa.bytes_rcvd - sizeof(struct mpa_rr) -
		  sizeof(struct erdma_mpa_ext);
	to_rcv = pd_len - pd_rcvd;

	if (!to_rcv) {
		/*
		 * We have received the whole MPA Request/Reply message.
		 * Check against peer protocol violation.
		 */
		u32 word;

		ret = __recv_mpa_hdr(cep, 0, (char *)&word, sizeof(word),
				     &rcvd);
		if (ret == -EAGAIN && rcvd == 0)
			return 0;

		if (ret)
			return ret;

		return -EPROTO;
	}

	/*
	 * At this point, MPA header has been fully received, and pd_len != 0.
	 * So, begin to receive private data.
	 */
	if (!cep->mpa.pdata) {
		cep->mpa.pdata = kmalloc(pd_len + 4, GFP_KERNEL);
		if (!cep->mpa.pdata)
			return -ENOMEM;
	}

	rcvd = ksock_recv(s, cep->mpa.pdata + pd_rcvd, to_rcv + 4,
			  MSG_DONTWAIT);
	if (rcvd < 0)
		return rcvd;

	if (rcvd > to_rcv)
		return -EPROTO;

	cep->mpa.bytes_rcvd += rcvd;

	if (to_rcv == rcvd)
		return 0;

	return -EAGAIN;
}

/*
 * erdma_proc_mpareq()
 *
 * Read the MPA Request from the socket and signal the new connection
 * to the IWCM on success. The caller must hold the lock on the
 * corresponding listening CEP.
 */
static int erdma_proc_mpareq(struct erdma_cep *cep)
{
	struct mpa_rr *req;
	int ret;

	ret = erdma_recv_mpa_rr(cep);
	if (ret)
		return ret;

	req = &cep->mpa.hdr;

	if (memcmp(req->key, MPA_KEY_REQ, MPA_KEY_SIZE))
		return -EPROTO;

	memcpy(req->key, MPA_KEY_REP, MPA_KEY_SIZE);

	/* Currently, markers and CRC are not supported. */
	if (req->params.bits & MPA_RR_FLAG_MARKERS ||
	    req->params.bits & MPA_RR_FLAG_CRC)
		goto reject_conn;

	cep->state = ERDMA_EPSTATE_RECVD_MPAREQ;

	/* Keep reference until IWCM accepts/rejects */
	erdma_cep_get(cep);
	ret = erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REQUEST, 0);
	if (ret)
		erdma_cep_put(cep);

	return ret;

reject_conn:
	req->params.bits &= ~MPA_RR_FLAG_MARKERS;
	req->params.bits |= MPA_RR_FLAG_REJECT;
	req->params.bits &= ~MPA_RR_FLAG_CRC;

	kfree(cep->mpa.pdata);
	cep->mpa.pdata = NULL;
	erdma_send_mpareqrep(cep, NULL, 0);

	return -EOPNOTSUPP;
}
static int erdma_proc_mpareply(struct erdma_cep *cep)
{
	struct erdma_qp_attrs qp_attrs;
	struct erdma_qp *qp = cep->qp;
	struct mpa_rr *rep;
	int ret;

	ret = erdma_recv_mpa_rr(cep);
	if (ret)
		goto out_err;

	erdma_cancel_mpatimer(cep);

	rep = &cep->mpa.hdr;

	if (memcmp(rep->key, MPA_KEY_REP, MPA_KEY_SIZE)) {
		ret = -EPROTO;
		goto out_err;
	}

	if (rep->params.bits & MPA_RR_FLAG_REJECT) {
		erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -ECONNRESET);
		return -ECONNRESET;
	}

	/* Currently, markers and CRC are not supported. */
	if ((rep->params.bits & MPA_RR_FLAG_MARKERS) ||
	    (rep->params.bits & MPA_RR_FLAG_CRC)) {
		erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -ECONNREFUSED);
		return -EINVAL;
	}

	memset(&qp_attrs, 0, sizeof(qp_attrs));
	qp_attrs.irq_size = cep->ird;
	qp_attrs.orq_size = cep->ord;
	qp_attrs.state = ERDMA_QP_STATE_RTS;

	down_write(&qp->state_lock);
	if (qp->attrs.state > ERDMA_QP_STATE_RTR) {
		ret = -EINVAL;
		up_write(&qp->state_lock);
		goto out_err;
	}

	qp->attrs.qp_type = ERDMA_QP_ACTIVE;
	if (__mpa_ext_cc(cep->mpa.ext_data.bits) != qp->attrs.cc)
		qp->attrs.cc = COMPROMISE_CC;

	ret = erdma_modify_qp_internal(qp, &qp_attrs,
				       ERDMA_QP_ATTR_STATE |
				       ERDMA_QP_ATTR_LLP_HANDLE |
				       ERDMA_QP_ATTR_MPA);

	up_write(&qp->state_lock);

	if (!ret) {
		ret = erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, 0);
		if (!ret)
			cep->state = ERDMA_EPSTATE_RDMA_MODE;

		return 0;
	}

out_err:
	if (ret != -EAGAIN)
		erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -EINVAL);

	return ret;
}
static void erdma_accept_newconn(struct erdma_cep *cep)
{
	struct socket *s = cep->sock;
	struct socket *new_s = NULL;
	struct erdma_cep *new_cep = NULL;
	int ret = 0;

	if (cep->state != ERDMA_EPSTATE_LISTENING)
		goto error;

	new_cep = erdma_cep_alloc(cep->dev);
	if (!new_cep)
		goto error;

	/*
	 * 4: Allocate a sufficient number of work elements
	 * to allow concurrent handling of local + peer close
	 * events, MPA header processing + MPA timeout.
	 */
	if (erdma_cm_alloc_work(new_cep, 4) != 0)
		goto error;

	/*
	 * Copy saved socket callbacks from listening CEP
	 * and assign new socket with new CEP
	 */
	new_cep->sk_state_change = cep->sk_state_change;
	new_cep->sk_data_ready = cep->sk_data_ready;
	new_cep->sk_error_report = cep->sk_error_report;

	ret = kernel_accept(s, &new_s, O_NONBLOCK);
	if (ret != 0)
		goto error;

	new_cep->sock = new_s;
	erdma_cep_get(new_cep);
	new_s->sk->sk_user_data = new_cep;

	tcp_sock_set_nodelay(new_s->sk);
	new_cep->state = ERDMA_EPSTATE_AWAIT_MPAREQ;

	ret = erdma_cm_queue_work(new_cep, ERDMA_CM_WORK_MPATIMEOUT);
	if (ret)
		goto error;

	new_cep->listen_cep = cep;
	erdma_cep_get(cep);

	if (atomic_read(&new_s->sk->sk_rmem_alloc)) {
		/* MPA REQ already queued */
		erdma_cep_set_inuse(new_cep);
		ret = erdma_proc_mpareq(new_cep);
		if (ret != -EAGAIN) {
			erdma_cep_put(cep);
			new_cep->listen_cep = NULL;
			if (ret) {
				erdma_cep_set_free(new_cep);
				goto error;
			}
		}
		erdma_cep_set_free(new_cep);
	}
	return;

error:
	if (new_cep) {
		new_cep->state = ERDMA_EPSTATE_CLOSED;
		erdma_cancel_mpatimer(new_cep);

		erdma_cep_put(new_cep);
		new_cep->sock = NULL;
	}

	if (new_s) {
		erdma_socket_disassoc(new_s);
		sock_release(new_s);
	}
}
static int erdma_newconn_connected(struct erdma_cep *cep)
{
	int ret = 0;

	cep->mpa.hdr.params.bits = 0;
	__mpa_rr_set_revision(&cep->mpa.hdr.params.bits, MPA_REVISION_EXT_1);

	memcpy(cep->mpa.hdr.key, MPA_KEY_REQ, MPA_KEY_SIZE);
	cep->mpa.ext_data.cookie = cpu_to_be32(cep->qp->attrs.cookie);
	__mpa_ext_set_cc(&cep->mpa.ext_data.bits, cep->qp->attrs.cc);

	ret = erdma_send_mpareqrep(cep, cep->private_data, cep->pd_len);
	cep->state = ERDMA_EPSTATE_AWAIT_MPAREP;
	cep->mpa.hdr.params.pd_len = 0;

	if (ret >= 0)
		ret = erdma_cm_queue_work(cep, ERDMA_CM_WORK_MPATIMEOUT);

	return ret;
}
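
/*
 * Deferred CM work handler, running on erdma_cm_wq. Dispatches on the
 * queued work type and, when required, tears the CEP down and releases
 * its socket, QP and cm_id references.
 */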
static void erdma_cm_work_handler(struct work_struct *w)
{
	struct erdma_cm_work *work;
	struct erdma_cep *cep;
	int release_cep = 0, ret = 0;

	work = container_of(w, struct erdma_cm_work, work.work);
	cep = work->cep;

	erdma_cep_set_inuse(cep);

	switch (work->type) {
	case ERDMA_CM_WORK_CONNECTED:
		erdma_cancel_mpatimer(cep);
		if (cep->state == ERDMA_EPSTATE_CONNECTING) {
			ret = erdma_newconn_connected(cep);
			if (ret) {
				erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
						-EIO);
				release_cep = 1;
			}
		}
		break;
	case ERDMA_CM_WORK_CONNECTTIMEOUT:
		if (cep->state == ERDMA_EPSTATE_CONNECTING) {
			cep->mpa_timer = NULL;
			erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
					-ETIMEDOUT);
			release_cep = 1;
		}
		break;
	case ERDMA_CM_WORK_ACCEPT:
		erdma_accept_newconn(cep);
		break;
	case ERDMA_CM_WORK_READ_MPAHDR:
		if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREQ) {
			if (cep->listen_cep) {
				erdma_cep_set_inuse(cep->listen_cep);

				if (cep->listen_cep->state ==
				    ERDMA_EPSTATE_LISTENING)
					ret = erdma_proc_mpareq(cep);
				else
					ret = -EFAULT;

				erdma_cep_set_free(cep->listen_cep);

				if (ret != -EAGAIN) {
					erdma_cep_put(cep->listen_cep);
					cep->listen_cep = NULL;
					if (ret)
						erdma_cep_put(cep);
				}
			}
		} else if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREP) {
			ret = erdma_proc_mpareply(cep);
		}

		if (ret && ret != -EAGAIN)
			release_cep = 1;
		break;
	case ERDMA_CM_WORK_CLOSE_LLP:
		if (cep->cm_id)
			erdma_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0);
		release_cep = 1;
		break;
	case ERDMA_CM_WORK_PEER_CLOSE:
		if (cep->cm_id) {
			if (cep->state == ERDMA_EPSTATE_CONNECTING ||
			    cep->state == ERDMA_EPSTATE_AWAIT_MPAREP) {
				/*
				 * MPA reply not received, but connection
				 * dropped.
				 */
				erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
						-ECONNRESET);
			} else if (cep->state == ERDMA_EPSTATE_RDMA_MODE) {
				/*
				 * NOTE: IW_CM_EVENT_DISCONNECT is given just
				 * to transition IWCM into CLOSING.
				 */
				erdma_cm_upcall(cep, IW_CM_EVENT_DISCONNECT, 0);
				erdma_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0);
			}
		} else if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREQ) {
			/* Socket close before MPA request received. */
			erdma_disassoc_listen_cep(cep);
			erdma_cep_put(cep);
		}
		release_cep = 1;
		break;
	case ERDMA_CM_WORK_MPATIMEOUT:
		cep->mpa_timer = NULL;
		if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREP) {
			/*
			 * MPA request timed out:
			 * Hide any partially received private data and signal
			 * timeout
			 */
			cep->mpa.hdr.params.pd_len = 0;

			if (cep->cm_id)
				erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
						-ETIMEDOUT);
			release_cep = 1;
		} else if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREQ) {
			/* No MPA req received after peer TCP stream setup. */
			erdma_disassoc_listen_cep(cep);

			erdma_cep_put(cep);
			release_cep = 1;
		}
		break;
	default:
		WARN(1, "Undefined CM work type: %d\n", work->type);
	}

	if (release_cep) {
		erdma_cancel_mpatimer(cep);
		cep->state = ERDMA_EPSTATE_CLOSED;
		if (cep->qp) {
			struct erdma_qp *qp = cep->qp;
			/*
			 * Serialize a potential race with application
			 * closing the QP and calling erdma_qp_cm_drop()
			 */
			erdma_qp_get(qp);
			erdma_cep_set_free(cep);

			erdma_qp_llp_close(qp);
			erdma_qp_put(qp);

			erdma_cep_set_inuse(cep);
			cep->qp = NULL;
			erdma_qp_put(qp);
		}

		if (cep->sock) {
			erdma_socket_disassoc(cep->sock);
			sock_release(cep->sock);
			cep->sock = NULL;
		}

		if (cep->cm_id) {
			cep->cm_id->rem_ref(cep->cm_id);
			cep->cm_id = NULL;
			if (cep->state != ERDMA_EPSTATE_LISTENING)
				erdma_cep_put(cep);
		}
	}
	erdma_cep_set_free(cep);
	erdma_put_work(work);
	erdma_cep_put(cep);
}
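
/*
 * Schedule deferred CM work of the given type for a CEP. MPA and connect
 * timeout work is armed as a delayed work item and remembered in
 * cep->mpa_timer so that it can be cancelled later.
 */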
int erdma_cm_queue_work(struct erdma_cep *cep, enum erdma_work_type type)
{
	struct erdma_cm_work *work = erdma_get_work(cep);
	unsigned long delay = 0;

	if (!work)
		return -ENOMEM;

	work->type = type;
	work->cep = cep;
	erdma_cep_get(cep);

	INIT_DELAYED_WORK(&work->work, erdma_cm_work_handler);

	if (type == ERDMA_CM_WORK_MPATIMEOUT) {
		cep->mpa_timer = work;

		if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREP)
			delay = MPAREP_TIMEOUT;
		else
			delay = MPAREQ_TIMEOUT;
	} else if (type == ERDMA_CM_WORK_CONNECTTIMEOUT) {
		cep->mpa_timer = work;

		delay = CONNECT_TIMEOUT;
	}

	queue_delayed_work(erdma_cm_wq, &work->work, delay);

	return 0;
}
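
/*
 * Socket upcalls installed by erdma_sk_assign_cm_upcalls(). They are
 * invoked from the TCP stack's socket callbacks and either queue
 * deferred connection handling on erdma_cm_wq or forward the event to
 * the saved original callback.
 */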
static void erdma_cm_llp_data_ready(struct sock *sk)
{
	struct erdma_cep *cep;

	read_lock(&sk->sk_callback_lock);

	cep = sk_to_cep(sk);
	if (!cep)
		goto out;

	if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREQ ||
	    cep->state == ERDMA_EPSTATE_AWAIT_MPAREP)
		erdma_cm_queue_work(cep, ERDMA_CM_WORK_READ_MPAHDR);

out:
	read_unlock(&sk->sk_callback_lock);
}

static void erdma_cm_llp_error_report(struct sock *sk)
{
	struct erdma_cep *cep = sk_to_cep(sk);

	if (cep)
		cep->sk_error_report(sk);
}

static void erdma_cm_llp_state_change(struct sock *sk)
{
	struct erdma_cep *cep;
	void (*orig_state_change)(struct sock *sk);

	read_lock(&sk->sk_callback_lock);

	cep = sk_to_cep(sk);
	if (!cep) {
		read_unlock(&sk->sk_callback_lock);
		return;
	}
	orig_state_change = cep->sk_state_change;

	switch (sk->sk_state) {
	case TCP_ESTABLISHED:
		if (cep->state == ERDMA_EPSTATE_CONNECTING)
			erdma_cm_queue_work(cep, ERDMA_CM_WORK_CONNECTED);
		else
			erdma_cm_queue_work(cep, ERDMA_CM_WORK_ACCEPT);
		break;
	case TCP_CLOSE:
	case TCP_CLOSE_WAIT:
		if (cep->state != ERDMA_EPSTATE_LISTENING)
			erdma_cm_queue_work(cep, ERDMA_CM_WORK_PEER_CLOSE);
		break;
	default:
		break;
	}
	read_unlock(&sk->sk_callback_lock);
	orig_state_change(sk);
}

static int kernel_bindconnect(struct socket *s, struct sockaddr *laddr,
			      int laddrlen, struct sockaddr *raddr,
			      int raddrlen, int flags)
{
	int ret;

	sock_set_reuseaddr(s->sk);

	ret = s->ops->bind(s, laddr, laddrlen);
	if (ret)
		return ret;

	ret = s->ops->connect(s, raddr, raddrlen, flags);

	return ret < 0 ? ret : 0;
}
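
/*
 * Active side of connection setup (iw_cm connect verb): create the TCP
 * socket, associate it with a new CEP and the QP, then either queue the
 * connected work immediately or arm a connect timeout while the
 * non-blocking connect completes.
 */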
int erdma_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params)
{
	struct erdma_dev *dev = to_edev(id->device);
	struct erdma_qp *qp;
	struct erdma_cep *cep = NULL;
	struct socket *s = NULL;
	struct sockaddr *laddr = (struct sockaddr *)&id->m_local_addr;
	struct sockaddr *raddr = (struct sockaddr *)&id->m_remote_addr;
	u16 pd_len = params->private_data_len;
	int ret;

	if (pd_len > MPA_MAX_PRIVDATA)
		return -EINVAL;

	if (params->ird > dev->attrs.max_ird ||
	    params->ord > dev->attrs.max_ord)
		return -EINVAL;

	if (laddr->sa_family != AF_INET || raddr->sa_family != AF_INET)
		return -EAFNOSUPPORT;

	qp = find_qp_by_qpn(dev, params->qpn);
	if (!qp)
		return -ENOENT;
	erdma_qp_get(qp);

	ret = sock_create(AF_INET, SOCK_STREAM, IPPROTO_TCP, &s);
	if (ret < 0)
		goto error_put_qp;

	cep = erdma_cep_alloc(dev);
	if (!cep) {
		ret = -ENOMEM;
		goto error_release_sock;
	}

	erdma_cep_set_inuse(cep);

	/* Associate QP with CEP */
	erdma_cep_get(cep);
	qp->cep = cep;
	cep->qp = qp;

	/* Associate cm_id with CEP */
	id->add_ref(id);
	cep->cm_id = id;

	/*
	 * 6: Allocate a sufficient number of work elements
	 * to allow concurrent handling of local + peer close
	 * events, MPA header processing + MPA timeout, connected event
	 * and connect timeout.
	 */
	ret = erdma_cm_alloc_work(cep, 6);
	if (ret != 0) {
		ret = -ENOMEM;
		goto error_release_cep;
	}

	cep->ird = params->ird;
	cep->ord = params->ord;
	cep->state = ERDMA_EPSTATE_CONNECTING;

	erdma_cep_socket_assoc(cep, s);

	if (pd_len) {
		cep->pd_len = pd_len;
		cep->private_data = kmalloc(pd_len, GFP_KERNEL);
		if (!cep->private_data) {
			ret = -ENOMEM;
			goto error_disassoc;
		}

		memcpy(cep->private_data, params->private_data,
		       params->private_data_len);
	}

	ret = kernel_bindconnect(s, laddr, sizeof(*laddr), raddr,
				 sizeof(*raddr), O_NONBLOCK);
	if (ret != -EINPROGRESS && ret != 0) {
		goto error_disassoc;
	} else if (ret == 0) {
		ret = erdma_cm_queue_work(cep, ERDMA_CM_WORK_CONNECTED);
		if (ret)
			goto error_disassoc;
	} else {
		ret = erdma_cm_queue_work(cep, ERDMA_CM_WORK_CONNECTTIMEOUT);
		if (ret)
			goto error_disassoc;
	}

	erdma_cep_set_free(cep);
	return 0;

error_disassoc:
	kfree(cep->private_data);
	cep->private_data = NULL;
	cep->pd_len = 0;

	erdma_socket_disassoc(s);

error_release_cep:
	/* disassoc with cm_id */
	cep->cm_id = NULL;
	id->rem_ref(id);

	/* disassoc with qp */
	qp->cep = NULL;
	erdma_cep_put(cep);
	cep->qp = NULL;

	cep->state = ERDMA_EPSTATE_CLOSED;

	erdma_cep_set_free(cep);

	/* release the cep. */
	erdma_cep_put(cep);

error_release_sock:
	if (s)
		sock_release(s);
error_put_qp:
	erdma_qp_put(qp);

	return ret;
}
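
/*
 * Passive side of connection setup (iw_cm accept verb): bind the QP to
 * the CEP that received the MPA request, move the QP to RTS and send
 * the MPA reply.
 */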
int erdma_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params)
{
	struct erdma_dev *dev = to_edev(id->device);
	struct erdma_cep *cep = (struct erdma_cep *)id->provider_data;
	struct erdma_qp *qp;
	struct erdma_qp_attrs qp_attrs;
	int ret;

	erdma_cep_set_inuse(cep);
	erdma_cep_put(cep);

	/* Free lingering inbound private data */
	if (cep->mpa.hdr.params.pd_len) {
		cep->mpa.hdr.params.pd_len = 0;
		kfree(cep->mpa.pdata);
		cep->mpa.pdata = NULL;
	}
	erdma_cancel_mpatimer(cep);

	if (cep->state != ERDMA_EPSTATE_RECVD_MPAREQ) {
		erdma_cep_set_free(cep);
		erdma_cep_put(cep);

		return -ECONNRESET;
	}

	qp = find_qp_by_qpn(dev, params->qpn);
	if (!qp)
		return -ENOENT;
	erdma_qp_get(qp);

	down_write(&qp->state_lock);
	if (qp->attrs.state > ERDMA_QP_STATE_RTR) {
		ret = -EINVAL;
		up_write(&qp->state_lock);
		goto error;
	}

	if (params->ord > dev->attrs.max_ord ||
	    params->ird > dev->attrs.max_ord) {
		ret = -EINVAL;
		up_write(&qp->state_lock);
		goto error;
	}

	if (params->private_data_len > MPA_MAX_PRIVDATA) {
		ret = -EINVAL;
		up_write(&qp->state_lock);
		goto error;
	}

	cep->ird = params->ird;
	cep->ord = params->ord;

	cep->cm_id = id;
	id->add_ref(id);

	memset(&qp_attrs, 0, sizeof(qp_attrs));
	qp_attrs.orq_size = params->ord;
	qp_attrs.irq_size = params->ird;

	qp_attrs.state = ERDMA_QP_STATE_RTS;

	/* Associate QP with CEP */
	erdma_cep_get(cep);
	qp->cep = cep;
	cep->qp = qp;

	cep->state = ERDMA_EPSTATE_RDMA_MODE;

	qp->attrs.qp_type = ERDMA_QP_PASSIVE;
	qp->attrs.pd_len = params->private_data_len;

	if (qp->attrs.cc != __mpa_ext_cc(cep->mpa.ext_data.bits))
		qp->attrs.cc = COMPROMISE_CC;

	/* move to rts */
	ret = erdma_modify_qp_internal(qp, &qp_attrs,
				       ERDMA_QP_ATTR_STATE |
				       ERDMA_QP_ATTR_ORD |
				       ERDMA_QP_ATTR_LLP_HANDLE |
				       ERDMA_QP_ATTR_IRD |
				       ERDMA_QP_ATTR_MPA);
	up_write(&qp->state_lock);

	if (ret)
		goto error;

	cep->mpa.ext_data.bits = 0;
	__mpa_ext_set_cc(&cep->mpa.ext_data.bits, qp->attrs.cc);
	cep->mpa.ext_data.cookie = cpu_to_be32(cep->qp->attrs.cookie);

	ret = erdma_send_mpareqrep(cep, params->private_data,
				   params->private_data_len);
	if (!ret) {
		ret = erdma_cm_upcall(cep, IW_CM_EVENT_ESTABLISHED, 0);
		if (ret)
			goto error;

		erdma_cep_set_free(cep);

		return 0;
	}

error:
	erdma_socket_disassoc(cep->sock);
	sock_release(cep->sock);
	cep->sock = NULL;

	cep->state = ERDMA_EPSTATE_CLOSED;

	if (cep->cm_id) {
		cep->cm_id->rem_ref(id);
		cep->cm_id = NULL;
	}

	if (qp->cep) {
		erdma_cep_put(cep);
		qp->cep = NULL;
	}

	cep->qp = NULL;
	erdma_qp_put(qp);

	erdma_cep_set_free(cep);
	erdma_cep_put(cep);

	return ret;
}
int erdma_reject(struct iw_cm_id *id, const void *pdata, u8 plen)
{
	struct erdma_cep *cep = (struct erdma_cep *)id->provider_data;

	erdma_cep_set_inuse(cep);
	erdma_cep_put(cep);

	erdma_cancel_mpatimer(cep);

	if (cep->state != ERDMA_EPSTATE_RECVD_MPAREQ) {
		erdma_cep_set_free(cep);
		erdma_cep_put(cep);

		return -ECONNRESET;
	}

	if (__mpa_rr_revision(cep->mpa.hdr.params.bits) == MPA_REVISION_EXT_1) {
		cep->mpa.hdr.params.bits |= MPA_RR_FLAG_REJECT; /* reject */
		erdma_send_mpareqrep(cep, pdata, plen);
	}

	erdma_socket_disassoc(cep->sock);
	sock_release(cep->sock);
	cep->sock = NULL;

	cep->state = ERDMA_EPSTATE_CLOSED;

	erdma_cep_set_free(cep);
	erdma_cep_put(cep);

	return 0;
}
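
/*
 * Set up a listening endpoint for the iw_cm listen verb: create and bind
 * a TCP socket, attach it to a new CEP and link the CEP into the cm_id's
 * list of listeners.
 */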
int erdma_create_listen(struct iw_cm_id *id, int backlog)
{
	struct socket *s;
	struct erdma_cep *cep = NULL;
	int ret = 0;
	struct erdma_dev *dev = to_edev(id->device);
	int addr_family = id->local_addr.ss_family;
	struct sockaddr_in *laddr = &to_sockaddr_in(id->local_addr);

	if (addr_family != AF_INET)
		return -EAFNOSUPPORT;

	ret = sock_create(addr_family, SOCK_STREAM, IPPROTO_TCP, &s);
	if (ret < 0)
		return ret;

	sock_set_reuseaddr(s->sk);

	/* For wildcard addr, limit binding to current device only */
	if (ipv4_is_zeronet(laddr->sin_addr.s_addr))
		s->sk->sk_bound_dev_if = dev->netdev->ifindex;

	ret = s->ops->bind(s, (struct sockaddr *)laddr,
			   sizeof(struct sockaddr_in));
	if (ret)
		goto error;

	cep = erdma_cep_alloc(dev);
	if (!cep) {
		ret = -ENOMEM;
		goto error;
	}
	erdma_cep_socket_assoc(cep, s);

	ret = erdma_cm_alloc_work(cep, backlog);
	if (ret)
		goto error;

	ret = s->ops->listen(s, backlog);
	if (ret)
		goto error;

	cep->cm_id = id;
	id->add_ref(id);

	if (!id->provider_data) {
		id->provider_data =
			kmalloc(sizeof(struct list_head), GFP_KERNEL);
		if (!id->provider_data) {
			ret = -ENOMEM;
			goto error;
		}
		INIT_LIST_HEAD((struct list_head *)id->provider_data);
	}

	list_add_tail(&cep->listenq, (struct list_head *)id->provider_data);
	cep->state = ERDMA_EPSTATE_LISTENING;

	return 0;

error:
	if (cep) {
		erdma_cep_set_inuse(cep);

		if (cep->cm_id) {
			cep->cm_id->rem_ref(cep->cm_id);
			cep->cm_id = NULL;
		}
		cep->sock = NULL;
		erdma_socket_disassoc(s);
		cep->state = ERDMA_EPSTATE_CLOSED;

		erdma_cep_set_free(cep);
		erdma_cep_put(cep);
	}
	sock_release(s);

	return ret;
}
static void erdma_drop_listeners(struct iw_cm_id *id)
{
	struct list_head *p, *tmp;

	/*
	 * In case of a wildcard rdma_listen on a multi-homed device,
	 * a listener's IWCM id is associated with more than one listening CEP.
	 */
	list_for_each_safe(p, tmp, (struct list_head *)id->provider_data) {
		struct erdma_cep *cep =
			list_entry(p, struct erdma_cep, listenq);

		list_del(p);

		erdma_cep_set_inuse(cep);

		if (cep->cm_id) {
			cep->cm_id->rem_ref(cep->cm_id);
			cep->cm_id = NULL;
		}
		if (cep->sock) {
			erdma_socket_disassoc(cep->sock);
			sock_release(cep->sock);
			cep->sock = NULL;
		}
		cep->state = ERDMA_EPSTATE_CLOSED;
		erdma_cep_set_free(cep);
		erdma_cep_put(cep);
	}
}

int erdma_destroy_listen(struct iw_cm_id *id)
{
	if (!id->provider_data)
		return 0;

	erdma_drop_listeners(id);
	kfree(id->provider_data);
	id->provider_data = NULL;

	return 0;
}
int erdma_cm_init(void)
{
	erdma_cm_wq = create_singlethread_workqueue("erdma_cm_wq");
	if (!erdma_cm_wq)
		return -ENOMEM;

	return 0;
}

void erdma_cm_exit(void)
{
	if (erdma_cm_wq)
		destroy_workqueue(erdma_cm_wq);
}