af_smc.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * AF_SMC protocol family socket handler keeping the AF_INET sock address type
 * applies to SOCK_STREAM sockets only
 * offers an alternative communication option for TCP-protocol sockets
 * applicable with RoCE-cards only
 *
 * Initial restrictions:
 *   - support for alternate links postponed
 *
 * Copyright IBM Corp. 2016, 2018
 *
 * Author(s):  Ursula Braun <[email protected]>
 *             based on prototype from Frank Blaschka
 */

#define KMSG_COMPONENT "smc"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/module.h>
#include <linux/socket.h>
#include <linux/workqueue.h>
#include <linux/in.h>
#include <linux/sched/signal.h>
#include <linux/if_vlan.h>
#include <linux/rcupdate_wait.h>
#include <linux/ctype.h>

#include <net/sock.h>
#include <net/tcp.h>
#include <net/smc.h>
#include <asm/ioctls.h>

#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include "smc_netns.h"

#include "smc.h"
#include "smc_clc.h"
#include "smc_llc.h"
#include "smc_cdc.h"
#include "smc_core.h"
#include "smc_ib.h"
#include "smc_ism.h"
#include "smc_pnet.h"
#include "smc_netlink.h"
#include "smc_tx.h"
#include "smc_rx.h"
#include "smc_close.h"
#include "smc_stats.h"
#include "smc_tracepoint.h"
#include "smc_sysctl.h"

static DEFINE_MUTEX(smc_server_lgr_pending);	/* serialize link group
						 * creation on server
						 */
static DEFINE_MUTEX(smc_client_lgr_pending);	/* serialize link group
						 * creation on client
						 */

static struct workqueue_struct	*smc_tcp_ls_wq;	/* wq for tcp listen work */
struct workqueue_struct	*smc_hs_wq;	/* wq for handshake work */
struct workqueue_struct	*smc_close_wq;	/* wq for close work */

static void smc_tcp_listen_work(struct work_struct *);
static void smc_connect_work(struct work_struct *);
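
/* report the current handshake-limitation setting of the net namespace
 * as a single netlink dump message
 */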
int smc_nl_dump_hs_limitation(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
	void *hdr;

	if (cb_ctx->pos[0])
		goto out;

	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			  &smc_gen_nl_family, NLM_F_MULTI,
			  SMC_NETLINK_DUMP_HS_LIMITATION);
	if (!hdr)
		return -ENOMEM;

	if (nla_put_u8(skb, SMC_NLA_HS_LIMITATION_ENABLED,
		       sock_net(skb->sk)->smc.limit_smc_hs))
		goto err;

	genlmsg_end(skb, hdr);
	cb_ctx->pos[0] = 1;
out:
	return skb->len;
err:
	genlmsg_cancel(skb, hdr);
	return -EMSGSIZE;
}

int smc_nl_enable_hs_limitation(struct sk_buff *skb, struct genl_info *info)
{
	sock_net(skb->sk)->smc.limit_smc_hs = true;
	return 0;
}

int smc_nl_disable_hs_limitation(struct sk_buff *skb, struct genl_info *info)
{
	sock_net(skb->sk)->smc.limit_smc_hs = false;
	return 0;
}
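
/* forward a keepalive setting to the internal CLC TCP socket */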
static void smc_set_keepalive(struct sock *sk, int val)
{
	struct smc_sock *smc = smc_sk(sk);

	smc->clcsock->sk->sk_prot->keepalive(smc->clcsock->sk, val);
}
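
/* syn_recv_sock callback installed on the internal TCP listen socket:
 * drop the connection when the SMC handshake queue plus the TCP accept
 * backlog is exhausted, and make sure the TCP child socket does not keep
 * SMC-specific user data or ops
 */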
static struct sock *smc_tcp_syn_recv_sock(const struct sock *sk,
					  struct sk_buff *skb,
					  struct request_sock *req,
					  struct dst_entry *dst,
					  struct request_sock *req_unhash,
					  bool *own_req)
{
	struct smc_sock *smc;
	struct sock *child;

	smc = smc_clcsock_user_data(sk);

	if (READ_ONCE(sk->sk_ack_backlog) + atomic_read(&smc->queued_smc_hs) >
	    sk->sk_max_ack_backlog)
		goto drop;

	if (sk_acceptq_is_full(&smc->sk)) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
		goto drop;
	}

	/* passthrough to original syn recv sock fct */
	child = smc->ori_af_ops->syn_recv_sock(sk, skb, req, dst, req_unhash,
					       own_req);
	/* child must not inherit smc or its ops */
	if (child) {
		rcu_assign_sk_user_data(child, NULL);

		/* v4-mapped sockets don't inherit parent ops. Don't restore. */
		if (inet_csk(child)->icsk_af_ops == inet_csk(sk)->icsk_af_ops)
			inet_csk(child)->icsk_af_ops = smc->ori_af_ops;
	}
	return child;

drop:
	dst_release(dst);
	tcp_listendrop(sk);
	return NULL;
}
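
/* report whether the SMC handshake path of this listen socket is congested,
 * so that new incoming connections can be handled as plain TCP instead
 */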
static bool smc_hs_congested(const struct sock *sk)
{
	const struct smc_sock *smc;

	smc = smc_clcsock_user_data(sk);

	if (!smc)
		return true;

	if (workqueue_congested(WORK_CPU_UNBOUND, smc_hs_wq))
		return true;

	return false;
}

static struct smc_hashinfo smc_v4_hashinfo = {
	.lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
};

static struct smc_hashinfo smc_v6_hashinfo = {
	.lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock),
};

int smc_hash_sk(struct sock *sk)
{
	struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
	struct hlist_head *head;

	head = &h->ht;

	write_lock_bh(&h->lock);
	sk_add_node(sk, head);
	write_unlock_bh(&h->lock);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);

	return 0;
}
EXPORT_SYMBOL_GPL(smc_hash_sk);

void smc_unhash_sk(struct sock *sk)
{
	struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;

	write_lock_bh(&h->lock);
	if (sk_del_node_init(sk))
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	write_unlock_bh(&h->lock);
}
EXPORT_SYMBOL_GPL(smc_unhash_sk);

/* This is called just before the user really releases the sock_lock. Do the
 * work that was deferred in BH context because the user held the sock_lock.
 */
static void smc_release_cb(struct sock *sk)
{
	struct smc_sock *smc = smc_sk(sk);

	if (smc->conn.tx_in_release_sock) {
		smc_tx_pending(&smc->conn);
		smc->conn.tx_in_release_sock = false;
	}
}

struct proto smc_proto = {
	.name		= "SMC",
	.owner		= THIS_MODULE,
	.keepalive	= smc_set_keepalive,
	.hash		= smc_hash_sk,
	.unhash		= smc_unhash_sk,
	.release_cb	= smc_release_cb,
	.obj_size	= sizeof(struct smc_sock),
	.h.smc_hash	= &smc_v4_hashinfo,
	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
};
EXPORT_SYMBOL_GPL(smc_proto);

struct proto smc_proto6 = {
	.name		= "SMC6",
	.owner		= THIS_MODULE,
	.keepalive	= smc_set_keepalive,
	.hash		= smc_hash_sk,
	.unhash		= smc_unhash_sk,
	.release_cb	= smc_release_cb,
	.obj_size	= sizeof(struct smc_sock),
	.h.smc_hash	= &smc_v6_hashinfo,
	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
};
EXPORT_SYMBOL_GPL(smc_proto6);

static void smc_fback_restore_callbacks(struct smc_sock *smc)
{
	struct sock *clcsk = smc->clcsock->sk;

	write_lock_bh(&clcsk->sk_callback_lock);
	clcsk->sk_user_data = NULL;

	smc_clcsock_restore_cb(&clcsk->sk_state_change, &smc->clcsk_state_change);
	smc_clcsock_restore_cb(&clcsk->sk_data_ready, &smc->clcsk_data_ready);
	smc_clcsock_restore_cb(&clcsk->sk_write_space, &smc->clcsk_write_space);
	smc_clcsock_restore_cb(&clcsk->sk_error_report, &smc->clcsk_error_report);

	write_unlock_bh(&clcsk->sk_callback_lock);
}

static void smc_restore_fallback_changes(struct smc_sock *smc)
{
	if (smc->clcsock->file) { /* non-accepted sockets have no file yet */
		smc->clcsock->file->private_data = smc->sk.sk_socket;
		smc->clcsock->file = NULL;
		smc_fback_restore_callbacks(smc);
	}
}
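
/* release an smc socket's resources; called with the sock lock held */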
static int __smc_release(struct smc_sock *smc)
{
	struct sock *sk = &smc->sk;
	int rc = 0;

	if (!smc->use_fallback) {
		rc = smc_close_active(smc);
		smc_sock_set_flag(sk, SOCK_DEAD);
		sk->sk_shutdown |= SHUTDOWN_MASK;
	} else {
		if (sk->sk_state != SMC_CLOSED) {
			if (sk->sk_state != SMC_LISTEN &&
			    sk->sk_state != SMC_INIT)
				sock_put(sk); /* passive closing */
			if (sk->sk_state == SMC_LISTEN) {
				/* wake up clcsock accept */
				rc = kernel_sock_shutdown(smc->clcsock,
							  SHUT_RDWR);
			}
			sk->sk_state = SMC_CLOSED;
			sk->sk_state_change(sk);
		}
		smc_restore_fallback_changes(smc);
	}

	sk->sk_prot->unhash(sk);

	if (sk->sk_state == SMC_CLOSED) {
		if (smc->clcsock) {
			release_sock(sk);
			smc_clcsock_release(smc);
			lock_sock(sk);
		}
		if (!smc->use_fallback)
			smc_conn_free(&smc->conn);
	}

	return rc;
}
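
/* close the socket from the socket layer: abort a dangling non-blocking
 * connect, release the connection and detach the sock from the socket
 */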
static int smc_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int old_state, rc = 0;

	if (!sk)
		goto out;

	sock_hold(sk); /* sock_put below */
	smc = smc_sk(sk);

	old_state = sk->sk_state;

	/* cleanup for a dangling non-blocking connect */
	if (smc->connect_nonblock && old_state == SMC_INIT)
		tcp_abort(smc->clcsock->sk, ECONNABORTED);

	if (cancel_work_sync(&smc->connect_work))
		sock_put(&smc->sk); /* sock_hold in smc_connect for passive closing */

	if (sk->sk_state == SMC_LISTEN)
		/* smc_close_non_accepted() is called and acquires
		 * sock lock for child sockets again
		 */
		lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
	else
		lock_sock(sk);

	if (old_state == SMC_INIT && sk->sk_state == SMC_ACTIVE &&
	    !smc->use_fallback)
		smc_close_active_abort(smc);

	rc = __smc_release(smc);

	/* detach socket */
	sock_orphan(sk);
	sock->sk = NULL;
	release_sock(sk);

	sock_put(sk); /* sock_hold above */
	sock_put(sk); /* final sock_put */
out:
	return rc;
}

static void smc_destruct(struct sock *sk)
{
	if (sk->sk_state != SMC_CLOSED)
		return;
	if (!sock_flag(sk, SOCK_DEAD))
		return;

	sk_refcnt_debug_dec(sk);
}
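
/* allocate and initialize an smc sock for the given protocol
 * (SMCPROTO_SMC for IPv4, SMCPROTO_SMC6 for IPv6)
 */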
static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
				   int protocol)
{
	struct smc_sock *smc;
	struct proto *prot;
	struct sock *sk;

	prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
	sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
	if (!sk)
		return NULL;

	sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
	sk->sk_state = SMC_INIT;
	sk->sk_destruct = smc_destruct;
	sk->sk_protocol = protocol;
	WRITE_ONCE(sk->sk_sndbuf, 2 * READ_ONCE(net->smc.sysctl_wmem));
	WRITE_ONCE(sk->sk_rcvbuf, 2 * READ_ONCE(net->smc.sysctl_rmem));
	smc = smc_sk(sk);
	INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
	INIT_WORK(&smc->connect_work, smc_connect_work);
	INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
	INIT_LIST_HEAD(&smc->accept_q);
	spin_lock_init(&smc->accept_q_lock);
	spin_lock_init(&smc->conn.send_lock);
	sk->sk_prot->hash(sk);
	sk_refcnt_debug_inc(sk);
	mutex_init(&smc->clcsock_release_lock);
	smc_init_saved_callbacks(smc);

	return sk;
}
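
/* bind the internal CLC socket after replicating the inet_bind()
 * sanity checks on the given address
 */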
static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
		    int addr_len)
{
	struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sk);

	/* replicate tests from inet_bind(), to be safe wrt. future changes */
	rc = -EINVAL;
	if (addr_len < sizeof(struct sockaddr_in))
		goto out;

	rc = -EAFNOSUPPORT;
	if (addr->sin_family != AF_INET &&
	    addr->sin_family != AF_INET6 &&
	    addr->sin_family != AF_UNSPEC)
		goto out;
	/* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
	if (addr->sin_family == AF_UNSPEC &&
	    addr->sin_addr.s_addr != htonl(INADDR_ANY))
		goto out;

	lock_sock(sk);

	/* Check if socket is already active */
	rc = -EINVAL;
	if (sk->sk_state != SMC_INIT || smc->connect_nonblock)
		goto out_rel;

	smc->clcsock->sk->sk_reuse = sk->sk_reuse;
	smc->clcsock->sk->sk_reuseport = sk->sk_reuseport;
	rc = kernel_bind(smc->clcsock, uaddr, addr_len);

out_rel:
	release_sock(sk);
out:
	return rc;
}

/* copy only relevant settings and flags of SOL_SOCKET level from smc to
 * clc socket (since smc is not called for these options from net/core)
 */

#define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
			     (1UL << SOCK_KEEPOPEN) | \
			     (1UL << SOCK_LINGER) | \
			     (1UL << SOCK_BROADCAST) | \
			     (1UL << SOCK_TIMESTAMP) | \
			     (1UL << SOCK_DBG) | \
			     (1UL << SOCK_RCVTSTAMP) | \
			     (1UL << SOCK_RCVTSTAMPNS) | \
			     (1UL << SOCK_LOCALROUTE) | \
			     (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
			     (1UL << SOCK_RXQ_OVFL) | \
			     (1UL << SOCK_WIFI_STATUS) | \
			     (1UL << SOCK_NOFCS) | \
			     (1UL << SOCK_FILTER_LOCKED) | \
			     (1UL << SOCK_TSTAMP_NEW))

/* if set, use value set by setsockopt() - else use IPv4 or SMC sysctl value */
static void smc_adjust_sock_bufsizes(struct sock *nsk, struct sock *osk,
				     unsigned long mask)
{
	struct net *nnet = sock_net(nsk);

	nsk->sk_userlocks = osk->sk_userlocks;
	if (osk->sk_userlocks & SOCK_SNDBUF_LOCK) {
		nsk->sk_sndbuf = osk->sk_sndbuf;
	} else {
		if (mask == SK_FLAGS_SMC_TO_CLC)
			WRITE_ONCE(nsk->sk_sndbuf,
				   READ_ONCE(nnet->ipv4.sysctl_tcp_wmem[1]));
		else
			WRITE_ONCE(nsk->sk_sndbuf,
				   2 * READ_ONCE(nnet->smc.sysctl_wmem));
	}
	if (osk->sk_userlocks & SOCK_RCVBUF_LOCK) {
		nsk->sk_rcvbuf = osk->sk_rcvbuf;
	} else {
		if (mask == SK_FLAGS_SMC_TO_CLC)
			WRITE_ONCE(nsk->sk_rcvbuf,
				   READ_ONCE(nnet->ipv4.sysctl_tcp_rmem[1]));
		else
			WRITE_ONCE(nsk->sk_rcvbuf,
				   2 * READ_ONCE(nnet->smc.sysctl_rmem));
	}
}

static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
				   unsigned long mask)
{
	/* options we don't get control of via setsockopt() */
	nsk->sk_type = osk->sk_type;
	nsk->sk_sndtimeo = osk->sk_sndtimeo;
	nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
	nsk->sk_mark = READ_ONCE(osk->sk_mark);
	nsk->sk_priority = osk->sk_priority;
	nsk->sk_rcvlowat = osk->sk_rcvlowat;
	nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
	nsk->sk_err = osk->sk_err;

	nsk->sk_flags &= ~mask;
	nsk->sk_flags |= osk->sk_flags & mask;

	smc_adjust_sock_bufsizes(nsk, osk, mask);
}

static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
{
	smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
}

#define SK_FLAGS_CLC_TO_SMC ((1UL << SOCK_URGINLINE) | \
			     (1UL << SOCK_KEEPOPEN) | \
			     (1UL << SOCK_LINGER) | \
			     (1UL << SOCK_DBG))

/* copy only settings and flags relevant for smc from clc to smc socket */
static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
{
	smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
}

/* register the new vzalloced sndbuf on all links */
static int smcr_lgr_reg_sndbufs(struct smc_link *link,
				struct smc_buf_desc *snd_desc)
{
	struct smc_link_group *lgr = link->lgr;
	int i, rc = 0;

	if (!snd_desc->is_vm)
		return -EINVAL;

	/* protect against parallel smcr_link_reg_buf() */
	mutex_lock(&lgr->llc_conf_mutex);
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (!smc_link_active(&lgr->lnk[i]))
			continue;
		rc = smcr_link_reg_buf(&lgr->lnk[i], snd_desc);
		if (rc)
			break;
	}
	mutex_unlock(&lgr->llc_conf_mutex);
	return rc;
}

/* register the new rmb on all links */
static int smcr_lgr_reg_rmbs(struct smc_link *link,
			     struct smc_buf_desc *rmb_desc)
{
	struct smc_link_group *lgr = link->lgr;
	int i, rc = 0;

	rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
	if (rc)
		return rc;

	/* protect against parallel smc_llc_cli_rkey_exchange() and
	 * parallel smcr_link_reg_buf()
	 */
	mutex_lock(&lgr->llc_conf_mutex);
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (!smc_link_active(&lgr->lnk[i]))
			continue;
		rc = smcr_link_reg_buf(&lgr->lnk[i], rmb_desc);
		if (rc)
			goto out;
	}

	/* exchange confirm_rkey msg with peer */
	rc = smc_llc_do_confirm_rkey(link, rmb_desc);
	if (rc) {
		rc = -EFAULT;
		goto out;
	}
	rmb_desc->is_conf_rkey = true;
out:
	mutex_unlock(&lgr->llc_conf_mutex);
	smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
	return rc;
}
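
/* first-contact link confirmation on the client side: wait for the server's
 * CONFIRM LINK request, bring the QP to RTS, register the buffers and answer
 * with a CONFIRM LINK response; an ADD LINK request for an optional second
 * link may follow
 */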
static int smcr_clnt_conf_first_link(struct smc_sock *smc)
{
	struct smc_link *link = smc->conn.lnk;
	struct smc_llc_qentry *qentry;
	int rc;

	/* Receive CONFIRM LINK request from server over RoCE fabric.
	 * Using twice the server's default timeout for the client can
	 * temporarily avoid decline messages of both sides crossing or
	 * colliding.
	 */
	qentry = smc_llc_wait(link->lgr, NULL, 2 * SMC_LLC_WAIT_TIME,
			      SMC_LLC_CONFIRM_LINK);
	if (!qentry) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
	}
	smc_llc_save_peer_uid(qentry);
	rc = smc_llc_eval_conf_link(qentry, SMC_LLC_REQ);
	smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl);
	if (rc)
		return SMC_CLC_DECL_RMBE_EC;

	rc = smc_ib_modify_qp_rts(link);
	if (rc)
		return SMC_CLC_DECL_ERR_RDYLNK;

	smc_wr_remember_qp_attr(link);

	/* reg the sndbuf if it was vzalloced */
	if (smc->conn.sndbuf_desc->is_vm) {
		if (smcr_link_reg_buf(link, smc->conn.sndbuf_desc))
			return SMC_CLC_DECL_ERR_REGBUF;
	}

	/* reg the rmb */
	if (smcr_link_reg_buf(link, smc->conn.rmb_desc))
		return SMC_CLC_DECL_ERR_REGBUF;

	/* confirm_rkey is implicit on 1st contact */
	smc->conn.rmb_desc->is_conf_rkey = true;

	/* send CONFIRM LINK response over RoCE fabric */
	rc = smc_llc_send_confirm_link(link, SMC_LLC_RESP);
	if (rc < 0)
		return SMC_CLC_DECL_TIMEOUT_CL;

	smc_llc_link_active(link);
	smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE);

	/* optional 2nd link, receive ADD LINK request from server */
	qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
			      SMC_LLC_ADD_LINK);
	if (!qentry) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
		if (rc == -EAGAIN)
			rc = 0; /* no DECLINE received, go with one link */
		return rc;
	}
	smc_llc_flow_qentry_clr(&link->lgr->llc_flow_lcl);
	smc_llc_cli_add_link(link, qentry);
	return 0;
}

static bool smc_isascii(char *hostname)
{
	int i;

	for (i = 0; i < SMC_MAX_HOSTNAME_LEN; i++)
		if (!isascii(hostname[i]))
			return false;
	return true;
}

static void smc_conn_save_peer_info_fce(struct smc_sock *smc,
					struct smc_clc_msg_accept_confirm *clc)
{
	struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
		(struct smc_clc_msg_accept_confirm_v2 *)clc;
	struct smc_clc_first_contact_ext *fce;
	int clc_v2_len;

	if (clc->hdr.version == SMC_V1 ||
	    !(clc->hdr.typev2 & SMC_FIRST_CONTACT_MASK))
		return;

	if (smc->conn.lgr->is_smcd) {
		memcpy(smc->conn.lgr->negotiated_eid, clc_v2->d1.eid,
		       SMC_MAX_EID_LEN);
		clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm_v2,
					 d1);
	} else {
		memcpy(smc->conn.lgr->negotiated_eid, clc_v2->r1.eid,
		       SMC_MAX_EID_LEN);
		clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm_v2,
					 r1);
	}
	fce = (struct smc_clc_first_contact_ext *)(((u8 *)clc_v2) + clc_v2_len);
	smc->conn.lgr->peer_os = fce->os_type;
	smc->conn.lgr->peer_smc_release = fce->release;
	if (smc_isascii(fce->hostname))
		memcpy(smc->conn.lgr->peer_hostname, fce->hostname,
		       SMC_MAX_HOSTNAME_LEN);
}

static void smcr_conn_save_peer_info(struct smc_sock *smc,
				     struct smc_clc_msg_accept_confirm *clc)
{
	int bufsize = smc_uncompress_bufsize(clc->r0.rmbe_size);

	smc->conn.peer_rmbe_idx = clc->r0.rmbe_idx;
	smc->conn.local_tx_ctrl.token = ntohl(clc->r0.rmbe_alert_token);
	smc->conn.peer_rmbe_size = bufsize;
	atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
	smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1);
}

static void smcd_conn_save_peer_info(struct smc_sock *smc,
				     struct smc_clc_msg_accept_confirm *clc)
{
	int bufsize = smc_uncompress_bufsize(clc->d0.dmbe_size);

	smc->conn.peer_rmbe_idx = clc->d0.dmbe_idx;
	smc->conn.peer_token = clc->d0.token;
	/* msg header takes up space in the buffer */
	smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg);
	atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
	smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx;
}

static void smc_conn_save_peer_info(struct smc_sock *smc,
				    struct smc_clc_msg_accept_confirm *clc)
{
	if (smc->conn.lgr->is_smcd)
		smcd_conn_save_peer_info(smc, clc);
	else
		smcr_conn_save_peer_info(smc, clc);
	smc_conn_save_peer_info_fce(smc, clc);
}

static void smc_link_save_peer_info(struct smc_link *link,
				    struct smc_clc_msg_accept_confirm *clc,
				    struct smc_init_info *ini)
{
	link->peer_qpn = ntoh24(clc->r0.qpn);
	memcpy(link->peer_gid, ini->peer_gid, SMC_GID_SIZE);
	memcpy(link->peer_mac, ini->peer_mac, sizeof(link->peer_mac));
	link->peer_psn = ntoh24(clc->r0.psn);
	link->peer_mtu = clc->r0.qp_mtu;
}

static void smc_stat_inc_fback_rsn_cnt(struct smc_sock *smc,
				       struct smc_stats_fback *fback_arr)
{
	int cnt;

	for (cnt = 0; cnt < SMC_MAX_FBACK_RSN_CNT; cnt++) {
		if (fback_arr[cnt].fback_code == smc->fallback_rsn) {
			fback_arr[cnt].count++;
			break;
		}
		if (!fback_arr[cnt].fback_code) {
			fback_arr[cnt].fback_code = smc->fallback_rsn;
			fback_arr[cnt].count++;
			break;
		}
	}
}
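
/* account the fallback reason in the per-namespace statistics, separately
 * for server and client sockets
 */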
static void smc_stat_fallback(struct smc_sock *smc)
{
	struct net *net = sock_net(&smc->sk);

	mutex_lock(&net->smc.mutex_fback_rsn);
	if (smc->listen_smc) {
		smc_stat_inc_fback_rsn_cnt(smc, net->smc.fback_rsn->srv);
		net->smc.fback_rsn->srv_fback_cnt++;
	} else {
		smc_stat_inc_fback_rsn_cnt(smc, net->smc.fback_rsn->clnt);
		net->smc.fback_rsn->clnt_fback_cnt++;
	}
	mutex_unlock(&net->smc.mutex_fback_rsn);
}

/* must be called under rcu read lock */
static void smc_fback_wakeup_waitqueue(struct smc_sock *smc, void *key)
{
	struct socket_wq *wq;
	__poll_t flags;

	wq = rcu_dereference(smc->sk.sk_wq);
	if (!skwq_has_sleeper(wq))
		return;

	/* wake up smc sk->sk_wq */
	if (!key) {
		/* sk_state_change */
		wake_up_interruptible_all(&wq->wait);
	} else {
		flags = key_to_poll(key);
		if (flags & (EPOLLIN | EPOLLOUT))
			/* sk_data_ready or sk_write_space */
			wake_up_interruptible_sync_poll(&wq->wait, flags);
		else if (flags & EPOLLERR)
			/* sk_error_report */
			wake_up_interruptible_poll(&wq->wait, flags);
	}
}
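
/* wait queue entry callback: just note that a wakeup happened and keep the
 * poll key for forwarding; do not wake anybody up here
 */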
static int smc_fback_mark_woken(wait_queue_entry_t *wait,
				unsigned int mode, int sync, void *key)
{
	struct smc_mark_woken *mark =
		container_of(wait, struct smc_mark_woken, wait_entry);

	mark->woken = true;
	mark->key = key;
	return 0;
}

static void smc_fback_forward_wakeup(struct smc_sock *smc, struct sock *clcsk,
				     void (*clcsock_callback)(struct sock *sk))
{
	struct smc_mark_woken mark = { .woken = false };
	struct socket_wq *wq;

	init_waitqueue_func_entry(&mark.wait_entry,
				  smc_fback_mark_woken);
	rcu_read_lock();
	wq = rcu_dereference(clcsk->sk_wq);
	if (!wq)
		goto out;
	add_wait_queue(sk_sleep(clcsk), &mark.wait_entry);
	clcsock_callback(clcsk);
	remove_wait_queue(sk_sleep(clcsk), &mark.wait_entry);

	if (mark.woken)
		smc_fback_wakeup_waitqueue(smc, mark.key);
out:
	rcu_read_unlock();
}

static void smc_fback_state_change(struct sock *clcsk)
{
	struct smc_sock *smc;

	read_lock_bh(&clcsk->sk_callback_lock);
	smc = smc_clcsock_user_data(clcsk);
	if (smc)
		smc_fback_forward_wakeup(smc, clcsk,
					 smc->clcsk_state_change);
	read_unlock_bh(&clcsk->sk_callback_lock);
}

static void smc_fback_data_ready(struct sock *clcsk)
{
	struct smc_sock *smc;

	read_lock_bh(&clcsk->sk_callback_lock);
	smc = smc_clcsock_user_data(clcsk);
	if (smc)
		smc_fback_forward_wakeup(smc, clcsk,
					 smc->clcsk_data_ready);
	read_unlock_bh(&clcsk->sk_callback_lock);
}

static void smc_fback_write_space(struct sock *clcsk)
{
	struct smc_sock *smc;

	read_lock_bh(&clcsk->sk_callback_lock);
	smc = smc_clcsock_user_data(clcsk);
	if (smc)
		smc_fback_forward_wakeup(smc, clcsk,
					 smc->clcsk_write_space);
	read_unlock_bh(&clcsk->sk_callback_lock);
}

static void smc_fback_error_report(struct sock *clcsk)
{
	struct smc_sock *smc;

	read_lock_bh(&clcsk->sk_callback_lock);
	smc = smc_clcsock_user_data(clcsk);
	if (smc)
		smc_fback_forward_wakeup(smc, clcsk,
					 smc->clcsk_error_report);
	read_unlock_bh(&clcsk->sk_callback_lock);
}
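
/* divert the clcsock's wakeup callbacks to the smc socket's wait queue, so
 * that sleepers on the smc socket are still woken after the fallback
 */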
static void smc_fback_replace_callbacks(struct smc_sock *smc)
{
	struct sock *clcsk = smc->clcsock->sk;

	write_lock_bh(&clcsk->sk_callback_lock);
	clcsk->sk_user_data = (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY);

	smc_clcsock_replace_cb(&clcsk->sk_state_change, smc_fback_state_change,
			       &smc->clcsk_state_change);
	smc_clcsock_replace_cb(&clcsk->sk_data_ready, smc_fback_data_ready,
			       &smc->clcsk_data_ready);
	smc_clcsock_replace_cb(&clcsk->sk_write_space, smc_fback_write_space,
			       &smc->clcsk_write_space);
	smc_clcsock_replace_cb(&clcsk->sk_error_report, smc_fback_error_report,
			       &smc->clcsk_error_report);

	write_unlock_bh(&clcsk->sk_callback_lock);
}
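
/* switch this connection to use the internal TCP socket directly (TCP
 * fallback); the smc socket's file then refers to the clcsock
 */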
static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
{
	int rc = 0;

	mutex_lock(&smc->clcsock_release_lock);
	if (!smc->clcsock) {
		rc = -EBADF;
		goto out;
	}
	smc->use_fallback = true;
	smc->fallback_rsn = reason_code;
	smc_stat_fallback(smc);
	trace_smc_switch_to_fallback(smc, reason_code);
	if (smc->sk.sk_socket && smc->sk.sk_socket->file) {
		smc->clcsock->file = smc->sk.sk_socket->file;
		smc->clcsock->file->private_data = smc->clcsock;
		smc->clcsock->wq.fasync_list =
			smc->sk.sk_socket->wq.fasync_list;

		/* There might be some wait entries remaining
		 * in smc sk->sk_wq and they should be woken up
		 * as clcsock's wait queue is woken up.
		 */
		smc_fback_replace_callbacks(smc);
	}
out:
	mutex_unlock(&smc->clcsock_release_lock);
	return rc;
}

/* fall back during connect */
static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
{
	struct net *net = sock_net(&smc->sk);
	int rc = 0;

	rc = smc_switch_to_fallback(smc, reason_code);
	if (rc) { /* fallback fails */
		this_cpu_inc(net->smc.smc_stats->clnt_hshake_err_cnt);
		if (smc->sk.sk_state == SMC_INIT)
			sock_put(&smc->sk); /* passive closing */
		return rc;
	}
	smc_copy_sock_settings_to_clc(smc);
	smc->connect_nonblock = 0;
	if (smc->sk.sk_state == SMC_INIT)
		smc->sk.sk_state = SMC_ACTIVE;
	return 0;
}

/* decline and fall back during connect */
static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code,
					u8 version)
{
	struct net *net = sock_net(&smc->sk);
	int rc;

	if (reason_code < 0) { /* error, fallback is not possible */
		this_cpu_inc(net->smc.smc_stats->clnt_hshake_err_cnt);
		if (smc->sk.sk_state == SMC_INIT)
			sock_put(&smc->sk); /* passive closing */
		return reason_code;
	}
	if (reason_code != SMC_CLC_DECL_PEERDECL) {
		rc = smc_clc_send_decline(smc, reason_code, version);
		if (rc < 0) {
			this_cpu_inc(net->smc.smc_stats->clnt_hshake_err_cnt);
			if (smc->sk.sk_state == SMC_INIT)
				sock_put(&smc->sk); /* passive closing */
			return rc;
		}
	}
	return smc_connect_fallback(smc, reason_code);
}

static void smc_conn_abort(struct smc_sock *smc, int local_first)
{
	struct smc_connection *conn = &smc->conn;
	struct smc_link_group *lgr = conn->lgr;
	bool lgr_valid = false;

	if (smc_conn_lgr_valid(conn))
		lgr_valid = true;

	smc_conn_free(conn);
	if (local_first && lgr_valid)
		smc_lgr_cleanup_early(lgr);
}

/* check if there is an RDMA device available for this connection. */
/* called for connect and listen */
static int smc_find_rdma_device(struct smc_sock *smc, struct smc_init_info *ini)
{
	/* PNET table look up: search active ib_device and port
	 * within same PNETID that also contains the ethernet device
	 * used for the internal TCP socket
	 */
	smc_pnet_find_roce_resource(smc->clcsock->sk, ini);
	if (!ini->check_smcrv2 && !ini->ib_dev)
		return SMC_CLC_DECL_NOSMCRDEV;
	if (ini->check_smcrv2 && !ini->smcrv2.ib_dev_v2)
		return SMC_CLC_DECL_NOSMCRDEV;
	return 0;
}

/* check if there is an ISM device available for this connection. */
/* called for connect and listen */
static int smc_find_ism_device(struct smc_sock *smc, struct smc_init_info *ini)
{
	/* Find ISM device with same PNETID as connecting interface */
	smc_pnet_find_ism_resource(smc->clcsock->sk, ini);
	if (!ini->ism_dev[0])
		return SMC_CLC_DECL_NOSMCDDEV;
	else
		ini->ism_chid[0] = smc_ism_get_chid(ini->ism_dev[0]);
	return 0;
}

/* is chid unique for the ism devices that are already determined? */
static bool smc_find_ism_v2_is_unique_chid(u16 chid, struct smc_init_info *ini,
					   int cnt)
{
	int i = (!ini->ism_dev[0]) ? 1 : 0;

	for (; i < cnt; i++)
		if (ini->ism_chid[i] == chid)
			return false;
	return true;
}

/* determine possible V2 ISM devices (either without PNETID or with PNETID plus
 * PNETID matching net_device)
 */
static int smc_find_ism_v2_device_clnt(struct smc_sock *smc,
				       struct smc_init_info *ini)
{
	int rc = SMC_CLC_DECL_NOSMCDDEV;
	struct smcd_dev *smcd;
	int i = 1;
	u16 chid;

	if (smcd_indicated(ini->smc_type_v1))
		rc = 0;		/* already initialized for V1 */
	mutex_lock(&smcd_dev_list.mutex);
	list_for_each_entry(smcd, &smcd_dev_list.list, list) {
		if (smcd->going_away || smcd == ini->ism_dev[0])
			continue;
		chid = smc_ism_get_chid(smcd);
		if (!smc_find_ism_v2_is_unique_chid(chid, ini, i))
			continue;
		if (!smc_pnet_is_pnetid_set(smcd->pnetid) ||
		    smc_pnet_is_ndev_pnetid(sock_net(&smc->sk), smcd->pnetid)) {
			ini->ism_dev[i] = smcd;
			ini->ism_chid[i] = chid;
			ini->is_smcd = true;
			rc = 0;
			i++;
			if (i > SMC_MAX_ISM_DEVS)
				break;
		}
	}
	mutex_unlock(&smcd_dev_list.mutex);
	ini->ism_offered_cnt = i - 1;
	if (!ini->ism_dev[0] && !ini->ism_dev[1])
		ini->smcd_version = 0;

	return rc;
}

/* Check for VLAN ID and register it on ISM device just for CLC handshake */
static int smc_connect_ism_vlan_setup(struct smc_sock *smc,
				      struct smc_init_info *ini)
{
	if (ini->vlan_id && smc_ism_get_vlan(ini->ism_dev[0], ini->vlan_id))
		return SMC_CLC_DECL_ISMVLANERR;
	return 0;
}
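
/* determine the ISM and RDMA devices (V1 and V2) that can be offered to
 * the peer in the CLC proposal message
 */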
  918. static int smc_find_proposal_devices(struct smc_sock *smc,
  919. struct smc_init_info *ini)
  920. {
  921. int rc = 0;
  922. /* check if there is an ism device available */
  923. if (!(ini->smcd_version & SMC_V1) ||
  924. smc_find_ism_device(smc, ini) ||
  925. smc_connect_ism_vlan_setup(smc, ini))
  926. ini->smcd_version &= ~SMC_V1;
  927. /* else ISM V1 is supported for this connection */
  928. /* check if there is an rdma device available */
  929. if (!(ini->smcr_version & SMC_V1) ||
  930. smc_find_rdma_device(smc, ini))
  931. ini->smcr_version &= ~SMC_V1;
  932. /* else RDMA is supported for this connection */
  933. ini->smc_type_v1 = smc_indicated_type(ini->smcd_version & SMC_V1,
  934. ini->smcr_version & SMC_V1);
  935. /* check if there is an ism v2 device available */
  936. if (!(ini->smcd_version & SMC_V2) ||
  937. !smc_ism_is_v2_capable() ||
  938. smc_find_ism_v2_device_clnt(smc, ini))
  939. ini->smcd_version &= ~SMC_V2;
  940. /* check if there is an rdma v2 device available */
  941. ini->check_smcrv2 = true;
  942. ini->smcrv2.saddr = smc->clcsock->sk->sk_rcv_saddr;
  943. if (!(ini->smcr_version & SMC_V2) ||
  944. smc->clcsock->sk->sk_family != AF_INET ||
  945. !smc_clc_ueid_count() ||
  946. smc_find_rdma_device(smc, ini))
  947. ini->smcr_version &= ~SMC_V2;
  948. ini->check_smcrv2 = false;
  949. ini->smc_type_v2 = smc_indicated_type(ini->smcd_version & SMC_V2,
  950. ini->smcr_version & SMC_V2);
  951. /* if neither ISM nor RDMA are supported, fallback */
  952. if (ini->smc_type_v1 == SMC_TYPE_N && ini->smc_type_v2 == SMC_TYPE_N)
  953. rc = SMC_CLC_DECL_NOSMCDEV;
  954. return rc;
  955. }
  956. /* cleanup temporary VLAN ID registration used for CLC handshake. If ISM is
  957. * used, the VLAN ID will be registered again during the connection setup.
  958. */
  959. static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc,
  960. struct smc_init_info *ini)
  961. {
  962. if (!smcd_indicated(ini->smc_type_v1))
  963. return 0;
  964. if (ini->vlan_id && smc_ism_put_vlan(ini->ism_dev[0], ini->vlan_id))
  965. return SMC_CLC_DECL_CNFERR;
  966. return 0;
  967. }
  968. #define SMC_CLC_MAX_ACCEPT_LEN \
  969. (sizeof(struct smc_clc_msg_accept_confirm_v2) + \
  970. sizeof(struct smc_clc_first_contact_ext) + \
  971. sizeof(struct smc_clc_msg_trail))
  972. /* CLC handshake during connect */
  973. static int smc_connect_clc(struct smc_sock *smc,
  974. struct smc_clc_msg_accept_confirm_v2 *aclc2,
  975. struct smc_init_info *ini)
  976. {
  977. int rc = 0;
  978. /* do inband token exchange */
  979. rc = smc_clc_send_proposal(smc, ini);
  980. if (rc)
  981. return rc;
  982. /* receive SMC Accept CLC message */
  983. return smc_clc_wait_msg(smc, aclc2, SMC_CLC_MAX_ACCEPT_LEN,
  984. SMC_CLC_ACCEPT, CLC_WAIT_TIME);
  985. }
  986. void smc_fill_gid_list(struct smc_link_group *lgr,
  987. struct smc_gidlist *gidlist,
  988. struct smc_ib_device *known_dev, u8 *known_gid)
  989. {
  990. struct smc_init_info *alt_ini = NULL;
  991. memset(gidlist, 0, sizeof(*gidlist));
  992. memcpy(gidlist->list[gidlist->len++], known_gid, SMC_GID_SIZE);
  993. alt_ini = kzalloc(sizeof(*alt_ini), GFP_KERNEL);
  994. if (!alt_ini)
  995. goto out;
  996. alt_ini->vlan_id = lgr->vlan_id;
  997. alt_ini->check_smcrv2 = true;
  998. alt_ini->smcrv2.saddr = lgr->saddr;
  999. smc_pnet_find_alt_roce(lgr, alt_ini, known_dev);
  1000. if (!alt_ini->smcrv2.ib_dev_v2)
  1001. goto out;
  1002. memcpy(gidlist->list[gidlist->len++], alt_ini->smcrv2.ib_gid_v2,
  1003. SMC_GID_SIZE);
  1004. out:
  1005. kfree(alt_ini);
  1006. }
  1007. static int smc_connect_rdma_v2_prepare(struct smc_sock *smc,
  1008. struct smc_clc_msg_accept_confirm *aclc,
  1009. struct smc_init_info *ini)
  1010. {
  1011. struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
  1012. (struct smc_clc_msg_accept_confirm_v2 *)aclc;
  1013. struct smc_clc_first_contact_ext *fce =
  1014. (struct smc_clc_first_contact_ext *)
  1015. (((u8 *)clc_v2) + sizeof(*clc_v2));
  1016. struct net *net = sock_net(&smc->sk);
  1017. if (!ini->first_contact_peer || aclc->hdr.version == SMC_V1)
  1018. return 0;
  1019. if (fce->v2_direct) {
  1020. memcpy(ini->smcrv2.nexthop_mac, &aclc->r0.lcl.mac, ETH_ALEN);
  1021. ini->smcrv2.uses_gateway = false;
  1022. } else {
  1023. if (smc_ib_find_route(net, smc->clcsock->sk->sk_rcv_saddr,
  1024. smc_ib_gid_to_ipv4(aclc->r0.lcl.gid),
  1025. ini->smcrv2.nexthop_mac,
  1026. &ini->smcrv2.uses_gateway))
  1027. return SMC_CLC_DECL_NOROUTE;
  1028. if (!ini->smcrv2.uses_gateway) {
  1029. /* mismatch: peer claims indirect, but its direct */
  1030. return SMC_CLC_DECL_NOINDIRECT;
  1031. }
  1032. }
  1033. return 0;
  1034. }
  1035. /* setup for RDMA connection of client */
  1036. static int smc_connect_rdma(struct smc_sock *smc,
  1037. struct smc_clc_msg_accept_confirm *aclc,
  1038. struct smc_init_info *ini)
  1039. {
  1040. int i, reason_code = 0;
  1041. struct smc_link *link;
  1042. u8 *eid = NULL;
  1043. ini->is_smcd = false;
  1044. ini->ib_clcqpn = ntoh24(aclc->r0.qpn);
  1045. ini->first_contact_peer = aclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK;
  1046. memcpy(ini->peer_systemid, aclc->r0.lcl.id_for_peer, SMC_SYSTEMID_LEN);
  1047. memcpy(ini->peer_gid, aclc->r0.lcl.gid, SMC_GID_SIZE);
  1048. memcpy(ini->peer_mac, aclc->r0.lcl.mac, ETH_ALEN);
  1049. reason_code = smc_connect_rdma_v2_prepare(smc, aclc, ini);
  1050. if (reason_code)
  1051. return reason_code;
  1052. mutex_lock(&smc_client_lgr_pending);
  1053. reason_code = smc_conn_create(smc, ini);
  1054. if (reason_code) {
  1055. mutex_unlock(&smc_client_lgr_pending);
  1056. return reason_code;
  1057. }
  1058. smc_conn_save_peer_info(smc, aclc);
  1059. if (ini->first_contact_local) {
  1060. link = smc->conn.lnk;
  1061. } else {
  1062. /* set link that was assigned by server */
  1063. link = NULL;
  1064. for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
  1065. struct smc_link *l = &smc->conn.lgr->lnk[i];
  1066. if (l->peer_qpn == ntoh24(aclc->r0.qpn) &&
  1067. !memcmp(l->peer_gid, &aclc->r0.lcl.gid,
  1068. SMC_GID_SIZE) &&
  1069. (aclc->hdr.version > SMC_V1 ||
  1070. !memcmp(l->peer_mac, &aclc->r0.lcl.mac,
  1071. sizeof(l->peer_mac)))) {
  1072. link = l;
  1073. break;
  1074. }
  1075. }
  1076. if (!link) {
  1077. reason_code = SMC_CLC_DECL_NOSRVLINK;
  1078. goto connect_abort;
  1079. }
  1080. smc_switch_link_and_count(&smc->conn, link);
  1081. }
  1082. /* create send buffer and rmb */
  1083. if (smc_buf_create(smc, false)) {
  1084. reason_code = SMC_CLC_DECL_MEM;
  1085. goto connect_abort;
  1086. }
  1087. if (ini->first_contact_local)
  1088. smc_link_save_peer_info(link, aclc, ini);
  1089. if (smc_rmb_rtoken_handling(&smc->conn, link, aclc)) {
  1090. reason_code = SMC_CLC_DECL_ERR_RTOK;
  1091. goto connect_abort;
  1092. }
  1093. smc_close_init(smc);
  1094. smc_rx_init(smc);
  1095. if (ini->first_contact_local) {
  1096. if (smc_ib_ready_link(link)) {
  1097. reason_code = SMC_CLC_DECL_ERR_RDYLNK;
  1098. goto connect_abort;
  1099. }
  1100. } else {
  1101. /* reg sendbufs if they were vzalloced */
  1102. if (smc->conn.sndbuf_desc->is_vm) {
  1103. if (smcr_lgr_reg_sndbufs(link, smc->conn.sndbuf_desc)) {
  1104. reason_code = SMC_CLC_DECL_ERR_REGBUF;
  1105. goto connect_abort;
  1106. }
  1107. }
  1108. if (smcr_lgr_reg_rmbs(link, smc->conn.rmb_desc)) {
  1109. reason_code = SMC_CLC_DECL_ERR_REGBUF;
  1110. goto connect_abort;
  1111. }
  1112. }
  1113. if (aclc->hdr.version > SMC_V1) {
  1114. struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
  1115. (struct smc_clc_msg_accept_confirm_v2 *)aclc;
  1116. eid = clc_v2->r1.eid;
  1117. if (ini->first_contact_local)
  1118. smc_fill_gid_list(link->lgr, &ini->smcrv2.gidlist,
  1119. link->smcibdev, link->gid);
  1120. }
  1121. reason_code = smc_clc_send_confirm(smc, ini->first_contact_local,
  1122. aclc->hdr.version, eid, ini);
  1123. if (reason_code)
  1124. goto connect_abort;
  1125. smc_tx_init(smc);
  1126. if (ini->first_contact_local) {
  1127. /* QP confirmation over RoCE fabric */
  1128. smc_llc_flow_initiate(link->lgr, SMC_LLC_FLOW_ADD_LINK);
  1129. reason_code = smcr_clnt_conf_first_link(smc);
  1130. smc_llc_flow_stop(link->lgr, &link->lgr->llc_flow_lcl);
  1131. if (reason_code)
  1132. goto connect_abort;
  1133. }
  1134. mutex_unlock(&smc_client_lgr_pending);
  1135. smc_copy_sock_settings_to_clc(smc);
  1136. smc->connect_nonblock = 0;
  1137. if (smc->sk.sk_state == SMC_INIT)
  1138. smc->sk.sk_state = SMC_ACTIVE;
  1139. return 0;
  1140. connect_abort:
  1141. smc_conn_abort(smc, ini->first_contact_local);
  1142. mutex_unlock(&smc_client_lgr_pending);
  1143. smc->connect_nonblock = 0;
  1144. return reason_code;
  1145. }
  1146. /* The server has chosen one of the proposed ISM devices for the communication.
  1147. * Determine from the CHID of the received CLC ACCEPT the ISM device chosen.
  1148. */
  1149. static int
  1150. smc_v2_determine_accepted_chid(struct smc_clc_msg_accept_confirm_v2 *aclc,
  1151. struct smc_init_info *ini)
  1152. {
  1153. int i;
  1154. for (i = 0; i < ini->ism_offered_cnt + 1; i++) {
  1155. if (ini->ism_chid[i] == ntohs(aclc->d1.chid)) {
  1156. ini->ism_selected = i;
  1157. return 0;
  1158. }
  1159. }
  1160. return -EPROTO;
  1161. }
/* setup for ISM connection of client */
static int smc_connect_ism(struct smc_sock *smc,
			   struct smc_clc_msg_accept_confirm *aclc,
			   struct smc_init_info *ini)
{
	u8 *eid = NULL;
	int rc = 0;

	ini->is_smcd = true;
	ini->first_contact_peer = aclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK;

	if (aclc->hdr.version == SMC_V2) {
		struct smc_clc_msg_accept_confirm_v2 *aclc_v2 =
			(struct smc_clc_msg_accept_confirm_v2 *)aclc;

		rc = smc_v2_determine_accepted_chid(aclc_v2, ini);
		if (rc)
			return rc;
	}
	ini->ism_peer_gid[ini->ism_selected] = aclc->d0.gid;

	/* there is only one lgr role for SMC-D; use server lock */
	mutex_lock(&smc_server_lgr_pending);
	rc = smc_conn_create(smc, ini);
	if (rc) {
		mutex_unlock(&smc_server_lgr_pending);
		return rc;
	}

	/* Create send and receive buffers */
	rc = smc_buf_create(smc, true);
	if (rc) {
		rc = (rc == -ENOSPC) ? SMC_CLC_DECL_MAX_DMB : SMC_CLC_DECL_MEM;
		goto connect_abort;
	}

	smc_conn_save_peer_info(smc, aclc);
	smc_close_init(smc);
	smc_rx_init(smc);
	smc_tx_init(smc);

	if (aclc->hdr.version > SMC_V1) {
		struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
			(struct smc_clc_msg_accept_confirm_v2 *)aclc;

		eid = clc_v2->d1.eid;
	}

	rc = smc_clc_send_confirm(smc, ini->first_contact_local,
				  aclc->hdr.version, eid, NULL);
	if (rc)
		goto connect_abort;
	mutex_unlock(&smc_server_lgr_pending);

	smc_copy_sock_settings_to_clc(smc);
	smc->connect_nonblock = 0;
	if (smc->sk.sk_state == SMC_INIT)
		smc->sk.sk_state = SMC_ACTIVE;

	return 0;
connect_abort:
	smc_conn_abort(smc, ini->first_contact_local);
	mutex_unlock(&smc_server_lgr_pending);
	smc->connect_nonblock = 0;

	return rc;
}
/* check if received accept type and version matches a proposed one */
static int smc_connect_check_aclc(struct smc_init_info *ini,
				  struct smc_clc_msg_accept_confirm *aclc)
{
	if (aclc->hdr.typev1 != SMC_TYPE_R &&
	    aclc->hdr.typev1 != SMC_TYPE_D)
		return SMC_CLC_DECL_MODEUNSUPP;

	if (aclc->hdr.version >= SMC_V2) {
		if ((aclc->hdr.typev1 == SMC_TYPE_R &&
		     !smcr_indicated(ini->smc_type_v2)) ||
		    (aclc->hdr.typev1 == SMC_TYPE_D &&
		     !smcd_indicated(ini->smc_type_v2)))
			return SMC_CLC_DECL_MODEUNSUPP;
	} else {
		if ((aclc->hdr.typev1 == SMC_TYPE_R &&
		     !smcr_indicated(ini->smc_type_v1)) ||
		    (aclc->hdr.typev1 == SMC_TYPE_D &&
		     !smcd_indicated(ini->smc_type_v1)))
			return SMC_CLC_DECL_MODEUNSUPP;
	}

	return 0;
}
/* perform steps before actually connecting */
static int __smc_connect(struct smc_sock *smc)
{
	u8 version = smc_ism_is_v2_capable() ? SMC_V2 : SMC_V1;
	struct smc_clc_msg_accept_confirm_v2 *aclc2;
	struct smc_clc_msg_accept_confirm *aclc;
	struct smc_init_info *ini = NULL;
	u8 *buf = NULL;
	int rc = 0;

	if (smc->use_fallback)
		return smc_connect_fallback(smc, smc->fallback_rsn);

	/* if peer has not signalled SMC-capability, fall back */
	if (!tcp_sk(smc->clcsock->sk)->syn_smc)
		return smc_connect_fallback(smc, SMC_CLC_DECL_PEERNOSMC);

	/* IPSec connections opt out of SMC optimizations */
	if (using_ipsec(smc))
		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC,
						    version);

	ini = kzalloc(sizeof(*ini), GFP_KERNEL);
	if (!ini)
		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_MEM,
						    version);

	ini->smcd_version = SMC_V1 | SMC_V2;
	ini->smcr_version = SMC_V1 | SMC_V2;
	ini->smc_type_v1 = SMC_TYPE_B;
	ini->smc_type_v2 = SMC_TYPE_B;

	/* get vlan id from IP device */
	if (smc_vlan_by_tcpsk(smc->clcsock, ini)) {
		ini->smcd_version &= ~SMC_V1;
		ini->smcr_version = 0;
		ini->smc_type_v1 = SMC_TYPE_N;
		if (!ini->smcd_version) {
			rc = SMC_CLC_DECL_GETVLANERR;
			goto fallback;
		}
	}

	rc = smc_find_proposal_devices(smc, ini);
	if (rc)
		goto fallback;

	buf = kzalloc(SMC_CLC_MAX_ACCEPT_LEN, GFP_KERNEL);
	if (!buf) {
		rc = SMC_CLC_DECL_MEM;
		goto fallback;
	}
	aclc2 = (struct smc_clc_msg_accept_confirm_v2 *)buf;
	aclc = (struct smc_clc_msg_accept_confirm *)aclc2;

	/* perform CLC handshake */
	rc = smc_connect_clc(smc, aclc2, ini);
	if (rc) {
		/* -EAGAIN on timeout, see tcp_recvmsg() */
		if (rc == -EAGAIN) {
			rc = -ETIMEDOUT;
			smc->sk.sk_err = ETIMEDOUT;
		}
		goto vlan_cleanup;
	}

	/* check if smc modes and versions of CLC proposal and accept match */
	rc = smc_connect_check_aclc(ini, aclc);
	version = aclc->hdr.version == SMC_V1 ? SMC_V1 : SMC_V2;
	if (rc)
		goto vlan_cleanup;

	/* depending on previous steps, connect using rdma or ism */
	if (aclc->hdr.typev1 == SMC_TYPE_R) {
		ini->smcr_version = version;
		rc = smc_connect_rdma(smc, aclc, ini);
	} else if (aclc->hdr.typev1 == SMC_TYPE_D) {
		ini->smcd_version = version;
		rc = smc_connect_ism(smc, aclc, ini);
	}
	if (rc)
		goto vlan_cleanup;

	SMC_STAT_CLNT_SUCC_INC(sock_net(smc->clcsock->sk), aclc);
	smc_connect_ism_vlan_cleanup(smc, ini);
	kfree(buf);
	kfree(ini);
	return 0;

vlan_cleanup:
	smc_connect_ism_vlan_cleanup(smc, ini);
	kfree(buf);
fallback:
	kfree(ini);
	return smc_connect_decline_fallback(smc, rc, version);
}
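/* For orientation: the CLC handshake driven above exchanges three messages
 * over the internal TCP (clc) socket - smc_connect_clc() sends the SMC
 * Proposal and waits for the SMC Accept, then smc_connect_rdma() or
 * smc_connect_ism() answers with the SMC Confirm. Any failure before the
 * Confirm is sent ends in smc_connect_decline_fallback(), i.e. the socket
 * degrades to plain TCP where possible.
 */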
static void smc_connect_work(struct work_struct *work)
{
	struct smc_sock *smc = container_of(work, struct smc_sock,
					    connect_work);
	long timeo = smc->sk.sk_sndtimeo;
	int rc = 0;

	if (!timeo)
		timeo = MAX_SCHEDULE_TIMEOUT;
	lock_sock(smc->clcsock->sk);
	if (smc->clcsock->sk->sk_err) {
		smc->sk.sk_err = smc->clcsock->sk->sk_err;
	} else if ((1 << smc->clcsock->sk->sk_state) &
					(TCPF_SYN_SENT | TCPF_SYN_RECV)) {
		rc = sk_stream_wait_connect(smc->clcsock->sk, &timeo);
		if ((rc == -EPIPE) &&
		    ((1 << smc->clcsock->sk->sk_state) &
					(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)))
			rc = 0;
	}
	release_sock(smc->clcsock->sk);
	lock_sock(&smc->sk);
	if (rc != 0 || smc->sk.sk_err) {
		smc->sk.sk_state = SMC_CLOSED;
		if (rc == -EPIPE || rc == -EAGAIN)
			smc->sk.sk_err = EPIPE;
		else if (rc == -ECONNREFUSED)
			smc->sk.sk_err = ECONNREFUSED;
		else if (signal_pending(current))
			smc->sk.sk_err = -sock_intr_errno(timeo);
		sock_put(&smc->sk); /* passive closing */
		goto out;
	}

	rc = __smc_connect(smc);
	if (rc < 0)
		smc->sk.sk_err = -rc;

out:
	if (!sock_flag(&smc->sk, SOCK_DEAD)) {
		if (smc->sk.sk_err) {
			smc->sk.sk_state_change(&smc->sk);
		} else { /* allow polling before and after fallback decision */
			smc->clcsock->sk->sk_write_space(smc->clcsock->sk);
			smc->sk.sk_write_space(&smc->sk);
		}
	}
	release_sock(&smc->sk);
}
static int smc_connect(struct socket *sock, struct sockaddr *addr,
		       int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EINVAL;

	smc = smc_sk(sk);

	/* separate smc parameter checking to be safe */
	if (alen < sizeof(addr->sa_family))
		goto out_err;
	if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
		goto out_err;

	lock_sock(sk);
	switch (sock->state) {
	default:
		rc = -EINVAL;
		goto out;
	case SS_CONNECTED:
		rc = sk->sk_state == SMC_ACTIVE ? -EISCONN : -EINVAL;
		goto out;
	case SS_CONNECTING:
		if (sk->sk_state == SMC_ACTIVE)
			goto connected;
		break;
	case SS_UNCONNECTED:
		sock->state = SS_CONNECTING;
		break;
	}

	switch (sk->sk_state) {
	default:
		goto out;
	case SMC_CLOSED:
		rc = sock_error(sk) ? : -ECONNABORTED;
		sock->state = SS_UNCONNECTED;
		goto out;
	case SMC_ACTIVE:
		rc = -EISCONN;
		goto out;
	case SMC_INIT:
		break;
	}

	smc_copy_sock_settings_to_clc(smc);
	tcp_sk(smc->clcsock->sk)->syn_smc = 1;
	if (smc->connect_nonblock) {
		rc = -EALREADY;
		goto out;
	}
	rc = kernel_connect(smc->clcsock, addr, alen, flags);
	if (rc && rc != -EINPROGRESS)
		goto out;

	if (smc->use_fallback) {
		sock->state = rc ? SS_CONNECTING : SS_CONNECTED;
		goto out;
	}
	sock_hold(&smc->sk); /* sock put in passive closing */
	if (flags & O_NONBLOCK) {
		if (queue_work(smc_hs_wq, &smc->connect_work))
			smc->connect_nonblock = 1;
		rc = -EINPROGRESS;
		goto out;
	} else {
		rc = __smc_connect(smc);
		if (rc < 0)
			goto out;
	}

connected:
	rc = 0;
	sock->state = SS_CONNECTED;
out:
	release_sock(sk);
out_err:
	return rc;
}
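/* Usage from user space (illustrative sketch; AF_SMC comes from the socket
 * headers, SMCPROTO_SMC/SMCPROTO_SMC6 are the protocol values checked in
 * __smc_create() below): an application opts into SMC explicitly by opening
 * an AF_SMC stream socket, everything after socket() is the unmodified
 * sockets API:
 *
 *	int fd = socket(AF_SMC, SOCK_STREAM, SMCPROTO_SMC);
 *	struct sockaddr_in sa = { .sin_family = AF_INET, ... };
 *
 *	if (connect(fd, (struct sockaddr *)&sa, sizeof(sa)) == 0)
 *		write(fd, "hello", 5); // RDMA/ISM if negotiated, TCP otherwise
 */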
static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
{
	struct socket *new_clcsock = NULL;
	struct sock *lsk = &lsmc->sk;
	struct sock *new_sk;
	int rc = -EINVAL;

	release_sock(lsk);
	new_sk = smc_sock_alloc(sock_net(lsk), NULL, lsk->sk_protocol);
	if (!new_sk) {
		rc = -ENOMEM;
		lsk->sk_err = ENOMEM;
		*new_smc = NULL;
		lock_sock(lsk);
		goto out;
	}
	*new_smc = smc_sk(new_sk);

	mutex_lock(&lsmc->clcsock_release_lock);
	if (lsmc->clcsock)
		rc = kernel_accept(lsmc->clcsock, &new_clcsock, SOCK_NONBLOCK);
	mutex_unlock(&lsmc->clcsock_release_lock);
	lock_sock(lsk);
	if (rc < 0 && rc != -EAGAIN)
		lsk->sk_err = -rc;
	if (rc < 0 || lsk->sk_state == SMC_CLOSED) {
		new_sk->sk_prot->unhash(new_sk);
		if (new_clcsock)
			sock_release(new_clcsock);
		new_sk->sk_state = SMC_CLOSED;
		smc_sock_set_flag(new_sk, SOCK_DEAD);
		sock_put(new_sk); /* final */
		*new_smc = NULL;
		goto out;
	}

	/* new clcsock has inherited the smc listen-specific sk_data_ready
	 * function; switch it back to the original sk_data_ready function
	 */
	new_clcsock->sk->sk_data_ready = lsmc->clcsk_data_ready;

	/* if new clcsock has also inherited the fallback-specific callback
	 * functions, switch them back to the original ones.
	 */
	if (lsmc->use_fallback) {
		if (lsmc->clcsk_state_change)
			new_clcsock->sk->sk_state_change = lsmc->clcsk_state_change;
		if (lsmc->clcsk_write_space)
			new_clcsock->sk->sk_write_space = lsmc->clcsk_write_space;
		if (lsmc->clcsk_error_report)
			new_clcsock->sk->sk_error_report = lsmc->clcsk_error_report;
	}

	(*new_smc)->clcsock = new_clcsock;
out:
	return rc;
}
/* add a just created sock to the accept queue of the listen sock as
 * candidate for a following socket accept call from user space
 */
static void smc_accept_enqueue(struct sock *parent, struct sock *sk)
{
	struct smc_sock *par = smc_sk(parent);

	sock_hold(sk); /* sock_put in smc_accept_unlink() */
	spin_lock(&par->accept_q_lock);
	list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q);
	spin_unlock(&par->accept_q_lock);
	sk_acceptq_added(parent);
}

/* remove a socket from the accept queue of its parental listening socket */
static void smc_accept_unlink(struct sock *sk)
{
	struct smc_sock *par = smc_sk(sk)->listen_smc;

	spin_lock(&par->accept_q_lock);
	list_del_init(&smc_sk(sk)->accept_q);
	spin_unlock(&par->accept_q_lock);
	sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk);
	sock_put(sk); /* sock_hold in smc_accept_enqueue */
}

/* remove a sock from the accept queue to bind it to a new socket created
 * for a socket accept call from user space
 */
struct sock *smc_accept_dequeue(struct sock *parent,
				struct socket *new_sock)
{
	struct smc_sock *isk, *n;
	struct sock *new_sk;

	list_for_each_entry_safe(isk, n, &smc_sk(parent)->accept_q, accept_q) {
		new_sk = (struct sock *)isk;

		smc_accept_unlink(new_sk);
		if (new_sk->sk_state == SMC_CLOSED) {
			new_sk->sk_prot->unhash(new_sk);
			if (isk->clcsock) {
				sock_release(isk->clcsock);
				isk->clcsock = NULL;
			}
			sock_put(new_sk); /* final */
			continue;
		}
		if (new_sock) {
			sock_graft(new_sk, new_sock);
			new_sock->state = SS_CONNECTED;
			if (isk->use_fallback) {
				smc_sk(new_sk)->clcsock->file = new_sock->file;
				isk->clcsock->file->private_data = isk->clcsock;
			}
		}
		return new_sk;
	}
	return NULL;
}
/* clean up for a created but never accepted sock */
void smc_close_non_accepted(struct sock *sk)
{
	struct smc_sock *smc = smc_sk(sk);

	sock_hold(sk); /* sock_put below */
	lock_sock(sk);
	if (!sk->sk_lingertime)
		/* wait for peer closing */
		WRITE_ONCE(sk->sk_lingertime, SMC_MAX_STREAM_WAIT_TIMEOUT);
	__smc_release(smc);
	release_sock(sk);
	sock_put(sk); /* sock_hold above */
	sock_put(sk); /* final sock_put */
}
static int smcr_serv_conf_first_link(struct smc_sock *smc)
{
	struct smc_link *link = smc->conn.lnk;
	struct smc_llc_qentry *qentry;
	int rc;

	/* reg the sndbuf if it was vzalloced */
	if (smc->conn.sndbuf_desc->is_vm) {
		if (smcr_link_reg_buf(link, smc->conn.sndbuf_desc))
			return SMC_CLC_DECL_ERR_REGBUF;
	}

	/* reg the rmb */
	if (smcr_link_reg_buf(link, smc->conn.rmb_desc))
		return SMC_CLC_DECL_ERR_REGBUF;

	/* send CONFIRM LINK request to client over the RoCE fabric */
	rc = smc_llc_send_confirm_link(link, SMC_LLC_REQ);
	if (rc < 0)
		return SMC_CLC_DECL_TIMEOUT_CL;

	/* receive CONFIRM LINK response from client over the RoCE fabric */
	qentry = smc_llc_wait(link->lgr, link, SMC_LLC_WAIT_TIME,
			      SMC_LLC_CONFIRM_LINK);
	if (!qentry) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
	}
	smc_llc_save_peer_uid(qentry);
	rc = smc_llc_eval_conf_link(qentry, SMC_LLC_RESP);
	smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl);
	if (rc)
		return SMC_CLC_DECL_RMBE_EC;

	/* confirm_rkey is implicit on 1st contact */
	smc->conn.rmb_desc->is_conf_rkey = true;

	smc_llc_link_active(link);
	smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE);

	mutex_lock(&link->lgr->llc_conf_mutex);
	/* initial contact - try to establish second link */
	smc_llc_srv_add_link(link, NULL);
	mutex_unlock(&link->lgr->llc_conf_mutex);
	return 0;
}
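/* smcr_serv_conf_first_link() above is the server half of the first-contact
 * link confirmation; its client counterpart, smcr_clnt_conf_first_link(),
 * is what smc_connect_rdma() invokes under SMC_LLC_FLOW_ADD_LINK further
 * up. Both sides register their buffers and exchange CONFIRM LINK LLC
 * messages over the RoCE fabric before the link group is marked active.
 */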
/* listen worker: finish */
static void smc_listen_out(struct smc_sock *new_smc)
{
	struct smc_sock *lsmc = new_smc->listen_smc;
	struct sock *newsmcsk = &new_smc->sk;

	if (tcp_sk(new_smc->clcsock->sk)->syn_smc)
		atomic_dec(&lsmc->queued_smc_hs);

	if (lsmc->sk.sk_state == SMC_LISTEN) {
		lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
		smc_accept_enqueue(&lsmc->sk, newsmcsk);
		release_sock(&lsmc->sk);
	} else { /* no longer listening */
		smc_close_non_accepted(newsmcsk);
	}

	/* Wake up accept */
	lsmc->sk.sk_data_ready(&lsmc->sk);
	sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */
}

/* listen worker: finish in state connected */
static void smc_listen_out_connected(struct smc_sock *new_smc)
{
	struct sock *newsmcsk = &new_smc->sk;

	if (newsmcsk->sk_state == SMC_INIT)
		newsmcsk->sk_state = SMC_ACTIVE;

	smc_listen_out(new_smc);
}

/* listen worker: finish in error state */
static void smc_listen_out_err(struct smc_sock *new_smc)
{
	struct sock *newsmcsk = &new_smc->sk;
	struct net *net = sock_net(newsmcsk);

	this_cpu_inc(net->smc.smc_stats->srv_hshake_err_cnt);
	if (newsmcsk->sk_state == SMC_INIT)
		sock_put(&new_smc->sk); /* passive closing */
	newsmcsk->sk_state = SMC_CLOSED;

	smc_listen_out(new_smc);
}

/* listen worker: decline and fall back if possible */
static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
			       int local_first, u8 version)
{
	/* RDMA setup failed, switch back to TCP */
	smc_conn_abort(new_smc, local_first);
	if (reason_code < 0 ||
	    smc_switch_to_fallback(new_smc, reason_code)) {
		/* error, no fallback possible */
		smc_listen_out_err(new_smc);
		return;
	}
	if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) {
		if (smc_clc_send_decline(new_smc, reason_code, version) < 0) {
			smc_listen_out_err(new_smc);
			return;
		}
	}
	smc_listen_out_connected(new_smc);
}
/* listen worker: version checking */
static int smc_listen_v2_check(struct smc_sock *new_smc,
			       struct smc_clc_msg_proposal *pclc,
			       struct smc_init_info *ini)
{
	struct smc_clc_smcd_v2_extension *pclc_smcd_v2_ext;
	struct smc_clc_v2_extension *pclc_v2_ext;
	int rc = SMC_CLC_DECL_PEERNOSMC;

	ini->smc_type_v1 = pclc->hdr.typev1;
	ini->smc_type_v2 = pclc->hdr.typev2;
	ini->smcd_version = smcd_indicated(ini->smc_type_v1) ? SMC_V1 : 0;
	ini->smcr_version = smcr_indicated(ini->smc_type_v1) ? SMC_V1 : 0;
	if (pclc->hdr.version > SMC_V1) {
		if (smcd_indicated(ini->smc_type_v2))
			ini->smcd_version |= SMC_V2;
		if (smcr_indicated(ini->smc_type_v2))
			ini->smcr_version |= SMC_V2;
	}
	if (!(ini->smcd_version & SMC_V2) && !(ini->smcr_version & SMC_V2)) {
		rc = SMC_CLC_DECL_PEERNOSMC;
		goto out;
	}
	pclc_v2_ext = smc_get_clc_v2_ext(pclc);
	if (!pclc_v2_ext) {
		ini->smcd_version &= ~SMC_V2;
		ini->smcr_version &= ~SMC_V2;
		rc = SMC_CLC_DECL_NOV2EXT;
		goto out;
	}
	pclc_smcd_v2_ext = smc_get_clc_smcd_v2_ext(pclc_v2_ext);
	if (ini->smcd_version & SMC_V2) {
		if (!smc_ism_is_v2_capable()) {
			ini->smcd_version &= ~SMC_V2;
			rc = SMC_CLC_DECL_NOISM2SUPP;
		} else if (!pclc_smcd_v2_ext) {
			ini->smcd_version &= ~SMC_V2;
			rc = SMC_CLC_DECL_NOV2DEXT;
		} else if (!pclc_v2_ext->hdr.eid_cnt &&
			   !pclc_v2_ext->hdr.flag.seid) {
			ini->smcd_version &= ~SMC_V2;
			rc = SMC_CLC_DECL_NOUEID;
		}
	}
	if (ini->smcr_version & SMC_V2) {
		if (!pclc_v2_ext->hdr.eid_cnt) {
			ini->smcr_version &= ~SMC_V2;
			rc = SMC_CLC_DECL_NOUEID;
		}
	}

out:
	if (!ini->smcd_version && !ini->smcr_version)
		return rc;

	return 0;
}
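/* Terminology used above: an EID (Enterprise ID) scopes which peers may
 * talk SMC V2 to each other. eid_cnt counts the user-defined EIDs (UEIDs)
 * carried in the proposal's V2 extension, and flag.seid signals the system
 * EID, which only SMC-D can use. A V2 variant is kept only if at least one
 * applicable EID was proposed; otherwise the matching decline code
 * (SMC_CLC_DECL_NOUEID, SMC_CLC_DECL_NOV2DEXT, ...) is recorded.
 */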
/* listen worker: check prefixes */
static int smc_listen_prfx_check(struct smc_sock *new_smc,
				 struct smc_clc_msg_proposal *pclc)
{
	struct smc_clc_msg_proposal_prefix *pclc_prfx;
	struct socket *newclcsock = new_smc->clcsock;

	if (pclc->hdr.typev1 == SMC_TYPE_N)
		return 0;
	pclc_prfx = smc_clc_proposal_get_prefix(pclc);
	if (smc_clc_prfx_match(newclcsock, pclc_prfx))
		return SMC_CLC_DECL_DIFFPREFIX;

	return 0;
}

/* listen worker: initialize connection and buffers */
static int smc_listen_rdma_init(struct smc_sock *new_smc,
				struct smc_init_info *ini)
{
	int rc;

	/* allocate connection / link group */
	rc = smc_conn_create(new_smc, ini);
	if (rc)
		return rc;

	/* create send buffer and rmb */
	if (smc_buf_create(new_smc, false)) {
		smc_conn_abort(new_smc, ini->first_contact_local);
		return SMC_CLC_DECL_MEM;
	}

	return 0;
}

/* listen worker: initialize connection and buffers for SMC-D */
static int smc_listen_ism_init(struct smc_sock *new_smc,
			       struct smc_init_info *ini)
{
	int rc;

	rc = smc_conn_create(new_smc, ini);
	if (rc)
		return rc;

	/* Create send and receive buffers */
	rc = smc_buf_create(new_smc, true);
	if (rc) {
		smc_conn_abort(new_smc, ini->first_contact_local);
		return (rc == -ENOSPC) ? SMC_CLC_DECL_MAX_DMB :
					 SMC_CLC_DECL_MEM;
	}

	return 0;
}
static bool smc_is_already_selected(struct smcd_dev *smcd,
				    struct smc_init_info *ini,
				    int matches)
{
	int i;

	for (i = 0; i < matches; i++)
		if (smcd == ini->ism_dev[i])
			return true;

	return false;
}

/* check for ISM devices matching proposed ISM devices */
static void smc_check_ism_v2_match(struct smc_init_info *ini,
				   u16 proposed_chid, u64 proposed_gid,
				   unsigned int *matches)
{
	struct smcd_dev *smcd;

	list_for_each_entry(smcd, &smcd_dev_list.list, list) {
		if (smcd->going_away)
			continue;
		if (smc_is_already_selected(smcd, ini, *matches))
			continue;
		if (smc_ism_get_chid(smcd) == proposed_chid &&
		    !smc_ism_cantalk(proposed_gid, ISM_RESERVED_VLANID, smcd)) {
			ini->ism_peer_gid[*matches] = proposed_gid;
			ini->ism_dev[*matches] = smcd;
			(*matches)++;
			break;
		}
	}
}

static void smc_find_ism_store_rc(u32 rc, struct smc_init_info *ini)
{
	if (!ini->rc)
		ini->rc = rc;
}
static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc,
					struct smc_clc_msg_proposal *pclc,
					struct smc_init_info *ini)
{
	struct smc_clc_smcd_v2_extension *smcd_v2_ext;
	struct smc_clc_v2_extension *smc_v2_ext;
	struct smc_clc_msg_smcd *pclc_smcd;
	unsigned int matches = 0;
	u8 smcd_version;
	u8 *eid = NULL;
	int i, rc;

	if (!(ini->smcd_version & SMC_V2) || !smcd_indicated(ini->smc_type_v2))
		goto not_found;

	pclc_smcd = smc_get_clc_msg_smcd(pclc);
	smc_v2_ext = smc_get_clc_v2_ext(pclc);
	smcd_v2_ext = smc_get_clc_smcd_v2_ext(smc_v2_ext);

	mutex_lock(&smcd_dev_list.mutex);
	if (pclc_smcd->ism.chid)
		/* check for ISM device matching proposed native ISM device */
		smc_check_ism_v2_match(ini, ntohs(pclc_smcd->ism.chid),
				       ntohll(pclc_smcd->ism.gid), &matches);
	for (i = 1; i <= smc_v2_ext->hdr.ism_gid_cnt; i++) {
		/* check for ISM devices matching proposed non-native ISM
		 * devices
		 */
		smc_check_ism_v2_match(ini,
				       ntohs(smcd_v2_ext->gidchid[i - 1].chid),
				       ntohll(smcd_v2_ext->gidchid[i - 1].gid),
				       &matches);
	}
	mutex_unlock(&smcd_dev_list.mutex);

	if (!ini->ism_dev[0]) {
		smc_find_ism_store_rc(SMC_CLC_DECL_NOSMCD2DEV, ini);
		goto not_found;
	}

	smc_ism_get_system_eid(&eid);
	if (!smc_clc_match_eid(ini->negotiated_eid, smc_v2_ext,
			       smcd_v2_ext->system_eid, eid))
		goto not_found;

	/* separate - outside the smcd_dev_list.lock */
	smcd_version = ini->smcd_version;
	for (i = 0; i < matches; i++) {
		ini->smcd_version = SMC_V2;
		ini->is_smcd = true;
		ini->ism_selected = i;
		rc = smc_listen_ism_init(new_smc, ini);
		if (rc) {
			smc_find_ism_store_rc(rc, ini);
			/* try next active ISM device */
			continue;
		}
		return; /* matching and usable V2 ISM device found */
	}
	/* no V2 ISM device could be initialized */
	ini->smcd_version = smcd_version;	/* restore original value */
	ini->negotiated_eid[0] = 0;

not_found:
	ini->smcd_version &= ~SMC_V2;
	ini->ism_dev[0] = NULL;
	ini->is_smcd = false;
}
static void smc_find_ism_v1_device_serv(struct smc_sock *new_smc,
					struct smc_clc_msg_proposal *pclc,
					struct smc_init_info *ini)
{
	struct smc_clc_msg_smcd *pclc_smcd = smc_get_clc_msg_smcd(pclc);
	int rc = 0;

	/* check if ISM V1 is available */
	if (!(ini->smcd_version & SMC_V1) || !smcd_indicated(ini->smc_type_v1))
		goto not_found;
	ini->is_smcd = true; /* prepare ISM check */
	ini->ism_peer_gid[0] = ntohll(pclc_smcd->ism.gid);
	rc = smc_find_ism_device(new_smc, ini);
	if (rc)
		goto not_found;
	ini->ism_selected = 0;
	rc = smc_listen_ism_init(new_smc, ini);
	if (!rc)
		return;		/* V1 ISM device found */

not_found:
	smc_find_ism_store_rc(rc, ini);
	ini->smcd_version &= ~SMC_V1;
	ini->ism_dev[0] = NULL;
	ini->is_smcd = false;
}
/* listen worker: register buffers */
static int smc_listen_rdma_reg(struct smc_sock *new_smc, bool local_first)
{
	struct smc_connection *conn = &new_smc->conn;

	if (!local_first) {
		/* reg sendbufs if they were vzalloced */
		if (conn->sndbuf_desc->is_vm) {
			if (smcr_lgr_reg_sndbufs(conn->lnk,
						 conn->sndbuf_desc))
				return SMC_CLC_DECL_ERR_REGBUF;
		}
		if (smcr_lgr_reg_rmbs(conn->lnk, conn->rmb_desc))
			return SMC_CLC_DECL_ERR_REGBUF;
	}

	return 0;
}
static void smc_find_rdma_v2_device_serv(struct smc_sock *new_smc,
					 struct smc_clc_msg_proposal *pclc,
					 struct smc_init_info *ini)
{
	struct smc_clc_v2_extension *smc_v2_ext;
	u8 smcr_version;
	int rc;

	if (!(ini->smcr_version & SMC_V2) || !smcr_indicated(ini->smc_type_v2))
		goto not_found;

	smc_v2_ext = smc_get_clc_v2_ext(pclc);
	if (!smc_clc_match_eid(ini->negotiated_eid, smc_v2_ext, NULL, NULL))
		goto not_found;

	/* prepare RDMA check */
	memcpy(ini->peer_systemid, pclc->lcl.id_for_peer, SMC_SYSTEMID_LEN);
	memcpy(ini->peer_gid, smc_v2_ext->roce, SMC_GID_SIZE);
	memcpy(ini->peer_mac, pclc->lcl.mac, ETH_ALEN);
	ini->check_smcrv2 = true;
	ini->smcrv2.clc_sk = new_smc->clcsock->sk;
	ini->smcrv2.saddr = new_smc->clcsock->sk->sk_rcv_saddr;
	ini->smcrv2.daddr = smc_ib_gid_to_ipv4(smc_v2_ext->roce);
	rc = smc_find_rdma_device(new_smc, ini);
	if (rc) {
		smc_find_ism_store_rc(rc, ini);
		goto not_found;
	}
	if (!ini->smcrv2.uses_gateway)
		memcpy(ini->smcrv2.nexthop_mac, pclc->lcl.mac, ETH_ALEN);

	smcr_version = ini->smcr_version;
	ini->smcr_version = SMC_V2;
	rc = smc_listen_rdma_init(new_smc, ini);
	if (!rc) {
		rc = smc_listen_rdma_reg(new_smc, ini->first_contact_local);
		if (rc)
			smc_conn_abort(new_smc, ini->first_contact_local);
	}
	if (!rc)
		return;
	ini->smcr_version = smcr_version;
	smc_find_ism_store_rc(rc, ini);

not_found:
	ini->smcr_version &= ~SMC_V2;
	ini->smcrv2.ib_dev_v2 = NULL;
	ini->check_smcrv2 = false;
}
static int smc_find_rdma_v1_device_serv(struct smc_sock *new_smc,
					struct smc_clc_msg_proposal *pclc,
					struct smc_init_info *ini)
{
	int rc;

	if (!(ini->smcr_version & SMC_V1) || !smcr_indicated(ini->smc_type_v1))
		return SMC_CLC_DECL_NOSMCDEV;

	/* prepare RDMA check */
	memcpy(ini->peer_systemid, pclc->lcl.id_for_peer, SMC_SYSTEMID_LEN);
	memcpy(ini->peer_gid, pclc->lcl.gid, SMC_GID_SIZE);
	memcpy(ini->peer_mac, pclc->lcl.mac, ETH_ALEN);
	rc = smc_find_rdma_device(new_smc, ini);
	if (rc) {
		/* no RDMA device found */
		return SMC_CLC_DECL_NOSMCDEV;
	}
	rc = smc_listen_rdma_init(new_smc, ini);
	if (rc)
		return rc;
	return smc_listen_rdma_reg(new_smc, ini->first_contact_local);
}
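/* Device selection order implemented by smc_listen_find_device() below:
 * ISM V2, then ISM V1, then RDMA V2, then RDMA V1. The first device that
 * can be initialized wins; if none can, the first failure code stored via
 * smc_find_ism_store_rc() is reported back in the decline.
 */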
/* determine the local device matching to proposal */
static int smc_listen_find_device(struct smc_sock *new_smc,
				  struct smc_clc_msg_proposal *pclc,
				  struct smc_init_info *ini)
{
	int prfx_rc;

	/* check for ISM device matching V2 proposed device */
	smc_find_ism_v2_device_serv(new_smc, pclc, ini);
	if (ini->ism_dev[0])
		return 0;

	/* check for matching IP prefix and subnet length (V1) */
	prfx_rc = smc_listen_prfx_check(new_smc, pclc);
	if (prfx_rc)
		smc_find_ism_store_rc(prfx_rc, ini);

	/* get vlan id from IP device */
	if (smc_vlan_by_tcpsk(new_smc->clcsock, ini))
		return ini->rc ?: SMC_CLC_DECL_GETVLANERR;

	/* check for ISM device matching V1 proposed device */
	if (!prfx_rc)
		smc_find_ism_v1_device_serv(new_smc, pclc, ini);
	if (ini->ism_dev[0])
		return 0;

	if (!smcr_indicated(pclc->hdr.typev1) &&
	    !smcr_indicated(pclc->hdr.typev2))
		/* skip RDMA and decline */
		return ini->rc ?: SMC_CLC_DECL_NOSMCDDEV;

	/* check if RDMA V2 is available */
	smc_find_rdma_v2_device_serv(new_smc, pclc, ini);
	if (ini->smcrv2.ib_dev_v2)
		return 0;

	/* check if RDMA V1 is available */
	if (!prfx_rc) {
		int rc;

		rc = smc_find_rdma_v1_device_serv(new_smc, pclc, ini);
		smc_find_ism_store_rc(rc, ini);
		return (!rc) ? 0 : ini->rc;
	}
	return prfx_rc;
}
/* listen worker: finish RDMA setup */
static int smc_listen_rdma_finish(struct smc_sock *new_smc,
				  struct smc_clc_msg_accept_confirm *cclc,
				  bool local_first,
				  struct smc_init_info *ini)
{
	struct smc_link *link = new_smc->conn.lnk;
	int reason_code = 0;

	if (local_first)
		smc_link_save_peer_info(link, cclc, ini);

	if (smc_rmb_rtoken_handling(&new_smc->conn, link, cclc))
		return SMC_CLC_DECL_ERR_RTOK;

	if (local_first) {
		if (smc_ib_ready_link(link))
			return SMC_CLC_DECL_ERR_RDYLNK;
		/* QP confirmation over RoCE fabric */
		smc_llc_flow_initiate(link->lgr, SMC_LLC_FLOW_ADD_LINK);
		reason_code = smcr_serv_conf_first_link(new_smc);
		smc_llc_flow_stop(link->lgr, &link->lgr->llc_flow_lcl);
	}
	return reason_code;
}
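/* Server-side message flow handled by smc_listen_work() below, all carried
 * over the internal TCP (clc) socket:
 *
 *	client			       server
 *	   |------ SMC Proposal ------>|
 *	   |<------ SMC Accept --------|
 *	   |------ SMC Confirm ------->|
 *
 * followed, for first-contact SMC-R, by the CONFIRM LINK LLC exchange over
 * the RoCE fabric (smc_listen_rdma_finish() above).
 */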
/* setup for connection of server */
static void smc_listen_work(struct work_struct *work)
{
	struct smc_sock *new_smc = container_of(work, struct smc_sock,
						smc_listen_work);
	struct socket *newclcsock = new_smc->clcsock;
	struct smc_clc_msg_accept_confirm *cclc;
	struct smc_clc_msg_proposal_area *buf;
	struct smc_clc_msg_proposal *pclc;
	struct smc_init_info *ini = NULL;
	u8 proposal_version = SMC_V1;
	u8 accept_version;
	int rc = 0;

	if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN)
		return smc_listen_out_err(new_smc);

	if (new_smc->use_fallback) {
		smc_listen_out_connected(new_smc);
		return;
	}

	/* check if peer is smc capable */
	if (!tcp_sk(newclcsock->sk)->syn_smc) {
		rc = smc_switch_to_fallback(new_smc, SMC_CLC_DECL_PEERNOSMC);
		if (rc)
			smc_listen_out_err(new_smc);
		else
			smc_listen_out_connected(new_smc);
		return;
	}

	/* do inband token exchange -
	 * wait for and receive SMC Proposal CLC message
	 */
	buf = kzalloc(sizeof(*buf), GFP_KERNEL);
	if (!buf) {
		rc = SMC_CLC_DECL_MEM;
		goto out_decl;
	}
	pclc = (struct smc_clc_msg_proposal *)buf;
	rc = smc_clc_wait_msg(new_smc, pclc, sizeof(*buf),
			      SMC_CLC_PROPOSAL, CLC_WAIT_TIME);
	if (rc)
		goto out_decl;

	if (pclc->hdr.version > SMC_V1)
		proposal_version = SMC_V2;

	/* IPSec connections opt out of SMC optimizations */
	if (using_ipsec(new_smc)) {
		rc = SMC_CLC_DECL_IPSEC;
		goto out_decl;
	}

	ini = kzalloc(sizeof(*ini), GFP_KERNEL);
	if (!ini) {
		rc = SMC_CLC_DECL_MEM;
		goto out_decl;
	}

	/* initial version checking */
	rc = smc_listen_v2_check(new_smc, pclc, ini);
	if (rc)
		goto out_decl;

	mutex_lock(&smc_server_lgr_pending);
	smc_close_init(new_smc);
	smc_rx_init(new_smc);
	smc_tx_init(new_smc);

	/* determine ISM or RoCE device used for connection */
	rc = smc_listen_find_device(new_smc, pclc, ini);
	if (rc)
		goto out_unlock;

	/* send SMC Accept CLC message */
	accept_version = ini->is_smcd ? ini->smcd_version : ini->smcr_version;
	rc = smc_clc_send_accept(new_smc, ini->first_contact_local,
				 accept_version, ini->negotiated_eid);
	if (rc)
		goto out_unlock;

	/* SMC-D does not need this lock any more */
	if (ini->is_smcd)
		mutex_unlock(&smc_server_lgr_pending);

	/* receive SMC Confirm CLC message */
	memset(buf, 0, sizeof(*buf));
	cclc = (struct smc_clc_msg_accept_confirm *)buf;
	rc = smc_clc_wait_msg(new_smc, cclc, sizeof(*buf),
			      SMC_CLC_CONFIRM, CLC_WAIT_TIME);
	if (rc) {
		if (!ini->is_smcd)
			goto out_unlock;
		goto out_decl;
	}

	/* finish worker */
	if (!ini->is_smcd) {
		rc = smc_listen_rdma_finish(new_smc, cclc,
					    ini->first_contact_local, ini);
		if (rc)
			goto out_unlock;
		mutex_unlock(&smc_server_lgr_pending);
	}
	smc_conn_save_peer_info(new_smc, cclc);
	smc_listen_out_connected(new_smc);
	SMC_STAT_SERV_SUCC_INC(sock_net(newclcsock->sk), ini);
	goto out_free;

out_unlock:
	mutex_unlock(&smc_server_lgr_pending);
out_decl:
	smc_listen_decline(new_smc, rc, ini ? ini->first_contact_local : 0,
			   proposal_version);
out_free:
	kfree(ini);
	kfree(buf);
}
static void smc_tcp_listen_work(struct work_struct *work)
{
	struct smc_sock *lsmc = container_of(work, struct smc_sock,
					     tcp_listen_work);
	struct sock *lsk = &lsmc->sk;
	struct smc_sock *new_smc;
	int rc = 0;

	lock_sock(lsk);
	while (lsk->sk_state == SMC_LISTEN) {
		rc = smc_clcsock_accept(lsmc, &new_smc);
		if (rc) /* clcsock accept queue empty or error */
			goto out;
		if (!new_smc)
			continue;

		if (tcp_sk(new_smc->clcsock->sk)->syn_smc)
			atomic_inc(&lsmc->queued_smc_hs);

		new_smc->listen_smc = lsmc;
		new_smc->use_fallback = lsmc->use_fallback;
		new_smc->fallback_rsn = lsmc->fallback_rsn;
		sock_hold(lsk); /* sock_put in smc_listen_work */
		INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
		smc_copy_sock_settings_to_smc(new_smc);
		sock_hold(&new_smc->sk); /* sock_put in passive closing */
		if (!queue_work(smc_hs_wq, &new_smc->smc_listen_work))
			sock_put(&new_smc->sk);
	}

out:
	release_sock(lsk);
	sock_put(&lsmc->sk); /* sock_hold in smc_clcsock_data_ready() */
}

static void smc_clcsock_data_ready(struct sock *listen_clcsock)
{
	struct smc_sock *lsmc;

	read_lock_bh(&listen_clcsock->sk_callback_lock);
	lsmc = smc_clcsock_user_data(listen_clcsock);
	if (!lsmc)
		goto out;
	lsmc->clcsk_data_ready(listen_clcsock);
	if (lsmc->sk.sk_state == SMC_LISTEN) {
		sock_hold(&lsmc->sk); /* sock_put in smc_tcp_listen_work() */
		if (!queue_work(smc_tcp_ls_wq, &lsmc->tcp_listen_work))
			sock_put(&lsmc->sk);
	}
out:
	read_unlock_bh(&listen_clcsock->sk_callback_lock);
}
static int smc_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sk);
	lock_sock(sk);

	rc = -EINVAL;
	if ((sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) ||
	    smc->connect_nonblock || sock->state != SS_UNCONNECTED)
		goto out;

	rc = 0;
	if (sk->sk_state == SMC_LISTEN) {
		sk->sk_max_ack_backlog = backlog;
		goto out;
	}
	/* some socket options are handled in core, so we could not apply
	 * them to the clc socket -- copy smc socket options to clc socket
	 */
	smc_copy_sock_settings_to_clc(smc);
	if (!smc->use_fallback)
		tcp_sk(smc->clcsock->sk)->syn_smc = 1;

	/* save original sk_data_ready function and establish
	 * smc-specific sk_data_ready function
	 */
	write_lock_bh(&smc->clcsock->sk->sk_callback_lock);
	smc->clcsock->sk->sk_user_data =
		(void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY);
	smc_clcsock_replace_cb(&smc->clcsock->sk->sk_data_ready,
			       smc_clcsock_data_ready, &smc->clcsk_data_ready);
	write_unlock_bh(&smc->clcsock->sk->sk_callback_lock);

	/* save original ops */
	smc->ori_af_ops = inet_csk(smc->clcsock->sk)->icsk_af_ops;

	smc->af_ops = *smc->ori_af_ops;
	smc->af_ops.syn_recv_sock = smc_tcp_syn_recv_sock;

	inet_csk(smc->clcsock->sk)->icsk_af_ops = &smc->af_ops;

	if (smc->limit_smc_hs)
		tcp_sk(smc->clcsock->sk)->smc_hs_congested = smc_hs_congested;

	rc = kernel_listen(smc->clcsock, backlog);
	if (rc) {
		write_lock_bh(&smc->clcsock->sk->sk_callback_lock);
		smc_clcsock_restore_cb(&smc->clcsock->sk->sk_data_ready,
				       &smc->clcsk_data_ready);
		smc->clcsock->sk->sk_user_data = NULL;
		write_unlock_bh(&smc->clcsock->sk->sk_callback_lock);
		goto out;
	}
	sk->sk_max_ack_backlog = backlog;
	sk->sk_ack_backlog = 0;
	sk->sk_state = SMC_LISTEN;

out:
	release_sock(sk);
	return rc;
}
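/* Usage from user space (illustrative sketch): the passive side looks like
 * any TCP server, only the socket() call differs; unmodified binaries can
 * be redirected to AF_SMC with the smc_run preload wrapper shipped in
 * smc-tools.
 *
 *	struct sockaddr_in sa = { .sin_family = AF_INET, ... };
 *	int lfd = socket(AF_SMC, SOCK_STREAM, SMCPROTO_SMC);
 *
 *	bind(lfd, (struct sockaddr *)&sa, sizeof(sa));
 *	listen(lfd, 128);
 *	int cfd = accept(lfd, NULL, NULL);
 */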
static int smc_accept(struct socket *sock, struct socket *new_sock,
		      int flags, bool kern)
{
	struct sock *sk = sock->sk, *nsk;
	DECLARE_WAITQUEUE(wait, current);
	struct smc_sock *lsmc;
	long timeo;
	int rc = 0;

	lsmc = smc_sk(sk);
	sock_hold(sk); /* sock_put below */
	lock_sock(sk);

	if (lsmc->sk.sk_state != SMC_LISTEN) {
		rc = -EINVAL;
		release_sock(sk);
		goto out;
	}

	/* Wait for an incoming connection */
	timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
	add_wait_queue_exclusive(sk_sleep(sk), &wait);
	while (!(nsk = smc_accept_dequeue(sk, new_sock))) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (!timeo) {
			rc = -EAGAIN;
			break;
		}
		release_sock(sk);
		timeo = schedule_timeout(timeo);
		/* wakeup by sk_data_ready in smc_listen_work() */
		sched_annotate_sleep();
		lock_sock(sk);
		if (signal_pending(current)) {
			rc = sock_intr_errno(timeo);
			break;
		}
	}
	set_current_state(TASK_RUNNING);
	remove_wait_queue(sk_sleep(sk), &wait);

	if (!rc)
		rc = sock_error(nsk);
	release_sock(sk);
	if (rc)
		goto out;

	if (lsmc->sockopt_defer_accept && !(flags & O_NONBLOCK)) {
		/* wait till data arrives on the socket */
		timeo = msecs_to_jiffies(lsmc->sockopt_defer_accept *
					 MSEC_PER_SEC);
		if (smc_sk(nsk)->use_fallback) {
			struct sock *clcsk = smc_sk(nsk)->clcsock->sk;

			lock_sock(clcsk);
			if (skb_queue_empty(&clcsk->sk_receive_queue))
				sk_wait_data(clcsk, &timeo, NULL);
			release_sock(clcsk);
		} else if (!atomic_read(&smc_sk(nsk)->conn.bytes_to_rcv)) {
			lock_sock(nsk);
			smc_rx_wait(smc_sk(nsk), &timeo, smc_rx_data_available);
			release_sock(nsk);
		}
	}

out:
	sock_put(sk); /* sock_hold above */
	return rc;
}
static int smc_getname(struct socket *sock, struct sockaddr *addr,
		       int peer)
{
	struct smc_sock *smc;

	if (peer && (sock->sk->sk_state != SMC_ACTIVE) &&
	    (sock->sk->sk_state != SMC_APPCLOSEWAIT1))
		return -ENOTCONN;

	smc = smc_sk(sock->sk);

	return smc->clcsock->ops->getname(smc->clcsock, addr, peer);
}

static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sk);
	lock_sock(sk);

	/* SMC does not support connect with fastopen */
	if (msg->msg_flags & MSG_FASTOPEN) {
		/* not connected yet, fallback */
		if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
			rc = smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP);
			if (rc)
				goto out;
		} else {
			rc = -EINVAL;
			goto out;
		}
	} else if ((sk->sk_state != SMC_ACTIVE) &&
		   (sk->sk_state != SMC_APPCLOSEWAIT1) &&
		   (sk->sk_state != SMC_INIT)) {
		rc = -EPIPE;
		goto out;
	}

	if (smc->use_fallback) {
		rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len);
	} else {
		rc = smc_tx_sendmsg(smc, msg, len);
		SMC_STAT_TX_PAYLOAD(smc, len, rc);
	}
out:
	release_sock(sk);
	return rc;
}
static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
		       int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -ENOTCONN;

	smc = smc_sk(sk);
	lock_sock(sk);
	if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) {
		/* socket was connected before, no more data to read */
		rc = 0;
		goto out;
	}
	if ((sk->sk_state == SMC_INIT) ||
	    (sk->sk_state == SMC_LISTEN) ||
	    (sk->sk_state == SMC_CLOSED))
		goto out;

	if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
		rc = 0;
		goto out;
	}

	if (smc->use_fallback) {
		rc = smc->clcsock->ops->recvmsg(smc->clcsock, msg, len, flags);
	} else {
		msg->msg_namelen = 0;
		rc = smc_rx_recvmsg(smc, msg, NULL, len, flags);
		SMC_STAT_RX_PAYLOAD(smc, rc, rc);
	}

out:
	release_sock(sk);
	return rc;
}
static __poll_t smc_accept_poll(struct sock *parent)
{
	struct smc_sock *isk = smc_sk(parent);
	__poll_t mask = 0;

	spin_lock(&isk->accept_q_lock);
	if (!list_empty(&isk->accept_q))
		mask = EPOLLIN | EPOLLRDNORM;
	spin_unlock(&isk->accept_q_lock);

	return mask;
}

static __poll_t smc_poll(struct file *file, struct socket *sock,
			 poll_table *wait)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	__poll_t mask = 0;

	if (!sk)
		return EPOLLNVAL;

	smc = smc_sk(sock->sk);
	if (smc->use_fallback) {
		/* delegate to CLC child sock */
		mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
		sk->sk_err = smc->clcsock->sk->sk_err;
	} else {
		if (sk->sk_state != SMC_CLOSED)
			sock_poll_wait(file, sock, wait);
		if (sk->sk_err)
			mask |= EPOLLERR;
		if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
		    (sk->sk_state == SMC_CLOSED))
			mask |= EPOLLHUP;
		if (sk->sk_state == SMC_LISTEN) {
			/* woken up by sk_data_ready in smc_listen_work() */
			mask |= smc_accept_poll(sk);
		} else if (smc->use_fallback) { /* as result of connect_work() */
			mask |= smc->clcsock->ops->poll(file, smc->clcsock,
							wait);
			sk->sk_err = smc->clcsock->sk->sk_err;
		} else {
			if ((sk->sk_state != SMC_INIT &&
			     atomic_read(&smc->conn.sndbuf_space)) ||
			    sk->sk_shutdown & SEND_SHUTDOWN) {
				mask |= EPOLLOUT | EPOLLWRNORM;
			} else {
				sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
			}
			if (atomic_read(&smc->conn.bytes_to_rcv))
				mask |= EPOLLIN | EPOLLRDNORM;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
			if (sk->sk_state == SMC_APPCLOSEWAIT1)
				mask |= EPOLLIN;
			if (smc->conn.urg_state == SMC_URG_VALID)
				mask |= EPOLLPRI;
		}
	}

	return mask;
}
static int smc_shutdown(struct socket *sock, int how)
{
	struct sock *sk = sock->sk;
	bool do_shutdown = true;
	struct smc_sock *smc;
	int rc = -EINVAL;
	int old_state;
	int rc1 = 0;

	smc = smc_sk(sk);

	if ((how < SHUT_RD) || (how > SHUT_RDWR))
		return rc;

	lock_sock(sk);

	if (sock->state == SS_CONNECTING) {
		if (sk->sk_state == SMC_ACTIVE)
			sock->state = SS_CONNECTED;
		else if (sk->sk_state == SMC_PEERCLOSEWAIT1 ||
			 sk->sk_state == SMC_PEERCLOSEWAIT2 ||
			 sk->sk_state == SMC_APPCLOSEWAIT1 ||
			 sk->sk_state == SMC_APPCLOSEWAIT2 ||
			 sk->sk_state == SMC_APPFINCLOSEWAIT)
			sock->state = SS_DISCONNECTING;
	}

	rc = -ENOTCONN;
	if ((sk->sk_state != SMC_ACTIVE) &&
	    (sk->sk_state != SMC_PEERCLOSEWAIT1) &&
	    (sk->sk_state != SMC_PEERCLOSEWAIT2) &&
	    (sk->sk_state != SMC_APPCLOSEWAIT1) &&
	    (sk->sk_state != SMC_APPCLOSEWAIT2) &&
	    (sk->sk_state != SMC_APPFINCLOSEWAIT))
		goto out;
	if (smc->use_fallback) {
		rc = kernel_sock_shutdown(smc->clcsock, how);
		sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
		if (sk->sk_shutdown == SHUTDOWN_MASK) {
			sk->sk_state = SMC_CLOSED;
			sk->sk_socket->state = SS_UNCONNECTED;
			sock_put(sk);
		}
		goto out;
	}
	switch (how) {
	case SHUT_RDWR:		/* shutdown in both directions */
		old_state = sk->sk_state;
		rc = smc_close_active(smc);
		if (old_state == SMC_ACTIVE &&
		    sk->sk_state == SMC_PEERCLOSEWAIT1)
			do_shutdown = false;
		break;
	case SHUT_WR:
		rc = smc_close_shutdown_write(smc);
		break;
	case SHUT_RD:
		rc = 0;
		/* nothing more to do because peer is not involved */
		break;
	}
	if (do_shutdown && smc->clcsock)
		rc1 = kernel_sock_shutdown(smc->clcsock, how);
	/* map sock_shutdown_cmd constants to sk_shutdown value range */
	sk->sk_shutdown |= how + 1;

	if (sk->sk_state == SMC_CLOSED)
		sock->state = SS_UNCONNECTED;
	else
		sock->state = SS_DISCONNECTING;
out:
	release_sock(sk);
	return rc ? rc : rc1;
}
static int __smc_getsockopt(struct socket *sock, int level, int optname,
			    char __user *optval, int __user *optlen)
{
	struct smc_sock *smc;
	int val, len;

	smc = smc_sk(sock->sk);

	if (get_user(len, optlen))
		return -EFAULT;

	len = min_t(int, len, sizeof(int));

	if (len < 0)
		return -EINVAL;

	switch (optname) {
	case SMC_LIMIT_HS:
		val = smc->limit_smc_hs;
		break;
	default:
		return -EOPNOTSUPP;
	}

	if (put_user(len, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &val, len))
		return -EFAULT;

	return 0;
}

static int __smc_setsockopt(struct socket *sock, int level, int optname,
			    sockptr_t optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int val, rc;

	smc = smc_sk(sk);

	lock_sock(sk);
	switch (optname) {
	case SMC_LIMIT_HS:
		if (optlen < sizeof(int)) {
			rc = -EINVAL;
			break;
		}
		if (copy_from_sockptr(&val, optval, sizeof(int))) {
			rc = -EFAULT;
			break;
		}

		smc->limit_smc_hs = !!val;
		rc = 0;
		break;
	default:
		rc = -EOPNOTSUPP;
		break;
	}
	release_sock(sk);

	return rc;
}
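/* Usage from user space (illustrative sketch; SOL_SMC and SMC_LIMIT_HS come
 * from the socket and uapi smc headers): SMC_LIMIT_HS limits the number of
 * parallel SMC handshakes on a listen socket, so that connections arriving
 * while the handshake workqueue is congested fall back to plain TCP instead
 * of queueing up:
 *
 *	int one = 1;
 *
 *	setsockopt(fd, SOL_SMC, SMC_LIMIT_HS, &one, sizeof(one));
 */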
static int smc_setsockopt(struct socket *sock, int level, int optname,
			  sockptr_t optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int val, rc;

	if (level == SOL_TCP && optname == TCP_ULP)
		return -EOPNOTSUPP;
	else if (level == SOL_SMC)
		return __smc_setsockopt(sock, level, optname, optval, optlen);

	smc = smc_sk(sk);

	/* generic setsockopts reaching us here always apply to the
	 * CLC socket
	 */
	mutex_lock(&smc->clcsock_release_lock);
	if (!smc->clcsock) {
		mutex_unlock(&smc->clcsock_release_lock);
		return -EBADF;
	}
	if (unlikely(!smc->clcsock->ops->setsockopt))
		rc = -EOPNOTSUPP;
	else
		rc = smc->clcsock->ops->setsockopt(smc->clcsock, level, optname,
						   optval, optlen);
	if (smc->clcsock->sk->sk_err) {
		sk->sk_err = smc->clcsock->sk->sk_err;
		sk_error_report(sk);
	}
	mutex_unlock(&smc->clcsock_release_lock);

	if (optlen < sizeof(int))
		return -EINVAL;
	if (copy_from_sockptr(&val, optval, sizeof(int)))
		return -EFAULT;

	lock_sock(sk);
	if (rc || smc->use_fallback)
		goto out;
	switch (optname) {
	case TCP_FASTOPEN:
	case TCP_FASTOPEN_CONNECT:
	case TCP_FASTOPEN_KEY:
	case TCP_FASTOPEN_NO_COOKIE:
		/* option not supported by SMC */
		if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
			rc = smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP);
		} else {
			rc = -EINVAL;
		}
		break;
	case TCP_NODELAY:
		if (sk->sk_state != SMC_INIT &&
		    sk->sk_state != SMC_LISTEN &&
		    sk->sk_state != SMC_CLOSED) {
			if (val) {
				SMC_STAT_INC(smc, ndly_cnt);
				smc_tx_pending(&smc->conn);
				cancel_delayed_work(&smc->conn.tx_work);
			}
		}
		break;
	case TCP_CORK:
		if (sk->sk_state != SMC_INIT &&
		    sk->sk_state != SMC_LISTEN &&
		    sk->sk_state != SMC_CLOSED) {
			if (!val) {
				SMC_STAT_INC(smc, cork_cnt);
				smc_tx_pending(&smc->conn);
				cancel_delayed_work(&smc->conn.tx_work);
			}
		}
		break;
	case TCP_DEFER_ACCEPT:
		smc->sockopt_defer_accept = val;
		break;
	default:
		break;
	}
out:
	release_sock(sk);
	return rc;
}
static int smc_getsockopt(struct socket *sock, int level, int optname,
			  char __user *optval, int __user *optlen)
{
	struct smc_sock *smc;
	int rc;

	if (level == SOL_SMC)
		return __smc_getsockopt(sock, level, optname, optval, optlen);

	smc = smc_sk(sock->sk);
	mutex_lock(&smc->clcsock_release_lock);
	if (!smc->clcsock) {
		mutex_unlock(&smc->clcsock_release_lock);
		return -EBADF;
	}
	/* socket options apply to the CLC socket */
	if (unlikely(!smc->clcsock->ops->getsockopt)) {
		mutex_unlock(&smc->clcsock_release_lock);
		return -EOPNOTSUPP;
	}
	rc = smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
					   optval, optlen);
	mutex_unlock(&smc->clcsock_release_lock);
	return rc;
}
static int smc_ioctl(struct socket *sock, unsigned int cmd,
		     unsigned long arg)
{
	union smc_host_cursor cons, urg;
	struct smc_connection *conn;
	struct smc_sock *smc;
	int answ;

	smc = smc_sk(sock->sk);
	conn = &smc->conn;
	lock_sock(&smc->sk);
	if (smc->use_fallback) {
		if (!smc->clcsock) {
			release_sock(&smc->sk);
			return -EBADF;
		}
		answ = smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg);
		release_sock(&smc->sk);
		return answ;
	}
	switch (cmd) {
	case SIOCINQ: /* same as FIONREAD */
		if (smc->sk.sk_state == SMC_LISTEN) {
			release_sock(&smc->sk);
			return -EINVAL;
		}
		if (smc->sk.sk_state == SMC_INIT ||
		    smc->sk.sk_state == SMC_CLOSED)
			answ = 0;
		else
			answ = atomic_read(&smc->conn.bytes_to_rcv);
		break;
	case SIOCOUTQ:
		/* output queue size (not sent + not acked) */
		if (smc->sk.sk_state == SMC_LISTEN) {
			release_sock(&smc->sk);
			return -EINVAL;
		}
		if (smc->sk.sk_state == SMC_INIT ||
		    smc->sk.sk_state == SMC_CLOSED)
			answ = 0;
		else
			answ = smc->conn.sndbuf_desc->len -
					atomic_read(&smc->conn.sndbuf_space);
		break;
	case SIOCOUTQNSD:
		/* output queue size (not sent only) */
		if (smc->sk.sk_state == SMC_LISTEN) {
			release_sock(&smc->sk);
			return -EINVAL;
		}
		if (smc->sk.sk_state == SMC_INIT ||
		    smc->sk.sk_state == SMC_CLOSED)
			answ = 0;
		else
			answ = smc_tx_prepared_sends(&smc->conn);
		break;
	case SIOCATMARK:
		if (smc->sk.sk_state == SMC_LISTEN) {
			release_sock(&smc->sk);
			return -EINVAL;
		}
		if (smc->sk.sk_state == SMC_INIT ||
		    smc->sk.sk_state == SMC_CLOSED) {
			answ = 0;
		} else {
			smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn);
			smc_curs_copy(&urg, &conn->urg_curs, conn);
			answ = smc_curs_diff(conn->rmb_desc->len,
					     &cons, &urg) == 1;
		}
		break;
	default:
		release_sock(&smc->sk);
		return -ENOIOCTLCMD;
	}
	release_sock(&smc->sk);

	return put_user(answ, (int __user *)arg);
}
static ssize_t smc_sendpage(struct socket *sock, struct page *page,
			    int offset, size_t size, int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EPIPE;

	smc = smc_sk(sk);
	lock_sock(sk);
	if (sk->sk_state != SMC_ACTIVE) {
		release_sock(sk);
		goto out;
	}
	release_sock(sk);
	if (smc->use_fallback) {
		rc = kernel_sendpage(smc->clcsock, page, offset,
				     size, flags);
	} else {
		lock_sock(sk);
		rc = smc_tx_sendpage(smc, page, offset, size, flags);
		release_sock(sk);
		SMC_STAT_INC(smc, sendpage_cnt);
	}

out:
	return rc;
}
/* Map the affected portions of the rmbe into an spd, note the number of bytes
 * to splice in conn->splice_pending, and press 'go'. Delays consumer cursor
 * updates till whenever a respective page has been fully processed.
 * Note that subsequent recv() calls have to wait till all splice() processing
 * completed.
 */
static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
			       struct pipe_inode_info *pipe, size_t len,
			       unsigned int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -ENOTCONN;

	smc = smc_sk(sk);
	lock_sock(sk);
	if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) {
		/* socket was connected before, no more data to read */
		rc = 0;
		goto out;
	}
	if (sk->sk_state == SMC_INIT ||
	    sk->sk_state == SMC_LISTEN ||
	    sk->sk_state == SMC_CLOSED)
		goto out;

	if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
		rc = 0;
		goto out;
	}

	if (smc->use_fallback) {
		rc = smc->clcsock->ops->splice_read(smc->clcsock, ppos,
						    pipe, len, flags);
	} else {
		if (*ppos) {
			rc = -ESPIPE;
			goto out;
		}
		if (flags & SPLICE_F_NONBLOCK)
			flags = MSG_DONTWAIT;
		else
			flags = 0;
		SMC_STAT_INC(smc, splice_cnt);
		rc = smc_rx_recvmsg(smc, NULL, pipe, len, flags);
	}

out:
	release_sock(sk);
	return rc;
}

/* must look like tcp */
static const struct proto_ops smc_sock_ops = {
	.family		= PF_SMC,
	.owner		= THIS_MODULE,
	.release	= smc_release,
	.bind		= smc_bind,
	.connect	= smc_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= smc_accept,
	.getname	= smc_getname,
	.poll		= smc_poll,
	.ioctl		= smc_ioctl,
	.listen		= smc_listen,
	.shutdown	= smc_shutdown,
	.setsockopt	= smc_setsockopt,
	.getsockopt	= smc_getsockopt,
	.sendmsg	= smc_sendmsg,
	.recvmsg	= smc_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= smc_sendpage,
	.splice_read	= smc_splice_read,
};
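
/* Common creation path for both socket(2) and the TCP ULP conversion:
 * allocate the SMC sock, then either create the internal TCP socket used
 * for the CLC handshake and fallback, or adopt the TCP socket handed in
 * as @clcsock.
 */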
static int __smc_create(struct net *net, struct socket *sock, int protocol,
			int kern, struct socket *clcsock)
{
	int family = (protocol == SMCPROTO_SMC6) ? PF_INET6 : PF_INET;
	struct smc_sock *smc;
	struct sock *sk;
	int rc;

	rc = -ESOCKTNOSUPPORT;
	if (sock->type != SOCK_STREAM)
		goto out;

	rc = -EPROTONOSUPPORT;
	if (protocol != SMCPROTO_SMC && protocol != SMCPROTO_SMC6)
		goto out;

	rc = -ENOBUFS;
	sock->ops = &smc_sock_ops;
	sock->state = SS_UNCONNECTED;
	sk = smc_sock_alloc(net, sock, protocol);
	if (!sk)
		goto out;

	/* create internal TCP socket for CLC handshake and fallback */
	smc = smc_sk(sk);
	smc->use_fallback = false; /* assume rdma capability first */
	smc->fallback_rsn = 0;

	/* default behavior from limit_smc_hs in every net namespace */
	smc->limit_smc_hs = net->smc.limit_smc_hs;

	rc = 0;
	if (!clcsock) {
		rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
				      &smc->clcsock);
		if (rc) {
			sk_common_release(sk);
			goto out;
		}
	} else {
		smc->clcsock = clcsock;
	}

out:
	return rc;
}
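
/* .create handler of the AF_SMC family; reached from userspace via
 * socket(AF_SMC, SOCK_STREAM, SMCPROTO_SMC) (or SMCPROTO_SMC6), in which
 * case the kernel creates the internal clcsock itself.
 */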
static int smc_create(struct net *net, struct socket *sock, int protocol,
		      int kern)
{
	return __smc_create(net, sock, protocol, kern, NULL);
}

static const struct net_proto_family smc_sock_family_ops = {
	.family	= PF_SMC,
	.owner	= THIS_MODULE,
	.create	= smc_create,
};
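
/* TCP ULP hook: converts an unconnected TCP socket into an SMC socket in
 * place. A minimal userspace sketch (assuming fd is a fresh TCP socket):
 *
 *	setsockopt(fd, SOL_TCP, TCP_ULP, "smc", sizeof("smc"));
 *
 * The TCP socket becomes the internal clcsock, and the file's private_data
 * and inode are rewired to point at the new SMC socket.
 */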
static int smc_ulp_init(struct sock *sk)
{
	struct socket *tcp = sk->sk_socket;
	struct net *net = sock_net(sk);
	struct socket *smcsock;
	int protocol, ret;

	/* only TCP can be replaced */
	if (tcp->type != SOCK_STREAM || sk->sk_protocol != IPPROTO_TCP ||
	    (sk->sk_family != AF_INET && sk->sk_family != AF_INET6))
		return -ESOCKTNOSUPPORT;
	/* don't handle wq now */
	if (tcp->state != SS_UNCONNECTED || !tcp->file || tcp->wq.fasync_list)
		return -ENOTCONN;

	if (sk->sk_family == AF_INET)
		protocol = SMCPROTO_SMC;
	else
		protocol = SMCPROTO_SMC6;

	smcsock = sock_alloc();
	if (!smcsock)
		return -ENFILE;

	smcsock->type = SOCK_STREAM;
	__module_get(THIS_MODULE); /* tried in __tcp_ulp_find_autoload */
	ret = __smc_create(net, smcsock, protocol, 1, tcp);
	if (ret) {
		sock_release(smcsock); /* module_put() in sock_release(), ops won't be NULL */
		return ret;
	}

	/* replace the tcp socket with smc */
	smcsock->file = tcp->file;
	smcsock->file->private_data = smcsock;
	smcsock->file->f_inode = SOCK_INODE(smcsock); /* replace inode when sock_close */
	smcsock->file->f_path.dentry->d_inode = SOCK_INODE(smcsock); /* dput() in __fput */
	tcp->file = NULL;

	return ret;
}
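
/* Invoked when a child socket is cloned off a ULP-enabled listen socket;
 * clearing icsk_ulp_ops keeps accepted children as plain TCP sockets.
 */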
static void smc_ulp_clone(const struct request_sock *req, struct sock *newsk,
			  const gfp_t priority)
{
	struct inet_connection_sock *icsk = inet_csk(newsk);

	/* don't inherit ulp ops to child when listen */
	icsk->icsk_ulp_ops = NULL;
}

static struct tcp_ulp_ops smc_ulp_ops __read_mostly = {
	.name		= "smc",
	.owner		= THIS_MODULE,
	.init		= smc_ulp_init,
	.clone		= smc_ulp_clone,
};

unsigned int smc_net_id;
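
/* Per-namespace init/exit for the base SMC state: sysctls plus the pnet
 * table.
 */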
static __net_init int smc_net_init(struct net *net)
{
	int rc;

	rc = smc_sysctl_net_init(net);
	if (rc)
		return rc;
	return smc_pnet_net_init(net);
}

static void __net_exit smc_net_exit(struct net *net)
{
	smc_sysctl_net_exit(net);
	smc_pnet_net_exit(net);
}
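
/* SMC statistics have their own per-namespace init/exit, registered as a
 * separate pernet subsystem (smc_net_stat_ops) in smc_init().
 */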
static __net_init int smc_net_stat_init(struct net *net)
{
	return smc_stats_init(net);
}

static void __net_exit smc_net_stat_exit(struct net *net)
{
	smc_stats_exit(net);
}

static struct pernet_operations smc_net_ops = {
	.init = smc_net_init,
	.exit = smc_net_exit,
	.id   = &smc_net_id,
	.size = sizeof(struct smc_net),
};

static struct pernet_operations smc_net_stat_ops = {
	.init = smc_net_stat_init,
	.exit = smc_net_stat_exit,
};
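
/* Module init: register the pernet subsystems, netlink and pnet support,
 * the handshake/close work queues, the core, LLC and CDC layers, both
 * protos, the AF_SMC socket family, the IB client and finally the "smc"
 * TCP ULP. Every failure unwinds all earlier registrations through the
 * out_* labels below, in reverse order.
 */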
static int __init smc_init(void)
{
	int rc;

	rc = register_pernet_subsys(&smc_net_ops);
	if (rc)
		return rc;

	rc = register_pernet_subsys(&smc_net_stat_ops);
	if (rc)
		goto out_pernet_subsys;

	smc_ism_init();
	smc_clc_init();

	rc = smc_nl_init();
	if (rc)
		goto out_pernet_subsys_stat;

	rc = smc_pnet_init();
	if (rc)
		goto out_nl;

	rc = -ENOMEM;

	smc_tcp_ls_wq = alloc_workqueue("smc_tcp_ls_wq", 0, 0);
	if (!smc_tcp_ls_wq)
		goto out_pnet;

	smc_hs_wq = alloc_workqueue("smc_hs_wq", 0, 0);
	if (!smc_hs_wq)
		goto out_alloc_tcp_ls_wq;

	smc_close_wq = alloc_workqueue("smc_close_wq", 0, 0);
	if (!smc_close_wq)
		goto out_alloc_hs_wq;

	rc = smc_core_init();
	if (rc) {
		pr_err("%s: smc_core_init fails with %d\n", __func__, rc);
		goto out_alloc_wqs;
	}

	rc = smc_llc_init();
	if (rc) {
		pr_err("%s: smc_llc_init fails with %d\n", __func__, rc);
		goto out_core;
	}

	rc = smc_cdc_init();
	if (rc) {
		pr_err("%s: smc_cdc_init fails with %d\n", __func__, rc);
		goto out_core;
	}

	rc = proto_register(&smc_proto, 1);
	if (rc) {
		pr_err("%s: proto_register(v4) fails with %d\n", __func__, rc);
		goto out_core;
	}

	rc = proto_register(&smc_proto6, 1);
	if (rc) {
		pr_err("%s: proto_register(v6) fails with %d\n", __func__, rc);
		goto out_proto;
	}

	rc = sock_register(&smc_sock_family_ops);
	if (rc) {
		pr_err("%s: sock_register fails with %d\n", __func__, rc);
		goto out_proto6;
	}
	INIT_HLIST_HEAD(&smc_v4_hashinfo.ht);
	INIT_HLIST_HEAD(&smc_v6_hashinfo.ht);

	rc = smc_ib_register_client();
	if (rc) {
		pr_err("%s: ib_register fails with %d\n", __func__, rc);
		goto out_sock;
	}

	rc = tcp_register_ulp(&smc_ulp_ops);
	if (rc) {
		pr_err("%s: tcp_ulp_register fails with %d\n", __func__, rc);
		goto out_ib;
	}

	static_branch_enable(&tcp_have_smc);
	return 0;

out_ib:
	smc_ib_unregister_client();
out_sock:
	sock_unregister(PF_SMC);
out_proto6:
	proto_unregister(&smc_proto6);
out_proto:
	proto_unregister(&smc_proto);
out_core:
	smc_core_exit();
out_alloc_wqs:
	destroy_workqueue(smc_close_wq);
out_alloc_hs_wq:
	destroy_workqueue(smc_hs_wq);
out_alloc_tcp_ls_wq:
	destroy_workqueue(smc_tcp_ls_wq);
out_pnet:
	smc_pnet_exit();
out_nl:
	smc_nl_exit();
out_pernet_subsys_stat:
	unregister_pernet_subsys(&smc_net_stat_ops);
out_pernet_subsys:
	unregister_pernet_subsys(&smc_net_ops);

	return rc;
}
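
/* Module exit: teardown mirrors smc_init(); the final rcu_barrier() waits
 * for outstanding call_rcu() callbacks to finish before the module text is
 * freed.
 */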
static void __exit smc_exit(void)
{
	static_branch_disable(&tcp_have_smc);
	tcp_unregister_ulp(&smc_ulp_ops);
	sock_unregister(PF_SMC);
	smc_core_exit();
	smc_ib_unregister_client();
	destroy_workqueue(smc_close_wq);
	destroy_workqueue(smc_tcp_ls_wq);
	destroy_workqueue(smc_hs_wq);
	proto_unregister(&smc_proto6);
	proto_unregister(&smc_proto);
	smc_pnet_exit();
	smc_nl_exit();
	smc_clc_exit();
	unregister_pernet_subsys(&smc_net_stat_ops);
	unregister_pernet_subsys(&smc_net_ops);
	rcu_barrier();
}

module_init(smc_init);
module_exit(smc_exit);

MODULE_AUTHOR("Ursula Braun <[email protected]>");
MODULE_DESCRIPTION("smc socket address family");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_SMC);
MODULE_ALIAS_TCP_ULP("smc");
MODULE_ALIAS_GENL_FAMILY(SMC_GENL_FAMILY_NAME);