rc.c 89 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511
661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542
055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688268926902691269226932694269526962697269826992700270127022703270427052706270727082709271027112712271327142715271627172718271927202721272227232724272527262727272827292730273127322733273427352736273727382739274027412742274327442745274627472748274927502751275227532754275527562757275827592760276127622763276427652766276727682769277027712772277327742775277627772778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943
2944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217321832193220322132223223322432253226322732283229323032313232323332343235323632373238323932403241324232433244
  1. // SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
  2. /*
  3. * Copyright(c) 2015 - 2018 Intel Corporation.
  4. */
  5. #include <linux/io.h>
  6. #include <rdma/rdma_vt.h>
  7. #include <rdma/rdmavt_qp.h>
  8. #include "hfi.h"
  9. #include "qp.h"
  10. #include "rc.h"
  11. #include "verbs_txreq.h"
  12. #include "trace.h"
  13. struct rvt_ack_entry *find_prev_entry(struct rvt_qp *qp, u32 psn, u8 *prev,
  14. u8 *prev_ack, bool *scheduled)
  15. __must_hold(&qp->s_lock)
  16. {
  17. struct rvt_ack_entry *e = NULL;
  18. u8 i, p;
  19. bool s = true;
  20. for (i = qp->r_head_ack_queue; ; i = p) {
  21. if (i == qp->s_tail_ack_queue)
  22. s = false;
  23. if (i)
  24. p = i - 1;
  25. else
  26. p = rvt_size_atomic(ib_to_rvt(qp->ibqp.device));
  27. if (p == qp->r_head_ack_queue) {
  28. e = NULL;
  29. break;
  30. }
  31. e = &qp->s_ack_queue[p];
  32. if (!e->opcode) {
  33. e = NULL;
  34. break;
  35. }
  36. if (cmp_psn(psn, e->psn) >= 0) {
  37. if (p == qp->s_tail_ack_queue &&
  38. cmp_psn(psn, e->lpsn) <= 0)
  39. s = false;
  40. break;
  41. }
  42. }
  43. if (prev)
  44. *prev = p;
  45. if (prev_ack)
  46. *prev_ack = i;
  47. if (scheduled)
  48. *scheduled = s;
  49. return e;
  50. }
  51. /**
  52. * make_rc_ack - construct a response packet (ACK, NAK, or RDMA read)
  53. * @dev: the device for this QP
  54. * @qp: a pointer to the QP
  55. * @ohdr: a pointer to the IB header being constructed
  56. * @ps: the xmit packet state
  57. *
  58. * Return 1 if constructed; otherwise, return 0.
  59. * Note that we are in the responder's side of the QP context.
  60. * Note the QP s_lock must be held.
  61. */
  62. static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
  63. struct ib_other_headers *ohdr,
  64. struct hfi1_pkt_state *ps)
  65. {
  66. struct rvt_ack_entry *e;
  67. u32 hwords, hdrlen;
  68. u32 len = 0;
  69. u32 bth0 = 0, bth2 = 0;
  70. u32 bth1 = qp->remote_qpn | (HFI1_CAP_IS_KSET(OPFN) << IB_BTHE_E_SHIFT);
  71. int middle = 0;
  72. u32 pmtu = qp->pmtu;
  73. struct hfi1_qp_priv *qpriv = qp->priv;
  74. bool last_pkt;
  75. u32 delta;
  76. u8 next = qp->s_tail_ack_queue;
  77. struct tid_rdma_request *req;
  78. trace_hfi1_rsp_make_rc_ack(qp, 0);
  79. lockdep_assert_held(&qp->s_lock);
  80. /* Don't send an ACK if we aren't supposed to. */
  81. if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
  82. goto bail;
  83. if (qpriv->hdr_type == HFI1_PKT_TYPE_9B)
  84. /* header size in 32-bit words LRH+BTH = (8+12)/4. */
  85. hwords = 5;
  86. else
  87. /* header size in 32-bit words 16B LRH+BTH = (16+12)/4. */
  88. hwords = 7;
  89. switch (qp->s_ack_state) {
  90. case OP(RDMA_READ_RESPONSE_LAST):
  91. case OP(RDMA_READ_RESPONSE_ONLY):
  92. e = &qp->s_ack_queue[qp->s_tail_ack_queue];
  93. release_rdma_sge_mr(e);
  94. fallthrough;
  95. case OP(ATOMIC_ACKNOWLEDGE):
  96. /*
  97. * We can increment the tail pointer now that the last
  98. * response has been sent instead of only being
  99. * constructed.
  100. */
  101. if (++next > rvt_size_atomic(&dev->rdi))
  102. next = 0;
  103. /*
  104. * Only advance the s_acked_ack_queue pointer if there
  105. * have been no TID RDMA requests.
  106. */
  107. e = &qp->s_ack_queue[qp->s_tail_ack_queue];
  108. if (e->opcode != TID_OP(WRITE_REQ) &&
  109. qp->s_acked_ack_queue == qp->s_tail_ack_queue)
  110. qp->s_acked_ack_queue = next;
  111. qp->s_tail_ack_queue = next;
  112. trace_hfi1_rsp_make_rc_ack(qp, e->psn);
  113. fallthrough;
  114. case OP(SEND_ONLY):
  115. case OP(ACKNOWLEDGE):
  116. /* Check for no next entry in the queue. */
  117. if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
  118. if (qp->s_flags & RVT_S_ACK_PENDING)
  119. goto normal;
  120. goto bail;
  121. }
  122. e = &qp->s_ack_queue[qp->s_tail_ack_queue];
  123. /* Check for tid write fence */
  124. if ((qpriv->s_flags & HFI1_R_TID_WAIT_INTERLCK) ||
  125. hfi1_tid_rdma_ack_interlock(qp, e)) {
  126. iowait_set_flag(&qpriv->s_iowait, IOWAIT_PENDING_IB);
  127. goto bail;
  128. }
  129. if (e->opcode == OP(RDMA_READ_REQUEST)) {
  130. /*
  131. * If a RDMA read response is being resent and
  132. * we haven't seen the duplicate request yet,
  133. * then stop sending the remaining responses the
  134. * responder has seen until the requester re-sends it.
  135. */
  136. len = e->rdma_sge.sge_length;
  137. if (len && !e->rdma_sge.mr) {
  138. if (qp->s_acked_ack_queue ==
  139. qp->s_tail_ack_queue)
  140. qp->s_acked_ack_queue =
  141. qp->r_head_ack_queue;
  142. qp->s_tail_ack_queue = qp->r_head_ack_queue;
  143. goto bail;
  144. }
  145. /* Copy SGE state in case we need to resend */
  146. ps->s_txreq->mr = e->rdma_sge.mr;
  147. if (ps->s_txreq->mr)
  148. rvt_get_mr(ps->s_txreq->mr);
  149. qp->s_ack_rdma_sge.sge = e->rdma_sge;
  150. qp->s_ack_rdma_sge.num_sge = 1;
  151. ps->s_txreq->ss = &qp->s_ack_rdma_sge;
  152. if (len > pmtu) {
  153. len = pmtu;
  154. qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
  155. } else {
  156. qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
  157. e->sent = 1;
  158. }
  159. ohdr->u.aeth = rvt_compute_aeth(qp);
  160. hwords++;
  161. qp->s_ack_rdma_psn = e->psn;
  162. bth2 = mask_psn(qp->s_ack_rdma_psn++);
  163. } else if (e->opcode == TID_OP(WRITE_REQ)) {
  164. /*
  165. * If a TID RDMA WRITE RESP is being resent, we have to
  166. * wait for the actual request. All requests that are to
  167. * be resent will have their state set to
  168. * TID_REQUEST_RESEND. When the new request arrives, the
  169. * state will be changed to TID_REQUEST_RESEND_ACTIVE.
  170. */
  171. req = ack_to_tid_req(e);
  172. if (req->state == TID_REQUEST_RESEND ||
  173. req->state == TID_REQUEST_INIT_RESEND)
  174. goto bail;
  175. qp->s_ack_state = TID_OP(WRITE_RESP);
  176. qp->s_ack_rdma_psn = mask_psn(e->psn + req->cur_seg);
  177. goto write_resp;
  178. } else if (e->opcode == TID_OP(READ_REQ)) {
  179. /*
  180. * If a TID RDMA read response is being resent and
  181. * we haven't seen the duplicate request yet,
  182. * then stop sending the remaining responses the
  183. * responder has seen until the requester re-sends it.
  184. */
  185. len = e->rdma_sge.sge_length;
  186. if (len && !e->rdma_sge.mr) {
  187. if (qp->s_acked_ack_queue ==
  188. qp->s_tail_ack_queue)
  189. qp->s_acked_ack_queue =
  190. qp->r_head_ack_queue;
  191. qp->s_tail_ack_queue = qp->r_head_ack_queue;
  192. goto bail;
  193. }
  194. /* Copy SGE state in case we need to resend */
  195. ps->s_txreq->mr = e->rdma_sge.mr;
  196. if (ps->s_txreq->mr)
  197. rvt_get_mr(ps->s_txreq->mr);
  198. qp->s_ack_rdma_sge.sge = e->rdma_sge;
  199. qp->s_ack_rdma_sge.num_sge = 1;
  200. qp->s_ack_state = TID_OP(READ_RESP);
  201. goto read_resp;
  202. } else {
  203. /* COMPARE_SWAP or FETCH_ADD */
  204. ps->s_txreq->ss = NULL;
  205. len = 0;
  206. qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
  207. ohdr->u.at.aeth = rvt_compute_aeth(qp);
  208. ib_u64_put(e->atomic_data, &ohdr->u.at.atomic_ack_eth);
  209. hwords += sizeof(ohdr->u.at) / sizeof(u32);
  210. bth2 = mask_psn(e->psn);
  211. e->sent = 1;
  212. }
  213. trace_hfi1_tid_write_rsp_make_rc_ack(qp);
  214. bth0 = qp->s_ack_state << 24;
  215. break;
  216. case OP(RDMA_READ_RESPONSE_FIRST):
  217. qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
  218. fallthrough;
  219. case OP(RDMA_READ_RESPONSE_MIDDLE):
  220. ps->s_txreq->ss = &qp->s_ack_rdma_sge;
  221. ps->s_txreq->mr = qp->s_ack_rdma_sge.sge.mr;
  222. if (ps->s_txreq->mr)
  223. rvt_get_mr(ps->s_txreq->mr);
  224. len = qp->s_ack_rdma_sge.sge.sge_length;
  225. if (len > pmtu) {
  226. len = pmtu;
  227. middle = HFI1_CAP_IS_KSET(SDMA_AHG);
  228. } else {
  229. ohdr->u.aeth = rvt_compute_aeth(qp);
  230. hwords++;
  231. qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
  232. e = &qp->s_ack_queue[qp->s_tail_ack_queue];
  233. e->sent = 1;
  234. }
  235. bth0 = qp->s_ack_state << 24;
  236. bth2 = mask_psn(qp->s_ack_rdma_psn++);
  237. break;
  238. case TID_OP(WRITE_RESP):
  239. write_resp:
  240. /*
  241. * 1. Check if RVT_S_ACK_PENDING is set. If yes,
  242. * goto normal.
  243. * 2. Attempt to allocate TID resources.
  244. * 3. Remove RVT_S_RESP_PENDING flags from s_flags
  245. * 4. If resources not available:
  246. * 4.1 Set RVT_S_WAIT_TID_SPACE
  247. * 4.2 Queue QP on RCD TID queue
  248. * 4.3 Put QP on iowait list.
  249. * 4.4 Build IB RNR NAK with appropriate timeout value
  250. * 4.5 Return indication progress made.
  251. * 5. If resources are available:
  252. * 5.1 Program HW flow CSRs
  253. * 5.2 Build TID RDMA WRITE RESP packet
  254. * 5.3 If more resources needed, do 2.1 - 2.3.
  255. * 5.4 Wake up next QP on RCD TID queue.
  256. * 5.5 Return indication progress made.
  257. */
  258. e = &qp->s_ack_queue[qp->s_tail_ack_queue];
  259. req = ack_to_tid_req(e);
  260. /*
  261. * Send scheduled RNR NAK's. RNR NAK's need to be sent at
  262. * segment boundaries, not at request boundaries. Don't change
  263. * s_ack_state because we are still in the middle of a request
  264. */
  265. if (qpriv->rnr_nak_state == TID_RNR_NAK_SEND &&
  266. qp->s_tail_ack_queue == qpriv->r_tid_alloc &&
  267. req->cur_seg == req->alloc_seg) {
  268. qpriv->rnr_nak_state = TID_RNR_NAK_SENT;
  269. goto normal_no_state;
  270. }
  271. bth2 = mask_psn(qp->s_ack_rdma_psn);
  272. hdrlen = hfi1_build_tid_rdma_write_resp(qp, e, ohdr, &bth1,
  273. bth2, &len,
  274. &ps->s_txreq->ss);
  275. if (!hdrlen)
  276. return 0;
  277. hwords += hdrlen;
  278. bth0 = qp->s_ack_state << 24;
  279. qp->s_ack_rdma_psn++;
  280. trace_hfi1_tid_req_make_rc_ack_write(qp, 0, e->opcode, e->psn,
  281. e->lpsn, req);
  282. if (req->cur_seg != req->total_segs)
  283. break;
  284. e->sent = 1;
  285. /* Do not free e->rdma_sge until all data are received */
  286. qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
  287. break;
  288. case TID_OP(READ_RESP):
  289. read_resp:
  290. e = &qp->s_ack_queue[qp->s_tail_ack_queue];
  291. ps->s_txreq->ss = &qp->s_ack_rdma_sge;
  292. delta = hfi1_build_tid_rdma_read_resp(qp, e, ohdr, &bth0,
  293. &bth1, &bth2, &len,
  294. &last_pkt);
  295. if (delta == 0)
  296. goto error_qp;
  297. hwords += delta;
  298. if (last_pkt) {
  299. e->sent = 1;
  300. /*
  301. * Increment qp->s_tail_ack_queue through s_ack_state
  302. * transition.
  303. */
  304. qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
  305. }
  306. break;
  307. case TID_OP(READ_REQ):
  308. goto bail;
  309. default:
  310. normal:
  311. /*
  312. * Send a regular ACK.
  313. * Set the s_ack_state so we wait until after sending
  314. * the ACK before setting s_ack_state to ACKNOWLEDGE
  315. * (see above).
  316. */
  317. qp->s_ack_state = OP(SEND_ONLY);
  318. normal_no_state:
  319. if (qp->s_nak_state)
  320. ohdr->u.aeth =
  321. cpu_to_be32((qp->r_msn & IB_MSN_MASK) |
  322. (qp->s_nak_state <<
  323. IB_AETH_CREDIT_SHIFT));
  324. else
  325. ohdr->u.aeth = rvt_compute_aeth(qp);
  326. hwords++;
  327. len = 0;
  328. bth0 = OP(ACKNOWLEDGE) << 24;
  329. bth2 = mask_psn(qp->s_ack_psn);
  330. qp->s_flags &= ~RVT_S_ACK_PENDING;
  331. ps->s_txreq->txreq.flags |= SDMA_TXREQ_F_VIP;
  332. ps->s_txreq->ss = NULL;
  333. }
  334. qp->s_rdma_ack_cnt++;
  335. ps->s_txreq->sde = qpriv->s_sde;
  336. ps->s_txreq->s_cur_size = len;
  337. ps->s_txreq->hdr_dwords = hwords;
  338. hfi1_make_ruc_header(qp, ohdr, bth0, bth1, bth2, middle, ps);
  339. return 1;
  340. error_qp:
  341. spin_unlock_irqrestore(&qp->s_lock, ps->flags);
  342. spin_lock_irqsave(&qp->r_lock, ps->flags);
  343. spin_lock(&qp->s_lock);
  344. rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
  345. spin_unlock(&qp->s_lock);
  346. spin_unlock_irqrestore(&qp->r_lock, ps->flags);
  347. spin_lock_irqsave(&qp->s_lock, ps->flags);
  348. bail:
  349. qp->s_ack_state = OP(ACKNOWLEDGE);
  350. /*
  351. * Ensure s_rdma_ack_cnt changes are committed prior to resetting
  352. * RVT_S_RESP_PENDING
  353. */
  354. smp_wmb();
  355. qp->s_flags &= ~(RVT_S_RESP_PENDING
  356. | RVT_S_ACK_PENDING
  357. | HFI1_S_AHG_VALID);
  358. return 0;
  359. }
  360. /**
  361. * hfi1_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
  362. * @qp: a pointer to the QP
  363. * @ps: the current packet state
  364. *
  365. * Assumes s_lock is held.
  366. *
  367. * Return 1 if constructed; otherwise, return 0.
  368. */
  369. int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
  370. {
  371. struct hfi1_qp_priv *priv = qp->priv;
  372. struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
  373. struct ib_other_headers *ohdr;
  374. struct rvt_sge_state *ss = NULL;
  375. struct rvt_swqe *wqe;
  376. struct hfi1_swqe_priv *wpriv;
  377. struct tid_rdma_request *req = NULL;
  378. /* header size in 32-bit words LRH+BTH = (8+12)/4. */
  379. u32 hwords = 5;
  380. u32 len = 0;
  381. u32 bth0 = 0, bth2 = 0;
  382. u32 bth1 = qp->remote_qpn | (HFI1_CAP_IS_KSET(OPFN) << IB_BTHE_E_SHIFT);
  383. u32 pmtu = qp->pmtu;
  384. char newreq;
  385. int middle = 0;
  386. int delta;
  387. struct tid_rdma_flow *flow = NULL;
  388. struct tid_rdma_params *remote;
  389. trace_hfi1_sender_make_rc_req(qp);
  390. lockdep_assert_held(&qp->s_lock);
  391. ps->s_txreq = get_txreq(ps->dev, qp);
  392. if (!ps->s_txreq)
  393. goto bail_no_tx;
  394. if (priv->hdr_type == HFI1_PKT_TYPE_9B) {
  395. /* header size in 32-bit words LRH+BTH = (8+12)/4. */
  396. hwords = 5;
  397. if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)
  398. ohdr = &ps->s_txreq->phdr.hdr.ibh.u.l.oth;
  399. else
  400. ohdr = &ps->s_txreq->phdr.hdr.ibh.u.oth;
  401. } else {
  402. /* header size in 32-bit words 16B LRH+BTH = (16+12)/4. */
  403. hwords = 7;
  404. if ((rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) &&
  405. (hfi1_check_mcast(rdma_ah_get_dlid(&qp->remote_ah_attr))))
  406. ohdr = &ps->s_txreq->phdr.hdr.opah.u.l.oth;
  407. else
  408. ohdr = &ps->s_txreq->phdr.hdr.opah.u.oth;
  409. }
  410. /* Sending responses has higher priority over sending requests. */
  411. if ((qp->s_flags & RVT_S_RESP_PENDING) &&
  412. make_rc_ack(dev, qp, ohdr, ps))
  413. return 1;
  414. if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) {
  415. if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
  416. goto bail;
  417. /* We are in the error state, flush the work request. */
  418. if (qp->s_last == READ_ONCE(qp->s_head))
  419. goto bail;
  420. /* If DMAs are in progress, we can't flush immediately. */
  421. if (iowait_sdma_pending(&priv->s_iowait)) {
  422. qp->s_flags |= RVT_S_WAIT_DMA;
  423. goto bail;
  424. }
  425. clear_ahg(qp);
  426. wqe = rvt_get_swqe_ptr(qp, qp->s_last);
  427. hfi1_trdma_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
  428. IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR);
  429. /* will get called again */
  430. goto done_free_tx;
  431. }
  432. if (qp->s_flags & (RVT_S_WAIT_RNR | RVT_S_WAIT_ACK | HFI1_S_WAIT_HALT))
  433. goto bail;
  434. if (cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) {
  435. if (cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) {
  436. qp->s_flags |= RVT_S_WAIT_PSN;
  437. goto bail;
  438. }
  439. qp->s_sending_psn = qp->s_psn;
  440. qp->s_sending_hpsn = qp->s_psn - 1;
  441. }
  442. /* Send a request. */
  443. wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
  444. check_s_state:
  445. switch (qp->s_state) {
  446. default:
  447. if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK))
  448. goto bail;
  449. /*
  450. * Resend an old request or start a new one.
  451. *
  452. * We keep track of the current SWQE so that
  453. * we don't reset the "furthest progress" state
  454. * if we need to back up.
  455. */
  456. newreq = 0;
  457. if (qp->s_cur == qp->s_tail) {
  458. /* Check if send work queue is empty. */
  459. if (qp->s_tail == READ_ONCE(qp->s_head)) {
  460. clear_ahg(qp);
  461. goto bail;
  462. }
  463. /*
  464. * If a fence is requested, wait for previous
  465. * RDMA read and atomic operations to finish.
  466. * However, there is no need to guard against
  467. * TID RDMA READ after TID RDMA READ.
  468. */
  469. if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
  470. qp->s_num_rd_atomic &&
  471. (wqe->wr.opcode != IB_WR_TID_RDMA_READ ||
  472. priv->pending_tid_r_segs < qp->s_num_rd_atomic)) {
  473. qp->s_flags |= RVT_S_WAIT_FENCE;
  474. goto bail;
  475. }
  476. /*
  477. * Local operations are processed immediately
  478. * after all prior requests have completed
  479. */
  480. if (wqe->wr.opcode == IB_WR_REG_MR ||
  481. wqe->wr.opcode == IB_WR_LOCAL_INV) {
  482. int local_ops = 0;
  483. int err = 0;
  484. if (qp->s_last != qp->s_cur)
  485. goto bail;
  486. if (++qp->s_cur == qp->s_size)
  487. qp->s_cur = 0;
  488. if (++qp->s_tail == qp->s_size)
  489. qp->s_tail = 0;
  490. if (!(wqe->wr.send_flags &
  491. RVT_SEND_COMPLETION_ONLY)) {
  492. err = rvt_invalidate_rkey(
  493. qp,
  494. wqe->wr.ex.invalidate_rkey);
  495. local_ops = 1;
  496. }
  497. rvt_send_complete(qp, wqe,
  498. err ? IB_WC_LOC_PROT_ERR
  499. : IB_WC_SUCCESS);
  500. if (local_ops)
  501. atomic_dec(&qp->local_ops_pending);
  502. goto done_free_tx;
  503. }
  504. newreq = 1;
  505. qp->s_psn = wqe->psn;
  506. }
  507. /*
  508. * Note that we have to be careful not to modify the
  509. * original work request since we may need to resend
  510. * it.
  511. */
  512. len = wqe->length;
  513. ss = &qp->s_sge;
  514. bth2 = mask_psn(qp->s_psn);
  515. /*
  516. * Interlock between various IB requests and TID RDMA
  517. * if necessary.
  518. */
  519. if ((priv->s_flags & HFI1_S_TID_WAIT_INTERLCK) ||
  520. hfi1_tid_rdma_wqe_interlock(qp, wqe))
  521. goto bail;
  522. switch (wqe->wr.opcode) {
  523. case IB_WR_SEND:
  524. case IB_WR_SEND_WITH_IMM:
  525. case IB_WR_SEND_WITH_INV:
  526. /* If no credit, return. */
  527. if (!rvt_rc_credit_avail(qp, wqe))
  528. goto bail;
  529. if (len > pmtu) {
  530. qp->s_state = OP(SEND_FIRST);
  531. len = pmtu;
  532. break;
  533. }
  534. if (wqe->wr.opcode == IB_WR_SEND) {
  535. qp->s_state = OP(SEND_ONLY);
  536. } else if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
  537. qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE);
  538. /* Immediate data comes after the BTH */
  539. ohdr->u.imm_data = wqe->wr.ex.imm_data;
  540. hwords += 1;
  541. } else {
  542. qp->s_state = OP(SEND_ONLY_WITH_INVALIDATE);
  543. /* Invalidate rkey comes after the BTH */
  544. ohdr->u.ieth = cpu_to_be32(
  545. wqe->wr.ex.invalidate_rkey);
  546. hwords += 1;
  547. }
  548. if (wqe->wr.send_flags & IB_SEND_SOLICITED)
  549. bth0 |= IB_BTH_SOLICITED;
  550. bth2 |= IB_BTH_REQ_ACK;
  551. if (++qp->s_cur == qp->s_size)
  552. qp->s_cur = 0;
  553. break;
  554. case IB_WR_RDMA_WRITE:
  555. if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
  556. qp->s_lsn++;
  557. goto no_flow_control;
  558. case IB_WR_RDMA_WRITE_WITH_IMM:
  559. /* If no credit, return. */
  560. if (!rvt_rc_credit_avail(qp, wqe))
  561. goto bail;
  562. no_flow_control:
  563. put_ib_reth_vaddr(
  564. wqe->rdma_wr.remote_addr,
  565. &ohdr->u.rc.reth);
  566. ohdr->u.rc.reth.rkey =
  567. cpu_to_be32(wqe->rdma_wr.rkey);
  568. ohdr->u.rc.reth.length = cpu_to_be32(len);
  569. hwords += sizeof(struct ib_reth) / sizeof(u32);
  570. if (len > pmtu) {
  571. qp->s_state = OP(RDMA_WRITE_FIRST);
  572. len = pmtu;
  573. break;
  574. }
  575. if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
  576. qp->s_state = OP(RDMA_WRITE_ONLY);
  577. } else {
  578. qp->s_state =
  579. OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
  580. /* Immediate data comes after RETH */
  581. ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
  582. hwords += 1;
  583. if (wqe->wr.send_flags & IB_SEND_SOLICITED)
  584. bth0 |= IB_BTH_SOLICITED;
  585. }
  586. bth2 |= IB_BTH_REQ_ACK;
  587. if (++qp->s_cur == qp->s_size)
  588. qp->s_cur = 0;
  589. break;
  590. case IB_WR_TID_RDMA_WRITE:
  591. if (newreq) {
  592. /*
  593. * Limit the number of TID RDMA WRITE requests.
  594. */
  595. if (atomic_read(&priv->n_tid_requests) >=
  596. HFI1_TID_RDMA_WRITE_CNT)
  597. goto bail;
  598. if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
  599. qp->s_lsn++;
  600. }
  601. hwords += hfi1_build_tid_rdma_write_req(qp, wqe, ohdr,
  602. &bth1, &bth2,
  603. &len);
  604. ss = NULL;
  605. if (priv->s_tid_cur == HFI1_QP_WQE_INVALID) {
  606. priv->s_tid_cur = qp->s_cur;
  607. if (priv->s_tid_tail == HFI1_QP_WQE_INVALID) {
  608. priv->s_tid_tail = qp->s_cur;
  609. priv->s_state = TID_OP(WRITE_RESP);
  610. }
  611. } else if (priv->s_tid_cur == priv->s_tid_head) {
  612. struct rvt_swqe *__w;
  613. struct tid_rdma_request *__r;
  614. __w = rvt_get_swqe_ptr(qp, priv->s_tid_cur);
  615. __r = wqe_to_tid_req(__w);
  616. /*
  617. * The s_tid_cur pointer is advanced to s_cur if
  618. * any of the following conditions about the WQE
  619. * to which s_ti_cur currently points to are
  620. * satisfied:
  621. * 1. The request is not a TID RDMA WRITE
  622. * request,
  623. * 2. The request is in the INACTIVE or
  624. * COMPLETE states (TID RDMA READ requests
  625. * stay at INACTIVE and TID RDMA WRITE
  626. * transition to COMPLETE when done),
  627. * 3. The request is in the ACTIVE or SYNC
  628. * state and the number of completed
  629. * segments is equal to the total segment
  630. * count.
  631. * (If ACTIVE, the request is waiting for
  632. * ACKs. If SYNC, the request has not
  633. * received any responses because it's
  634. * waiting on a sync point.)
  635. */
  636. if (__w->wr.opcode != IB_WR_TID_RDMA_WRITE ||
  637. __r->state == TID_REQUEST_INACTIVE ||
  638. __r->state == TID_REQUEST_COMPLETE ||
  639. ((__r->state == TID_REQUEST_ACTIVE ||
  640. __r->state == TID_REQUEST_SYNC) &&
  641. __r->comp_seg == __r->total_segs)) {
  642. if (priv->s_tid_tail ==
  643. priv->s_tid_cur &&
  644. priv->s_state ==
  645. TID_OP(WRITE_DATA_LAST)) {
  646. priv->s_tid_tail = qp->s_cur;
  647. priv->s_state =
  648. TID_OP(WRITE_RESP);
  649. }
  650. priv->s_tid_cur = qp->s_cur;
  651. }
  652. /*
  653. * A corner case: when the last TID RDMA WRITE
  654. * request was completed, s_tid_head,
  655. * s_tid_cur, and s_tid_tail all point to the
  656. * same location. Other requests are posted and
  657. * s_cur wraps around to the same location,
  658. * where a new TID RDMA WRITE is posted. In
  659. * this case, none of the indices need to be
  660. * updated. However, the priv->s_state should.
  661. */
  662. if (priv->s_tid_tail == qp->s_cur &&
  663. priv->s_state == TID_OP(WRITE_DATA_LAST))
  664. priv->s_state = TID_OP(WRITE_RESP);
  665. }
  666. req = wqe_to_tid_req(wqe);
  667. if (newreq) {
  668. priv->s_tid_head = qp->s_cur;
  669. priv->pending_tid_w_resp += req->total_segs;
  670. atomic_inc(&priv->n_tid_requests);
  671. atomic_dec(&priv->n_requests);
  672. } else {
  673. req->state = TID_REQUEST_RESEND;
  674. req->comp_seg = delta_psn(bth2, wqe->psn);
  675. /*
  676. * Pull back any segments since we are going
  677. * to re-receive them.
  678. */
  679. req->setup_head = req->clear_tail;
  680. priv->pending_tid_w_resp +=
  681. delta_psn(wqe->lpsn, bth2) + 1;
  682. }
  683. trace_hfi1_tid_write_sender_make_req(qp, newreq);
  684. trace_hfi1_tid_req_make_req_write(qp, newreq,
  685. wqe->wr.opcode,
  686. wqe->psn, wqe->lpsn,
  687. req);
  688. if (++qp->s_cur == qp->s_size)
  689. qp->s_cur = 0;
  690. break;
  691. case IB_WR_RDMA_READ:
  692. /*
  693. * Don't allow more operations to be started
  694. * than the QP limits allow.
  695. */
  696. if (qp->s_num_rd_atomic >=
  697. qp->s_max_rd_atomic) {
  698. qp->s_flags |= RVT_S_WAIT_RDMAR;
  699. goto bail;
  700. }
  701. qp->s_num_rd_atomic++;
  702. if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
  703. qp->s_lsn++;
  704. put_ib_reth_vaddr(
  705. wqe->rdma_wr.remote_addr,
  706. &ohdr->u.rc.reth);
  707. ohdr->u.rc.reth.rkey =
  708. cpu_to_be32(wqe->rdma_wr.rkey);
  709. ohdr->u.rc.reth.length = cpu_to_be32(len);
  710. qp->s_state = OP(RDMA_READ_REQUEST);
  711. hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
  712. ss = NULL;
  713. len = 0;
  714. bth2 |= IB_BTH_REQ_ACK;
  715. if (++qp->s_cur == qp->s_size)
  716. qp->s_cur = 0;
  717. break;
  718. case IB_WR_TID_RDMA_READ:
  719. trace_hfi1_tid_read_sender_make_req(qp, newreq);
  720. wpriv = wqe->priv;
  721. req = wqe_to_tid_req(wqe);
  722. trace_hfi1_tid_req_make_req_read(qp, newreq,
  723. wqe->wr.opcode,
  724. wqe->psn, wqe->lpsn,
  725. req);
  726. delta = cmp_psn(qp->s_psn, wqe->psn);
  727. /*
  728. * Don't allow more operations to be started
  729. * than the QP limits allow. We could get here under
  730. * three conditions; (1) It's a new request; (2) We are
  731. * sending the second or later segment of a request,
  732. * but the qp->s_state is set to OP(RDMA_READ_REQUEST)
  733. * when the last segment of a previous request is
  734. * received just before this; (3) We are re-sending a
  735. * request.
  736. */
  737. if (qp->s_num_rd_atomic >= qp->s_max_rd_atomic) {
  738. qp->s_flags |= RVT_S_WAIT_RDMAR;
  739. goto bail;
  740. }
  741. if (newreq) {
  742. struct tid_rdma_flow *flow =
  743. &req->flows[req->setup_head];
  744. /*
  745. * Set up s_sge as it is needed for TID
  746. * allocation. However, if the pages have been
  747. * walked and mapped, skip it. An earlier try
  748. * has failed to allocate the TID entries.
  749. */
  750. if (!flow->npagesets) {
  751. qp->s_sge.sge = wqe->sg_list[0];
  752. qp->s_sge.sg_list = wqe->sg_list + 1;
  753. qp->s_sge.num_sge = wqe->wr.num_sge;
  754. qp->s_sge.total_len = wqe->length;
  755. qp->s_len = wqe->length;
  756. req->isge = 0;
  757. req->clear_tail = req->setup_head;
  758. req->flow_idx = req->setup_head;
  759. req->state = TID_REQUEST_ACTIVE;
  760. }
  761. } else if (delta == 0) {
  762. /* Re-send a request */
  763. req->cur_seg = 0;
  764. req->comp_seg = 0;
  765. req->ack_pending = 0;
  766. req->flow_idx = req->clear_tail;
  767. req->state = TID_REQUEST_RESEND;
  768. }
  769. req->s_next_psn = qp->s_psn;
  770. /* Read one segment at a time */
  771. len = min_t(u32, req->seg_len,
  772. wqe->length - req->seg_len * req->cur_seg);
  773. delta = hfi1_build_tid_rdma_read_req(qp, wqe, ohdr,
  774. &bth1, &bth2,
  775. &len);
  776. if (delta <= 0) {
  777. /* Wait for TID space */
  778. goto bail;
  779. }
  780. if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
  781. qp->s_lsn++;
  782. hwords += delta;
  783. ss = &wpriv->ss;
  784. /* Check if this is the last segment */
  785. if (req->cur_seg >= req->total_segs &&
  786. ++qp->s_cur == qp->s_size)
  787. qp->s_cur = 0;
  788. break;
  789. case IB_WR_ATOMIC_CMP_AND_SWP:
  790. case IB_WR_ATOMIC_FETCH_AND_ADD:
  791. /*
  792. * Don't allow more operations to be started
  793. * than the QP limits allow.
  794. */
  795. if (qp->s_num_rd_atomic >=
  796. qp->s_max_rd_atomic) {
  797. qp->s_flags |= RVT_S_WAIT_RDMAR;
  798. goto bail;
  799. }
  800. qp->s_num_rd_atomic++;
  801. fallthrough;
  802. case IB_WR_OPFN:
  803. if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
  804. qp->s_lsn++;
  805. if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
  806. wqe->wr.opcode == IB_WR_OPFN) {
  807. qp->s_state = OP(COMPARE_SWAP);
  808. put_ib_ateth_swap(wqe->atomic_wr.swap,
  809. &ohdr->u.atomic_eth);
  810. put_ib_ateth_compare(wqe->atomic_wr.compare_add,
  811. &ohdr->u.atomic_eth);
  812. } else {
  813. qp->s_state = OP(FETCH_ADD);
  814. put_ib_ateth_swap(wqe->atomic_wr.compare_add,
  815. &ohdr->u.atomic_eth);
  816. put_ib_ateth_compare(0, &ohdr->u.atomic_eth);
  817. }
  818. put_ib_ateth_vaddr(wqe->atomic_wr.remote_addr,
  819. &ohdr->u.atomic_eth);
  820. ohdr->u.atomic_eth.rkey = cpu_to_be32(
  821. wqe->atomic_wr.rkey);
  822. hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
  823. ss = NULL;
  824. len = 0;
  825. bth2 |= IB_BTH_REQ_ACK;
  826. if (++qp->s_cur == qp->s_size)
  827. qp->s_cur = 0;
  828. break;
  829. default:
  830. goto bail;
  831. }
  832. if (wqe->wr.opcode != IB_WR_TID_RDMA_READ) {
  833. qp->s_sge.sge = wqe->sg_list[0];
  834. qp->s_sge.sg_list = wqe->sg_list + 1;
  835. qp->s_sge.num_sge = wqe->wr.num_sge;
  836. qp->s_sge.total_len = wqe->length;
  837. qp->s_len = wqe->length;
  838. }
  839. if (newreq) {
  840. qp->s_tail++;
  841. if (qp->s_tail >= qp->s_size)
  842. qp->s_tail = 0;
  843. }
  844. if (wqe->wr.opcode == IB_WR_RDMA_READ ||
  845. wqe->wr.opcode == IB_WR_TID_RDMA_WRITE)
  846. qp->s_psn = wqe->lpsn + 1;
  847. else if (wqe->wr.opcode == IB_WR_TID_RDMA_READ)
  848. qp->s_psn = req->s_next_psn;
  849. else
  850. qp->s_psn++;
  851. break;
  852. case OP(RDMA_READ_RESPONSE_FIRST):
  853. /*
  854. * qp->s_state is normally set to the opcode of the
  855. * last packet constructed for new requests and therefore
  856. * is never set to RDMA read response.
  857. * RDMA_READ_RESPONSE_FIRST is used by the ACK processing
  858. * thread to indicate a SEND needs to be restarted from an
  859. * earlier PSN without interfering with the sending thread.
  860. * See restart_rc().
  861. */
  862. qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu);
  863. fallthrough;
  864. case OP(SEND_FIRST):
  865. qp->s_state = OP(SEND_MIDDLE);
  866. fallthrough;
  867. case OP(SEND_MIDDLE):
  868. bth2 = mask_psn(qp->s_psn++);
  869. ss = &qp->s_sge;
  870. len = qp->s_len;
  871. if (len > pmtu) {
  872. len = pmtu;
  873. middle = HFI1_CAP_IS_KSET(SDMA_AHG);
  874. break;
  875. }
  876. if (wqe->wr.opcode == IB_WR_SEND) {
  877. qp->s_state = OP(SEND_LAST);
  878. } else if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
  879. qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
  880. /* Immediate data comes after the BTH */
  881. ohdr->u.imm_data = wqe->wr.ex.imm_data;
  882. hwords += 1;
  883. } else {
  884. qp->s_state = OP(SEND_LAST_WITH_INVALIDATE);
  885. /* invalidate data comes after the BTH */
  886. ohdr->u.ieth = cpu_to_be32(wqe->wr.ex.invalidate_rkey);
  887. hwords += 1;
  888. }
  889. if (wqe->wr.send_flags & IB_SEND_SOLICITED)
  890. bth0 |= IB_BTH_SOLICITED;
  891. bth2 |= IB_BTH_REQ_ACK;
  892. qp->s_cur++;
  893. if (qp->s_cur >= qp->s_size)
  894. qp->s_cur = 0;
  895. break;
  896. case OP(RDMA_READ_RESPONSE_LAST):
  897. /*
  898. * qp->s_state is normally set to the opcode of the
  899. * last packet constructed for new requests and therefore
  900. * is never set to RDMA read response.
  901. * RDMA_READ_RESPONSE_LAST is used by the ACK processing
  902. * thread to indicate a RDMA write needs to be restarted from
  903. * an earlier PSN without interfering with the sending thread.
  904. * See restart_rc().
  905. */
  906. qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu);
  907. fallthrough;
  908. case OP(RDMA_WRITE_FIRST):
  909. qp->s_state = OP(RDMA_WRITE_MIDDLE);
  910. fallthrough;
  911. case OP(RDMA_WRITE_MIDDLE):
  912. bth2 = mask_psn(qp->s_psn++);
  913. ss = &qp->s_sge;
  914. len = qp->s_len;
  915. if (len > pmtu) {
  916. len = pmtu;
  917. middle = HFI1_CAP_IS_KSET(SDMA_AHG);
  918. break;
  919. }
  920. if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
  921. qp->s_state = OP(RDMA_WRITE_LAST);
  922. } else {
  923. qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
  924. /* Immediate data comes after the BTH */
  925. ohdr->u.imm_data = wqe->wr.ex.imm_data;
  926. hwords += 1;
  927. if (wqe->wr.send_flags & IB_SEND_SOLICITED)
  928. bth0 |= IB_BTH_SOLICITED;
  929. }
  930. bth2 |= IB_BTH_REQ_ACK;
  931. qp->s_cur++;
  932. if (qp->s_cur >= qp->s_size)
  933. qp->s_cur = 0;
  934. break;
  935. case OP(RDMA_READ_RESPONSE_MIDDLE):
  936. /*
  937. * qp->s_state is normally set to the opcode of the
  938. * last packet constructed for new requests and therefore
  939. * is never set to RDMA read response.
  940. * RDMA_READ_RESPONSE_MIDDLE is used by the ACK processing
  941. * thread to indicate a RDMA read needs to be restarted from
  942. * an earlier PSN without interfering with the sending thread.
  943. * See restart_rc().
  944. */
  945. len = (delta_psn(qp->s_psn, wqe->psn)) * pmtu;
  946. put_ib_reth_vaddr(
  947. wqe->rdma_wr.remote_addr + len,
  948. &ohdr->u.rc.reth);
  949. ohdr->u.rc.reth.rkey =
  950. cpu_to_be32(wqe->rdma_wr.rkey);
  951. ohdr->u.rc.reth.length = cpu_to_be32(wqe->length - len);
  952. qp->s_state = OP(RDMA_READ_REQUEST);
  953. hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
  954. bth2 = mask_psn(qp->s_psn) | IB_BTH_REQ_ACK;
  955. qp->s_psn = wqe->lpsn + 1;
  956. ss = NULL;
  957. len = 0;
  958. qp->s_cur++;
  959. if (qp->s_cur == qp->s_size)
  960. qp->s_cur = 0;
  961. break;
  962. case TID_OP(WRITE_RESP):
  963. /*
  964. * This value for s_state is used for restarting a TID RDMA
  965. * WRITE request. See comment in OP(RDMA_READ_RESPONSE_MIDDLE
  966. * for more).
  967. */
  968. req = wqe_to_tid_req(wqe);
  969. req->state = TID_REQUEST_RESEND;
  970. rcu_read_lock();
  971. remote = rcu_dereference(priv->tid_rdma.remote);
  972. req->comp_seg = delta_psn(qp->s_psn, wqe->psn);
  973. len = wqe->length - (req->comp_seg * remote->max_len);
  974. rcu_read_unlock();
  975. bth2 = mask_psn(qp->s_psn);
  976. hwords += hfi1_build_tid_rdma_write_req(qp, wqe, ohdr, &bth1,
  977. &bth2, &len);
  978. qp->s_psn = wqe->lpsn + 1;
  979. ss = NULL;
  980. qp->s_state = TID_OP(WRITE_REQ);
  981. priv->pending_tid_w_resp += delta_psn(wqe->lpsn, bth2) + 1;
  982. priv->s_tid_cur = qp->s_cur;
  983. if (++qp->s_cur == qp->s_size)
  984. qp->s_cur = 0;
  985. trace_hfi1_tid_req_make_req_write(qp, 0, wqe->wr.opcode,
  986. wqe->psn, wqe->lpsn, req);
  987. break;
  988. case TID_OP(READ_RESP):
  989. if (wqe->wr.opcode != IB_WR_TID_RDMA_READ)
  990. goto bail;
  991. /* This is used to restart a TID read request */
  992. req = wqe_to_tid_req(wqe);
  993. wpriv = wqe->priv;
  994. /*
  995. * Back down. The field qp->s_psn has been set to the psn with
  996. * which the request should be restart. It's OK to use division
  997. * as this is on the retry path.
  998. */
  999. req->cur_seg = delta_psn(qp->s_psn, wqe->psn) / priv->pkts_ps;
  1000. /*
  1001. * The following function need to be redefined to return the
  1002. * status to make sure that we find the flow. At the same
  1003. * time, we can use the req->state change to check if the
  1004. * call succeeds or not.
  1005. */
  1006. req->state = TID_REQUEST_RESEND;
  1007. hfi1_tid_rdma_restart_req(qp, wqe, &bth2);
  1008. if (req->state != TID_REQUEST_ACTIVE) {
  1009. /*
  1010. * Failed to find the flow. Release all allocated tid
  1011. * resources.
  1012. */
  1013. hfi1_kern_exp_rcv_clear_all(req);
  1014. hfi1_kern_clear_hw_flow(priv->rcd, qp);
  1015. hfi1_trdma_send_complete(qp, wqe, IB_WC_LOC_QP_OP_ERR);
  1016. goto bail;
  1017. }
  1018. req->state = TID_REQUEST_RESEND;
  1019. len = min_t(u32, req->seg_len,
  1020. wqe->length - req->seg_len * req->cur_seg);
  1021. flow = &req->flows[req->flow_idx];
  1022. len -= flow->sent;
  1023. req->s_next_psn = flow->flow_state.ib_lpsn + 1;
  1024. delta = hfi1_build_tid_rdma_read_packet(wqe, ohdr, &bth1,
  1025. &bth2, &len);
  1026. if (delta <= 0) {
  1027. /* Wait for TID space */
  1028. goto bail;
  1029. }
  1030. hwords += delta;
  1031. ss = &wpriv->ss;
  1032. /* Check if this is the last segment */
  1033. if (req->cur_seg >= req->total_segs &&
  1034. ++qp->s_cur == qp->s_size)
  1035. qp->s_cur = 0;
  1036. qp->s_psn = req->s_next_psn;
  1037. trace_hfi1_tid_req_make_req_read(qp, 0, wqe->wr.opcode,
  1038. wqe->psn, wqe->lpsn, req);
  1039. break;
  1040. case TID_OP(READ_REQ):
  1041. req = wqe_to_tid_req(wqe);
  1042. delta = cmp_psn(qp->s_psn, wqe->psn);
  1043. /*
  1044. * If the current WR is not TID RDMA READ, or this is the start
  1045. * of a new request, we need to change the qp->s_state so that
  1046. * the request can be set up properly.
  1047. */
  1048. if (wqe->wr.opcode != IB_WR_TID_RDMA_READ || delta == 0 ||
  1049. qp->s_cur == qp->s_tail) {
  1050. qp->s_state = OP(RDMA_READ_REQUEST);
  1051. if (delta == 0 || qp->s_cur == qp->s_tail)
  1052. goto check_s_state;
  1053. else
  1054. goto bail;
  1055. }
  1056. /* Rate limiting */
  1057. if (qp->s_num_rd_atomic >= qp->s_max_rd_atomic) {
  1058. qp->s_flags |= RVT_S_WAIT_RDMAR;
  1059. goto bail;
  1060. }
  1061. wpriv = wqe->priv;
  1062. /* Read one segment at a time */
  1063. len = min_t(u32, req->seg_len,
  1064. wqe->length - req->seg_len * req->cur_seg);
  1065. delta = hfi1_build_tid_rdma_read_req(qp, wqe, ohdr, &bth1,
  1066. &bth2, &len);
  1067. if (delta <= 0) {
  1068. /* Wait for TID space */
  1069. goto bail;
  1070. }
  1071. hwords += delta;
  1072. ss = &wpriv->ss;
  1073. /* Check if this is the last segment */
  1074. if (req->cur_seg >= req->total_segs &&
  1075. ++qp->s_cur == qp->s_size)
  1076. qp->s_cur = 0;
  1077. qp->s_psn = req->s_next_psn;
  1078. trace_hfi1_tid_req_make_req_read(qp, 0, wqe->wr.opcode,
  1079. wqe->psn, wqe->lpsn, req);
  1080. break;
  1081. }
  1082. qp->s_sending_hpsn = bth2;
  1083. delta = delta_psn(bth2, wqe->psn);
  1084. if (delta && delta % HFI1_PSN_CREDIT == 0 &&
  1085. wqe->wr.opcode != IB_WR_TID_RDMA_WRITE)
  1086. bth2 |= IB_BTH_REQ_ACK;
  1087. if (qp->s_flags & RVT_S_SEND_ONE) {
  1088. qp->s_flags &= ~RVT_S_SEND_ONE;
  1089. qp->s_flags |= RVT_S_WAIT_ACK;
  1090. bth2 |= IB_BTH_REQ_ACK;
  1091. }
  1092. qp->s_len -= len;
  1093. ps->s_txreq->hdr_dwords = hwords;
  1094. ps->s_txreq->sde = priv->s_sde;
  1095. ps->s_txreq->ss = ss;
  1096. ps->s_txreq->s_cur_size = len;
  1097. hfi1_make_ruc_header(
  1098. qp,
  1099. ohdr,
  1100. bth0 | (qp->s_state << 24),
  1101. bth1,
  1102. bth2,
  1103. middle,
  1104. ps);
  1105. return 1;
  1106. done_free_tx:
  1107. hfi1_put_txreq(ps->s_txreq);
  1108. ps->s_txreq = NULL;
  1109. return 1;
  1110. bail:
  1111. hfi1_put_txreq(ps->s_txreq);
  1112. bail_no_tx:
  1113. ps->s_txreq = NULL;
  1114. qp->s_flags &= ~RVT_S_BUSY;
  1115. /*
  1116. * If we didn't get a txreq, the QP will be woken up later to try
  1117. * again. Set the flags to indicate which work item to wake
  1118. * up.
  1119. */
  1120. iowait_set_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
  1121. return 0;
  1122. }
  1123. static inline void hfi1_make_bth_aeth(struct rvt_qp *qp,
  1124. struct ib_other_headers *ohdr,
  1125. u32 bth0, u32 bth1)
  1126. {
  1127. if (qp->r_nak_state)
  1128. ohdr->u.aeth = cpu_to_be32((qp->r_msn & IB_MSN_MASK) |
  1129. (qp->r_nak_state <<
  1130. IB_AETH_CREDIT_SHIFT));
  1131. else
  1132. ohdr->u.aeth = rvt_compute_aeth(qp);
  1133. ohdr->bth[0] = cpu_to_be32(bth0);
  1134. ohdr->bth[1] = cpu_to_be32(bth1 | qp->remote_qpn);
  1135. ohdr->bth[2] = cpu_to_be32(mask_psn(qp->r_ack_psn));
  1136. }
  1137. static inline void hfi1_queue_rc_ack(struct hfi1_packet *packet, bool is_fecn)
  1138. {
  1139. struct rvt_qp *qp = packet->qp;
  1140. struct hfi1_ibport *ibp;
  1141. unsigned long flags;
  1142. spin_lock_irqsave(&qp->s_lock, flags);
  1143. if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
  1144. goto unlock;
  1145. ibp = rcd_to_iport(packet->rcd);
  1146. this_cpu_inc(*ibp->rvp.rc_qacks);
  1147. qp->s_flags |= RVT_S_ACK_PENDING | RVT_S_RESP_PENDING;
  1148. qp->s_nak_state = qp->r_nak_state;
  1149. qp->s_ack_psn = qp->r_ack_psn;
  1150. if (is_fecn)
  1151. qp->s_flags |= RVT_S_ECN;
  1152. /* Schedule the send tasklet. */
  1153. hfi1_schedule_send(qp);
  1154. unlock:
  1155. spin_unlock_irqrestore(&qp->s_lock, flags);
  1156. }
  1157. static inline void hfi1_make_rc_ack_9B(struct hfi1_packet *packet,
  1158. struct hfi1_opa_header *opa_hdr,
  1159. u8 sc5, bool is_fecn,
  1160. u64 *pbc_flags, u32 *hwords,
  1161. u32 *nwords)
  1162. {
  1163. struct rvt_qp *qp = packet->qp;
  1164. struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
  1165. struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
  1166. struct ib_header *hdr = &opa_hdr->ibh;
  1167. struct ib_other_headers *ohdr;
  1168. u16 lrh0 = HFI1_LRH_BTH;
  1169. u16 pkey;
  1170. u32 bth0, bth1;
  1171. opa_hdr->hdr_type = HFI1_PKT_TYPE_9B;
  1172. ohdr = &hdr->u.oth;
  1173. /* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4 */
  1174. *hwords = 6;
  1175. if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) {
  1176. *hwords += hfi1_make_grh(ibp, &hdr->u.l.grh,
  1177. rdma_ah_read_grh(&qp->remote_ah_attr),
  1178. *hwords - 2, SIZE_OF_CRC);
  1179. ohdr = &hdr->u.l.oth;
  1180. lrh0 = HFI1_LRH_GRH;
  1181. }
  1182. /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
  1183. *pbc_flags |= ((!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT);
  1184. /* read pkey_index w/o lock (its atomic) */
  1185. pkey = hfi1_get_pkey(ibp, qp->s_pkey_index);
  1186. lrh0 |= (sc5 & IB_SC_MASK) << IB_SC_SHIFT |
  1187. (rdma_ah_get_sl(&qp->remote_ah_attr) & IB_SL_MASK) <<
  1188. IB_SL_SHIFT;
  1189. hfi1_make_ib_hdr(hdr, lrh0, *hwords + SIZE_OF_CRC,
  1190. opa_get_lid(rdma_ah_get_dlid(&qp->remote_ah_attr), 9B),
  1191. ppd->lid | rdma_ah_get_path_bits(&qp->remote_ah_attr));
  1192. bth0 = pkey | (OP(ACKNOWLEDGE) << 24);
  1193. if (qp->s_mig_state == IB_MIG_MIGRATED)
  1194. bth0 |= IB_BTH_MIG_REQ;
  1195. bth1 = (!!is_fecn) << IB_BECN_SHIFT;
  1196. /*
  1197. * Inline ACKs go out without the use of the Verbs send engine, so
  1198. * we need to set the STL Verbs Extended bit here
  1199. */
  1200. bth1 |= HFI1_CAP_IS_KSET(OPFN) << IB_BTHE_E_SHIFT;
  1201. hfi1_make_bth_aeth(qp, ohdr, bth0, bth1);
  1202. }
  1203. static inline void hfi1_make_rc_ack_16B(struct hfi1_packet *packet,
  1204. struct hfi1_opa_header *opa_hdr,
  1205. u8 sc5, bool is_fecn,
  1206. u64 *pbc_flags, u32 *hwords,
  1207. u32 *nwords)
  1208. {
  1209. struct rvt_qp *qp = packet->qp;
  1210. struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
  1211. struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
  1212. struct hfi1_16b_header *hdr = &opa_hdr->opah;
  1213. struct ib_other_headers *ohdr;
  1214. u32 bth0, bth1 = 0;
  1215. u16 len, pkey;
  1216. bool becn = is_fecn;
  1217. u8 l4 = OPA_16B_L4_IB_LOCAL;
  1218. u8 extra_bytes;
  1219. opa_hdr->hdr_type = HFI1_PKT_TYPE_16B;
  1220. ohdr = &hdr->u.oth;
  1221. /* header size in 32-bit words 16B LRH+BTH+AETH = (16+12+4)/4 */
  1222. *hwords = 8;
  1223. extra_bytes = hfi1_get_16b_padding(*hwords << 2, 0);
  1224. *nwords = SIZE_OF_CRC + ((extra_bytes + SIZE_OF_LT) >> 2);
  1225. if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) &&
  1226. hfi1_check_mcast(rdma_ah_get_dlid(&qp->remote_ah_attr))) {
  1227. *hwords += hfi1_make_grh(ibp, &hdr->u.l.grh,
  1228. rdma_ah_read_grh(&qp->remote_ah_attr),
  1229. *hwords - 4, *nwords);
  1230. ohdr = &hdr->u.l.oth;
  1231. l4 = OPA_16B_L4_IB_GLOBAL;
  1232. }
  1233. *pbc_flags |= PBC_PACKET_BYPASS | PBC_INSERT_BYPASS_ICRC;
  1234. /* read pkey_index w/o lock (its atomic) */
  1235. pkey = hfi1_get_pkey(ibp, qp->s_pkey_index);
  1236. /* Convert dwords to flits */
  1237. len = (*hwords + *nwords) >> 1;
  1238. hfi1_make_16b_hdr(hdr, ppd->lid |
  1239. (rdma_ah_get_path_bits(&qp->remote_ah_attr) &
  1240. ((1 << ppd->lmc) - 1)),
  1241. opa_get_lid(rdma_ah_get_dlid(&qp->remote_ah_attr),
  1242. 16B), len, pkey, becn, 0, l4, sc5);
  1243. bth0 = pkey | (OP(ACKNOWLEDGE) << 24);
  1244. bth0 |= extra_bytes << 20;
  1245. if (qp->s_mig_state == IB_MIG_MIGRATED)
  1246. bth1 = OPA_BTH_MIG_REQ;
  1247. hfi1_make_bth_aeth(qp, ohdr, bth0, bth1);
  1248. }
  1249. typedef void (*hfi1_make_rc_ack)(struct hfi1_packet *packet,
  1250. struct hfi1_opa_header *opa_hdr,
  1251. u8 sc5, bool is_fecn,
  1252. u64 *pbc_flags, u32 *hwords,
  1253. u32 *nwords);
  1254. /* We support only two types - 9B and 16B for now */
  1255. static const hfi1_make_rc_ack hfi1_make_rc_ack_tbl[2] = {
  1256. [HFI1_PKT_TYPE_9B] = &hfi1_make_rc_ack_9B,
  1257. [HFI1_PKT_TYPE_16B] = &hfi1_make_rc_ack_16B
  1258. };
  1259. /*
  1260. * hfi1_send_rc_ack - Construct an ACK packet and send it
  1261. *
  1262. * This is called from hfi1_rc_rcv() and handle_receive_interrupt().
  1263. * Note that RDMA reads and atomics are handled in the
  1264. * send side QP state and send engine.
  1265. */
  1266. void hfi1_send_rc_ack(struct hfi1_packet *packet, bool is_fecn)
  1267. {
  1268. struct hfi1_ctxtdata *rcd = packet->rcd;
  1269. struct rvt_qp *qp = packet->qp;
  1270. struct hfi1_ibport *ibp = rcd_to_iport(rcd);
  1271. struct hfi1_qp_priv *priv = qp->priv;
  1272. struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
  1273. u8 sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)];
  1274. u64 pbc, pbc_flags = 0;
  1275. u32 hwords = 0;
  1276. u32 nwords = 0;
  1277. u32 plen;
  1278. struct pio_buf *pbuf;
  1279. struct hfi1_opa_header opa_hdr;
  1280. /* clear the defer count */
  1281. qp->r_adefered = 0;
  1282. /* Don't send ACK or NAK if a RDMA read or atomic is pending. */
  1283. if (qp->s_flags & RVT_S_RESP_PENDING) {
  1284. hfi1_queue_rc_ack(packet, is_fecn);
  1285. return;
  1286. }
1287. /* Don't send the ACK inline while RDMA read/atomic responses are still outstanding */
  1288. if (qp->s_rdma_ack_cnt) {
  1289. hfi1_queue_rc_ack(packet, is_fecn);
  1290. return;
  1291. }
  1292. /* Don't try to send ACKs if the link isn't ACTIVE */
  1293. if (driver_lstate(ppd) != IB_PORT_ACTIVE)
  1294. return;
  1295. /* Make the appropriate header */
  1296. hfi1_make_rc_ack_tbl[priv->hdr_type](packet, &opa_hdr, sc5, is_fecn,
  1297. &pbc_flags, &hwords, &nwords);
  1298. plen = 2 /* PBC */ + hwords + nwords;
  1299. pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps,
  1300. sc_to_vlt(ppd->dd, sc5), plen);
  1301. pbuf = sc_buffer_alloc(rcd->sc, plen, NULL, NULL);
  1302. if (IS_ERR_OR_NULL(pbuf)) {
  1303. /*
  1304. * We have no room to send at the moment. Pass
  1305. * responsibility for sending the ACK to the send engine
  1306. * so that when enough buffer space becomes available,
  1307. * the ACK is sent ahead of other outgoing packets.
  1308. */
  1309. hfi1_queue_rc_ack(packet, is_fecn);
  1310. return;
  1311. }
  1312. trace_ack_output_ibhdr(dd_from_ibdev(qp->ibqp.device),
  1313. &opa_hdr, ib_is_sc5(sc5));
  1314. /* write the pbc and data */
  1315. ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc,
  1316. (priv->hdr_type == HFI1_PKT_TYPE_9B ?
  1317. (void *)&opa_hdr.ibh :
  1318. (void *)&opa_hdr.opah), hwords);
  1319. return;
  1320. }
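/*
 * Illustrative standalone sketch (not part of rc.c): the dword accounting
 * behind plen above, worked for a 16B ACK with no GRH. It assumes, as the
 * builders above do, that the ICRC occupies one dword and the 16B link
 * tail (LT) byte one byte; CRC_DWORDS and LT_BYTES are local stand-ins
 * for SIZE_OF_CRC and SIZE_OF_LT.
 */
#include <stdio.h>
#include <stdint.h>

#define CRC_DWORDS 1
#define LT_BYTES   1

int main(void)
{
	uint32_t hwords = 8;                  /* 16B LRH+BTH+AETH = (16+12+4)/4 */
	uint32_t hdr_bytes = hwords << 2;
	/* pad so header + ICRC + LT lands on a flit (8-byte) boundary */
	uint32_t pad = (8 - ((hdr_bytes + 4 * CRC_DWORDS + LT_BYTES) % 8)) % 8;
	uint32_t nwords = CRC_DWORDS + ((pad + LT_BYTES) >> 2);
	uint32_t plen = 2 /* PBC */ + hwords + nwords;  /* PIO send size, dwords */
	uint16_t flits = (hwords + nwords) >> 1;        /* 16B LRH length field */

	printf("pad=%u nwords=%u plen=%u flits=%u\n", pad, nwords, plen, flits);
	return 0;
}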
  1321. /**
  1322. * update_num_rd_atomic - update the qp->s_num_rd_atomic
  1323. * @qp: the QP
  1324. * @psn: the packet sequence number to restart at
  1325. * @wqe: the wqe
  1326. *
  1327. * This is called from reset_psn() to update qp->s_num_rd_atomic
  1328. * for the current wqe.
  1329. * Called at interrupt level with the QP s_lock held.
  1330. */
  1331. static void update_num_rd_atomic(struct rvt_qp *qp, u32 psn,
  1332. struct rvt_swqe *wqe)
  1333. {
  1334. u32 opcode = wqe->wr.opcode;
  1335. if (opcode == IB_WR_RDMA_READ ||
  1336. opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
  1337. opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
  1338. qp->s_num_rd_atomic++;
  1339. } else if (opcode == IB_WR_TID_RDMA_READ) {
  1340. struct tid_rdma_request *req = wqe_to_tid_req(wqe);
  1341. struct hfi1_qp_priv *priv = qp->priv;
  1342. if (cmp_psn(psn, wqe->lpsn) <= 0) {
  1343. u32 cur_seg;
  1344. cur_seg = (psn - wqe->psn) / priv->pkts_ps;
  1345. req->ack_pending = cur_seg - req->comp_seg;
  1346. priv->pending_tid_r_segs += req->ack_pending;
  1347. qp->s_num_rd_atomic += req->ack_pending;
  1348. trace_hfi1_tid_req_update_num_rd_atomic(qp, 0,
  1349. wqe->wr.opcode,
  1350. wqe->psn,
  1351. wqe->lpsn,
  1352. req);
  1353. } else {
  1354. priv->pending_tid_r_segs += req->total_segs;
  1355. qp->s_num_rd_atomic += req->total_segs;
  1356. }
  1357. }
  1358. }
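/*
 * Illustrative standalone sketch (not part of rc.c): the segment arithmetic
 * used above for a TID RDMA READ that is being restarted mid-request. The
 * numbers are hypothetical; pkts_per_seg stands in for priv->pkts_ps and
 * comp_seg for req->comp_seg.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t wqe_psn = 0x100;       /* first PSN of the request */
	uint32_t restart_psn = 0x10a;   /* PSN we are restarting from */
	uint32_t pkts_per_seg = 4;      /* packets per TID RDMA segment */
	uint32_t comp_seg = 1;          /* segments already completed */

	uint32_t cur_seg = (restart_psn - wqe_psn) / pkts_per_seg;  /* 2 */
	uint32_t ack_pending = cur_seg - comp_seg;                  /* 1 */

	printf("cur_seg=%u ack_pending=%u\n", cur_seg, ack_pending);
	return 0;
}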
  1359. /**
  1360. * reset_psn - reset the QP state to send starting from PSN
  1361. * @qp: the QP
  1362. * @psn: the packet sequence number to restart at
  1363. *
  1364. * This is called from hfi1_rc_rcv() to process an incoming RC ACK
  1365. * for the given QP.
  1366. * Called at interrupt level with the QP s_lock held.
  1367. */
  1368. static void reset_psn(struct rvt_qp *qp, u32 psn)
  1369. {
  1370. u32 n = qp->s_acked;
  1371. struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, n);
  1372. u32 opcode;
  1373. struct hfi1_qp_priv *priv = qp->priv;
  1374. lockdep_assert_held(&qp->s_lock);
  1375. qp->s_cur = n;
  1376. priv->pending_tid_r_segs = 0;
  1377. priv->pending_tid_w_resp = 0;
  1378. qp->s_num_rd_atomic = 0;
  1379. /*
  1380. * If we are starting the request from the beginning,
  1381. * let the normal send code handle initialization.
  1382. */
  1383. if (cmp_psn(psn, wqe->psn) <= 0) {
  1384. qp->s_state = OP(SEND_LAST);
  1385. goto done;
  1386. }
  1387. update_num_rd_atomic(qp, psn, wqe);
  1388. /* Find the work request opcode corresponding to the given PSN. */
  1389. for (;;) {
  1390. int diff;
  1391. if (++n == qp->s_size)
  1392. n = 0;
  1393. if (n == qp->s_tail)
  1394. break;
  1395. wqe = rvt_get_swqe_ptr(qp, n);
  1396. diff = cmp_psn(psn, wqe->psn);
  1397. if (diff < 0) {
1398. /* Point wqe back to the previous one */
  1399. wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
  1400. break;
  1401. }
  1402. qp->s_cur = n;
  1403. /*
  1404. * If we are starting the request from the beginning,
  1405. * let the normal send code handle initialization.
  1406. */
  1407. if (diff == 0) {
  1408. qp->s_state = OP(SEND_LAST);
  1409. goto done;
  1410. }
  1411. update_num_rd_atomic(qp, psn, wqe);
  1412. }
  1413. opcode = wqe->wr.opcode;
  1414. /*
  1415. * Set the state to restart in the middle of a request.
  1416. * Don't change the s_sge, s_cur_sge, or s_cur_size.
  1417. * See hfi1_make_rc_req().
  1418. */
  1419. switch (opcode) {
  1420. case IB_WR_SEND:
  1421. case IB_WR_SEND_WITH_IMM:
  1422. qp->s_state = OP(RDMA_READ_RESPONSE_FIRST);
  1423. break;
  1424. case IB_WR_RDMA_WRITE:
  1425. case IB_WR_RDMA_WRITE_WITH_IMM:
  1426. qp->s_state = OP(RDMA_READ_RESPONSE_LAST);
  1427. break;
  1428. case IB_WR_TID_RDMA_WRITE:
  1429. qp->s_state = TID_OP(WRITE_RESP);
  1430. break;
  1431. case IB_WR_RDMA_READ:
  1432. qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE);
  1433. break;
  1434. case IB_WR_TID_RDMA_READ:
  1435. qp->s_state = TID_OP(READ_RESP);
  1436. break;
  1437. default:
  1438. /*
1439. * This case shouldn't happen since there is only
1440. * one PSN per request.
  1441. */
  1442. qp->s_state = OP(SEND_LAST);
  1443. }
  1444. done:
  1445. priv->s_flags &= ~HFI1_S_TID_WAIT_INTERLCK;
  1446. qp->s_psn = psn;
  1447. /*
  1448. * Set RVT_S_WAIT_PSN as rc_complete() may start the timer
  1449. * asynchronously before the send engine can get scheduled.
  1450. * Doing it in hfi1_make_rc_req() is too late.
  1451. */
  1452. if ((cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) &&
  1453. (cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0))
  1454. qp->s_flags |= RVT_S_WAIT_PSN;
  1455. qp->s_flags &= ~HFI1_S_AHG_VALID;
  1456. trace_hfi1_sender_reset_psn(qp);
  1457. }
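/*
 * Illustrative standalone sketch (not part of rc.c): 24-bit sequence
 * comparison with wraparound, consistent with how cmp_psn() is used in
 * reset_psn() and throughout this file (negative means "a before b",
 * zero equal, positive "a after b"). The real helpers are defined
 * elsewhere in the driver headers; this only models their behavior.
 */
#include <stdio.h>
#include <stdint.h>

static int32_t psn_cmp(uint32_t a, uint32_t b)
{
	/* sign-extend the 24-bit difference */
	return (int32_t)((a - b) << 8) >> 8;
}

int main(void)
{
	printf("%d\n", psn_cmp(0x000002, 0xfffffe)); /* 4: just after the wrap */
	printf("%d\n", psn_cmp(0x000100, 0x000200)); /* -256: before */
	printf("%d\n", psn_cmp(0x000200, 0x000200)); /* 0: equal */
	return 0;
}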
  1458. /*
  1459. * Back up requester to resend the last un-ACKed request.
  1460. * The QP r_lock and s_lock should be held and interrupts disabled.
  1461. */
  1462. void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
  1463. {
  1464. struct hfi1_qp_priv *priv = qp->priv;
  1465. struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
  1466. struct hfi1_ibport *ibp;
  1467. lockdep_assert_held(&qp->r_lock);
  1468. lockdep_assert_held(&qp->s_lock);
  1469. trace_hfi1_sender_restart_rc(qp);
  1470. if (qp->s_retry == 0) {
  1471. if (qp->s_mig_state == IB_MIG_ARMED) {
  1472. hfi1_migrate_qp(qp);
  1473. qp->s_retry = qp->s_retry_cnt;
  1474. } else if (qp->s_last == qp->s_acked) {
  1475. /*
  1476. * We need special handling for the OPFN request WQEs as
  1477. * they are not allowed to generate real user errors
  1478. */
  1479. if (wqe->wr.opcode == IB_WR_OPFN) {
  1480. struct hfi1_ibport *ibp =
  1481. to_iport(qp->ibqp.device, qp->port_num);
  1482. /*
  1483. * Call opfn_conn_reply() with capcode and
  1484. * remaining data as 0 to close out the
  1485. * current request
  1486. */
  1487. opfn_conn_reply(qp, priv->opfn.curr);
  1488. wqe = do_rc_completion(qp, wqe, ibp);
  1489. qp->s_flags &= ~RVT_S_WAIT_ACK;
  1490. } else {
  1491. trace_hfi1_tid_write_sender_restart_rc(qp, 0);
  1492. if (wqe->wr.opcode == IB_WR_TID_RDMA_READ) {
  1493. struct tid_rdma_request *req;
  1494. req = wqe_to_tid_req(wqe);
  1495. hfi1_kern_exp_rcv_clear_all(req);
  1496. hfi1_kern_clear_hw_flow(priv->rcd, qp);
  1497. }
  1498. hfi1_trdma_send_complete(qp, wqe,
  1499. IB_WC_RETRY_EXC_ERR);
  1500. rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
  1501. }
  1502. return;
  1503. } else { /* need to handle delayed completion */
  1504. return;
  1505. }
  1506. } else {
  1507. qp->s_retry--;
  1508. }
  1509. ibp = to_iport(qp->ibqp.device, qp->port_num);
  1510. if (wqe->wr.opcode == IB_WR_RDMA_READ ||
  1511. wqe->wr.opcode == IB_WR_TID_RDMA_READ)
  1512. ibp->rvp.n_rc_resends++;
  1513. else
  1514. ibp->rvp.n_rc_resends += delta_psn(qp->s_psn, psn);
  1515. qp->s_flags &= ~(RVT_S_WAIT_FENCE | RVT_S_WAIT_RDMAR |
  1516. RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_PSN |
  1517. RVT_S_WAIT_ACK | HFI1_S_WAIT_TID_RESP);
  1518. if (wait)
  1519. qp->s_flags |= RVT_S_SEND_ONE;
  1520. reset_psn(qp, psn);
  1521. }
  1522. /*
  1523. * Set qp->s_sending_psn to the next PSN after the given one.
  1524. * This would be psn+1 except when RDMA reads or TID RDMA ops
  1525. * are present.
  1526. */
  1527. static void reset_sending_psn(struct rvt_qp *qp, u32 psn)
  1528. {
  1529. struct rvt_swqe *wqe;
  1530. u32 n = qp->s_last;
  1531. lockdep_assert_held(&qp->s_lock);
  1532. /* Find the work request corresponding to the given PSN. */
  1533. for (;;) {
  1534. wqe = rvt_get_swqe_ptr(qp, n);
  1535. if (cmp_psn(psn, wqe->lpsn) <= 0) {
  1536. if (wqe->wr.opcode == IB_WR_RDMA_READ ||
  1537. wqe->wr.opcode == IB_WR_TID_RDMA_READ ||
  1538. wqe->wr.opcode == IB_WR_TID_RDMA_WRITE)
  1539. qp->s_sending_psn = wqe->lpsn + 1;
  1540. else
  1541. qp->s_sending_psn = psn + 1;
  1542. break;
  1543. }
  1544. if (++n == qp->s_size)
  1545. n = 0;
  1546. if (n == qp->s_tail)
  1547. break;
  1548. }
  1549. }
  1550. /**
  1551. * hfi1_rc_verbs_aborted - handle abort status
  1552. * @qp: the QP
  1553. * @opah: the opa header
  1554. *
1555. * This code modifies both the ACK bit in BTH[2]
1556. * and the s_flags to put the QP into send-one mode.
1557. *
1558. * This serves to throttle the send engine to send
1559. * only a single packet in the likely case that a
1560. * link has gone down.
  1561. */
  1562. void hfi1_rc_verbs_aborted(struct rvt_qp *qp, struct hfi1_opa_header *opah)
  1563. {
  1564. struct ib_other_headers *ohdr = hfi1_get_rc_ohdr(opah);
  1565. u8 opcode = ib_bth_get_opcode(ohdr);
  1566. u32 psn;
  1567. /* ignore responses */
  1568. if ((opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
  1569. opcode <= OP(ATOMIC_ACKNOWLEDGE)) ||
  1570. opcode == TID_OP(READ_RESP) ||
  1571. opcode == TID_OP(WRITE_RESP))
  1572. return;
  1573. psn = ib_bth_get_psn(ohdr) | IB_BTH_REQ_ACK;
  1574. ohdr->bth[2] = cpu_to_be32(psn);
  1575. qp->s_flags |= RVT_S_SEND_ONE;
  1576. }
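/*
 * Illustrative standalone sketch (not part of rc.c): forcing the BTH
 * AckReq bit the way hfi1_rc_verbs_aborted() does. It assumes the usual
 * BTH layout in which the third dword carries the AckReq bit in bit 31
 * and the 24-bit PSN in the low bits, which matches how IB_BTH_REQ_ACK
 * is OR'd into the PSN above.
 */
#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>          /* htonl()/ntohl() for the big-endian BTH word */

#define BTH_REQ_ACK  (1u << 31)
#define BTH_PSN_MASK 0x00ffffffu

int main(void)
{
	uint32_t bth2_be = htonl(0x00001234);    /* PSN 0x1234, AckReq clear */
	uint32_t psn = ntohl(bth2_be) & BTH_PSN_MASK;

	bth2_be = htonl(psn | BTH_REQ_ACK);      /* request an ACK on this PSN */
	printf("bth[2] = 0x%08x\n", ntohl(bth2_be));
	return 0;
}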
  1577. /*
  1578. * This should be called with the QP s_lock held and interrupts disabled.
  1579. */
  1580. void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah)
  1581. {
  1582. struct ib_other_headers *ohdr;
  1583. struct hfi1_qp_priv *priv = qp->priv;
  1584. struct rvt_swqe *wqe;
  1585. u32 opcode, head, tail;
  1586. u32 psn;
  1587. struct tid_rdma_request *req;
  1588. lockdep_assert_held(&qp->s_lock);
  1589. if (!(ib_rvt_state_ops[qp->state] & RVT_SEND_OR_FLUSH_OR_RECV_OK))
  1590. return;
  1591. ohdr = hfi1_get_rc_ohdr(opah);
  1592. opcode = ib_bth_get_opcode(ohdr);
  1593. if ((opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
  1594. opcode <= OP(ATOMIC_ACKNOWLEDGE)) ||
  1595. opcode == TID_OP(READ_RESP) ||
  1596. opcode == TID_OP(WRITE_RESP)) {
  1597. WARN_ON(!qp->s_rdma_ack_cnt);
  1598. qp->s_rdma_ack_cnt--;
  1599. return;
  1600. }
  1601. psn = ib_bth_get_psn(ohdr);
  1602. /*
  1603. * Don't attempt to reset the sending PSN for packets in the
  1604. * KDETH PSN space since the PSN does not match anything.
  1605. */
  1606. if (opcode != TID_OP(WRITE_DATA) &&
  1607. opcode != TID_OP(WRITE_DATA_LAST) &&
  1608. opcode != TID_OP(ACK) && opcode != TID_OP(RESYNC))
  1609. reset_sending_psn(qp, psn);
  1610. /* Handle TID RDMA WRITE packets differently */
  1611. if (opcode >= TID_OP(WRITE_REQ) &&
  1612. opcode <= TID_OP(WRITE_DATA_LAST)) {
  1613. head = priv->s_tid_head;
  1614. tail = priv->s_tid_cur;
  1615. /*
1616. * s_tid_cur is set to s_tid_head in the case where
  1617. * a new TID RDMA request is being started and all
  1618. * previous ones have been completed.
  1619. * Therefore, we need to do a secondary check in order
  1620. * to properly determine whether we should start the
  1621. * RC timer.
  1622. */
  1623. wqe = rvt_get_swqe_ptr(qp, tail);
  1624. req = wqe_to_tid_req(wqe);
  1625. if (head == tail && req->comp_seg < req->total_segs) {
  1626. if (tail == 0)
  1627. tail = qp->s_size - 1;
  1628. else
  1629. tail -= 1;
  1630. }
  1631. } else {
  1632. head = qp->s_tail;
  1633. tail = qp->s_acked;
  1634. }
  1635. /*
  1636. * Start timer after a packet requesting an ACK has been sent and
  1637. * there are still requests that haven't been acked.
  1638. */
  1639. if ((psn & IB_BTH_REQ_ACK) && tail != head &&
  1640. opcode != TID_OP(WRITE_DATA) && opcode != TID_OP(WRITE_DATA_LAST) &&
  1641. opcode != TID_OP(RESYNC) &&
  1642. !(qp->s_flags &
  1643. (RVT_S_TIMER | RVT_S_WAIT_RNR | RVT_S_WAIT_PSN)) &&
  1644. (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
  1645. if (opcode == TID_OP(READ_REQ))
  1646. rvt_add_retry_timer_ext(qp, priv->timeout_shift);
  1647. else
  1648. rvt_add_retry_timer(qp);
  1649. }
  1650. /* Start TID RDMA ACK timer */
  1651. if ((opcode == TID_OP(WRITE_DATA) ||
  1652. opcode == TID_OP(WRITE_DATA_LAST) ||
  1653. opcode == TID_OP(RESYNC)) &&
  1654. (psn & IB_BTH_REQ_ACK) &&
  1655. !(priv->s_flags & HFI1_S_TID_RETRY_TIMER) &&
  1656. (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
  1657. /*
  1658. * The TID RDMA ACK packet could be received before this
  1659. * function is called. Therefore, add the timer only if TID
  1660. * RDMA ACK packets are actually pending.
  1661. */
  1662. wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
  1663. req = wqe_to_tid_req(wqe);
  1664. if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE &&
  1665. req->ack_seg < req->cur_seg)
  1666. hfi1_add_tid_retry_timer(qp);
  1667. }
  1668. while (qp->s_last != qp->s_acked) {
  1669. wqe = rvt_get_swqe_ptr(qp, qp->s_last);
  1670. if (cmp_psn(wqe->lpsn, qp->s_sending_psn) >= 0 &&
  1671. cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)
  1672. break;
  1673. trdma_clean_swqe(qp, wqe);
  1674. trace_hfi1_qp_send_completion(qp, wqe, qp->s_last);
  1675. rvt_qp_complete_swqe(qp,
  1676. wqe,
  1677. ib_hfi1_wc_opcode[wqe->wr.opcode],
  1678. IB_WC_SUCCESS);
  1679. }
  1680. /*
  1681. * If we were waiting for sends to complete before re-sending,
  1682. * and they are now complete, restart sending.
  1683. */
  1684. trace_hfi1_sendcomplete(qp, psn);
  1685. if (qp->s_flags & RVT_S_WAIT_PSN &&
  1686. cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
  1687. qp->s_flags &= ~RVT_S_WAIT_PSN;
  1688. qp->s_sending_psn = qp->s_psn;
  1689. qp->s_sending_hpsn = qp->s_psn - 1;
  1690. hfi1_schedule_send(qp);
  1691. }
  1692. }
  1693. static inline void update_last_psn(struct rvt_qp *qp, u32 psn)
  1694. {
  1695. qp->s_last_psn = psn;
  1696. }
  1697. /*
  1698. * Generate a SWQE completion.
  1699. * This is similar to hfi1_send_complete but has to check to be sure
  1700. * that the SGEs are not being referenced if the SWQE is being resent.
  1701. */
  1702. struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
  1703. struct rvt_swqe *wqe,
  1704. struct hfi1_ibport *ibp)
  1705. {
  1706. struct hfi1_qp_priv *priv = qp->priv;
  1707. lockdep_assert_held(&qp->s_lock);
  1708. /*
  1709. * Don't decrement refcount and don't generate a
  1710. * completion if the SWQE is being resent until the send
  1711. * is finished.
  1712. */
  1713. trace_hfi1_rc_completion(qp, wqe->lpsn);
  1714. if (cmp_psn(wqe->lpsn, qp->s_sending_psn) < 0 ||
  1715. cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
  1716. trdma_clean_swqe(qp, wqe);
  1717. trace_hfi1_qp_send_completion(qp, wqe, qp->s_last);
  1718. rvt_qp_complete_swqe(qp,
  1719. wqe,
  1720. ib_hfi1_wc_opcode[wqe->wr.opcode],
  1721. IB_WC_SUCCESS);
  1722. } else {
  1723. struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
  1724. this_cpu_inc(*ibp->rvp.rc_delayed_comp);
  1725. /*
1726. * If send progress is not running, attempt to progress
1727. * the SDMA queue.
  1728. */
  1729. if (ppd->dd->flags & HFI1_HAS_SEND_DMA) {
  1730. struct sdma_engine *engine;
  1731. u8 sl = rdma_ah_get_sl(&qp->remote_ah_attr);
  1732. u8 sc5;
  1733. /* For now use sc to find engine */
  1734. sc5 = ibp->sl_to_sc[sl];
  1735. engine = qp_to_sdma_engine(qp, sc5);
  1736. sdma_engine_progress_schedule(engine);
  1737. }
  1738. }
  1739. qp->s_retry = qp->s_retry_cnt;
  1740. /*
  1741. * Don't update the last PSN if the request being completed is
  1742. * a TID RDMA WRITE request.
1743. * Completions of TID RDMA WRITE requests are done by
  1744. * TID RDMA ACKs and as such could be for a request that has
  1745. * already been ACKed as far as the IB state machine is
  1746. * concerned.
  1747. */
  1748. if (wqe->wr.opcode != IB_WR_TID_RDMA_WRITE)
  1749. update_last_psn(qp, wqe->lpsn);
  1750. /*
  1751. * If we are completing a request which is in the process of
  1752. * being resent, we can stop re-sending it since we know the
  1753. * responder has already seen it.
  1754. */
  1755. if (qp->s_acked == qp->s_cur) {
  1756. if (++qp->s_cur >= qp->s_size)
  1757. qp->s_cur = 0;
  1758. qp->s_acked = qp->s_cur;
  1759. wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
  1760. if (qp->s_acked != qp->s_tail) {
  1761. qp->s_state = OP(SEND_LAST);
  1762. qp->s_psn = wqe->psn;
  1763. }
  1764. } else {
  1765. if (++qp->s_acked >= qp->s_size)
  1766. qp->s_acked = 0;
  1767. if (qp->state == IB_QPS_SQD && qp->s_acked == qp->s_cur)
  1768. qp->s_draining = 0;
  1769. wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
  1770. }
  1771. if (priv->s_flags & HFI1_S_TID_WAIT_INTERLCK) {
  1772. priv->s_flags &= ~HFI1_S_TID_WAIT_INTERLCK;
  1773. hfi1_schedule_send(qp);
  1774. }
  1775. return wqe;
  1776. }
  1777. static void set_restart_qp(struct rvt_qp *qp, struct hfi1_ctxtdata *rcd)
  1778. {
  1779. /* Retry this request. */
  1780. if (!(qp->r_flags & RVT_R_RDMAR_SEQ)) {
  1781. qp->r_flags |= RVT_R_RDMAR_SEQ;
  1782. hfi1_restart_rc(qp, qp->s_last_psn + 1, 0);
  1783. if (list_empty(&qp->rspwait)) {
  1784. qp->r_flags |= RVT_R_RSP_SEND;
  1785. rvt_get_qp(qp);
  1786. list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
  1787. }
  1788. }
  1789. }
  1790. /**
  1791. * update_qp_retry_state - Update qp retry state.
  1792. * @qp: the QP
  1793. * @psn: the packet sequence number of the TID RDMA WRITE RESP.
  1794. * @spsn: The start psn for the given TID RDMA WRITE swqe.
  1795. * @lpsn: The last psn for the given TID RDMA WRITE swqe.
  1796. *
  1797. * This function is called to update the qp retry state upon
  1798. * receiving a TID WRITE RESP after the qp is scheduled to retry
  1799. * a request.
  1800. */
  1801. static void update_qp_retry_state(struct rvt_qp *qp, u32 psn, u32 spsn,
  1802. u32 lpsn)
  1803. {
  1804. struct hfi1_qp_priv *qpriv = qp->priv;
  1805. qp->s_psn = psn + 1;
  1806. /*
  1807. * If this is the first TID RDMA WRITE RESP packet for the current
  1808. * request, change the s_state so that the retry will be processed
  1809. * correctly. Similarly, if this is the last TID RDMA WRITE RESP
  1810. * packet, change the s_state and advance the s_cur.
  1811. */
  1812. if (cmp_psn(psn, lpsn) >= 0) {
  1813. qp->s_cur = qpriv->s_tid_cur + 1;
  1814. if (qp->s_cur >= qp->s_size)
  1815. qp->s_cur = 0;
  1816. qp->s_state = TID_OP(WRITE_REQ);
  1817. } else if (!cmp_psn(psn, spsn)) {
  1818. qp->s_cur = qpriv->s_tid_cur;
  1819. qp->s_state = TID_OP(WRITE_RESP);
  1820. }
  1821. }
  1822. /*
  1823. * do_rc_ack - process an incoming RC ACK
  1824. * @qp: the QP the ACK came in on
  1825. * @psn: the packet sequence number of the ACK
  1826. * @opcode: the opcode of the request that resulted in the ACK
  1827. *
  1828. * This is called from rc_rcv_resp() to process an incoming RC ACK
  1829. * for the given QP.
  1830. * May be called at interrupt level, with the QP s_lock held.
  1831. * Returns 1 if OK, 0 if current operation should be aborted (NAK).
  1832. */
  1833. int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
  1834. u64 val, struct hfi1_ctxtdata *rcd)
  1835. {
  1836. struct hfi1_ibport *ibp;
  1837. enum ib_wc_status status;
  1838. struct hfi1_qp_priv *qpriv = qp->priv;
  1839. struct rvt_swqe *wqe;
  1840. int ret = 0;
  1841. u32 ack_psn;
  1842. int diff;
  1843. struct rvt_dev_info *rdi;
  1844. lockdep_assert_held(&qp->s_lock);
  1845. /*
  1846. * Note that NAKs implicitly ACK outstanding SEND and RDMA write
  1847. * requests and implicitly NAK RDMA read and atomic requests issued
  1848. * before the NAK'ed request. The MSN won't include the NAK'ed
1849. * request but will include any ACK'ed request(s).
  1850. */
  1851. ack_psn = psn;
  1852. if (aeth >> IB_AETH_NAK_SHIFT)
  1853. ack_psn--;
  1854. wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
  1855. ibp = rcd_to_iport(rcd);
  1856. /*
  1857. * The MSN might be for a later WQE than the PSN indicates so
  1858. * only complete WQEs that the PSN finishes.
  1859. */
  1860. while ((diff = delta_psn(ack_psn, wqe->lpsn)) >= 0) {
  1861. /*
  1862. * RDMA_READ_RESPONSE_ONLY is a special case since
  1863. * we want to generate completion events for everything
  1864. * before the RDMA read, copy the data, then generate
  1865. * the completion for the read.
  1866. */
  1867. if (wqe->wr.opcode == IB_WR_RDMA_READ &&
  1868. opcode == OP(RDMA_READ_RESPONSE_ONLY) &&
  1869. diff == 0) {
  1870. ret = 1;
  1871. goto bail_stop;
  1872. }
  1873. /*
  1874. * If this request is a RDMA read or atomic, and the ACK is
  1875. * for a later operation, this ACK NAKs the RDMA read or
  1876. * atomic. In other words, only a RDMA_READ_LAST or ONLY
  1877. * can ACK a RDMA read and likewise for atomic ops. Note
  1878. * that the NAK case can only happen if relaxed ordering is
  1879. * used and requests are sent after an RDMA read or atomic
  1880. * is sent but before the response is received.
  1881. */
  1882. if ((wqe->wr.opcode == IB_WR_RDMA_READ &&
  1883. (opcode != OP(RDMA_READ_RESPONSE_LAST) || diff != 0)) ||
  1884. (wqe->wr.opcode == IB_WR_TID_RDMA_READ &&
  1885. (opcode != TID_OP(READ_RESP) || diff != 0)) ||
  1886. ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
  1887. wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
  1888. (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0)) ||
  1889. (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE &&
  1890. (delta_psn(psn, qp->s_last_psn) != 1))) {
  1891. set_restart_qp(qp, rcd);
  1892. /*
  1893. * No need to process the ACK/NAK since we are
  1894. * restarting an earlier request.
  1895. */
  1896. goto bail_stop;
  1897. }
  1898. if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
  1899. wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
  1900. u64 *vaddr = wqe->sg_list[0].vaddr;
  1901. *vaddr = val;
  1902. }
  1903. if (wqe->wr.opcode == IB_WR_OPFN)
  1904. opfn_conn_reply(qp, val);
  1905. if (qp->s_num_rd_atomic &&
  1906. (wqe->wr.opcode == IB_WR_RDMA_READ ||
  1907. wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
  1908. wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
  1909. qp->s_num_rd_atomic--;
  1910. /* Restart sending task if fence is complete */
  1911. if ((qp->s_flags & RVT_S_WAIT_FENCE) &&
  1912. !qp->s_num_rd_atomic) {
  1913. qp->s_flags &= ~(RVT_S_WAIT_FENCE |
  1914. RVT_S_WAIT_ACK);
  1915. hfi1_schedule_send(qp);
  1916. } else if (qp->s_flags & RVT_S_WAIT_RDMAR) {
  1917. qp->s_flags &= ~(RVT_S_WAIT_RDMAR |
  1918. RVT_S_WAIT_ACK);
  1919. hfi1_schedule_send(qp);
  1920. }
  1921. }
  1922. /*
  1923. * TID RDMA WRITE requests will be completed by the TID RDMA
  1924. * ACK packet handler (see tid_rdma.c).
  1925. */
  1926. if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE)
  1927. break;
  1928. wqe = do_rc_completion(qp, wqe, ibp);
  1929. if (qp->s_acked == qp->s_tail)
  1930. break;
  1931. }
  1932. trace_hfi1_rc_ack_do(qp, aeth, psn, wqe);
  1933. trace_hfi1_sender_do_rc_ack(qp);
  1934. switch (aeth >> IB_AETH_NAK_SHIFT) {
  1935. case 0: /* ACK */
  1936. this_cpu_inc(*ibp->rvp.rc_acks);
  1937. if (wqe->wr.opcode == IB_WR_TID_RDMA_READ) {
  1938. if (wqe_to_tid_req(wqe)->ack_pending)
  1939. rvt_mod_retry_timer_ext(qp,
  1940. qpriv->timeout_shift);
  1941. else
  1942. rvt_stop_rc_timers(qp);
  1943. } else if (qp->s_acked != qp->s_tail) {
  1944. struct rvt_swqe *__w = NULL;
  1945. if (qpriv->s_tid_cur != HFI1_QP_WQE_INVALID)
  1946. __w = rvt_get_swqe_ptr(qp, qpriv->s_tid_cur);
  1947. /*
  1948. * Stop timers if we've received all of the TID RDMA
1949. * WRITE responses.
  1950. */
  1951. if (__w && __w->wr.opcode == IB_WR_TID_RDMA_WRITE &&
  1952. opcode == TID_OP(WRITE_RESP)) {
  1953. /*
  1954. * Normally, the loop above would correctly
  1955. * process all WQEs from s_acked onward and
  1956. * either complete them or check for correct
  1957. * PSN sequencing.
  1958. * However, for TID RDMA, due to pipelining,
  1959. * the response may not be for the request at
1960. * s_acked, so the above loop would just be
  1961. * skipped. This does not allow for checking
  1962. * the PSN sequencing. It has to be done
  1963. * separately.
  1964. */
  1965. if (cmp_psn(psn, qp->s_last_psn + 1)) {
  1966. set_restart_qp(qp, rcd);
  1967. goto bail_stop;
  1968. }
  1969. /*
  1970. * If the psn is being resent, stop the
  1971. * resending.
  1972. */
  1973. if (qp->s_cur != qp->s_tail &&
  1974. cmp_psn(qp->s_psn, psn) <= 0)
  1975. update_qp_retry_state(qp, psn,
  1976. __w->psn,
  1977. __w->lpsn);
  1978. else if (--qpriv->pending_tid_w_resp)
  1979. rvt_mod_retry_timer(qp);
  1980. else
  1981. rvt_stop_rc_timers(qp);
  1982. } else {
  1983. /*
  1984. * We are expecting more ACKs so
  1985. * mod the retry timer.
  1986. */
  1987. rvt_mod_retry_timer(qp);
  1988. /*
  1989. * We can stop re-sending the earlier packets
  1990. * and continue with the next packet the
  1991. * receiver wants.
  1992. */
  1993. if (cmp_psn(qp->s_psn, psn) <= 0)
  1994. reset_psn(qp, psn + 1);
  1995. }
  1996. } else {
  1997. /* No more acks - kill all timers */
  1998. rvt_stop_rc_timers(qp);
  1999. if (cmp_psn(qp->s_psn, psn) <= 0) {
  2000. qp->s_state = OP(SEND_LAST);
  2001. qp->s_psn = psn + 1;
  2002. }
  2003. }
  2004. if (qp->s_flags & RVT_S_WAIT_ACK) {
  2005. qp->s_flags &= ~RVT_S_WAIT_ACK;
  2006. hfi1_schedule_send(qp);
  2007. }
  2008. rvt_get_credit(qp, aeth);
  2009. qp->s_rnr_retry = qp->s_rnr_retry_cnt;
  2010. qp->s_retry = qp->s_retry_cnt;
  2011. /*
  2012. * If the current request is a TID RDMA WRITE request and the
  2013. * response is not a TID RDMA WRITE RESP packet, s_last_psn
  2014. * can't be advanced.
  2015. */
  2016. if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE &&
  2017. opcode != TID_OP(WRITE_RESP) &&
  2018. cmp_psn(psn, wqe->psn) >= 0)
  2019. return 1;
  2020. update_last_psn(qp, psn);
  2021. return 1;
  2022. case 1: /* RNR NAK */
  2023. ibp->rvp.n_rnr_naks++;
  2024. if (qp->s_acked == qp->s_tail)
  2025. goto bail_stop;
  2026. if (qp->s_flags & RVT_S_WAIT_RNR)
  2027. goto bail_stop;
  2028. rdi = ib_to_rvt(qp->ibqp.device);
  2029. if (!(rdi->post_parms[wqe->wr.opcode].flags &
  2030. RVT_OPERATION_IGN_RNR_CNT)) {
  2031. if (qp->s_rnr_retry == 0) {
  2032. status = IB_WC_RNR_RETRY_EXC_ERR;
  2033. goto class_b;
  2034. }
2035. if (qp->s_rnr_retry_cnt < 7)
  2036. qp->s_rnr_retry--;
  2037. }
  2038. /*
2039. * The last valid PSN is the previous PSN. For a TID RDMA WRITE
  2040. * request, s_last_psn should be incremented only when a TID
  2041. * RDMA WRITE RESP is received to avoid skipping lost TID RDMA
  2042. * WRITE RESP packets.
  2043. */
  2044. if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE) {
  2045. reset_psn(qp, qp->s_last_psn + 1);
  2046. } else {
  2047. update_last_psn(qp, psn - 1);
  2048. reset_psn(qp, psn);
  2049. }
  2050. ibp->rvp.n_rc_resends += delta_psn(qp->s_psn, psn);
  2051. qp->s_flags &= ~(RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_ACK);
  2052. rvt_stop_rc_timers(qp);
  2053. rvt_add_rnr_timer(qp, aeth);
  2054. return 0;
  2055. case 3: /* NAK */
  2056. if (qp->s_acked == qp->s_tail)
  2057. goto bail_stop;
  2058. /* The last valid PSN is the previous PSN. */
  2059. update_last_psn(qp, psn - 1);
  2060. switch ((aeth >> IB_AETH_CREDIT_SHIFT) &
  2061. IB_AETH_CREDIT_MASK) {
  2062. case 0: /* PSN sequence error */
  2063. ibp->rvp.n_seq_naks++;
  2064. /*
  2065. * Back up to the responder's expected PSN.
  2066. * Note that we might get a NAK in the middle of an
  2067. * RDMA READ response which terminates the RDMA
  2068. * READ.
  2069. */
  2070. hfi1_restart_rc(qp, psn, 0);
  2071. hfi1_schedule_send(qp);
  2072. break;
  2073. case 1: /* Invalid Request */
  2074. status = IB_WC_REM_INV_REQ_ERR;
  2075. ibp->rvp.n_other_naks++;
  2076. goto class_b;
  2077. case 2: /* Remote Access Error */
  2078. status = IB_WC_REM_ACCESS_ERR;
  2079. ibp->rvp.n_other_naks++;
  2080. goto class_b;
  2081. case 3: /* Remote Operation Error */
  2082. status = IB_WC_REM_OP_ERR;
  2083. ibp->rvp.n_other_naks++;
  2084. class_b:
  2085. if (qp->s_last == qp->s_acked) {
  2086. if (wqe->wr.opcode == IB_WR_TID_RDMA_READ)
  2087. hfi1_kern_read_tid_flow_free(qp);
  2088. hfi1_trdma_send_complete(qp, wqe, status);
  2089. rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
  2090. }
  2091. break;
  2092. default:
  2093. /* Ignore other reserved NAK error codes */
  2094. goto reserved;
  2095. }
  2096. qp->s_retry = qp->s_retry_cnt;
  2097. qp->s_rnr_retry = qp->s_rnr_retry_cnt;
  2098. goto bail_stop;
  2099. default: /* 2: reserved */
  2100. reserved:
  2101. /* Ignore reserved NAK codes. */
  2102. goto bail_stop;
  2103. }
  2104. /* cannot be reached */
  2105. bail_stop:
  2106. rvt_stop_rc_timers(qp);
  2107. return ret;
  2108. }
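/*
 * Illustrative standalone sketch (not part of rc.c): decoding an AETH the
 * way do_rc_ack() does. The layout assumed here matches the shifts used
 * above: bits 31:29 carry the ACK type (0 ACK, 1 RNR NAK, 3 NAK, 2
 * reserved), bits 28:24 the credit count or NAK code, and bits 23:0 the
 * MSN. The local macros stand in for IB_AETH_NAK_SHIFT and friends.
 */
#include <stdio.h>
#include <stdint.h>

#define AETH_TYPE_SHIFT   29
#define AETH_CREDIT_SHIFT 24
#define AETH_CREDIT_MASK  0x1fu
#define AETH_MSN_MASK     0x00ffffffu

int main(void)
{
	uint32_t aeth = (3u << AETH_TYPE_SHIFT) |    /* NAK */
			(1u << AETH_CREDIT_SHIFT) |  /* code 1: invalid request */
			0x42;                        /* MSN */

	printf("type=%u code=%u msn=%u\n",
	       aeth >> AETH_TYPE_SHIFT,
	       (aeth >> AETH_CREDIT_SHIFT) & AETH_CREDIT_MASK,
	       aeth & AETH_MSN_MASK);
	return 0;
}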
  2109. /*
  2110. * We have seen an out of sequence RDMA read middle or last packet.
  2111. * This ACKs SENDs and RDMA writes up to the first RDMA read or atomic SWQE.
  2112. */
  2113. static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn,
  2114. struct hfi1_ctxtdata *rcd)
  2115. {
  2116. struct rvt_swqe *wqe;
  2117. lockdep_assert_held(&qp->s_lock);
  2118. /* Remove QP from retry timer */
  2119. rvt_stop_rc_timers(qp);
  2120. wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
  2121. while (cmp_psn(psn, wqe->lpsn) > 0) {
  2122. if (wqe->wr.opcode == IB_WR_RDMA_READ ||
  2123. wqe->wr.opcode == IB_WR_TID_RDMA_READ ||
  2124. wqe->wr.opcode == IB_WR_TID_RDMA_WRITE ||
  2125. wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
  2126. wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
  2127. break;
  2128. wqe = do_rc_completion(qp, wqe, ibp);
  2129. }
  2130. ibp->rvp.n_rdma_seq++;
  2131. qp->r_flags |= RVT_R_RDMAR_SEQ;
  2132. hfi1_restart_rc(qp, qp->s_last_psn + 1, 0);
  2133. if (list_empty(&qp->rspwait)) {
  2134. qp->r_flags |= RVT_R_RSP_SEND;
  2135. rvt_get_qp(qp);
  2136. list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
  2137. }
  2138. }
  2139. /**
  2140. * rc_rcv_resp - process an incoming RC response packet
  2141. * @packet: data packet information
  2142. *
  2143. * This is called from hfi1_rc_rcv() to process an incoming RC response
  2144. * packet for the given QP.
  2145. * Called at interrupt level.
  2146. */
  2147. static void rc_rcv_resp(struct hfi1_packet *packet)
  2148. {
  2149. struct hfi1_ctxtdata *rcd = packet->rcd;
  2150. void *data = packet->payload;
  2151. u32 tlen = packet->tlen;
  2152. struct rvt_qp *qp = packet->qp;
  2153. struct hfi1_ibport *ibp;
  2154. struct ib_other_headers *ohdr = packet->ohdr;
  2155. struct rvt_swqe *wqe;
  2156. enum ib_wc_status status;
  2157. unsigned long flags;
  2158. int diff;
  2159. u64 val;
  2160. u32 aeth;
  2161. u32 psn = ib_bth_get_psn(packet->ohdr);
  2162. u32 pmtu = qp->pmtu;
  2163. u16 hdrsize = packet->hlen;
  2164. u8 opcode = packet->opcode;
  2165. u8 pad = packet->pad;
  2166. u8 extra_bytes = pad + packet->extra_byte + (SIZE_OF_CRC << 2);
  2167. spin_lock_irqsave(&qp->s_lock, flags);
  2168. trace_hfi1_ack(qp, psn);
  2169. /* Ignore invalid responses. */
  2170. if (cmp_psn(psn, READ_ONCE(qp->s_next_psn)) >= 0)
  2171. goto ack_done;
  2172. /* Ignore duplicate responses. */
  2173. diff = cmp_psn(psn, qp->s_last_psn);
  2174. if (unlikely(diff <= 0)) {
  2175. /* Update credits for "ghost" ACKs */
  2176. if (diff == 0 && opcode == OP(ACKNOWLEDGE)) {
  2177. aeth = be32_to_cpu(ohdr->u.aeth);
  2178. if ((aeth >> IB_AETH_NAK_SHIFT) == 0)
  2179. rvt_get_credit(qp, aeth);
  2180. }
  2181. goto ack_done;
  2182. }
  2183. /*
  2184. * Skip everything other than the PSN we expect, if we are waiting
  2185. * for a reply to a restarted RDMA read or atomic op.
  2186. */
  2187. if (qp->r_flags & RVT_R_RDMAR_SEQ) {
  2188. if (cmp_psn(psn, qp->s_last_psn + 1) != 0)
  2189. goto ack_done;
  2190. qp->r_flags &= ~RVT_R_RDMAR_SEQ;
  2191. }
  2192. if (unlikely(qp->s_acked == qp->s_tail))
  2193. goto ack_done;
  2194. wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
  2195. status = IB_WC_SUCCESS;
  2196. switch (opcode) {
  2197. case OP(ACKNOWLEDGE):
  2198. case OP(ATOMIC_ACKNOWLEDGE):
  2199. case OP(RDMA_READ_RESPONSE_FIRST):
  2200. aeth = be32_to_cpu(ohdr->u.aeth);
  2201. if (opcode == OP(ATOMIC_ACKNOWLEDGE))
  2202. val = ib_u64_get(&ohdr->u.at.atomic_ack_eth);
  2203. else
  2204. val = 0;
  2205. if (!do_rc_ack(qp, aeth, psn, opcode, val, rcd) ||
  2206. opcode != OP(RDMA_READ_RESPONSE_FIRST))
  2207. goto ack_done;
  2208. wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
  2209. if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
  2210. goto ack_op_err;
  2211. /*
  2212. * If this is a response to a resent RDMA read, we
  2213. * have to be careful to copy the data to the right
  2214. * location.
  2215. */
  2216. qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
  2217. wqe, psn, pmtu);
  2218. goto read_middle;
  2219. case OP(RDMA_READ_RESPONSE_MIDDLE):
  2220. /* no AETH, no ACK */
  2221. if (unlikely(cmp_psn(psn, qp->s_last_psn + 1)))
  2222. goto ack_seq_err;
  2223. if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
  2224. goto ack_op_err;
  2225. read_middle:
  2226. if (unlikely(tlen != (hdrsize + pmtu + extra_bytes)))
  2227. goto ack_len_err;
  2228. if (unlikely(pmtu >= qp->s_rdma_read_len))
  2229. goto ack_len_err;
  2230. /*
  2231. * We got a response so update the timeout.
  2232. * 4.096 usec. * (1 << qp->timeout)
  2233. */
  2234. rvt_mod_retry_timer(qp);
  2235. if (qp->s_flags & RVT_S_WAIT_ACK) {
  2236. qp->s_flags &= ~RVT_S_WAIT_ACK;
  2237. hfi1_schedule_send(qp);
  2238. }
  2239. if (opcode == OP(RDMA_READ_RESPONSE_MIDDLE))
  2240. qp->s_retry = qp->s_retry_cnt;
  2241. /*
  2242. * Update the RDMA receive state but do the copy w/o
  2243. * holding the locks and blocking interrupts.
  2244. */
  2245. qp->s_rdma_read_len -= pmtu;
  2246. update_last_psn(qp, psn);
  2247. spin_unlock_irqrestore(&qp->s_lock, flags);
  2248. rvt_copy_sge(qp, &qp->s_rdma_read_sge,
  2249. data, pmtu, false, false);
  2250. goto bail;
  2251. case OP(RDMA_READ_RESPONSE_ONLY):
  2252. aeth = be32_to_cpu(ohdr->u.aeth);
  2253. if (!do_rc_ack(qp, aeth, psn, opcode, 0, rcd))
  2254. goto ack_done;
  2255. /*
  2256. * Check that the data size is >= 0 && <= pmtu.
  2257. * Remember to account for ICRC (4).
  2258. */
  2259. if (unlikely(tlen < (hdrsize + extra_bytes)))
  2260. goto ack_len_err;
  2261. /*
  2262. * If this is a response to a resent RDMA read, we
  2263. * have to be careful to copy the data to the right
  2264. * location.
  2265. */
  2266. wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
  2267. qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
  2268. wqe, psn, pmtu);
  2269. goto read_last;
  2270. case OP(RDMA_READ_RESPONSE_LAST):
  2271. /* ACKs READ req. */
  2272. if (unlikely(cmp_psn(psn, qp->s_last_psn + 1)))
  2273. goto ack_seq_err;
  2274. if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
  2275. goto ack_op_err;
  2276. /*
  2277. * Check that the data size is >= 1 && <= pmtu.
  2278. * Remember to account for ICRC (4).
  2279. */
  2280. if (unlikely(tlen <= (hdrsize + extra_bytes)))
  2281. goto ack_len_err;
  2282. read_last:
  2283. tlen -= hdrsize + extra_bytes;
  2284. if (unlikely(tlen != qp->s_rdma_read_len))
  2285. goto ack_len_err;
  2286. aeth = be32_to_cpu(ohdr->u.aeth);
  2287. rvt_copy_sge(qp, &qp->s_rdma_read_sge,
  2288. data, tlen, false, false);
  2289. WARN_ON(qp->s_rdma_read_sge.num_sge);
  2290. (void)do_rc_ack(qp, aeth, psn,
  2291. OP(RDMA_READ_RESPONSE_LAST), 0, rcd);
  2292. goto ack_done;
  2293. }
  2294. ack_op_err:
  2295. status = IB_WC_LOC_QP_OP_ERR;
  2296. goto ack_err;
  2297. ack_seq_err:
  2298. ibp = rcd_to_iport(rcd);
  2299. rdma_seq_err(qp, ibp, psn, rcd);
  2300. goto ack_done;
  2301. ack_len_err:
  2302. status = IB_WC_LOC_LEN_ERR;
  2303. ack_err:
  2304. if (qp->s_last == qp->s_acked) {
  2305. rvt_send_complete(qp, wqe, status);
  2306. rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
  2307. }
  2308. ack_done:
  2309. spin_unlock_irqrestore(&qp->s_lock, flags);
  2310. bail:
  2311. return;
  2312. }
  2313. static inline void rc_cancel_ack(struct rvt_qp *qp)
  2314. {
  2315. qp->r_adefered = 0;
  2316. if (list_empty(&qp->rspwait))
  2317. return;
  2318. list_del_init(&qp->rspwait);
  2319. qp->r_flags &= ~RVT_R_RSP_NAK;
  2320. rvt_put_qp(qp);
  2321. }
  2322. /**
  2323. * rc_rcv_error - process an incoming duplicate or error RC packet
  2324. * @ohdr: the other headers for this packet
  2325. * @data: the packet data
  2326. * @qp: the QP for this packet
  2327. * @opcode: the opcode for this packet
  2328. * @psn: the packet sequence number for this packet
  2329. * @diff: the difference between the PSN and the expected PSN
  2330. * @rcd: the receive context
  2331. *
  2332. * This is called from hfi1_rc_rcv() to process an unexpected
  2333. * incoming RC packet for the given QP.
  2334. * Called at interrupt level.
  2335. * Return 1 if no more processing is needed; otherwise return 0 to
  2336. * schedule a response to be sent.
  2337. */
  2338. static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data,
  2339. struct rvt_qp *qp, u32 opcode, u32 psn,
  2340. int diff, struct hfi1_ctxtdata *rcd)
  2341. {
  2342. struct hfi1_ibport *ibp = rcd_to_iport(rcd);
  2343. struct rvt_ack_entry *e;
  2344. unsigned long flags;
  2345. u8 prev;
  2346. u8 mra; /* most recent ACK */
  2347. bool old_req;
  2348. trace_hfi1_rcv_error(qp, psn);
  2349. if (diff > 0) {
  2350. /*
  2351. * Packet sequence error.
  2352. * A NAK will ACK earlier sends and RDMA writes.
  2353. * Don't queue the NAK if we already sent one.
  2354. */
  2355. if (!qp->r_nak_state) {
  2356. ibp->rvp.n_rc_seqnak++;
  2357. qp->r_nak_state = IB_NAK_PSN_ERROR;
  2358. /* Use the expected PSN. */
  2359. qp->r_ack_psn = qp->r_psn;
  2360. /*
  2361. * Wait to send the sequence NAK until all packets
  2362. * in the receive queue have been processed.
  2363. * Otherwise, we end up propagating congestion.
  2364. */
  2365. rc_defered_ack(rcd, qp);
  2366. }
  2367. goto done;
  2368. }
  2369. /*
  2370. * Handle a duplicate request. Don't re-execute SEND, RDMA
  2371. * write or atomic op. Don't NAK errors, just silently drop
  2372. * the duplicate request. Note that r_sge, r_len, and
  2373. * r_rcv_len may be in use so don't modify them.
  2374. *
  2375. * We are supposed to ACK the earliest duplicate PSN but we
  2376. * can coalesce an outstanding duplicate ACK. We have to
  2377. * send the earliest so that RDMA reads can be restarted at
  2378. * the requester's expected PSN.
  2379. *
  2380. * First, find where this duplicate PSN falls within the
  2381. * ACKs previously sent.
  2382. * old_req is true if there is an older response that is scheduled
  2383. * to be sent before sending this one.
  2384. */
  2385. e = NULL;
  2386. old_req = true;
  2387. ibp->rvp.n_rc_dupreq++;
  2388. spin_lock_irqsave(&qp->s_lock, flags);
  2389. e = find_prev_entry(qp, psn, &prev, &mra, &old_req);
  2390. switch (opcode) {
  2391. case OP(RDMA_READ_REQUEST): {
  2392. struct ib_reth *reth;
  2393. u32 offset;
  2394. u32 len;
  2395. /*
  2396. * If we didn't find the RDMA read request in the ack queue,
  2397. * we can ignore this request.
  2398. */
  2399. if (!e || e->opcode != OP(RDMA_READ_REQUEST))
  2400. goto unlock_done;
  2401. /* RETH comes after BTH */
  2402. reth = &ohdr->u.rc.reth;
  2403. /*
  2404. * Address range must be a subset of the original
  2405. * request and start on pmtu boundaries.
  2406. * We reuse the old ack_queue slot since the requester
  2407. * should not back up and request an earlier PSN for the
  2408. * same request.
  2409. */
  2410. offset = delta_psn(psn, e->psn) * qp->pmtu;
  2411. len = be32_to_cpu(reth->length);
  2412. if (unlikely(offset + len != e->rdma_sge.sge_length))
  2413. goto unlock_done;
  2414. release_rdma_sge_mr(e);
  2415. if (len != 0) {
  2416. u32 rkey = be32_to_cpu(reth->rkey);
  2417. u64 vaddr = get_ib_reth_vaddr(reth);
  2418. int ok;
  2419. ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey,
  2420. IB_ACCESS_REMOTE_READ);
  2421. if (unlikely(!ok))
  2422. goto unlock_done;
  2423. } else {
  2424. e->rdma_sge.vaddr = NULL;
  2425. e->rdma_sge.length = 0;
  2426. e->rdma_sge.sge_length = 0;
  2427. }
  2428. e->psn = psn;
  2429. if (old_req)
  2430. goto unlock_done;
  2431. if (qp->s_acked_ack_queue == qp->s_tail_ack_queue)
  2432. qp->s_acked_ack_queue = prev;
  2433. qp->s_tail_ack_queue = prev;
  2434. break;
  2435. }
  2436. case OP(COMPARE_SWAP):
  2437. case OP(FETCH_ADD): {
  2438. /*
  2439. * If we didn't find the atomic request in the ack queue
  2440. * or the send engine is already backed up to send an
  2441. * earlier entry, we can ignore this request.
  2442. */
  2443. if (!e || e->opcode != (u8)opcode || old_req)
  2444. goto unlock_done;
  2445. if (qp->s_tail_ack_queue == qp->s_acked_ack_queue)
  2446. qp->s_acked_ack_queue = prev;
  2447. qp->s_tail_ack_queue = prev;
  2448. break;
  2449. }
  2450. default:
  2451. /*
  2452. * Ignore this operation if it doesn't request an ACK
  2453. * or an earlier RDMA read or atomic is going to be resent.
  2454. */
  2455. if (!(psn & IB_BTH_REQ_ACK) || old_req)
  2456. goto unlock_done;
  2457. /*
  2458. * Resend the most recent ACK if this request is
  2459. * after all the previous RDMA reads and atomics.
  2460. */
  2461. if (mra == qp->r_head_ack_queue) {
  2462. spin_unlock_irqrestore(&qp->s_lock, flags);
  2463. qp->r_nak_state = 0;
  2464. qp->r_ack_psn = qp->r_psn - 1;
  2465. goto send_ack;
  2466. }
  2467. /*
  2468. * Resend the RDMA read or atomic op which
  2469. * ACKs this duplicate request.
  2470. */
  2471. if (qp->s_tail_ack_queue == qp->s_acked_ack_queue)
  2472. qp->s_acked_ack_queue = mra;
  2473. qp->s_tail_ack_queue = mra;
  2474. break;
  2475. }
  2476. qp->s_ack_state = OP(ACKNOWLEDGE);
  2477. qp->s_flags |= RVT_S_RESP_PENDING;
  2478. qp->r_nak_state = 0;
  2479. hfi1_schedule_send(qp);
  2480. unlock_done:
  2481. spin_unlock_irqrestore(&qp->s_lock, flags);
  2482. done:
  2483. return 1;
  2484. send_ack:
  2485. return 0;
  2486. }
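/*
 * Illustrative standalone sketch (not part of rc.c): the byte-offset and
 * subset check applied above when a duplicate RDMA READ request arrives.
 * All values are hypothetical.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t e_psn = 0x500;         /* first PSN of the original READ */
	uint32_t dup_psn = 0x502;       /* PSN the duplicate restarts from */
	uint32_t pmtu = 4096;
	uint32_t sge_length = 16384;    /* length of the original transfer */
	uint32_t reth_len = 8192;       /* length requested by the duplicate */

	uint32_t offset = (dup_psn - e_psn) * pmtu;   /* 8192 bytes in */

	/* The duplicate must cover exactly the tail of the original request. */
	if (offset + reth_len == sge_length)
		printf("restart the READ at byte offset %u\n", offset);
	else
		printf("drop the duplicate: not a tail of the original\n");
	return 0;
}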
  2487. static void log_cca_event(struct hfi1_pportdata *ppd, u8 sl, u32 rlid,
  2488. u32 lqpn, u32 rqpn, u8 svc_type)
  2489. {
  2490. struct opa_hfi1_cong_log_event_internal *cc_event;
  2491. unsigned long flags;
  2492. if (sl >= OPA_MAX_SLS)
  2493. return;
  2494. spin_lock_irqsave(&ppd->cc_log_lock, flags);
  2495. ppd->threshold_cong_event_map[sl / 8] |= 1 << (sl % 8);
  2496. ppd->threshold_event_counter++;
  2497. cc_event = &ppd->cc_events[ppd->cc_log_idx++];
  2498. if (ppd->cc_log_idx == OPA_CONG_LOG_ELEMS)
  2499. ppd->cc_log_idx = 0;
  2500. cc_event->lqpn = lqpn & RVT_QPN_MASK;
  2501. cc_event->rqpn = rqpn & RVT_QPN_MASK;
  2502. cc_event->sl = sl;
  2503. cc_event->svc_type = svc_type;
  2504. cc_event->rlid = rlid;
  2505. /* keep timestamp in units of 1.024 usec */
  2506. cc_event->timestamp = ktime_get_ns() / 1024;
  2507. spin_unlock_irqrestore(&ppd->cc_log_lock, flags);
  2508. }
  2509. void process_becn(struct hfi1_pportdata *ppd, u8 sl, u32 rlid, u32 lqpn,
  2510. u32 rqpn, u8 svc_type)
  2511. {
  2512. struct cca_timer *cca_timer;
  2513. u16 ccti, ccti_incr, ccti_timer, ccti_limit;
  2514. u8 trigger_threshold;
  2515. struct cc_state *cc_state;
  2516. unsigned long flags;
  2517. if (sl >= OPA_MAX_SLS)
  2518. return;
  2519. cc_state = get_cc_state(ppd);
  2520. if (!cc_state)
  2521. return;
  2522. /*
  2523. * 1) increase CCTI (for this SL)
  2524. * 2) select IPG (i.e., call set_link_ipg())
  2525. * 3) start timer
  2526. */
  2527. ccti_limit = cc_state->cct.ccti_limit;
  2528. ccti_incr = cc_state->cong_setting.entries[sl].ccti_increase;
  2529. ccti_timer = cc_state->cong_setting.entries[sl].ccti_timer;
  2530. trigger_threshold =
  2531. cc_state->cong_setting.entries[sl].trigger_threshold;
  2532. spin_lock_irqsave(&ppd->cca_timer_lock, flags);
  2533. cca_timer = &ppd->cca_timer[sl];
  2534. if (cca_timer->ccti < ccti_limit) {
  2535. if (cca_timer->ccti + ccti_incr <= ccti_limit)
  2536. cca_timer->ccti += ccti_incr;
  2537. else
  2538. cca_timer->ccti = ccti_limit;
  2539. set_link_ipg(ppd);
  2540. }
  2541. ccti = cca_timer->ccti;
  2542. if (!hrtimer_active(&cca_timer->hrtimer)) {
  2543. /* ccti_timer is in units of 1.024 usec */
  2544. unsigned long nsec = 1024 * ccti_timer;
  2545. hrtimer_start(&cca_timer->hrtimer, ns_to_ktime(nsec),
  2546. HRTIMER_MODE_REL_PINNED);
  2547. }
  2548. spin_unlock_irqrestore(&ppd->cca_timer_lock, flags);
  2549. if ((trigger_threshold != 0) && (ccti >= trigger_threshold))
  2550. log_cca_event(ppd, sl, rlid, lqpn, rqpn, svc_type);
  2551. }
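/*
 * Illustrative standalone sketch (not part of rc.c): the saturating CCTI
 * increase performed in process_becn(). The table values are hypothetical.
 */
#include <stdio.h>
#include <stdint.h>

static uint16_t ccti_increase(uint16_t ccti, uint16_t incr, uint16_t limit)
{
	if (ccti >= limit)
		return ccti;            /* already at the table limit */
	if (ccti + incr <= limit)
		return ccti + incr;
	return limit;                   /* clamp at the limit */
}

int main(void)
{
	uint16_t limit = 127, incr = 5;
	uint16_t ccti = 125;

	ccti = ccti_increase(ccti, incr, limit);  /* 125 + 5 clamps to 127 */
	printf("ccti=%u\n", ccti);
	return 0;
}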
  2552. /**
  2553. * hfi1_rc_rcv - process an incoming RC packet
  2554. * @packet: data packet information
  2555. *
  2556. * This is called from qp_rcv() to process an incoming RC packet
  2557. * for the given QP.
  2558. * May be called at interrupt level.
  2559. */
  2560. void hfi1_rc_rcv(struct hfi1_packet *packet)
  2561. {
  2562. struct hfi1_ctxtdata *rcd = packet->rcd;
  2563. void *data = packet->payload;
  2564. u32 tlen = packet->tlen;
  2565. struct rvt_qp *qp = packet->qp;
  2566. struct hfi1_qp_priv *qpriv = qp->priv;
  2567. struct hfi1_ibport *ibp = rcd_to_iport(rcd);
  2568. struct ib_other_headers *ohdr = packet->ohdr;
  2569. u32 opcode = packet->opcode;
  2570. u32 hdrsize = packet->hlen;
  2571. u32 psn = ib_bth_get_psn(packet->ohdr);
  2572. u32 pad = packet->pad;
  2573. struct ib_wc wc;
  2574. u32 pmtu = qp->pmtu;
  2575. int diff;
  2576. struct ib_reth *reth;
  2577. unsigned long flags;
  2578. int ret;
  2579. bool copy_last = false, fecn;
  2580. u32 rkey;
  2581. u8 extra_bytes = pad + packet->extra_byte + (SIZE_OF_CRC << 2);
  2582. lockdep_assert_held(&qp->r_lock);
  2583. if (hfi1_ruc_check_hdr(ibp, packet))
  2584. return;
  2585. fecn = process_ecn(qp, packet);
  2586. opfn_trigger_conn_request(qp, be32_to_cpu(ohdr->bth[1]));
  2587. /*
  2588. * Process responses (ACKs) before anything else. Note that the
  2589. * packet sequence number will be for something in the send work
  2590. * queue rather than the expected receive packet sequence number.
  2591. * In other words, this QP is the requester.
  2592. */
  2593. if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
  2594. opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
  2595. rc_rcv_resp(packet);
  2596. return;
  2597. }
  2598. /* Compute 24 bits worth of difference. */
  2599. diff = delta_psn(psn, qp->r_psn);
  2600. if (unlikely(diff)) {
  2601. if (rc_rcv_error(ohdr, data, qp, opcode, psn, diff, rcd))
  2602. return;
  2603. goto send_ack;
  2604. }
  2605. /* Check for opcode sequence errors. */
  2606. switch (qp->r_state) {
  2607. case OP(SEND_FIRST):
  2608. case OP(SEND_MIDDLE):
  2609. if (opcode == OP(SEND_MIDDLE) ||
  2610. opcode == OP(SEND_LAST) ||
  2611. opcode == OP(SEND_LAST_WITH_IMMEDIATE) ||
  2612. opcode == OP(SEND_LAST_WITH_INVALIDATE))
  2613. break;
  2614. goto nack_inv;
  2615. case OP(RDMA_WRITE_FIRST):
  2616. case OP(RDMA_WRITE_MIDDLE):
  2617. if (opcode == OP(RDMA_WRITE_MIDDLE) ||
  2618. opcode == OP(RDMA_WRITE_LAST) ||
  2619. opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
  2620. break;
  2621. goto nack_inv;
  2622. default:
  2623. if (opcode == OP(SEND_MIDDLE) ||
  2624. opcode == OP(SEND_LAST) ||
  2625. opcode == OP(SEND_LAST_WITH_IMMEDIATE) ||
  2626. opcode == OP(SEND_LAST_WITH_INVALIDATE) ||
  2627. opcode == OP(RDMA_WRITE_MIDDLE) ||
  2628. opcode == OP(RDMA_WRITE_LAST) ||
  2629. opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
  2630. goto nack_inv;
  2631. /*
  2632. * Note that it is up to the requester to not send a new
  2633. * RDMA read or atomic operation before receiving an ACK
  2634. * for the previous operation.
  2635. */
  2636. break;
  2637. }
  2638. if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
  2639. rvt_comm_est(qp);
  2640. /* OK, process the packet. */
  2641. switch (opcode) {
  2642. case OP(SEND_FIRST):
  2643. ret = rvt_get_rwqe(qp, false);
  2644. if (ret < 0)
  2645. goto nack_op_err;
  2646. if (!ret)
  2647. goto rnr_nak;
  2648. qp->r_rcv_len = 0;
  2649. fallthrough;
  2650. case OP(SEND_MIDDLE):
  2651. case OP(RDMA_WRITE_MIDDLE):
  2652. send_middle:
  2653. /* Check for invalid length PMTU or posted rwqe len. */
  2654. /*
2655. * There will be no padding for 9B packets, but 16B packets
2656. * will come in with some padding since we always add the
2657. * CRC and LT bytes, which need to be flit aligned
  2658. */
  2659. if (unlikely(tlen != (hdrsize + pmtu + extra_bytes)))
  2660. goto nack_inv;
  2661. qp->r_rcv_len += pmtu;
  2662. if (unlikely(qp->r_rcv_len > qp->r_len))
  2663. goto nack_inv;
  2664. rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false);
  2665. break;
  2666. case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
  2667. /* consume RWQE */
  2668. ret = rvt_get_rwqe(qp, true);
  2669. if (ret < 0)
  2670. goto nack_op_err;
  2671. if (!ret)
  2672. goto rnr_nak;
  2673. goto send_last_imm;
  2674. case OP(SEND_ONLY):
  2675. case OP(SEND_ONLY_WITH_IMMEDIATE):
  2676. case OP(SEND_ONLY_WITH_INVALIDATE):
  2677. ret = rvt_get_rwqe(qp, false);
  2678. if (ret < 0)
  2679. goto nack_op_err;
  2680. if (!ret)
  2681. goto rnr_nak;
  2682. qp->r_rcv_len = 0;
  2683. if (opcode == OP(SEND_ONLY))
  2684. goto no_immediate_data;
  2685. if (opcode == OP(SEND_ONLY_WITH_INVALIDATE))
  2686. goto send_last_inv;
  2687. fallthrough; /* for SEND_ONLY_WITH_IMMEDIATE */
  2688. case OP(SEND_LAST_WITH_IMMEDIATE):
  2689. send_last_imm:
  2690. wc.ex.imm_data = ohdr->u.imm_data;
  2691. wc.wc_flags = IB_WC_WITH_IMM;
  2692. goto send_last;
  2693. case OP(SEND_LAST_WITH_INVALIDATE):
  2694. send_last_inv:
  2695. rkey = be32_to_cpu(ohdr->u.ieth);
  2696. if (rvt_invalidate_rkey(qp, rkey))
  2697. goto no_immediate_data;
  2698. wc.ex.invalidate_rkey = rkey;
  2699. wc.wc_flags = IB_WC_WITH_INVALIDATE;
  2700. goto send_last;
  2701. case OP(RDMA_WRITE_LAST):
  2702. copy_last = rvt_is_user_qp(qp);
  2703. fallthrough;
  2704. case OP(SEND_LAST):
  2705. no_immediate_data:
  2706. wc.wc_flags = 0;
  2707. wc.ex.imm_data = 0;
  2708. send_last:
  2709. /* Check for invalid length. */
  2710. /* LAST len should be >= 1 */
  2711. if (unlikely(tlen < (hdrsize + extra_bytes)))
  2712. goto nack_inv;
2713. /* Don't count the CRC (and the padding and LT byte for 16B). */
  2714. tlen -= (hdrsize + extra_bytes);
  2715. wc.byte_len = tlen + qp->r_rcv_len;
  2716. if (unlikely(wc.byte_len > qp->r_len))
  2717. goto nack_inv;
  2718. rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, copy_last);
  2719. rvt_put_ss(&qp->r_sge);
  2720. qp->r_msn++;
  2721. if (!__test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
  2722. break;
  2723. wc.wr_id = qp->r_wr_id;
  2724. wc.status = IB_WC_SUCCESS;
  2725. if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) ||
  2726. opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
  2727. wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
  2728. else
  2729. wc.opcode = IB_WC_RECV;
  2730. wc.qp = &qp->ibqp;
  2731. wc.src_qp = qp->remote_qpn;
  2732. wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr) & U16_MAX;
  2733. /*
  2734. * It seems that IB mandates the presence of an SL in a
  2735. * work completion only for the UD transport (see section
  2736. * 11.4.2 of IBTA Vol. 1).
  2737. *
  2738. * However, the way the SL is chosen below is consistent
2739. * with the way that IB/qib works and tries to avoid
  2740. * introducing incompatibilities.
  2741. *
  2742. * See also OPA Vol. 1, section 9.7.6, and table 9-17.
  2743. */
  2744. wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
  2745. /* zero fields that are N/A */
  2746. wc.vendor_err = 0;
  2747. wc.pkey_index = 0;
  2748. wc.dlid_path_bits = 0;
  2749. wc.port_num = 0;
  2750. /* Signal completion event if the solicited bit is set. */
  2751. rvt_recv_cq(qp, &wc, ib_bth_is_solicited(ohdr));
  2752. break;
  2753. case OP(RDMA_WRITE_ONLY):
  2754. copy_last = rvt_is_user_qp(qp);
  2755. fallthrough;
  2756. case OP(RDMA_WRITE_FIRST):
  2757. case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
  2758. if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
  2759. goto nack_inv;
  2760. /* consume RWQE */
  2761. reth = &ohdr->u.rc.reth;
  2762. qp->r_len = be32_to_cpu(reth->length);
  2763. qp->r_rcv_len = 0;
  2764. qp->r_sge.sg_list = NULL;
  2765. if (qp->r_len != 0) {
  2766. u32 rkey = be32_to_cpu(reth->rkey);
  2767. u64 vaddr = get_ib_reth_vaddr(reth);
  2768. int ok;
  2769. /* Check rkey & NAK */
  2770. ok = rvt_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, vaddr,
  2771. rkey, IB_ACCESS_REMOTE_WRITE);
  2772. if (unlikely(!ok))
  2773. goto nack_acc;
  2774. qp->r_sge.num_sge = 1;
  2775. } else {
  2776. qp->r_sge.num_sge = 0;
  2777. qp->r_sge.sge.mr = NULL;
  2778. qp->r_sge.sge.vaddr = NULL;
  2779. qp->r_sge.sge.length = 0;
  2780. qp->r_sge.sge.sge_length = 0;
  2781. }
  2782. if (opcode == OP(RDMA_WRITE_FIRST))
  2783. goto send_middle;
  2784. else if (opcode == OP(RDMA_WRITE_ONLY))
  2785. goto no_immediate_data;
  2786. ret = rvt_get_rwqe(qp, true);
  2787. if (ret < 0)
  2788. goto nack_op_err;
  2789. if (!ret) {
  2790. /* peer will send again */
  2791. rvt_put_ss(&qp->r_sge);
  2792. goto rnr_nak;
  2793. }
  2794. wc.ex.imm_data = ohdr->u.rc.imm_data;
  2795. wc.wc_flags = IB_WC_WITH_IMM;
  2796. goto send_last;
  2797. case OP(RDMA_READ_REQUEST): {
  2798. struct rvt_ack_entry *e;
  2799. u32 len;
  2800. u8 next;
  2801. if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
  2802. goto nack_inv;
  2803. next = qp->r_head_ack_queue + 1;
  2804. /* s_ack_queue is size rvt_size_atomic()+1 so use > not >= */
  2805. if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
  2806. next = 0;
  2807. spin_lock_irqsave(&qp->s_lock, flags);
  2808. if (unlikely(next == qp->s_acked_ack_queue)) {
  2809. if (!qp->s_ack_queue[next].sent)
  2810. goto nack_inv_unlck;
  2811. update_ack_queue(qp, next);
  2812. }
  2813. e = &qp->s_ack_queue[qp->r_head_ack_queue];
  2814. release_rdma_sge_mr(e);
  2815. reth = &ohdr->u.rc.reth;
  2816. len = be32_to_cpu(reth->length);
  2817. if (len) {
  2818. u32 rkey = be32_to_cpu(reth->rkey);
  2819. u64 vaddr = get_ib_reth_vaddr(reth);
  2820. int ok;
  2821. /* Check rkey & NAK */
  2822. ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr,
  2823. rkey, IB_ACCESS_REMOTE_READ);
  2824. if (unlikely(!ok))
  2825. goto nack_acc_unlck;
  2826. /*
  2827. * Update the next expected PSN. We add 1 later
  2828. * below, so only add the remainder here.
  2829. */
  2830. qp->r_psn += rvt_div_mtu(qp, len - 1);
  2831. } else {
  2832. e->rdma_sge.mr = NULL;
  2833. e->rdma_sge.vaddr = NULL;
  2834. e->rdma_sge.length = 0;
  2835. e->rdma_sge.sge_length = 0;
  2836. }
  2837. e->opcode = opcode;
  2838. e->sent = 0;
  2839. e->psn = psn;
  2840. e->lpsn = qp->r_psn;
  2841. /*
  2842. * We need to increment the MSN here instead of when we
  2843. * finish sending the result since a duplicate request would
  2844. * increment it more than once.
  2845. */
  2846. qp->r_msn++;
  2847. qp->r_psn++;
  2848. qp->r_state = opcode;
  2849. qp->r_nak_state = 0;
  2850. qp->r_head_ack_queue = next;
  2851. qpriv->r_tid_alloc = qp->r_head_ack_queue;
  2852. /* Schedule the send engine. */
  2853. qp->s_flags |= RVT_S_RESP_PENDING;
  2854. if (fecn)
  2855. qp->s_flags |= RVT_S_ECN;
  2856. hfi1_schedule_send(qp);
  2857. spin_unlock_irqrestore(&qp->s_lock, flags);
  2858. return;
  2859. }
  2860. case OP(COMPARE_SWAP):
  2861. case OP(FETCH_ADD): {
  2862. struct ib_atomic_eth *ateth = &ohdr->u.atomic_eth;
  2863. u64 vaddr = get_ib_ateth_vaddr(ateth);
                bool opfn = opcode == OP(COMPARE_SWAP) &&
                            vaddr == HFI1_VERBS_E_ATOMIC_VADDR;
                struct rvt_ack_entry *e;
                atomic64_t *maddr;
                u64 sdata;
                u32 rkey;
                u8 next;

                if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
                             !opfn))
                        goto nack_inv;
                next = qp->r_head_ack_queue + 1;
                if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
                        next = 0;
                spin_lock_irqsave(&qp->s_lock, flags);
                if (unlikely(next == qp->s_acked_ack_queue)) {
                        if (!qp->s_ack_queue[next].sent)
                                goto nack_inv_unlck;
                        update_ack_queue(qp, next);
                }
                e = &qp->s_ack_queue[qp->r_head_ack_queue];
                release_rdma_sge_mr(e);
                /* Process OPFN special virtual address */
                if (opfn) {
                        opfn_conn_response(qp, e, ateth);
                        goto ack;
                }
                if (unlikely(vaddr & (sizeof(u64) - 1)))
                        goto nack_inv_unlck;
                rkey = be32_to_cpu(ateth->rkey);
                /* Check rkey & NAK */
                if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
                                          vaddr, rkey,
                                          IB_ACCESS_REMOTE_ATOMIC)))
                        goto nack_acc_unlck;
                /* Perform atomic OP and save result. */
                maddr = (atomic64_t *)qp->r_sge.sge.vaddr;
                sdata = get_ib_ateth_swap(ateth);
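                /*
                 * The response must carry the value that was in memory
                 * before the operation: atomic64_add_return() returns the
                 * new value, so the addend is subtracted back out, while
                 * cmpxchg() already returns the prior value.  E.g. with 5
                 * in memory and an add operand of 3, FETCH_ADD leaves 8 in
                 * memory and returns 5 to the requester.
                 */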
                e->atomic_data = (opcode == OP(FETCH_ADD)) ?
                        (u64)atomic64_add_return(sdata, maddr) - sdata :
                        (u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr,
                                     get_ib_ateth_compare(ateth),
                                     sdata);
                rvt_put_mr(qp->r_sge.sge.mr);
                qp->r_sge.num_sge = 0;
ack:
                e->opcode = opcode;
                e->sent = 0;
                e->psn = psn;
                e->lpsn = psn;
                qp->r_msn++;
                qp->r_psn++;
                qp->r_state = opcode;
                qp->r_nak_state = 0;
                qp->r_head_ack_queue = next;
                qpriv->r_tid_alloc = qp->r_head_ack_queue;

                /* Schedule the send engine. */
                qp->s_flags |= RVT_S_RESP_PENDING;
                if (fecn)
                        qp->s_flags |= RVT_S_ECN;
                hfi1_schedule_send(qp);

                spin_unlock_irqrestore(&qp->s_lock, flags);
                return;
        }

        default:
                /* NAK unknown opcodes. */
                goto nack_inv;
        }
        qp->r_psn++;
        qp->r_state = opcode;
        qp->r_ack_psn = psn;
        qp->r_nak_state = 0;
        /* Send an ACK if requested or required. */
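        /*
         * ACKs are coalesced where possible: unless something forces an
         * immediate ACK (FECN marking, the r_adefered budget of
         * HFI1_PSN_CREDIT being exhausted, or the packet->numpkt == 0
         * case), the ACK is only queued via rc_defered_ack() so one ACK
         * can cover several requests.
         */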
        if (psn & IB_BTH_REQ_ACK || fecn) {
                if (packet->numpkt == 0 || fecn ||
                    qp->r_adefered >= HFI1_PSN_CREDIT) {
                        rc_cancel_ack(qp);
                        goto send_ack;
                }
                qp->r_adefered++;
                rc_defered_ack(rcd, qp);
        }
        return;

rnr_nak:
        qp->r_nak_state = qp->r_min_rnr_timer | IB_RNR_NAK;
        qp->r_ack_psn = qp->r_psn;
        /* Queue RNR NAK for later */
        rc_defered_ack(rcd, qp);
        return;

nack_op_err:
        rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
        qp->r_nak_state = IB_NAK_REMOTE_OPERATIONAL_ERROR;
        qp->r_ack_psn = qp->r_psn;
        /* Queue NAK for later */
        rc_defered_ack(rcd, qp);
        return;

nack_inv_unlck:
        spin_unlock_irqrestore(&qp->s_lock, flags);
nack_inv:
        rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
        qp->r_nak_state = IB_NAK_INVALID_REQUEST;
        qp->r_ack_psn = qp->r_psn;
        /* Queue NAK for later */
        rc_defered_ack(rcd, qp);
        return;

nack_acc_unlck:
        spin_unlock_irqrestore(&qp->s_lock, flags);
nack_acc:
        rvt_rc_error(qp, IB_WC_LOC_PROT_ERR);
        qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
        qp->r_ack_psn = qp->r_psn;
send_ack:
        hfi1_send_rc_ack(packet, fecn);
}
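
/**
 * hfi1_rc_hdrerr - handle a receive header error on an RC QP
 * @rcd: the receive context
 * @packet: the packet
 * @qp: the QP this packet arrived on
 *
 * If the erroneous packet is a request at or beyond the expected PSN and no
 * NAK is already pending, schedule a deferred PSN-error NAK so the sender
 * retries from the expected PSN.
 */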
void hfi1_rc_hdrerr(
        struct hfi1_ctxtdata *rcd,
        struct hfi1_packet *packet,
        struct rvt_qp *qp)
{
        struct hfi1_ibport *ibp = rcd_to_iport(rcd);
        int diff;
        u32 opcode;
        u32 psn;

        if (hfi1_ruc_check_hdr(ibp, packet))
                return;

        psn = ib_bth_get_psn(packet->ohdr);
        opcode = ib_bth_get_opcode(packet->ohdr);

        /* Only deal with request opcodes (sends, writes, read requests) for now */
        if (opcode < IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) {
                diff = delta_psn(psn, qp->r_psn);
                if (!qp->r_nak_state && diff >= 0) {
                        ibp->rvp.n_rc_seqnak++;
                        qp->r_nak_state = IB_NAK_PSN_ERROR;
                        /* Use the expected PSN. */
                        qp->r_ack_psn = qp->r_psn;
                        /*
                         * Wait to send the sequence NAK until all packets
                         * in the receive queue have been processed.
                         * Otherwise, we end up propagating congestion.
                         */
                        rc_defered_ack(rcd, qp);
                } /* Out of sequence NAK */
        } /* QP Request NAKs */
}