erdma_cmdq.c

// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause

/* Authors: Cheng Xu <[email protected]> */
/* Kai Shen <[email protected]> */
/* Copyright (c) 2020-2022, Alibaba Group. */

#include "erdma.h"
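
/* Ring the cmdq CQ doorbell with the ARM bit set so the device raises an
 * event for the next completion (used only in event mode).
 */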
static void arm_cmdq_cq(struct erdma_cmdq *cmdq)
{
	struct erdma_dev *dev = container_of(cmdq, struct erdma_dev, cmdq);
	u64 db_data = FIELD_PREP(ERDMA_CQDB_CI_MASK, cmdq->cq.ci) |
		      FIELD_PREP(ERDMA_CQDB_ARM_MASK, 1) |
		      FIELD_PREP(ERDMA_CQDB_CMDSN_MASK, cmdq->cq.cmdsn) |
		      FIELD_PREP(ERDMA_CQDB_IDX_MASK, cmdq->cq.cmdsn);

	*cmdq->cq.db_record = db_data;
	writeq(db_data, dev->func_bar + ERDMA_CMDQ_CQDB_REG);

	atomic64_inc(&cmdq->cq.armed_num);
}
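
/* Publish new SQEs to the device: write the current SQ producer index to the
 * doorbell record and to the cmdq SQ doorbell register.
 */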
static void kick_cmdq_db(struct erdma_cmdq *cmdq)
{
	struct erdma_dev *dev = container_of(cmdq, struct erdma_dev, cmdq);
	u64 db_data = FIELD_PREP(ERDMA_CMD_HDR_WQEBB_INDEX_MASK, cmdq->sq.pi);

	*cmdq->sq.db_record = db_data;
	writeq(db_data, dev->func_bar + ERDMA_CMDQ_SQDB_REG);
}
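
/* Grab a free completion-wait context from the pool; fails with -ENOMEM when
 * all max_outstandings slots are in use.
 */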
static struct erdma_comp_wait *get_comp_wait(struct erdma_cmdq *cmdq)
{
	int comp_idx;

	spin_lock(&cmdq->lock);
	comp_idx = find_first_zero_bit(cmdq->comp_wait_bitmap,
				       cmdq->max_outstandings);
	if (comp_idx == cmdq->max_outstandings) {
		spin_unlock(&cmdq->lock);
		return ERR_PTR(-ENOMEM);
	}

	__set_bit(comp_idx, cmdq->comp_wait_bitmap);
	spin_unlock(&cmdq->lock);

	return &cmdq->wait_pool[comp_idx];
}
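
/* Return a completion-wait context to the pool and reset its command status. */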
static void put_comp_wait(struct erdma_cmdq *cmdq,
			  struct erdma_comp_wait *comp_wait)
{
	int used;

	cmdq->wait_pool[comp_wait->ctx_id].cmd_status = ERDMA_CMD_STATUS_INIT;
	spin_lock(&cmdq->lock);
	used = __test_and_clear_bit(comp_wait->ctx_id, cmdq->comp_wait_bitmap);
	spin_unlock(&cmdq->lock);

	WARN_ON(!used);
}

static int erdma_cmdq_wait_res_init(struct erdma_dev *dev,
				    struct erdma_cmdq *cmdq)
{
	int i;

	cmdq->wait_pool =
		devm_kcalloc(&dev->pdev->dev, cmdq->max_outstandings,
			     sizeof(struct erdma_comp_wait), GFP_KERNEL);
	if (!cmdq->wait_pool)
		return -ENOMEM;

	spin_lock_init(&cmdq->lock);
	cmdq->comp_wait_bitmap = devm_bitmap_zalloc(
		&dev->pdev->dev, cmdq->max_outstandings, GFP_KERNEL);
	if (!cmdq->comp_wait_bitmap)
		return -ENOMEM;

	for (i = 0; i < cmdq->max_outstandings; i++) {
		init_completion(&cmdq->wait_pool[i].wait_event);
		cmdq->wait_pool[i].ctx_id = i;
	}

	return 0;
}
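
/* Allocate the cmdq SQ buffer (with trailing doorbell-record space) and
 * program its DMA address, depth and doorbell record address into the device.
 */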
static int erdma_cmdq_sq_init(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;
	struct erdma_cmdq_sq *sq = &cmdq->sq;
	u32 buf_size;

	sq->wqebb_cnt = SQEBB_COUNT(ERDMA_CMDQ_SQE_SIZE);
	sq->depth = cmdq->max_outstandings * sq->wqebb_cnt;

	buf_size = sq->depth << SQEBB_SHIFT;

	sq->qbuf =
		dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size),
				   &sq->qbuf_dma_addr, GFP_KERNEL);
	if (!sq->qbuf)
		return -ENOMEM;

	sq->db_record = (u64 *)(sq->qbuf + buf_size);

	spin_lock_init(&sq->lock);

	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_SQ_ADDR_H_REG,
			  upper_32_bits(sq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_SQ_ADDR_L_REG,
			  lower_32_bits(sq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_DEPTH_REG, sq->depth);
	erdma_reg_write64(dev, ERDMA_CMDQ_SQ_DB_HOST_ADDR_REG,
			  sq->qbuf_dma_addr + buf_size);

	return 0;
}
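
/* Same as above, for the cmdq CQ: allocate the queue buffer plus doorbell
 * record and register its addresses with the device.
 */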
static int erdma_cmdq_cq_init(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;
	struct erdma_cmdq_cq *cq = &cmdq->cq;
	u32 buf_size;

	cq->depth = cmdq->sq.depth;
	buf_size = cq->depth << CQE_SHIFT;

	cq->qbuf =
		dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size),
				   &cq->qbuf_dma_addr, GFP_KERNEL | __GFP_ZERO);
	if (!cq->qbuf)
		return -ENOMEM;

	spin_lock_init(&cq->lock);

	cq->db_record = (u64 *)(cq->qbuf + buf_size);

	atomic64_set(&cq->armed_num, 0);

	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_CQ_ADDR_H_REG,
			  upper_32_bits(cq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_CQ_ADDR_L_REG,
			  lower_32_bits(cq->qbuf_dma_addr));
	erdma_reg_write64(dev, ERDMA_CMDQ_CQ_DB_HOST_ADDR_REG,
			  cq->qbuf_dma_addr + buf_size);

	return 0;
}
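
/* Set up the event queue that signals cmdq completions in event mode. */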
static int erdma_cmdq_eq_init(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;
	struct erdma_eq *eq = &cmdq->eq;
	u32 buf_size;

	eq->depth = cmdq->max_outstandings;
	buf_size = eq->depth << EQE_SHIFT;

	eq->qbuf =
		dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size),
				   &eq->qbuf_dma_addr, GFP_KERNEL | __GFP_ZERO);
	if (!eq->qbuf)
		return -ENOMEM;

	spin_lock_init(&eq->lock);
	atomic64_set(&eq->event_num, 0);

	eq->db_addr =
		(u64 __iomem *)(dev->func_bar + ERDMA_REGS_CEQ_DB_BASE_REG);
	eq->db_record = (u64 *)(eq->qbuf + buf_size);

	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_ADDR_H_REG,
			  upper_32_bits(eq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_ADDR_L_REG,
			  lower_32_bits(eq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_DEPTH_REG, eq->depth);
	erdma_reg_write64(dev, ERDMA_CMDQ_EQ_DB_HOST_ADDR_REG,
			  eq->qbuf_dma_addr + buf_size);

	return 0;
}
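
/* Bring up the command queue: allocate the wait contexts and the SQ/CQ/EQ,
 * kick device initialization, then poll ERDMA_REGS_DEV_ST_REG until the
 * INIT_DONE bit is set. The cmdq starts in polling mode;
 * erdma_finish_cmdq_init() switches it to event mode once device init is done.
 */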
int erdma_cmdq_init(struct erdma_dev *dev)
{
	int err, i;
	struct erdma_cmdq *cmdq = &dev->cmdq;
	u32 sts, ctrl;

	cmdq->max_outstandings = ERDMA_CMDQ_MAX_OUTSTANDING;
	cmdq->use_event = false;

	sema_init(&cmdq->credits, cmdq->max_outstandings);

	err = erdma_cmdq_wait_res_init(dev, cmdq);
	if (err)
		return err;

	err = erdma_cmdq_sq_init(dev);
	if (err)
		return err;

	err = erdma_cmdq_cq_init(dev);
	if (err)
		goto err_destroy_sq;

	err = erdma_cmdq_eq_init(dev);
	if (err)
		goto err_destroy_cq;

	ctrl = FIELD_PREP(ERDMA_REG_DEV_CTRL_INIT_MASK, 1);
	erdma_reg_write32(dev, ERDMA_REGS_DEV_CTRL_REG, ctrl);

	for (i = 0; i < ERDMA_WAIT_DEV_DONE_CNT; i++) {
		sts = erdma_reg_read32_filed(dev, ERDMA_REGS_DEV_ST_REG,
					     ERDMA_REG_DEV_ST_INIT_DONE_MASK);
		if (sts)
			break;

		msleep(ERDMA_REG_ACCESS_WAIT_MS);
	}

	if (i == ERDMA_WAIT_DEV_DONE_CNT) {
		dev_err(&dev->pdev->dev, "wait init done failed.\n");
		err = -ETIMEDOUT;
		goto err_destroy_eq;
	}

	set_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);

	return 0;

err_destroy_eq:
	dma_free_coherent(&dev->pdev->dev,
			  (cmdq->eq.depth << EQE_SHIFT) +
				  ERDMA_EXTRA_BUFFER_SIZE,
			  cmdq->eq.qbuf, cmdq->eq.qbuf_dma_addr);

err_destroy_cq:
	dma_free_coherent(&dev->pdev->dev,
			  (cmdq->cq.depth << CQE_SHIFT) +
				  ERDMA_EXTRA_BUFFER_SIZE,
			  cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr);

err_destroy_sq:
	dma_free_coherent(&dev->pdev->dev,
			  (cmdq->sq.depth << SQEBB_SHIFT) +
				  ERDMA_EXTRA_BUFFER_SIZE,
			  cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr);

	return err;
}

void erdma_finish_cmdq_init(struct erdma_dev *dev)
{
	/* After device init finishes successfully, switch the cmdq to event mode. */
	dev->cmdq.use_event = true;
	arm_cmdq_cq(&dev->cmdq);
}

void erdma_cmdq_destroy(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;

	clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);

	dma_free_coherent(&dev->pdev->dev,
			  (cmdq->eq.depth << EQE_SHIFT) +
				  ERDMA_EXTRA_BUFFER_SIZE,
			  cmdq->eq.qbuf, cmdq->eq.qbuf_dma_addr);
	dma_free_coherent(&dev->pdev->dev,
			  (cmdq->sq.depth << SQEBB_SHIFT) +
				  ERDMA_EXTRA_BUFFER_SIZE,
			  cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr);
	dma_free_coherent(&dev->pdev->dev,
			  (cmdq->cq.depth << CQE_SHIFT) +
				  ERDMA_EXTRA_BUFFER_SIZE,
			  cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr);
}
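
/* Return the CQE at the CQ consumer index if its owner bit shows the device
 * has written it, otherwise NULL.
 */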
static void *get_next_valid_cmdq_cqe(struct erdma_cmdq *cmdq)
{
	__be32 *cqe = get_queue_entry(cmdq->cq.qbuf, cmdq->cq.ci,
				      cmdq->cq.depth, CQE_SHIFT);
	u32 owner = FIELD_GET(ERDMA_CQE_HDR_OWNER_MASK,
			      __be32_to_cpu(READ_ONCE(*cqe)));

	return owner ^ !!(cmdq->cq.ci & cmdq->cq.depth) ? cqe : NULL;
}
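
/* Copy the request into the SQ, patch the WQEBB index, context cookie and
 * WQEBB count into the header, advance the producer index and ring the
 * doorbell. Called with sq.lock held.
 */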
static void push_cmdq_sqe(struct erdma_cmdq *cmdq, u64 *req, size_t req_len,
			  struct erdma_comp_wait *comp_wait)
{
	__le64 *wqe;
	u64 hdr = *req;

	comp_wait->cmd_status = ERDMA_CMD_STATUS_ISSUED;
	reinit_completion(&comp_wait->wait_event);
	comp_wait->sq_pi = cmdq->sq.pi;

	wqe = get_queue_entry(cmdq->sq.qbuf, cmdq->sq.pi, cmdq->sq.depth,
			      SQEBB_SHIFT);
	memcpy(wqe, req, req_len);

	cmdq->sq.pi += cmdq->sq.wqebb_cnt;
	hdr |= FIELD_PREP(ERDMA_CMD_HDR_WQEBB_INDEX_MASK, cmdq->sq.pi) |
	       FIELD_PREP(ERDMA_CMD_HDR_CONTEXT_COOKIE_MASK,
			  comp_wait->ctx_id) |
	       FIELD_PREP(ERDMA_CMD_HDR_WQEBB_CNT_MASK, cmdq->sq.wqebb_cnt - 1);
	*wqe = cpu_to_le64(hdr);

	kick_cmdq_db(cmdq);
}
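
/* Consume one CQE: locate the originating SQE to recover the wait context,
 * record the completion status and returned data, and complete the waiter in
 * event mode. Called with cq.lock held.
 */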
static int erdma_poll_single_cmd_completion(struct erdma_cmdq *cmdq)
{
	struct erdma_comp_wait *comp_wait;
	u32 hdr0, sqe_idx;
	__be32 *cqe;
	u16 ctx_id;
	u64 *sqe;
	int i;

	cqe = get_next_valid_cmdq_cqe(cmdq);
	if (!cqe)
		return -EAGAIN;

	cmdq->cq.ci++;

	dma_rmb();
	hdr0 = __be32_to_cpu(*cqe);
	sqe_idx = __be32_to_cpu(*(cqe + 1));

	sqe = get_queue_entry(cmdq->sq.qbuf, sqe_idx, cmdq->sq.depth,
			      SQEBB_SHIFT);
	ctx_id = FIELD_GET(ERDMA_CMD_HDR_CONTEXT_COOKIE_MASK, *sqe);
	comp_wait = &cmdq->wait_pool[ctx_id];
	if (comp_wait->cmd_status != ERDMA_CMD_STATUS_ISSUED)
		return -EIO;

	comp_wait->cmd_status = ERDMA_CMD_STATUS_FINISHED;
	comp_wait->comp_status = FIELD_GET(ERDMA_CQE_HDR_SYNDROME_MASK, hdr0);
	cmdq->sq.ci += cmdq->sq.wqebb_cnt;

	for (i = 0; i < 4; i++)
		comp_wait->comp_data[i] = __be32_to_cpu(*(cqe + 2 + i));

	if (cmdq->use_event)
		complete(&comp_wait->wait_event);

	return 0;
}

static void erdma_polling_cmd_completions(struct erdma_cmdq *cmdq)
{
	unsigned long flags;
	u16 comp_num;

	spin_lock_irqsave(&cmdq->cq.lock, flags);

	/* There can be no more than max_outstandings
	 * completions pending at one time.
	 */
	for (comp_num = 0; comp_num < cmdq->max_outstandings; comp_num++)
		if (erdma_poll_single_cmd_completion(cmdq))
			break;

	if (comp_num && cmdq->use_event)
		arm_cmdq_cq(cmdq);

	spin_unlock_irqrestore(&cmdq->cq.lock, flags);
}
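
/* Event-mode completion path, driven by the cmdq EQ: drain pending EQEs, reap
 * the completions they announce from the CQ, then notify the EQ doorbell.
 */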
void erdma_cmdq_completion_handler(struct erdma_cmdq *cmdq)
{
	int got_event = 0;

	if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state) ||
	    !cmdq->use_event)
		return;

	while (get_next_valid_eqe(&cmdq->eq)) {
		cmdq->eq.ci++;
		got_event++;
	}

	if (got_event) {
		cmdq->cq.cmdsn++;
		erdma_polling_cmd_completions(cmdq);
	}

	notify_eq(&cmdq->eq);
}
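
/* Polling mode: repeatedly reap completions until this command leaves the
 * ISSUED state or the timeout (in milliseconds) expires.
 */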
static int erdma_poll_cmd_completion(struct erdma_comp_wait *comp_ctx,
				     struct erdma_cmdq *cmdq, u32 timeout)
{
	unsigned long comp_timeout = jiffies + msecs_to_jiffies(timeout);

	while (1) {
		erdma_polling_cmd_completions(cmdq);
		if (comp_ctx->cmd_status != ERDMA_CMD_STATUS_ISSUED)
			break;

		if (time_is_before_jiffies(comp_timeout))
			return -ETIME;

		msleep(20);
	}

	return 0;
}
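
/* Event mode: sleep on the wait context's completion; if the command has not
 * finished when the wait times out, mark it TIMEOUT under cq.lock.
 */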
static int erdma_wait_cmd_completion(struct erdma_comp_wait *comp_ctx,
				     struct erdma_cmdq *cmdq, u32 timeout)
{
	unsigned long flags = 0;

	wait_for_completion_timeout(&comp_ctx->wait_event,
				    msecs_to_jiffies(timeout));

	if (unlikely(comp_ctx->cmd_status != ERDMA_CMD_STATUS_FINISHED)) {
		spin_lock_irqsave(&cmdq->cq.lock, flags);
		comp_ctx->cmd_status = ERDMA_CMD_STATUS_TIMEOUT;
		spin_unlock_irqrestore(&cmdq->cq.lock, flags);
		return -ETIME;
	}

	return 0;
}

void erdma_cmdq_build_reqhdr(u64 *hdr, u32 mod, u32 op)
{
	*hdr = FIELD_PREP(ERDMA_CMD_HDR_SUB_MOD_MASK, mod) |
	       FIELD_PREP(ERDMA_CMD_HDR_OPCODE_MASK, op);
}
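
/* Post a command and wait for it to complete, by event or by polling depending
 * on cmdq->use_event. Returns -EIO if the device reports a non-zero completion
 * status; resp0/resp1, when provided, return the 16 bytes of completion data.
 */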
int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, void *req, u32 req_size,
			u64 *resp0, u64 *resp1)
{
	struct erdma_comp_wait *comp_wait;
	int ret;

	if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state))
		return -ENODEV;

	down(&cmdq->credits);

	comp_wait = get_comp_wait(cmdq);
	if (IS_ERR(comp_wait)) {
		clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);
		set_bit(ERDMA_CMDQ_STATE_CTX_ERR_BIT, &cmdq->state);
		up(&cmdq->credits);
		return PTR_ERR(comp_wait);
	}

	spin_lock(&cmdq->sq.lock);
	push_cmdq_sqe(cmdq, req, req_size, comp_wait);
	spin_unlock(&cmdq->sq.lock);

	if (cmdq->use_event)
		ret = erdma_wait_cmd_completion(comp_wait, cmdq,
						ERDMA_CMDQ_TIMEOUT_MS);
	else
		ret = erdma_poll_cmd_completion(comp_wait, cmdq,
						ERDMA_CMDQ_TIMEOUT_MS);

	if (ret) {
		set_bit(ERDMA_CMDQ_STATE_TIMEOUT_BIT, &cmdq->state);
		clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);
		goto out;
	}

	if (comp_wait->comp_status)
		ret = -EIO;

	if (resp0 && resp1) {
		*resp0 = *((u64 *)&comp_wait->comp_data[0]);
		*resp1 = *((u64 *)&comp_wait->comp_data[2]);
	}

	put_comp_wait(cmdq, comp_wait);

out:
	up(&cmdq->credits);
	return ret;
}