submit.c 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217
  1. // SPDX-License-Identifier: GPL-2.0
  2. /* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
  3. #include <linux/init.h>
  4. #include <linux/kernel.h>
  5. #include <linux/module.h>
  6. #include <linux/pci.h>
  7. #include <uapi/linux/idxd.h>
  8. #include "idxd.h"
  9. #include "registers.h"
  10. static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu)
  11. {
  12. struct idxd_desc *desc;
  13. struct idxd_device *idxd = wq->idxd;
  14. desc = wq->descs[idx];
  15. memset(desc->hw, 0, sizeof(struct dsa_hw_desc));
  16. memset(desc->completion, 0, idxd->data->compl_size);
  17. desc->cpu = cpu;
  18. if (device_pasid_enabled(idxd))
  19. desc->hw->pasid = idxd->pasid;
  20. return desc;
  21. }
  22. struct idxd_desc *idxd_alloc_desc(struct idxd_wq *wq, enum idxd_op_type optype)
  23. {
  24. int cpu, idx;
  25. struct idxd_device *idxd = wq->idxd;
  26. DEFINE_SBQ_WAIT(wait);
  27. struct sbq_wait_state *ws;
  28. struct sbitmap_queue *sbq;
  29. if (idxd->state != IDXD_DEV_ENABLED)
  30. return ERR_PTR(-EIO);
  31. sbq = &wq->sbq;
  32. idx = sbitmap_queue_get(sbq, &cpu);
  33. if (idx < 0) {
  34. if (optype == IDXD_OP_NONBLOCK)
  35. return ERR_PTR(-EAGAIN);
  36. } else {
  37. return __get_desc(wq, idx, cpu);
  38. }
  39. ws = &sbq->ws[0];
  40. for (;;) {
  41. sbitmap_prepare_to_wait(sbq, ws, &wait, TASK_INTERRUPTIBLE);
  42. if (signal_pending_state(TASK_INTERRUPTIBLE, current))
  43. break;
  44. idx = sbitmap_queue_get(sbq, &cpu);
  45. if (idx >= 0)
  46. break;
  47. schedule();
  48. }
  49. sbitmap_finish_wait(sbq, ws, &wait);
  50. if (idx < 0)
  51. return ERR_PTR(-EAGAIN);
  52. return __get_desc(wq, idx, cpu);
  53. }
  54. void idxd_free_desc(struct idxd_wq *wq, struct idxd_desc *desc)
  55. {
  56. int cpu = desc->cpu;
  57. desc->cpu = -1;
  58. sbitmap_queue_clear(&wq->sbq, desc->id, cpu);
  59. }
  60. static struct idxd_desc *list_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
  61. struct idxd_desc *desc)
  62. {
  63. struct idxd_desc *d, *n;
  64. lockdep_assert_held(&ie->list_lock);
  65. list_for_each_entry_safe(d, n, &ie->work_list, list) {
  66. if (d == desc) {
  67. list_del(&d->list);
  68. return d;
  69. }
  70. }
  71. /*
  72. * At this point, the desc needs to be aborted is held by the completion
  73. * handler where it has taken it off the pending list but has not added to the
  74. * work list. It will be cleaned up by the interrupt handler when it sees the
  75. * IDXD_COMP_DESC_ABORT for completion status.
  76. */
  77. return NULL;
  78. }
  79. static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
  80. struct idxd_desc *desc)
  81. {
  82. struct idxd_desc *d, *t, *found = NULL;
  83. struct llist_node *head;
  84. LIST_HEAD(flist);
  85. desc->completion->status = IDXD_COMP_DESC_ABORT;
  86. /*
  87. * Grab the list lock so it will block the irq thread handler. This allows the
  88. * abort code to locate the descriptor need to be aborted.
  89. */
  90. spin_lock(&ie->list_lock);
  91. head = llist_del_all(&ie->pending_llist);
  92. if (head) {
  93. llist_for_each_entry_safe(d, t, head, llnode) {
  94. if (d == desc) {
  95. found = desc;
  96. continue;
  97. }
  98. if (d->completion->status)
  99. list_add_tail(&d->list, &flist);
  100. else
  101. list_add_tail(&d->list, &ie->work_list);
  102. }
  103. }
  104. if (!found)
  105. found = list_abort_desc(wq, ie, desc);
  106. spin_unlock(&ie->list_lock);
  107. if (found)
  108. idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, false);
  109. /*
  110. * completing the descriptor will return desc to allocator and
  111. * the desc can be acquired by a different process and the
  112. * desc->list can be modified. Delete desc from list so the
  113. * list trasversing does not get corrupted by the other process.
  114. */
  115. list_for_each_entry_safe(d, t, &flist, list) {
  116. list_del_init(&d->list);
  117. idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, true);
  118. }
  119. }
  120. /*
  121. * ENQCMDS typically fail when the WQ is inactive or busy. On host submission, the driver
  122. * has better control of number of descriptors being submitted to a shared wq by limiting
  123. * the number of driver allocated descriptors to the wq size. However, when the swq is
  124. * exported to a guest kernel, it may be shared with multiple guest kernels. This means
  125. * the likelihood of getting busy returned on the swq when submitting goes significantly up.
  126. * Having a tunable retry mechanism allows the driver to keep trying for a bit before giving
  127. * up. The sysfs knob can be tuned by the system administrator.
  128. */
  129. int idxd_enqcmds(struct idxd_wq *wq, void __iomem *portal, const void *desc)
  130. {
  131. unsigned int retries = wq->enqcmds_retries;
  132. int rc;
  133. do {
  134. rc = enqcmds(portal, desc);
  135. if (rc == 0)
  136. break;
  137. cpu_relax();
  138. } while (retries--);
  139. return rc;
  140. }
  141. int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
  142. {
  143. struct idxd_device *idxd = wq->idxd;
  144. struct idxd_irq_entry *ie = NULL;
  145. u32 desc_flags = desc->hw->flags;
  146. void __iomem *portal;
  147. int rc;
  148. if (idxd->state != IDXD_DEV_ENABLED)
  149. return -EIO;
  150. if (!percpu_ref_tryget_live(&wq->wq_active)) {
  151. wait_for_completion(&wq->wq_resurrect);
  152. if (!percpu_ref_tryget_live(&wq->wq_active))
  153. return -ENXIO;
  154. }
  155. portal = idxd_wq_portal_addr(wq);
  156. /*
  157. * The wmb() flushes writes to coherent DMA data before
  158. * possibly triggering a DMA read. The wmb() is necessary
  159. * even on UP because the recipient is a device.
  160. */
  161. wmb();
  162. /*
  163. * Pending the descriptor to the lockless list for the irq_entry
  164. * that we designated the descriptor to.
  165. */
  166. if (desc_flags & IDXD_OP_FLAG_RCI) {
  167. ie = &wq->ie;
  168. desc->hw->int_handle = ie->int_handle;
  169. llist_add(&desc->llnode, &ie->pending_llist);
  170. }
  171. if (wq_dedicated(wq)) {
  172. iosubmit_cmds512(portal, desc->hw, 1);
  173. } else {
  174. rc = idxd_enqcmds(wq, portal, desc->hw);
  175. if (rc < 0) {
  176. percpu_ref_put(&wq->wq_active);
  177. /* abort operation frees the descriptor */
  178. if (ie)
  179. llist_abort_desc(wq, ie, desc);
  180. return rc;
  181. }
  182. }
  183. percpu_ref_put(&wq->wq_active);
  184. return 0;
  185. }