123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217 |
- // SPDX-License-Identifier: GPL-2.0
- /* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
- #include <linux/init.h>
- #include <linux/kernel.h>
- #include <linux/module.h>
- #include <linux/pci.h>
- #include <uapi/linux/idxd.h>
- #include "idxd.h"
- #include "registers.h"
- static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu)
- {
- struct idxd_desc *desc;
- struct idxd_device *idxd = wq->idxd;
- desc = wq->descs[idx];
- memset(desc->hw, 0, sizeof(struct dsa_hw_desc));
- memset(desc->completion, 0, idxd->data->compl_size);
- desc->cpu = cpu;
- if (device_pasid_enabled(idxd))
- desc->hw->pasid = idxd->pasid;
- return desc;
- }
- struct idxd_desc *idxd_alloc_desc(struct idxd_wq *wq, enum idxd_op_type optype)
- {
- int cpu, idx;
- struct idxd_device *idxd = wq->idxd;
- DEFINE_SBQ_WAIT(wait);
- struct sbq_wait_state *ws;
- struct sbitmap_queue *sbq;
- if (idxd->state != IDXD_DEV_ENABLED)
- return ERR_PTR(-EIO);
- sbq = &wq->sbq;
- idx = sbitmap_queue_get(sbq, &cpu);
- if (idx < 0) {
- if (optype == IDXD_OP_NONBLOCK)
- return ERR_PTR(-EAGAIN);
- } else {
- return __get_desc(wq, idx, cpu);
- }
- ws = &sbq->ws[0];
- for (;;) {
- sbitmap_prepare_to_wait(sbq, ws, &wait, TASK_INTERRUPTIBLE);
- if (signal_pending_state(TASK_INTERRUPTIBLE, current))
- break;
- idx = sbitmap_queue_get(sbq, &cpu);
- if (idx >= 0)
- break;
- schedule();
- }
- sbitmap_finish_wait(sbq, ws, &wait);
- if (idx < 0)
- return ERR_PTR(-EAGAIN);
- return __get_desc(wq, idx, cpu);
- }
- void idxd_free_desc(struct idxd_wq *wq, struct idxd_desc *desc)
- {
- int cpu = desc->cpu;
- desc->cpu = -1;
- sbitmap_queue_clear(&wq->sbq, desc->id, cpu);
- }
- static struct idxd_desc *list_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
- struct idxd_desc *desc)
- {
- struct idxd_desc *d, *n;
- lockdep_assert_held(&ie->list_lock);
- list_for_each_entry_safe(d, n, &ie->work_list, list) {
- if (d == desc) {
- list_del(&d->list);
- return d;
- }
- }
- /*
- * At this point, the desc needs to be aborted is held by the completion
- * handler where it has taken it off the pending list but has not added to the
- * work list. It will be cleaned up by the interrupt handler when it sees the
- * IDXD_COMP_DESC_ABORT for completion status.
- */
- return NULL;
- }
- static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
- struct idxd_desc *desc)
- {
- struct idxd_desc *d, *t, *found = NULL;
- struct llist_node *head;
- LIST_HEAD(flist);
- desc->completion->status = IDXD_COMP_DESC_ABORT;
- /*
- * Grab the list lock so it will block the irq thread handler. This allows the
- * abort code to locate the descriptor need to be aborted.
- */
- spin_lock(&ie->list_lock);
- head = llist_del_all(&ie->pending_llist);
- if (head) {
- llist_for_each_entry_safe(d, t, head, llnode) {
- if (d == desc) {
- found = desc;
- continue;
- }
- if (d->completion->status)
- list_add_tail(&d->list, &flist);
- else
- list_add_tail(&d->list, &ie->work_list);
- }
- }
- if (!found)
- found = list_abort_desc(wq, ie, desc);
- spin_unlock(&ie->list_lock);
- if (found)
- idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, false);
- /*
- * completing the descriptor will return desc to allocator and
- * the desc can be acquired by a different process and the
- * desc->list can be modified. Delete desc from list so the
- * list trasversing does not get corrupted by the other process.
- */
- list_for_each_entry_safe(d, t, &flist, list) {
- list_del_init(&d->list);
- idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, true);
- }
- }
- /*
- * ENQCMDS typically fail when the WQ is inactive or busy. On host submission, the driver
- * has better control of number of descriptors being submitted to a shared wq by limiting
- * the number of driver allocated descriptors to the wq size. However, when the swq is
- * exported to a guest kernel, it may be shared with multiple guest kernels. This means
- * the likelihood of getting busy returned on the swq when submitting goes significantly up.
- * Having a tunable retry mechanism allows the driver to keep trying for a bit before giving
- * up. The sysfs knob can be tuned by the system administrator.
- */
- int idxd_enqcmds(struct idxd_wq *wq, void __iomem *portal, const void *desc)
- {
- unsigned int retries = wq->enqcmds_retries;
- int rc;
- do {
- rc = enqcmds(portal, desc);
- if (rc == 0)
- break;
- cpu_relax();
- } while (retries--);
- return rc;
- }
- int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
- {
- struct idxd_device *idxd = wq->idxd;
- struct idxd_irq_entry *ie = NULL;
- u32 desc_flags = desc->hw->flags;
- void __iomem *portal;
- int rc;
- if (idxd->state != IDXD_DEV_ENABLED)
- return -EIO;
- if (!percpu_ref_tryget_live(&wq->wq_active)) {
- wait_for_completion(&wq->wq_resurrect);
- if (!percpu_ref_tryget_live(&wq->wq_active))
- return -ENXIO;
- }
- portal = idxd_wq_portal_addr(wq);
- /*
- * The wmb() flushes writes to coherent DMA data before
- * possibly triggering a DMA read. The wmb() is necessary
- * even on UP because the recipient is a device.
- */
- wmb();
- /*
- * Pending the descriptor to the lockless list for the irq_entry
- * that we designated the descriptor to.
- */
- if (desc_flags & IDXD_OP_FLAG_RCI) {
- ie = &wq->ie;
- desc->hw->int_handle = ie->int_handle;
- llist_add(&desc->llnode, &ie->pending_llist);
- }
- if (wq_dedicated(wq)) {
- iosubmit_cmds512(portal, desc->hw, 1);
- } else {
- rc = idxd_enqcmds(wq, portal, desc->hw);
- if (rc < 0) {
- percpu_ref_put(&wq->wq_active);
- /* abort operation frees the descriptor */
- if (ie)
- llist_abort_desc(wq, ie, desc);
- return rc;
- }
- }
- percpu_ref_put(&wq->wq_active);
- return 0;
- }
|