drm/amdkfd: Fix circular lock in nocpsch path

[ Upstream commit a7b2451d31cfa2e8aeccf3b35612ce33f02371fc ]

Calling free_mqd inside of destroy_queue_nocpsch_locked can cause a
circular lock dependency. destroy_queue_nocpsch_locked is called under
the DQM lock, which is also taken in MMU notifiers, potentially in FS
reclaim context. free_mqd takes another lock, the BO reservation lock;
taking it, and potentially causing an FS reclaim, while the DQM lock is
held creates a problematic circular lock dependency. Therefore move
free_mqd out of destroy_queue_nocpsch_locked and call it after unlocking
DQM.

Signed-off-by: Amber Lin <Amber.Lin@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
Amber Lin
2021-06-07 14:46:21 -04:00
committed by Greg Kroah-Hartman
parent cd29db48bb
commit 0e72b151e3

View File

@@ -486,9 +486,6 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
if (retval == -ETIME) if (retval == -ETIME)
qpd->reset_wavefronts = true; qpd->reset_wavefronts = true;
mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
list_del(&q->list); list_del(&q->list);
if (list_empty(&qpd->queues_list)) { if (list_empty(&qpd->queues_list)) {
if (qpd->reset_wavefronts) { if (qpd->reset_wavefronts) {
@@ -523,6 +520,8 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
int retval; int retval;
uint64_t sdma_val = 0; uint64_t sdma_val = 0;
struct kfd_process_device *pdd = qpd_to_pdd(qpd); struct kfd_process_device *pdd = qpd_to_pdd(qpd);
struct mqd_manager *mqd_mgr =
dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)];
/* Get the SDMA queue stats */ /* Get the SDMA queue stats */
if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
@@ -540,6 +539,8 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
pdd->sdma_past_activity_counter += sdma_val; pdd->sdma_past_activity_counter += sdma_val;
dqm_unlock(dqm); dqm_unlock(dqm);
mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
return retval; return retval;
} }
@@ -1632,7 +1633,7 @@ static int set_trap_handler(struct device_queue_manager *dqm,
static int process_termination_nocpsch(struct device_queue_manager *dqm, static int process_termination_nocpsch(struct device_queue_manager *dqm,
struct qcm_process_device *qpd) struct qcm_process_device *qpd)
{ {
struct queue *q, *next; struct queue *q;
struct device_process_node *cur, *next_dpn; struct device_process_node *cur, *next_dpn;
int retval = 0; int retval = 0;
bool found = false; bool found = false;
@@ -1640,12 +1641,19 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm,
dqm_lock(dqm); dqm_lock(dqm);
/* Clear all user mode queues */ /* Clear all user mode queues */
list_for_each_entry_safe(q, next, &qpd->queues_list, list) { while (!list_empty(&qpd->queues_list)) {
struct mqd_manager *mqd_mgr;
int ret; int ret;
q = list_first_entry(&qpd->queues_list, struct queue, list);
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
ret = destroy_queue_nocpsch_locked(dqm, qpd, q); ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
if (ret) if (ret)
retval = ret; retval = ret;
dqm_unlock(dqm);
mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
dqm_lock(dqm);
} }
/* Unregister process */ /* Unregister process */