msm: eva: Enhance session error handling

Ensure the SMMU fault dump is completed before BUG_ON is called. Rely on
the watchdog (WD) timeout for firmware (FW) hang debugging.

Change-Id: I0d7fba31dbc602b244af74da6774885c56af0fcc
Signed-off-by: George Shen <quic_sqiao@quicinc.com>
This commit is contained in:
George Shen
2023-05-01 08:22:20 -07:00
父節點 1f5a03e9a6
當前提交 2bdff6770e
共有 4 個文件被更改,包括 25 次插入 和 73 次删除

查看文件

@@ -633,9 +633,9 @@ int msm_cvp_unmap_buf_wncc(struct msm_cvp_inst *inst,
inst->unused_wncc_bufs.smem[idx] = *(cbuf->smem); inst->unused_wncc_bufs.smem[idx] = *(cbuf->smem);
inst->unused_wncc_bufs.nr++; inst->unused_wncc_bufs.nr++;
inst->unused_wncc_bufs.nr = inst->unused_wncc_bufs.nr =
(inst->unused_wncc_bufs.nr > MAX_FRAME_BUFFER_NUMS)? (inst->unused_wncc_bufs.nr > NUM_WNCC_BUFS)?
MAX_FRAME_BUFFER_NUMS : inst->unused_wncc_bufs.nr; NUM_WNCC_BUFS : inst->unused_wncc_bufs.nr;
inst->unused_wncc_bufs.ktid = ++idx % MAX_FRAME_BUFFER_NUMS; inst->unused_wncc_bufs.ktid = ++idx % NUM_WNCC_BUFS;
} }
mutex_unlock(&inst->cvpwnccbufs.lock); mutex_unlock(&inst->cvpwnccbufs.lock);
@@ -1880,6 +1880,7 @@ int msm_cvp_session_deinit_buffers(struct msm_cvp_inst *inst)
return rc; return rc;
} }
#define MAX_NUM_FRAMES_DUMP 4
void msm_cvp_print_inst_bufs(struct msm_cvp_inst *inst, bool log) void msm_cvp_print_inst_bufs(struct msm_cvp_inst *inst, bool log)
{ {
struct cvp_internal_buf *buf; struct cvp_internal_buf *buf;
@@ -1916,10 +1917,18 @@ void msm_cvp_print_inst_bufs(struct msm_cvp_inst *inst, bool log)
dprintk(CVP_ERR, "frame buffer list\n"); dprintk(CVP_ERR, "frame buffer list\n");
mutex_lock(&inst->frames.lock); mutex_lock(&inst->frames.lock);
list_for_each_entry(frame, &inst->frames.list, list) { list_for_each_entry(frame, &inst->frames.list, list) {
dprintk(CVP_ERR, "frame no %d tid %llx bufs\n", i++, frame->ktid); i++;
for (c = 0; c < frame->nr; c++) if (i <= MAX_NUM_FRAMES_DUMP) {
_log_smem(snap, inst, frame->bufs[c].smem, log); dprintk(CVP_ERR, "frame no %d tid %llx bufs\n",
i, frame->ktid);
for (c = 0; c < frame->nr; c++)
_log_smem(snap, inst, frame->bufs[c].smem,
log);
}
} }
if (i > MAX_NUM_FRAMES_DUMP)
dprintk(CVP_ERR, "Skipped %d frames' buffers\n",
(i - MAX_NUM_FRAMES_DUMP));
mutex_unlock(&inst->frames.lock); mutex_unlock(&inst->frames.lock);
mutex_lock(&inst->cvpdspbufs.lock); mutex_lock(&inst->cvpdspbufs.lock);

查看文件

@@ -22,11 +22,6 @@
static void handle_session_error(enum hal_command_response cmd, void *data); static void handle_session_error(enum hal_command_response cmd, void *data);
/*
 * Deprecated stub: emits a CVP_WARN log line naming this function and does
 * nothing else. The @inst argument is accepted but never read.
 */
static void msm_cvp_comm_generate_session_error(struct msm_cvp_inst *inst)
{
dprintk(CVP_WARN, "%s function is deprecated\n", __func__);
}
static void dump_hfi_queue(struct iris_hfi_device *device) static void dump_hfi_queue(struct iris_hfi_device *device)
{ {
struct cvp_hfi_queue_header *queue; struct cvp_hfi_queue_header *queue;
@@ -567,28 +562,6 @@ static void handle_session_error(enum hal_command_response cmd, void *data)
cvp_put_inst(inst); cvp_put_inst(inst);
} }
/*
 * Set the state of every instance on @core's instance list to
 * MSM_CVP_CORE_INVALID.
 *
 * The list walk is done with core->lock held; each state write is done
 * under that instance's own inst->lock. If @core is NULL an error is
 * logged and the function returns without touching anything.
 */
static void msm_comm_clean_notify_client(struct msm_cvp_core *core)
{
struct msm_cvp_inst *inst = NULL;
if (!core) {
dprintk(CVP_ERR, "%s: Invalid params\n", __func__);
return;
}
dprintk(CVP_WARN, "%s: Core %pK\n", __func__, core);
mutex_lock(&core->lock);
list_for_each_entry(inst, &core->instances, list) {
/* inst->lock is taken while core->lock is already held. */
mutex_lock(&inst->lock);
inst->state = MSM_CVP_CORE_INVALID;
mutex_unlock(&inst->lock);
/*
 * NOTE(review): despite the message text, this function only logs;
 * no event or callback is issued to the client here.
 */
dprintk(CVP_WARN,
"%s Send sys error for inst %pK\n", __func__, inst);
}
mutex_unlock(&core->lock);
}
void handle_sys_error(enum hal_command_response cmd, void *data) void handle_sys_error(enum hal_command_response cmd, void *data)
{ {
struct msm_cvp_cb_cmd_done *response = data; struct msm_cvp_cb_cmd_done *response = data;
@@ -848,11 +821,18 @@ static int msm_comm_session_abort(struct msm_cvp_inst *inst)
int rc = 0, abort_completion = 0; int rc = 0, abort_completion = 0;
struct cvp_hfi_device *hdev; struct cvp_hfi_device *hdev;
if (!inst || !inst->core || !inst->core->device) { if (!inst || !inst->core || !inst->core->device) {
dprintk(CVP_ERR, "%s invalid params\n", __func__); dprintk(CVP_ERR, "%s invalid params\n", __func__);
return -EINVAL; return -EINVAL;
} }
hdev = inst->core->device; hdev = inst->core->device;
print_hfi_queue_info(hdev);
if (1)
return 0;
/* Activate code below for Watchdog timeout testing */
abort_completion = SESSION_MSG_INDEX(HAL_SESSION_ABORT_DONE); abort_completion = SESSION_MSG_INDEX(HAL_SESSION_ABORT_DONE);
dprintk(CVP_WARN, "%s: inst %pK session %x\n", __func__, dprintk(CVP_WARN, "%s: inst %pK session %x\n", __func__,
@@ -882,44 +862,7 @@ exit:
static void handle_thermal_event(struct msm_cvp_core *core) static void handle_thermal_event(struct msm_cvp_core *core)
{ {
int rc = 0; dprintk(CVP_WARN, "Deprecated thermal_event handler\n");
struct msm_cvp_inst *inst;
if (!core || !core->device) {
dprintk(CVP_ERR, "%s Invalid params\n", __func__);
return;
}
mutex_lock(&core->lock);
list_for_each_entry(inst, &core->instances, list) {
if (!inst->session)
continue;
mutex_unlock(&core->lock);
if (inst->state >= MSM_CVP_OPEN_DONE &&
inst->state < MSM_CVP_CLOSE_DONE) {
dprintk(CVP_WARN, "%s: abort inst %pK\n",
__func__, inst);
rc = msm_comm_session_abort(inst);
if (rc) {
dprintk(CVP_ERR,
"%s session_abort failed rc: %d\n",
__func__, rc);
goto err_sess_abort;
}
change_cvp_inst_state(inst, MSM_CVP_CORE_INVALID);
dprintk(CVP_WARN,
"%s Send sys error for inst %pK\n",
__func__, inst);
} else {
msm_cvp_comm_generate_session_error(inst);
}
mutex_lock(&core->lock);
}
mutex_unlock(&core->lock);
return;
err_sess_abort:
msm_comm_clean_notify_client(core);
} }
void msm_cvp_comm_handle_thermal_event(void) void msm_cvp_comm_handle_thermal_event(void)

查看文件

@@ -15,7 +15,7 @@
#include <linux/sched.h> #include <linux/sched.h>
#ifdef CVP_FASTRPC_ENABLED #ifdef CVP_FASTRPC_ENABLED
#include <linux/fastrpc.h> #include <fastrpc.h>
#else #else
struct fastrpc_device { struct fastrpc_device {
int handle; int handle;

查看文件

@@ -1038,13 +1038,13 @@ int msm_cvp_smmu_fault_handler(struct iommu_domain *domain,
return -EINVAL; return -EINVAL;
} }
mutex_lock(&core->lock);
core->smmu_fault_count++; core->smmu_fault_count++;
if (!core->last_fault_addr) if (!core->last_fault_addr)
core->last_fault_addr = iova; core->last_fault_addr = iova;
dprintk(CVP_ERR, "%s - faulting address: %lx, %d\n", dprintk(CVP_ERR, "%s - faulting address: %lx, %d\n",
__func__, iova, core->smmu_fault_count); __func__, iova, core->smmu_fault_count);
mutex_lock(&core->lock);
log = (core->log.snapshot_index > 0)? false : true; log = (core->log.snapshot_index > 0)? false : true;
list_for_each_entry(inst, &core->instances, list) { list_for_each_entry(inst, &core->instances, list) {
cvp_print_inst(CVP_ERR, inst); cvp_print_inst(CVP_ERR, inst);