msm: eva: Add EVA FW/HW hang detection mechanism using WD ISR

A recovery mechanism is added to invoke SSR when the WD ISR is triggered.

Change-Id: I6f7a289f822c6f1a50494cd6a4855a2c3ba2cc72
Signed-off-by: Palak Joshi <quic_palakash@quicinc.com>
This commit is contained in:
Palak Joshi
2023-02-08 19:46:06 +05:30
parent ab30e5fc6f
commit 00f7039e3c
10 changed files with 70 additions and 3 deletions

View File

@@ -200,6 +200,7 @@ struct cvp_iface_q_info {
struct cvp_hal_data {
u32 irq;
u32 irq_wd;
phys_addr_t firmware_base;
u8 __iomem *register_base;
u8 __iomem *gcc_reg_base;
@@ -279,6 +280,7 @@ struct iris_hfi_device {
};
irqreturn_t cvp_hfi_isr(int irq, void *dev);
irqreturn_t iris_hfi_isr_wd(int irq, void *dev);
void cvp_iris_hfi_delete_device(void *device);
int cvp_iris_hfi_initialize(struct cvp_hfi_device *hdev, u32 device_id,

View File

@@ -35,6 +35,7 @@
// ysi - added for debug
#include <linux/clk/qcom.h>
#include "msm_cvp_common.h"
#define REG_ADDR_OFFSET_BITMASK 0x000FFFFF
#define QDSS_IOVA_START 0x80001000
@@ -3455,6 +3456,43 @@ irqreturn_t cvp_hfi_isr(int irq, void *dev)
return IRQ_HANDLED;
}
/*
 * iris_hfi_wd_work_handler() - deferred (workqueue) half of the HW watchdog IRQ.
 * @work: the queued work item; not used, the core is looked up from the
 *        global cvp_driver core list instead.
 *
 * When msm_cvp_hw_wd_recovery is non-zero, reports HAL_SYS_WATCHDOG_TIMEOUT
 * for this device through handle_sys_error() and then re-enables the watchdog
 * interrupt line. When it is zero, the kernel is deliberately crashed with
 * BUG_ON() so the hang can be debugged.
 *
 * If no core or HFI device can be found the handler returns without
 * re-enabling the watchdog interrupt, since there is nothing to recover.
 */
static void iris_hfi_wd_work_handler(struct work_struct *work)
{
	struct msm_cvp_core *core;
	struct iris_hfi_device *device;
	struct msm_cvp_cb_cmd_done response = {0};
	enum hal_command_response cmd = HAL_SYS_WATCHDOG_TIMEOUT;

	/*
	 * list_first_entry() never yields NULL: on an empty list it returns a
	 * bogus pointer derived from the list head. Use the _or_null variant
	 * so the emptiness check below is meaningful.
	 */
	core = list_first_entry_or_null(&cvp_driver->cores,
			struct msm_cvp_core, list);
	if (!core || !core->device) {
		dprintk(CVP_ERR, "%s: no core/device for HW WD handling\n",
			__func__);
		return;
	}

	device = core->device->hfi_device_data;
	if (!device) {
		dprintk(CVP_ERR, "%s: no HFI device for HW WD handling\n",
			__func__);
		return;
	}

	if (msm_cvp_hw_wd_recovery) {
		dprintk(CVP_ERR, "Cleaning up as HW WD recovery is enable %d\n",
				msm_cvp_hw_wd_recovery);
		response.device_id = device->device_id;
		handle_sys_error(cmd, (void *) &response);
		/* The ISR disabled this line before queuing us; re-arm it. */
		enable_irq(device->cvp_hal_data->irq_wd);
	} else {
		dprintk(CVP_ERR, "Crashing the device as HW WD recovery is disable %d\n",
				msm_cvp_hw_wd_recovery);
		BUG_ON(1);
	}
}
/* Single static work item used to defer watchdog handling out of IRQ context. */
static DECLARE_WORK(iris_hfi_wd_work, iris_hfi_wd_work_handler);

/*
 * iris_hfi_isr_wd() - interrupt handler for the HW watchdog interrupt.
 * @irq: number of the interrupt that fired
 * @dev: cookie supplied at request_irq() time; treated here as a
 *       struct iris_hfi_device pointer
 *
 * Logs the event, disables the interrupt line without waiting for running
 * handlers, and queues iris_hfi_wd_work on the device's cvp_workq so the
 * actual handling happens in process context.
 * NOTE(review): the line is presumably disabled because it is requested as
 * level-high and would otherwise keep firing until handled - confirm.
 *
 * Return: always IRQ_HANDLED.
 */
irqreturn_t iris_hfi_isr_wd(int irq, void *dev)
{
	struct iris_hfi_device *hfi_dev = (struct iris_hfi_device *)dev;

	dprintk(CVP_ERR, "Got HW WDOG IRQ! \n");

	disable_irq_nosync(irq);
	queue_work(hfi_dev->cvp_workq, &iris_hfi_wd_work);

	return IRQ_HANDLED;
}
static int __init_reset_clk(struct msm_cvp_platform_resources *res,
int reset_index)
{
@@ -4168,6 +4206,13 @@ static void interrupt_init_iris2(struct iris_hfi_device *device)
__write_register(device, CVP_WRAPPER_INTR_MASK, mask_val);
dprintk(CVP_REG, "Init irq: reg: %x, mask value %x\n",
CVP_WRAPPER_INTR_MASK, mask_val);
mask_val = 0;
mask_val = __read_register(device, CVP_SS_IRQ_MASK);
mask_val &= ~(CVP_SS_INTR_BMASK);
__write_register(device, CVP_SS_IRQ_MASK, mask_val);
dprintk(CVP_REG, "Init irq_wd: reg: %x, mask value %x\n",
CVP_SS_IRQ_MASK, mask_val);
}
static void setup_dsp_uc_memmap_vpu5(struct iris_hfi_device *device)

View File

@@ -96,10 +96,11 @@
#define CVP_WRAPPER_INTR_STATUS_A2HWD_BMSK 0x8
#define CVP_WRAPPER_INTR_STATUS_A2H_BMSK 0x4
#define CVP_SS_IRQ_MASK (CVP_TOP_BASE_OFFS + 0x04)
#define CVP_SS_INTR_BMASK (0x100)
#define CVP_WRAPPER_INTR_MASK (CVP_WRAPPER_BASE_OFFS + 0x10)
#define CVP_FATAL_INTR_BMSK (CVP_WRAPPER_INTR_MASK_CPU_NOC_BMSK | \
CVP_WRAPPER_INTR_MASK_CORE_NOC_BMSK | \
CVP_WRAPPER_INTR_MASK_A2HWD_BMSK)
CVP_WRAPPER_INTR_MASK_CORE_NOC_BMSK )
#define CVP_WRAPPER_INTR_MASK_CPU_NOC_BMSK 0x40
#define CVP_WRAPPER_INTR_MASK_CORE_NOC_BMSK 0x20
#define CVP_WRAPPER_INTR_MASK_A2HWD_BMSK 0x8

View File

@@ -589,7 +589,7 @@ static void msm_comm_clean_notify_client(struct msm_cvp_core *core)
mutex_unlock(&core->lock);
}
static void handle_sys_error(enum hal_command_response cmd, void *data)
void handle_sys_error(enum hal_command_response cmd, void *data)
{
struct msm_cvp_cb_cmd_done *response = data;
struct msm_cvp_core *core = NULL;

View File

@@ -23,6 +23,7 @@ int msm_cvp_comm_suspend(int core_id);
void msm_cvp_comm_session_clean(struct msm_cvp_inst *inst);
int msm_cvp_comm_kill_session(struct msm_cvp_inst *inst);
void msm_cvp_comm_generate_sys_error(struct msm_cvp_inst *inst);
void handle_sys_error(enum hal_command_response cmd, void *data);
int msm_cvp_comm_smem_cache_operations(struct msm_cvp_inst *inst,
struct msm_cvp_smem *mem, enum smem_cache_ops cache_ops);
int msm_cvp_comm_check_core_init(struct msm_cvp_core *core);

View File

@@ -37,6 +37,7 @@ bool msm_cvp_mmrm_enabled = !true;
bool msm_cvp_dcvs_disable = !true;
int msm_cvp_minidump_enable = !1;
int cvp_kernel_fence_enabled = 2;
int msm_cvp_hw_wd_recovery = 1;
#define MAX_DBG_BUF_SIZE 4096
@@ -453,6 +454,8 @@ struct dentry *msm_cvp_debugfs_init_core(struct msm_cvp_core *core,
dprintk(CVP_ERR, "debugfs_create: ssr_stall fail\n");
goto failed_create_dir;
}
debugfs_create_u32("hw_wd_recovery", 0644, dir,
&msm_cvp_hw_wd_recovery);
failed_create_dir:
return dir;
}

View File

@@ -70,6 +70,7 @@ extern bool msm_cvp_mmrm_enabled;
extern bool msm_cvp_dcvs_disable;
extern int msm_cvp_minidump_enable;
extern int cvp_kernel_fence_enabled;
extern int msm_cvp_hw_wd_recovery;
#define dprintk(__level, __fmt, arg...) \
do { \

View File

@@ -900,6 +900,11 @@ int cvp_read_platform_resources_from_dt(
dprintk(CVP_CORE, "%s: res->irq:%d \n",
__func__, res->irq);
//Parsing for WD interrupt
res->irq_wd = platform_get_irq(pdev, 1);
dprintk(CVP_CORE, "%s: res->irq_wd:%d \n",
__func__, res->irq_wd);
rc = msm_cvp_load_subcache_info(res);
if (rc)
dprintk(CVP_WARN, "Failed to load subcache info: %d\n", rc);

View File

@@ -178,6 +178,7 @@ struct msm_cvp_platform_resources {
uint32_t gcc_reg_size;
struct cvp_fw_reg_mappings reg_mappings;
uint32_t irq;
uint32_t irq_wd;
uint32_t sku_version;
struct allowed_clock_rates_table *allowed_clks_tbl;
u32 allowed_clks_tbl_size;

View File

@@ -130,6 +130,7 @@ static int msm_cvp_vm_init_reg_and_irq(struct iris_hfi_device *device,
}
hal->irq = res->irq;
hal->irq_wd = res->irq_wd;
hal->firmware_base = res->firmware_base;
hal->register_base = devm_ioremap(&res->pdev->dev,
res->register_base, res->register_size);
@@ -159,6 +160,13 @@ static int msm_cvp_vm_init_reg_and_irq(struct iris_hfi_device *device,
goto error_irq_fail;
}
rc = request_irq(res->irq_wd, iris_hfi_isr_wd, IRQF_TRIGGER_HIGH,
"msm_cvp", device);
if (unlikely(rc)) {
dprintk(CVP_ERR, "() :request_irq for WD failed\n");
goto error_irq_fail;
}
disable_irq_nosync(res->irq);
dprintk(CVP_INFO,
"firmware_base = %pa, register_base = %pa, register_size = %d\n",