Эх сурвалжийг харах

msm: camera: tfe: Add out of sync error handling

problem:
Need to handle continuous out_of_sync_frame error that
may come due to mup/VC mismatch.

solution:
Handle the scenario by counting error occurrences up to a fixed
threshold, since HW doesn't generate SOF when this error comes,
and triggering recovery.
Reset the count after the subsequent SOF is received.

CRs-Fixed: 3419224
Change-Id: Ifd44dc753385ae8cb8ce4b4cb283cd07aa8b2a44
Signed-off-by: Pranav Sanwal <[email protected]>
Pranav Sanwal 2 жил өмнө
parent
commit
c2ad5cd913

+ 124 - 11
drivers/cam_isp/isp_hw_mgr/cam_tfe_hw_mgr.c

@@ -32,6 +32,7 @@
 
 #define CAM_TFE_HW_CONFIG_TIMEOUT 60
 #define CAM_TFE_HW_CONFIG_WAIT_MAX_TRY  3
+#define MAX_TFE_INTERNAL_RECOVERY_ATTEMPTS    1
 
 #define TZ_SVC_SMMU_PROGRAM 0x15
 #define TZ_SAFE_SYSCALL_ID  0x3
@@ -2225,6 +2226,9 @@ static int cam_tfe_mgr_acquire_hw(void *hw_mgr_priv, void *acquire_hw_args)
 	tfe_ctx->cdm_ops = cdm_acquire.ops;
 	atomic_set(&tfe_ctx->cdm_done, 1);
 	tfe_ctx->last_cdm_done_req = 0;
+	tfe_ctx->current_mup = 0;
+	tfe_ctx->try_recovery_cnt = 0;
+	tfe_ctx->recovery_req_id = 0;
 
 	acquire_hw_info = (struct cam_isp_tfe_acquire_hw_info *)
 		acquire_args->acquire_info;
@@ -2490,6 +2494,7 @@ static int cam_tfe_mgr_acquire_dev(void *hw_mgr_priv, void *acquire_hw_args)
 	tfe_ctx->cdm_ops = cdm_acquire.ops;
 	atomic_set(&tfe_ctx->cdm_done, 1);
 	tfe_ctx->last_cdm_done_req = 0;
+	tfe_ctx->current_mup = 0;
 
 	isp_resource = (struct cam_isp_resource *)acquire_args->acquire_info;
 
@@ -2862,8 +2867,27 @@ static int cam_tfe_mgr_config_hw(void *hw_mgr_priv,
 		CAM_ERR(CAM_ISP, "Invalid context parameters");
 		return -EPERM;
 	}
-	if (atomic_read(&ctx->overflow_pending))
+	if (atomic_read(&ctx->overflow_pending)) {
+		CAM_DBG(CAM_ISP,
+			"Ctx[%pK][%d] Overflow pending, cannot apply req %llu",
+			ctx, ctx->ctx_index, cfg->request_id);
 		return -EINVAL;
+	}
+
+
+	/*
+	 * Assuming overflow recovery happens on req N, and we may
+	 * haven't got all the result for req N while apply N + 1,
+	 * so we reset try_recovery_cnt while apply N + 2.
+	 */
+	if (ctx->try_recovery_cnt &&
+		(cfg->request_id > (ctx->recovery_req_id + 1))) {
+		ctx->try_recovery_cnt = 0;
+		ctx->recovery_req_id = 0;
+		CAM_DBG(CAM_ISP,
+			"Ctx[%pK][%d] Reset try_recovery count for req %llu",
+			ctx, ctx->ctx_index, cfg->request_id);
+	}
 
 	hw_update_data = (struct cam_isp_prepare_hw_update_data  *) cfg->priv;
 	hw_update_data->isp_mgr_ctx = ctx;
@@ -3077,6 +3101,8 @@ static int cam_tfe_mgr_config_hw(void *hw_mgr_priv,
 			goto end;
 		} else {
 			rc = 0;
+			if (hw_update_data->mup_en)
+				ctx->current_mup = hw_update_data->mup_val;
 			CAM_DBG(CAM_ISP,
 				"config done Success for req_id=%llu ctx_index %d",
 				cfg->request_id, ctx->ctx_index);
@@ -3300,6 +3326,8 @@ static int cam_tfe_mgr_stop_hw(void *hw_mgr_priv, void *stop_hw_args)
 	cam_common_wait_for_completion_timeout(&ctx->config_done_complete,
 		msecs_to_jiffies(5));
 
+	ctx->current_mup = 0;
+
 	if (stop_isp->stop_only)
 		goto end;
 
@@ -3894,6 +3922,9 @@ static int cam_tfe_mgr_release_hw(void *hw_mgr_priv,
 	}
 	ctx->last_submit_bl_cmd.bl_count = 0;
 	ctx->packet = NULL;
+	ctx->current_mup = 0;
+	ctx->try_recovery_cnt = 0;
+	ctx->recovery_req_id = 0;
 
 	CAM_DBG(CAM_ISP, "Exit...ctx id:%d",
 		ctx->ctx_index);
@@ -5161,6 +5192,58 @@ static void cam_tfe_mgr_pf_dump(uint32_t res_id,
 	}
 }
 
+/*
+ * Program default register-update (RUP/MUP) settings for all TFE input
+ * resources of the given context via direct AHB writes, bypassing the CDM.
+ * Used during internal (out-of-sync) recovery to re-arm reg-update with the
+ * currently active MUP value. Returns 0 on success or the process_cmd error.
+ */
+static int cam_tfe_mgr_prog_default_settings(struct cam_tfe_hw_mgr_ctx *ctx)
+{
+	int rc = 0, i;
+	struct cam_isp_hw_mgr_res       *hw_mgr_res;
+	struct cam_isp_resource_node    *res;
+	struct list_head                *res_list;
+	struct cam_isp_hw_get_cmd_update    rup_args;
+	struct cam_isp_mode_switch_data     mup_config;
+
+
+	res_list = &ctx->res_list_tfe_in;
+
+	/* Walk every acquired TFE input resource (both HW splits). */
+	list_for_each_entry(hw_mgr_res, &ctx->res_list_tfe_in, list) {
+		if (hw_mgr_res->res_type != CAM_ISP_RESOURCE_TFE_IN)
+			continue;
+
+		for (i = 0; i < CAM_ISP_HW_SPLIT_MAX; i++) {
+			if (!hw_mgr_res->hw_res[i])
+				continue;
+
+			rup_args.res = hw_mgr_res->hw_res[i];
+			/* No CDM command buffer: reg_write selects the AHB path. */
+			rup_args.cmd.cmd_buf_addr = NULL;
+			rup_args.cmd.size = 0;
+			rup_args.cmd_type = CAM_ISP_HW_CMD_GET_REG_UPDATE;
+			rup_args.reg_write = true;
+
+			/* Re-apply the MUP value last committed in config_hw. */
+			mup_config.mup = ctx->current_mup;
+
+			rup_args.data = &mup_config;
+
+			res = rup_args.res;
+			rc = res->hw_intf->hw_ops.process_cmd(
+				res->hw_intf->hw_priv,
+				CAM_ISP_HW_CMD_GET_REG_UPDATE, &rup_args,
+				sizeof(struct cam_isp_hw_get_cmd_update));
+
+			/* Abort on first failure; remaining resources are skipped. */
+			if (rc) {
+				CAM_ERR(CAM_ISP, "failed for TFE: %d mup: %d ctx id:%d",
+					res->hw_intf->hw_idx, ctx->current_mup, ctx->ctx_index);
+				return rc;
+			}
+
+			CAM_DBG(CAM_ISP,
+				"Reg update for TFE: %d mup: %d ctx id:%d",
+				res->hw_intf->hw_idx, ctx->current_mup, ctx->ctx_index);
+		}
+
+	}
+
+	return rc;
+}
+
 static void cam_tfe_mgr_dump_pf_data(
 	struct cam_tfe_hw_mgr  *hw_mgr,
 	struct cam_hw_cmd_args *hw_cmd_args)
@@ -5377,6 +5460,9 @@ static int cam_tfe_mgr_cmd(void *hw_mgr_priv, void *cmd_args)
 				sizeof(struct cam_tfe_hw_comp_record) *
 				CAM_TFE_BUS_COMP_NUM_MAX);
 			break;
+		case CAM_ISP_HW_MGR_CMD_PROG_DEFAULT_CFG:
+			rc = cam_tfe_mgr_prog_default_settings(ctx);
+			break;
 		case CAM_ISP_HW_MGR_GET_SOF_TS:
 			rc = cam_tfe_mgr_cmd_get_sof_timestamp(ctx,
 				&isp_hw_cmd_args->u.sof_ts.curr,
@@ -5925,6 +6011,13 @@ static int cam_tfe_hw_mgr_handle_hw_err(
 		return rc;
 	}
 
+	if (ctx)
+		tfe_hw_mgr_ctx = (struct cam_tfe_hw_mgr_ctx *)ctx;
+	else {
+		CAM_ERR(CAM_ISP, "tfe hw mgr ctx NULL");
+		return rc;
+	}
+
 	err_evt_info = (struct cam_isp_hw_error_event_info *)event_info->event_data;
 	if (err_evt_info->err_type == CAM_TFE_IRQ_STATUS_VIOLATION)
 		error_event_data.error_type = CAM_ISP_HW_ERROR_VIOLATION;
@@ -5933,6 +6026,23 @@ static int cam_tfe_hw_mgr_handle_hw_err(
 		error_event_data.error_type = CAM_ISP_HW_ERROR_OVERFLOW;
 	else if (event_info->res_type == CAM_ISP_RESOURCE_TFE_OUT)
 		error_event_data.error_type = CAM_ISP_HW_ERROR_BUSIF_OVERFLOW;
+	else if (err_evt_info->err_type == CAM_TFE_IRQ_STATUS_OUT_OF_SYNC) {
+		error_event_data.error_type = CAM_ISP_HW_ERROR_CSID_SENSOR_FRAME_DROP;
+		if (tfe_hw_mgr_ctx->try_recovery_cnt < MAX_TFE_INTERNAL_RECOVERY_ATTEMPTS) {
+			error_event_data.try_internal_recovery = true;
+			if (!atomic_read(&tfe_hw_mgr_ctx->overflow_pending))
+				tfe_hw_mgr_ctx->try_recovery_cnt++;
+
+			if (!tfe_hw_mgr_ctx->recovery_req_id)
+				tfe_hw_mgr_ctx->recovery_req_id = tfe_hw_mgr_ctx->applied_req_id;
+		}
+
+		CAM_INFO(CAM_ISP,
+			"TFE: %u error: %u current_recovery_cnt: %u  recovery_req: %llu on ctx: %u",
+			event_info->hw_idx, error_event_data.error_type,
+			tfe_hw_mgr_ctx->try_recovery_cnt, tfe_hw_mgr_ctx->recovery_req_id,
+			tfe_hw_mgr_ctx->ctx_index);
+	}
 
 	spin_lock(&g_tfe_hw_mgr.ctx_lock);
 	if (err_evt_info->err_type == CAM_ISP_HW_ERROR_CSID_FATAL) {
@@ -5941,14 +6051,13 @@ static int cam_tfe_hw_mgr_handle_hw_err(
 		return rc;
 	}
 
-	if (ctx) {
-		tfe_hw_mgr_ctx =
-			(struct cam_tfe_hw_mgr_ctx *)ctx;
-		if ((event_info->res_type == CAM_ISP_RESOURCE_TFE_IN)
-			&& (!tfe_hw_mgr_ctx->is_rdi_only_context)
-			&& (event_info->res_id != CAM_ISP_HW_TFE_IN_CAMIF))
-			cam_tfe_hw_mgr_handle_hw_dump_info(tfe_hw_mgr_ctx, event_info);
-	}
+	if (event_info->res_type ==
+		CAM_ISP_RESOURCE_TFE_IN &&
+		!tfe_hw_mgr_ctx->is_rdi_only_context &&
+		event_info->res_id !=
+		CAM_ISP_HW_TFE_IN_CAMIF)
+		cam_tfe_hw_mgr_handle_hw_dump_info(
+			tfe_hw_mgr_ctx, event_info);
 
 	core_idx = event_info->hw_idx;
 
@@ -5967,7 +6076,7 @@ static int cam_tfe_hw_mgr_handle_hw_err(
 		return rc;
 	}
 
-	if (g_tfe_hw_mgr.debug_cfg.enable_recovery) {
+	if (!error_event_data.try_internal_recovery && g_tfe_hw_mgr.debug_cfg.enable_recovery) {
 		/* Trigger for recovery */
 		if (err_evt_info->err_type == CAM_TFE_IRQ_STATUS_VIOLATION)
 			recovery_data.error_type = CAM_ISP_HW_ERROR_VIOLATION;
@@ -5975,9 +6084,13 @@ static int cam_tfe_hw_mgr_handle_hw_err(
 			recovery_data.error_type = CAM_ISP_HW_ERROR_OVERFLOW;
 		cam_tfe_hw_mgr_do_error_recovery(&recovery_data);
 	} else {
-		CAM_DBG(CAM_ISP, "recovery is not enabled");
+		CAM_DBG(CAM_ISP, "recovery enabled: %d, internal_recovery: %d, ctx: %d",
+			error_event_data.try_internal_recovery,
+			g_tfe_hw_mgr.debug_cfg.enable_recovery,
+			tfe_hw_mgr_ctx->ctx_index);
 		rc = 0;
 	}
+
 end:
 	spin_unlock(&g_tfe_hw_mgr.ctx_lock);
 	return rc;

+ 6 - 0
drivers/cam_isp/isp_hw_mgr/cam_tfe_hw_mgr.h

@@ -126,6 +126,9 @@ struct cam_tfe_cdm_user_data {
  * @bw_config_version          BW Config version
  * @tfe_bus_comp_grp          pointer to tfe comp group info
  * @cdm_userdata               CDM user data
+ * @try_recovery_cnt          Retry count for overflow recovery
+ * @current_mup               Current MUP val
+ * @recovery_req_id           The request id on which overflow recovery happens
  */
 struct cam_tfe_hw_mgr_ctx {
 	struct list_head                list;
@@ -170,6 +173,9 @@ struct cam_tfe_hw_mgr_ctx {
 	uint32_t                        bw_config_version;
 	struct cam_tfe_hw_comp_record  *tfe_bus_comp_grp;
 	struct cam_tfe_cdm_user_data    cdm_userdata;
+	uint32_t                        current_mup;
+	uint32_t                        try_recovery_cnt;
+	uint64_t                        recovery_req_id;
 };
 
 /**

+ 2 - 0
drivers/cam_isp/isp_hw_mgr/isp_hw/include/cam_isp_hw.h

@@ -517,6 +517,7 @@ struct cam_isp_hw_fcg_cmd {
  * @cmd:             Command buffer information
  * @use_scratch_cfg: To indicate if it's scratch buffer config
  * @trigger_cdm_en:  Flag to indicate if cdm is trigger
+ * @reg_write:        if set use AHB to config rup/aup
  *
  */
 struct cam_isp_hw_get_cmd_update {
@@ -531,6 +532,7 @@ struct cam_isp_hw_get_cmd_update {
 		struct cam_isp_hw_get_wm_update      *rm_update;
 	};
 	bool trigger_cdm_en;
+	bool reg_write;
 };
 
 /*

+ 1 - 0
drivers/cam_isp/isp_hw_mgr/isp_hw/include/cam_tfe_hw_intf.h

@@ -36,6 +36,7 @@ enum cam_tfe_hw_irq_status {
 	CAM_TFE_IRQ_STATUS_OVERFLOW,
 	CAM_TFE_IRQ_STATUS_P2I_ERROR,
 	CAM_TFE_IRQ_STATUS_VIOLATION,
+	CAM_TFE_IRQ_STATUS_OUT_OF_SYNC,
 	CAM_TFE_IRQ_STATUS_MAX,
 };
 

+ 42 - 13
drivers/cam_isp/isp_hw_mgr/isp_hw/tfe_hw/cam_tfe_core.c

@@ -30,6 +30,7 @@ static const char drv_name[] = "tfe";
 #define CAM_TFE_DELAY_BW_REDUCTION_NUM_FRAMES 3
 #define CAM_TFE_CAMIF_IRQ_SOF_DEBUG_CNT_MAX  2
 #define CAM_TFE_DELAY_BW_REDUCTION_NUM_FRAMES 3
+#define CAM_TFE_MAX_OUT_OF_SYNC_ERR_COUNT     3
 
 struct cam_tfe_top_common_data {
 	struct cam_hw_soc_info                     *soc_info;
@@ -59,6 +60,7 @@ struct cam_tfe_top_priv {
 	struct timespec64                    error_ts;
 	uint32_t                             top_debug;
 	uint32_t                             last_mup_val;
+	atomic_t                             switch_out_of_sync_cnt;
 };
 
 struct cam_tfe_camif_data {
@@ -354,7 +356,9 @@ static void cam_tfe_log_tfe_in_debug_status(
 static void cam_tfe_log_error_irq_status(
 	struct cam_tfe_hw_core_info          *core_info,
 	struct cam_tfe_top_priv              *top_priv,
-	struct cam_tfe_irq_evt_payload       *evt_payload)
+	struct cam_tfe_irq_evt_payload       *evt_payload,
+	struct cam_isp_hw_error_event_info   *err_evt_info,
+	int8_t                               *report_err)
 {
 	struct cam_tfe_hw_info               *hw_info;
 	void __iomem                         *mem_base;
@@ -366,7 +370,7 @@ static void cam_tfe_log_error_irq_status(
 	struct timespec64 ts;
 	uint32_t  i, val_0, val_1, val_2, val_3;
 
-
+	*report_err = 1;
 	ktime_get_boottime_ts64(&ts);
 	hw_info = core_info->tfe_hw_info;
 	mem_base = top_priv->common_data.soc_info->reg_map[0].mem_base;
@@ -465,6 +469,15 @@ static void cam_tfe_log_error_irq_status(
 				core_info->core_index, top_priv->last_mup_val,
 				((cam_io_r(mem_base + common_reg->reg_update_cmd) >>
 				common_reg->mup_shift_val) & 1));
+			*report_err = 0;
+			atomic_inc(&top_priv->switch_out_of_sync_cnt);
+			if (atomic_read(&top_priv->switch_out_of_sync_cnt) >=
+				CAM_TFE_MAX_OUT_OF_SYNC_ERR_COUNT) {
+				*report_err = 1;
+				err_evt_info->err_type = CAM_TFE_IRQ_STATUS_OUT_OF_SYNC;
+				CAM_ERR(CAM_ISP, "TFE %d out of sync frame count: %d",
+					core_info->core_index, top_priv->switch_out_of_sync_cnt);
+			}
 		}
 	}
 
@@ -525,6 +538,7 @@ static int cam_tfe_error_irq_bottom_half(
 	struct cam_isp_hw_event_info         evt_info;
 	struct cam_tfe_hw_info              *hw_info;
 	uint32_t   error_detected = 0;
+	int8_t report_err = 1;
 
 	hw_info = core_info->tfe_hw_info;
 	evt_info.hw_idx = core_info->core_index;
@@ -548,13 +562,16 @@ static int cam_tfe_error_irq_bottom_half(
 		top_priv->error_ts.tv_nsec =
 			evt_payload->ts.mono_time.tv_nsec;
 
-		cam_tfe_log_error_irq_status(core_info, top_priv, evt_payload);
-		if (event_cb)
-			event_cb(event_cb_priv,
-				CAM_ISP_HW_EVENT_ERROR, (void *)&evt_info);
-		else
-			CAM_ERR(CAM_ISP, "TFE:%d invalid eventcb:",
-				core_info->core_index);
+		cam_tfe_log_error_irq_status(core_info, top_priv,
+			evt_payload, &err_evt_info, &report_err);
+		if (report_err) {
+			if (event_cb)
+				event_cb(event_cb_priv, CAM_ISP_HW_EVENT_ERROR,
+					(void *)&evt_info);
+			else
+				CAM_ERR(CAM_ISP, "TFE:%d invalid eventcb:",
+					core_info->core_index);
+		}
 	}
 
 	return 0;
@@ -604,6 +621,8 @@ static int cam_tfe_rdi_irq_bottom_half(
 			rdi_priv->event_cb(rdi_priv->priv,
 				CAM_ISP_HW_EVENT_SOF, (void *)&evt_info);
 
+		atomic_set(&top_priv->switch_out_of_sync_cnt, 0);
+
 		if (top_priv->top_debug &
 			CAMIF_DEBUG_ENABLE_SENSOR_DIAG_STATUS) {
 			common_reg  = rdi_priv->common_reg;
@@ -774,6 +793,8 @@ static int cam_tfe_camif_irq_bottom_half(
 			camif_priv->event_cb(camif_priv->priv,
 				CAM_ISP_HW_EVENT_SOF, (void *)&evt_info);
 
+		atomic_set(&top_priv->switch_out_of_sync_cnt, 0);
+
 		if (top_priv->top_debug &
 			CAMIF_DEBUG_ENABLE_SENSOR_DIAG_STATUS) {
 			common_reg  = camif_priv->common_reg;
@@ -1476,10 +1497,11 @@ static int cam_tfe_top_get_reg_update(
 		return -EINVAL;
 	}
 
+	soc_info = top_priv->common_data.soc_info;
 	in_res = cdm_args->res;
 	size = cdm_util_ops->cdm_required_size_reg_random(1);
 	/* since cdm returns dwords, we need to convert it into bytes */
-	if ((size * 4) > cdm_args->cmd.size) {
+	if ((!cdm_args->reg_write) && ((size * 4) > cdm_args->cmd.size)) {
 		CAM_ERR(CAM_ISP, "buf size:%d is not sufficient, expected: %d",
 			cdm_args->cmd.size, size);
 		return -EINVAL;
@@ -1515,10 +1537,15 @@ static int cam_tfe_top_get_reg_update(
 				top_priv->common_data.hw_intf->hw_idx, reg_val_pair[1]);
 	}
 
-	cdm_util_ops->cdm_write_regrandom(cdm_args->cmd.cmd_buf_addr,
-		1, reg_val_pair);
+	if (cdm_args->reg_write) {
+		cam_io_w_mb(reg_val_pair[1],
+			soc_info->reg_map[TFE_CORE_BASE_IDX].mem_base + reg_val_pair[0]);
+	} else {
+		cdm_util_ops->cdm_write_regrandom(cdm_args->cmd.cmd_buf_addr,
+			1, reg_val_pair);
 
-	cdm_args->cmd.used_bytes = size * 4;
+		cdm_args->cmd.used_bytes = size * 4;
+	}
 
 	return 0;
 }
@@ -2466,6 +2493,7 @@ int cam_tfe_top_start(struct cam_tfe_hw_core_info *core_info,
 	top_priv = (struct cam_tfe_top_priv *)core_info->top_priv;
 	in_res = (struct cam_isp_resource_node *)start_args;
 	hw_info = (struct cam_hw_info  *)in_res->hw_intf->hw_priv;
+	atomic_set(&top_priv->switch_out_of_sync_cnt, 0);
 
 	if (hw_info->hw_state != CAM_HW_STATE_POWER_UP) {
 		CAM_ERR(CAM_ISP, "TFE:%d HW not powered up",
@@ -2641,6 +2669,7 @@ int cam_tfe_top_stop(struct cam_tfe_hw_core_info *core_info,
 		}
 	}
 
+	atomic_set(&top_priv->switch_out_of_sync_cnt, 0);
 	core_info->irq_err_config_cnt--;
 	if (!core_info->irq_err_config_cnt)
 		cam_tfe_irq_config(core_info,