
msm: camera: isp: HW reset and recovery for bus overflow

This change performs HW recovery and reapplies all
in-flight requests for bus overflow issues.

When we hit a bus overflow KMD fatal error, instead of
sending an error to UMD we first attempt internal
recovery, and send a warning message to UMD once internal
recovery happens. If recovery fails, we then send the
error to UMD.
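
In short, the new path in __cam_isp_ctx_handle_error() tries the
internal reapply first and only falls back to the fatal path when
that fails. A minimal user-space sketch of the decision logic
(the struct and helper below are simplified stand-ins, not the
driver's real types):

    /* Recoverable errors attempt internal recovery first; a fatal
     * UMD error is sent only if recovery cannot be started. */
    #include <stdbool.h>
    #include <stdio.h>

    struct error_event { bool try_internal_recovery; };

    /* Stand-in for __cam_isp_ctx_trigger_error_req_reapply(): move
     * active/wait requests back to pending, then ask CRM to reapply. */
    static int trigger_error_req_reapply(void)
    {
            return 0; /* 0 on success, negative errno on failure */
    }

    static void handle_error(const struct error_event *evt)
    {
            if (evt->try_internal_recovery &&
                !trigger_error_req_reapply()) {
                    printf("internal recovery started, warn UMD\n");
                    return;
            }
            printf("recovery not possible, send error to UMD\n");
    }

    int main(void)
    {
            struct error_event evt = { .try_internal_recovery = true };

            handle_error(&evt);
            return 0;
    }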

CRs-Fixed: 3098892
Change-Id: Idee3679ff06227f985e106470bc1f5a14c9cb404
Signed-off-by: chengxue <[email protected]>
Signed-off-by: Depeng Shao <[email protected]>
chengxue committed 3 years ago
commit ffe5621b5e

+ 139 - 8
drivers/cam_isp/cam_isp_context.c

@@ -848,7 +848,7 @@ static int cam_isp_ctx_dump_req(
 }
 
 static int __cam_isp_ctx_enqueue_request_in_order(
-	struct cam_context *ctx, struct cam_ctx_request *req)
+	struct cam_context *ctx, struct cam_ctx_request *req, bool lock)
 {
 	struct cam_ctx_request           *req_current;
 	struct cam_ctx_request           *req_prev;
@@ -856,7 +856,8 @@ static int __cam_isp_ctx_enqueue_request_in_order(
 	struct cam_isp_context           *ctx_isp;
 
 	INIT_LIST_HEAD(&temp_list);
-	spin_lock_bh(&ctx->lock);
+	if (lock)
+		spin_lock_bh(&ctx->lock);
 	if (list_empty(&ctx->pending_req_list)) {
 		list_add_tail(&req->list, &ctx->pending_req_list);
 	} else {
@@ -887,7 +888,8 @@ static int __cam_isp_ctx_enqueue_request_in_order(
 	ctx_isp = (struct cam_isp_context *) ctx->ctx_priv;
 	__cam_isp_ctx_update_event_record(ctx_isp,
 		CAM_ISP_CTX_EVENT_SUBMIT, req);
-	spin_unlock_bh(&ctx->lock);
+	if (lock)
+		spin_unlock_bh(&ctx->lock);
 	return 0;
 }
 
@@ -3193,6 +3195,113 @@ static void get_notification_evt_params(uint32_t hw_error, uint32_t *fence_evt_c
 	*recovery_type = recovery_type_temp;
 }
 
+static bool __cam_isp_ctx_request_can_reapply(
+	struct cam_isp_ctx_req *req_isp)
+{
+	int i;
+
+	for (i = 0; i < req_isp->num_fence_map_out; i++)
+		if (req_isp->fence_map_out[i].sync_id == -1)
+			return false;
+
+	return true;
+}
+
+static int __cam_isp_ctx_trigger_error_req_reapply(
+	struct cam_isp_context *ctx_isp)
+{
+	int                             rc = 0;
+	struct cam_ctx_request          *req = NULL;
+	struct cam_ctx_request          *req_to_reapply = NULL;
+	struct cam_ctx_request          *req_temp;
+	struct cam_isp_ctx_req          *req_isp = NULL;
+	struct cam_context              *ctx = ctx_isp->base;
+
+	/*
+	 * For errors that are recoverable within KMD, we try an
+	 * internal HW stop and restart, and notify CRM to reapply
+	 * with the help of the bubble control flow.
+	 */
+	if (list_empty(&ctx->active_req_list)) {
+		CAM_DBG(CAM_ISP,
+			"handling error with no active request");
+		if (list_empty(&ctx->wait_req_list)) {
+			CAM_WARN(CAM_ISP,
+				"Reapply with no active/wait request");
+			rc = -EINVAL;
+			goto end;
+		}
+	}
+
+	if (!list_empty(&ctx->active_req_list)) {
+		list_for_each_entry_safe_reverse(req, req_temp,
+			&ctx->active_req_list, list) {
+			/*
+			 * If some fences of the active request have already
+			 * signaled, we shouldn't do recovery, in order to
+			 * keep buffer and timestamp consistency.
+			 */
+			req_isp = (struct cam_isp_ctx_req *)req->req_priv;
+			if (!__cam_isp_ctx_request_can_reapply(req_isp)) {
+				CAM_INFO(CAM_ISP,
+					"ctx:%u fence has partially signaled, cannot do recovery for req %llu",
+					ctx->ctx_id, req->request_id);
+				rc = -EINVAL;
+				goto end;
+			}
+			list_del_init(&req->list);
+			__cam_isp_ctx_enqueue_request_in_order(ctx, req, false);
+			ctx_isp->active_req_cnt--;
+			CAM_DBG(CAM_ISP, "ctx:%u move active req %llu to pending",
+				ctx->ctx_id, req->request_id);
+		}
+	}
+
+	if (!list_empty(&ctx->wait_req_list)) {
+		list_for_each_entry_safe_reverse(req, req_temp,
+			&ctx->wait_req_list, list) {
+			list_del_init(&req->list);
+			__cam_isp_ctx_enqueue_request_in_order(ctx, req, false);
+			CAM_DBG(CAM_ISP, "ctx:%u move wait req %llu to pending",
+				ctx->ctx_id, req->request_id);
+		}
+	}
+
+	req_to_reapply = list_first_entry(&ctx->pending_req_list,
+		struct cam_ctx_request, list);
+	req_isp = (struct cam_isp_ctx_req *)req_to_reapply->req_priv;
+	ctx_isp->substate_activated = CAM_ISP_CTX_ACTIVATED_EPOCH;
+	ctx_isp->recovery_req_id = req_to_reapply->request_id;
+	atomic_set(&ctx_isp->internal_recovery_set, 1);
+
+	CAM_INFO(CAM_ISP, "ctx:%u notify CRM to reapply req %llu",
+		ctx->ctx_id, req_to_reapply->request_id);
+
+	rc = __cam_isp_ctx_notify_error_util(CAM_TRIGGER_POINT_SOF,
+		CRM_KMD_WARN_INTERNAL_RECOVERY,
+		req_to_reapply->request_id,
+		ctx_isp);
+	if (rc) {
+		/* Unable to notify CRM to reapply, fall back to normal error flow */
+		CAM_WARN(CAM_ISP,
+			"ctx:%u unable to notify CRM for req %llu",
+			ctx->ctx_id, ctx_isp->recovery_req_id);
+		ctx_isp->recovery_req_id = 0;
+		atomic_set(&ctx_isp->internal_recovery_set, 0);
+		goto end;
+	}
+
+	/* Notify userland that KMD has done internal recovery */
+	__cam_isp_ctx_notify_v4l2_error_event(CAM_REQ_MGR_WARN_TYPE_KMD_RECOVERY,
+		0, req_to_reapply->request_id, ctx);
+
+	CAM_DBG(CAM_ISP, "ctx:%u handling reapply done for req %llu",
+		ctx->ctx_id, req_to_reapply->request_id);
+
+end:
+	return rc;
+}
+
 static int __cam_isp_ctx_handle_error(struct cam_isp_context *ctx_isp,
 	void *evt_data)
 {
@@ -3217,6 +3326,12 @@ static int __cam_isp_ctx_handle_error(struct cam_isp_context *ctx_isp,
 
 	CAM_DBG(CAM_ISP, "Enter HW error_type = %d", error_event_data->error_type);
 
+	if (error_event_data->try_internal_recovery) {
+		rc = __cam_isp_ctx_trigger_error_req_reapply(ctx_isp);
+		if (!rc)
+			goto exit;
+	}
+
 	if (!ctx_isp->offline_context)
 		__cam_isp_ctx_pause_crm_timer(ctx);
 
@@ -4083,6 +4198,19 @@ static int __cam_isp_ctx_apply_req_in_activated_state(
 			"ctx_id:%d Processing bubble cannot apply Request Id %llu",
 			ctx->ctx_id,
 			apply->request_id);
+		rc = -EFAULT;
+		goto end;
+	}
+
+	/*
+	 * While ISP is processing internal recovery, CRM may still apply
+	 * a req to the ISP ctx. In this case, we should reject the apply.
+	 */
+	if (atomic_read(&ctx_isp->internal_recovery_set)) {
+		CAM_INFO_RATE_LIMIT(CAM_ISP,
+			"ctx_id:%d Processing recovery cannot apply Request Id %llu",
+			ctx->ctx_id,
+			apply->request_id);
 		rc = -EAGAIN;
 		goto end;
 	}
@@ -4165,7 +4293,10 @@ static int __cam_isp_ctx_apply_req_in_activated_state(
 		ctx_isp->substate_activated = next_state;
 		ctx_isp->last_applied_req_id = apply->request_id;
 		list_del_init(&req->list);
-		list_add_tail(&req->list, &ctx->wait_req_list);
+		if (atomic_read(&ctx_isp->internal_recovery_set))
+			__cam_isp_ctx_enqueue_request_in_order(ctx, req, false);
+		else
+			list_add_tail(&req->list, &ctx->wait_req_list);
 		CAM_DBG(CAM_ISP, "new substate Substate[%s], applied req %lld",
 			__cam_isp_ctx_substate_val_to_type(next_state),
 			ctx_isp->last_applied_req_id);
@@ -5865,7 +5996,7 @@ static int __cam_isp_ctx_config_dev_in_top_state(
 		}
 
 		if (ctx_isp->offline_context) {
-			__cam_isp_ctx_enqueue_request_in_order(ctx, req);
+			__cam_isp_ctx_enqueue_request_in_order(ctx, req, true);
 		} else if (ctx->ctx_crm_intf->add_req) {
 			memset(&add_req, 0, sizeof(add_req));
 			add_req.link_hdl = ctx->link_hdl;
@@ -5882,7 +6013,7 @@ static int __cam_isp_ctx_config_dev_in_top_state(
 						req->request_id);
 			} else {
 				__cam_isp_ctx_enqueue_request_in_order(
-					ctx, req);
+					ctx, req, true);
 			}
 		} else {
 			CAM_ERR(CAM_ISP, "Unable to add request: req id=%llu", req->request_id);
@@ -7304,7 +7435,7 @@ static int __cam_isp_ctx_apply_default_settings(
 	struct cam_isp_context *ctx_isp =
 		(struct cam_isp_context *) ctx->ctx_priv;
 
-	if (!ctx_isp->use_default_apply)
+	if ((!ctx_isp->use_default_apply) && !(atomic_read(&ctx_isp->internal_recovery_set)))
 		return 0;
 
 	if (!(apply->trigger_point & ctx_isp->subscribe_event)) {
@@ -7318,7 +7449,7 @@ static int __cam_isp_ctx_apply_default_settings(
 	if (apply->trigger_point != CAM_TRIGGER_POINT_SOF)
 		return 0;
 
-	if ((ctx_isp->aeb_enabled) && (atomic_read(&ctx_isp->internal_recovery_set)))
+	if (atomic_read(&ctx_isp->internal_recovery_set))
 		return __cam_isp_ctx_reset_and_recover(false, ctx);
 
 	CAM_DBG(CAM_ISP,

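The new bool argument on __cam_isp_ctx_enqueue_request_in_order()
is a plain conditional-locking pattern: the reapply path passes
false (presumably because those callers already run serialized in
the error/apply path), while the existing config paths keep
passing true. A minimal user-space sketch of the pattern, with a
pthread mutex standing in for spin_lock_bh and illustrative names:

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    static pthread_mutex_t ctx_lock = PTHREAD_MUTEX_INITIALIZER;
    static int pending_cnt;

    /* Only take the lock when the caller asks for it, so callers
     * that already serialize access can skip it. */
    static void enqueue_in_order(int req_id, bool lock)
    {
            if (lock)
                    pthread_mutex_lock(&ctx_lock);
            pending_cnt++; /* stand-in for the ordered list insert */
            printf("req %d queued, pending=%d\n", req_id, pending_cnt);
            if (lock)
                    pthread_mutex_unlock(&ctx_lock);
    }

    int main(void)
    {
            enqueue_in_order(1, true);  /* normal config path */
            enqueue_in_order(2, false); /* recovery/reapply path */
            return 0;
    }
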
+ 68 - 8
drivers/cam_isp/isp_hw_mgr/cam_ife_hw_mgr.c

@@ -43,6 +43,11 @@
 #define CAM_ISP_GENERIC_BLOB_TYPE_MAX               \
 	(CAM_ISP_GENERIC_BLOB_TYPE_CSID_QCFA_CONFIG + 1)
 
+#define MAX_RETRY_ATTEMPTS 1
+
+#define CAM_ISP_CSID_ERROR_CAN_RECOVERY             \
+	CAM_ISP_HW_ERROR_RECOVERY_OVERFLOW
+
 static uint32_t blob_type_hw_cmd_map[CAM_ISP_GENERIC_BLOB_TYPE_MAX] = {
 	CAM_ISP_HW_CMD_GET_HFR_UPDATE,
 	CAM_ISP_HW_CMD_CLOCK_UPDATE,
@@ -584,9 +589,10 @@ static inline bool cam_ife_hw_mgr_is_sfe_out_port(uint32_t res_id)
 	return is_sfe_out;
 }
 
-static int cam_ife_hw_mgr_notify_overflow(
+static int cam_ife_hw_mgr_check_and_notify_overflow(
 	struct cam_isp_hw_event_info    *evt,
-	void                            *ctx)
+	void                            *ctx,
+	bool                            *is_bus_overflow)
 {
 	int                             i;
 	int                             res_id;
@@ -594,6 +600,7 @@ static int cam_ife_hw_mgr_notify_overflow(
 	int                             sfe_res_id = -1;
 	struct cam_hw_intf             *hw_if = NULL;
 	struct cam_ife_hw_mgr_ctx      *hw_mgr_ctx = ctx;
+	struct cam_isp_hw_overflow_info overflow_info;
 
 	switch(evt->res_id) {
 	case  CAM_IFE_PIX_PATH_RES_IPP:
@@ -648,10 +655,22 @@ static int cam_ife_hw_mgr_notify_overflow(
 			return -EINVAL;
 		}
 
-		if (hw_if->hw_ops.process_cmd)
+		if (hw_if->hw_ops.process_cmd) {
+			overflow_info.res_id = res_id;
 			hw_if->hw_ops.process_cmd(hw_if->hw_priv,
 				CAM_ISP_HW_NOTIFY_OVERFLOW,
-				&res_id, sizeof(int));
+				&overflow_info,
+				sizeof(struct cam_isp_hw_overflow_info));
+
+			CAM_DBG(CAM_ISP,
+				"check and notify hw idx %d type %d bus overflow happened %d",
+				hw_mgr_ctx->base[i].idx,
+				hw_mgr_ctx->base[i].hw_type,
+				overflow_info.is_bus_overflow);
+
+			if (overflow_info.is_bus_overflow)
+				*is_bus_overflow = true;
+		}
 	}
 
 	return 0;
@@ -4978,6 +4997,8 @@ static int cam_ife_mgr_acquire_hw(void *hw_mgr_priv, void *acquire_hw_args)
 	ife_ctx->hw_mgr = ife_hw_mgr;
 	ife_ctx->cdm_ops =  cam_cdm_publish_ops();
 	ife_ctx->common.sec_pf_evt_cb = acquire_args->sec_pf_evt_cb;
+	ife_ctx->try_recovery_cnt = 0;
+	ife_ctx->recovery_req_id = 0;
 
 	acquire_hw_info =
 		(struct cam_isp_acquire_hw_info *)acquire_args->acquire_info;
@@ -6027,6 +6048,20 @@ static int cam_ife_mgr_config_hw(void *hw_mgr_priv,
 		return -EPERM;
 	}
 
+	/*
+	 * Assuming overflow recovery happens on req N, we may not
+	 * have all the results for req N while applying N + 1,
+	 * so we reset try_recovery_cnt while applying N + 2.
+	 */
+	if (ctx->try_recovery_cnt &&
+		(cfg->request_id > (ctx->recovery_req_id + 1))) {
+		ctx->try_recovery_cnt = 0;
+		ctx->recovery_req_id = 0;
+		CAM_DBG(CAM_ISP,
+			"Ctx[%pK][%d] Reset overflow recovery count for req %llu",
+			ctx, ctx->ctx_index, cfg->request_id);
+	}
+
 	hw_update_data = (struct cam_isp_prepare_hw_update_data  *) cfg->priv;
 	hw_update_data->isp_mgr_ctx = ctx;
 	ctx->cdm_userdata.request_id = cfg->request_id;
@@ -7193,6 +7228,8 @@ static int cam_ife_mgr_release_hw(void *hw_mgr_priv,
 	kfree(ctx->scratch_buf_info.ife_scratch_config);
 	ctx->scratch_buf_info.sfe_scratch_config = NULL;
 	ctx->scratch_buf_info.ife_scratch_config = NULL;
+	ctx->try_recovery_cnt = 0;
+	ctx->recovery_req_id = 0;
 
 	memset(&ctx->flags, 0, sizeof(struct cam_ife_hw_mgr_ctx_flags));
 	atomic_set(&ctx->overflow_pending, 0);
@@ -12180,8 +12217,6 @@ static int cam_ife_mgr_recover_hw(void *priv, void *data)
 			ctx =  recovery_data->affected_ctx[i];
 			start_args.ctxt_to_hw_map = ctx;
 
-			atomic_set(&ctx->overflow_pending, 0);
-
 			rc = cam_ife_mgr_restart_hw(&start_args);
 			if (rc) {
 				CAM_ERR(CAM_ISP, "CTX start failed(%d)", rc);
@@ -12189,6 +12224,8 @@ static int cam_ife_mgr_recover_hw(void *priv, void *data)
 			}
 			CAM_DBG(CAM_ISP, "Started resources rc (%d)", rc);
 		}
+
+		atomic_set(&ctx->overflow_pending, 0);
 		CAM_DBG(CAM_ISP, "Recovery Done rc (%d)", rc);
 
 		break;
@@ -12398,6 +12435,7 @@ static int cam_ife_hw_mgr_handle_csid_error(
 	struct cam_isp_hw_error_event_info      *err_evt_info;
 	struct cam_isp_hw_error_event_data       error_event_data = {0};
 	struct cam_ife_hw_event_recovery_data    recovery_data = {0};
+	bool                                     is_bus_overflow = false;
 
 	if (!event_info->event_data) {
 		CAM_ERR(CAM_ISP,
@@ -12437,7 +12475,28 @@ static int cam_ife_hw_mgr_handle_csid_error(
 		CAM_ISP_HW_ERROR_RECOVERY_OVERFLOW |
 		CAM_ISP_HW_ERROR_CSID_FRAME_SIZE)) {
 
-		cam_ife_hw_mgr_notify_overflow(event_info, ctx);
+		cam_ife_hw_mgr_check_and_notify_overflow(event_info,
+			ctx, &is_bus_overflow);
+
+		/*
+		 * When a CSID overflow IRQ comes, we need to read the bus
+		 * overflow status to check whether it's really a bus
+		 * overflow issue; only do recovery in bus overflow cases.
+		 */
+		if ((err_type & CAM_ISP_CSID_ERROR_CAN_RECOVERY) &&
+			is_bus_overflow) {
+			if (ctx->try_recovery_cnt < MAX_RETRY_ATTEMPTS) {
+				error_event_data.try_internal_recovery = true;
+				if (!atomic_read(&ctx->overflow_pending))
+					ctx->try_recovery_cnt++;
+				if (!ctx->recovery_req_id)
+					ctx->recovery_req_id = ctx->applied_req_id;
+			}
+			CAM_DBG(CAM_ISP, "CSID[%u] Try recovery count %u on req %llu",
+				event_info->hw_idx,
+				ctx->try_recovery_cnt,
+				ctx->recovery_req_id);
+		}
 
 		error_event_data.error_type |= err_type;
 		recovery_data.error_type = err_type;
@@ -12450,7 +12509,8 @@ end:
 	if (rc || !recovery_data.no_of_context)
 		goto skip_recovery;
 
-	cam_ife_hw_mgr_do_error_recovery(&recovery_data);
+	if (!error_event_data.try_internal_recovery)
+		cam_ife_hw_mgr_do_error_recovery(&recovery_data);
 	CAM_DBG(CAM_ISP, "Exit CSID[%u] error %d", event_info->hw_idx,
 		err_type);
 

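Read together, try_recovery_cnt and recovery_req_id above act as
a small throttle: at most MAX_RETRY_ATTEMPTS (one) recovery
attempt per overflow, cleared only once a request two IDs past
the recovery point is configured, since results for req N may
still be outstanding while N + 1 is applied. A self-contained
sketch of that bookkeeping (field names follow the diff; the
wrapper struct and function names are simplified):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define MAX_RETRY_ATTEMPTS 1

    struct mgr_ctx {
            uint32_t try_recovery_cnt;
            uint64_t recovery_req_id;
    };

    /* On config of req N+2, forget the earlier recovery attempt. */
    static void maybe_reset(struct mgr_ctx *c, uint64_t request_id)
    {
            if (c->try_recovery_cnt &&
                request_id > c->recovery_req_id + 1) {
                    c->try_recovery_cnt = 0;
                    c->recovery_req_id = 0;
            }
    }

    /* On a recoverable bus overflow: allow at most one attempt. */
    static bool try_recovery(struct mgr_ctx *c, uint64_t applied_req)
    {
            if (c->try_recovery_cnt >= MAX_RETRY_ATTEMPTS)
                    return false;
            c->try_recovery_cnt++;
            if (!c->recovery_req_id)
                    c->recovery_req_id = applied_req;
            return true;
    }

    int main(void)
    {
            struct mgr_ctx c = {0};

            printf("1st overflow: %d\n", try_recovery(&c, 10)); /* 1 */
            printf("2nd overflow: %d\n", try_recovery(&c, 10)); /* 0 */
            maybe_reset(&c, 12); /* req N+2 configured */
            printf("after reset:  %d\n", try_recovery(&c, 12)); /* 1 */
            return 0;
    }
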
+ 4 - 0
drivers/cam_isp/isp_hw_mgr/cam_ife_hw_mgr.h

@@ -282,6 +282,8 @@ struct cam_ife_cdm_user_data {
  * @current_mup:            Current MUP val, scratch will then apply the same as previously
  *                          applied request
  * @curr_num_exp:           Current num of exposures
+ * @try_recovery_cnt:       Retry count for overflow recovery
+ * @recovery_req_id:        The request id on which overflow recovery happens
  *
  */
 struct cam_ife_hw_mgr_ctx {
@@ -339,6 +341,8 @@ struct cam_ife_hw_mgr_ctx {
 	atomic_t                                   recovery_id;
 	uint32_t                                   current_mup;
 	uint32_t                                   curr_num_exp;
+	uint32_t                                   try_recovery_cnt;
+	uint64_t                                   recovery_req_id;
 };
 
 /**

+ 2 - 0
drivers/cam_isp/isp_hw_mgr/include/cam_isp_hw_mgr_intf.h

@@ -334,12 +334,14 @@ struct cam_isp_hw_eof_event_data {
  * @recovery_enabled:      Identifies if the context needs to recover & reapply
  *                         this request
  * @enable_req_dump:       Enable request dump on HW errors
+ * @try_internal_recovery: Enable internal recovery on HW errors
  */
 struct cam_isp_hw_error_event_data {
 	uint32_t             error_type;
 	uint64_t             timestamp;
 	bool                 recovery_enabled;
 	bool                 enable_req_dump;
+	bool                 try_internal_recovery;
 };
 
 /**

+ 13 - 0
drivers/cam_isp/isp_hw_mgr/isp_hw/include/cam_isp_hw.h

@@ -536,4 +536,17 @@ struct cam_isp_hw_init_config_update {
 	struct cam_isp_init_config     *init_config;
 };
 
+/*
+ * struct cam_isp_hw_overflow_info:
+ *
+ * @Brief:                  ISP hw bus overflow info
+ *
+ * @res_id:                 Resource type
+ * @is_bus_overflow:        Indicate whether bus overflow happened
+ */
+struct cam_isp_hw_overflow_info {
+	int                     res_id;
+	bool                    is_bus_overflow;
+};
+
 #endif /* _CAM_ISP_HW_H_ */

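As the cam_ife_hw_mgr.c hunk above shows, the hw mgr fills in
res_id, sends this struct down via the CAM_ISP_HW_NOTIFY_OVERFLOW
process_cmd, and reads is_bus_overflow back. A minimal sketch of
that round trip (the callback body is a stub standing in for the
VFE/SFE top handlers, which actually read the bus overflow status
register):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    #define CAM_ISP_HW_NOTIFY_OVERFLOW 1

    struct cam_isp_hw_overflow_info {
            int  res_id;
            bool is_bus_overflow;
    };

    /* Stub handler: pretend the bus overflow status reg was set. */
    static int process_cmd(unsigned int cmd, void *args, size_t size)
    {
            struct cam_isp_hw_overflow_info *info = args;

            if (cmd != CAM_ISP_HW_NOTIFY_OVERFLOW || size != sizeof(*info))
                    return -1;
            info->is_bus_overflow = true;
            return 0;
    }

    int main(void)
    {
            struct cam_isp_hw_overflow_info info = { .res_id = 3 };

            process_cmd(CAM_ISP_HW_NOTIFY_OVERFLOW, &info, sizeof(info));
            printf("res %d bus overflow: %s\n", info.res_id,
                   info.is_bus_overflow ? "yes" : "no");
            return 0;
    }
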
+ 2 - 2
drivers/cam_isp/isp_hw_mgr/isp_hw/sfe_hw/sfe_bus/cam_sfe_bus_rd.c

@@ -468,6 +468,8 @@ static int cam_sfe_bus_release_rm(void          *bus_priv,
 	rsrc_data->unpacker_cfg = 0;
 	rsrc_data->burst_len = 0;
 	rsrc_data->en_cfg = 0;
+	rsrc_data->enable_caching =  false;
+	rsrc_data->offset = 0;
 
 	rm_res->tasklet_info = NULL;
 	rm_res->res_state = CAM_ISP_RESOURCE_STATE_AVAILABLE;
@@ -527,9 +529,7 @@ static int cam_sfe_bus_stop_rm(struct cam_isp_resource_node *rm_res)
 	cam_io_w_mb(0x0, common_data->mem_base + rsrc_data->hw_regs->cfg);
 
 	rm_res->res_state = CAM_ISP_RESOURCE_STATE_RESERVED;
-	rsrc_data->enable_caching =  false;
 	rsrc_data->enable_disable_cfg_done = false;
-	rsrc_data->offset = 0;
 
 	CAM_DBG(CAM_SFE, "SFE:%d RM:%d stopped",
 		rsrc_data->common_data->core_index, rsrc_data->index);

+ 2 - 3
drivers/cam_isp/isp_hw_mgr/isp_hw/sfe_hw/sfe_bus/cam_sfe_bus_wr.c

@@ -4,7 +4,6 @@
  * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
  */
 
-
 #include <linux/ratelimit.h>
 #include <linux/slab.h>
 
@@ -839,6 +838,8 @@ static int cam_sfe_bus_release_wm(void   *bus_priv,
 	rsrc_data->hfr_cfg_done = false;
 	rsrc_data->en_cfg = 0;
 	rsrc_data->is_dual = 0;
+	rsrc_data->enable_caching =  false;
+	rsrc_data->offset = 0;
 
 	wm_res->tasklet_info = NULL;
 	wm_res->res_state = CAM_ISP_RESOURCE_STATE_AVAILABLE;
@@ -923,8 +924,6 @@ static int cam_sfe_bus_stop_wm(struct cam_isp_resource_node *wm_res)
 	wm_res->res_state = CAM_ISP_RESOURCE_STATE_RESERVED;
 	rsrc_data->init_cfg_done = false;
 	rsrc_data->hfr_cfg_done = false;
-	rsrc_data->enable_caching =  false;
-	rsrc_data->offset = 0;
 
 	return 0;
 }

+ 7 - 3
drivers/cam_isp/isp_hw_mgr/isp_hw/sfe_hw/sfe_top/cam_sfe_top.c

@@ -810,13 +810,15 @@ static int cam_sfe_set_top_debug(
 }
 
 static int cam_sfe_top_handle_overflow(
-	struct cam_sfe_top_priv *top_priv, uint32_t cmd_type)
+	struct cam_sfe_top_priv *top_priv, void *cmd_args)
 {
 	struct cam_sfe_top_common_data      *common_data;
 	struct cam_hw_soc_info              *soc_info;
 	uint32_t                             bus_overflow_status, violation_status, tmp;
 	uint32_t                             i = 0;
+	struct cam_isp_hw_overflow_info     *overflow_info = NULL;
 
+	overflow_info = (struct cam_isp_hw_overflow_info *)cmd_args;
 	common_data = &top_priv->common_data;
 	soc_info = common_data->soc_info;
 
@@ -843,8 +845,10 @@ static int cam_sfe_top_handle_overflow(
 		cam_sfe_top_print_ipp_violation_info(top_priv, violation_status);
 	cam_sfe_top_print_debug_reg_info(top_priv);
 
-	if (bus_overflow_status)
+	if (bus_overflow_status) {
 		cam_cpas_log_votes();
+		overflow_info->is_bus_overflow = true;
+	}
 
 	return 0;
 }
@@ -1105,7 +1109,7 @@ int cam_sfe_top_process_cmd(void *priv, uint32_t cmd_type,
 		rc = cam_sfe_set_top_debug(top_priv, cmd_args);
 		break;
 	case CAM_ISP_HW_NOTIFY_OVERFLOW:
-		rc = cam_sfe_top_handle_overflow(top_priv, cmd_type);
+		rc = cam_sfe_top_handle_overflow(top_priv, cmd_args);
 		break;
 	case CAM_ISP_HW_CMD_APPLY_CLK_BW_UPDATE:
 		rc = cam_sfe_top_apply_clk_bw_update(top_priv, cmd_args, arg_size);

+ 8 - 2
drivers/cam_isp/isp_hw_mgr/isp_hw/vfe_hw/vfe_top/cam_vfe_top_ver4.c

@@ -523,7 +523,11 @@ static int cam_vfe_top_ver4_print_overflow_debug_info(
 	struct cam_vfe_soc_private *soc_private = NULL;
 	uint32_t                             violation_status = 0, bus_overflow_status = 0, tmp;
 	uint32_t                             i = 0;
-	int                                  res_id = *((int *)(cmd_args));
+	int                                  res_id;
+	struct cam_isp_hw_overflow_info     *overflow_info = NULL;
+
+	overflow_info = (struct cam_isp_hw_overflow_info *)cmd_args;
+	res_id = overflow_info->res_id;
 
 	common_data = &top_priv->common_data;
 	soc_info = top_priv->top_common.soc_info;
@@ -538,9 +542,11 @@ static int cam_vfe_top_ver4_print_overflow_debug_info(
 		soc_info->index, top_priv->sof_cnt, soc_info->applied_src_clk_rate / 1000000,
 		CAM_BOOL_TO_YESNO(bus_overflow_status), CAM_BOOL_TO_YESNO(violation_status));
 
-	if (bus_overflow_status)
+	if (bus_overflow_status) {
+		overflow_info->is_bus_overflow = true;
 		CAM_INFO(CAM_ISP, "VFE[%d] Bus overflow status: 0x%x",
 			soc_info->index, bus_overflow_status);
+	}
 
 	tmp = bus_overflow_status;
 	while (tmp) {

+ 3 - 1
drivers/cam_req_mgr/cam_req_mgr_core.c

@@ -1961,7 +1961,8 @@ static int __cam_req_mgr_process_req(struct cam_req_mgr_core_link *link,
 			max_retry++;
 
 		if (!link->wq_congestion && dev) {
-			link->retry_cnt++;
+			if (rc != -EAGAIN)
+				link->retry_cnt++;
 			if (link->retry_cnt == max_retry) {
 				CAM_DBG(CAM_CRM,
 					"Max retry attempts (count %d) reached on link[0x%x] for req [%lld]",
@@ -3016,6 +3017,7 @@ int cam_req_mgr_process_error(void *priv, void *data)
 	mutex_lock(&link->req.lock);
 	switch (err_info->error) {
 	case CRM_KMD_ERR_BUBBLE:
+	case CRM_KMD_WARN_INTERNAL_RECOVERY:
 		idx = __cam_req_mgr_find_slot_for_req(in_q, err_info->req_id);
 		if (idx < 0) {
 			CAM_ERR_RATE_LIMIT(CAM_CRM,

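This ties in with the ISP context change above: while internal
recovery is pending, apply returns -EAGAIN, and CRM now leaves
retry_cnt untouched for exactly that code so a recovering context
cannot exhaust the link's retry budget. A simplified sketch of
the accounting (assumed shape; the real state lives on the CRM
link structure):

    #include <errno.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* -EAGAIN (recovery in progress) doesn't count as a retry;
     * any other failure does. */
    static bool max_retries_hit(int apply_rc, int *retry_cnt,
                                int max_retry)
    {
            if (apply_rc != -EAGAIN)
                    (*retry_cnt)++;
            return *retry_cnt >= max_retry;
    }

    int main(void)
    {
            int retry_cnt = 0;

            /* Two recovery-pending applies, then two real failures. */
            printf("%d\n", max_retries_hit(-EAGAIN, &retry_cnt, 2));
            printf("%d\n", max_retries_hit(-EAGAIN, &retry_cnt, 2));
            printf("%d\n", max_retries_hit(-EFAULT, &retry_cnt, 2));
            printf("%d\n", max_retries_hit(-EFAULT, &retry_cnt, 2));
            return 0;
    }
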
+ 2 - 0
drivers/cam_req_mgr/cam_req_mgr_interface.h

@@ -147,6 +147,7 @@ enum cam_req_status {
  * @PAGE_FAULT : Page fault while accessing memory
  * @OVERFLOW   : Bus Overflow for IFE/VFE
  * @TIMEOUT    : Timeout from cci or bus.
+ * @RECOVERY   : Internal recovery for bus overflow
  * @MAX        : Invalid error value
  */
 enum cam_req_mgr_device_error {
@@ -157,6 +158,7 @@ enum cam_req_mgr_device_error {
 	CRM_KMD_ERR_OVERFLOW,
 	CRM_KMD_ERR_TIMEOUT,
 	CRM_KMD_ERR_STOPPED,
+	CRM_KMD_WARN_INTERNAL_RECOVERY,
 	CRM_KMD_ERR_MAX,
 };