Kaynağa Gözat

msm: camera: isp: Extend internal recovery scheme

If bubble recovery stalls, attempt internal recovery: halt,
reset and resume the IFE pipeline. If internal recovery
succeeds, skip notifying userland for pipeline recovery.
If the same slot [same request] stalls again, flag it for
userspace recovery.

CRs-Fixed: 3098892
Change-Id: I6fff844fecd653897451ab920ddf6c4d8ca2f49e
Signed-off-by: Karthik Anantha Ram <[email protected]>
Karthik Anantha Ram 3 yıl önce
ebeveyn
işleme
b7af81340a

+ 149 - 39
drivers/cam_isp/cam_isp_context.c

@@ -1397,6 +1397,7 @@ static void __cam_isp_context_reset_internal_recovery_params(
 	atomic_set(&ctx_isp->process_bubble, 0);
 	ctx_isp->recovery_req_id = 0;
 	ctx_isp->aeb_error_cnt = 0;
+	ctx_isp->bubble_frame_cnt = 0;
 }
 
 static int __cam_isp_context_try_internal_recovery(
@@ -3202,66 +3203,84 @@ static bool __cam_isp_ctx_request_can_reapply(
 	return true;
 }
 
-static int __cam_isp_ctx_trigger_error_req_reapply(
+static int __cam_isp_ctx_validate_for_req_reapply_util(
 	struct cam_isp_context *ctx_isp)
 {
-	int                             rc = 0;
-	struct cam_ctx_request          *req = NULL;
-	struct cam_ctx_request          *req_to_reapply = NULL;
-	struct cam_ctx_request          *req_temp;
-	struct cam_isp_ctx_req          *req_isp = NULL;
-	struct cam_context              *ctx = ctx_isp->base;
+	int rc = 0;
+	struct cam_ctx_request *req_temp;
+	struct cam_ctx_request *req = NULL;
+	struct cam_isp_ctx_req *req_isp = NULL;
+	struct cam_context *ctx = ctx_isp->base;
 
-	/*
-	 * For errors that can be recoverable within kmd, we
-	 * try to do internal hw stop, restart and notify CRM
-	 * to do reapply with the help of bubble control flow.
-	 */
+	/* Check for req in active/wait lists */
 	if (list_empty(&ctx->active_req_list)) {
 		CAM_DBG(CAM_ISP,
-			"handling error with no active request");
+			"Active request list empty for ctx: %u on link: 0x%x",
+			ctx->ctx_id, ctx->link_hdl);
+
 		if (list_empty(&ctx->wait_req_list)) {
 			CAM_WARN(CAM_ISP,
-				"Reapply with no active/wait request");
+				"No active/wait req for ctx: %u on link: 0x%x",
+				ctx->ctx_id, ctx->link_hdl);
 			rc = -EINVAL;
 			goto end;
 		}
 	}
 
+	/* Validate if all fences for active requests are not signaled */
 	if (!list_empty(&ctx->active_req_list)) {
 		list_for_each_entry_safe_reverse(req, req_temp,
 			&ctx->active_req_list, list) {
 			/*
 			 * If some fences of the active request are already
-			 * signalled, we shouldn't do recovery for the buffer
+			 * signaled, we should not do recovery for the buffer
 			 * and timestamp consistency.
 			 */
 			req_isp = (struct cam_isp_ctx_req *)req->req_priv;
 			if (!__cam_isp_ctx_request_can_reapply(req_isp)) {
-				CAM_INFO(CAM_ISP,
-					"ctx:%u fence has partially signaled, cannot do recovery for req %llu",
-					ctx->ctx_id, req->request_id);
+				CAM_WARN(CAM_ISP,
+					"Req: %llu in ctx:%u on link: 0x%x fence has partially signaled, cannot do recovery",
+					req->request_id, ctx->ctx_id, ctx->link_hdl);
 				rc = -EINVAL;
 				goto end;
 			}
+		}
+	}
+
+	/* Move active requests to pending list */
+	if (!list_empty(&ctx->active_req_list)) {
+		list_for_each_entry_safe_reverse(req, req_temp,
+			&ctx->active_req_list, list) {
 			list_del_init(&req->list);
 			__cam_isp_ctx_enqueue_request_in_order(ctx, req, false);
 			ctx_isp->active_req_cnt--;
-			CAM_DBG(CAM_ISP, "ctx:%u move active req %llu to pending",
-				ctx->ctx_id, req->request_id);
+			CAM_DBG(CAM_ISP, "ctx:%u link:0x%x move active req %llu to pending",
+				ctx->ctx_id, ctx->link_hdl, req->request_id);
 		}
 	}
 
+	/* Move wait requests to pending list */
 	if (!list_empty(&ctx->wait_req_list)) {
-		list_for_each_entry_safe_reverse(req, req_temp,
-			&ctx->wait_req_list, list) {
+		list_for_each_entry_safe_reverse(req, req_temp, &ctx->wait_req_list, list) {
 			list_del_init(&req->list);
 			__cam_isp_ctx_enqueue_request_in_order(ctx, req, false);
-			CAM_DBG(CAM_ISP, "ctx:%u move wait req %llu to pending",
-				ctx->ctx_id, req->request_id);
+			CAM_DBG(CAM_ISP, "ctx:%u link:0x%x move wait req %llu to pending",
+				ctx->ctx_id, ctx->link_hdl, req->request_id);
 		}
 	}
 
+end:
+	return rc;
+}
+
+static int __cam_isp_ctx_handle_recovery_req_util(
+	struct cam_isp_context *ctx_isp)
+{
+	int rc = 0;
+	struct cam_context *ctx = ctx_isp->base;
+	struct cam_ctx_request *req_to_reapply = NULL;
+	struct cam_isp_ctx_req *req_isp = NULL;
+
 	req_to_reapply = list_first_entry(&ctx->pending_req_list,
 		struct cam_ctx_request, list);
 	req_isp = (struct cam_isp_ctx_req *)req_to_reapply->req_priv;
@@ -3269,13 +3288,12 @@ static int __cam_isp_ctx_trigger_error_req_reapply(
 	ctx_isp->recovery_req_id = req_to_reapply->request_id;
 	atomic_set(&ctx_isp->internal_recovery_set, 1);
 
-	CAM_INFO(CAM_ISP, "ctx:%u notify CRM to reapply req %llu",
-		ctx->ctx_id, req_to_reapply->request_id);
+	CAM_INFO(CAM_ISP, "Notify CRM to reapply req:%llu for ctx:%u link:0x%x",
+		req_to_reapply->request_id, ctx->ctx_id, ctx->link_hdl);
 
 	rc = __cam_isp_ctx_notify_error_util(CAM_TRIGGER_POINT_SOF,
-		CRM_KMD_WARN_INTERNAL_RECOVERY,
-		req_to_reapply->request_id,
-		ctx_isp);
+		CRM_KMD_WARN_INTERNAL_RECOVERY, req_to_reapply->request_id,
+			ctx_isp);
 	if (rc) {
 		/* Unable to notify CRM to do reapply back to normal */
 		CAM_WARN(CAM_ISP,
@@ -3283,15 +3301,33 @@ static int __cam_isp_ctx_trigger_error_req_reapply(
 			ctx->ctx_id, ctx_isp->recovery_req_id);
 		ctx_isp->recovery_req_id = 0;
 		atomic_set(&ctx_isp->internal_recovery_set, 0);
-		goto end;
 	}
 
-	/* Notify userland that KMD has done internal recovery */
-	__cam_isp_ctx_notify_v4l2_error_event(CAM_REQ_MGR_WARN_TYPE_KMD_RECOVERY,
-		0, req_to_reapply->request_id, ctx);
+	return rc;
+}
+
+static int __cam_isp_ctx_trigger_error_req_reapply(
+	struct cam_isp_context *ctx_isp)
+{
+	int rc = 0;
+	struct cam_context *ctx = ctx_isp->base;
+
+	/*
+	 * For errors that can be recoverable within kmd, we
+	 * try to do internal hw stop, restart and notify CRM
+	 * to do reapply with the help of bubble control flow.
+	 */
 
-	CAM_DBG(CAM_ISP, "ctx:%u handling reapply done for req %llu",
-		ctx->ctx_id, req_to_reapply->request_id);
+	rc = __cam_isp_ctx_validate_for_req_reapply_util(ctx_isp);
+	if (rc)
+		goto end;
+
+	rc = __cam_isp_ctx_handle_recovery_req_util(ctx_isp);
+	if (rc)
+		goto end;
+
+	CAM_DBG(CAM_ISP, "Triggered internal recovery for req:%llu ctx:%u on link 0x%x",
+		ctx_isp->recovery_req_id, ctx->ctx_id, ctx->link_hdl);
 
 end:
 	return rc;
@@ -4264,6 +4300,16 @@ static int __cam_isp_ctx_apply_req_in_activated_state(
 	}
 	req_isp->bubble_report = apply->report_if_bubble;
 
+	/*
+	 * Reset all buf done/bubble flags for the req being applied
+	 * If internal recovery has led to re-apply of same
+	 * request, clear all stale entities
+	 */
+	req_isp->num_acked = 0;
+	req_isp->num_deferred_acks = 0;
+	req_isp->cdm_reset_before_apply = false;
+	req_isp->bubble_detected = false;
+
 	cfg.ctxt_to_hw_map = ctx_isp->hw_ctx;
 	cfg.request_id = req->request_id;
 	cfg.hw_update_entries = req_isp->cfg;
@@ -7261,7 +7307,6 @@ static int __cam_isp_ctx_reset_and_recover(
 	req = list_first_entry(&ctx->pending_req_list,
 		struct cam_ctx_request, list);
 	req_isp = (struct cam_isp_ctx_req *) req->req_priv;
-	req_isp->bubble_detected = false;
 
 	CAM_INFO(CAM_ISP,
 		"Trigger Halt, Reset & Resume for req: %llu ctx: %u in state: %d link: 0x%x",
@@ -7331,6 +7376,11 @@ static int __cam_isp_ctx_reset_and_recover(
 
 	/* IQ applied for this request, on next trigger skip IQ cfg */
 	req_isp->reapply_type = CAM_CONFIG_REAPPLY_IO;
+
+	/* Notify userland that KMD has done internal recovery */
+	__cam_isp_ctx_notify_v4l2_error_event(CAM_REQ_MGR_WARN_TYPE_KMD_RECOVERY,
+		0, req->request_id, ctx);
+
 	CAM_DBG(CAM_ISP, "Internal Start HW success ctx %u on link: 0x%x",
 		ctx->ctx_id, ctx->link_hdl);
 
@@ -7338,6 +7388,54 @@ end:
 	return rc;
 }
 
+static bool __cam_isp_ctx_try_internal_recovery_for_bubble(
+	int64_t error_req_id, struct cam_context *ctx)
+{
+	int rc;
+	struct cam_isp_context *ctx_isp =
+		(struct cam_isp_context *)ctx->ctx_priv;
+
+	/* Perform recovery if bubble recovery is stalled */
+	if (!atomic_read(&ctx_isp->process_bubble))
+		return false;
+
+	/* Validate if errored request has been applied */
+	if (ctx_isp->last_applied_req_id < error_req_id) {
+		CAM_WARN(CAM_ISP,
+			"Skip trying for internal recovery last applied: %lld error_req: %lld for ctx: %u on link: 0x%x",
+			ctx_isp->last_applied_req_id, error_req_id,
+			ctx->ctx_id, ctx->link_hdl);
+		return false;
+	}
+
+	if (__cam_isp_ctx_validate_for_req_reapply_util(ctx_isp)) {
+		CAM_WARN(CAM_ISP,
+			"Internal recovery not possible for ctx: %u on link: 0x%x req: %lld [last_applied: %lld]",
+			ctx->ctx_id, ctx->link_hdl, error_req_id, ctx_isp->last_applied_req_id);
+		return false;
+	}
+
+	/* Trigger reset and recover */
+	atomic_set(&ctx_isp->internal_recovery_set, 1);
+	rc = __cam_isp_ctx_reset_and_recover(false, ctx);
+	if (rc) {
+		CAM_WARN(CAM_ISP,
+			"Internal recovery failed in ctx: %u on link: 0x%x req: %lld [last_applied: %lld]",
+			ctx->ctx_id, ctx->link_hdl, error_req_id, ctx_isp->last_applied_req_id);
+		atomic_set(&ctx_isp->internal_recovery_set, 0);
+		goto error;
+	}
+
+	CAM_DBG(CAM_ISP,
+		"Internal recovery done in ctx: %u on link: 0x%x req: %lld [last_applied: %lld]",
+		ctx->ctx_id, ctx->link_hdl, error_req_id, ctx_isp->last_applied_req_id);
+
+	return true;
+
+error:
+	return false;
+}
+
 static int __cam_isp_ctx_process_evt(struct cam_context *ctx,
 	struct cam_req_mgr_link_evt_data *link_evt_data)
 {
@@ -7356,9 +7454,21 @@ static int __cam_isp_ctx_process_evt(struct cam_context *ctx,
 	case CAM_REQ_MGR_LINK_EVT_SOF_FREEZE:
 		rc = __cam_isp_ctx_handle_sof_freeze_evt(ctx);
 		break;
-	case CAM_REQ_MGR_LINK_EVT_STALLED:
-		if (ctx->state == CAM_CTX_ACTIVATED)
-			rc = __cam_isp_ctx_trigger_reg_dump(CAM_HW_MGR_CMD_REG_DUMP_ON_ERROR, ctx);
+	case CAM_REQ_MGR_LINK_EVT_STALLED: {
+		bool internal_recovery_skipped = false;
+
+		if (ctx->state == CAM_CTX_ACTIVATED) {
+			if (link_evt_data->try_for_recovery)
+				internal_recovery_skipped =
+					__cam_isp_ctx_try_internal_recovery_for_bubble(
+						link_evt_data->req_id, ctx);
+
+			if (!internal_recovery_skipped)
+				rc = __cam_isp_ctx_trigger_reg_dump(
+					CAM_HW_MGR_CMD_REG_DUMP_ON_ERROR, ctx);
+		}
+		link_evt_data->try_for_recovery = internal_recovery_skipped;
+	}
 		break;
 	default:
 		CAM_WARN(CAM_ISP,

+ 40 - 10
drivers/cam_req_mgr/cam_req_mgr_core.c

@@ -68,6 +68,7 @@ void cam_req_mgr_core_link_reset(struct cam_req_mgr_core_link *link)
 	link->num_sync_links = 0;
 	link->last_sof_trigger_jiffies = 0;
 	link->wq_congestion = false;
+	link->try_for_internal_recovery = false;
 	atomic_set(&link->eof_event_cnt, 0);
 	__cam_req_mgr_reset_apply_data(link);
 
@@ -250,7 +251,6 @@ static void __cam_req_mgr_find_dev_name(
  * __cam_req_mgr_notify_frame_skip()
  *
  * @brief : Notify all devices of frame skipping
- * @link  : link on which we are applying these settings
  *
  */
 static int __cam_req_mgr_notify_frame_skip(
@@ -337,13 +337,17 @@ static int __cam_req_mgr_send_evt(
 	struct cam_req_mgr_core_link  *link)
 {
 	int i;
-	struct cam_req_mgr_link_evt_data     evt_data;
+	struct cam_req_mgr_link_evt_data     evt_data = {0};
 	struct cam_req_mgr_connected_device *device = NULL;
 
 	CAM_DBG(CAM_CRM,
 		"Notify event type: %d to all connected devices on link: 0x%x",
 		type, link->link_hdl);
 
+	/* Try for internal recovery */
+	if (link->try_for_internal_recovery)
+		evt_data.try_for_recovery = true;
+
 	for (i = 0; i < link->num_devs; i++) {
 		device = &link->l_dev[i];
 		if (device != NULL) {
@@ -357,6 +361,9 @@ static int __cam_req_mgr_send_evt(
 		}
 	}
 
+	/* Updated if internal recovery succeeded */
+	link->try_for_internal_recovery = evt_data.try_for_recovery;
+
 	return 0;
 }
 
@@ -369,7 +376,7 @@ static int __cam_req_mgr_send_evt(
  *
  */
 static int __cam_req_mgr_notify_error_on_link(
-	struct cam_req_mgr_core_link    *link,
+	struct cam_req_mgr_core_link *link,
 	struct cam_req_mgr_connected_device *dev)
 {
 	struct cam_req_mgr_core_session *session = NULL;
@@ -389,7 +396,18 @@ static int __cam_req_mgr_notify_error_on_link(
 	}
 
 	/* Notify all devices in the link about the error */
-	__cam_req_mgr_send_evt(0, CAM_REQ_MGR_LINK_EVT_STALLED, CRM_KMD_ERR_FATAL, link);
+	__cam_req_mgr_send_evt(link->req.apply_data[link->min_delay].req_id,
+		CAM_REQ_MGR_LINK_EVT_STALLED, CRM_KMD_ERR_FATAL, link);
+
+	/*
+	 * Internal recovery succeeded - skip userland notification
+	 * If recovery had failed subdevice will reset this flag
+	 */
+	if (link->try_for_internal_recovery) {
+		CAM_INFO(CAM_CRM, "Internal recovery succeeded on link: 0x%x",
+			link->link_hdl);
+		return 0;
+	}
 
 	CAM_ERR_RATE_LIMIT(CAM_CRM,
 		"Notifying userspace to trigger recovery on link 0x%x for session %d",
@@ -662,6 +680,7 @@ static void __cam_req_mgr_reset_req_slot(struct cam_req_mgr_core_link *link,
 	slot->skip_idx = 0;
 	slot->recover = 0;
 	slot->additional_timeout = 0;
+	slot->recovery_counter = 0;
 	slot->sync_mode = CAM_REQ_MGR_SYNC_MODE_NO_SYNC;
 	slot->status = CRM_SLOT_STATUS_NO_REQ;
 
@@ -1792,7 +1811,6 @@ static int __cam_req_mgr_process_req(struct cam_req_mgr_core_link *link,
 	struct cam_req_mgr_core_session     *session;
 	struct cam_req_mgr_connected_device *dev = NULL;
 	struct cam_req_mgr_core_link        *tmp_link = NULL;
-	uint32_t                             max_retry = 0;
 	enum crm_req_eof_trigger_type        eof_trigger_type;
 
 	session = (struct cam_req_mgr_core_session *)link->parent;
@@ -1957,17 +1975,15 @@ static int __cam_req_mgr_process_req(struct cam_req_mgr_core_link *link,
 	if (rc < 0) {
 		/* Apply req failed retry at next sof */
 		slot->status = CRM_SLOT_STATUS_REQ_PENDING;
-		max_retry = MAXIMUM_RETRY_ATTEMPTS;
-		if (link->max_delay == 1)
-			max_retry++;
 
 		if (!link->wq_congestion && dev) {
 			if (rc != -EAGAIN)
 				link->retry_cnt++;
-			if (link->retry_cnt == max_retry) {
+
+			if (link->retry_cnt >= MAXIMUM_RETRY_ATTEMPTS) {
 				CAM_DBG(CAM_CRM,
 					"Max retry attempts (count %d) reached on link[0x%x] for req [%lld]",
-					max_retry, link->link_hdl,
+					MAXIMUM_RETRY_ATTEMPTS, link->link_hdl,
 					in_q->slot[in_q->rd_idx].req_id);
 
 				cam_req_mgr_debug_delay_detect();
@@ -1978,7 +1994,21 @@ static int __cam_req_mgr_process_req(struct cam_req_mgr_core_link *link,
 					link->link_hdl,
 					CAM_DEFAULT_VALUE, rc);
 
+				/*
+				 * Try for internal recovery - primarily for IFE subdev
+				 * if it's the first instance of stall
+				 */
+				if (!slot->recovery_counter)
+					link->try_for_internal_recovery = true;
+
 				__cam_req_mgr_notify_error_on_link(link, dev);
+
+				/* Increment internal recovery counter */
+				if (link->try_for_internal_recovery) {
+					slot->recovery_counter++;
+					link->try_for_internal_recovery = false;
+				}
+
 				link->retry_cnt = 0;
 			}
 		} else

+ 4 - 0
drivers/cam_req_mgr/cam_req_mgr_core.h

@@ -271,6 +271,7 @@ struct cam_req_mgr_req_tbl {
  * @sync_mode          : Sync mode in which req id in this slot has to applied
  * @additional_timeout : Adjusted watchdog timeout value associated with
  * this request
+ * @recovery_counter   : Internal recovery counter
  */
 struct cam_req_mgr_slot {
 	int32_t               idx;
@@ -280,6 +281,7 @@ struct cam_req_mgr_slot {
 	int64_t               req_id;
 	int32_t               sync_mode;
 	int32_t               additional_timeout;
+	int32_t               recovery_counter;
 };
 
 /**
@@ -390,6 +392,7 @@ struct cam_req_mgr_connected_device {
  *                         case of long exposure use case
  * @last_sof_trigger_jiffies : Record the jiffies of last sof trigger jiffies
  * @wq_congestion        : Indicates if WQ congestion is detected or not
+ * @try_for_internal_recovery : If the link stalls try for RT internal recovery
  */
 struct cam_req_mgr_core_link {
 	int32_t                              link_hdl;
@@ -428,6 +431,7 @@ struct cam_req_mgr_core_link {
 	bool                                 skip_init_frame;
 	uint64_t                             last_sof_trigger_jiffies;
 	bool                                 wq_congestion;
+	bool                                 try_for_internal_recovery;
 };
 
 /**

+ 4 - 0
drivers/cam_req_mgr/cam_req_mgr_interface.h

@@ -1,6 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (c) 2016-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
  */
 
 #ifndef _CAM_REQ_MGR_INTERFACE_H
@@ -377,11 +378,14 @@ struct cam_req_mgr_flush_request {
  * @link_hdl          : link handle
  * @req_id            : request id
  * @evt_type          : link event
+ * @try_for_recovery  : Link is stalled allow subdevices to recover if
+ *                      possible
  */
 struct cam_req_mgr_link_evt_data {
 	int32_t  link_hdl;
 	int32_t  dev_hdl;
 	uint64_t req_id;
+	bool     try_for_recovery;
 	enum cam_req_mgr_link_evt_type evt_type;
 	union {
 		enum cam_req_mgr_device_error error;