瀏覽代碼

msm: camera: isp: Protect recovery callback from stop ioctl using mutex

Since recovery callback happens in workqueue context, it can run in
parallel with stop dev ioctl. This leads to many possible race
conditions. Instead, protect the recovery callback with the same context
mutex as ioctls to prevent parallel execution.

CRs-Fixed: 3003703
Change-Id: I92a635cfaeee4cf09047672a5cb925cf262cd816
Signed-off-by: Anand Ravi <[email protected]>
Anand Ravi 3 年之前
父節點
當前提交
b811f12335

+ 27 - 0
drivers/cam_core/cam_context.c

@@ -674,6 +674,33 @@ int cam_context_handle_dump_dev(struct cam_context *ctx,
 	return rc;
 }
 
+/* Workqueue entry point for HW recovery: takes ctx_mutex so recovery
+ * cannot run concurrently with ioctls such as stop dev (see commit msg).
+ */
+int cam_context_handle_hw_recovery(void *priv, void *data)
+{
+	struct cam_context *ctx = priv;
+	int rc = 0;
+
+	if (!ctx) {
+		CAM_ERR(CAM_CORE, "null context");
+		return -EINVAL;
+	}
+
+	/* Same mutex the ioctl paths hold — serializes recovery vs. stop */
+	mutex_lock(&ctx->ctx_mutex);
+	if (ctx->state != CAM_CTX_ACTIVATED) {
+		/* Context already stopped/released; recovery is moot. rc stays 0. */
+		CAM_DBG(CAM_CORE, "skipping recovery for ctx:%d dev:%s in state:%d", ctx->ctx_id,
+			ctx->dev_name, ctx->state);
+		goto end;
+	}
+	CAM_DBG(CAM_CORE, "try hw recovery for ctx:%d dev:%s", ctx->ctx_id, ctx->dev_name);
+	if (ctx->state_machine[ctx->state].recovery_ops)
+		rc = ctx->state_machine[ctx->state].recovery_ops(priv, data);
+	else
+		CAM_WARN(CAM_CORE, "no recovery op in state:%d for ctx:%d dev:%s",
+			ctx->state, ctx->ctx_id, ctx->dev_name);
+end:
+	mutex_unlock(&ctx->ctx_mutex);
+	return rc;
+}
+
 int cam_context_init(struct cam_context *ctx,
 	const char *dev_name,
 	uint64_t dev_id,

+ 14 - 0
drivers/cam_core/cam_context.h

@@ -167,6 +167,7 @@ struct cam_ctx_crm_ops {
  * @pagefault_ops:         Function to be called on page fault
  * @dumpinfo_ops:          Function to be invoked for dumping any
  *                         context info
+ * @recovery_ops:          Function to be invoked to try hardware recovery
  *
  */
 struct cam_ctx_ops {
@@ -175,6 +176,7 @@ struct cam_ctx_ops {
 	cam_hw_event_cb_func         irq_ops;
 	cam_hw_pagefault_cb_func     pagefault_ops;
 	cam_ctx_info_dump_cb_func    dumpinfo_ops;
+	cam_ctx_recovery_cb_func     recovery_ops;
 };
 
 /**
@@ -539,6 +541,18 @@ int cam_context_handle_dump_dev(struct cam_context *ctx,
 int cam_context_handle_info_dump(void *context,
 	enum cam_context_dump_id id);
 
+/**
+ * cam_context_handle_hw_recovery()
+ *
+ * @brief:        Handle hardware recovery. This function can be scheduled in
+ *                cam_req_mgr_workq.
+ *
+ * @context:      Object pointer for cam_context
+ * @data:         Recovery data that is to be passed to hw mgr
+ *
+ */
+int cam_context_handle_hw_recovery(void *context, void *data);
+
 /**
  * cam_context_deinit()
  *

+ 6 - 0
drivers/cam_core/cam_hw_mgr_intf.h

@@ -53,6 +53,10 @@ typedef int (*cam_hw_pagefault_cb_func)(void *context,
 typedef int (*cam_ctx_info_dump_cb_func)(void *context,
 	enum cam_context_dump_id dump_id);
 
+/* ctx recovery callback function type */
+typedef int (*cam_ctx_recovery_cb_func)(void *context,
+	void *recovery_data);
+
 /**
  * struct cam_hw_update_entry - Entry for hardware config
  *
@@ -421,6 +425,7 @@ struct cam_hw_cmd_args {
  * @hw_flush:                  Function pointer for HW flush
  * @hw_reset:                  Function pointer for HW reset
  * @hw_dump:                   Function pointer for HW dump
+ * @hw_recovery:               Function pointer for HW recovery callback
  *
  */
 struct cam_hw_mgr_intf {
@@ -443,6 +448,7 @@ struct cam_hw_mgr_intf {
 	int (*hw_flush)(void *hw_priv, void *hw_flush_args);
 	int (*hw_reset)(void *hw_priv, void *hw_reset_args);
 	int (*hw_dump)(void *hw_priv, void *hw_dump_args);
+	int (*hw_recovery)(void *hw_priv, void *hw_recovery_args);
 };
 
 #endif /* _CAM_HW_MGR_INTF_H_ */

+ 16 - 0
drivers/cam_isp/cam_isp_context.c

@@ -33,6 +33,8 @@ static struct cam_isp_ctx_debug isp_ctx_debug;
 static int cam_isp_context_dump_requests(void *data,
 	struct cam_smmu_pf_info *pf_info);
 
+static int cam_isp_context_hw_recovery(void *priv, void *data);
+
 static int __cam_isp_ctx_start_dev_in_ready(struct cam_context *ctx,
 	struct cam_start_stop_dev_cmd *cmd);
 
@@ -6525,9 +6527,23 @@ static struct cam_ctx_ops
 		.irq_ops = __cam_isp_ctx_handle_irq_in_activated,
 		.pagefault_ops = cam_isp_context_dump_requests,
 		.dumpinfo_ops = cam_isp_context_info_dump,
+		.recovery_ops = cam_isp_context_hw_recovery,
 	},
 };
 
+/* ISP recovery_ops: forward the recovery request to the hw mgr.
+ * priv is a cam_context, already NULL-checked by
+ * cam_context_handle_hw_recovery before dispatching here.
+ */
+static int cam_isp_context_hw_recovery(void *priv, void *data)
+{
+	struct cam_context *ctx = priv;
+	int rc = -EPERM;
+
+	/* rc remains -EPERM when the hw mgr registered no recovery hook */
+	if (ctx->hw_mgr_intf->hw_recovery)
+		rc = ctx->hw_mgr_intf->hw_recovery(ctx->hw_mgr_intf->hw_mgr_priv, data);
+	else
+		CAM_ERR(CAM_ISP, "hw mgr doesn't support recovery");
+
+	return rc;
+}
+
 static int cam_isp_context_dump_requests(void *data,
 	struct cam_smmu_pf_info *pf_info)
 {

+ 32 - 11
drivers/cam_isp/isp_hw_mgr/cam_ife_hw_mgr.c

@@ -5976,6 +5976,14 @@ static int cam_ife_mgr_stop_hw(void *hw_mgr_priv, void *stop_hw_args)
 		return -EPERM;
 	}
 
+	if (!ctx->num_base) {
+		CAM_ERR(CAM_ISP, "number of bases are zero");
+		return -EINVAL;
+	}
+
+	/* Cancel all scheduled recoveries without affecting future recoveries */
+	atomic_inc(&ctx->recovery_id);
+
 	CAM_DBG(CAM_ISP, " Enter...ctx id:%d", ctx->ctx_index);
 	stop_isp = (struct cam_isp_stop_args    *)stop_args->args;
 
@@ -5988,11 +5996,6 @@ static int cam_ife_mgr_stop_hw(void *hw_mgr_priv, void *stop_hw_args)
 
 	/* Note:stop resource will remove the irq mask from the hardware */
 
-	if (!ctx->num_base) {
-		CAM_ERR(CAM_ISP, "number of bases are zero");
-		return -EINVAL;
-	}
-
 	CAM_DBG(CAM_ISP, "Halting CSIDs");
 
 	/* get master base index first */
@@ -11029,7 +11032,7 @@ static int cam_ife_mgr_cmd_get_sof_timestamp(
 	return rc;
 }
 
-static int cam_ife_mgr_process_recovery_cb(void *priv, void *data)
+static int cam_ife_mgr_recover_hw(void *priv, void *data)
 {
 	int32_t rc = 0;
 	struct cam_ife_hw_event_recovery_data   *recovery_data = data;
@@ -11037,10 +11040,21 @@ static int cam_ife_mgr_process_recovery_cb(void *priv, void *data)
 	struct cam_hw_stop_args                  stop_args;
 	struct cam_ife_hw_mgr                   *ife_hw_mgr = priv;
 	uint32_t                                 i = 0;
-
+	bool cancel = false;
 	uint32_t error_type = recovery_data->error_type;
 	struct cam_ife_hw_mgr_ctx        *ctx = NULL;
 
+	for (i = 0; i < recovery_data->no_of_context; i++) {
+		ctx = recovery_data->affected_ctx[i];
+		if (recovery_data->id[i] != atomic_read(&ctx->recovery_id)) {
+			CAM_INFO(CAM_ISP, "recovery for ctx:%d error-type:%d cancelled",
+				ctx->ctx_index, error_type);
+			cancel = true;
+		}
+	}
+	if (cancel)
+		goto end;
+
 	/* Here recovery is performed */
 	CAM_DBG(CAM_ISP, "ErrorType = %d", error_type);
 
@@ -11117,6 +11131,7 @@ static int cam_ife_mgr_process_recovery_cb(void *priv, void *data)
 	}
 	CAM_DBG(CAM_ISP, "Exit: ErrorType = %d", error_type);
 
+end:
 	kfree(recovery_data);
 	return rc;
 }
@@ -11124,9 +11139,10 @@ static int cam_ife_mgr_process_recovery_cb(void *priv, void *data)
 static int cam_ife_hw_mgr_do_error_recovery(
 	struct cam_ife_hw_event_recovery_data  *ife_mgr_recovery_data)
 {
-	int32_t                                 rc = 0;
+	int32_t                                 rc, i;
 	struct crm_workq_task                  *task = NULL;
 	struct cam_ife_hw_event_recovery_data  *recovery_data = NULL;
+	struct cam_ife_hw_mgr_ctx *ctx;
 
 	recovery_data = kmemdup(ife_mgr_recovery_data,
 		sizeof(struct cam_ife_hw_event_recovery_data), GFP_ATOMIC);
@@ -11142,12 +11158,16 @@ static int cam_ife_hw_mgr_do_error_recovery(
 		return -ENOMEM;
 	}
 
-	task->process_cb = &cam_ife_mgr_process_recovery_cb;
+	task->process_cb = &cam_context_handle_hw_recovery;
 	task->payload = recovery_data;
+	for (i = 0; i < recovery_data->no_of_context; i++) {
+		ctx = recovery_data->affected_ctx[i];
+		recovery_data->id[i] = atomic_inc_return(&ctx->recovery_id);
+	}
+
 	rc = cam_req_mgr_workq_enqueue_task(task,
-		recovery_data->affected_ctx[0]->hw_mgr,
+		recovery_data->affected_ctx[0]->common.cb_priv,
 		CRM_TASK_PRIORITY_0);
-
 	return rc;
 }
 
@@ -12452,6 +12472,7 @@ int cam_ife_hw_mgr_init(struct cam_hw_mgr_intf *hw_mgr_intf, int *iommu_hdl)
 	hw_mgr_intf->hw_cmd = cam_ife_mgr_cmd;
 	hw_mgr_intf->hw_reset = cam_ife_mgr_reset;
 	hw_mgr_intf->hw_dump = cam_ife_mgr_dump;
+	hw_mgr_intf->hw_recovery = cam_ife_mgr_recover_hw;
 
 	if (iommu_hdl)
 		*iommu_hdl = g_ife_hw_mgr.mgr_common.img_iommu_hdl;

+ 4 - 0
drivers/cam_isp/isp_hw_mgr/cam_ife_hw_mgr.h

@@ -214,6 +214,7 @@ struct cam_ife_hw_mgr_ctx_flags {
  * @sfe_info                SFE info pertaining to this stream
  * @flags                   Flags pertainting to this ctx
  * @bw_config_version       BW Config version
+ * @recovery_id:            Unique ID of the current valid scheduled recovery
  *
  */
 struct cam_ife_hw_mgr_ctx {
@@ -264,6 +265,7 @@ struct cam_ife_hw_mgr_ctx {
 	struct cam_ife_hw_mgr_sfe_info  sfe_info;
 	struct cam_ife_hw_mgr_ctx_flags flags;
 	uint32_t                        bw_config_version;
+	atomic_t                        recovery_id;
 };
 
 /**
@@ -352,6 +354,7 @@ struct cam_ife_hw_mgr {
  * @affected_core:            Array of the hardware cores that are affected
  * @affected_ctx:             Array of the hardware contexts that are affected
  * @no_of_context:            Actual number of the affected context
+ * @id:                       Unique ID associated with this recovery data (per HW context)
  *
  */
 struct cam_ife_hw_event_recovery_data {
@@ -359,6 +362,7 @@ struct cam_ife_hw_event_recovery_data {
 	uint32_t                   affected_core[CAM_ISP_HW_NUM_MAX];
 	struct cam_ife_hw_mgr_ctx *affected_ctx[CAM_IFE_CTX_MAX];
 	uint32_t                   no_of_context;
+	uint32_t                   id[CAM_IFE_CTX_MAX];
 };
 
 /**