Browse Source

video: driver: update power calculations

Update the power calculations to avoid low frame rate and
frame drop issues.

Change-Id: Ice306a44d4ea8242d965c0e5d03edffc0f5e0f8b
Signed-off-by: Ashish Patil <[email protected]>
Ashish Patil 2 years ago
parent
commit
dd0e6d5f70

+ 1 - 1
driver/platform/common/inc/perf_static_model.h

@@ -161,7 +161,7 @@ struct api_calculation_freq_output {
 	u32 vsp_min_freq;
 	u32 tensilica_min_freq;
 	u32 hw_min_freq;
-	u32  enc_hqmode;
+	u32 enc_hqmode;
 	struct corner_voting usecase_corner;
 };
 

+ 2 - 2
driver/platform/pineapple/inc/pineapple_technology.h

@@ -25,8 +25,8 @@ static u32 frequency_table_pineapple[2][6] =
   */
 
 /* Tensilica cycles profiled by FW team in lanai device Feb 2022 */
-#define DECODER_VPP_FW_OVERHEAD_PINEAPPLE_AV1D                                            ((80000*3)/2));
-#define DECODER_VPP_FW_OVERHEAD_PINEAPPLE_NONAV1D                                         ((60000*3)/2);
+#define DECODER_VPP_FW_OVERHEAD_PINEAPPLE_AV1D                                            ((80000*3)/2)
+#define DECODER_VPP_FW_OVERHEAD_PINEAPPLE_NONAV1D                                         ((60000*3)/2)
 
  /* Tensilica cycles */
 #define DECODER_VPP_FW_OVERHEAD_PINEAPPLE                                                  (0)

+ 27 - 5
driver/variant/iris33/src/msm_vidc_bus_iris33.c

@@ -125,7 +125,7 @@ u32 get_compression_factors(struct compression_factors *compression_factor,
 	return 0;
 }
 
-static int calculate_bandwidth_decoder_iris3(
+static int calculate_bandwidth_decoder_iris33(
 		struct api_calculation_input codec_input,
 		struct api_calculation_bw_output *codec_output)
 {
@@ -298,7 +298,7 @@ static int calculate_bandwidth_decoder_iris3(
 		else
 			av1tile_index_entry = 6;
 
-		/* NOT PWC //or average and power case */
+		/* NOT PWC or average and power case */
 		if (codec_input.complexity_setting != 0)
 			av1tile_complexity = 1;
 		else
@@ -551,7 +551,7 @@ static int calculate_bandwidth_decoder_iris3(
 	return 0;
 }
 
-static int calculate_bandwidth_encoder_iris3(
+static int calculate_bandwidth_encoder_iris33(
 		struct api_calculation_input codec_input,
 		struct api_calculation_bw_output *codec_output)
 {
@@ -703,6 +703,16 @@ static int calculate_bandwidth_encoder_iris3(
 	codec_output->collocated_rd_wr_total_ddr =
 		codec_output->collocated_rd_wr_total_noc;
 
+	/* I frame only */
+	if (codec_input.hierachical_layer == 7) {
+		codec_output->collocated_rd_noc = 0;
+		codec_output->collocated_wr_noc = 0;
+		codec_output->collocated_rd_ddr = 0;
+		codec_output->collocated_wr_ddr = 0;
+		codec_output->collocated_rd_wr_total_noc = 0;
+		codec_output->collocated_rd_wr_total_ddr = 0;
+	}
+
 	/* accumulation */
 	codec_output->noc_bw_rd += codec_output->collocated_rd_noc;
 	codec_output->noc_bw_wr += codec_output->collocated_wr_noc;
@@ -806,6 +816,18 @@ static int calculate_bandwidth_encoder_iris3(
 	codec_output->dpb_wr_ddr = (en_llc_enable_rec_wr_uncompleted) ?
 		0 : codec_output->dpb_wr_noc;
 
+	/* I frame only */
+	if (codec_input.hierachical_layer == 7) {
+		codec_output->dpb_rd_y_noc = 0;
+		codec_output->dpb_rd_crcb_noc =0;
+		codec_output->dpb_rdwr_duetooverlap_noc =0;
+		codec_output->dpb_wr_noc =0;
+		codec_output->dpb_rd_y_ddr=0;
+		codec_output->dpb_rd_crcb_ddr =0;
+		codec_output->dpb_rdwr_duetooverlap_ddr=0;
+		codec_output->dpb_wr_ddr =0;
+	}
+
 	/* accumulation */
 	codec_output->noc_bw_rd += codec_output->dpb_rd_y_noc;
 	codec_output->noc_bw_rd += codec_output->dpb_rd_crcb_noc;
@@ -909,9 +931,9 @@ int msm_vidc_calculate_bandwidth(struct api_calculation_input codec_input,
 	int rc = 0;
 
 	if (codec_input.decoder_or_encoder == CODEC_DECODER) {
-		rc = calculate_bandwidth_decoder_iris3(codec_input, codec_output);
+		rc = calculate_bandwidth_decoder_iris33(codec_input, codec_output);
 	} else if (codec_input.decoder_or_encoder == CODEC_ENCODER) {
-		rc = calculate_bandwidth_encoder_iris3(codec_input, codec_output);
+		rc = calculate_bandwidth_encoder_iris33(codec_input, codec_output);
 	} else {
 		d_vpr_e("%s: invalid codec\n", codec_input.decoder_or_encoder);
 		return -EINVAL;

+ 37 - 25
driver/variant/iris33/src/msm_vidc_clock_iris33.c

@@ -198,7 +198,6 @@ static int calculate_vsp_min_freq(struct api_calculation_input codec_input,
 	u8 bitrate_entry = get_bitrate_entry(pixle_count); /* TODO EXTRACT */
 
 	input_bitrate_fp = ((u32)(codec_input.bitrate_mbps * 100 + 99)) / 100;
-	vsp_hw_min_frequency = frequency_table_pineapple[0][1] * input_bitrate_fp * 1000;
 
 	/* 8KUHD60fps with B frame */
 	if ((pixle_count >= fp_pixel_count_bar0) &&
@@ -215,54 +214,60 @@ static int calculate_vsp_min_freq(struct api_calculation_input codec_input,
 		 *
 		 *  TODO : Reduce these conditions by removing the zero entries from Bitrate table.
 		 */
-		vsp_hw_min_frequency = frequency_table_pineapple[0][1] *
+
+		vsp_hw_min_frequency = frequency_table_pineapple[0][2] *
 			input_bitrate_fp * 1000;
 
 		if (codec_input.codec == CODEC_AV1)
-			vsp_hw_min_frequency = frequency_table_pineapple[0][0] *
+			vsp_hw_min_frequency = frequency_table_pineapple[0][1] *
 				input_bitrate_fp * 1000;
 
 		if ((codec_input.codec == CODEC_H264) ||
-			(codec_input.codec == CODEC_H264_CAVLC) ||
-			((codec_input.codec == CODEC_HEVC) &&
-			(codec_input.vsp_vpp_mode == CODEC_VSPVPP_MODE_1S))) {
+			(codec_input.codec == CODEC_H264_CAVLC)) {
+			vsp_hw_min_frequency = (frequency_table_pineapple[0][2] * 1000 +
+				(fw_sw_vsp_offset - 1));
 			vsp_hw_min_frequency =
-				DIV_ROUND_UP(frequency_table_pineapple[0][1], fw_sw_vsp_offset);
-		} else if (((codec_input.codec == CODEC_HEVC) &&
-			(codec_input.vsp_vpp_mode == CODEC_VSPVPP_MODE_2S))
-			|| (codec_input.codec == CODEC_VP9)
-			|| (codec_input.codec == CODEC_AV1)) {
+				DIV_ROUND_UP(vsp_hw_min_frequency, fw_sw_vsp_offset);
+		} else {
 			if (codec_input.vsp_vpp_mode == CODEC_VSPVPP_MODE_2S) {
+				vsp_hw_min_frequency = vsp_hw_min_frequency +
+					(bitrate_table_pineapple_2stage_fp[codec][0] *
+					fw_sw_vsp_offset - 1);
 				vsp_hw_min_frequency = DIV_ROUND_UP(vsp_hw_min_frequency,
-					(bitrate_table_pineapple_2stage_fp[codec][0] * fw_sw_vsp_offset));
+					(bitrate_table_pineapple_2stage_fp[codec][0]) *
+						fw_sw_vsp_offset);
 			} else {
+				vsp_hw_min_frequency = vsp_hw_min_frequency +
+					(bitrate_table_pineapple_1stage_fp[codec][0] *
+					fw_sw_vsp_offset - 1);
 				vsp_hw_min_frequency = DIV_ROUND_UP(vsp_hw_min_frequency,
-					(bitrate_table_pineapple_1stage_fp[codec][0] * fw_sw_vsp_offset));
+					(bitrate_table_pineapple_1stage_fp[codec][0]) *
+						fw_sw_vsp_offset);
 			}
 		}
 	} else {
-		vsp_hw_min_frequency = frequency_table_pineapple[0][1] *
+		vsp_hw_min_frequency = frequency_table_pineapple[0][2] *
 			input_bitrate_fp * 1000;
 
 		if (codec_input.codec == CODEC_AV1 && bitrate_entry == 1)
-			vsp_hw_min_frequency = frequency_table_pineapple[0][0] *
+			vsp_hw_min_frequency = frequency_table_pineapple[0][1] *
 				input_bitrate_fp * 1000;
 
-		if ((codec_input.codec == CODEC_H264_CAVLC) &&
-			(codec_input.entropy_coding_mode == CODEC_ENTROPY_CODING_CAVLC))
-			codec = CODEC_H264_CAVLC;
-		else if ((codec_input.codec == CODEC_H264) &&
-			(codec_input.entropy_coding_mode == CODEC_ENTROPY_CODING_CABAC))
-			codec = CODEC_H264;
-
-		if (codec_input.vsp_vpp_mode == CODEC_VSPVPP_MODE_2S)
+		if (codec_input.vsp_vpp_mode == CODEC_VSPVPP_MODE_2S) {
+			vsp_hw_min_frequency = vsp_hw_min_frequency +
+				(bitrate_table_pineapple_2stage_fp[codec][bitrate_entry] *
+				fw_sw_vsp_offset - 1);
 			vsp_hw_min_frequency = DIV_ROUND_UP(vsp_hw_min_frequency,
 				(bitrate_table_pineapple_2stage_fp[codec][bitrate_entry]) *
 					fw_sw_vsp_offset);
-		else
+		} else {
+			vsp_hw_min_frequency = vsp_hw_min_frequency +
+				(bitrate_table_pineapple_1stage_fp[codec][bitrate_entry] *
+				fw_sw_vsp_offset - 1);
 			vsp_hw_min_frequency = DIV_ROUND_UP(vsp_hw_min_frequency,
 				(bitrate_table_pineapple_1stage_fp[codec][bitrate_entry]) *
 					fw_sw_vsp_offset);
+		}
 	}
 
 	codec_output->vsp_min_freq = vsp_hw_min_frequency;
@@ -322,6 +327,7 @@ static int calculate_vpp_min_freq(struct api_calculation_input codec_input,
 	u32 lpmode_uhd_cycle_permb = 0;
 	u32 hqmode1080p_cycle_permb = 0;
 	u32 encoder_vpp_target_clk_per_mb = 0;
+	u32 decoder_vpp_fw_overhead = DECODER_VPP_FW_OVERHEAD_PINEAPPLE;
 
 	codec_mbspersession_pineaple =
 		calculate_number_mbs_pineapple(codec_input.frame_width,
@@ -342,11 +348,16 @@ static int calculate_vpp_min_freq(struct api_calculation_input codec_input,
 				pipe_penalty_codec + 999) / 1000;
 		}
 
+		if (codec_input.codec == CODEC_AV1)
+			decoder_vpp_fw_overhead = DECODER_VPP_FW_OVERHEAD_PINEAPPLE_AV1D;
+		else
+			decoder_vpp_fw_overhead = DECODER_VPP_FW_OVERHEAD_PINEAPPLE_NONAV1D;
+
 		if (codec_input.vsp_vpp_mode == CODEC_VSPVPP_MODE_2S) {
 			/* FW overhead, convert FW cycles to impact to one pipe */
 			u64 decoder_vpp_fw_overhead = 0;
 			decoder_vpp_fw_overhead =
-				DIV_ROUND_UP((DECODER_VPP_FW_OVERHEAD_PINEAPPLE * 10 *
+				DIV_ROUND_UP((decoder_vpp_fw_overhead * 10 *
 				codec_input.frame_rate), 15);
 
 			decoder_vpp_fw_overhead =
@@ -407,6 +418,7 @@ static int calculate_vpp_min_freq(struct api_calculation_input codec_input,
 	} else { /* encoder */
 		/* Decide LP/HQ */
 		u8 hq_mode = 0;
+
 		if (codec_input.pipe_num > 1)
 			if (codec_input.frame_width * codec_input.frame_height <=
 				1920 * 1080)

+ 49 - 33
driver/variant/iris33/src/msm_vidc_power_iris33.c

@@ -40,10 +40,13 @@ static int msm_vidc_init_codec_input_freq(struct msm_vidc_inst *inst, u32 data_s
 		codec_input->codec    = CODEC_H264;
 		codec_input->lcu_size = 16;
 		if (inst->capabilities->cap[ENTROPY_MODE].value ==
-				V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CABAC)
+				V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CABAC) {
 			codec_input->entropy_coding_mode = CODEC_ENTROPY_CODING_CABAC;
-		else
+			codec_input->codec = CODEC_H264;
+		} else {
 			codec_input->entropy_coding_mode = CODEC_ENTROPY_CODING_CAVLC;
+			codec_input->codec = CODEC_H264_CAVLC;
+		}
 	} else if (inst->codec == MSM_VIDC_HEVC) {
 		codec_input->codec    = CODEC_HEVC;
 		codec_input->lcu_size = 32;
@@ -98,8 +101,13 @@ static int msm_vidc_init_codec_input_freq(struct msm_vidc_inst *inst, u32 data_s
 			inst->fmts[INPUT_PORT].fmt.pix_mp.pixelformat, __func__);
 
 	codec_input->linear_opb = is_linear_colorformat(color_fmt);
-	codec_input->bitrate_mbps =
-		(codec_input->frame_rate * data_size * 8) / 1000000;
+
+	if (inst->domain == MSM_VIDC_DECODER)
+		codec_input->bitrate_mbps =
+			(codec_input->frame_rate * data_size * 8) / 1000000;
+	else
+		codec_input->bitrate_mbps =
+			inst->capabilities->cap[BIT_RATE].value / 1000000;
 
 	/* disable av1d commercial tile */
 	codec_input->av1d_commer_tile_enable = 0;
@@ -130,7 +138,19 @@ static int msm_vidc_init_codec_input_bus(struct msm_vidc_inst *inst, struct vidc
 	codec_input->chipset_gen = MSM_PINEAPPLE;
 
 	if (d->codec == MSM_VIDC_H264) {
-		codec_input->codec = CODEC_H264;
+		if (inst->capabilities->cap[ENTROPY_MODE].value ==
+			V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CABAC) {
+			codec_input->entropy_coding_mode = CODEC_ENTROPY_CODING_CABAC;
+			codec_input->codec = CODEC_H264;
+		} else if (inst->capabilities->cap[ENTROPY_MODE].value ==
+			V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CAVLC) {
+			codec_input->entropy_coding_mode = CODEC_ENTROPY_CODING_CAVLC;
+			codec_input->codec = CODEC_H264_CAVLC;
+		} else {
+			d_vpr_e("%s: invalid entropy %d\n", __func__,
+				inst->capabilities->cap[ENTROPY_MODE].value);
+			return -EINVAL;
+		}
 	} else if (d->codec == MSM_VIDC_HEVC) {
 		codec_input->codec = CODEC_HEVC;
 	} else if (d->codec == MSM_VIDC_VP9) {
@@ -157,18 +177,6 @@ static int msm_vidc_init_codec_input_bus(struct msm_vidc_inst *inst, struct vidc
 		return -EINVAL;
 	}
 
-	if (inst->capabilities->cap[ENTROPY_MODE].value ==
-			V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CABAC) {
-		codec_input->entropy_coding_mode = CODEC_ENTROPY_CODING_CABAC;
-	} else if (inst->capabilities->cap[ENTROPY_MODE].value ==
-			V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CAVLC) {
-		codec_input->entropy_coding_mode = CODEC_ENTROPY_CODING_CAVLC;
-	} else {
-		d_vpr_e("%s: invalid entropy %d\n", __func__,
-				inst->capabilities->cap[ENTROPY_MODE].value);
-		return -EINVAL;
-	}
-
 	/*
 	 * Used for calculating Encoder GOP Complexity
 	 * hierachical_layer=0..7 used as Array Index
@@ -224,19 +232,24 @@ static int msm_vidc_init_codec_input_bus(struct msm_vidc_inst *inst, struct vidc
 
 	/* TODO Confirm if no multiref */
 	codec_input->encoder_multiref = 0;  /* set as no multiref */
-	codec_input->bitrate_mbps = (d->bitrate / 1000000); /* bps 10; set as 10mbps */
+	codec_input->bitrate_mbps = (d->bitrate / 1000000);
 
 	opb_compression_enabled = d->num_formats >= 2 && __ubwc(d->color_formats[1]);
 
-	/* ANDROID CR is in Q16 format, StaticModel CR in x100 format */
-	codec_input->cr_dpb = ((Q16_INT(d->compression_ratio)*100) +
-		Q16_FRAC(d->compression_ratio));
-
-	codec_input->cr_opb = opb_compression_enabled ?
-		codec_input->cr_dpb : FP_ONE;
-
-	codec_input->cr_ipb = ((Q16_INT(d->input_cr)*100) + Q16_FRAC(d->input_cr));
-	codec_input->cr_rpb = codec_input->cr_dpb;  /* cr_rpb ony for encoder */
+	/* video driver CR is in Q16 format, StaticModel CR in x100 format */
+	if (d->domain == MSM_VIDC_DECODER) {
+		codec_input->cr_dpb = ((Q16_INT(d->compression_ratio)*100) +
+			Q16_FRAC(d->compression_ratio));
+		codec_input->cr_opb = codec_input->cr_dpb;
+		if (codec_input->split_opb == 1) {
+			/* need to check the value if linear opb, currently set min cr */
+			codec_input->cr_opb = 100;
+		}
+	} else {
+		codec_input->cr_ipb = ((Q16_INT(d->input_cr)*100) + Q16_FRAC(d->input_cr));
+		codec_input->cr_rpb = ((Q16_INT(d->compression_ratio)*100) +
+			Q16_FRAC(d->compression_ratio));
+	}
 
 	/* disable by default, only enable for aurora depth map session */
 	codec_input->lumaonly_decode = 0;
@@ -375,10 +388,12 @@ static u64 msm_vidc_calc_freq_iris33_new(struct msm_vidc_inst *inst, u32 data_si
 		}
 	}
 
-	freq = codec_output.hw_min_freq * 1000000; /* Convert to Hz */
+	freq = (u64)codec_output.hw_min_freq * 1000000; /* Convert to Hz */
 
-	i_vpr_p(inst, "%s: filled len %d, required freq %llu, fps %u, mbpf %u\n",
-		__func__, data_size, freq, fps, mbpf);
+	i_vpr_p(inst, "%s: filled len %d, required freq %llu, vpp %u, vsp %u, tensilica %u, hw_freq %u, fps %u, mbpf %u\n",
+		__func__, data_size, freq, codec_output.vpp_min_freq,
+		codec_output.vsp_min_freq, codec_output.tensilica_min_freq,
+		codec_output.hw_min_freq, fps, mbpf);
 
 	if (inst->codec == MSM_VIDC_AV1 ||
 		(inst->iframe && is_hevc_10bit_decode_session(inst))) {
@@ -662,6 +677,10 @@ u64 msm_vidc_calc_freq_iris33_legacy(struct msm_vidc_inst *inst, u32 data_size)
 	freq = max(vpp_cycles, vsp_cycles);
 	freq = max(freq, fw_cycles);
 
+	i_vpr_p(inst, "%s: filled len %d, required freq %llu, vpp %llu, vsp %llu, fw_cycles %llu, fps %u, mbpf %u\n",
+		__func__, data_size, freq,
+		vpp_cycles, vsp_cycles, fw_cycles, fps, mbpf);
+
 	if (inst->codec == MSM_VIDC_AV1 ||
 		(inst->iframe && is_hevc_10bit_decode_session(inst))) {
 		/*
@@ -675,9 +694,6 @@ u64 msm_vidc_calc_freq_iris33_legacy(struct msm_vidc_inst *inst, u32 data_size)
 			freq = core->resource->freq_set.freq_tbl[1].freq;
 	}
 
-	i_vpr_p(inst, "%s: filled len %d, required freq %llu, fps %u, mbpf %u\n",
-		__func__, data_size, freq, fps, mbpf);
-
 	return freq;
 }