video: driver: update power calculations

Update the power calculations to avoid low frame rate and
frame drop issues.

Change-Id: Ice306a44d4ea8242d965c0e5d03edffc0f5e0f8b
Signed-off-by: Ashish Patil <quic_ashpat@quicinc.com>
This commit is contained in:
Ashish Patil
2023-02-23 19:15:54 -08:00
parent c0d457f55a
commit dd0e6d5f70
5 changed files with 116 additions and 66 deletions

View File

@@ -161,7 +161,7 @@ struct api_calculation_freq_output {
u32 vsp_min_freq; u32 vsp_min_freq;
u32 tensilica_min_freq; u32 tensilica_min_freq;
u32 hw_min_freq; u32 hw_min_freq;
u32 enc_hqmode; u32 enc_hqmode;
struct corner_voting usecase_corner; struct corner_voting usecase_corner;
}; };

View File

@@ -25,8 +25,8 @@ static u32 frequency_table_pineapple[2][6] =
*/ */
/* Tensilica cycles profiled by FW team in lanai device Feb 2022 */ /* Tensilica cycles profiled by FW team in lanai device Feb 2022 */
#define DECODER_VPP_FW_OVERHEAD_PINEAPPLE_AV1D ((80000*3)/2)); #define DECODER_VPP_FW_OVERHEAD_PINEAPPLE_AV1D ((80000*3)/2)
#define DECODER_VPP_FW_OVERHEAD_PINEAPPLE_NONAV1D ((60000*3)/2); #define DECODER_VPP_FW_OVERHEAD_PINEAPPLE_NONAV1D ((60000*3)/2)
/* Tensilica cycles */ /* Tensilica cycles */
#define DECODER_VPP_FW_OVERHEAD_PINEAPPLE (0) #define DECODER_VPP_FW_OVERHEAD_PINEAPPLE (0)

View File

@@ -125,7 +125,7 @@ u32 get_compression_factors(struct compression_factors *compression_factor,
return 0; return 0;
} }
static int calculate_bandwidth_decoder_iris3( static int calculate_bandwidth_decoder_iris33(
struct api_calculation_input codec_input, struct api_calculation_input codec_input,
struct api_calculation_bw_output *codec_output) struct api_calculation_bw_output *codec_output)
{ {
@@ -298,7 +298,7 @@ static int calculate_bandwidth_decoder_iris3(
else else
av1tile_index_entry = 6; av1tile_index_entry = 6;
/* NOT PWC //or average and power case */ /* NOT PWC or average and power case */
if (codec_input.complexity_setting != 0) if (codec_input.complexity_setting != 0)
av1tile_complexity = 1; av1tile_complexity = 1;
else else
@@ -551,7 +551,7 @@ static int calculate_bandwidth_decoder_iris3(
return 0; return 0;
} }
static int calculate_bandwidth_encoder_iris3( static int calculate_bandwidth_encoder_iris33(
struct api_calculation_input codec_input, struct api_calculation_input codec_input,
struct api_calculation_bw_output *codec_output) struct api_calculation_bw_output *codec_output)
{ {
@@ -703,6 +703,16 @@ static int calculate_bandwidth_encoder_iris3(
codec_output->collocated_rd_wr_total_ddr = codec_output->collocated_rd_wr_total_ddr =
codec_output->collocated_rd_wr_total_noc; codec_output->collocated_rd_wr_total_noc;
/* I frame only */
if (codec_input.hierachical_layer == 7) {
codec_output->collocated_rd_noc = 0;
codec_output->collocated_wr_noc = 0;
codec_output->collocated_rd_ddr = 0;
codec_output->collocated_wr_ddr = 0;
codec_output->collocated_rd_wr_total_noc = 0;
codec_output->collocated_rd_wr_total_ddr = 0;
}
/* accumulation */ /* accumulation */
codec_output->noc_bw_rd += codec_output->collocated_rd_noc; codec_output->noc_bw_rd += codec_output->collocated_rd_noc;
codec_output->noc_bw_wr += codec_output->collocated_wr_noc; codec_output->noc_bw_wr += codec_output->collocated_wr_noc;
@@ -806,6 +816,18 @@ static int calculate_bandwidth_encoder_iris3(
codec_output->dpb_wr_ddr = (en_llc_enable_rec_wr_uncompleted) ? codec_output->dpb_wr_ddr = (en_llc_enable_rec_wr_uncompleted) ?
0 : codec_output->dpb_wr_noc; 0 : codec_output->dpb_wr_noc;
/* I frame only */
if (codec_input.hierachical_layer == 7) {
codec_output->dpb_rd_y_noc = 0;
codec_output->dpb_rd_crcb_noc =0;
codec_output->dpb_rdwr_duetooverlap_noc =0;
codec_output->dpb_wr_noc =0;
codec_output->dpb_rd_y_ddr=0;
codec_output->dpb_rd_crcb_ddr =0;
codec_output->dpb_rdwr_duetooverlap_ddr=0;
codec_output->dpb_wr_ddr =0;
}
/* accumulation */ /* accumulation */
codec_output->noc_bw_rd += codec_output->dpb_rd_y_noc; codec_output->noc_bw_rd += codec_output->dpb_rd_y_noc;
codec_output->noc_bw_rd += codec_output->dpb_rd_crcb_noc; codec_output->noc_bw_rd += codec_output->dpb_rd_crcb_noc;
@@ -909,9 +931,9 @@ int msm_vidc_calculate_bandwidth(struct api_calculation_input codec_input,
int rc = 0; int rc = 0;
if (codec_input.decoder_or_encoder == CODEC_DECODER) { if (codec_input.decoder_or_encoder == CODEC_DECODER) {
rc = calculate_bandwidth_decoder_iris3(codec_input, codec_output); rc = calculate_bandwidth_decoder_iris33(codec_input, codec_output);
} else if (codec_input.decoder_or_encoder == CODEC_ENCODER) { } else if (codec_input.decoder_or_encoder == CODEC_ENCODER) {
rc = calculate_bandwidth_encoder_iris3(codec_input, codec_output); rc = calculate_bandwidth_encoder_iris33(codec_input, codec_output);
} else { } else {
d_vpr_e("%s: invalid codec\n", codec_input.decoder_or_encoder); d_vpr_e("%s: invalid codec\n", codec_input.decoder_or_encoder);
return -EINVAL; return -EINVAL;

View File

@@ -198,7 +198,6 @@ static int calculate_vsp_min_freq(struct api_calculation_input codec_input,
u8 bitrate_entry = get_bitrate_entry(pixle_count); /* TODO EXTRACT */ u8 bitrate_entry = get_bitrate_entry(pixle_count); /* TODO EXTRACT */
input_bitrate_fp = ((u32)(codec_input.bitrate_mbps * 100 + 99)) / 100; input_bitrate_fp = ((u32)(codec_input.bitrate_mbps * 100 + 99)) / 100;
vsp_hw_min_frequency = frequency_table_pineapple[0][1] * input_bitrate_fp * 1000;
/* 8KUHD60fps with B frame */ /* 8KUHD60fps with B frame */
if ((pixle_count >= fp_pixel_count_bar0) && if ((pixle_count >= fp_pixel_count_bar0) &&
@@ -215,54 +214,60 @@ static int calculate_vsp_min_freq(struct api_calculation_input codec_input,
* *
* TODO : Reduce these conditions by removing the zero entries from Bitrate table. * TODO : Reduce these conditions by removing the zero entries from Bitrate table.
*/ */
vsp_hw_min_frequency = frequency_table_pineapple[0][1] *
vsp_hw_min_frequency = frequency_table_pineapple[0][2] *
input_bitrate_fp * 1000; input_bitrate_fp * 1000;
if (codec_input.codec == CODEC_AV1) if (codec_input.codec == CODEC_AV1)
vsp_hw_min_frequency = frequency_table_pineapple[0][0] * vsp_hw_min_frequency = frequency_table_pineapple[0][1] *
input_bitrate_fp * 1000; input_bitrate_fp * 1000;
if ((codec_input.codec == CODEC_H264) || if ((codec_input.codec == CODEC_H264) ||
(codec_input.codec == CODEC_H264_CAVLC) || (codec_input.codec == CODEC_H264_CAVLC)) {
((codec_input.codec == CODEC_HEVC) && vsp_hw_min_frequency = (frequency_table_pineapple[0][2] * 1000 +
(codec_input.vsp_vpp_mode == CODEC_VSPVPP_MODE_1S))) { (fw_sw_vsp_offset - 1));
vsp_hw_min_frequency = vsp_hw_min_frequency =
DIV_ROUND_UP(frequency_table_pineapple[0][1], fw_sw_vsp_offset); DIV_ROUND_UP(vsp_hw_min_frequency, fw_sw_vsp_offset);
} else if (((codec_input.codec == CODEC_HEVC) && } else {
(codec_input.vsp_vpp_mode == CODEC_VSPVPP_MODE_2S))
|| (codec_input.codec == CODEC_VP9)
|| (codec_input.codec == CODEC_AV1)) {
if (codec_input.vsp_vpp_mode == CODEC_VSPVPP_MODE_2S) { if (codec_input.vsp_vpp_mode == CODEC_VSPVPP_MODE_2S) {
vsp_hw_min_frequency = vsp_hw_min_frequency +
(bitrate_table_pineapple_2stage_fp[codec][0] *
fw_sw_vsp_offset - 1);
vsp_hw_min_frequency = DIV_ROUND_UP(vsp_hw_min_frequency, vsp_hw_min_frequency = DIV_ROUND_UP(vsp_hw_min_frequency,
(bitrate_table_pineapple_2stage_fp[codec][0] * fw_sw_vsp_offset)); (bitrate_table_pineapple_2stage_fp[codec][0]) *
fw_sw_vsp_offset);
} else { } else {
vsp_hw_min_frequency = vsp_hw_min_frequency +
(bitrate_table_pineapple_1stage_fp[codec][0] *
fw_sw_vsp_offset - 1);
vsp_hw_min_frequency = DIV_ROUND_UP(vsp_hw_min_frequency, vsp_hw_min_frequency = DIV_ROUND_UP(vsp_hw_min_frequency,
(bitrate_table_pineapple_1stage_fp[codec][0] * fw_sw_vsp_offset)); (bitrate_table_pineapple_1stage_fp[codec][0]) *
fw_sw_vsp_offset);
} }
} }
} else { } else {
vsp_hw_min_frequency = frequency_table_pineapple[0][1] * vsp_hw_min_frequency = frequency_table_pineapple[0][2] *
input_bitrate_fp * 1000; input_bitrate_fp * 1000;
if (codec_input.codec == CODEC_AV1 && bitrate_entry == 1) if (codec_input.codec == CODEC_AV1 && bitrate_entry == 1)
vsp_hw_min_frequency = frequency_table_pineapple[0][0] * vsp_hw_min_frequency = frequency_table_pineapple[0][1] *
input_bitrate_fp * 1000; input_bitrate_fp * 1000;
if ((codec_input.codec == CODEC_H264_CAVLC) && if (codec_input.vsp_vpp_mode == CODEC_VSPVPP_MODE_2S) {
(codec_input.entropy_coding_mode == CODEC_ENTROPY_CODING_CAVLC)) vsp_hw_min_frequency = vsp_hw_min_frequency +
codec = CODEC_H264_CAVLC; (bitrate_table_pineapple_2stage_fp[codec][bitrate_entry] *
else if ((codec_input.codec == CODEC_H264) && fw_sw_vsp_offset - 1);
(codec_input.entropy_coding_mode == CODEC_ENTROPY_CODING_CABAC))
codec = CODEC_H264;
if (codec_input.vsp_vpp_mode == CODEC_VSPVPP_MODE_2S)
vsp_hw_min_frequency = DIV_ROUND_UP(vsp_hw_min_frequency, vsp_hw_min_frequency = DIV_ROUND_UP(vsp_hw_min_frequency,
(bitrate_table_pineapple_2stage_fp[codec][bitrate_entry]) * (bitrate_table_pineapple_2stage_fp[codec][bitrate_entry]) *
fw_sw_vsp_offset); fw_sw_vsp_offset);
else } else {
vsp_hw_min_frequency = vsp_hw_min_frequency +
(bitrate_table_pineapple_1stage_fp[codec][bitrate_entry] *
fw_sw_vsp_offset - 1);
vsp_hw_min_frequency = DIV_ROUND_UP(vsp_hw_min_frequency, vsp_hw_min_frequency = DIV_ROUND_UP(vsp_hw_min_frequency,
(bitrate_table_pineapple_1stage_fp[codec][bitrate_entry]) * (bitrate_table_pineapple_1stage_fp[codec][bitrate_entry]) *
fw_sw_vsp_offset); fw_sw_vsp_offset);
}
} }
codec_output->vsp_min_freq = vsp_hw_min_frequency; codec_output->vsp_min_freq = vsp_hw_min_frequency;
@@ -322,6 +327,7 @@ static int calculate_vpp_min_freq(struct api_calculation_input codec_input,
u32 lpmode_uhd_cycle_permb = 0; u32 lpmode_uhd_cycle_permb = 0;
u32 hqmode1080p_cycle_permb = 0; u32 hqmode1080p_cycle_permb = 0;
u32 encoder_vpp_target_clk_per_mb = 0; u32 encoder_vpp_target_clk_per_mb = 0;
u32 decoder_vpp_fw_overhead = DECODER_VPP_FW_OVERHEAD_PINEAPPLE;
codec_mbspersession_pineaple = codec_mbspersession_pineaple =
calculate_number_mbs_pineapple(codec_input.frame_width, calculate_number_mbs_pineapple(codec_input.frame_width,
@@ -342,11 +348,16 @@ static int calculate_vpp_min_freq(struct api_calculation_input codec_input,
pipe_penalty_codec + 999) / 1000; pipe_penalty_codec + 999) / 1000;
} }
if (codec_input.codec == CODEC_AV1)
decoder_vpp_fw_overhead = DECODER_VPP_FW_OVERHEAD_PINEAPPLE_AV1D;
else
decoder_vpp_fw_overhead = DECODER_VPP_FW_OVERHEAD_PINEAPPLE_NONAV1D;
if (codec_input.vsp_vpp_mode == CODEC_VSPVPP_MODE_2S) { if (codec_input.vsp_vpp_mode == CODEC_VSPVPP_MODE_2S) {
/* FW overhead, convert FW cycles to impact to one pipe */ /* FW overhead, convert FW cycles to impact to one pipe */
u64 decoder_vpp_fw_overhead = 0; u64 decoder_vpp_fw_overhead = 0;
decoder_vpp_fw_overhead = decoder_vpp_fw_overhead =
DIV_ROUND_UP((DECODER_VPP_FW_OVERHEAD_PINEAPPLE * 10 * DIV_ROUND_UP((decoder_vpp_fw_overhead * 10 *
codec_input.frame_rate), 15); codec_input.frame_rate), 15);
decoder_vpp_fw_overhead = decoder_vpp_fw_overhead =
@@ -407,6 +418,7 @@ static int calculate_vpp_min_freq(struct api_calculation_input codec_input,
} else { /* encoder */ } else { /* encoder */
/* Decide LP/HQ */ /* Decide LP/HQ */
u8 hq_mode = 0; u8 hq_mode = 0;
if (codec_input.pipe_num > 1) if (codec_input.pipe_num > 1)
if (codec_input.frame_width * codec_input.frame_height <= if (codec_input.frame_width * codec_input.frame_height <=
1920 * 1080) 1920 * 1080)

View File

@@ -40,10 +40,13 @@ static int msm_vidc_init_codec_input_freq(struct msm_vidc_inst *inst, u32 data_s
codec_input->codec = CODEC_H264; codec_input->codec = CODEC_H264;
codec_input->lcu_size = 16; codec_input->lcu_size = 16;
if (inst->capabilities->cap[ENTROPY_MODE].value == if (inst->capabilities->cap[ENTROPY_MODE].value ==
V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CABAC) V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CABAC) {
codec_input->entropy_coding_mode = CODEC_ENTROPY_CODING_CABAC; codec_input->entropy_coding_mode = CODEC_ENTROPY_CODING_CABAC;
else codec_input->codec = CODEC_H264;
} else {
codec_input->entropy_coding_mode = CODEC_ENTROPY_CODING_CAVLC; codec_input->entropy_coding_mode = CODEC_ENTROPY_CODING_CAVLC;
codec_input->codec = CODEC_H264_CAVLC;
}
} else if (inst->codec == MSM_VIDC_HEVC) { } else if (inst->codec == MSM_VIDC_HEVC) {
codec_input->codec = CODEC_HEVC; codec_input->codec = CODEC_HEVC;
codec_input->lcu_size = 32; codec_input->lcu_size = 32;
@@ -98,8 +101,13 @@ static int msm_vidc_init_codec_input_freq(struct msm_vidc_inst *inst, u32 data_s
inst->fmts[INPUT_PORT].fmt.pix_mp.pixelformat, __func__); inst->fmts[INPUT_PORT].fmt.pix_mp.pixelformat, __func__);
codec_input->linear_opb = is_linear_colorformat(color_fmt); codec_input->linear_opb = is_linear_colorformat(color_fmt);
codec_input->bitrate_mbps =
(codec_input->frame_rate * data_size * 8) / 1000000; if (inst->domain == MSM_VIDC_DECODER)
codec_input->bitrate_mbps =
(codec_input->frame_rate * data_size * 8) / 1000000;
else
codec_input->bitrate_mbps =
inst->capabilities->cap[BIT_RATE].value / 1000000;
/* disable av1d commercial tile */ /* disable av1d commercial tile */
codec_input->av1d_commer_tile_enable = 0; codec_input->av1d_commer_tile_enable = 0;
@@ -130,7 +138,19 @@ static int msm_vidc_init_codec_input_bus(struct msm_vidc_inst *inst, struct vidc
codec_input->chipset_gen = MSM_PINEAPPLE; codec_input->chipset_gen = MSM_PINEAPPLE;
if (d->codec == MSM_VIDC_H264) { if (d->codec == MSM_VIDC_H264) {
codec_input->codec = CODEC_H264; if (inst->capabilities->cap[ENTROPY_MODE].value ==
V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CABAC) {
codec_input->entropy_coding_mode = CODEC_ENTROPY_CODING_CABAC;
codec_input->codec = CODEC_H264;
} else if (inst->capabilities->cap[ENTROPY_MODE].value ==
V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CAVLC) {
codec_input->entropy_coding_mode = CODEC_ENTROPY_CODING_CAVLC;
codec_input->codec = CODEC_H264_CAVLC;
} else {
d_vpr_e("%s: invalid entropy %d\n", __func__,
inst->capabilities->cap[ENTROPY_MODE].value);
return -EINVAL;
}
} else if (d->codec == MSM_VIDC_HEVC) { } else if (d->codec == MSM_VIDC_HEVC) {
codec_input->codec = CODEC_HEVC; codec_input->codec = CODEC_HEVC;
} else if (d->codec == MSM_VIDC_VP9) { } else if (d->codec == MSM_VIDC_VP9) {
@@ -157,18 +177,6 @@ static int msm_vidc_init_codec_input_bus(struct msm_vidc_inst *inst, struct vidc
return -EINVAL; return -EINVAL;
} }
if (inst->capabilities->cap[ENTROPY_MODE].value ==
V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CABAC) {
codec_input->entropy_coding_mode = CODEC_ENTROPY_CODING_CABAC;
} else if (inst->capabilities->cap[ENTROPY_MODE].value ==
V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CAVLC) {
codec_input->entropy_coding_mode = CODEC_ENTROPY_CODING_CAVLC;
} else {
d_vpr_e("%s: invalid entropy %d\n", __func__,
inst->capabilities->cap[ENTROPY_MODE].value);
return -EINVAL;
}
/* /*
* Used for calculating Encoder GOP Complexity * Used for calculating Encoder GOP Complexity
* hierachical_layer=0..7 used as Array Index * hierachical_layer=0..7 used as Array Index
@@ -224,19 +232,24 @@ static int msm_vidc_init_codec_input_bus(struct msm_vidc_inst *inst, struct vidc
/* TODO Confirm if no multiref */ /* TODO Confirm if no multiref */
codec_input->encoder_multiref = 0; /* set as no multiref */ codec_input->encoder_multiref = 0; /* set as no multiref */
codec_input->bitrate_mbps = (d->bitrate / 1000000); /* bps 10; set as 10mbps */ codec_input->bitrate_mbps = (d->bitrate / 1000000);
opb_compression_enabled = d->num_formats >= 2 && __ubwc(d->color_formats[1]); opb_compression_enabled = d->num_formats >= 2 && __ubwc(d->color_formats[1]);
/* ANDROID CR is in Q16 format, StaticModel CR in x100 format */ /* video driver CR is in Q16 format, StaticModel CR in x100 format */
codec_input->cr_dpb = ((Q16_INT(d->compression_ratio)*100) + if (d->domain == MSM_VIDC_DECODER) {
Q16_FRAC(d->compression_ratio)); codec_input->cr_dpb = ((Q16_INT(d->compression_ratio)*100) +
Q16_FRAC(d->compression_ratio));
codec_input->cr_opb = opb_compression_enabled ? codec_input->cr_opb = codec_input->cr_dpb;
codec_input->cr_dpb : FP_ONE; if (codec_input->split_opb == 1) {
/* need to check the value if linear opb, currently set min cr */
codec_input->cr_ipb = ((Q16_INT(d->input_cr)*100) + Q16_FRAC(d->input_cr)); codec_input->cr_opb = 100;
codec_input->cr_rpb = codec_input->cr_dpb; /* cr_rpb ony for encoder */ }
} else {
codec_input->cr_ipb = ((Q16_INT(d->input_cr)*100) + Q16_FRAC(d->input_cr));
codec_input->cr_rpb = ((Q16_INT(d->compression_ratio)*100) +
Q16_FRAC(d->compression_ratio));
}
/* disable by default, only enable for aurora depth map session */ /* disable by default, only enable for aurora depth map session */
codec_input->lumaonly_decode = 0; codec_input->lumaonly_decode = 0;
@@ -375,10 +388,12 @@ static u64 msm_vidc_calc_freq_iris33_new(struct msm_vidc_inst *inst, u32 data_si
} }
} }
freq = codec_output.hw_min_freq * 1000000; /* Convert to Hz */ freq = (u64)codec_output.hw_min_freq * 1000000; /* Convert to Hz */
i_vpr_p(inst, "%s: filled len %d, required freq %llu, fps %u, mbpf %u\n", i_vpr_p(inst, "%s: filled len %d, required freq %llu, vpp %u, vsp %u, tensilica %u, hw_freq %u, fps %u, mbpf %u\n",
__func__, data_size, freq, fps, mbpf); __func__, data_size, freq, codec_output.vpp_min_freq,
codec_output.vsp_min_freq, codec_output.tensilica_min_freq,
codec_output.hw_min_freq, fps, mbpf);
if (inst->codec == MSM_VIDC_AV1 || if (inst->codec == MSM_VIDC_AV1 ||
(inst->iframe && is_hevc_10bit_decode_session(inst))) { (inst->iframe && is_hevc_10bit_decode_session(inst))) {
@@ -662,6 +677,10 @@ u64 msm_vidc_calc_freq_iris33_legacy(struct msm_vidc_inst *inst, u32 data_size)
freq = max(vpp_cycles, vsp_cycles); freq = max(vpp_cycles, vsp_cycles);
freq = max(freq, fw_cycles); freq = max(freq, fw_cycles);
i_vpr_p(inst, "%s: filled len %d, required freq %llu, vpp %llu, vsp %llu, fw_cycles %llu, fps %u, mbpf %u\n",
__func__, data_size, freq,
vpp_cycles, vsp_cycles, fw_cycles, fps, mbpf);
if (inst->codec == MSM_VIDC_AV1 || if (inst->codec == MSM_VIDC_AV1 ||
(inst->iframe && is_hevc_10bit_decode_session(inst))) { (inst->iframe && is_hevc_10bit_decode_session(inst))) {
/* /*
@@ -675,9 +694,6 @@ u64 msm_vidc_calc_freq_iris33_legacy(struct msm_vidc_inst *inst, u32 data_size)
freq = core->resource->freq_set.freq_tbl[1].freq; freq = core->resource->freq_set.freq_tbl[1].freq;
} }
i_vpr_p(inst, "%s: filled len %d, required freq %llu, fps %u, mbpf %u\n",
__func__, data_size, freq, fps, mbpf);
return freq; return freq;
} }