video: driver: lanai power calculations
New bandwidth and frequency calculation functions for lanai. Change-Id: I0ec4a74bc24598628fe94a6ebbe9df5e3cb71a9c Signed-off-by: Ashish Patil <quic_ashpat@quicinc.com>
This commit is contained in:

committed by
Gerrit - the friendly Code Review server

parent
95eccc81e8
commit
e28d357256
@@ -10,6 +10,8 @@
|
||||
#include "msm_vidc_inst.h"
|
||||
#include "msm_vidc_power.h"
|
||||
|
||||
#define ENABLE_LEGACY_POWER_CALCULATIONS 1
|
||||
|
||||
/*
 * iris33 power calculation entry points (declarations only; the
 * definitions are not part of this hunk).
 */
u64 msm_vidc_calc_freq_iris33(struct msm_vidc_inst *inst, u32 data_size);
int msm_vidc_calc_bw_iris33(struct msm_vidc_inst *inst,
		struct vidc_bus_vote_data *vote_data);
|
||||
|
921
driver/variant/iris33/src/msm_vidc_bus_iris33.c
Normal file
921
driver/variant/iris33/src/msm_vidc_bus_iris33.c
Normal file
@@ -0,0 +1,921 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#include "pineapple_technology.h"
|
||||
#include "msm_vidc_debug.h"
|
||||
|
||||
/*
 * calculate_number_lcus_pineapple() - count the LCUs covering a frame.
 * @width:    frame width
 * @height:   frame height
 * @lcu_size: edge length of one (square) LCU; must be non-zero
 *
 * Both dimensions are rounded up to a whole number of LCUs, so a partial
 * LCU at the right or bottom edge counts as a full one.
 *
 * Return: LCUs per row multiplied by LCUs per column.
 */
u32 calculate_number_lcus_pineapple(u32 width, u32 height, u32 lcu_size)
{
	u32 lcus_across = width / lcu_size;
	u32 lcus_down = height / lcu_size;

	/* a remainder means one more, partially covered, LCU */
	if (width % lcu_size)
		lcus_across++;
	if (height % lcu_size)
		lcus_down++;

	return lcus_across * lcus_down;
}
|
||||
|
||||
/*
 * calculate_number_ubwctiles_pineapple() - count the tiles covering a frame.
 * @width:  frame width
 * @height: frame height
 * @tile_w: tile width; must be non-zero
 * @tile_h: tile height; must be non-zero
 *
 * Each dimension is rounded up to a whole number of tiles.
 *
 * Return: tiles per row multiplied by tiles per column.
 */
u32 calculate_number_ubwctiles_pineapple(
	u32 width, u32 height, u32 tile_w, u32 tile_h)
{
	u32 tiles_across = width / tile_w;
	u32 tiles_down = height / tile_h;

	/* a remainder means one more, partially covered, tile */
	if (width % tile_w)
		tiles_across++;
	if (height % tile_h)
		tiles_down++;

	return tiles_across * tiles_down;
}
|
||||
|
||||
/*
 * Compression factors filled in by get_compression_factors().
 *
 * Values taken from the interface are stored as "ratio * 100"; the
 * table-driven values are presumably on the same scale - TODO confirm
 * against the tables in pineapple_technology.h.
 */
struct compression_factors {
	u32 dpb_cf_y;		/* luma factor: DPB (decoder) / RPB (encoder) */
	u32 dpb_cf_cbcr;	/* chroma factor: DPB (decoder) / RPB (encoder) */
	u32 opb_cf_ycbcr;	/* combined OPB factor, written for decoders only */
	u32 dpb_cr_y;		/* not written by any code visible in this view */
	u32 ipb_cr_y;		/* IPB luma factor, written for encoders only */
	u32 ipb_cr;		/* combined IPB factor, written for encoders only */
};

/*
 * File-scope scratch object: the bandwidth calculators pass its address to
 * get_compression_factors() and read the result straight back.
 * NOTE(review): shared and unsynchronised within this file - confirm that
 * callers serialise the calculations, or move it onto the callers' stacks.
 */
struct compression_factors compression_factor;
|
||||
|
||||
u32 get_compression_factors(struct compression_factors *compression_factor,
|
||||
struct api_calculation_input codec_input)
|
||||
{
|
||||
u8 cr_index_entry, cr_index_y, cr_index_c, cr_index_uni;
|
||||
u32 frame_width;
|
||||
u32 frame_height;
|
||||
|
||||
frame_width = codec_input.frame_width;
|
||||
frame_height = codec_input.frame_height;
|
||||
if (frame_width * frame_height <= 1920 * 1080)
|
||||
cr_index_entry = 0;
|
||||
else
|
||||
cr_index_entry = 1;
|
||||
|
||||
if (codec_input.bitdepth == CODEC_BITDEPTH_8) {
|
||||
/* NOT PWC or average and power case */
|
||||
if (codec_input.complexity_setting != 0) {
|
||||
cr_index_y = 0;
|
||||
cr_index_c = 1;
|
||||
cr_index_uni = 2;
|
||||
} else {
|
||||
cr_index_y = 3;
|
||||
cr_index_c = 4;
|
||||
cr_index_uni = 5;
|
||||
}
|
||||
} else {
|
||||
/* NOT PWC or average and power case */
|
||||
if (codec_input.complexity_setting != 0) {
|
||||
cr_index_y = 6;
|
||||
cr_index_c = 7;
|
||||
cr_index_uni = 8;
|
||||
} else {
|
||||
cr_index_y = 9;
|
||||
cr_index_c = 10;
|
||||
cr_index_uni = 11;
|
||||
}
|
||||
}
|
||||
|
||||
if (codec_input.decoder_or_encoder == CODEC_DECODER) {
|
||||
compression_factor->dpb_cf_y =
|
||||
dpbopb_ubwc30_cr_table_cratio_pineapple[cr_index_entry][cr_index_y];
|
||||
compression_factor->dpb_cf_cbcr =
|
||||
dpbopb_ubwc30_cr_table_cratio_pineapple[cr_index_entry][cr_index_c];
|
||||
compression_factor->opb_cf_ycbcr =
|
||||
dpbopb_ubwc30_cr_table_cratio_pineapple[cr_index_entry][cr_index_uni];
|
||||
|
||||
if ((codec_input.regression_mode == 3) &&
|
||||
/* input cr numbers from interface */
|
||||
((codec_input.cr_dpb != 0) || (codec_input.cr_opb != 0))) {
|
||||
compression_factor->dpb_cf_y = (u32)(codec_input.cr_dpb * 100);
|
||||
compression_factor->dpb_cf_cbcr = (u32)(codec_input.cr_dpb * 100);
|
||||
compression_factor->opb_cf_ycbcr = (u32)(codec_input.cr_opb * 100);
|
||||
}
|
||||
} else { /* encoder */
|
||||
/*
|
||||
* IPB CR Table Choice; static sheet (if framewidth<3840, use lossless table)
|
||||
* (else, use lossy table)
|
||||
* stick to this choice for SW purpose (no change for SW)
|
||||
*/
|
||||
if (frame_width < 3840) {
|
||||
compression_factor->ipb_cr =
|
||||
ipblossless_ubwc30_cr_table_cratio_pineapple[cr_index_entry][cr_index_uni];
|
||||
compression_factor->ipb_cr_y =
|
||||
ipblossless_ubwc30_cr_table_cratio_pineapple[cr_index_entry][cr_index_y];
|
||||
} else {
|
||||
compression_factor->ipb_cr =
|
||||
ipblossy_ubwc30_cr_table_cratio_pineapple[cr_index_entry][cr_index_uni];
|
||||
compression_factor->ipb_cr_y =
|
||||
ipblossy_ubwc30_cr_table_cratio_pineapple[cr_index_entry][cr_index_y];
|
||||
}
|
||||
|
||||
compression_factor->dpb_cf_y =
|
||||
rpb_ubwc30_cr_table_cratio_pineapple[cr_index_entry][cr_index_y];
|
||||
|
||||
compression_factor->dpb_cf_cbcr =
|
||||
rpb_ubwc30_cr_table_cratio_pineapple[cr_index_entry][cr_index_c];
|
||||
|
||||
if ((codec_input.regression_mode == 3) &&
|
||||
/* input cr from interface */
|
||||
((codec_input.cr_ipb != 0) || (codec_input.cr_rpb != 0))) {
|
||||
compression_factor->dpb_cf_y = (u32)(codec_input.cr_rpb * 100);
|
||||
compression_factor->dpb_cf_cbcr = (u32)(codec_input.cr_rpb * 100);
|
||||
compression_factor->ipb_cr_y = (u32)(codec_input.cr_ipb * 100);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int calculate_bandwidth_decoder_iris3(
|
||||
struct api_calculation_input codec_input,
|
||||
struct api_calculation_bw_output *codec_output)
|
||||
{
|
||||
/* common control parameters */
|
||||
u32 frame_width;
|
||||
u32 frame_height;
|
||||
u32 frame_lcu_size = 16; /* initialized to h264 */
|
||||
u32 lcu_per_frame;
|
||||
u32 target_bitrate;
|
||||
u32 collocated_bytes_per_lcu = 16; /* initialized to h264 */
|
||||
u32 av1d_segment_read_per_lcu;
|
||||
u32 av1d_fe_leftlinebuffer_perlcu_tileboudary;
|
||||
|
||||
u32 frame420_y_bw_linear_8bpp;
|
||||
u32 frame420_y_bw_no_ubwc_tile_10bpp;
|
||||
u32 frame420_y_bw_linear_10bpp;
|
||||
|
||||
u16 ubwc_tile_w;
|
||||
u16 ubwc_tile_h;
|
||||
|
||||
u32 dpb_compression_factor_y;
|
||||
u32 dpb_compression_factor_cbcr;
|
||||
|
||||
u32 reconstructed_write_bw_factor_rd;
|
||||
u32 reference_y_read_bw_factor;
|
||||
u32 reference_cbcr_read_bw_factor;
|
||||
|
||||
/* decoder control parameters */
|
||||
u32 decoder_vsp_read_factor = 6;
|
||||
u32 bins_to_bits_factor = 4;
|
||||
|
||||
u32 dpb_to_opb_ratios_ds = 1;
|
||||
|
||||
u8 llc_enabled_ref_y_rd = 1;
|
||||
u8 llc_enable_ref_crcb_rd = 1;
|
||||
u8 llc_enabled_bse_tlb = 1;
|
||||
/* this is for 2pipe and 1pipe LLC */
|
||||
u8 llc_enable_probtable_av1d_21pipe = 0;
|
||||
|
||||
u32 opb_compression_factor_ycbcr;
|
||||
u32 dpb_ubwc_tile_width_pixels;
|
||||
u32 dpb_ubwc_tile_height_pixels;
|
||||
u32 decoder_frame_complexity_factor;
|
||||
u32 llc_saving = 130; /* Initialized to H264 */
|
||||
|
||||
u16 av1_tile_numbers;
|
||||
u32 av1_collated_seg_buffer_rd_wr;
|
||||
/* need divide by 1M at later step; */
|
||||
u32 av1_probability_table_rdwr_bytesperframe = 22784;
|
||||
u32 av1_fe_left_line_buffer_rdwr;
|
||||
|
||||
u32 bse_tlb_byte_per_lcu = 0;
|
||||
|
||||
u32 large_bw_calculation_fp = 0;
|
||||
|
||||
llc_enabled_ref_y_rd = (codec_input.status_llc_onoff) ? 1 : 0;
|
||||
llc_enable_ref_crcb_rd = (codec_input.status_llc_onoff) ? 1 : 0;
|
||||
/* H265D BSE tlb in LLC will be pored in Kailua */
|
||||
llc_enabled_bse_tlb = (codec_input.status_llc_onoff) ? 1 : 0;
|
||||
|
||||
frame_width = codec_input.frame_width;
|
||||
frame_height = codec_input.frame_height;
|
||||
if ((codec_input.codec == CODEC_H264) ||
|
||||
(codec_input.codec == CODEC_H264_CAVLC)) {
|
||||
frame_lcu_size = 16;
|
||||
collocated_bytes_per_lcu = 16;
|
||||
llc_saving = 130;
|
||||
} else if (codec_input.codec == CODEC_HEVC) {
|
||||
if (codec_input.lcu_size == 32) {
|
||||
frame_lcu_size = 32;
|
||||
collocated_bytes_per_lcu = 64;
|
||||
llc_saving = 114;
|
||||
} else if (codec_input.lcu_size == 64) {
|
||||
frame_lcu_size = 64;
|
||||
collocated_bytes_per_lcu = 256;
|
||||
llc_saving = 107;
|
||||
}
|
||||
} else if (codec_input.codec == CODEC_VP9) {
|
||||
if (codec_input.lcu_size == 32) {
|
||||
frame_lcu_size = 32;
|
||||
collocated_bytes_per_lcu = 64;
|
||||
llc_saving = 114;
|
||||
} else if (codec_input.lcu_size == 64) {
|
||||
frame_lcu_size = 64;
|
||||
collocated_bytes_per_lcu = 256;
|
||||
llc_saving = 107;
|
||||
}
|
||||
} else if (codec_input.codec == CODEC_AV1) {
|
||||
u32 av1d_leftline_cdef = (2944 + 896 + 896);
|
||||
u32 av1d_leftline_scaling = (2176 + 1408 + 1408);
|
||||
u32 av1d_leftline_fg = (1280);
|
||||
u32 av1d_leftline_lr = (1536 + 1024 + 1024);
|
||||
|
||||
av1d_fe_leftlinebuffer_perlcu_tileboudary =
|
||||
av1d_leftline_cdef + av1d_leftline_scaling +
|
||||
av1d_leftline_fg + av1d_leftline_lr;
|
||||
|
||||
if (codec_input.lcu_size == 128) {
|
||||
frame_lcu_size = 128;
|
||||
collocated_bytes_per_lcu = 4 * 512;
|
||||
av1d_segment_read_per_lcu = 512;
|
||||
llc_saving = 104;
|
||||
} else if (codec_input.lcu_size == 32) {
|
||||
frame_lcu_size = 32;
|
||||
collocated_bytes_per_lcu = 4 * 512 / (128 * 128 / 32 / 32);
|
||||
av1d_segment_read_per_lcu = 512 / (128 * 128 / 32 / 32);
|
||||
av1d_fe_leftlinebuffer_perlcu_tileboudary =
|
||||
av1d_fe_leftlinebuffer_perlcu_tileboudary / (128 * 128 / 32 / 32);
|
||||
llc_saving = 114;
|
||||
} else if (codec_input.lcu_size == 64) {
|
||||
frame_lcu_size = 64;
|
||||
collocated_bytes_per_lcu = 4 * 512 / (128 * 128 / 64 / 64);
|
||||
av1d_segment_read_per_lcu = 512 / (128 * 128 / 64 / 64);
|
||||
av1d_fe_leftlinebuffer_perlcu_tileboudary =
|
||||
av1d_fe_leftlinebuffer_perlcu_tileboudary / (128 * 128 / 64 / 64);
|
||||
llc_saving = 107;
|
||||
}
|
||||
}
|
||||
|
||||
lcu_per_frame =
|
||||
calculate_number_lcus_pineapple(frame_width, frame_height, frame_lcu_size);
|
||||
|
||||
target_bitrate = (u32)(codec_input.bitrate_mbps); /* Mbps */
|
||||
|
||||
ubwc_tile_w = (codec_input.bitdepth == CODEC_BITDEPTH_8) ? 32 : 48;
|
||||
ubwc_tile_h = (codec_input.bitdepth == CODEC_BITDEPTH_8) ? 8 : 4;
|
||||
|
||||
frame420_y_bw_linear_8bpp =
|
||||
((calculate_number_ubwctiles_pineapple(frame_width, frame_height, 32, 8) *
|
||||
256 * codec_input.frame_rate + 999) / 1000 + 999) / 1000;
|
||||
|
||||
frame420_y_bw_no_ubwc_tile_10bpp =
|
||||
((calculate_number_ubwctiles_pineapple(frame_width, frame_height, 48, 4) *
|
||||
256 * codec_input.frame_rate + 999) / 1000 + 999) / 1000;
|
||||
frame420_y_bw_linear_10bpp = ((frame_width * frame_height *
|
||||
codec_input.frame_rate * 2 + 999) / 1000 + 999) / 1000;
|
||||
|
||||
/* TODO Integrate Compression Ratio returned by FW */
|
||||
get_compression_factors(&compression_factor, codec_input);
|
||||
dpb_compression_factor_y = compression_factor.dpb_cf_y;
|
||||
dpb_compression_factor_cbcr = compression_factor.dpb_cf_cbcr;
|
||||
opb_compression_factor_ycbcr = compression_factor.opb_cf_ycbcr;
|
||||
|
||||
dpb_ubwc_tile_width_pixels = ubwc_tile_w;
|
||||
|
||||
dpb_ubwc_tile_height_pixels = ubwc_tile_h;
|
||||
|
||||
decoder_frame_complexity_factor =
|
||||
(codec_input.complexity_setting == 0) ?
|
||||
400 : ((codec_input.complexity_setting == 1) ? 266 : 100);
|
||||
|
||||
reconstructed_write_bw_factor_rd = (codec_input.complexity_setting == 0) ?
|
||||
105 : 100;
|
||||
|
||||
reference_y_read_bw_factor = llc_saving;
|
||||
|
||||
reference_cbcr_read_bw_factor = llc_saving;
|
||||
|
||||
if (codec_input.codec == CODEC_AV1) {
|
||||
u8 av1tile_index_entry, av1tile_complexity;
|
||||
|
||||
if (frame_width * frame_height <= 1280 * 720)
|
||||
av1tile_index_entry = 4;
|
||||
else if (frame_width * frame_height <= 1920 * 1080)
|
||||
av1tile_index_entry = 0;
|
||||
else if (frame_width * frame_height <= 2560 * 1440)
|
||||
av1tile_index_entry = 5;
|
||||
else if (frame_width * frame_height <= 4096 * 2304)
|
||||
av1tile_index_entry = 1;
|
||||
else
|
||||
av1tile_index_entry = 6;
|
||||
|
||||
/* NOT PWC //or average and power case */
|
||||
if (codec_input.complexity_setting != 0)
|
||||
av1tile_complexity = 1;
|
||||
else
|
||||
av1tile_complexity = 0;
|
||||
|
||||
av1_tile_numbers = av1_num_tiles_pineapple[av1tile_index_entry][av1tile_complexity];
|
||||
|
||||
/* these bw can be ignored */
|
||||
av1_collated_seg_buffer_rd_wr =
|
||||
((av1d_segment_read_per_lcu * lcu_per_frame *
|
||||
codec_input.frame_rate + 999) / 1000 + 999) / 1000;
|
||||
|
||||
av1_fe_left_line_buffer_rdwr =
|
||||
(((av1d_fe_leftlinebuffer_perlcu_tileboudary *
|
||||
frame_height * (av1_tile_numbers > 1 ? av1_tile_numbers / 2 : 0)
|
||||
+ 999) / 1000 + 999) / 1000 + (frame_lcu_size - 1)) / frame_lcu_size;
|
||||
}
|
||||
|
||||
if (codec_input.codec == CODEC_HEVC) {
|
||||
if (codec_input.lcu_size == 32)
|
||||
bse_tlb_byte_per_lcu = 64;
|
||||
else if (codec_input.lcu_size == 16)
|
||||
bse_tlb_byte_per_lcu = 32;
|
||||
else
|
||||
bse_tlb_byte_per_lcu = 128;
|
||||
} else if ((codec_input.codec == CODEC_H264) ||
|
||||
(codec_input.codec == CODEC_H264_CAVLC)) {
|
||||
bse_tlb_byte_per_lcu = 64;
|
||||
} else if (codec_input.codec == CODEC_VP9) {
|
||||
bse_tlb_byte_per_lcu = 304;
|
||||
} else if (codec_input.codec == CODEC_AV1) {
|
||||
if (codec_input.lcu_size == 128)
|
||||
bse_tlb_byte_per_lcu = 2064;
|
||||
else if (codec_input.lcu_size == 64)
|
||||
bse_tlb_byte_per_lcu = 1056;
|
||||
else if (codec_input.lcu_size == 32)
|
||||
bse_tlb_byte_per_lcu = 2064 / (128 * 128 / 32 / 32);
|
||||
}
|
||||
|
||||
codec_output->noc_bw_rd = 0;
|
||||
codec_output->noc_bw_wr = 0;
|
||||
codec_output->ddr_bw_rd = 0;
|
||||
codec_output->ddr_bw_wr = 0;
|
||||
|
||||
large_bw_calculation_fp = 0;
|
||||
large_bw_calculation_fp = ((target_bitrate *
|
||||
decoder_vsp_read_factor + 7) / 8);
|
||||
|
||||
codec_output->vsp_read_noc = large_bw_calculation_fp;
|
||||
|
||||
codec_output->vsp_read_ddr = codec_output->vsp_read_noc;
|
||||
|
||||
large_bw_calculation_fp = ((target_bitrate *
|
||||
bins_to_bits_factor + 7) / 8);
|
||||
|
||||
codec_output->vsp_write_noc = large_bw_calculation_fp;
|
||||
codec_output->vsp_write_ddr = codec_output->vsp_write_noc;
|
||||
|
||||
/* accumulation */
|
||||
codec_output->noc_bw_rd += codec_output->vsp_read_noc;
|
||||
codec_output->ddr_bw_rd += codec_output->vsp_read_ddr;
|
||||
codec_output->noc_bw_wr += codec_output->vsp_write_noc;
|
||||
codec_output->ddr_bw_wr += codec_output->vsp_write_ddr;
|
||||
|
||||
large_bw_calculation_fp = 0;
|
||||
large_bw_calculation_fp = ((collocated_bytes_per_lcu *
|
||||
lcu_per_frame * codec_input.frame_rate + 999) / 1000 + 999) / 1000;
|
||||
codec_output->collocated_rd_noc = large_bw_calculation_fp;
|
||||
codec_output->collocated_wr_noc = codec_output->collocated_rd_noc;
|
||||
codec_output->collocated_rd_ddr = codec_output->collocated_rd_noc;
|
||||
codec_output->collocated_wr_ddr = codec_output->collocated_wr_noc;
|
||||
|
||||
codec_output->collocated_rd_wr_total_noc =
|
||||
(u32)(codec_output->collocated_rd_noc + codec_output->collocated_wr_noc);
|
||||
|
||||
codec_output->collocated_rd_wr_total_ddr =
|
||||
codec_output->collocated_rd_wr_total_noc;
|
||||
|
||||
/* accumulation */
|
||||
codec_output->noc_bw_rd += codec_output->collocated_rd_noc;
|
||||
codec_output->noc_bw_wr += codec_output->collocated_wr_noc;
|
||||
codec_output->ddr_bw_rd += codec_output->collocated_rd_ddr;
|
||||
codec_output->ddr_bw_wr += codec_output->collocated_wr_ddr;
|
||||
|
||||
large_bw_calculation_fp = 0;
|
||||
large_bw_calculation_fp = ((codec_input.bitdepth == CODEC_BITDEPTH_8) ?
|
||||
frame420_y_bw_linear_8bpp :
|
||||
frame420_y_bw_no_ubwc_tile_10bpp) * decoder_frame_complexity_factor;
|
||||
|
||||
large_bw_calculation_fp =
|
||||
(large_bw_calculation_fp + dpb_compression_factor_y - 1) /
|
||||
dpb_compression_factor_y;
|
||||
|
||||
codec_output->dpb_rd_y_noc = large_bw_calculation_fp;
|
||||
|
||||
large_bw_calculation_fp = ((codec_input.bitdepth == CODEC_BITDEPTH_8) ?
|
||||
frame420_y_bw_linear_8bpp : frame420_y_bw_no_ubwc_tile_10bpp) *
|
||||
decoder_frame_complexity_factor;
|
||||
|
||||
large_bw_calculation_fp =
|
||||
(large_bw_calculation_fp + dpb_compression_factor_cbcr - 1) /
|
||||
dpb_compression_factor_cbcr / 2;
|
||||
|
||||
codec_output->dpb_rd_crcb_noc = large_bw_calculation_fp;
|
||||
codec_output->dpb_rdwr_duetooverlap_noc = 0;
|
||||
|
||||
large_bw_calculation_fp = ((codec_input.bitdepth == CODEC_BITDEPTH_8) ?
|
||||
frame420_y_bw_linear_8bpp : frame420_y_bw_no_ubwc_tile_10bpp) *
|
||||
reconstructed_write_bw_factor_rd;
|
||||
|
||||
large_bw_calculation_fp = ((codec_input.bitdepth == CODEC_BITDEPTH_8) ?
|
||||
frame420_y_bw_linear_8bpp : frame420_y_bw_no_ubwc_tile_10bpp) *
|
||||
reconstructed_write_bw_factor_rd;
|
||||
|
||||
large_bw_calculation_fp = large_bw_calculation_fp *
|
||||
(dpb_compression_factor_y / 2 + dpb_compression_factor_cbcr);
|
||||
|
||||
large_bw_calculation_fp = (large_bw_calculation_fp + dpb_compression_factor_y - 1) /
|
||||
dpb_compression_factor_y;
|
||||
|
||||
large_bw_calculation_fp =
|
||||
(large_bw_calculation_fp + dpb_compression_factor_cbcr - 1) /
|
||||
dpb_compression_factor_cbcr;
|
||||
|
||||
codec_output->dpb_wr_noc = large_bw_calculation_fp;
|
||||
|
||||
codec_output->dpb_rd_y_ddr = (llc_enabled_ref_y_rd) ?
|
||||
((codec_output->dpb_rd_y_noc * 100 + reference_y_read_bw_factor - 1) /
|
||||
reference_y_read_bw_factor) : codec_output->dpb_rd_y_noc;
|
||||
|
||||
codec_output->dpb_rd_crcb_ddr = (llc_enable_ref_crcb_rd) ?
|
||||
((codec_output->dpb_rd_crcb_noc * 100 +
|
||||
reference_cbcr_read_bw_factor - 1) /
|
||||
reference_cbcr_read_bw_factor) : codec_output->dpb_rd_crcb_noc;
|
||||
|
||||
codec_output->dpb_rdwr_duetooverlap_ddr = 0;
|
||||
codec_output->dpb_wr_ddr = codec_output->dpb_wr_noc;
|
||||
|
||||
/* accumulation */
|
||||
codec_output->noc_bw_rd += codec_output->dpb_rd_y_noc;
|
||||
codec_output->noc_bw_rd += codec_output->dpb_rd_crcb_noc;
|
||||
codec_output->noc_bw_rd += codec_output->dpb_rdwr_duetooverlap_noc;
|
||||
codec_output->noc_bw_wr += codec_output->dpb_wr_noc;
|
||||
codec_output->ddr_bw_rd += codec_output->dpb_rd_y_ddr;
|
||||
codec_output->ddr_bw_rd += codec_output->dpb_rd_crcb_ddr;
|
||||
codec_output->ddr_bw_rd += codec_output->dpb_rdwr_duetooverlap_ddr;
|
||||
codec_output->ddr_bw_wr += codec_output->dpb_wr_ddr;
|
||||
|
||||
if (codec_input.linear_opb || codec_input.split_opb) {
|
||||
if (codec_input.linear_opb) {
|
||||
if (codec_input.bitdepth == CODEC_BITDEPTH_8) {
|
||||
large_bw_calculation_fp = ((frame420_y_bw_linear_8bpp) *
|
||||
3 / 2 / dpb_to_opb_ratios_ds);
|
||||
|
||||
codec_output->opb_write_total_noc = large_bw_calculation_fp;
|
||||
} else {
|
||||
large_bw_calculation_fp = ((frame420_y_bw_linear_10bpp) *
|
||||
3 / 2 / dpb_to_opb_ratios_ds);
|
||||
|
||||
codec_output->opb_write_total_noc = large_bw_calculation_fp;
|
||||
}
|
||||
} else { /* (CODEC_INPUT.split_opb) */
|
||||
if (codec_input.bitdepth == CODEC_BITDEPTH_8) {
|
||||
large_bw_calculation_fp =
|
||||
(frame420_y_bw_linear_8bpp * 3 / 2 / dpb_to_opb_ratios_ds *
|
||||
100 + opb_compression_factor_ycbcr - 1) /
|
||||
opb_compression_factor_ycbcr;
|
||||
|
||||
codec_output->opb_write_total_noc = large_bw_calculation_fp;
|
||||
} else {
|
||||
large_bw_calculation_fp =
|
||||
(frame420_y_bw_no_ubwc_tile_10bpp * 3 / 2 /
|
||||
dpb_to_opb_ratios_ds * 100 +
|
||||
opb_compression_factor_ycbcr - 1) /
|
||||
opb_compression_factor_ycbcr;
|
||||
|
||||
codec_output->opb_write_total_noc = large_bw_calculation_fp;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
codec_output->opb_write_total_noc = 0;
|
||||
}
|
||||
|
||||
codec_output->opb_write_total_ddr = codec_output->opb_write_total_noc;
|
||||
|
||||
/* accumulation */
|
||||
codec_output->noc_bw_wr += codec_output->opb_write_total_noc;
|
||||
codec_output->ddr_bw_wr += codec_output->opb_write_total_ddr;
|
||||
|
||||
large_bw_calculation_fp = ((bse_tlb_byte_per_lcu * lcu_per_frame *
|
||||
codec_input.frame_rate + 999) / 1000 + 999) / 1000;
|
||||
|
||||
codec_output->bse_tlb_rd_noc = large_bw_calculation_fp;
|
||||
|
||||
if (llc_enabled_bse_tlb)
|
||||
codec_output->bse_tlb_rd_ddr = 0;
|
||||
else
|
||||
codec_output->bse_tlb_rd_ddr = codec_output->bse_tlb_rd_noc;
|
||||
|
||||
codec_output->bse_tlb_wr_noc = codec_output->bse_tlb_rd_noc;
|
||||
|
||||
if (llc_enabled_bse_tlb)
|
||||
codec_output->bse_tlb_wr_ddr = 0;
|
||||
else
|
||||
codec_output->bse_tlb_wr_ddr = codec_output->bse_tlb_wr_noc;
|
||||
|
||||
/* accumulation */
|
||||
codec_output->noc_bw_rd += codec_output->bse_tlb_rd_noc;
|
||||
codec_output->ddr_bw_rd += codec_output->bse_tlb_rd_ddr;
|
||||
codec_output->noc_bw_wr += codec_output->bse_tlb_wr_noc;
|
||||
codec_output->ddr_bw_wr += codec_output->bse_tlb_wr_ddr;
|
||||
|
||||
if (codec_input.codec == CODEC_AV1) {
|
||||
codec_output->statistics_rd_noc = (av1_collated_seg_buffer_rd_wr +
|
||||
av1_probability_table_rdwr_bytesperframe * av1_tile_numbers /
|
||||
1000 / 1000 + av1_fe_left_line_buffer_rdwr);
|
||||
|
||||
codec_output->statistics_wr_noc = (av1_collated_seg_buffer_rd_wr +
|
||||
av1_probability_table_rdwr_bytesperframe * av1_tile_numbers /
|
||||
1000 / 1000 + av1_fe_left_line_buffer_rdwr);
|
||||
|
||||
if (llc_enable_probtable_av1d_21pipe) {
|
||||
/* assert(CODEC_INPUT.pipe_num != 4); */
|
||||
codec_output->statistics_rd_ddr = codec_output->statistics_rd_noc -
|
||||
av1_probability_table_rdwr_bytesperframe *
|
||||
av1_tile_numbers / 1000 / 1000;
|
||||
|
||||
codec_output->statistics_wr_ddr = codec_output->statistics_wr_noc -
|
||||
av1_probability_table_rdwr_bytesperframe *
|
||||
av1_tile_numbers / 1000 / 1000;
|
||||
} else {
|
||||
codec_output->statistics_rd_ddr = codec_output->statistics_rd_noc;
|
||||
codec_output->statistics_wr_ddr = codec_output->statistics_wr_noc;
|
||||
}
|
||||
|
||||
/* accumulation */
|
||||
codec_output->noc_bw_rd += codec_output->statistics_rd_noc;
|
||||
codec_output->ddr_bw_rd += codec_output->statistics_rd_ddr;
|
||||
codec_output->noc_bw_wr += codec_output->statistics_wr_noc;
|
||||
codec_output->ddr_bw_wr += codec_output->statistics_wr_ddr;
|
||||
}
|
||||
|
||||
|
||||
codec_output->mmu_rd_ddr = 0;
|
||||
codec_output->mmu_rd_noc = 0;
|
||||
/* accumulation */
|
||||
codec_output->noc_bw_rd += codec_output->mmu_rd_noc;
|
||||
codec_output->ddr_bw_rd += codec_output->mmu_rd_ddr;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int calculate_bandwidth_encoder_iris3(
|
||||
struct api_calculation_input codec_input,
|
||||
struct api_calculation_bw_output *codec_output)
|
||||
{
|
||||
/* common control parameters */
|
||||
u32 frame_width;
|
||||
u32 frame_height;
|
||||
u32 frame_lcu_size;
|
||||
u32 lcu_per_frame;
|
||||
u32 target_bitrate;
|
||||
u32 collocated_bytes_per_lcu;
|
||||
|
||||
u32 frame420_y_bw_linear_8bpp;
|
||||
u32 frame420_y_bw_no_ubwc_tile_10bpp;
|
||||
u32 frame420_y_bw_linear_10bpp;
|
||||
|
||||
u16 ubwc_tile_w;
|
||||
u16 ubwc_tile_h;
|
||||
|
||||
u32 dpb_compression_factor_y;
|
||||
u32 dpb_compression_factor_cbcr;
|
||||
|
||||
u32 reconstructed_write_bw_factor_rd;
|
||||
u32 reference_y_read_bw_factor;
|
||||
u32 reference_crcb_read_bw_factor;
|
||||
|
||||
/* encoder control parameters */
|
||||
u32 en_vertical_tiles_width = 960;
|
||||
|
||||
u8 en_rotation_90_270 = 0;
|
||||
/* TODO Can we use (codec_input.status_llc_onoff) for enc_llc_*? */
|
||||
u8 en_llc_enable_ref_rd_crcb = 0;
|
||||
u8 en_llc_enable_rec_wr_uncompleted = 0;
|
||||
u8 en_llc_enable_ref_rd_y_overlap = 0;
|
||||
|
||||
u32 en_bins_to_bits_factor = 4;
|
||||
u32 en_search_windows_size_horizontal = 96;
|
||||
|
||||
u32 en_tile_number;
|
||||
u32 ipb_compression_factor_y;
|
||||
u32 ipb_compression_factor;
|
||||
|
||||
u32 large_bw_calculation_fp = 0;
|
||||
|
||||
/* TODO Are these really needed in Encoder? */
|
||||
u32 bse_tlb_byte_per_lcu = 0;
|
||||
u8 llc_enabled_bse_tlb = 1;
|
||||
|
||||
/*H265D BSE tlb in LLC will be pored in Kailua */
|
||||
llc_enabled_bse_tlb = (codec_input.status_llc_onoff) ? 1 : 0;
|
||||
|
||||
frame_width = codec_input.frame_width;
|
||||
frame_height = codec_input.frame_height;
|
||||
if ((codec_input.codec == CODEC_H264) ||
|
||||
(codec_input.codec == CODEC_H264_CAVLC)) {
|
||||
frame_lcu_size = 16;
|
||||
collocated_bytes_per_lcu = 16;
|
||||
} if (codec_input.codec == CODEC_HEVC) {
|
||||
frame_lcu_size = 32;
|
||||
collocated_bytes_per_lcu = 64;
|
||||
} else {
|
||||
/* TODO What is the value for VP9, AV1? */
|
||||
frame_lcu_size = 16;
|
||||
collocated_bytes_per_lcu = 16; /* TODO Fixes Uninitialized compilation error. */
|
||||
}
|
||||
|
||||
lcu_per_frame =
|
||||
calculate_number_lcus_pineapple(frame_width, frame_height, frame_lcu_size);
|
||||
|
||||
bse_tlb_byte_per_lcu = 16; /* TODO Should be in common declaration */
|
||||
|
||||
target_bitrate = (u32)(codec_input.bitrate_mbps); /* Mbps */
|
||||
|
||||
ubwc_tile_w = (codec_input.bitdepth == CODEC_BITDEPTH_8) ? 32 : 48;
|
||||
ubwc_tile_h = (codec_input.bitdepth == CODEC_BITDEPTH_8) ? 8 : 4;
|
||||
|
||||
/* yuv */
|
||||
if (codec_input.ipb_yuvrgb == 0) {
|
||||
frame420_y_bw_linear_8bpp =
|
||||
((calculate_number_ubwctiles_pineapple(frame_width, frame_height,
|
||||
32, 8) * 256 * codec_input.frame_rate + 999) / 1000 + 999) / 1000;
|
||||
} else { /* RGBA */
|
||||
frame420_y_bw_linear_8bpp =
|
||||
((calculate_number_ubwctiles_pineapple(frame_width, frame_height,
|
||||
6, 4) * 256 * codec_input.frame_rate + 999) / 1000 + 999) / 1000;
|
||||
}
|
||||
|
||||
frame420_y_bw_no_ubwc_tile_10bpp =
|
||||
((calculate_number_ubwctiles_pineapple(frame_width, frame_height, 48, 4) *
|
||||
256 * codec_input.frame_rate + 999) / 1000 + 999) / 1000;
|
||||
|
||||
frame420_y_bw_linear_10bpp = ((frame_width * frame_height *
|
||||
codec_input.frame_rate * 2 + 999) / 1000 + 999) / 1000;
|
||||
|
||||
/* TODO Integrate Compression Ratio returned by FW */
|
||||
get_compression_factors(&compression_factor, codec_input);
|
||||
dpb_compression_factor_y = compression_factor.dpb_cf_y;
|
||||
dpb_compression_factor_cbcr = compression_factor.dpb_cf_cbcr;
|
||||
ipb_compression_factor_y = compression_factor.ipb_cr_y;
|
||||
ipb_compression_factor = compression_factor.ipb_cr;
|
||||
|
||||
en_tile_number = (frame_width % en_vertical_tiles_width) ?
|
||||
((frame_width / en_vertical_tiles_width) + 1) :
|
||||
(frame_width / en_vertical_tiles_width);
|
||||
|
||||
en_tile_number = en_tile_number * 100;
|
||||
|
||||
/* ceil is same as excel roundup (float, 0); */
|
||||
reconstructed_write_bw_factor_rd = ((en_tile_number - 100) * 2 *
|
||||
((codec_input.lcu_size + ubwc_tile_w - 1) / ubwc_tile_w) *
|
||||
ubwc_tile_w + (frame_width - 1)) / (frame_width)+100;
|
||||
|
||||
reference_y_read_bw_factor = ((en_tile_number - 100) * 2 *
|
||||
((en_search_windows_size_horizontal + ubwc_tile_w - 1) / ubwc_tile_w) *
|
||||
ubwc_tile_w + (frame_width - 1)) / frame_width + 100;
|
||||
|
||||
reference_crcb_read_bw_factor = 150;
|
||||
|
||||
codec_output->noc_bw_rd = 0;
|
||||
codec_output->noc_bw_wr = 0;
|
||||
codec_output->ddr_bw_rd = 0;
|
||||
codec_output->ddr_bw_wr = 0;
|
||||
|
||||
large_bw_calculation_fp = (target_bitrate * en_bins_to_bits_factor + 7) / 8;
|
||||
codec_output->vsp_read_noc = large_bw_calculation_fp;
|
||||
codec_output->vsp_read_ddr = codec_output->vsp_read_noc;
|
||||
large_bw_calculation_fp = (target_bitrate + 7) / 8;
|
||||
|
||||
codec_output->vsp_write_noc = codec_output->vsp_read_noc +
|
||||
large_bw_calculation_fp;
|
||||
|
||||
codec_output->vsp_write_ddr = codec_output->vsp_write_noc;
|
||||
|
||||
/* accumulation */
|
||||
codec_output->noc_bw_rd += codec_output->vsp_read_noc;
|
||||
codec_output->ddr_bw_rd += codec_output->vsp_read_ddr;
|
||||
codec_output->noc_bw_wr += codec_output->vsp_write_noc;
|
||||
codec_output->ddr_bw_wr += codec_output->vsp_write_ddr;
|
||||
|
||||
large_bw_calculation_fp = ((collocated_bytes_per_lcu * lcu_per_frame *
|
||||
codec_input.frame_rate + 999) / 1000 + 999) / 1000;
|
||||
|
||||
codec_output->collocated_rd_noc = large_bw_calculation_fp;
|
||||
codec_output->collocated_wr_noc = codec_output->collocated_rd_noc;
|
||||
codec_output->collocated_rd_ddr = codec_output->collocated_rd_noc;
|
||||
codec_output->collocated_wr_ddr = codec_output->collocated_wr_noc;
|
||||
|
||||
codec_output->collocated_rd_wr_total_noc =
|
||||
(u32)(codec_output->collocated_rd_noc + codec_output->collocated_wr_noc);
|
||||
codec_output->collocated_rd_wr_total_ddr =
|
||||
codec_output->collocated_rd_wr_total_noc;
|
||||
|
||||
/* accumulation */
|
||||
codec_output->noc_bw_rd += codec_output->collocated_rd_noc;
|
||||
codec_output->noc_bw_wr += codec_output->collocated_wr_noc;
|
||||
codec_output->ddr_bw_rd += codec_output->collocated_rd_ddr;
|
||||
codec_output->ddr_bw_wr += codec_output->collocated_wr_ddr;
|
||||
|
||||
large_bw_calculation_fp = 0;
|
||||
|
||||
large_bw_calculation_fp = ((codec_input.bitdepth == CODEC_BITDEPTH_8) ?
|
||||
frame420_y_bw_linear_8bpp :
|
||||
frame420_y_bw_no_ubwc_tile_10bpp) * reference_y_read_bw_factor;
|
||||
|
||||
large_bw_calculation_fp = (large_bw_calculation_fp *
|
||||
pineapple_en_readfactor[codec_input.hierachical_layer]);
|
||||
|
||||
large_bw_calculation_fp = (large_bw_calculation_fp +
|
||||
dpb_compression_factor_y - 1) / dpb_compression_factor_y;
|
||||
|
||||
large_bw_calculation_fp = (large_bw_calculation_fp + 999) / 1000;
|
||||
|
||||
codec_output->dpb_rd_y_noc = large_bw_calculation_fp;
|
||||
|
||||
large_bw_calculation_fp = 0;
|
||||
|
||||
large_bw_calculation_fp = ((codec_input.bitdepth == CODEC_BITDEPTH_8) ?
|
||||
frame420_y_bw_linear_8bpp :
|
||||
frame420_y_bw_no_ubwc_tile_10bpp) * reference_crcb_read_bw_factor / 2;
|
||||
|
||||
large_bw_calculation_fp = large_bw_calculation_fp *
|
||||
pineapple_en_readfactor[codec_input.hierachical_layer];
|
||||
|
||||
large_bw_calculation_fp = (large_bw_calculation_fp +
|
||||
dpb_compression_factor_cbcr - 1) / dpb_compression_factor_cbcr;
|
||||
|
||||
large_bw_calculation_fp = (large_bw_calculation_fp + 999) / 1000;
|
||||
codec_output->dpb_rd_crcb_noc = large_bw_calculation_fp;
|
||||
|
||||
large_bw_calculation_fp = 0;
|
||||
|
||||
large_bw_calculation_fp = ((codec_input.bitdepth == CODEC_BITDEPTH_8) ?
|
||||
frame420_y_bw_linear_8bpp : frame420_y_bw_no_ubwc_tile_10bpp) *
|
||||
reconstructed_write_bw_factor_rd *
|
||||
pineapple_en_writefactor[codec_input.hierachical_layer] /
|
||||
pineapple_en_frame_num_parallel;
|
||||
|
||||
large_bw_calculation_fp = (large_bw_calculation_fp + 999) / 1000;
|
||||
|
||||
large_bw_calculation_fp = large_bw_calculation_fp *
|
||||
(dpb_compression_factor_cbcr + dpb_compression_factor_y / 2);
|
||||
|
||||
large_bw_calculation_fp = (large_bw_calculation_fp +
|
||||
dpb_compression_factor_y - 1) / dpb_compression_factor_y;
|
||||
|
||||
large_bw_calculation_fp = (large_bw_calculation_fp +
|
||||
dpb_compression_factor_cbcr - 1) / dpb_compression_factor_cbcr;
|
||||
|
||||
codec_output->dpb_wr_noc = large_bw_calculation_fp;
|
||||
|
||||
/*
|
||||
* Summary:
|
||||
* by default (for both HFR and HSR cases) :
|
||||
* -Any resolution and fps >= 120, enable layering.
|
||||
* (120 -> 3, 240 -> 4, 480 -> 5)
|
||||
* - (once we enable layering) : 50 per cent frames are Non - reference
|
||||
* frames.recon write is disable by Venus firmware
|
||||
* - Customer has ability to enable / disable layering.
|
||||
* Hence, recon write savings would not be there if customer explicitly disables layer encoding.
|
||||
*/
|
||||
|
||||
/*HFR Cases use alternating rec write if not PWC*/
|
||||
if ((codec_input.frame_rate >= 120) && (codec_input.complexity_setting != 0))
|
||||
codec_output->dpb_wr_noc = codec_output->dpb_wr_noc / 2;
|
||||
|
||||
/* for power cases with [B1] adaptive non-ref b frame */
|
||||
/* power caes IbP non reference b */
|
||||
if ((codec_input.hierachical_layer >= 1) &&
|
||||
(codec_input.hierachical_layer <= 3) &&
|
||||
(codec_input.complexity_setting != 0))
|
||||
codec_output->dpb_wr_noc = codec_output->dpb_wr_noc / 2;
|
||||
|
||||
large_bw_calculation_fp = 0;
|
||||
large_bw_calculation_fp = codec_output->dpb_wr_noc *
|
||||
(reconstructed_write_bw_factor_rd - 100);
|
||||
|
||||
large_bw_calculation_fp = (large_bw_calculation_fp +
|
||||
reconstructed_write_bw_factor_rd - 1) / reconstructed_write_bw_factor_rd;
|
||||
|
||||
codec_output->dpb_rdwr_duetooverlap_noc = large_bw_calculation_fp;
|
||||
|
||||
codec_output->dpb_rd_y_ddr = (en_llc_enable_ref_rd_y_overlap) ?
|
||||
(codec_output->dpb_rd_y_noc * 100 + reference_y_read_bw_factor - 1) /
|
||||
reference_y_read_bw_factor : codec_output->dpb_rd_y_noc;
|
||||
|
||||
codec_output->dpb_rd_crcb_ddr = (en_llc_enable_ref_rd_crcb) ?
|
||||
(codec_output->dpb_rd_crcb_noc * 100 + reference_crcb_read_bw_factor - 1) /
|
||||
reference_crcb_read_bw_factor : codec_output->dpb_rd_crcb_noc;
|
||||
|
||||
codec_output->dpb_rdwr_duetooverlap_ddr = (en_llc_enable_rec_wr_uncompleted) ?
|
||||
0 : codec_output->dpb_rdwr_duetooverlap_noc;
|
||||
|
||||
codec_output->dpb_wr_ddr = (en_llc_enable_rec_wr_uncompleted) ?
|
||||
0 : codec_output->dpb_wr_noc;
|
||||
|
||||
/* accumulation */
|
||||
codec_output->noc_bw_rd += codec_output->dpb_rd_y_noc;
|
||||
codec_output->noc_bw_rd += codec_output->dpb_rd_crcb_noc;
|
||||
codec_output->noc_bw_rd += codec_output->dpb_rdwr_duetooverlap_noc;
|
||||
codec_output->noc_bw_wr += codec_output->dpb_wr_noc;
|
||||
codec_output->ddr_bw_rd += codec_output->dpb_rd_y_ddr;
|
||||
codec_output->ddr_bw_rd += codec_output->dpb_rd_crcb_ddr;
|
||||
codec_output->ddr_bw_rd += codec_output->dpb_rdwr_duetooverlap_ddr;
|
||||
codec_output->ddr_bw_wr += codec_output->dpb_wr_ddr;
|
||||
|
||||
if (codec_input.bitdepth == CODEC_BITDEPTH_8) {
|
||||
if (codec_input.ipb_yuvrgb == 0) { /* yuv */
|
||||
large_bw_calculation_fp = ((frame420_y_bw_linear_8bpp) * 3 / 2);
|
||||
codec_output->ipb_rd_total_noc = large_bw_calculation_fp;
|
||||
if (codec_input.linear_ipb == 0) {
|
||||
codec_output->ipb_rd_total_noc =
|
||||
(large_bw_calculation_fp * 100 + ipb_compression_factor - 1) /
|
||||
ipb_compression_factor;
|
||||
}
|
||||
} else { /* rgb */
|
||||
large_bw_calculation_fp = frame420_y_bw_linear_8bpp;
|
||||
codec_output->ipb_rd_total_noc = large_bw_calculation_fp;
|
||||
if (codec_input.linear_ipb == 0) {
|
||||
if (codec_input.complexity_setting == 0) /* pwc */
|
||||
codec_output->ipb_rd_total_noc =
|
||||
(large_bw_calculation_fp * 100 +
|
||||
en_original_compression_factor_rgba_pwd_pineapple - 1) /
|
||||
en_original_compression_factor_rgba_pwd_pineapple;
|
||||
else
|
||||
codec_output->ipb_rd_total_noc =
|
||||
(large_bw_calculation_fp * 100 +
|
||||
en_original_compression_factor_rgba_avg_pineapple - 1) /
|
||||
en_original_compression_factor_rgba_avg_pineapple;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (codec_input.linear_ipb == 1) {
|
||||
large_bw_calculation_fp = (frame420_y_bw_linear_10bpp) * 3 / 2;
|
||||
codec_output->ipb_rd_total_noc = large_bw_calculation_fp;
|
||||
} else {
|
||||
large_bw_calculation_fp = (frame420_y_bw_no_ubwc_tile_10bpp *
|
||||
300 / 2 + ipb_compression_factor - 1) / ipb_compression_factor;
|
||||
codec_output->ipb_rd_total_noc = large_bw_calculation_fp;
|
||||
}
|
||||
}
|
||||
|
||||
if (en_rotation_90_270) {
|
||||
if (codec_input.codec == CODEC_HEVC) {
|
||||
if ((codec_input.bitdepth == CODEC_BITDEPTH_8) &&
|
||||
(codec_input.ipb_yuvrgb == 0))
|
||||
codec_output->ipb_rd_total_noc = codec_output->ipb_rd_total_noc
|
||||
* 1;
|
||||
else
|
||||
codec_output->ipb_rd_total_noc = codec_output->ipb_rd_total_noc
|
||||
* 3;
|
||||
} else {
|
||||
codec_output->ipb_rd_total_noc = codec_output->ipb_rd_total_noc * 2;
|
||||
}
|
||||
}
|
||||
|
||||
codec_output->ipb_rd_total_ddr = codec_output->ipb_rd_total_noc;
|
||||
|
||||
/* accumulation */
|
||||
codec_output->noc_bw_rd += codec_output->ipb_rd_total_noc;
|
||||
codec_output->ddr_bw_rd += codec_output->ipb_rd_total_ddr;
|
||||
|
||||
codec_output->bse_tlb_rd_noc =
|
||||
((bse_tlb_byte_per_lcu * lcu_per_frame * codec_input.frame_rate + 999)
|
||||
/ 1000 + 999) / 1000;
|
||||
|
||||
if (llc_enabled_bse_tlb) /* TODO should be common declaration */
|
||||
codec_output->bse_tlb_rd_ddr = 0;
|
||||
else
|
||||
codec_output->bse_tlb_rd_ddr = codec_output->bse_tlb_rd_noc;
|
||||
|
||||
codec_output->bse_tlb_wr_noc = codec_output->bse_tlb_rd_noc;
|
||||
|
||||
if (llc_enabled_bse_tlb)
|
||||
codec_output->bse_tlb_wr_ddr = 0;
|
||||
else
|
||||
codec_output->bse_tlb_wr_ddr = codec_output->bse_tlb_wr_noc;
|
||||
|
||||
/* accumulation */
|
||||
codec_output->noc_bw_rd += codec_output->bse_tlb_rd_noc;
|
||||
codec_output->ddr_bw_rd += codec_output->bse_tlb_rd_ddr;
|
||||
codec_output->noc_bw_wr += codec_output->bse_tlb_wr_noc;
|
||||
codec_output->ddr_bw_wr += codec_output->bse_tlb_wr_ddr;
|
||||
|
||||
codec_output->mmu_rd_ddr = 0;
|
||||
codec_output->mmu_rd_noc = 0;
|
||||
/* accumulation */
|
||||
codec_output->noc_bw_rd += codec_output->mmu_rd_noc;
|
||||
codec_output->ddr_bw_rd += codec_output->mmu_rd_ddr;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int msm_vidc_calculate_bandwidth(struct api_calculation_input codec_input,
|
||||
struct api_calculation_bw_output *codec_output)
|
||||
{
|
||||
int rc = 0;
|
||||
|
||||
if (codec_input.decoder_or_encoder == CODEC_DECODER) {
|
||||
rc = calculate_bandwidth_decoder_iris3(codec_input, codec_output);
|
||||
} else if (codec_input.decoder_or_encoder == CODEC_ENCODER) {
|
||||
rc = calculate_bandwidth_encoder_iris3(codec_input, codec_output);
|
||||
} else {
|
||||
d_vpr_e("%s: invalid codec\n", codec_input.decoder_or_encoder);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
548
driver/variant/iris33/src/msm_vidc_clock_iris33.c
Normal file
548
driver/variant/iris33/src/msm_vidc_clock_iris33.c
Normal file
@@ -0,0 +1,548 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#include "pineapple_technology.h"
|
||||
#include "msm_vidc_debug.h"
|
||||
|
||||
/*
 * Count the 16x16 macroblocks covered by a frame after its width and height
 * have each been rounded up to a whole number of LCUs of size lcu_size.
 */
static u32 calculate_number_mbs_pineapple(u32 width, u32 height, u32 lcu_size)
{
	u32 lcus_across = width / lcu_size;
	u32 lcus_down = height / lcu_size;
	u32 mbs_per_lcu_side = lcu_size / 16;

	/* a partially covered LCU still counts as a whole one */
	if (width % lcu_size)
		lcus_across++;
	if (height % lcu_size)
		lcus_down++;

	return lcus_across * lcus_down * mbs_per_lcu_side * mbs_per_lcu_side;
}
|
||||
|
||||
static int initialize_encoder_complexity_table(void)
|
||||
{
|
||||
/* Beging Calculate Encoder GOP Complexity Table and HW Floor numbers */
|
||||
codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_I3B4b1P][CODEC_ENCODER_GOP_Bb_ENTRY] = 70000;
|
||||
|
||||
codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_I3B4b1P][CODEC_ENCODER_GOP_P_ENTRY] = 10000;
|
||||
|
||||
codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_I3B4b1P][CODEC_ENCODER_GOP_FACTORY_ENTRY] =
|
||||
(codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_I3B4b1P][CODEC_ENCODER_GOP_Bb_ENTRY] * 150 +
|
||||
codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_I3B4b1P][CODEC_ENCODER_GOP_P_ENTRY] * 100);
|
||||
|
||||
codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_I3B4b1P][CODEC_ENCODER_GOP_FACTORY_ENTRY] =
|
||||
codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_I3B4b1P][CODEC_ENCODER_GOP_FACTORY_ENTRY] +
|
||||
(codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_I3B4b1P][CODEC_ENCODER_GOP_Bb_ENTRY] +
|
||||
codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_I3B4b1P][CODEC_ENCODER_GOP_P_ENTRY] - 1);
|
||||
|
||||
codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_I3B4b1P][CODEC_ENCODER_GOP_FACTORY_ENTRY] =
|
||||
codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_I3B4b1P][CODEC_ENCODER_GOP_FACTORY_ENTRY] /
|
||||
(codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_I3B4b1P][CODEC_ENCODER_GOP_Bb_ENTRY] +
|
||||
codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_I3B4b1P][CODEC_ENCODER_GOP_P_ENTRY]);
|
||||
|
||||
codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_I1B2b1P][CODEC_ENCODER_GOP_Bb_ENTRY] = 30000;
|
||||
|
||||
codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_I1B2b1P][CODEC_ENCODER_GOP_P_ENTRY] = 10000;
|
||||
|
||||
codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_I1B2b1P][CODEC_ENCODER_GOP_FACTORY_ENTRY] =
|
||||
(codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_I1B2b1P][CODEC_ENCODER_GOP_Bb_ENTRY] * 150 +
|
||||
codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_I1B2b1P][CODEC_ENCODER_GOP_P_ENTRY] * 100);
|
||||
|
||||
codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_I1B2b1P][CODEC_ENCODER_GOP_FACTORY_ENTRY] =
|
||||
codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_I1B2b1P][CODEC_ENCODER_GOP_FACTORY_ENTRY] +
|
||||
(codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_I1B2b1P][CODEC_ENCODER_GOP_Bb_ENTRY] +
|
||||
codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_I1B2b1P][CODEC_ENCODER_GOP_P_ENTRY] - 1);
|
||||
|
||||
codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_I1B2b1P][CODEC_ENCODER_GOP_FACTORY_ENTRY] =
|
||||
codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_I1B2b1P][CODEC_ENCODER_GOP_FACTORY_ENTRY] /
|
||||
(codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_I1B2b1P][CODEC_ENCODER_GOP_Bb_ENTRY] +
|
||||
codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_I1B2b1P][CODEC_ENCODER_GOP_P_ENTRY]);
|
||||
|
||||
codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_IbP][CODEC_ENCODER_GOP_Bb_ENTRY] = 10000;
|
||||
|
||||
codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_IbP][CODEC_ENCODER_GOP_P_ENTRY] = 10000;
|
||||
|
||||
codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_IbP][CODEC_ENCODER_GOP_FACTORY_ENTRY] =
|
||||
(codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_IbP][CODEC_ENCODER_GOP_Bb_ENTRY] * 150 +
|
||||
codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_IbP][CODEC_ENCODER_GOP_P_ENTRY] * 100);
|
||||
|
||||
codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_IbP][CODEC_ENCODER_GOP_FACTORY_ENTRY] =
|
||||
codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_IbP][CODEC_ENCODER_GOP_FACTORY_ENTRY] +
|
||||
(codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_IbP][CODEC_ENCODER_GOP_Bb_ENTRY] +
|
||||
codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_IbP][CODEC_ENCODER_GOP_P_ENTRY] - 1);
|
||||
|
||||
codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_IbP][CODEC_ENCODER_GOP_FACTORY_ENTRY] =
|
||||
codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_IbP][CODEC_ENCODER_GOP_FACTORY_ENTRY] /
|
||||
(codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_IbP][CODEC_ENCODER_GOP_Bb_ENTRY] +
|
||||
codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_IbP][CODEC_ENCODER_GOP_P_ENTRY]);
|
||||
|
||||
codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_IPP][CODEC_ENCODER_GOP_Bb_ENTRY] = 0;
|
||||
|
||||
codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_IPP][CODEC_ENCODER_GOP_P_ENTRY] = 1;
|
||||
|
||||
codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_IPP][CODEC_ENCODER_GOP_FACTORY_ENTRY] =
|
||||
(codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_IPP][CODEC_ENCODER_GOP_Bb_ENTRY] * 150 +
|
||||
codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_IPP][CODEC_ENCODER_GOP_P_ENTRY] * 100);
|
||||
|
||||
codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_IPP][CODEC_ENCODER_GOP_FACTORY_ENTRY] =
|
||||
codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_IPP][CODEC_ENCODER_GOP_FACTORY_ENTRY] +
|
||||
(codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_IPP][CODEC_ENCODER_GOP_Bb_ENTRY] +
|
||||
codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_IPP][CODEC_ENCODER_GOP_P_ENTRY] - 1);
|
||||
|
||||
codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_IPP][CODEC_ENCODER_GOP_FACTORY_ENTRY] =
|
||||
codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_IPP][CODEC_ENCODER_GOP_FACTORY_ENTRY] /
|
||||
(codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_IPP][CODEC_ENCODER_GOP_Bb_ENTRY] +
|
||||
codec_encoder_gop_complexity_table_fp
|
||||
[CODEC_GOP_IPP][CODEC_ENCODER_GOP_P_ENTRY]);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Map a pixel count to a bitrate-table column index (1..9): the index of the
 * first fp_pixel_count_barN threshold, tested from bar1 to bar9, that the
 * count reaches. Counts below every threshold also map to 9.
 */
u32 get_bitrate_entry(u32 pixle_count)
{
	if (pixle_count >= fp_pixel_count_bar1)
		return 1;
	if (pixle_count >= fp_pixel_count_bar2)
		return 2;
	if (pixle_count >= fp_pixel_count_bar3)
		return 3;
	if (pixle_count >= fp_pixel_count_bar4)
		return 4;
	if (pixle_count >= fp_pixel_count_bar5)
		return 5;
	if (pixle_count >= fp_pixel_count_bar6)
		return 6;
	if (pixle_count >= fp_pixel_count_bar7)
		return 7;
	if (pixle_count >= fp_pixel_count_bar8)
		return 8;
	if (pixle_count >= fp_pixel_count_bar9)
		return 9;

	/* below the last threshold: share the last entry */
	return 9;
}
|
||||
|
||||
static int calculate_vsp_min_freq(struct api_calculation_input codec_input,
|
||||
struct api_calculation_freq_output *codec_output)
|
||||
{
|
||||
/*
|
||||
* VSP calculation
|
||||
* different methodology from Lahaina
|
||||
*/
|
||||
u32 vsp_hw_min_frequency = 0;
|
||||
/* UInt32 decoder_vsp_fw_overhead = 100 + 5; // amplified by 100x */
|
||||
u32 fw_sw_vsp_offset = 1000 + 55; /* amplified by 1000x */
|
||||
|
||||
/*
|
||||
* Ignore fw_sw_vsp_offset, as this is baked into the reference bitrate tables.
|
||||
* As a consequence remove x1000 multipler as well.
|
||||
*/
|
||||
u32 codec = codec_input.codec;
|
||||
/* UInt32 *bitratetable; */
|
||||
u32 pixle_count = codec_input.frame_width *
|
||||
codec_input.frame_height * codec_input.frame_rate;
|
||||
|
||||
u8 bitrate_entry = get_bitrate_entry(pixle_count); /* TODO EXTRACT */
|
||||
|
||||
input_bitrate_fp = ((u32)(codec_input.bitrate_mbps * 100 + 99)) / 100;
|
||||
vsp_hw_min_frequency = frequency_table_pineapple[0][1] * input_bitrate_fp * 1000;
|
||||
|
||||
/* 8KUHD60fps with B frame */
|
||||
if ((pixle_count >= fp_pixel_count_bar0) &&
|
||||
(codec_input.hierachical_layer != CODEC_GOP_IPP)) {
|
||||
/*
|
||||
* FORMULA: VSPfreq = NOMINAL * (InputBitrate / ReferenceBitrate);
|
||||
* ReferenceBitrate = 0 for,
|
||||
* - 1Stage TURBO, all Codecs.
|
||||
* - 2Stage TURBO, H264 & H265.
|
||||
*
|
||||
* 8KUHD60fps with B frame
|
||||
* - bitrate_entry = 0
|
||||
* - Clock=NOMINAL for H264 & 2Stage H265. Because bitrate table entry for TURBO is 0.
|
||||
*
|
||||
* TODO : Reduce these conditions by removing the zero entries from Bitrate table.
|
||||
*/
|
||||
vsp_hw_min_frequency = frequency_table_pineapple[0][1] *
|
||||
input_bitrate_fp * 1000;
|
||||
|
||||
if (codec_input.codec == CODEC_AV1)
|
||||
vsp_hw_min_frequency = frequency_table_pineapple[0][0] *
|
||||
input_bitrate_fp * 1000;
|
||||
|
||||
if ((codec_input.codec == CODEC_H264) ||
|
||||
(codec_input.codec == CODEC_H264_CAVLC) ||
|
||||
((codec_input.codec == CODEC_HEVC) &&
|
||||
(codec_input.vsp_vpp_mode == CODEC_VSPVPP_MODE_1S))) {
|
||||
vsp_hw_min_frequency =
|
||||
DIV_ROUND_UP(frequency_table_pineapple[0][1], fw_sw_vsp_offset);
|
||||
} else if (((codec_input.codec == CODEC_HEVC) &&
|
||||
(codec_input.vsp_vpp_mode == CODEC_VSPVPP_MODE_2S))
|
||||
|| (codec_input.codec == CODEC_VP9)
|
||||
|| (codec_input.codec == CODEC_AV1)) {
|
||||
if (codec_input.vsp_vpp_mode == CODEC_VSPVPP_MODE_2S) {
|
||||
vsp_hw_min_frequency = DIV_ROUND_UP(vsp_hw_min_frequency,
|
||||
(bitrate_table_pineapple_2stage_fp[codec][0] * fw_sw_vsp_offset));
|
||||
} else {
|
||||
vsp_hw_min_frequency = DIV_ROUND_UP(vsp_hw_min_frequency,
|
||||
(bitrate_table_pineapple_1stage_fp[codec][0] * fw_sw_vsp_offset));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
vsp_hw_min_frequency = frequency_table_pineapple[0][1] *
|
||||
input_bitrate_fp * 1000;
|
||||
|
||||
if (codec_input.codec == CODEC_AV1 && bitrate_entry == 1)
|
||||
vsp_hw_min_frequency = frequency_table_pineapple[0][0] *
|
||||
input_bitrate_fp * 1000;
|
||||
|
||||
if ((codec_input.codec == CODEC_H264_CAVLC) &&
|
||||
(codec_input.entropy_coding_mode == CODEC_ENTROPY_CODING_CAVLC))
|
||||
codec = CODEC_H264_CAVLC;
|
||||
else if ((codec_input.codec == CODEC_H264) &&
|
||||
(codec_input.entropy_coding_mode == CODEC_ENTROPY_CODING_CABAC))
|
||||
codec = CODEC_H264;
|
||||
|
||||
if (codec_input.vsp_vpp_mode == CODEC_VSPVPP_MODE_2S)
|
||||
vsp_hw_min_frequency = DIV_ROUND_UP(vsp_hw_min_frequency,
|
||||
(bitrate_table_pineapple_2stage_fp[codec][bitrate_entry]) *
|
||||
fw_sw_vsp_offset);
|
||||
else
|
||||
vsp_hw_min_frequency = DIV_ROUND_UP(vsp_hw_min_frequency,
|
||||
(bitrate_table_pineapple_1stage_fp[codec][bitrate_entry]) *
|
||||
fw_sw_vsp_offset);
|
||||
}
|
||||
|
||||
codec_output->vsp_min_freq = vsp_hw_min_frequency;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static u32 calculate_pipe_penalty(struct api_calculation_input codec_input)
|
||||
{
|
||||
u32 pipe_penalty_codec = 0;
|
||||
u8 avid_commercial_content = 0;
|
||||
u32 pixel_count = 0;
|
||||
|
||||
/* decoder */
|
||||
if (codec_input.decoder_or_encoder == CODEC_DECODER) {
|
||||
pipe_penalty_codec = pipe_penalty_pineapple[0][0];
|
||||
avid_commercial_content = codec_input.av1d_commer_tile_enable;
|
||||
if (codec_input.codec == CODEC_AV1) {
|
||||
pixel_count = codec_input.frame_width * codec_input.frame_height;
|
||||
if (pixel_count <= 1920 * 1080)
|
||||
pipe_penalty_codec =
|
||||
pipe_penalty_pineapple[avid_commercial_content + 1][0];
|
||||
else if (pixel_count < 3840 * 2160)
|
||||
pipe_penalty_codec =
|
||||
(pipe_penalty_pineapple[avid_commercial_content + 1][0] +
|
||||
pipe_penalty_pineapple[avid_commercial_content + 1][1]) / 2;
|
||||
else if ((pixel_count == 3840 * 2160) ||
|
||||
(pixel_count == 4096 * 2160) || (pixel_count == 4096 * 2304))
|
||||
pipe_penalty_codec = pipe_penalty_pineapple[avid_commercial_content + 1][1];
|
||||
else if (pixel_count < 7680 * 4320)
|
||||
pipe_penalty_codec =
|
||||
(pipe_penalty_pineapple[avid_commercial_content + 1][1] +
|
||||
pipe_penalty_pineapple[avid_commercial_content + 1][2]) / 2;
|
||||
else
|
||||
pipe_penalty_codec =
|
||||
pipe_penalty_pineapple[avid_commercial_content + 1][2];
|
||||
}
|
||||
} else {
|
||||
pipe_penalty_codec = 101;
|
||||
}
|
||||
|
||||
return pipe_penalty_codec;
|
||||
}
|
||||
|
||||
static int calculate_vpp_min_freq(struct api_calculation_input codec_input,
|
||||
struct api_calculation_freq_output *codec_output)
|
||||
{
|
||||
u32 vpp_hw_min_frequency = 0;
|
||||
u32 fmin = 0;
|
||||
u32 tensilica_min_frequency = 0;
|
||||
u32 decoder_vsp_fw_overhead = 100 + 5; /* amplified by 100x */
|
||||
/* UInt32 fw_sw_vsp_offset = 1000 + 55; amplified by 1000x */
|
||||
/* TODO from calculate_sw_vsp_min_freq */
|
||||
u32 vsp_hw_min_frequency = codec_output->vsp_min_freq;
|
||||
u32 pipe_penalty_codec = 0;
|
||||
u32 fmin_fwoverhead105 = 0;
|
||||
u32 fmin_measured_fwoverhead = 0;
|
||||
u32 lpmode_uhd_cycle_permb = 0;
|
||||
u32 hqmode1080p_cycle_permb = 0;
|
||||
u32 encoder_vpp_target_clk_per_mb = 0;
|
||||
|
||||
codec_mbspersession_pineaple =
|
||||
calculate_number_mbs_pineapple(codec_input.frame_width,
|
||||
codec_input.frame_height, codec_input.lcu_size) *
|
||||
codec_input.frame_rate;
|
||||
|
||||
/* Section 2. 0 VPP/VSP calculation */
|
||||
if (codec_input.decoder_or_encoder == CODEC_DECODER) { /* decoder */
|
||||
vpp_hw_min_frequency = ((decoder_vpp_target_clk_per_mb_pineapple) *
|
||||
(codec_mbspersession_pineaple) + codec_input.pipe_num - 1) /
|
||||
(codec_input.pipe_num);
|
||||
|
||||
vpp_hw_min_frequency = (vpp_hw_min_frequency + 99999) / 1000000;
|
||||
|
||||
if (codec_input.pipe_num > 1) {
|
||||
pipe_penalty_codec = calculate_pipe_penalty(codec_input);
|
||||
vpp_hw_min_frequency = (vpp_hw_min_frequency *
|
||||
pipe_penalty_codec + 999) / 1000;
|
||||
}
|
||||
|
||||
if (codec_input.vsp_vpp_mode == CODEC_VSPVPP_MODE_2S) {
|
||||
/* FW overhead, convert FW cycles to impact to one pipe */
|
||||
u64 decoder_vpp_fw_overhead = 0;
|
||||
decoder_vpp_fw_overhead =
|
||||
DIV_ROUND_UP((DECODER_VPP_FW_OVERHEAD_PINEAPPLE * 10 *
|
||||
codec_input.frame_rate), 15);
|
||||
|
||||
decoder_vpp_fw_overhead =
|
||||
DIV_ROUND_UP((decoder_vpp_fw_overhead * 1000),
|
||||
(codec_mbspersession_pineaple *
|
||||
decoder_vpp_target_clk_per_mb_pineapple / codec_input.pipe_num));
|
||||
|
||||
decoder_vpp_fw_overhead += 1000;
|
||||
decoder_vpp_fw_overhead = (decoder_vpp_fw_overhead < 1050) ?
|
||||
1050 : decoder_vpp_fw_overhead;
|
||||
|
||||
/* VPP HW + FW */
|
||||
if (codec_input.linear_opb == 1 && codec_input.bitdepth == CODEC_BITDEPTH_10)
|
||||
/* multiply by 1.20 for 10b case */
|
||||
decoder_vpp_fw_overhead = 1200 + decoder_vpp_fw_overhead - 1000;
|
||||
|
||||
vpp_hw_min_frequency = (vpp_hw_min_frequency *
|
||||
decoder_vpp_fw_overhead + 999) / 1000;
|
||||
|
||||
/* VSP HW+FW */
|
||||
vsp_hw_min_frequency =
|
||||
(vsp_hw_min_frequency * decoder_vsp_fw_overhead + 99) / 100;
|
||||
|
||||
fmin = (vpp_hw_min_frequency > vsp_hw_min_frequency) ?
|
||||
vpp_hw_min_frequency : vsp_hw_min_frequency;
|
||||
} else {
|
||||
/* 1-stage need SW cycles + FW cycles + HW time */
|
||||
if (codec_input.linear_opb == 1 && codec_input.bitdepth == CODEC_BITDEPTH_10)
|
||||
/* multiply by 1.20 for 10b linear case */
|
||||
vpp_hw_min_frequency =
|
||||
(vpp_hw_min_frequency * 1200 + 999) / 1000;
|
||||
|
||||
/*
|
||||
* HW time
|
||||
* comment: 02/23/2021 SY: the bitrate is measured bitrate,
|
||||
* the overlapping effect is already considered into bitrate.
|
||||
* no need to add extra anymore
|
||||
*/
|
||||
fmin = (vpp_hw_min_frequency > vsp_hw_min_frequency) ?
|
||||
vpp_hw_min_frequency : vsp_hw_min_frequency;
|
||||
|
||||
/* FW time */
|
||||
fmin_fwoverhead105 = (fmin * 105 + 99) / 100;
|
||||
fmin_measured_fwoverhead = fmin +
|
||||
(((DECODER_VPPVSP1STAGE_FW_OVERHEAD_PINEAPPLE *
|
||||
codec_input.frame_rate * 10 + 14) / 15 + 999) / 1000 + 999) /
|
||||
1000;
|
||||
|
||||
fmin = (fmin_fwoverhead105 > fmin_measured_fwoverhead) ?
|
||||
fmin_fwoverhead105 : fmin_measured_fwoverhead;
|
||||
}
|
||||
|
||||
tensilica_min_frequency = (DECODER_SW_OVERHEAD_PINEAPPLE * 10 + 14) / 15;
|
||||
tensilica_min_frequency = (tensilica_min_frequency + 999) / 1000;
|
||||
tensilica_min_frequency = tensilica_min_frequency * codec_input.frame_rate;
|
||||
tensilica_min_frequency = (tensilica_min_frequency + 999) / 1000;
|
||||
fmin = (tensilica_min_frequency > fmin) ? tensilica_min_frequency : fmin;
|
||||
} else { /* encoder */
|
||||
/* Decide LP/HQ */
|
||||
u8 hq_mode = 0;
|
||||
if (codec_input.pipe_num > 1)
|
||||
if (codec_input.frame_width * codec_input.frame_height <=
|
||||
1920 * 1080)
|
||||
if (codec_input.frame_width * codec_input.frame_height *
|
||||
codec_input.frame_rate <= 1920 * 1080 * 60)
|
||||
hq_mode = 1;
|
||||
|
||||
codec_output->enc_hqmode = hq_mode;
|
||||
|
||||
/* Section 1. 0 */
|
||||
/* TODO ONETIME call, should be in another place. */
|
||||
initialize_encoder_complexity_table();
|
||||
|
||||
/* End Calculate Encoder GOP Complexity Table */
|
||||
|
||||
/* VPP base cycle */
|
||||
lpmode_uhd_cycle_permb = (320 *
|
||||
codec_encoder_gop_complexity_table_fp
|
||||
[codec_input.hierachical_layer][CODEC_ENCODER_GOP_FACTORY_ENTRY]
|
||||
+ 99) / 100;
|
||||
|
||||
if ((codec_input.frame_width == 1920) &&
|
||||
((codec_input.frame_height == 1080) ||
|
||||
(codec_input.frame_height == 1088)) &&
|
||||
(codec_input.frame_rate >= 480))
|
||||
lpmode_uhd_cycle_permb = (90 * 4 *
|
||||
codec_encoder_gop_complexity_table_fp
|
||||
[codec_input.hierachical_layer][CODEC_ENCODER_GOP_FACTORY_ENTRY]
|
||||
+ 99) / 100;
|
||||
|
||||
if ((codec_input.frame_width == 1280) &&
|
||||
((codec_input.frame_height == 720) ||
|
||||
(codec_input.frame_height == 768)) &&
|
||||
(codec_input.frame_rate >= 960))
|
||||
lpmode_uhd_cycle_permb = (99 * 4 *
|
||||
codec_encoder_gop_complexity_table_fp
|
||||
[codec_input.hierachical_layer][CODEC_ENCODER_GOP_FACTORY_ENTRY]
|
||||
+ 99) / 100;
|
||||
|
||||
hqmode1080p_cycle_permb = (675 *
|
||||
codec_encoder_gop_complexity_table_fp
|
||||
[codec_input.hierachical_layer][CODEC_ENCODER_GOP_FACTORY_ENTRY]
|
||||
+ 99) / 100;
|
||||
|
||||
encoder_vpp_target_clk_per_mb = (hq_mode) ?
|
||||
hqmode1080p_cycle_permb : lpmode_uhd_cycle_permb;
|
||||
|
||||
vpp_hw_min_frequency = ((encoder_vpp_target_clk_per_mb) *
|
||||
(codec_mbspersession_pineaple) + codec_input.pipe_num - 1) /
|
||||
(codec_input.pipe_num);
|
||||
|
||||
vpp_hw_min_frequency = (vpp_hw_min_frequency + 99999) / 1000000;
|
||||
|
||||
if (codec_input.pipe_num > 1) {
|
||||
u32 pipe_penalty_codec = 101;
|
||||
vpp_hw_min_frequency = (vpp_hw_min_frequency *
|
||||
pipe_penalty_codec + 99) / 100;
|
||||
}
|
||||
|
||||
if (codec_input.vsp_vpp_mode == CODEC_VSPVPP_MODE_2S) {
|
||||
/* FW overhead, convert FW cycles to impact to one pipe */
|
||||
u64 encoder_vpp_fw_overhead = 0;
|
||||
|
||||
encoder_vpp_fw_overhead =
|
||||
DIV_ROUND_UP((ENCODER_VPP_FW_OVERHEAD_PINEAPPLE * 10 *
|
||||
codec_input.frame_rate), 15);
|
||||
|
||||
encoder_vpp_fw_overhead =
|
||||
DIV_ROUND_UP((encoder_vpp_fw_overhead * 1000),
|
||||
(codec_mbspersession_pineaple * encoder_vpp_target_clk_per_mb /
|
||||
codec_input.pipe_num));
|
||||
|
||||
encoder_vpp_fw_overhead += 1000;
|
||||
|
||||
encoder_vpp_fw_overhead = (encoder_vpp_fw_overhead < 1050) ?
|
||||
1050 : encoder_vpp_fw_overhead;
|
||||
|
||||
/* VPP HW + FW */
|
||||
vpp_hw_min_frequency = (vpp_hw_min_frequency *
|
||||
encoder_vpp_fw_overhead + 999) / 1000;
|
||||
|
||||
/* TODO : decoder_vsp_fw_overhead? */
|
||||
vsp_hw_min_frequency = (vsp_hw_min_frequency *
|
||||
decoder_vsp_fw_overhead + 99) / 100;
|
||||
|
||||
fmin = (vpp_hw_min_frequency > vsp_hw_min_frequency) ?
|
||||
vpp_hw_min_frequency : vsp_hw_min_frequency;
|
||||
} else {
|
||||
/* HW time */
|
||||
fmin = (vpp_hw_min_frequency > vsp_hw_min_frequency) ?
|
||||
vpp_hw_min_frequency : vsp_hw_min_frequency;
|
||||
|
||||
/* FW time */
|
||||
fmin_fwoverhead105 = (fmin * 105 + 99) / 100;
|
||||
fmin_measured_fwoverhead = fmin +
|
||||
(((DECODER_VPPVSP1STAGE_FW_OVERHEAD_PINEAPPLE *
|
||||
codec_input.frame_rate * 10 + 14) / 15 + 999) /
|
||||
1000 + 999) / 1000;
|
||||
|
||||
fmin = (fmin_fwoverhead105 > fmin_measured_fwoverhead) ?
|
||||
fmin_fwoverhead105 : fmin_measured_fwoverhead;
|
||||
/* SW time */
|
||||
}
|
||||
|
||||
tensilica_min_frequency = (ENCODER_SW_OVERHEAD_PINEAPPLE * 10 + 14) / 15;
|
||||
tensilica_min_frequency = (tensilica_min_frequency + 999) / 1000;
|
||||
|
||||
tensilica_min_frequency = tensilica_min_frequency *
|
||||
codec_input.frame_rate;
|
||||
|
||||
tensilica_min_frequency = (tensilica_min_frequency + 999) / 1000;
|
||||
|
||||
fmin = (tensilica_min_frequency > fmin) ?
|
||||
tensilica_min_frequency : fmin;
|
||||
}
|
||||
|
||||
codec_output->vpp_min_freq = vpp_hw_min_frequency;
|
||||
codec_output->vsp_min_freq = vsp_hw_min_frequency;
|
||||
codec_output->tensilica_min_freq = tensilica_min_frequency;
|
||||
codec_output->hw_min_freq = fmin;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int msm_vidc_calculate_frequency(struct api_calculation_input codec_input,
|
||||
struct api_calculation_freq_output *codec_output)
|
||||
{
|
||||
int rc = 0;
|
||||
|
||||
rc = calculate_vsp_min_freq(codec_input, codec_output);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
rc = calculate_vpp_min_freq(codec_input, codec_output);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
return rc;
|
||||
}
|
@@ -9,8 +9,373 @@
|
||||
#include "msm_vidc_inst.h"
|
||||
#include "msm_vidc_core.h"
|
||||
#include "msm_vidc_debug.h"
|
||||
#include "perf_static_model.h"
|
||||
#include "msm_vidc_power.h"
|
||||
|
||||
static u64 __calculate_decoder(struct vidc_bus_vote_data *d);
|
||||
static u64 __calculate_encoder(struct vidc_bus_vote_data *d);
|
||||
static u64 __calculate(struct msm_vidc_inst* inst, struct vidc_bus_vote_data *d);
|
||||
static u64 msm_vidc_calc_freq_iris33_legacy(struct msm_vidc_inst *inst, u32 data_size);
|
||||
|
||||
/*
 * Translate the state of a driver instance into the api_calculation_input
 * consumed by the frequency model.
 *
 * @inst:        instance whose domain, codec, capabilities and port formats
 *               are read
 * @data_size:   multiplied by the frame rate and 8, then divided by 1000000,
 *               to obtain bitrate_mbps
 * @codec_input: structure to fill
 *
 * Returns 0 on success, -EINVAL for an unsupported domain, codec or stage
 * (fields assigned before the failing check keep their new values).
 */
static int msm_vidc_init_codec_input_freq(struct msm_vidc_inst *inst, u32 data_size,
		struct api_calculation_input *codec_input)
{
	enum msm_vidc_port_type size_port;
	enum msm_vidc_port_type color_port;
	u32 color_fmt;

	/*
	 * The decoder takes its dimensions from INPUT_PORT and its color
	 * format from OUTPUT_PORT; the encoder does the opposite.
	 */
	switch (inst->domain) {
	case MSM_VIDC_ENCODER:
		codec_input->decoder_or_encoder = CODEC_ENCODER;
		size_port = OUTPUT_PORT;
		color_port = INPUT_PORT;
		break;
	case MSM_VIDC_DECODER:
		codec_input->decoder_or_encoder = CODEC_DECODER;
		size_port = INPUT_PORT;
		color_port = OUTPUT_PORT;
		break;
	default:
		d_vpr_e("%s: invalid domain %d\n", __func__, inst->domain);
		return -EINVAL;
	}

	codec_input->chipset_gen = MSM_PINEAPPLE;

	switch (inst->codec) {
	case MSM_VIDC_H264:
		codec_input->codec = CODEC_H264;
		codec_input->lcu_size = 16;
		codec_input->entropy_coding_mode =
			(inst->capabilities->cap[ENTROPY_MODE].value ==
				V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CABAC) ?
			CODEC_ENTROPY_CODING_CABAC : CODEC_ENTROPY_CODING_CAVLC;
		break;
	case MSM_VIDC_HEVC:
		codec_input->codec = CODEC_HEVC;
		codec_input->lcu_size = 32;
		break;
	case MSM_VIDC_VP9:
		codec_input->codec = CODEC_VP9;
		codec_input->lcu_size = 16;
		break;
	case MSM_VIDC_AV1:
		codec_input->codec = CODEC_AV1;
		codec_input->lcu_size = 32;
		break;
	default:
		d_vpr_e("%s: invalid codec %d\n", __func__, inst->codec);
		return -EINVAL;
	}

	codec_input->pipe_num = inst->capabilities->cap[PIPE].value;
	codec_input->frame_rate = inst->max_rate;

	codec_input->frame_width = inst->fmts[size_port].fmt.pix_mp.width;
	codec_input->frame_height = inst->fmts[size_port].fmt.pix_mp.height;

	switch (inst->capabilities->cap[STAGE].value) {
	case MSM_VIDC_STAGE_1:
		codec_input->vsp_vpp_mode = CODEC_VSPVPP_MODE_1S;
		break;
	case MSM_VIDC_STAGE_2:
		codec_input->vsp_vpp_mode = CODEC_VSPVPP_MODE_2S;
		break;
	default:
		d_vpr_e("%s: invalid stage %d\n", __func__,
			inst->capabilities->cap[STAGE].value);
		return -EINVAL;
	}

	/* anything that is not 8-bit is treated as 10-bit */
	codec_input->bitdepth =
		(inst->capabilities->cap[BIT_DEPTH].value == BIT_DEPTH_8) ?
		CODEC_BITDEPTH_8 : CODEC_BITDEPTH_10;

	/*
	 * Used for calculating Encoder GOP Complexity
	 * hierachical_layer=0..7 used as Array Index
	 * inst->capabilities->cap[B_FRAME].value=[ 0 1 2 ]
	 * TODO how to map?
	 */

	/* set as IPP */
	codec_input->hierachical_layer = 0;

	color_fmt = v4l2_colorformat_to_driver(inst,
		inst->fmts[color_port].fmt.pix_mp.pixelformat, __func__);

	codec_input->linear_opb = is_linear_colorformat(color_fmt);
	codec_input->bitrate_mbps =
		(codec_input->frame_rate * data_size * 8) / 1000000;

	/* disable av1d commercial tile */
	codec_input->av1d_commer_tile_enable = 0;
	/* set as sanity mode */
	codec_input->regression_mode = 1;

	return 0;
}
|
||||
|
||||
static int msm_vidc_init_codec_input_bus(struct msm_vidc_inst *inst, struct vidc_bus_vote_data *d,
|
||||
struct api_calculation_input *codec_input)
|
||||
{
|
||||
u32 complexity_factor_int = 0, complexity_factor_frac = 0;
|
||||
bool opb_compression_enabled = false;
|
||||
|
||||
if (!d)
|
||||
return -EINVAL;
|
||||
|
||||
if (d->domain == MSM_VIDC_ENCODER) {
|
||||
codec_input->decoder_or_encoder = CODEC_ENCODER;
|
||||
} else if (d->domain == MSM_VIDC_DECODER) {
|
||||
codec_input->decoder_or_encoder = CODEC_DECODER;
|
||||
} else {
|
||||
d_vpr_e("%s: invalid domain %d\n", __func__, d->domain);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
codec_input->chipset_gen = MSM_PINEAPPLE;
|
||||
|
||||
if (d->codec == MSM_VIDC_H264) {
|
||||
codec_input->codec = CODEC_H264;
|
||||
} else if (d->codec == MSM_VIDC_HEVC) {
|
||||
codec_input->codec = CODEC_HEVC;
|
||||
} else if (d->codec == MSM_VIDC_VP9) {
|
||||
codec_input->codec = CODEC_VP9;
|
||||
} else if (d->codec == MSM_VIDC_AV1) {
|
||||
codec_input->codec = CODEC_AV1;
|
||||
} else {
|
||||
d_vpr_e("%s: invalid codec %d\n", __func__, d->codec);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
codec_input->lcu_size = d->lcu_size;
|
||||
codec_input->pipe_num = d->num_vpp_pipes;
|
||||
codec_input->frame_rate = d->fps;
|
||||
codec_input->frame_width = d->input_width;
|
||||
codec_input->frame_height = d->input_height;
|
||||
|
||||
if (d->work_mode == MSM_VIDC_STAGE_1) {
|
||||
codec_input->vsp_vpp_mode = CODEC_VSPVPP_MODE_1S;
|
||||
} else if (d->work_mode == MSM_VIDC_STAGE_2) {
|
||||
codec_input->vsp_vpp_mode = CODEC_VSPVPP_MODE_2S;
|
||||
} else {
|
||||
d_vpr_e("%s: invalid stage %d\n", __func__, d->work_mode);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (inst->capabilities->cap[ENTROPY_MODE].value ==
|
||||
V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CABAC) {
|
||||
codec_input->entropy_coding_mode = CODEC_ENTROPY_CODING_CABAC;
|
||||
} else if (inst->capabilities->cap[ENTROPY_MODE].value ==
|
||||
V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CAVLC) {
|
||||
codec_input->entropy_coding_mode = CODEC_ENTROPY_CODING_CAVLC;
|
||||
} else {
|
||||
d_vpr_e("%s: invalid entropy %d\n", __func__,
|
||||
inst->capabilities->cap[ENTROPY_MODE].value);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Used for calculating Encoder GOP Complexity
|
||||
* hierachical_layer=0..7 used as Array Index
|
||||
* TODO how to map?
|
||||
*/
|
||||
codec_input->hierachical_layer = 0; /* set as IPP */
|
||||
|
||||
/*
|
||||
* If the calculated motion_vector_complexity is > 2 then set the
|
||||
* complexity_setting and refframe_complexity to be pwc(performance worst case)
|
||||
* values. If the motion_vector_complexity is < 2 then set the complexity_setting
|
||||
* and refframe_complexity to be average case values.
|
||||
*/
|
||||
|
||||
complexity_factor_int = Q16_INT(d->complexity_factor);
|
||||
complexity_factor_frac = Q16_FRAC(d->complexity_factor);
|
||||
|
||||
if (complexity_factor_int < COMPLEXITY_THRESHOLD ||
|
||||
(complexity_factor_int == COMPLEXITY_THRESHOLD &&
|
||||
complexity_factor_frac == 0)) {
|
||||
/* set as average case values */
|
||||
codec_input->complexity_setting = COMPLEXITY_SETTING_AVG;
|
||||
codec_input->refframe_complexity = REFFRAME_COMPLEXITY_AVG;
|
||||
} else {
|
||||
/* set as pwc */
|
||||
codec_input->complexity_setting = COMPLEXITY_SETTING_PWC;
|
||||
codec_input->refframe_complexity = REFFRAME_COMPLEXITY_PWC;
|
||||
}
|
||||
|
||||
codec_input->status_llc_onoff = d->use_sys_cache;
|
||||
|
||||
if (__bpp(d->color_formats[0]) == 8)
|
||||
codec_input->bitdepth = CODEC_BITDEPTH_8;
|
||||
else
|
||||
codec_input->bitdepth = CODEC_BITDEPTH_10;
|
||||
|
||||
if (d->num_formats == 1) {
|
||||
codec_input->split_opb = 0;
|
||||
codec_input->linear_opb = !__ubwc(d->color_formats[0]);
|
||||
} else if (d->num_formats == 2) {
|
||||
codec_input->split_opb = 1;
|
||||
codec_input->linear_opb = !__ubwc(d->color_formats[1]);
|
||||
} else {
|
||||
d_vpr_e("%s: invalid num_formats %d\n",
|
||||
__func__, d->num_formats);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
codec_input->linear_ipb = 0; /* set as ubwc ipb */
|
||||
|
||||
/* TODO Confirm if we always LOSSLESS mode ie lossy_ipb = 0*/
|
||||
codec_input->lossy_ipb = 0; /* set as lossless ipb */
|
||||
|
||||
/* TODO Confirm if no multiref */
|
||||
codec_input->encoder_multiref = 0; /* set as no multiref */
|
||||
codec_input->bitrate_mbps = (d->bitrate / 1000000); /* bps 10; set as 10mbps */
|
||||
|
||||
opb_compression_enabled = d->num_formats >= 2 && __ubwc(d->color_formats[1]);
|
||||
|
||||
/* ANDROID CR is in Q16 format, StaticModel CR in x100 format */
|
||||
codec_input->cr_dpb = ((Q16_INT(d->compression_ratio)*100) +
|
||||
Q16_FRAC(d->compression_ratio));
|
||||
|
||||
codec_input->cr_opb = opb_compression_enabled ?
|
||||
codec_input->cr_dpb : FP_ONE;
|
||||
|
||||
codec_input->cr_ipb = ((Q16_INT(d->input_cr)*100) + Q16_FRAC(d->input_cr));
|
||||
codec_input->cr_rpb = codec_input->cr_dpb; /* cr_rpb ony for encoder */
|
||||
|
||||
/* disable by default, only enable for aurora depth map session */
|
||||
codec_input->lumaonly_decode = 0;
|
||||
/* TODO: disable av1d commercial tile */
|
||||
codec_input->av1d_commer_tile_enable = 0;
|
||||
/* set as custom regression mode, as are using cr,cf values from FW */
|
||||
codec_input->regression_mode = REGRESSION_MODE_CUSTOM;
|
||||
|
||||
|
||||
/* Dump all the variables for easier debugging */
|
||||
if (msm_vidc_debug & VIDC_BUS) {
|
||||
struct dump dump[] = {
|
||||
{"complexity_factor_int", "%d", complexity_factor_int},
|
||||
{"complexity_factor_frac", "%d", complexity_factor_frac},
|
||||
{"refframe_complexity", "%d", codec_input->refframe_complexity},
|
||||
{"complexity_setting", "%d", codec_input->complexity_setting},
|
||||
{"cr_dpb", "%d", codec_input->cr_dpb},
|
||||
{"cr_opb", "%d", codec_input->cr_opb},
|
||||
{"cr_ipb", "%d", codec_input->cr_ipb},
|
||||
{"cr_rpb", "%d", codec_input->cr_rpb},
|
||||
{"lcu size", "%d", codec_input->lcu_size},
|
||||
{"pipe number", "%d", codec_input->pipe_num},
|
||||
{"frame_rate", "%d", codec_input->frame_rate},
|
||||
{"frame_width", "%d", codec_input->frame_width},
|
||||
{"frame_height", "%d", codec_input->frame_height},
|
||||
{"work_mode","%d", d->work_mode},
|
||||
{"encoder_or_decode", "%d", inst->domain},
|
||||
{"chipset_gen", "%d", codec_input->chipset_gen},
|
||||
{"codec_input", "%d", codec_input->codec},
|
||||
{"entropy_coding_mode", "%d", codec_input->entropy_coding_mode},
|
||||
{"hierachical_layer", "%d", codec_input->hierachical_layer},
|
||||
{"status_llc_onoff", "%d", codec_input->status_llc_onoff},
|
||||
{"bit_depth", "%d", codec_input->bitdepth},
|
||||
{"split_opb", "%d", codec_input->split_opb},
|
||||
{"linear_opb", "%d", codec_input->linear_opb},
|
||||
{"linear_ipb", "%d", codec_input->linear_ipb},
|
||||
{"lossy_ipb", "%d", codec_input->lossy_ipb},
|
||||
{"encoder_multiref", "%d", codec_input->encoder_multiref},
|
||||
{"bitrate_mbps", "%d", codec_input->bitrate_mbps},
|
||||
{"lumaonly_decode", "%d", codec_input->lumaonly_decode},
|
||||
{"av1d_commer_tile_enable", "%d", codec_input->av1d_commer_tile_enable},
|
||||
{"regression_mode", "%d", codec_input->regression_mode},
|
||||
};
|
||||
__dump(dump, ARRAY_SIZE(dump));
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Compute the required core clock for @inst using the static power model.
 *
 * @data_size is forwarded to msm_vidc_init_codec_input_freq() and logged as
 * the filled length.
 *
 * Returns the frequency in Hz, or 0 when the parameters are invalid or the
 * model input/frequency calculation fails. Except for AV1 sessions and HEVC
 * 10-bit decode sessions with inst->iframe set, the result is capped at the
 * second entry of the frequency table (NOM).
 */
static u64 msm_vidc_calc_freq_iris33_new(struct msm_vidc_inst *inst, u32 data_size)
{
	u64 freq = 0;
	struct msm_vidc_core *core;
	int ret = 0;
	struct api_calculation_input codec_input;
	struct api_calculation_freq_output codec_output;
	u32 fps, mbpf;
	bool allow_turbo;

	if (!inst || !inst->core || !inst->capabilities) {
		d_vpr_e("%s: invalid params\n", __func__);
		return freq;
	}

	core = inst->core;

	/* the frequency table is consulted below; validate it like the legacy path */
	if (!core->resource || !core->resource->freq_set.freq_tbl ||
		!core->resource->freq_set.count) {
		d_vpr_e("%s: invalid params\n", __func__);
		return freq;
	}

	mbpf = msm_vidc_get_mbs_per_frame(inst);
	fps = inst->max_rate;

	memset(&codec_input, 0, sizeof(struct api_calculation_input));
	memset(&codec_output, 0, sizeof(struct api_calculation_freq_output));

	ret = msm_vidc_init_codec_input_freq(inst, data_size, &codec_input);
	if (ret)
		return freq;

	ret = msm_vidc_calculate_frequency(codec_input, &codec_output);
	if (ret)
		return freq;

	/* Convert MHz to Hz; widen first so the multiply is done in 64 bits */
	freq = (u64)codec_output.hw_min_freq * 1000000;

	i_vpr_p(inst, "%s: filled len %d, required freq %llu, fps %u, mbpf %u\n",
		__func__, data_size, freq, fps, mbpf);

	/*
	 * Only AV1, or HEVC 10bit decode with iframe, may go up to TURBO;
	 * every other case is limited to NOM.
	 */
	allow_turbo = inst->codec == MSM_VIDC_AV1 ||
		(inst->iframe && is_hevc_10bit_decode_session(inst));

	if (!allow_turbo) {
		/* limit to NOM, index 0 is TURBO, index 1 is NOM clock rate */
		if (core->resource->freq_set.count >= 2 &&
			freq > core->resource->freq_set.freq_tbl[1].freq)
			freq = core->resource->freq_set.freq_tbl[1].freq;
	}

	return freq;
}
|
||||
|
||||
static int msm_vidc_calc_bw_iris33_new(struct msm_vidc_inst *inst,
|
||||
struct vidc_bus_vote_data *vidc_data)
|
||||
{
|
||||
u32 ret = 0;
|
||||
struct api_calculation_input codec_input;
|
||||
struct api_calculation_bw_output codec_output;
|
||||
|
||||
memset(&codec_input, 0, sizeof(struct api_calculation_input));
|
||||
memset(&codec_output, 0, sizeof(struct api_calculation_bw_output));
|
||||
|
||||
ret = msm_vidc_init_codec_input_bus(inst, vidc_data, &codec_input);
|
||||
if (ret)
|
||||
return ret;
|
||||
ret = msm_vidc_calculate_bandwidth(codec_input, &codec_output);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
vidc_data->calc_bw_ddr = kbps(codec_output.ddr_bw_rd + codec_output.ddr_bw_wr);
|
||||
vidc_data->calc_bw_llcc = kbps(codec_output.noc_bw_rd + codec_output.noc_bw_wr);
|
||||
|
||||
i_vpr_l(inst, "%s: calc_bw_ddr %lu calc_bw_llcc %lu",
|
||||
__func__, vidc_data->calc_bw_ddr, vidc_data->calc_bw_llcc);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Entry point for the iris33 clock frequency calculation.
 *
 * Validates @inst and then dispatches to the legacy calculation or to the
 * new static-model based one, selected at build time by
 * ENABLE_LEGACY_POWER_CALCULATIONS.
 *
 * Returns the frequency reported by the selected implementation, or 0 when
 * @inst is invalid.
 */
u64 msm_vidc_calc_freq_iris33(struct msm_vidc_inst *inst, u32 data_size)
{
	if (!inst || !inst->core || !inst->capabilities) {
		d_vpr_e("%s: invalid params\n", __func__);
		return 0;
	}

	return ENABLE_LEGACY_POWER_CALCULATIONS ?
		msm_vidc_calc_freq_iris33_legacy(inst, data_size) :
		msm_vidc_calc_freq_iris33_new(inst, data_size);
}
|
||||
|
||||
u64 msm_vidc_calc_freq_iris33_legacy(struct msm_vidc_inst *inst, u32 data_size)
|
||||
{
|
||||
u64 freq = 0;
|
||||
struct msm_vidc_core* core;
|
||||
@@ -28,7 +393,8 @@ u64 msm_vidc_calc_freq_iris33(struct msm_vidc_inst *inst, u32 data_size)
|
||||
}
|
||||
core = inst->core;
|
||||
|
||||
if (!core->resource || !core->resource->freq_set.freq_tbl) {
|
||||
if (!core->resource || !core->resource->freq_set.freq_tbl ||
|
||||
!core->resource->freq_set.count) {
|
||||
d_vpr_e("%s: invalid params\n", __func__);
|
||||
return freq;
|
||||
}
|
||||
@@ -140,7 +506,7 @@ u64 msm_vidc_calc_freq_iris33(struct msm_vidc_inst *inst, u32 data_size)
|
||||
/* VSP */
|
||||
if (inst->codec == MSM_VIDC_AV1) {
|
||||
/*
|
||||
* For AV1: Use VSP calculations from Kalama perf model.
|
||||
* For AV1: Use VSP calculations from Lanai perf model.
|
||||
* For legacy codecs, use vsp_cycles based on legacy MB_CYCLES_VSP.
|
||||
*/
|
||||
u32 decoder_vsp_fw_overhead = 105;
|
||||
@@ -801,7 +1167,10 @@ int msm_vidc_calc_bw_iris33(struct msm_vidc_inst *inst,
|
||||
if (!vidc_data)
|
||||
return value;
|
||||
|
||||
value = __calculate(inst, vidc_data);
|
||||
if (ENABLE_LEGACY_POWER_CALCULATIONS)
|
||||
value = __calculate(inst, vidc_data);
|
||||
else
|
||||
value = msm_vidc_calc_bw_iris33_new(inst, vidc_data);
|
||||
|
||||
return value;
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user