msm_vidc_power.c 16 KB


  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Copyright (c) 2020, The Linux Foundation. All rights reserved.
  4. */
  5. #include "msm_vidc_power.h"
  6. #include "msm_vidc_debug.h"
  7. #include "msm_vidc_internal.h"
  8. #include "msm_vidc_inst.h"
  9. #include "msm_vidc_core.h"
  10. #include "msm_vidc_dt.h"
  11. #include "msm_vidc_driver.h"
  12. #include "msm_vidc_platform.h"
  13. #include "msm_vidc_buffer.h"
  14. #include "venus_hfi.h"
  /*
   * Lower and upper bounds applied to the UBWC complexity factor and
   * compression ratio in fill_dynamic_stats() before they are placed in a
   * bus vote.
   * NOTE(review): the "<< 16" scaling looks like 16.16 fixed point (bounds of
   * 1.0..4.0 and 1.0..5.0) - confirm against the bandwidth calculator.
   */
  #define MSM_VIDC_MIN_UBWC_COMPLEXITY_FACTOR (1 << 16)
  #define MSM_VIDC_MAX_UBWC_COMPLEXITY_FACTOR (4 << 16)
  #define MSM_VIDC_MIN_UBWC_COMPRESSION_RATIO (1 << 16)
  #define MSM_VIDC_MAX_UBWC_COMPRESSION_RATIO (5 << 16)
  19. u64 msm_vidc_max_freq(struct msm_vidc_inst *inst)
  20. {
  21. struct msm_vidc_core* core;
  22. struct allowed_clock_rates_table *allowed_clks_tbl;
  23. u64 freq = 0;
  24. if (!inst || !inst->core) {
  25. d_vpr_e("%s: invalid params\n", __func__);
  26. return freq;
  27. }
  28. core = inst->core;
  29. if (!core->dt || !core->dt->allowed_clks_tbl) {
  30. s_vpr_e(inst->sid, "%s: invalid params\n", __func__);
  31. return freq;
  32. }
  33. allowed_clks_tbl = core->dt->allowed_clks_tbl;
  34. freq = allowed_clks_tbl[0].clock_rate;
  35. s_vpr_l(inst->sid, "%s: rate = %lu\n", __func__, freq);
  36. return freq;
  37. }
  38. static int msm_vidc_get_mbps(struct msm_vidc_inst *inst,
  39. enum load_calc_quirks quirks)
  40. {
  41. int input_port_mbs, output_port_mbs;
  42. int fps, operating_rate, frame_rate;
  43. struct v4l2_format *f;
  44. f = &inst->fmts[INPUT_PORT];
  45. input_port_mbs = NUM_MBS_PER_FRAME(f->fmt.pix_mp.width,
  46. f->fmt.pix_mp.height);
  47. f = &inst->fmts[OUTPUT_PORT];
  48. output_port_mbs = NUM_MBS_PER_FRAME(f->fmt.pix_mp.width,
  49. f->fmt.pix_mp.height);
  50. frame_rate = inst->capabilities->cap[FRAME_RATE].value;
  51. operating_rate = inst->capabilities->cap[OPERATING_RATE].value;
  52. fps = frame_rate;
  53. /* For admission control operating rate is ignored */
  54. if (quirks == LOAD_POWER)
  55. fps = max(operating_rate, frame_rate);
  56. /* In case of fps < 1 we assume 1 */
  57. fps = max(fps >> 16, 1);
  58. return max(input_port_mbs, output_port_mbs) * fps;
  59. }
  60. int msm_vidc_get_inst_load(struct msm_vidc_inst *inst,
  61. enum load_calc_quirks quirks)
  62. {
  63. int load = 0;
  64. if (!inst || !inst->capabilities) {
  65. d_vpr_e("%s: invalid params\n", __func__);
  66. return -EINVAL;
  67. }
  68. if (inst->state == MSM_VIDC_OPEN ||
  69. inst->state == MSM_VIDC_ERROR)
  70. goto exit;
  71. /*
  72. * Clock and Load calculations for REALTIME/NON-REALTIME
  73. * Operating rate will either Default or Client value.
  74. * Session admission control will be based on Load.
  75. * Power requests based of calculated Clock/Freq.
  76. * ----------------|----------------------------|
  77. * REALTIME | Admission Control Load = |
  78. * | res * fps |
  79. * | Power Request Load = |
  80. * | res * max(op, fps)|
  81. * ----------------|----------------------------|
  82. * NON-REALTIME/ | Admission Control Load = 0 |
  83. * THUMBNAIL | Power Request Load = |
  84. * | res * max(op, fps)|
  85. * ----------------|----------------------------|
  86. */
  87. if (is_thumbnail_session(inst) ||
  88. (!is_realtime_session(inst) &&
  89. quirks == LOAD_ADMISSION_CONTROL)) {
  90. load = 0;
  91. } else {
  92. load = msm_vidc_get_mbps(inst, quirks);
  93. }
  94. exit:
  95. return load;
  96. }
  97. static int fill_dynamic_stats(struct msm_vidc_inst *inst,
  98. struct vidc_bus_vote_data *vote_data)
  99. {
  100. u32 max_cr = MSM_VIDC_MIN_UBWC_COMPRESSION_RATIO;
  101. u32 max_cf = MSM_VIDC_MIN_UBWC_COMPLEXITY_FACTOR;
  102. u32 max_input_cr = MSM_VIDC_MIN_UBWC_COMPRESSION_RATIO;
  103. u32 min_cf = MSM_VIDC_MAX_UBWC_COMPLEXITY_FACTOR;
  104. u32 min_input_cr = MSM_VIDC_MAX_UBWC_COMPRESSION_RATIO;
  105. u32 min_cr = MSM_VIDC_MAX_UBWC_COMPRESSION_RATIO;
  106. /* TODO: get ubwc stats from firmware
  107. if (inst->core->resources.ubwc_stats_in_fbd == 1) {
  108. mutex_lock(&inst->ubwc_stats_lock);
  109. if (inst->ubwc_stats.is_valid == 1) {
  110. min_cr = inst->ubwc_stats.worst_cr;
  111. max_cf = inst->ubwc_stats.worst_cf;
  112. min_input_cr = inst->ubwc_stats.worst_cr;
  113. }
  114. mutex_unlock(&inst->ubwc_stats_lock);
  115. }
  116. */
  117. /* Sanitize CF values from HW */
  118. max_cf = min_t(u32, max_cf, MSM_VIDC_MAX_UBWC_COMPLEXITY_FACTOR);
  119. min_cf = max_t(u32, min_cf, MSM_VIDC_MIN_UBWC_COMPLEXITY_FACTOR);
  120. max_cr = min_t(u32, max_cr, MSM_VIDC_MAX_UBWC_COMPRESSION_RATIO);
  121. min_cr = max_t(u32, min_cr, MSM_VIDC_MIN_UBWC_COMPRESSION_RATIO);
  122. max_input_cr = min_t(u32,
  123. max_input_cr, MSM_VIDC_MAX_UBWC_COMPRESSION_RATIO);
  124. min_input_cr = max_t(u32,
  125. min_input_cr, MSM_VIDC_MIN_UBWC_COMPRESSION_RATIO);
  126. vote_data->compression_ratio = min_cr;
  127. vote_data->complexity_factor = max_cf;
  128. vote_data->input_cr = min_input_cr;
  129. s_vpr_l(inst->sid,
  130. "Input CR = %d Recon CR = %d Complexity Factor = %d\n",
  131. vote_data->input_cr, vote_data->compression_ratio,
  132. vote_data->complexity_factor);
  133. return 0;
  134. }
  135. static int msm_vidc_set_buses(struct msm_vidc_inst* inst)
  136. {
  137. int rc = 0;
  138. struct msm_vidc_core* core;
  139. struct msm_vidc_inst* temp;
  140. u64 total_bw_ddr = 0, total_bw_llcc = 0;
  141. u64 curr_time_ns;
  142. if (!inst || !inst->core) {
  143. d_vpr_e("%s: invalid params\n", __func__);
  144. return -EINVAL;
  145. }
  146. core = inst->core;
  147. mutex_lock(&core->lock);
  148. curr_time_ns = ktime_get_ns();
  149. list_for_each_entry(temp, &core->instances, list) {
  150. struct msm_vidc_buffer *vbuf, *next;
  151. u32 data_size = 0;
  152. /* TODO: accessing temp without lock */
  153. list_for_each_entry_safe(vbuf, next, &temp->buffers.input.list, list)
  154. data_size = max(data_size, vbuf->data_size);
  155. if (!data_size)
  156. continue;
  157. /* skip inactive session bus bandwidth */
  158. if (!is_active_session(temp->last_qbuf_time_ns, curr_time_ns)) {
  159. temp->active = false;
  160. continue;
  161. }
  162. if (temp->bus_data.power_mode == VIDC_POWER_TURBO) {
  163. total_bw_ddr = total_bw_llcc = INT_MAX;
  164. break;
  165. }
  166. total_bw_ddr += temp->bus_data.calc_bw_ddr;
  167. total_bw_llcc += temp->bus_data.calc_bw_llcc;
  168. }
  169. mutex_unlock(&core->lock);
  170. rc = venus_hfi_scale_buses(inst, total_bw_ddr, total_bw_llcc);
  171. if (rc)
  172. return rc;
  173. return 0;
  174. }
  175. int msm_vidc_scale_buses(struct msm_vidc_inst *inst)
  176. {
  177. int rc = 0;
  178. struct msm_vidc_core *core;
  179. struct vidc_bus_vote_data *vote_data;
  180. struct v4l2_format *out_f;
  181. struct v4l2_format *inp_f;
  182. struct msm_vidc_buffer *vbuf;
  183. u32 data_size = 0;
  184. int codec = 0, frame_rate;
  185. if (!inst || !inst->core || !inst->capabilities) {
  186. d_vpr_e("%s: invalid params: %pK\n", __func__, inst);
  187. return -EINVAL;
  188. }
  189. core = inst->core;
  190. if (!core->dt) {
  191. d_vpr_e("%s: invalid dt params\n", __func__);
  192. return -EINVAL;
  193. }
  194. vote_data = &inst->bus_data;
  195. list_for_each_entry(vbuf, &inst->buffers.input.list, list)
  196. data_size = max(data_size, vbuf->data_size);
  197. if (!data_size)
  198. return 0;
  199. vote_data->power_mode = VIDC_POWER_TURBO;
  200. if (inst->power.buffer_counter < DCVS_FTB_WINDOW)
  201. vote_data->power_mode = VIDC_POWER_TURBO;
  202. if (msm_vidc_clock_voting)
  203. vote_data->power_mode = VIDC_POWER_TURBO;
  204. if (vote_data->power_mode == VIDC_POWER_TURBO)
  205. goto set_buses;
  206. out_f = &inst->fmts[OUTPUT_PORT];
  207. inp_f = &inst->fmts[INPUT_PORT];
  208. switch (inst->domain) {
  209. case MSM_VIDC_DECODER:
  210. codec = inp_f->fmt.pix_mp.pixelformat;
  211. break;
  212. case MSM_VIDC_ENCODER:
  213. codec = out_f->fmt.pix_mp.pixelformat;
  214. break;
  215. default:
  216. s_vpr_e(inst->sid, "%s: invalid session_type %#x\n",
  217. __func__, inst->domain);
  218. break;
  219. }
  220. frame_rate = inst->capabilities->cap[FRAME_RATE].value;
  221. vote_data->codec = inst->codec;
  222. vote_data->input_width = inp_f->fmt.pix_mp.width;
  223. vote_data->input_height = inp_f->fmt.pix_mp.height;
  224. vote_data->output_width = out_f->fmt.pix_mp.width;
  225. vote_data->output_height = out_f->fmt.pix_mp.height;
  226. vote_data->lcu_size = (codec == V4L2_PIX_FMT_HEVC ||
  227. codec == V4L2_PIX_FMT_VP9) ? 32 : 16;
  228. vote_data->fps = msm_vidc_get_fps(inst);
  229. if (inst->domain == MSM_VIDC_ENCODER) {
  230. vote_data->bitrate = inst->capabilities->cap[BIT_RATE].value;
  231. vote_data->rotation = inst->capabilities->cap[ROTATION].value;
  232. vote_data->b_frames_enabled =
  233. inst->capabilities->cap[B_FRAME].value > 0;
  234. /* scale bitrate if operating rate is larger than fps */
  235. if (vote_data->fps > (frame_rate >> 16) &&
  236. (frame_rate >> 16)) {
  237. vote_data->bitrate = vote_data->bitrate /
  238. (frame_rate >> 16) * vote_data->fps;
  239. }
  240. vote_data->num_formats = 1;
  241. vote_data->color_formats[0] = v4l2_colorformat_to_driver(
  242. inst->fmts[INPUT_PORT].fmt.pix_mp.pixelformat, __func__);
  243. } else if (inst->domain == MSM_VIDC_DECODER) {
  244. u32 color_format;
  245. vote_data->bitrate = data_size * vote_data->fps * 8;
  246. color_format = v4l2_colorformat_to_driver(
  247. inst->fmts[OUTPUT_PORT].fmt.pix_mp.pixelformat, __func__);
  248. if (is_linear_colorformat(color_format)) {
  249. vote_data->num_formats = 2;
  250. /*
  251. * 0 index - dpb colorformat
  252. * 1 index - opb colorformat
  253. */
  254. if (is_10bit_colorformat(color_format)) {
  255. vote_data->color_formats[0] = MSM_VIDC_FMT_TP10C;
  256. } else {
  257. vote_data->color_formats[0] = MSM_VIDC_FMT_NV12;
  258. }
  259. vote_data->color_formats[0] = color_format;
  260. } else {
  261. vote_data->num_formats = 1;
  262. vote_data->color_formats[0] = color_format;
  263. }
  264. }
  265. vote_data->work_mode = inst->capabilities->cap[STAGE].value;
  266. if (core->dt->sys_cache_res_set)
  267. vote_data->use_sys_cache = true;
  268. vote_data->num_vpp_pipes = core->capabilities[NUM_VPP_PIPE].value;
  269. fill_dynamic_stats(inst, vote_data);
  270. call_session_op(core, calc_bw, inst, vote_data);
  271. set_buses:
  272. rc = msm_vidc_set_buses(inst);
  273. if (rc)
  274. return rc;
  275. return 0;
  276. }
  277. int msm_vidc_set_clocks(struct msm_vidc_inst* inst)
  278. {
  279. int rc = 0;
  280. struct msm_vidc_core* core;
  281. struct msm_vidc_inst* temp;
  282. u64 freq, rate;
  283. u32 data_size;
  284. bool increment, decrement;
  285. u64 curr_time_ns;
  286. int i = 0;
  287. if (!inst || !inst->core) {
  288. d_vpr_e("%s: invalid params\n", __func__);
  289. return -EINVAL;
  290. }
  291. core = inst->core;
  292. if (!core->dt || !core->dt->allowed_clks_tbl) {
  293. d_vpr_e("%s: invalid dt params\n", __func__);
  294. return -EINVAL;
  295. }
  296. mutex_lock(&core->lock);
  297. increment = false;
  298. decrement = true;
  299. freq = 0;
  300. curr_time_ns = ktime_get_ns();
  301. list_for_each_entry(temp, &core->instances, list) {
  302. struct msm_vidc_buffer* vbuf, *next;
  303. data_size = 0;
  304. list_for_each_entry_safe(vbuf, next, &temp->buffers.input.list, list)
  305. data_size = max(data_size, vbuf->data_size);
  306. if (!data_size)
  307. continue;
  308. /* skip inactive session clock rate */
  309. if (!is_active_session(temp->last_qbuf_time_ns, curr_time_ns)) {
  310. temp->active = false;
  311. continue;
  312. }
  313. freq += temp->power.min_freq;
  314. if (msm_vidc_clock_voting) {
  315. d_vpr_l("msm_vidc_clock_voting %d\n", msm_vidc_clock_voting);
  316. freq = msm_vidc_clock_voting;
  317. decrement = false;
  318. break;
  319. }
  320. /* increment even if one session requested for it */
  321. if (temp->power.dcvs_flags & MSM_VIDC_DCVS_INCR)
  322. increment = true;
  323. /* decrement only if all sessions requested for it */
  324. if (!(temp->power.dcvs_flags & MSM_VIDC_DCVS_DECR))
  325. decrement = false;
  326. }
  327. /*
  328. * keep checking from lowest to highest rate until
  329. * table rate >= requested rate
  330. */
  331. for (i = core->dt->allowed_clks_tbl_size - 1; i >= 0; i--) {
  332. rate = core->dt->allowed_clks_tbl[i].clock_rate;
  333. if (rate >= freq)
  334. break;
  335. }
  336. if (i < 0)
  337. i = 0;
  338. if (increment) {
  339. if (i > 0)
  340. rate = core->dt->allowed_clks_tbl[i - 1].clock_rate;
  341. } else if (decrement) {
  342. if (i < (int) (core->dt->allowed_clks_tbl_size - 1))
  343. rate = core->dt->allowed_clks_tbl[i + 1].clock_rate;
  344. }
  345. core->power.clk_freq = (u32)rate;
  346. d_vpr_p("%s: clock rate %lu requested %lu increment %d decrement %d\n",
  347. __func__, rate, freq, increment, decrement);
  348. mutex_unlock(&core->lock);
  349. rc = venus_hfi_scale_clocks(inst, rate);
  350. if (rc)
  351. return rc;
  352. return 0;
  353. }
  354. static int msm_vidc_apply_dcvs(struct msm_vidc_inst *inst)
  355. {
  356. int rc = 0;
  357. int bufs_with_fw = 0;
  358. struct msm_vidc_power *power;
  359. if (!inst || !inst->core) {
  360. d_vpr_e("%s: invalid params %pK\n", __func__, inst);
  361. return -EINVAL;
  362. }
  363. if (!inst->power.dcvs_mode || inst->decode_batch.enable) {
  364. s_vpr_l(inst->sid, "Skip DCVS (dcvs %d, batching %d)\n",
  365. inst->power.dcvs_mode, inst->decode_batch.enable);
  366. inst->power.dcvs_flags = 0;
  367. return 0;
  368. }
  369. power = &inst->power;
  370. if (is_decode_session(inst)) {
  371. bufs_with_fw = msm_vidc_num_queued_bufs(inst, OUTPUT_MPLANE);
  372. } else {
  373. bufs_with_fw = msm_vidc_num_queued_bufs(inst, INPUT_MPLANE);
  374. }
  375. /* +1 as one buffer is going to be queued after the function */
  376. bufs_with_fw += 1;
  377. /*
  378. * DCVS decides clock level based on below algorithm
  379. *
  380. * Limits :
  381. * min_threshold : Buffers required for reference by FW.
  382. * nom_threshold : Midpoint of Min and Max thresholds
  383. * max_threshold : Min Threshold + DCVS extra buffers, allocated
  384. * for smooth flow.
  385. * 1) When buffers outside FW are reaching client's extra buffers,
  386. * FW is slow and will impact pipeline, Increase clock.
  387. * 2) When pending buffers with FW are less than FW requested,
  388. * pipeline has cushion to absorb FW slowness, Decrease clocks.
  389. * 3) When DCVS has engaged(Inc or Dec) and pending buffers with FW
  390. * transitions past the nom_threshold, switch to calculated load.
  391. * This smoothens the clock transitions.
  392. * 4) Otherwise maintain previous Load config.
  393. */
  394. if (bufs_with_fw >= power->max_threshold) {
  395. power->dcvs_flags = MSM_VIDC_DCVS_INCR;
  396. } else if (bufs_with_fw < power->min_threshold) {
  397. power->dcvs_flags = MSM_VIDC_DCVS_DECR;
  398. } else if ((power->dcvs_flags & MSM_VIDC_DCVS_DECR &&
  399. bufs_with_fw >= power->nom_threshold) ||
  400. (power->dcvs_flags & MSM_VIDC_DCVS_INCR &&
  401. bufs_with_fw <= power->nom_threshold))
  402. power->dcvs_flags = 0;
  403. s_vpr_p(inst->sid, "DCVS: bufs_with_fw %d th[%d %d %d] flags %#x\n",
  404. bufs_with_fw, power->min_threshold,
  405. power->nom_threshold, power->max_threshold,
  406. power->dcvs_flags);
  407. return rc;
  408. }
  409. int msm_vidc_scale_clocks(struct msm_vidc_inst *inst)
  410. {
  411. struct msm_vidc_core* core;
  412. struct msm_vidc_buffer *vbuf;
  413. u32 data_size = 0;
  414. if (!inst || !inst->core) {
  415. d_vpr_e("%s: invalid params\n", __func__);
  416. return -EINVAL;
  417. }
  418. core = inst->core;
  419. list_for_each_entry(vbuf, &inst->buffers.input.list, list)
  420. data_size = max(data_size, vbuf->data_size);
  421. if (!data_size)
  422. return 0;
  423. if (inst->power.buffer_counter < DCVS_FTB_WINDOW ||
  424. is_turbo_session(inst)) {
  425. inst->power.min_freq = msm_vidc_max_freq(inst);
  426. inst->power.dcvs_flags = 0;
  427. } else if (msm_vidc_clock_voting) {
  428. inst->power.min_freq = msm_vidc_clock_voting;
  429. inst->power.dcvs_flags = 0;
  430. } else {
  431. inst->power.min_freq =
  432. call_session_op(core, calc_freq, inst, data_size);
  433. msm_vidc_apply_dcvs(inst);
  434. }
  435. msm_vidc_set_clocks(inst);
  436. return 0;
  437. }
  438. int msm_vidc_scale_power(struct msm_vidc_inst *inst, bool scale_buses)
  439. {
  440. if (!inst || !inst->core) {
  441. d_vpr_e("%s: invalid params %pK\n", __func__, inst);
  442. return -EINVAL;
  443. }
  444. if (!inst->active) {
  445. /* scale buses for inactive -> active session */
  446. scale_buses = true;
  447. inst->active = true;
  448. }
  449. if (msm_vidc_scale_clocks(inst))
  450. s_vpr_e(inst->sid, "failed to scale clock\n");
  451. if (scale_buses) {
  452. if (msm_vidc_scale_buses(inst))
  453. s_vpr_e(inst->sid, "failed to scale bus\n");
  454. }
  455. return 0;
  456. }
  457. void msm_vidc_dcvs_data_reset(struct msm_vidc_inst *inst)
  458. {
  459. struct msm_vidc_power *dcvs;
  460. u32 min_count, actual_count;
  461. if (!inst) {
  462. d_vpr_e("%s: invalid params\n", __func__);
  463. return;
  464. }
  465. dcvs = &inst->power;
  466. if (inst->domain == MSM_VIDC_ENCODER) {
  467. min_count = inst->buffers.input.min_count;
  468. actual_count = inst->buffers.input.actual_count;
  469. } else if (inst->domain == MSM_VIDC_DECODER) {
  470. min_count = inst->buffers.output.min_count;
  471. actual_count = inst->buffers.output.actual_count;
  472. } else {
  473. s_vpr_e(inst->sid, "%s: invalid domain type %d\n",
  474. __func__, inst->domain);
  475. return;
  476. }
  477. dcvs->min_threshold = min_count;
  478. if (inst->domain == MSM_VIDC_ENCODER)
  479. dcvs->max_threshold = min((min_count + DCVS_ENC_EXTRA_INPUT_BUFFERS),
  480. actual_count);
  481. else
  482. dcvs->max_threshold = min((min_count + DCVS_DEC_EXTRA_OUTPUT_BUFFERS),
  483. actual_count);
  484. dcvs->dcvs_window =
  485. dcvs->max_threshold < dcvs->min_threshold ? 0 :
  486. dcvs->max_threshold - dcvs->min_threshold;
  487. dcvs->nom_threshold = dcvs->min_threshold +
  488. (dcvs->dcvs_window ?
  489. (dcvs->dcvs_window / 2) : 0);
  490. dcvs->dcvs_flags = 0;
  491. s_vpr_p(inst->sid, "%s: DCVS: thresholds [%d %d %d] flags %#x\n",
  492. __func__, dcvs->min_threshold,
  493. dcvs->nom_threshold, dcvs->max_threshold,
  494. dcvs->dcvs_flags);
  495. }
  496. void msm_vidc_power_data_reset(struct msm_vidc_inst *inst)
  497. {
  498. int rc = 0;
  499. if (!inst || !inst->core) {
  500. d_vpr_e("%s: invalid params\n", __func__);
  501. return;
  502. }
  503. s_vpr_h(inst->sid, "%s\n", __func__);
  504. msm_vidc_dcvs_data_reset(inst);
  505. inst->power.buffer_counter = 0;
  506. //inst->ubwc_stats.is_valid = 0; TODO: fix it
  507. rc = msm_vidc_scale_power(inst, true);
  508. if (rc)
  509. s_vpr_e(inst->sid, "%s: failed to scale power\n", __func__);
  510. }