dcss-scaler.c 23 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright 2019 NXP.
  4. *
  5. * Scaling algorithms were contributed by Dzung Hoang <[email protected]>
  6. */
  7. #include <linux/device.h>
  8. #include <linux/slab.h>
  9. #include "dcss-dev.h"
  /*
   * Scaler register map.
   *
   * Register offsets are added to a channel's base_ofs by dcss_scaler_write().
   * Each offset is followed by the bit/field definitions that this file ORs
   * into the value written to that register; fields that are not referenced
   * in this file are presumably grouped the same way.
   */
  #define DCSS_SCALER_CTRL			0x00
  #define   SCALER_EN				BIT(0)
  #define   REPEAT_EN				BIT(4)
  #define   SCALE2MEM_EN			BIT(8)
  #define   MEM2OFIFO_EN			BIT(12)

  #define DCSS_SCALER_OFIFO_CTRL		0x04
  #define   OFIFO_LOW_THRES_POS			0
  #define   OFIFO_LOW_THRES_MASK		GENMASK(9, 0)
  #define   OFIFO_HIGH_THRES_POS		16
  #define   OFIFO_HIGH_THRES_MASK		GENMASK(25, 16)
  #define   UNDERRUN_DETECT_CLR			BIT(26)
  #define   LOW_THRES_DETECT_CLR		BIT(27)
  #define   HIGH_THRES_DETECT_CLR		BIT(28)
  #define   UNDERRUN_DETECT_EN			BIT(29)
  #define   LOW_THRES_DETECT_EN			BIT(30)
  #define   HIGH_THRES_DETECT_EN		BIT(31)

  #define DCSS_SCALER_SDATA_CTRL		0x08
  #define   YUV_EN				BIT(0)
  #define   RTRAM_8LINES			BIT(1)
  #define   Y_UV_BYTE_SWAP			BIT(4)
  #define   A2R10G10B10_FORMAT_POS		8
  #define   A2R10G10B10_FORMAT_MASK		GENMASK(11, 8)

  #define DCSS_SCALER_BIT_DEPTH			0x0C
  #define   LUM_BIT_DEPTH_POS			0
  #define   LUM_BIT_DEPTH_MASK			GENMASK(1, 0)
  #define   CHR_BIT_DEPTH_POS			4
  #define   CHR_BIT_DEPTH_MASK			GENMASK(5, 4)

  #define DCSS_SCALER_SRC_FORMAT		0x10
  #define DCSS_SCALER_DST_FORMAT		0x14
  #define   FORMAT_MASK				GENMASK(1, 0)

  /* Resolution registers: (width - 1) and (height - 1) packed in one word. */
  #define DCSS_SCALER_SRC_LUM_RES		0x18
  #define DCSS_SCALER_SRC_CHR_RES		0x1C
  #define DCSS_SCALER_DST_LUM_RES		0x20
  #define DCSS_SCALER_DST_CHR_RES		0x24
  #define   WIDTH_POS				0
  #define   WIDTH_MASK				GENMASK(11, 0)
  #define   HEIGHT_POS				16
  #define   HEIGHT_MASK				GENMASK(27, 16)

  /* Start phase and phase increment registers, luma then chroma. */
  #define DCSS_SCALER_V_LUM_START		0x48
  #define   V_START_MASK			GENMASK(15, 0)
  #define DCSS_SCALER_V_LUM_INC			0x4C
  #define   V_INC_MASK				GENMASK(15, 0)
  #define DCSS_SCALER_H_LUM_START		0x50
  #define   H_START_MASK			GENMASK(18, 0)
  #define DCSS_SCALER_H_LUM_INC			0x54
  #define   H_INC_MASK				GENMASK(15, 0)
  #define DCSS_SCALER_V_CHR_START		0x58
  #define DCSS_SCALER_V_CHR_INC			0x5C
  #define DCSS_SCALER_H_CHR_START		0x60
  #define DCSS_SCALER_H_CHR_INC			0x64

  /* Base offsets of the four filter coefficient banks. */
  #define DCSS_SCALER_COEF_VLUM			0x80
  #define DCSS_SCALER_COEF_HLUM			0x140
  #define DCSS_SCALER_COEF_VCHR			0x200
  #define DCSS_SCALER_COEF_HCHR			0x300
  64. struct dcss_scaler_ch {
  65. void __iomem *base_reg;
  66. u32 base_ofs;
  67. struct dcss_scaler *scl;
  68. u32 sdata_ctrl;
  69. u32 scaler_ctrl;
  70. bool scaler_ctrl_chgd;
  71. u32 c_vstart;
  72. u32 c_hstart;
  73. bool use_nn_interpolation;
  74. };
  75. struct dcss_scaler {
  76. struct device *dev;
  77. struct dcss_ctxld *ctxld;
  78. u32 ctx_id;
  79. struct dcss_scaler_ch ch[3];
  80. };
  /*
   * Scaler coefficients generator.
   *
   * PSC_Q_FRACTION is the number of fractional bits used by the fixed-point
   * helpers (mult_q/div_q); PSC_Q_ROUND_OFFSET is one half in that format.
   * Only PSC_STORED_PHASES of the PSC_NUM_PHASES phases are kept in the
   * coefficient tables.
   */
  #define PSC_FRAC_BITS			30
  #define PSC_FRAC_SCALE		BIT(PSC_FRAC_BITS)
  #define PSC_BITS_FOR_PHASE		4
  #define PSC_NUM_PHASES		16
  #define PSC_STORED_PHASES		(PSC_NUM_PHASES / 2 + 1)
  #define PSC_NUM_TAPS			7
  #define PSC_NUM_TAPS_RGBA		5
  #define PSC_COEFF_PRECISION		10
  #define PSC_PHASE_FRACTION_BITS	13
  #define PSC_PHASE_MASK		(PSC_NUM_PHASES - 1)
  #define PSC_Q_FRACTION		19
  #define PSC_Q_ROUND_OFFSET		(1 << (PSC_Q_FRACTION - 1))
  94. /**
  95. * mult_q() - Performs fixed-point multiplication.
  96. * @A: multiplier
  97. * @B: multiplicand
  98. */
  99. static int mult_q(int A, int B)
  100. {
  101. int result;
  102. s64 temp;
  103. temp = (int64_t)A * (int64_t)B;
  104. temp += PSC_Q_ROUND_OFFSET;
  105. result = (int)(temp >> PSC_Q_FRACTION);
  106. return result;
  107. }
  108. /**
  109. * div_q() - Performs fixed-point division.
  110. * @A: dividend
  111. * @B: divisor
  112. */
  113. static int div_q(int A, int B)
  114. {
  115. int result;
  116. s64 temp;
  117. temp = (int64_t)A << PSC_Q_FRACTION;
  118. if ((temp >= 0 && B >= 0) || (temp < 0 && B < 0))
  119. temp += B / 2;
  120. else
  121. temp -= B / 2;
  122. result = (int)(temp / B);
  123. return result;
  124. }
  125. /**
  126. * exp_approx_q() - Compute approximation to exp(x) function using Taylor
  127. * series.
  128. * @x: fixed-point argument of exp function
  129. */
  130. static int exp_approx_q(int x)
  131. {
  132. int sum = 1 << PSC_Q_FRACTION;
  133. int term = 1 << PSC_Q_FRACTION;
  134. term = mult_q(term, div_q(x, 1 << PSC_Q_FRACTION));
  135. sum += term;
  136. term = mult_q(term, div_q(x, 2 << PSC_Q_FRACTION));
  137. sum += term;
  138. term = mult_q(term, div_q(x, 3 << PSC_Q_FRACTION));
  139. sum += term;
  140. term = mult_q(term, div_q(x, 4 << PSC_Q_FRACTION));
  141. sum += term;
  142. return sum;
  143. }
  144. /**
  145. * dcss_scaler_gaussian_filter() - Generate gaussian prototype filter.
  146. * @fc_q: fixed-point cutoff frequency normalized to range [0, 1]
  147. * @use_5_taps: indicates whether to use 5 taps or 7 taps
  148. * @coef: output filter coefficients
  149. */
  150. static void dcss_scaler_gaussian_filter(int fc_q, bool use_5_taps,
  151. bool phase0_identity,
  152. int coef[][PSC_NUM_TAPS])
  153. {
  154. int sigma_q, g0_q, g1_q, g2_q;
  155. int tap_cnt1, tap_cnt2, tap_idx, phase_cnt;
  156. int mid;
  157. int phase;
  158. int i;
  159. int taps;
  160. if (use_5_taps)
  161. for (phase = 0; phase < PSC_STORED_PHASES; phase++) {
  162. coef[phase][0] = 0;
  163. coef[phase][PSC_NUM_TAPS - 1] = 0;
  164. }
  165. /* seed coefficient scanner */
  166. taps = use_5_taps ? PSC_NUM_TAPS_RGBA : PSC_NUM_TAPS;
  167. mid = (PSC_NUM_PHASES * taps) / 2 - 1;
  168. phase_cnt = (PSC_NUM_PHASES * (PSC_NUM_TAPS + 1)) / 2;
  169. tap_cnt1 = (PSC_NUM_PHASES * PSC_NUM_TAPS) / 2;
  170. tap_cnt2 = (PSC_NUM_PHASES * PSC_NUM_TAPS) / 2;
  171. /* seed gaussian filter generator */
  172. sigma_q = div_q(PSC_Q_ROUND_OFFSET, fc_q);
  173. g0_q = 1 << PSC_Q_FRACTION;
  174. g1_q = exp_approx_q(div_q(-PSC_Q_ROUND_OFFSET,
  175. mult_q(sigma_q, sigma_q)));
  176. g2_q = mult_q(g1_q, g1_q);
  177. coef[phase_cnt & PSC_PHASE_MASK][tap_cnt1 >> PSC_BITS_FOR_PHASE] = g0_q;
  178. for (i = 0; i < mid; i++) {
  179. phase_cnt++;
  180. tap_cnt1--;
  181. tap_cnt2++;
  182. g0_q = mult_q(g0_q, g1_q);
  183. g1_q = mult_q(g1_q, g2_q);
  184. if ((phase_cnt & PSC_PHASE_MASK) <= 8) {
  185. tap_idx = tap_cnt1 >> PSC_BITS_FOR_PHASE;
  186. coef[phase_cnt & PSC_PHASE_MASK][tap_idx] = g0_q;
  187. }
  188. if (((-phase_cnt) & PSC_PHASE_MASK) <= 8) {
  189. tap_idx = tap_cnt2 >> PSC_BITS_FOR_PHASE;
  190. coef[(-phase_cnt) & PSC_PHASE_MASK][tap_idx] = g0_q;
  191. }
  192. }
  193. phase_cnt++;
  194. tap_cnt1--;
  195. coef[phase_cnt & PSC_PHASE_MASK][tap_cnt1 >> PSC_BITS_FOR_PHASE] = 0;
  196. /* override phase 0 with identity filter if specified */
  197. if (phase0_identity)
  198. for (i = 0; i < PSC_NUM_TAPS; i++)
  199. coef[0][i] = i == (PSC_NUM_TAPS >> 1) ?
  200. (1 << PSC_COEFF_PRECISION) : 0;
  201. /* normalize coef */
  202. for (phase = 0; phase < PSC_STORED_PHASES; phase++) {
  203. int sum = 0;
  204. s64 ll_temp;
  205. for (i = 0; i < PSC_NUM_TAPS; i++)
  206. sum += coef[phase][i];
  207. for (i = 0; i < PSC_NUM_TAPS; i++) {
  208. ll_temp = coef[phase][i];
  209. ll_temp <<= PSC_COEFF_PRECISION;
  210. ll_temp += sum >> 1;
  211. ll_temp /= sum;
  212. coef[phase][i] = (int)ll_temp;
  213. }
  214. }
  215. }
  216. static void dcss_scaler_nearest_neighbor_filter(bool use_5_taps,
  217. int coef[][PSC_NUM_TAPS])
  218. {
  219. int i, j;
  220. for (i = 0; i < PSC_STORED_PHASES; i++)
  221. for (j = 0; j < PSC_NUM_TAPS; j++)
  222. coef[i][j] = j == PSC_NUM_TAPS >> 1 ?
  223. (1 << PSC_COEFF_PRECISION) : 0;
  224. }
  225. /**
  226. * dcss_scaler_filter_design() - Compute filter coefficients using
  227. * Gaussian filter.
  228. * @src_length: length of input
  229. * @dst_length: length of output
  230. * @use_5_taps: 0 for 7 taps per phase, 1 for 5 taps
  231. * @coef: output coefficients
  232. */
  233. static void dcss_scaler_filter_design(int src_length, int dst_length,
  234. bool use_5_taps, bool phase0_identity,
  235. int coef[][PSC_NUM_TAPS],
  236. bool nn_interpolation)
  237. {
  238. int fc_q;
  239. /* compute cutoff frequency */
  240. if (dst_length >= src_length)
  241. fc_q = div_q(1, PSC_NUM_PHASES);
  242. else
  243. fc_q = div_q(dst_length, src_length * PSC_NUM_PHASES);
  244. if (nn_interpolation)
  245. dcss_scaler_nearest_neighbor_filter(use_5_taps, coef);
  246. else
  247. /* compute gaussian filter coefficients */
  248. dcss_scaler_gaussian_filter(fc_q, use_5_taps, phase0_identity, coef);
  249. }
  250. static void dcss_scaler_write(struct dcss_scaler_ch *ch, u32 val, u32 ofs)
  251. {
  252. struct dcss_scaler *scl = ch->scl;
  253. dcss_ctxld_write(scl->ctxld, scl->ctx_id, val, ch->base_ofs + ofs);
  254. }
  255. static int dcss_scaler_ch_init_all(struct dcss_scaler *scl,
  256. unsigned long scaler_base)
  257. {
  258. struct dcss_scaler_ch *ch;
  259. int i;
  260. for (i = 0; i < 3; i++) {
  261. ch = &scl->ch[i];
  262. ch->base_ofs = scaler_base + i * 0x400;
  263. ch->base_reg = ioremap(ch->base_ofs, SZ_4K);
  264. if (!ch->base_reg) {
  265. dev_err(scl->dev, "scaler: unable to remap ch base\n");
  266. return -ENOMEM;
  267. }
  268. ch->scl = scl;
  269. }
  270. return 0;
  271. }
  272. int dcss_scaler_init(struct dcss_dev *dcss, unsigned long scaler_base)
  273. {
  274. struct dcss_scaler *scaler;
  275. scaler = kzalloc(sizeof(*scaler), GFP_KERNEL);
  276. if (!scaler)
  277. return -ENOMEM;
  278. dcss->scaler = scaler;
  279. scaler->dev = dcss->dev;
  280. scaler->ctxld = dcss->ctxld;
  281. scaler->ctx_id = CTX_SB_HP;
  282. if (dcss_scaler_ch_init_all(scaler, scaler_base)) {
  283. int i;
  284. for (i = 0; i < 3; i++) {
  285. if (scaler->ch[i].base_reg)
  286. iounmap(scaler->ch[i].base_reg);
  287. }
  288. kfree(scaler);
  289. return -ENOMEM;
  290. }
  291. return 0;
  292. }
  293. void dcss_scaler_exit(struct dcss_scaler *scl)
  294. {
  295. int ch_no;
  296. for (ch_no = 0; ch_no < 3; ch_no++) {
  297. struct dcss_scaler_ch *ch = &scl->ch[ch_no];
  298. dcss_writel(0, ch->base_reg + DCSS_SCALER_CTRL);
  299. if (ch->base_reg)
  300. iounmap(ch->base_reg);
  301. }
  302. kfree(scl);
  303. }
  304. void dcss_scaler_ch_enable(struct dcss_scaler *scl, int ch_num, bool en)
  305. {
  306. struct dcss_scaler_ch *ch = &scl->ch[ch_num];
  307. u32 scaler_ctrl;
  308. scaler_ctrl = en ? SCALER_EN | REPEAT_EN : 0;
  309. if (en)
  310. dcss_scaler_write(ch, ch->sdata_ctrl, DCSS_SCALER_SDATA_CTRL);
  311. if (ch->scaler_ctrl != scaler_ctrl)
  312. ch->scaler_ctrl_chgd = true;
  313. ch->scaler_ctrl = scaler_ctrl;
  314. }
  315. static void dcss_scaler_yuv_enable(struct dcss_scaler_ch *ch, bool en)
  316. {
  317. ch->sdata_ctrl &= ~YUV_EN;
  318. ch->sdata_ctrl |= en ? YUV_EN : 0;
  319. }
  320. static void dcss_scaler_rtr_8lines_enable(struct dcss_scaler_ch *ch, bool en)
  321. {
  322. ch->sdata_ctrl &= ~RTRAM_8LINES;
  323. ch->sdata_ctrl |= en ? RTRAM_8LINES : 0;
  324. }
  325. static void dcss_scaler_bit_depth_set(struct dcss_scaler_ch *ch, int depth)
  326. {
  327. u32 val;
  328. val = depth == 30 ? 2 : 0;
  329. dcss_scaler_write(ch,
  330. ((val << CHR_BIT_DEPTH_POS) & CHR_BIT_DEPTH_MASK) |
  331. ((val << LUM_BIT_DEPTH_POS) & LUM_BIT_DEPTH_MASK),
  332. DCSS_SCALER_BIT_DEPTH);
  333. }
  /*
   * Buffer layouts known to the scaler. The numeric values are written
   * as-is to the SRC_FORMAT/DST_FORMAT registers by dcss_scaler_format_set().
   */
  enum buffer_format {
  	BUF_FMT_YUV420 = 0,
  	BUF_FMT_YUV422 = 1,
  	BUF_FMT_ARGB8888_YUV444 = 2,
  };
  /*
   * Position of the source chroma samples. Each name spells out the
   * horizontal (HORZ) and vertical (VERT) offset that
   * dcss_scaler_fractions_set() compensates for.
   */
  enum chroma_location {
  	PSC_LOC_HORZ_0_VERT_1_OVER_4		= 0,
  	PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_4	= 1,
  	PSC_LOC_HORZ_0_VERT_0			= 2,
  	PSC_LOC_HORZ_1_OVER_4_VERT_0		= 3,
  	PSC_LOC_HORZ_0_VERT_1_OVER_2		= 4,
  	PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_2	= 5,
  };
  347. static void dcss_scaler_format_set(struct dcss_scaler_ch *ch,
  348. enum buffer_format src_fmt,
  349. enum buffer_format dst_fmt)
  350. {
  351. dcss_scaler_write(ch, src_fmt, DCSS_SCALER_SRC_FORMAT);
  352. dcss_scaler_write(ch, dst_fmt, DCSS_SCALER_DST_FORMAT);
  353. }
  354. static void dcss_scaler_res_set(struct dcss_scaler_ch *ch,
  355. int src_xres, int src_yres,
  356. int dst_xres, int dst_yres,
  357. u32 pix_format, enum buffer_format dst_format)
  358. {
  359. u32 lsrc_xres, lsrc_yres, csrc_xres, csrc_yres;
  360. u32 ldst_xres, ldst_yres, cdst_xres, cdst_yres;
  361. bool src_is_444 = true;
  362. lsrc_xres = src_xres;
  363. csrc_xres = src_xres;
  364. lsrc_yres = src_yres;
  365. csrc_yres = src_yres;
  366. ldst_xres = dst_xres;
  367. cdst_xres = dst_xres;
  368. ldst_yres = dst_yres;
  369. cdst_yres = dst_yres;
  370. if (pix_format == DRM_FORMAT_UYVY || pix_format == DRM_FORMAT_VYUY ||
  371. pix_format == DRM_FORMAT_YUYV || pix_format == DRM_FORMAT_YVYU) {
  372. csrc_xres >>= 1;
  373. src_is_444 = false;
  374. } else if (pix_format == DRM_FORMAT_NV12 ||
  375. pix_format == DRM_FORMAT_NV21) {
  376. csrc_xres >>= 1;
  377. csrc_yres >>= 1;
  378. src_is_444 = false;
  379. }
  380. if (dst_format == BUF_FMT_YUV422)
  381. cdst_xres >>= 1;
  382. /* for 4:4:4 to 4:2:2 conversion, source height should be 1 less */
  383. if (src_is_444 && dst_format == BUF_FMT_YUV422) {
  384. lsrc_yres--;
  385. csrc_yres--;
  386. }
  387. dcss_scaler_write(ch, (((lsrc_yres - 1) << HEIGHT_POS) & HEIGHT_MASK) |
  388. (((lsrc_xres - 1) << WIDTH_POS) & WIDTH_MASK),
  389. DCSS_SCALER_SRC_LUM_RES);
  390. dcss_scaler_write(ch, (((csrc_yres - 1) << HEIGHT_POS) & HEIGHT_MASK) |
  391. (((csrc_xres - 1) << WIDTH_POS) & WIDTH_MASK),
  392. DCSS_SCALER_SRC_CHR_RES);
  393. dcss_scaler_write(ch, (((ldst_yres - 1) << HEIGHT_POS) & HEIGHT_MASK) |
  394. (((ldst_xres - 1) << WIDTH_POS) & WIDTH_MASK),
  395. DCSS_SCALER_DST_LUM_RES);
  396. dcss_scaler_write(ch, (((cdst_yres - 1) << HEIGHT_POS) & HEIGHT_MASK) |
  397. (((cdst_xres - 1) << WIDTH_POS) & WIDTH_MASK),
  398. DCSS_SCALER_DST_CHR_RES);
  399. }
  /*
   * Scaling limits expressed as fixed-point ratios with @fp_pos fractional
   * bits: downscale_fp() yields factor, upscale_fp() yields 1 / factor.
   */
  #define downscale_fp(factor, fp_pos)		((factor) << (fp_pos))
  #define upscale_fp(factor, fp_pos)		((1 << (fp_pos)) / (factor))

  /* Maximum down- and upscale factors of one scaler channel. */
  struct dcss_scaler_factors {
  	int downscale;
  	int upscale;
  };

  /* Limits per channel, indexed by channel number. */
  static const struct dcss_scaler_factors dcss_scaler_factors[] = {
  	{ .downscale = 3, .upscale = 8 },
  	{ .downscale = 5, .upscale = 8 },
  	{ .downscale = 5, .upscale = 8 },
  };
  409. static void dcss_scaler_fractions_set(struct dcss_scaler_ch *ch,
  410. int src_xres, int src_yres,
  411. int dst_xres, int dst_yres,
  412. u32 src_format, u32 dst_format,
  413. enum chroma_location src_chroma_loc)
  414. {
  415. int src_c_xres, src_c_yres, dst_c_xres, dst_c_yres;
  416. u32 l_vinc, l_hinc, c_vinc, c_hinc;
  417. u32 c_vstart, c_hstart;
  418. src_c_xres = src_xres;
  419. src_c_yres = src_yres;
  420. dst_c_xres = dst_xres;
  421. dst_c_yres = dst_yres;
  422. c_vstart = 0;
  423. c_hstart = 0;
  424. /* adjustments for source chroma location */
  425. if (src_format == BUF_FMT_YUV420) {
  426. /* vertical input chroma position adjustment */
  427. switch (src_chroma_loc) {
  428. case PSC_LOC_HORZ_0_VERT_1_OVER_4:
  429. case PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_4:
  430. /*
  431. * move chroma up to first luma line
  432. * (1/4 chroma input line spacing)
  433. */
  434. c_vstart -= (1 << (PSC_PHASE_FRACTION_BITS - 2));
  435. break;
  436. case PSC_LOC_HORZ_0_VERT_1_OVER_2:
  437. case PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_2:
  438. /*
  439. * move chroma up to first luma line
  440. * (1/2 chroma input line spacing)
  441. */
  442. c_vstart -= (1 << (PSC_PHASE_FRACTION_BITS - 1));
  443. break;
  444. default:
  445. break;
  446. }
  447. /* horizontal input chroma position adjustment */
  448. switch (src_chroma_loc) {
  449. case PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_4:
  450. case PSC_LOC_HORZ_1_OVER_4_VERT_0:
  451. case PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_2:
  452. /* move chroma left 1/4 chroma input sample spacing */
  453. c_hstart -= (1 << (PSC_PHASE_FRACTION_BITS - 2));
  454. break;
  455. default:
  456. break;
  457. }
  458. }
  459. /* adjustments to chroma resolution */
  460. if (src_format == BUF_FMT_YUV420) {
  461. src_c_xres >>= 1;
  462. src_c_yres >>= 1;
  463. } else if (src_format == BUF_FMT_YUV422) {
  464. src_c_xres >>= 1;
  465. }
  466. if (dst_format == BUF_FMT_YUV422)
  467. dst_c_xres >>= 1;
  468. l_vinc = ((src_yres << 13) + (dst_yres >> 1)) / dst_yres;
  469. c_vinc = ((src_c_yres << 13) + (dst_c_yres >> 1)) / dst_c_yres;
  470. l_hinc = ((src_xres << 13) + (dst_xres >> 1)) / dst_xres;
  471. c_hinc = ((src_c_xres << 13) + (dst_c_xres >> 1)) / dst_c_xres;
  472. /* save chroma start phase */
  473. ch->c_vstart = c_vstart;
  474. ch->c_hstart = c_hstart;
  475. dcss_scaler_write(ch, 0, DCSS_SCALER_V_LUM_START);
  476. dcss_scaler_write(ch, l_vinc, DCSS_SCALER_V_LUM_INC);
  477. dcss_scaler_write(ch, 0, DCSS_SCALER_H_LUM_START);
  478. dcss_scaler_write(ch, l_hinc, DCSS_SCALER_H_LUM_INC);
  479. dcss_scaler_write(ch, c_vstart, DCSS_SCALER_V_CHR_START);
  480. dcss_scaler_write(ch, c_vinc, DCSS_SCALER_V_CHR_INC);
  481. dcss_scaler_write(ch, c_hstart, DCSS_SCALER_H_CHR_START);
  482. dcss_scaler_write(ch, c_hinc, DCSS_SCALER_H_CHR_INC);
  483. }
  484. int dcss_scaler_get_min_max_ratios(struct dcss_scaler *scl, int ch_num,
  485. int *min, int *max)
  486. {
  487. *min = upscale_fp(dcss_scaler_factors[ch_num].upscale, 16);
  488. *max = downscale_fp(dcss_scaler_factors[ch_num].downscale, 16);
  489. return 0;
  490. }
  491. static void dcss_scaler_program_5_coef_set(struct dcss_scaler_ch *ch,
  492. int base_addr,
  493. int coef[][PSC_NUM_TAPS])
  494. {
  495. int i, phase;
  496. for (i = 0; i < PSC_STORED_PHASES; i++) {
  497. dcss_scaler_write(ch, ((coef[i][1] & 0xfff) << 16 |
  498. (coef[i][2] & 0xfff) << 4 |
  499. (coef[i][3] & 0xf00) >> 8),
  500. base_addr + i * sizeof(u32));
  501. dcss_scaler_write(ch, ((coef[i][3] & 0x0ff) << 20 |
  502. (coef[i][4] & 0xfff) << 8 |
  503. (coef[i][5] & 0xff0) >> 4),
  504. base_addr + 0x40 + i * sizeof(u32));
  505. dcss_scaler_write(ch, ((coef[i][5] & 0x00f) << 24),
  506. base_addr + 0x80 + i * sizeof(u32));
  507. }
  508. /* reverse both phase and tap orderings */
  509. for (phase = (PSC_NUM_PHASES >> 1) - 1;
  510. i < PSC_NUM_PHASES; i++, phase--) {
  511. dcss_scaler_write(ch, ((coef[phase][5] & 0xfff) << 16 |
  512. (coef[phase][4] & 0xfff) << 4 |
  513. (coef[phase][3] & 0xf00) >> 8),
  514. base_addr + i * sizeof(u32));
  515. dcss_scaler_write(ch, ((coef[phase][3] & 0x0ff) << 20 |
  516. (coef[phase][2] & 0xfff) << 8 |
  517. (coef[phase][1] & 0xff0) >> 4),
  518. base_addr + 0x40 + i * sizeof(u32));
  519. dcss_scaler_write(ch, ((coef[phase][1] & 0x00f) << 24),
  520. base_addr + 0x80 + i * sizeof(u32));
  521. }
  522. }
  523. static void dcss_scaler_program_7_coef_set(struct dcss_scaler_ch *ch,
  524. int base_addr,
  525. int coef[][PSC_NUM_TAPS])
  526. {
  527. int i, phase;
  528. for (i = 0; i < PSC_STORED_PHASES; i++) {
  529. dcss_scaler_write(ch, ((coef[i][0] & 0xfff) << 16 |
  530. (coef[i][1] & 0xfff) << 4 |
  531. (coef[i][2] & 0xf00) >> 8),
  532. base_addr + i * sizeof(u32));
  533. dcss_scaler_write(ch, ((coef[i][2] & 0x0ff) << 20 |
  534. (coef[i][3] & 0xfff) << 8 |
  535. (coef[i][4] & 0xff0) >> 4),
  536. base_addr + 0x40 + i * sizeof(u32));
  537. dcss_scaler_write(ch, ((coef[i][4] & 0x00f) << 24 |
  538. (coef[i][5] & 0xfff) << 12 |
  539. (coef[i][6] & 0xfff)),
  540. base_addr + 0x80 + i * sizeof(u32));
  541. }
  542. /* reverse both phase and tap orderings */
  543. for (phase = (PSC_NUM_PHASES >> 1) - 1;
  544. i < PSC_NUM_PHASES; i++, phase--) {
  545. dcss_scaler_write(ch, ((coef[phase][6] & 0xfff) << 16 |
  546. (coef[phase][5] & 0xfff) << 4 |
  547. (coef[phase][4] & 0xf00) >> 8),
  548. base_addr + i * sizeof(u32));
  549. dcss_scaler_write(ch, ((coef[phase][4] & 0x0ff) << 20 |
  550. (coef[phase][3] & 0xfff) << 8 |
  551. (coef[phase][2] & 0xff0) >> 4),
  552. base_addr + 0x40 + i * sizeof(u32));
  553. dcss_scaler_write(ch, ((coef[phase][2] & 0x00f) << 24 |
  554. (coef[phase][1] & 0xfff) << 12 |
  555. (coef[phase][0] & 0xfff)),
  556. base_addr + 0x80 + i * sizeof(u32));
  557. }
  558. }
  559. static void dcss_scaler_yuv_coef_set(struct dcss_scaler_ch *ch,
  560. enum buffer_format src_format,
  561. enum buffer_format dst_format,
  562. bool use_5_taps,
  563. int src_xres, int src_yres, int dst_xres,
  564. int dst_yres)
  565. {
  566. int coef[PSC_STORED_PHASES][PSC_NUM_TAPS];
  567. bool program_5_taps = use_5_taps ||
  568. (dst_format == BUF_FMT_YUV422 &&
  569. src_format == BUF_FMT_ARGB8888_YUV444);
  570. /* horizontal luma */
  571. dcss_scaler_filter_design(src_xres, dst_xres, false,
  572. src_xres == dst_xres, coef,
  573. ch->use_nn_interpolation);
  574. dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_HLUM, coef);
  575. /* vertical luma */
  576. dcss_scaler_filter_design(src_yres, dst_yres, program_5_taps,
  577. src_yres == dst_yres, coef,
  578. ch->use_nn_interpolation);
  579. if (program_5_taps)
  580. dcss_scaler_program_5_coef_set(ch, DCSS_SCALER_COEF_VLUM, coef);
  581. else
  582. dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_VLUM, coef);
  583. /* adjust chroma resolution */
  584. if (src_format != BUF_FMT_ARGB8888_YUV444)
  585. src_xres >>= 1;
  586. if (src_format == BUF_FMT_YUV420)
  587. src_yres >>= 1;
  588. if (dst_format != BUF_FMT_ARGB8888_YUV444)
  589. dst_xres >>= 1;
  590. if (dst_format == BUF_FMT_YUV420) /* should not happen */
  591. dst_yres >>= 1;
  592. /* horizontal chroma */
  593. dcss_scaler_filter_design(src_xres, dst_xres, false,
  594. (src_xres == dst_xres) && (ch->c_hstart == 0),
  595. coef, ch->use_nn_interpolation);
  596. dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_HCHR, coef);
  597. /* vertical chroma */
  598. dcss_scaler_filter_design(src_yres, dst_yres, program_5_taps,
  599. (src_yres == dst_yres) && (ch->c_vstart == 0),
  600. coef, ch->use_nn_interpolation);
  601. if (program_5_taps)
  602. dcss_scaler_program_5_coef_set(ch, DCSS_SCALER_COEF_VCHR, coef);
  603. else
  604. dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_VCHR, coef);
  605. }
  606. static void dcss_scaler_rgb_coef_set(struct dcss_scaler_ch *ch,
  607. int src_xres, int src_yres, int dst_xres,
  608. int dst_yres)
  609. {
  610. int coef[PSC_STORED_PHASES][PSC_NUM_TAPS];
  611. /* horizontal RGB */
  612. dcss_scaler_filter_design(src_xres, dst_xres, false,
  613. src_xres == dst_xres, coef,
  614. ch->use_nn_interpolation);
  615. dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_HLUM, coef);
  616. /* vertical RGB */
  617. dcss_scaler_filter_design(src_yres, dst_yres, false,
  618. src_yres == dst_yres, coef,
  619. ch->use_nn_interpolation);
  620. dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_VLUM, coef);
  621. }
  622. static void dcss_scaler_set_rgb10_order(struct dcss_scaler_ch *ch,
  623. const struct drm_format_info *format)
  624. {
  625. u32 a2r10g10b10_format;
  626. if (format->is_yuv)
  627. return;
  628. ch->sdata_ctrl &= ~A2R10G10B10_FORMAT_MASK;
  629. if (format->depth != 30)
  630. return;
  631. switch (format->format) {
  632. case DRM_FORMAT_ARGB2101010:
  633. case DRM_FORMAT_XRGB2101010:
  634. a2r10g10b10_format = 0;
  635. break;
  636. case DRM_FORMAT_ABGR2101010:
  637. case DRM_FORMAT_XBGR2101010:
  638. a2r10g10b10_format = 5;
  639. break;
  640. case DRM_FORMAT_RGBA1010102:
  641. case DRM_FORMAT_RGBX1010102:
  642. a2r10g10b10_format = 6;
  643. break;
  644. case DRM_FORMAT_BGRA1010102:
  645. case DRM_FORMAT_BGRX1010102:
  646. a2r10g10b10_format = 11;
  647. break;
  648. default:
  649. a2r10g10b10_format = 0;
  650. break;
  651. }
  652. ch->sdata_ctrl |= a2r10g10b10_format << A2R10G10B10_FORMAT_POS;
  653. }
  654. void dcss_scaler_set_filter(struct dcss_scaler *scl, int ch_num,
  655. enum drm_scaling_filter scaling_filter)
  656. {
  657. struct dcss_scaler_ch *ch = &scl->ch[ch_num];
  658. ch->use_nn_interpolation = scaling_filter == DRM_SCALING_FILTER_NEAREST_NEIGHBOR;
  659. }
  660. void dcss_scaler_setup(struct dcss_scaler *scl, int ch_num,
  661. const struct drm_format_info *format,
  662. int src_xres, int src_yres, int dst_xres, int dst_yres,
  663. u32 vrefresh_hz)
  664. {
  665. struct dcss_scaler_ch *ch = &scl->ch[ch_num];
  666. unsigned int pixel_depth = 0;
  667. bool rtr_8line_en = false;
  668. bool use_5_taps = false;
  669. enum buffer_format src_format = BUF_FMT_ARGB8888_YUV444;
  670. enum buffer_format dst_format = BUF_FMT_ARGB8888_YUV444;
  671. u32 pix_format = format->format;
  672. if (format->is_yuv) {
  673. dcss_scaler_yuv_enable(ch, true);
  674. if (pix_format == DRM_FORMAT_NV12 ||
  675. pix_format == DRM_FORMAT_NV21) {
  676. rtr_8line_en = true;
  677. src_format = BUF_FMT_YUV420;
  678. } else if (pix_format == DRM_FORMAT_UYVY ||
  679. pix_format == DRM_FORMAT_VYUY ||
  680. pix_format == DRM_FORMAT_YUYV ||
  681. pix_format == DRM_FORMAT_YVYU) {
  682. src_format = BUF_FMT_YUV422;
  683. }
  684. use_5_taps = !rtr_8line_en;
  685. } else {
  686. dcss_scaler_yuv_enable(ch, false);
  687. pixel_depth = format->depth;
  688. }
  689. dcss_scaler_fractions_set(ch, src_xres, src_yres, dst_xres,
  690. dst_yres, src_format, dst_format,
  691. PSC_LOC_HORZ_0_VERT_1_OVER_4);
  692. if (format->is_yuv)
  693. dcss_scaler_yuv_coef_set(ch, src_format, dst_format,
  694. use_5_taps, src_xres, src_yres,
  695. dst_xres, dst_yres);
  696. else
  697. dcss_scaler_rgb_coef_set(ch, src_xres, src_yres,
  698. dst_xres, dst_yres);
  699. dcss_scaler_rtr_8lines_enable(ch, rtr_8line_en);
  700. dcss_scaler_bit_depth_set(ch, pixel_depth);
  701. dcss_scaler_set_rgb10_order(ch, format);
  702. dcss_scaler_format_set(ch, src_format, dst_format);
  703. dcss_scaler_res_set(ch, src_xres, src_yres, dst_xres, dst_yres,
  704. pix_format, dst_format);
  705. }
  706. /* This function will be called from interrupt context. */
  707. void dcss_scaler_write_sclctrl(struct dcss_scaler *scl)
  708. {
  709. int chnum;
  710. dcss_ctxld_assert_locked(scl->ctxld);
  711. for (chnum = 0; chnum < 3; chnum++) {
  712. struct dcss_scaler_ch *ch = &scl->ch[chnum];
  713. if (ch->scaler_ctrl_chgd) {
  714. dcss_ctxld_write_irqsafe(scl->ctxld, scl->ctx_id,
  715. ch->scaler_ctrl,
  716. ch->base_ofs +
  717. DCSS_SCALER_CTRL);
  718. ch->scaler_ctrl_chgd = false;
  719. }
  720. }
  721. }