// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2014 The Linux Foundation. All rights reserved.
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <[email protected]>
 */

#include <linux/clk.h>
#include <linux/component.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>

#include "vc4_drv.h"
#include "vc4_regs.h"
  13. static const struct debugfs_reg32 v3d_regs[] = {
  14. VC4_REG32(V3D_IDENT0),
  15. VC4_REG32(V3D_IDENT1),
  16. VC4_REG32(V3D_IDENT2),
  17. VC4_REG32(V3D_SCRATCH),
  18. VC4_REG32(V3D_L2CACTL),
  19. VC4_REG32(V3D_SLCACTL),
  20. VC4_REG32(V3D_INTCTL),
  21. VC4_REG32(V3D_INTENA),
  22. VC4_REG32(V3D_INTDIS),
  23. VC4_REG32(V3D_CT0CS),
  24. VC4_REG32(V3D_CT1CS),
  25. VC4_REG32(V3D_CT0EA),
  26. VC4_REG32(V3D_CT1EA),
  27. VC4_REG32(V3D_CT0CA),
  28. VC4_REG32(V3D_CT1CA),
  29. VC4_REG32(V3D_CT00RA0),
  30. VC4_REG32(V3D_CT01RA0),
  31. VC4_REG32(V3D_CT0LC),
  32. VC4_REG32(V3D_CT1LC),
  33. VC4_REG32(V3D_CT0PC),
  34. VC4_REG32(V3D_CT1PC),
  35. VC4_REG32(V3D_PCS),
  36. VC4_REG32(V3D_BFC),
  37. VC4_REG32(V3D_RFC),
  38. VC4_REG32(V3D_BPCA),
  39. VC4_REG32(V3D_BPCS),
  40. VC4_REG32(V3D_BPOA),
  41. VC4_REG32(V3D_BPOS),
  42. VC4_REG32(V3D_BXCF),
  43. VC4_REG32(V3D_SQRSV0),
  44. VC4_REG32(V3D_SQRSV1),
  45. VC4_REG32(V3D_SQCNTL),
  46. VC4_REG32(V3D_SRQPC),
  47. VC4_REG32(V3D_SRQUA),
  48. VC4_REG32(V3D_SRQUL),
  49. VC4_REG32(V3D_SRQCS),
  50. VC4_REG32(V3D_VPACNTL),
  51. VC4_REG32(V3D_VPMBASE),
  52. VC4_REG32(V3D_PCTRC),
  53. VC4_REG32(V3D_PCTRE),
  54. VC4_REG32(V3D_PCTR(0)),
  55. VC4_REG32(V3D_PCTRS(0)),
  56. VC4_REG32(V3D_PCTR(1)),
  57. VC4_REG32(V3D_PCTRS(1)),
  58. VC4_REG32(V3D_PCTR(2)),
  59. VC4_REG32(V3D_PCTRS(2)),
  60. VC4_REG32(V3D_PCTR(3)),
  61. VC4_REG32(V3D_PCTRS(3)),
  62. VC4_REG32(V3D_PCTR(4)),
  63. VC4_REG32(V3D_PCTRS(4)),
  64. VC4_REG32(V3D_PCTR(5)),
  65. VC4_REG32(V3D_PCTRS(5)),
  66. VC4_REG32(V3D_PCTR(6)),
  67. VC4_REG32(V3D_PCTRS(6)),
  68. VC4_REG32(V3D_PCTR(7)),
  69. VC4_REG32(V3D_PCTRS(7)),
  70. VC4_REG32(V3D_PCTR(8)),
  71. VC4_REG32(V3D_PCTRS(8)),
  72. VC4_REG32(V3D_PCTR(9)),
  73. VC4_REG32(V3D_PCTRS(9)),
  74. VC4_REG32(V3D_PCTR(10)),
  75. VC4_REG32(V3D_PCTRS(10)),
  76. VC4_REG32(V3D_PCTR(11)),
  77. VC4_REG32(V3D_PCTRS(11)),
  78. VC4_REG32(V3D_PCTR(12)),
  79. VC4_REG32(V3D_PCTRS(12)),
  80. VC4_REG32(V3D_PCTR(13)),
  81. VC4_REG32(V3D_PCTRS(13)),
  82. VC4_REG32(V3D_PCTR(14)),
  83. VC4_REG32(V3D_PCTRS(14)),
  84. VC4_REG32(V3D_PCTR(15)),
  85. VC4_REG32(V3D_PCTRS(15)),
  86. VC4_REG32(V3D_DBGE),
  87. VC4_REG32(V3D_FDBGO),
  88. VC4_REG32(V3D_FDBGB),
  89. VC4_REG32(V3D_FDBGR),
  90. VC4_REG32(V3D_FDBGS),
  91. VC4_REG32(V3D_ERRSTAT),
  92. };
  93. static int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused)
  94. {
  95. struct drm_info_node *node = (struct drm_info_node *)m->private;
  96. struct drm_device *dev = node->minor->dev;
  97. struct vc4_dev *vc4 = to_vc4_dev(dev);
  98. int ret = vc4_v3d_pm_get(vc4);
  99. if (ret == 0) {
  100. uint32_t ident1 = V3D_READ(V3D_IDENT1);
  101. uint32_t nslc = VC4_GET_FIELD(ident1, V3D_IDENT1_NSLC);
  102. uint32_t tups = VC4_GET_FIELD(ident1, V3D_IDENT1_TUPS);
  103. uint32_t qups = VC4_GET_FIELD(ident1, V3D_IDENT1_QUPS);
  104. seq_printf(m, "Revision: %d\n",
  105. VC4_GET_FIELD(ident1, V3D_IDENT1_REV));
  106. seq_printf(m, "Slices: %d\n", nslc);
  107. seq_printf(m, "TMUs: %d\n", nslc * tups);
  108. seq_printf(m, "QPUs: %d\n", nslc * qups);
  109. seq_printf(m, "Semaphores: %d\n",
  110. VC4_GET_FIELD(ident1, V3D_IDENT1_NSEM));
  111. vc4_v3d_pm_put(vc4);
  112. }
  113. return 0;
  114. }
  115. /*
  116. * Wraps pm_runtime_get_sync() in a refcount, so that we can reliably
  117. * get the pm_runtime refcount to 0 in vc4_reset().
  118. */
  119. int
  120. vc4_v3d_pm_get(struct vc4_dev *vc4)
  121. {
  122. if (WARN_ON_ONCE(vc4->is_vc5))
  123. return -ENODEV;
  124. mutex_lock(&vc4->power_lock);
  125. if (vc4->power_refcount++ == 0) {
  126. int ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
  127. if (ret < 0) {
  128. vc4->power_refcount--;
  129. mutex_unlock(&vc4->power_lock);
  130. return ret;
  131. }
  132. }
  133. mutex_unlock(&vc4->power_lock);
  134. return 0;
  135. }
  136. void
  137. vc4_v3d_pm_put(struct vc4_dev *vc4)
  138. {
  139. if (WARN_ON_ONCE(vc4->is_vc5))
  140. return;
  141. mutex_lock(&vc4->power_lock);
  142. if (--vc4->power_refcount == 0) {
  143. pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev);
  144. pm_runtime_put_autosuspend(&vc4->v3d->pdev->dev);
  145. }
  146. mutex_unlock(&vc4->power_lock);
  147. }
  148. static void vc4_v3d_init_hw(struct drm_device *dev)
  149. {
  150. struct vc4_dev *vc4 = to_vc4_dev(dev);
  151. /* Take all the memory that would have been reserved for user
  152. * QPU programs, since we don't have an interface for running
  153. * them, anyway.
  154. */
  155. V3D_WRITE(V3D_VPMBASE, 0);
  156. }
  157. int vc4_v3d_get_bin_slot(struct vc4_dev *vc4)
  158. {
  159. struct drm_device *dev = &vc4->base;
  160. unsigned long irqflags;
  161. int slot;
  162. uint64_t seqno = 0;
  163. struct vc4_exec_info *exec;
  164. if (WARN_ON_ONCE(vc4->is_vc5))
  165. return -ENODEV;
  166. try_again:
  167. spin_lock_irqsave(&vc4->job_lock, irqflags);
  168. slot = ffs(~vc4->bin_alloc_used);
  169. if (slot != 0) {
  170. /* Switch from ffs() bit index to a 0-based index. */
  171. slot--;
  172. vc4->bin_alloc_used |= BIT(slot);
  173. spin_unlock_irqrestore(&vc4->job_lock, irqflags);
  174. return slot;
  175. }
  176. /* Couldn't find an open slot. Wait for render to complete
  177. * and try again.
  178. */
  179. exec = vc4_last_render_job(vc4);
  180. if (exec)
  181. seqno = exec->seqno;
  182. spin_unlock_irqrestore(&vc4->job_lock, irqflags);
  183. if (seqno) {
  184. int ret = vc4_wait_for_seqno(dev, seqno, ~0ull, true);
  185. if (ret == 0)
  186. goto try_again;
  187. return ret;
  188. }
  189. return -ENOMEM;
  190. }
  191. /*
  192. * bin_bo_alloc() - allocates the memory that will be used for
  193. * tile binning.
  194. *
  195. * The binner has a limitation that the addresses in the tile state
  196. * buffer that point into the tile alloc buffer or binner overflow
  197. * memory only have 28 bits (256MB), and the top 4 on the bus for
  198. * tile alloc references end up coming from the tile state buffer's
  199. * address.
  200. *
  201. * To work around this, we allocate a single large buffer while V3D is
  202. * in use, make sure that it has the top 4 bits constant across its
  203. * entire extent, and then put the tile state, tile alloc, and binner
  204. * overflow memory inside that buffer.
  205. *
  206. * This creates a limitation where we may not be able to execute a job
  207. * if it doesn't fit within the buffer that we allocated up front.
  208. * However, it turns out that 16MB is "enough for anybody", and
  209. * real-world applications run into allocation failures from the
  210. * overall DMA pool before they make scenes complicated enough to run
  211. * out of bin space.
  212. */
  213. static int bin_bo_alloc(struct vc4_dev *vc4)
  214. {
  215. struct vc4_v3d *v3d = vc4->v3d;
  216. uint32_t size = 16 * 1024 * 1024;
  217. int ret = 0;
  218. struct list_head list;
  219. if (!v3d)
  220. return -ENODEV;
  221. /* We may need to try allocating more than once to get a BO
  222. * that doesn't cross 256MB. Track the ones we've allocated
  223. * that failed so far, so that we can free them when we've got
  224. * one that succeeded (if we freed them right away, our next
  225. * allocation would probably be the same chunk of memory).
  226. */
  227. INIT_LIST_HEAD(&list);
  228. while (true) {
  229. struct vc4_bo *bo = vc4_bo_create(&vc4->base, size, true,
  230. VC4_BO_TYPE_BIN);
  231. if (IS_ERR(bo)) {
  232. ret = PTR_ERR(bo);
  233. dev_err(&v3d->pdev->dev,
  234. "Failed to allocate memory for tile binning: "
  235. "%d. You may need to enable DMA or give it "
  236. "more memory.",
  237. ret);
  238. break;
  239. }
  240. /* Check if this BO won't trigger the addressing bug. */
  241. if ((bo->base.dma_addr & 0xf0000000) ==
  242. ((bo->base.dma_addr + bo->base.base.size - 1) & 0xf0000000)) {
  243. vc4->bin_bo = bo;
  244. /* Set up for allocating 512KB chunks of
  245. * binner memory. The biggest allocation we
  246. * need to do is for the initial tile alloc +
  247. * tile state buffer. We can render to a
  248. * maximum of ((2048*2048) / (32*32) = 4096
  249. * tiles in a frame (until we do floating
  250. * point rendering, at which point it would be
  251. * 8192). Tile state is 48b/tile (rounded to
  252. * a page), and tile alloc is 32b/tile
  253. * (rounded to a page), plus a page of extra,
  254. * for a total of 320kb for our worst-case.
  255. * We choose 512kb so that it divides evenly
  256. * into our 16MB, and the rest of the 512kb
  257. * will be used as storage for the overflow
  258. * from the initial 32b CL per bin.
  259. */
  260. vc4->bin_alloc_size = 512 * 1024;
  261. vc4->bin_alloc_used = 0;
  262. vc4->bin_alloc_overflow = 0;
  263. WARN_ON_ONCE(sizeof(vc4->bin_alloc_used) * 8 !=
  264. bo->base.base.size / vc4->bin_alloc_size);
  265. kref_init(&vc4->bin_bo_kref);
  266. /* Enable the out-of-memory interrupt to set our
  267. * newly-allocated binner BO, potentially from an
  268. * already-pending-but-masked interrupt.
  269. */
  270. V3D_WRITE(V3D_INTENA, V3D_INT_OUTOMEM);
  271. break;
  272. }
  273. /* Put it on the list to free later, and try again. */
  274. list_add(&bo->unref_head, &list);
  275. }
  276. /* Free all the BOs we allocated but didn't choose. */
  277. while (!list_empty(&list)) {
  278. struct vc4_bo *bo = list_last_entry(&list,
  279. struct vc4_bo, unref_head);
  280. list_del(&bo->unref_head);
  281. drm_gem_object_put(&bo->base.base);
  282. }
  283. return ret;
  284. }
  285. int vc4_v3d_bin_bo_get(struct vc4_dev *vc4, bool *used)
  286. {
  287. int ret = 0;
  288. if (WARN_ON_ONCE(vc4->is_vc5))
  289. return -ENODEV;
  290. mutex_lock(&vc4->bin_bo_lock);
  291. if (used && *used)
  292. goto complete;
  293. if (vc4->bin_bo)
  294. kref_get(&vc4->bin_bo_kref);
  295. else
  296. ret = bin_bo_alloc(vc4);
  297. if (ret == 0 && used)
  298. *used = true;
  299. complete:
  300. mutex_unlock(&vc4->bin_bo_lock);
  301. return ret;
  302. }
  303. static void bin_bo_release(struct kref *ref)
  304. {
  305. struct vc4_dev *vc4 = container_of(ref, struct vc4_dev, bin_bo_kref);
  306. if (WARN_ON_ONCE(!vc4->bin_bo))
  307. return;
  308. drm_gem_object_put(&vc4->bin_bo->base.base);
  309. vc4->bin_bo = NULL;
  310. }
  311. void vc4_v3d_bin_bo_put(struct vc4_dev *vc4)
  312. {
  313. if (WARN_ON_ONCE(vc4->is_vc5))
  314. return;
  315. mutex_lock(&vc4->bin_bo_lock);
  316. kref_put(&vc4->bin_bo_kref, bin_bo_release);
  317. mutex_unlock(&vc4->bin_bo_lock);
  318. }
  319. #ifdef CONFIG_PM
  320. static int vc4_v3d_runtime_suspend(struct device *dev)
  321. {
  322. struct vc4_v3d *v3d = dev_get_drvdata(dev);
  323. struct vc4_dev *vc4 = v3d->vc4;
  324. vc4_irq_disable(&vc4->base);
  325. clk_disable_unprepare(v3d->clk);
  326. return 0;
  327. }
  328. static int vc4_v3d_runtime_resume(struct device *dev)
  329. {
  330. struct vc4_v3d *v3d = dev_get_drvdata(dev);
  331. struct vc4_dev *vc4 = v3d->vc4;
  332. int ret;
  333. ret = clk_prepare_enable(v3d->clk);
  334. if (ret != 0)
  335. return ret;
  336. vc4_v3d_init_hw(&vc4->base);
  337. vc4_irq_enable(&vc4->base);
  338. return 0;
  339. }
  340. #endif
  341. int vc4_v3d_debugfs_init(struct drm_minor *minor)
  342. {
  343. struct drm_device *drm = minor->dev;
  344. struct vc4_dev *vc4 = to_vc4_dev(drm);
  345. struct vc4_v3d *v3d = vc4->v3d;
  346. int ret;
  347. if (!vc4->v3d)
  348. return -ENODEV;
  349. ret = vc4_debugfs_add_file(minor, "v3d_ident",
  350. vc4_v3d_debugfs_ident, NULL);
  351. if (ret)
  352. return ret;
  353. ret = vc4_debugfs_add_regset32(minor, "v3d_regs", &v3d->regset);
  354. if (ret)
  355. return ret;
  356. return 0;
  357. }
  358. static int vc4_v3d_bind(struct device *dev, struct device *master, void *data)
  359. {
  360. struct platform_device *pdev = to_platform_device(dev);
  361. struct drm_device *drm = dev_get_drvdata(master);
  362. struct vc4_dev *vc4 = to_vc4_dev(drm);
  363. struct vc4_v3d *v3d = NULL;
  364. int ret;
  365. v3d = devm_kzalloc(&pdev->dev, sizeof(*v3d), GFP_KERNEL);
  366. if (!v3d)
  367. return -ENOMEM;
  368. dev_set_drvdata(dev, v3d);
  369. v3d->pdev = pdev;
  370. v3d->regs = vc4_ioremap_regs(pdev, 0);
  371. if (IS_ERR(v3d->regs))
  372. return PTR_ERR(v3d->regs);
  373. v3d->regset.base = v3d->regs;
  374. v3d->regset.regs = v3d_regs;
  375. v3d->regset.nregs = ARRAY_SIZE(v3d_regs);
  376. vc4->v3d = v3d;
  377. v3d->vc4 = vc4;
  378. v3d->clk = devm_clk_get(dev, NULL);
  379. if (IS_ERR(v3d->clk)) {
  380. int ret = PTR_ERR(v3d->clk);
  381. if (ret == -ENOENT) {
  382. /* bcm2835 didn't have a clock reference in the DT. */
  383. ret = 0;
  384. v3d->clk = NULL;
  385. } else {
  386. if (ret != -EPROBE_DEFER)
  387. dev_err(dev, "Failed to get V3D clock: %d\n",
  388. ret);
  389. return ret;
  390. }
  391. }
  392. ret = platform_get_irq(pdev, 0);
  393. if (ret < 0)
  394. return ret;
  395. vc4->irq = ret;
  396. ret = devm_pm_runtime_enable(dev);
  397. if (ret)
  398. return ret;
  399. ret = pm_runtime_resume_and_get(dev);
  400. if (ret)
  401. return ret;
  402. if (V3D_READ(V3D_IDENT0) != V3D_EXPECTED_IDENT0) {
  403. DRM_ERROR("V3D_IDENT0 read 0x%08x instead of 0x%08x\n",
  404. V3D_READ(V3D_IDENT0), V3D_EXPECTED_IDENT0);
  405. ret = -EINVAL;
  406. goto err_put_runtime_pm;
  407. }
  408. /* Reset the binner overflow address/size at setup, to be sure
  409. * we don't reuse an old one.
  410. */
  411. V3D_WRITE(V3D_BPOA, 0);
  412. V3D_WRITE(V3D_BPOS, 0);
  413. ret = vc4_irq_install(drm, vc4->irq);
  414. if (ret) {
  415. DRM_ERROR("Failed to install IRQ handler\n");
  416. goto err_put_runtime_pm;
  417. }
  418. pm_runtime_use_autosuspend(dev);
  419. pm_runtime_set_autosuspend_delay(dev, 40); /* a little over 2 frames. */
  420. return 0;
  421. err_put_runtime_pm:
  422. pm_runtime_put(dev);
  423. return ret;
  424. }
  425. static void vc4_v3d_unbind(struct device *dev, struct device *master,
  426. void *data)
  427. {
  428. struct drm_device *drm = dev_get_drvdata(master);
  429. struct vc4_dev *vc4 = to_vc4_dev(drm);
  430. vc4_irq_uninstall(drm);
  431. /* Disable the binner's overflow memory address, so the next
  432. * driver probe (if any) doesn't try to reuse our old
  433. * allocation.
  434. */
  435. V3D_WRITE(V3D_BPOA, 0);
  436. V3D_WRITE(V3D_BPOS, 0);
  437. vc4->v3d = NULL;
  438. }
  439. static const struct dev_pm_ops vc4_v3d_pm_ops = {
  440. SET_RUNTIME_PM_OPS(vc4_v3d_runtime_suspend, vc4_v3d_runtime_resume, NULL)
  441. };
  442. static const struct component_ops vc4_v3d_ops = {
  443. .bind = vc4_v3d_bind,
  444. .unbind = vc4_v3d_unbind,
  445. };
  446. static int vc4_v3d_dev_probe(struct platform_device *pdev)
  447. {
  448. return component_add(&pdev->dev, &vc4_v3d_ops);
  449. }
  450. static int vc4_v3d_dev_remove(struct platform_device *pdev)
  451. {
  452. component_del(&pdev->dev, &vc4_v3d_ops);
  453. return 0;
  454. }
  455. const struct of_device_id vc4_v3d_dt_match[] = {
  456. { .compatible = "brcm,bcm2835-v3d" },
  457. { .compatible = "brcm,cygnus-v3d" },
  458. { .compatible = "brcm,vc4-v3d" },
  459. {}
  460. };
  461. struct platform_driver vc4_v3d_driver = {
  462. .probe = vc4_v3d_dev_probe,
  463. .remove = vc4_v3d_dev_remove,
  464. .driver = {
  465. .name = "vc4_v3d",
  466. .of_match_table = vc4_v3d_dt_match,
  467. .pm = &vc4_v3d_pm_ops,
  468. },
  469. };