// SPDX-License-Identifier: GPL-2.0+
/* Copyright (C) 2014-2018 Broadcom */

#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/reset.h>
#include <linux/sched/signal.h>
#include <linux/uaccess.h>

#include <drm/drm_syncobj.h>
#include <uapi/drm/v3d_drm.h>

#include "v3d_drv.h"
#include "v3d_regs.h"
#include "v3d_trace.h"

static void
v3d_init_core(struct v3d_dev *v3d, int core)
{
	/* Set OVRTMUOUT, which means that the texture sampler uniform
	 * configuration's tmu output type field is used, instead of
	 * using the hardware default behavior based on the texture
	 * type.  If you want the default behavior, you can still put
	 * "2" in the indirect texture state's output_type field.
	 */
	if (v3d->ver < 40)
		V3D_CORE_WRITE(core, V3D_CTL_MISCCFG, V3D_MISCCFG_OVRTMUOUT);

	/* Whenever we flush the L2T cache, we always want to flush
	 * the whole thing.
	 */
	V3D_CORE_WRITE(core, V3D_CTL_L2TFLSTA, 0);
	V3D_CORE_WRITE(core, V3D_CTL_L2TFLEND, ~0);
}

/* Sets invariant state for the HW. */
static void
v3d_init_hw_state(struct v3d_dev *v3d)
{
	v3d_init_core(v3d, 0);
}

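/* Asks the GMP to stop accepting new AXI transactions, then waits for
 * its outstanding reads/writes and config traffic to drain so the unit
 * is quiescent.  Currently only wired up for the (unused) safe
 * powerdown path in v3d_reset().
 */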
static void
v3d_idle_axi(struct v3d_dev *v3d, int core)
{
	V3D_CORE_WRITE(core, V3D_GMP_CFG, V3D_GMP_CFG_STOP_REQ);

	if (wait_for((V3D_CORE_READ(core, V3D_GMP_STATUS) &
		      (V3D_GMP_STATUS_RD_COUNT_MASK |
		       V3D_GMP_STATUS_WR_COUNT_MASK |
		       V3D_GMP_STATUS_CFG_BUSY)) == 0, 100)) {
		DRM_ERROR("Failed to wait for safe GMP shutdown\n");
	}
}

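/* Requests a safe-shutdown handshake from the GCA bridge, which only
 * exists on hardware before V3D 4.1 (it also hosts the L3 cache
 * controls used by v3d_flush_l3()).
 */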
static void
v3d_idle_gca(struct v3d_dev *v3d)
{
	if (v3d->ver >= 41)
		return;

	V3D_GCA_WRITE(V3D_GCA_SAFE_SHUTDOWN, V3D_GCA_SAFE_SHUTDOWN_EN);

	if (wait_for((V3D_GCA_READ(V3D_GCA_SAFE_SHUTDOWN_ACK) &
		      V3D_GCA_SAFE_SHUTDOWN_ACK_ACKED) ==
		     V3D_GCA_SAFE_SHUTDOWN_ACK_ACKED, 100)) {
		DRM_ERROR("Failed to wait for safe GCA shutdown\n");
	}
}

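/* Resets V3D through the GR bridge when no reset controller is
 * available; which SW_INIT register to poke depends on the bridge's
 * major revision (2 vs. 7).
 */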
static void
v3d_reset_by_bridge(struct v3d_dev *v3d)
{
	int version = V3D_BRIDGE_READ(V3D_TOP_GR_BRIDGE_REVISION);

	if (V3D_GET_FIELD(version, V3D_TOP_GR_BRIDGE_MAJOR) == 2) {
		V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_0,
				 V3D_TOP_GR_BRIDGE_SW_INIT_0_V3D_CLK_108_SW_INIT);
		V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_0, 0);

		/* GFXH-1383: The SW_INIT may cause a stray write to address 0
		 * of the unit, so reset it to its power-on value here.
		 */
		V3D_WRITE(V3D_HUB_AXICFG, V3D_HUB_AXICFG_MAX_LEN_MASK);
	} else {
		WARN_ON_ONCE(V3D_GET_FIELD(version,
					   V3D_TOP_GR_BRIDGE_MAJOR) != 7);
		V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_1,
				 V3D_TOP_GR_BRIDGE_SW_INIT_1_V3D_CLK_108_SW_INIT);
		V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_1, 0);
	}
}

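/* Full reset of the V3D block: prefer the external reset controller
 * when one is present, otherwise fall back to the GR bridge sequence
 * above, then reprogram the invariant HW state.
 */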
static void
v3d_reset_v3d(struct v3d_dev *v3d)
{
	if (v3d->reset)
		reset_control_reset(v3d->reset);
	else
		v3d_reset_by_bridge(v3d);

	v3d_init_hw_state(v3d);
}

void
v3d_reset(struct v3d_dev *v3d)
{
	struct drm_device *dev = &v3d->drm;

	DRM_DEV_ERROR(dev->dev, "Resetting GPU for hang.\n");
	DRM_DEV_ERROR(dev->dev, "V3D_ERR_STAT: 0x%08x\n",
		      V3D_CORE_READ(0, V3D_ERR_STAT));
	trace_v3d_reset_begin(dev);

	/* XXX: only needed for safe powerdown, not reset. */
	if (false)
		v3d_idle_axi(v3d, 0);

	v3d_idle_gca(v3d);
	v3d_reset_v3d(v3d);

	v3d_mmu_set_page_table(v3d);
	v3d_irq_reset(v3d);

	v3d_perfmon_stop(v3d, v3d->active_perfmon, false);

	trace_v3d_reset_end(dev);
}

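/* Flushes the L3 cache through the GCA.  Only hardware before V3D 4.1
 * has a GCA, and parts before 3.3 additionally clear the flush bit
 * again by hand.
 */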
static void
v3d_flush_l3(struct v3d_dev *v3d)
{
	if (v3d->ver < 41) {
		u32 gca_ctrl = V3D_GCA_READ(V3D_GCA_CACHE_CTRL);

		V3D_GCA_WRITE(V3D_GCA_CACHE_CTRL,
			      gca_ctrl | V3D_GCA_CACHE_CTRL_FLUSH);

		if (v3d->ver < 33) {
			V3D_GCA_WRITE(V3D_GCA_CACHE_CTRL,
				      gca_ctrl & ~V3D_GCA_CACHE_CTRL_FLUSH);
		}
	}
}

/* Invalidates the (read-only) L2C cache.  This was the L2 cache for
 * uniforms and instructions on V3D 3.2.
 */
static void
v3d_invalidate_l2c(struct v3d_dev *v3d, int core)
{
	if (v3d->ver > 32)
		return;

	V3D_CORE_WRITE(core, V3D_CTL_L2CACTL,
		       V3D_L2CACTL_L2CCLR |
		       V3D_L2CACTL_L2CENA);
}

/* Invalidates texture L2 cachelines */
static void
v3d_flush_l2t(struct v3d_dev *v3d, int core)
{
	/* While there is a busy bit (V3D_L2TCACTL_L2TFLS), we don't
	 * need to wait for completion before dispatching the job --
	 * L2T accesses will be stalled until the flush has completed.
	 * However, we do need to make sure we don't try to trigger a
	 * new flush while the L2_CLEAN queue is trying to
	 * synchronously clean after a job.
	 */
	mutex_lock(&v3d->cache_clean_lock);
	V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL,
		       V3D_L2TCACTL_L2TFLS |
		       V3D_SET_FIELD(V3D_L2TCACTL_FLM_FLUSH, V3D_L2TCACTL_FLM));
	mutex_unlock(&v3d->cache_clean_lock);
}

/* Cleans texture L1 and L2 cachelines (writing back dirty data).
 *
 * For cleaning, which happens from the CACHE_CLEAN queue after CSD has
 * executed, we need to make sure that the clean is done before
 * signaling job completion.  So, we synchronously wait before
 * returning, and we make sure that L2 invalidates don't happen in the
 * meantime to confuse our are-we-done checks.
 */
void
v3d_clean_caches(struct v3d_dev *v3d)
{
	struct drm_device *dev = &v3d->drm;
	int core = 0;

	trace_v3d_cache_clean_begin(dev);

	V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL, V3D_L2TCACTL_TMUWCF);
	if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
		       V3D_L2TCACTL_TMUWCF), 100)) {
		DRM_ERROR("Timeout waiting for TMU write combiner flush\n");
	}

	mutex_lock(&v3d->cache_clean_lock);
	V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL,
		       V3D_L2TCACTL_L2TFLS |
		       V3D_SET_FIELD(V3D_L2TCACTL_FLM_CLEAN, V3D_L2TCACTL_FLM));

	if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
		       V3D_L2TCACTL_L2TFLS), 100)) {
		DRM_ERROR("Timeout waiting for L2T clean\n");
	}

	mutex_unlock(&v3d->cache_clean_lock);

	trace_v3d_cache_clean_end(dev);
}

/* Invalidates the slice caches.  These are read-only caches. */
static void
v3d_invalidate_slices(struct v3d_dev *v3d, int core)
{
	V3D_CORE_WRITE(core, V3D_CTL_SLCACTL,
		       V3D_SET_FIELD(0xf, V3D_SLCACTL_TVCCS) |
		       V3D_SET_FIELD(0xf, V3D_SLCACTL_TDCCS) |
		       V3D_SET_FIELD(0xf, V3D_SLCACTL_UCC) |
		       V3D_SET_FIELD(0xf, V3D_SLCACTL_ICC));
}

void
v3d_invalidate_caches(struct v3d_dev *v3d)
{
	/* Invalidate the caches from the outside in.  That way if
	 * another CL's concurrent use of nearby memory were to pull
	 * an invalidated cacheline back in, we wouldn't leave stale
	 * data in the inner cache.
	 */
	v3d_flush_l3(v3d);
	v3d_invalidate_l2c(v3d, 0);
	v3d_flush_l2t(v3d, 0);
	v3d_invalidate_slices(v3d, 0);
}

/* Takes the reservation lock on all the BOs being referenced, so that
 * at queue submit time we can update the reservations.
 *
 * We don't lock the RCL, the tile alloc/state BOs, or overflow memory
 * (all of which are on exec->unref_list).  They're entirely private
 * to v3d, so we don't attach dma-buf fences to them.
 */
static int
v3d_lock_bo_reservations(struct v3d_job *job,
			 struct ww_acquire_ctx *acquire_ctx)
{
	int i, ret;

	ret = drm_gem_lock_reservations(job->bo, job->bo_count, acquire_ctx);
	if (ret)
		return ret;

	for (i = 0; i < job->bo_count; i++) {
		ret = dma_resv_reserve_fences(job->bo[i]->resv, 1);
		if (ret)
			goto fail;

		ret = drm_sched_job_add_implicit_dependencies(&job->base,
							      job->bo[i], true);
		if (ret)
			goto fail;
	}

	return 0;

fail:
	drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx);
	return ret;
}

/**
 * v3d_lookup_bos() - Sets up job->bo[] with the GEM objects
 * referenced by the job.
 * @dev: DRM device
 * @file_priv: DRM file for this fd
 * @job: V3D job being set up
 * @bo_handles: GEM handles
 * @bo_count: Number of GEM handles passed in
 *
 * The command validator needs to reference BOs by their index within
 * the submitted job's BO list.  This does the validation of the job's
 * BO list and reference counting for the lifetime of the job.
 *
 * Note that this function doesn't need to unreference the BOs on
 * failure, because that will happen at v3d_job_free() time.
 */
static int
v3d_lookup_bos(struct drm_device *dev,
	       struct drm_file *file_priv,
	       struct v3d_job *job,
	       u64 bo_handles,
	       u32 bo_count)
{
	u32 *handles;
	int ret = 0;
	int i;

	job->bo_count = bo_count;

	if (!job->bo_count) {
		/* See comment on bo_index for why we have to check
		 * this.
		 */
		DRM_DEBUG("Rendering requires BOs\n");
		return -EINVAL;
	}

	job->bo = kvmalloc_array(job->bo_count,
				 sizeof(struct drm_gem_dma_object *),
				 GFP_KERNEL | __GFP_ZERO);
	if (!job->bo) {
		DRM_DEBUG("Failed to allocate validated BO pointers\n");
		return -ENOMEM;
	}

	handles = kvmalloc_array(job->bo_count, sizeof(u32), GFP_KERNEL);
	if (!handles) {
		ret = -ENOMEM;
		DRM_DEBUG("Failed to allocate incoming GEM handles\n");
		goto fail;
	}

	if (copy_from_user(handles,
			   (void __user *)(uintptr_t)bo_handles,
			   job->bo_count * sizeof(u32))) {
		ret = -EFAULT;
		DRM_DEBUG("Failed to copy in GEM handles\n");
		goto fail;
	}

	spin_lock(&file_priv->table_lock);
	for (i = 0; i < job->bo_count; i++) {
		struct drm_gem_object *bo = idr_find(&file_priv->object_idr,
						     handles[i]);
		if (!bo) {
			DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
				  i, handles[i]);
			ret = -ENOENT;
			spin_unlock(&file_priv->table_lock);
			goto fail;
		}
		drm_gem_object_get(bo);
		job->bo[i] = bo;
	}
	spin_unlock(&file_priv->table_lock);

fail:
	kvfree(handles);
	return ret;
}

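/* Final kref release for a job: drops the BO references taken in
 * v3d_lookup_bos(), the job's irq/done fences, and its perfmon
 * reference.
 */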
static void
v3d_job_free(struct kref *ref)
{
	struct v3d_job *job = container_of(ref, struct v3d_job, refcount);
	int i;

	for (i = 0; i < job->bo_count; i++) {
		if (job->bo[i])
			drm_gem_object_put(job->bo[i]);
	}
	kvfree(job->bo);

	dma_fence_put(job->irq_fence);
	dma_fence_put(job->done_fence);

	if (job->perfmon)
		v3d_perfmon_put(job->perfmon);

	kfree(job);
}

static void
v3d_render_job_free(struct kref *ref)
{
	struct v3d_render_job *job = container_of(ref, struct v3d_render_job,
						  base.refcount);
	struct v3d_bo *bo, *save;

	list_for_each_entry_safe(bo, save, &job->unref_list, unref_head) {
		drm_gem_object_put(&bo->base.base);
	}

	v3d_job_free(ref);
}

void v3d_job_cleanup(struct v3d_job *job)
{
	if (!job)
		return;

	drm_sched_job_cleanup(&job->base);
	v3d_job_put(job);
}

void v3d_job_put(struct v3d_job *job)
{
	kref_put(&job->refcount, job->free);
}

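/* Blocks, for up to the user-supplied timeout, on the fences attached
 * to the BO's reservation object, and reports back how much of the
 * timeout remains.
 */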
int
v3d_wait_bo_ioctl(struct drm_device *dev, void *data,
		  struct drm_file *file_priv)
{
	int ret;
	struct drm_v3d_wait_bo *args = data;
	ktime_t start = ktime_get();
	u64 delta_ns;
	unsigned long timeout_jiffies =
		nsecs_to_jiffies_timeout(args->timeout_ns);

	if (args->pad != 0)
		return -EINVAL;

	ret = drm_gem_dma_resv_wait(file_priv, args->handle,
				    true, timeout_jiffies);

	/* Decrement the user's timeout, in case we got interrupted
	 * such that the ioctl will be restarted.
	 */
	delta_ns = ktime_to_ns(ktime_sub(ktime_get(), start));
	if (delta_ns < args->timeout_ns)
		args->timeout_ns -= delta_ns;
	else
		args->timeout_ns = 0;

	/* Asked to wait beyond the jiffie/scheduler precision? */
	if (ret == -ETIME && args->timeout_ns)
		ret = -EAGAIN;

	return ret;
}

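/* Adds the fence of one wait syncobj as a scheduler dependency of the
 * job.  A zero or unknown handle is silently treated as "no dependency";
 * only -EINVAL is considered fatal.
 */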
static int
v3d_job_add_deps(struct drm_file *file_priv, struct v3d_job *job,
		 u32 in_sync, u32 point)
{
	struct dma_fence *in_fence = NULL;
	int ret;

	ret = drm_syncobj_find_fence(file_priv, in_sync, point, 0, &in_fence);
	if (ret == -EINVAL)
		return ret;

	return drm_sched_job_add_dependency(&job->base, in_fence);
}

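/* Allocates and initializes the common v3d_job part of a job
 * container: binds it to the scheduler entity for @queue and records
 * its wait dependencies, either the single in_sync syncobj or the
 * per-queue list from the multisync extension.
 */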
static int
v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
	     void **container, size_t size, void (*free)(struct kref *ref),
	     u32 in_sync, struct v3d_submit_ext *se, enum v3d_queue queue)
{
	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
	struct v3d_job *job;
	bool has_multisync = se && (se->flags & DRM_V3D_EXT_ID_MULTI_SYNC);
	int ret, i;

	*container = kcalloc(1, size, GFP_KERNEL);
	if (!*container) {
		DRM_ERROR("Cannot allocate memory for v3d job.");
		return -ENOMEM;
	}

	job = *container;
	job->v3d = v3d;
	job->free = free;

	ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue],
				 v3d_priv);
	if (ret)
		goto fail;

	if (has_multisync) {
		if (se->in_sync_count && se->wait_stage == queue) {
			struct drm_v3d_sem __user *handle = u64_to_user_ptr(se->in_syncs);

			for (i = 0; i < se->in_sync_count; i++) {
				struct drm_v3d_sem in;

				if (copy_from_user(&in, handle++, sizeof(in))) {
					ret = -EFAULT;
					DRM_DEBUG("Failed to copy wait dep handle.\n");
					goto fail_deps;
				}
				ret = v3d_job_add_deps(file_priv, job, in.handle, 0);
				if (ret)
					goto fail_deps;
			}
		}
	} else {
		ret = v3d_job_add_deps(file_priv, job, in_sync, 0);
		if (ret)
			goto fail_deps;
	}

	kref_init(&job->refcount);

	return 0;

fail_deps:
	drm_sched_job_cleanup(&job->base);
fail:
	kfree(*container);
	*container = NULL;

	return ret;
}

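/* Arms the scheduler job, grabs its "finished" fence as the job's
 * done_fence, takes an extra job reference for the scheduler, and
 * queues the job to its entity.
 */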
static void
v3d_push_job(struct v3d_job *job)
{
	drm_sched_job_arm(&job->base);

	job->done_fence = dma_fence_get(&job->base.s_fence->finished);

	/* put by scheduler job completion */
	kref_get(&job->refcount);

	drm_sched_entity_push_job(&job->base);
}

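/* Attaches the job's done fence to every BO's reservation object as a
 * write fence, drops the reservation locks, and installs the fence in
 * the out syncobj(s) the submitter asked to have signaled.
 */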
static void
v3d_attach_fences_and_unlock_reservation(struct drm_file *file_priv,
					 struct v3d_job *job,
					 struct ww_acquire_ctx *acquire_ctx,
					 u32 out_sync,
					 struct v3d_submit_ext *se,
					 struct dma_fence *done_fence)
{
	struct drm_syncobj *sync_out;
	bool has_multisync = se && (se->flags & DRM_V3D_EXT_ID_MULTI_SYNC);
	int i;

	for (i = 0; i < job->bo_count; i++) {
		/* XXX: Use shared fences for read-only objects. */
		dma_resv_add_fence(job->bo[i]->resv, job->done_fence,
				   DMA_RESV_USAGE_WRITE);
	}

	drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx);

	/* Update the return sync object for the job.  Without the
	 * multisync extension there is only a single signal semaphore.
	 */
	if (!has_multisync) {
		sync_out = drm_syncobj_find(file_priv, out_sync);
		if (sync_out) {
			drm_syncobj_replace_fence(sync_out, done_fence);
			drm_syncobj_put(sync_out);
		}
		return;
	}

	/* With the multiple semaphores extension, signal every
	 * requested out syncobj.
	 */
	if (se->out_sync_count) {
		for (i = 0; i < se->out_sync_count; i++) {
			drm_syncobj_replace_fence(se->out_syncs[i].syncobj,
						  done_fence);
			drm_syncobj_put(se->out_syncs[i].syncobj);
		}
		kvfree(se->out_syncs);
	}
}

static void
v3d_put_multisync_post_deps(struct v3d_submit_ext *se)
{
	unsigned int i;

	if (!(se && se->out_sync_count))
		return;

	for (i = 0; i < se->out_sync_count; i++)
		drm_syncobj_put(se->out_syncs[i].syncobj);
	kvfree(se->out_syncs);
}

static int
v3d_get_multisync_post_deps(struct drm_file *file_priv,
			    struct v3d_submit_ext *se,
			    u32 count, u64 handles)
{
	struct drm_v3d_sem __user *post_deps;
	int i, ret;

	if (!count)
		return 0;

	se->out_syncs = (struct v3d_submit_outsync *)
			kvmalloc_array(count,
				       sizeof(struct v3d_submit_outsync),
				       GFP_KERNEL);
	if (!se->out_syncs)
		return -ENOMEM;

	post_deps = u64_to_user_ptr(handles);

	for (i = 0; i < count; i++) {
		struct drm_v3d_sem out;

		if (copy_from_user(&out, post_deps++, sizeof(out))) {
			ret = -EFAULT;
			DRM_DEBUG("Failed to copy post dep handles\n");
			goto fail;
		}

		se->out_syncs[i].syncobj = drm_syncobj_find(file_priv,
							    out.handle);
		if (!se->out_syncs[i].syncobj) {
			ret = -EINVAL;
			goto fail;
		}
	}
	se->out_sync_count = count;

	return 0;

fail:
	for (i--; i >= 0; i--)
		drm_syncobj_put(se->out_syncs[i].syncobj);
	kvfree(se->out_syncs);

	return ret;
}

/* Gets the data for the multiple-binary-semaphore synchronization
 * extension: looks up the syncobjs to be signaled when the job
 * completes (out_syncs) and records the wait semaphores (in_syncs)
 * for v3d_job_init() to resolve later.
 */
static int
v3d_get_multisync_submit_deps(struct drm_file *file_priv,
			      struct drm_v3d_extension __user *ext,
			      void *data)
{
	struct drm_v3d_multi_sync multisync;
	struct v3d_submit_ext *se = data;
	int ret;

	if (copy_from_user(&multisync, ext, sizeof(multisync)))
		return -EFAULT;

	if (multisync.pad)
		return -EINVAL;

	ret = v3d_get_multisync_post_deps(file_priv, data, multisync.out_sync_count,
					  multisync.out_syncs);
	if (ret)
		return ret;

	se->in_sync_count = multisync.in_sync_count;
	se->in_syncs = multisync.in_syncs;
	se->flags |= DRM_V3D_EXT_ID_MULTI_SYNC;
	se->wait_stage = multisync.wait_stage;

	return 0;
}

/* When userspace attaches extensions to a submit ioctl,
 * v3d_get_extensions() walks the linked list of extensions and parses
 * each one according to its id.
 */
static int
v3d_get_extensions(struct drm_file *file_priv,
		   u64 ext_handles,
		   void *data)
{
	struct drm_v3d_extension __user *user_ext;
	int ret;

	user_ext = u64_to_user_ptr(ext_handles);
	while (user_ext) {
		struct drm_v3d_extension ext;

		if (copy_from_user(&ext, user_ext, sizeof(ext))) {
			DRM_DEBUG("Failed to copy submit extension\n");
			return -EFAULT;
		}

		switch (ext.id) {
		case DRM_V3D_EXT_ID_MULTI_SYNC:
			ret = v3d_get_multisync_submit_deps(file_priv, user_ext, data);
			if (ret)
				return ret;
			break;
		default:
			DRM_DEBUG_DRIVER("Unknown extension id: %d\n", ext.id);
			return -EINVAL;
		}

		user_ext = u64_to_user_ptr(ext.next);
	}

	return 0;
}

/**
 * v3d_submit_cl_ioctl() - Submits a job (frame) to the V3D.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * This is the main entrypoint for userspace to submit a 3D frame to
 * the GPU.  Userspace provides the binner command list (if
 * applicable) and the render command list; the kernel queues them to
 * the BIN and RENDER scheduler queues and attaches the frame's
 * completion fence to the BOs it references.
 */
int
v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);
	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
	struct drm_v3d_submit_cl *args = data;
	struct v3d_submit_ext se = {0};
	struct v3d_bin_job *bin = NULL;
	struct v3d_render_job *render = NULL;
	struct v3d_job *clean_job = NULL;
	struct v3d_job *last_job;
	struct ww_acquire_ctx acquire_ctx;
	int ret = 0;

	trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end);

	if (args->pad)
		return -EINVAL;

	if (args->flags &&
	    args->flags & ~(DRM_V3D_SUBMIT_CL_FLUSH_CACHE |
			    DRM_V3D_SUBMIT_EXTENSION)) {
		DRM_INFO("invalid flags: %d\n", args->flags);
		return -EINVAL;
	}

	if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
		ret = v3d_get_extensions(file_priv, args->extensions, &se);
		if (ret) {
			DRM_DEBUG("Failed to get extensions.\n");
			return ret;
		}
	}

	ret = v3d_job_init(v3d, file_priv, (void *)&render, sizeof(*render),
			   v3d_render_job_free, args->in_sync_rcl, &se, V3D_RENDER);
	if (ret)
		goto fail;

	render->start = args->rcl_start;
	render->end = args->rcl_end;
	INIT_LIST_HEAD(&render->unref_list);

	if (args->bcl_start != args->bcl_end) {
		ret = v3d_job_init(v3d, file_priv, (void *)&bin, sizeof(*bin),
				   v3d_job_free, args->in_sync_bcl, &se, V3D_BIN);
		if (ret)
			goto fail;

		bin->start = args->bcl_start;
		bin->end = args->bcl_end;
		bin->qma = args->qma;
		bin->qms = args->qms;
		bin->qts = args->qts;
		bin->render = render;
	}
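
	/* If requested, queue a CACHE_CLEAN job after the render so
	 * that dirty TMU data has been written back before userspace
	 * observes the frame as complete.
	 */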
	if (args->flags & DRM_V3D_SUBMIT_CL_FLUSH_CACHE) {
		ret = v3d_job_init(v3d, file_priv, (void *)&clean_job, sizeof(*clean_job),
				   v3d_job_free, 0, NULL, V3D_CACHE_CLEAN);
		if (ret)
			goto fail;

		last_job = clean_job;
	} else {
		last_job = &render->base;
	}

	ret = v3d_lookup_bos(dev, file_priv, last_job,
			     args->bo_handles, args->bo_handle_count);
	if (ret)
		goto fail;

	ret = v3d_lock_bo_reservations(last_job, &acquire_ctx);
	if (ret)
		goto fail;

	if (args->perfmon_id) {
		render->base.perfmon = v3d_perfmon_find(v3d_priv,
							args->perfmon_id);

		if (!render->base.perfmon) {
			ret = -ENOENT;
			goto fail_perfmon;
		}
	}

	mutex_lock(&v3d->sched_lock);
	if (bin) {
		bin->base.perfmon = render->base.perfmon;
		v3d_perfmon_get(bin->base.perfmon);
		v3d_push_job(&bin->base);

		ret = drm_sched_job_add_dependency(&render->base.base,
						   dma_fence_get(bin->base.done_fence));
		if (ret)
			goto fail_unreserve;
	}

	v3d_push_job(&render->base);

	if (clean_job) {
		struct dma_fence *render_fence =
			dma_fence_get(render->base.done_fence);
		ret = drm_sched_job_add_dependency(&clean_job->base,
						   render_fence);
		if (ret)
			goto fail_unreserve;
		clean_job->perfmon = render->base.perfmon;
		v3d_perfmon_get(clean_job->perfmon);
		v3d_push_job(clean_job);
	}

	mutex_unlock(&v3d->sched_lock);

	v3d_attach_fences_and_unlock_reservation(file_priv,
						 last_job,
						 &acquire_ctx,
						 args->out_sync,
						 &se,
						 last_job->done_fence);

	if (bin)
		v3d_job_put(&bin->base);
	v3d_job_put(&render->base);
	if (clean_job)
		v3d_job_put(clean_job);

	return 0;

fail_unreserve:
	mutex_unlock(&v3d->sched_lock);
fail_perfmon:
	drm_gem_unlock_reservations(last_job->bo,
				    last_job->bo_count, &acquire_ctx);
fail:
	v3d_job_cleanup((void *)bin);
	v3d_job_cleanup((void *)render);
	v3d_job_cleanup(clean_job);
	v3d_put_multisync_post_deps(&se);

	return ret;
}

/**
 * v3d_submit_tfu_ioctl() - Submits a TFU (texture formatting) job to the V3D.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * Userspace provides the register setup for the TFU, which we don't
 * need to validate since the TFU is behind the MMU.
 */
int
v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);
	struct drm_v3d_submit_tfu *args = data;
	struct v3d_submit_ext se = {0};
	struct v3d_tfu_job *job = NULL;
	struct ww_acquire_ctx acquire_ctx;
	int ret = 0;

	trace_v3d_submit_tfu_ioctl(&v3d->drm, args->iia);

	if (args->flags && !(args->flags & DRM_V3D_SUBMIT_EXTENSION)) {
		DRM_DEBUG("invalid flags: %d\n", args->flags);
		return -EINVAL;
	}

	if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
		ret = v3d_get_extensions(file_priv, args->extensions, &se);
		if (ret) {
			DRM_DEBUG("Failed to get extensions.\n");
			return ret;
		}
	}

	ret = v3d_job_init(v3d, file_priv, (void *)&job, sizeof(*job),
			   v3d_job_free, args->in_sync, &se, V3D_TFU);
	if (ret)
		goto fail;

	job->base.bo = kcalloc(ARRAY_SIZE(args->bo_handles),
			       sizeof(*job->base.bo), GFP_KERNEL);
	if (!job->base.bo) {
		ret = -ENOMEM;
		goto fail;
	}

	job->args = *args;

	spin_lock(&file_priv->table_lock);
	for (job->base.bo_count = 0;
	     job->base.bo_count < ARRAY_SIZE(args->bo_handles);
	     job->base.bo_count++) {
		struct drm_gem_object *bo;

		if (!args->bo_handles[job->base.bo_count])
			break;

		bo = idr_find(&file_priv->object_idr,
			      args->bo_handles[job->base.bo_count]);
		if (!bo) {
			DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
				  job->base.bo_count,
				  args->bo_handles[job->base.bo_count]);
			ret = -ENOENT;
			spin_unlock(&file_priv->table_lock);
			goto fail;
		}
		drm_gem_object_get(bo);
		job->base.bo[job->base.bo_count] = bo;
	}
	spin_unlock(&file_priv->table_lock);

	ret = v3d_lock_bo_reservations(&job->base, &acquire_ctx);
	if (ret)
		goto fail;

	mutex_lock(&v3d->sched_lock);
	v3d_push_job(&job->base);
	mutex_unlock(&v3d->sched_lock);

	v3d_attach_fences_and_unlock_reservation(file_priv,
						 &job->base, &acquire_ctx,
						 args->out_sync,
						 &se,
						 job->base.done_fence);

	v3d_job_put(&job->base);

	return 0;

fail:
	v3d_job_cleanup((void *)job);
	v3d_put_multisync_post_deps(&se);

	return ret;
}

/**
 * v3d_submit_csd_ioctl() - Submits a CSD (compute shader dispatch) job to the V3D.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * Userspace provides the register setup for the CSD, which we don't
 * need to validate since the CSD is behind the MMU.
 */
int
v3d_submit_csd_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);
	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
	struct drm_v3d_submit_csd *args = data;
	struct v3d_submit_ext se = {0};
	struct v3d_csd_job *job = NULL;
	struct v3d_job *clean_job = NULL;
	struct ww_acquire_ctx acquire_ctx;
	int ret;

	trace_v3d_submit_csd_ioctl(&v3d->drm, args->cfg[5], args->cfg[6]);

	if (args->pad)
		return -EINVAL;

	if (!v3d_has_csd(v3d)) {
		DRM_DEBUG("Attempting CSD submit on non-CSD hardware\n");
		return -EINVAL;
	}

	if (args->flags && !(args->flags & DRM_V3D_SUBMIT_EXTENSION)) {
		DRM_INFO("invalid flags: %d\n", args->flags);
		return -EINVAL;
	}

	if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
		ret = v3d_get_extensions(file_priv, args->extensions, &se);
		if (ret) {
			DRM_DEBUG("Failed to get extensions.\n");
			return ret;
		}
	}

	ret = v3d_job_init(v3d, file_priv, (void *)&job, sizeof(*job),
			   v3d_job_free, args->in_sync, &se, V3D_CSD);
	if (ret)
		goto fail;

	ret = v3d_job_init(v3d, file_priv, (void *)&clean_job, sizeof(*clean_job),
			   v3d_job_free, 0, NULL, V3D_CACHE_CLEAN);
	if (ret)
		goto fail;

	job->args = *args;

	ret = v3d_lookup_bos(dev, file_priv, clean_job,
			     args->bo_handles, args->bo_handle_count);
	if (ret)
		goto fail;

	ret = v3d_lock_bo_reservations(clean_job, &acquire_ctx);
	if (ret)
		goto fail;

	if (args->perfmon_id) {
		job->base.perfmon = v3d_perfmon_find(v3d_priv,
						     args->perfmon_id);
		if (!job->base.perfmon) {
			ret = -ENOENT;
			goto fail_perfmon;
		}
	}

	mutex_lock(&v3d->sched_lock);
	v3d_push_job(&job->base);

	ret = drm_sched_job_add_dependency(&clean_job->base,
					   dma_fence_get(job->base.done_fence));
	if (ret)
		goto fail_unreserve;

	v3d_push_job(clean_job);
	mutex_unlock(&v3d->sched_lock);

	v3d_attach_fences_and_unlock_reservation(file_priv,
						 clean_job,
						 &acquire_ctx,
						 args->out_sync,
						 &se,
						 clean_job->done_fence);

	v3d_job_put(&job->base);
	v3d_job_put(clean_job);

	return 0;

fail_unreserve:
	mutex_unlock(&v3d->sched_lock);
fail_perfmon:
	drm_gem_unlock_reservations(clean_job->bo, clean_job->bo_count,
				    &acquire_ctx);
fail:
	v3d_job_cleanup((void *)job);
	v3d_job_cleanup(clean_job);
	v3d_put_multisync_post_deps(&se);

	return ret;
}

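/* One-time GEM setup: allocates the 4MB MMU page table (one 32-bit PTE
 * per 4KB page, covering a 4GB V3D address space), initializes the
 * address-space allocator on top of it, and brings up the scheduler.
 */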
int
v3d_gem_init(struct drm_device *dev)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);
	u32 pt_size = 4096 * 1024;
	int ret, i;

	for (i = 0; i < V3D_MAX_QUEUES; i++)
		v3d->queue[i].fence_context = dma_fence_context_alloc(1);

	spin_lock_init(&v3d->mm_lock);
	spin_lock_init(&v3d->job_lock);
	mutex_init(&v3d->bo_lock);
	mutex_init(&v3d->reset_lock);
	mutex_init(&v3d->sched_lock);
	mutex_init(&v3d->cache_clean_lock);

	/* Note: We don't allocate address 0.  Various bits of HW
	 * treat 0 as special, such as the occlusion query counters
	 * where 0 means "disabled".
	 */
	drm_mm_init(&v3d->mm, 1, pt_size / sizeof(u32) - 1);

	v3d->pt = dma_alloc_wc(v3d->drm.dev, pt_size,
			       &v3d->pt_paddr,
			       GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
	if (!v3d->pt) {
		drm_mm_takedown(&v3d->mm);
		dev_err(v3d->drm.dev,
			"Failed to allocate page tables. Please ensure you have DMA enabled.\n");
		return -ENOMEM;
	}

	v3d_init_hw_state(v3d);
	v3d_mmu_set_page_table(v3d);

	ret = v3d_sched_init(v3d);
	if (ret) {
		drm_mm_takedown(&v3d->mm);
		dma_free_coherent(v3d->drm.dev, 4096 * 1024, (void *)v3d->pt,
				  v3d->pt_paddr);
		return ret;
	}

	return 0;
}

void
v3d_gem_destroy(struct drm_device *dev)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);

	v3d_sched_fini(v3d);

	/* Waiting for jobs to finish would need to be done before
	 * unregistering V3D.
	 */
	WARN_ON(v3d->bin_job);
	WARN_ON(v3d->render_job);

	drm_mm_takedown(&v3d->mm);

	dma_free_coherent(v3d->drm.dev, 4096 * 1024, (void *)v3d->pt,
			  v3d->pt_paddr);
}
  937. }