// SPDX-License-Identifier: GPL-2.0
/* Copyright 2019 Linaro, Ltd, Rob Herring <[email protected]> */
/* Copyright 2019 Collabora ltd. */
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/iopoll.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/dma-resv.h>
#include <drm/gpu_scheduler.h>
#include <drm/panfrost_drm.h>

#include "panfrost_device.h"
#include "panfrost_devfreq.h"
#include "panfrost_job.h"
#include "panfrost_features.h"
#include "panfrost_issues.h"
#include "panfrost_gem.h"
#include "panfrost_regs.h"
#include "panfrost_gpu.h"
#include "panfrost_mmu.h"
#include "panfrost_dump.h"

#define JOB_TIMEOUT_MS 500

#define job_write(dev, reg, data) writel(data, dev->iomem + (reg))
#define job_read(dev, reg) readl(dev->iomem + (reg))

struct panfrost_queue_state {
	struct drm_gpu_scheduler sched;
	u64 fence_context;
	u64 emit_seqno;
};

struct panfrost_job_slot {
	struct panfrost_queue_state queue[NUM_JOB_SLOTS];
	spinlock_t job_lock;
	int irq;
};

static struct panfrost_job *
to_panfrost_job(struct drm_sched_job *sched_job)
{
	return container_of(sched_job, struct panfrost_job, base);
}

struct panfrost_fence {
	struct dma_fence base;
	struct drm_device *dev;
	/* panfrost seqno for signaled() test */
	u64 seqno;
	int queue;
};

static inline struct panfrost_fence *
to_panfrost_fence(struct dma_fence *fence)
{
	return (struct panfrost_fence *)fence;
}

static const char *panfrost_fence_get_driver_name(struct dma_fence *fence)
{
	return "panfrost";
}

static const char *panfrost_fence_get_timeline_name(struct dma_fence *fence)
{
	struct panfrost_fence *f = to_panfrost_fence(fence);

	switch (f->queue) {
	case 0:
		return "panfrost-js-0";
	case 1:
		return "panfrost-js-1";
	case 2:
		return "panfrost-js-2";
	default:
		return NULL;
	}
}

static const struct dma_fence_ops panfrost_fence_ops = {
	.get_driver_name = panfrost_fence_get_driver_name,
	.get_timeline_name = panfrost_fence_get_timeline_name,
};

static struct dma_fence *panfrost_fence_create(struct panfrost_device *pfdev, int js_num)
{
	struct panfrost_fence *fence;
	struct panfrost_job_slot *js = pfdev->js;

	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
	if (!fence)
		return ERR_PTR(-ENOMEM);

	fence->dev = pfdev->ddev;
	fence->queue = js_num;
	fence->seqno = ++js->queue[js_num].emit_seqno;
	dma_fence_init(&fence->base, &panfrost_fence_ops, &js->job_lock,
		       js->queue[js_num].fence_context, fence->seqno);

	return &fence->base;
}

int panfrost_job_get_slot(struct panfrost_job *job)
{
	/* JS0: fragment jobs.
	 * JS1: vertex/tiler jobs
	 * JS2: compute jobs
	 */
	if (job->requirements & PANFROST_JD_REQ_FS)
		return 0;

/* Not exposed to userspace yet */
#if 0
	if (job->requirements & PANFROST_JD_REQ_ONLY_COMPUTE) {
		if ((job->requirements & PANFROST_JD_REQ_CORE_GRP_MASK) &&
		    (job->pfdev->features.nr_core_groups == 2))
			return 2;
		if (panfrost_has_hw_issue(job->pfdev, HW_ISSUE_8987))
			return 2;
	}
#endif
	return 1;
}

static void panfrost_job_write_affinity(struct panfrost_device *pfdev,
					u32 requirements,
					int js)
{
	u64 affinity;

	/*
	 * Use all cores for now.
	 * Eventually we may need to support tiler only jobs and h/w with
	 * multiple (2) coherent core groups
	 */
	affinity = pfdev->features.shader_present;

	job_write(pfdev, JS_AFFINITY_NEXT_LO(js), lower_32_bits(affinity));
	job_write(pfdev, JS_AFFINITY_NEXT_HI(js), upper_32_bits(affinity));
}

static u32
panfrost_get_job_chain_flag(const struct panfrost_job *job)
{
	struct panfrost_fence *f = to_panfrost_fence(job->done_fence);

	if (!panfrost_has_hw_feature(job->pfdev, HW_FEATURE_JOBCHAIN_DISAMBIGUATION))
		return 0;

	return (f->seqno & 1) ? JS_CONFIG_JOB_CHAIN_FLAG : 0;
}

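/*
 * Each job slot has a two-entry hardware queue (current job plus the one
 * programmed in the _NEXT registers), mirrored in software by
 * pfdev->jobs[slot][0..1]: entry 0 is the job owning the slot, entry 1 is
 * the queued one. Both helpers below are called with pfdev->js->job_lock
 * held.
 */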
static struct panfrost_job *
panfrost_dequeue_job(struct panfrost_device *pfdev, int slot)
{
	struct panfrost_job *job = pfdev->jobs[slot][0];

	WARN_ON(!job);
	pfdev->jobs[slot][0] = pfdev->jobs[slot][1];
	pfdev->jobs[slot][1] = NULL;

	return job;
}

static unsigned int
panfrost_enqueue_job(struct panfrost_device *pfdev, int slot,
		     struct panfrost_job *job)
{
	if (WARN_ON(!job))
		return 0;

	if (!pfdev->jobs[slot][0]) {
		pfdev->jobs[slot][0] = job;
		return 0;
	}

	WARN_ON(pfdev->jobs[slot][1]);
	pfdev->jobs[slot][1] = job;
	WARN_ON(panfrost_get_job_chain_flag(job) ==
		panfrost_get_job_chain_flag(pfdev->jobs[slot][0]));
	return 1;
}

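/*
 * Program the slot's _NEXT registers (job chain head, affinity, config,
 * flush ID) and kick execution with JS_COMMAND_START, unless a reset is
 * pending. The devfreq busy and pm_runtime references taken here are
 * dropped again in panfrost_job_handle_done()/panfrost_job_handle_err().
 */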
static void panfrost_job_hw_submit(struct panfrost_job *job, int js)
{
	struct panfrost_device *pfdev = job->pfdev;
	unsigned int subslot;
	u32 cfg;
	u64 jc_head = job->jc;
	int ret;

	panfrost_devfreq_record_busy(&pfdev->pfdevfreq);

	ret = pm_runtime_get_sync(pfdev->dev);
	if (ret < 0)
		return;

	if (WARN_ON(job_read(pfdev, JS_COMMAND_NEXT(js))))
		return;

	cfg = panfrost_mmu_as_get(pfdev, job->mmu);

	job_write(pfdev, JS_HEAD_NEXT_LO(js), lower_32_bits(jc_head));
	job_write(pfdev, JS_HEAD_NEXT_HI(js), upper_32_bits(jc_head));

	panfrost_job_write_affinity(pfdev, job->requirements, js);

	/* start MMU, medium priority, cache clean/flush on end, clean/flush on
	 * start */
	cfg |= JS_CONFIG_THREAD_PRI(8) |
		JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE |
		JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE |
		panfrost_get_job_chain_flag(job);

	if (panfrost_has_hw_feature(pfdev, HW_FEATURE_FLUSH_REDUCTION))
		cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION;

	if (panfrost_has_hw_issue(pfdev, HW_ISSUE_10649))
		cfg |= JS_CONFIG_START_MMU;

	job_write(pfdev, JS_CONFIG_NEXT(js), cfg);

	if (panfrost_has_hw_feature(pfdev, HW_FEATURE_FLUSH_REDUCTION))
		job_write(pfdev, JS_FLUSH_ID_NEXT(js), job->flush_id);

	/* GO ! */

	spin_lock(&pfdev->js->job_lock);
	subslot = panfrost_enqueue_job(pfdev, js, job);
	/* Don't queue the job if a reset is in progress */
	if (!atomic_read(&pfdev->reset.pending)) {
		job_write(pfdev, JS_COMMAND_NEXT(js), JS_COMMAND_START);
		dev_dbg(pfdev->dev,
			"JS: Submitting atom %p to js[%d][%d] with head=0x%llx AS %d",
			job, js, subslot, jc_head, cfg & 0xf);
	}
	spin_unlock(&pfdev->js->job_lock);
}

static int panfrost_acquire_object_fences(struct drm_gem_object **bos,
					  int bo_count,
					  struct drm_sched_job *job)
{
	int i, ret;

	for (i = 0; i < bo_count; i++) {
		ret = dma_resv_reserve_fences(bos[i]->resv, 1);
		if (ret)
			return ret;

		/* panfrost always uses write mode in its current uapi */
		ret = drm_sched_job_add_implicit_dependencies(job, bos[i],
							      true);
		if (ret)
			return ret;
	}

	return 0;
}

static void panfrost_attach_object_fences(struct drm_gem_object **bos,
					  int bo_count,
					  struct dma_fence *fence)
{
	int i;

	for (i = 0; i < bo_count; i++)
		dma_resv_add_fence(bos[i]->resv, fence, DMA_RESV_USAGE_WRITE);
}

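/*
 * Hand a prepared job over to the DRM scheduler: lock the BO reservations,
 * arm the scheduler job, record the scheduler's "finished" fence as
 * render_done_fence, collect implicit dependencies, push the job to the
 * entity and attach the fence to every BO before unlocking.
 */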
int panfrost_job_push(struct panfrost_job *job)
{
	struct panfrost_device *pfdev = job->pfdev;
	struct ww_acquire_ctx acquire_ctx;
	int ret = 0;

	ret = drm_gem_lock_reservations(job->bos, job->bo_count,
					&acquire_ctx);
	if (ret)
		return ret;

	mutex_lock(&pfdev->sched_lock);
	drm_sched_job_arm(&job->base);

	job->render_done_fence = dma_fence_get(&job->base.s_fence->finished);

	ret = panfrost_acquire_object_fences(job->bos, job->bo_count,
					     &job->base);
	if (ret) {
		mutex_unlock(&pfdev->sched_lock);
		goto unlock;
	}

	kref_get(&job->refcount); /* put by scheduler job completion */

	drm_sched_entity_push_job(&job->base);

	mutex_unlock(&pfdev->sched_lock);

	panfrost_attach_object_fences(job->bos, job->bo_count,
				      job->render_done_fence);

unlock:
	drm_gem_unlock_reservations(job->bos, job->bo_count, &acquire_ctx);

	return ret;
}

static void panfrost_job_cleanup(struct kref *ref)
{
	struct panfrost_job *job = container_of(ref, struct panfrost_job,
						refcount);
	unsigned int i;

	dma_fence_put(job->done_fence);
	dma_fence_put(job->render_done_fence);

	if (job->mappings) {
		for (i = 0; i < job->bo_count; i++) {
			if (!job->mappings[i])
				break;

			atomic_dec(&job->mappings[i]->obj->gpu_usecount);
			panfrost_gem_mapping_put(job->mappings[i]);
		}
		kvfree(job->mappings);
	}

	if (job->bos) {
		for (i = 0; i < job->bo_count; i++)
			drm_gem_object_put(job->bos[i]);

		kvfree(job->bos);
	}

	kfree(job);
}

void panfrost_job_put(struct panfrost_job *job)
{
	kref_put(&job->refcount, panfrost_job_cleanup);
}

static void panfrost_job_free(struct drm_sched_job *sched_job)
{
	struct panfrost_job *job = to_panfrost_job(sched_job);

	drm_sched_job_cleanup(sched_job);

	panfrost_job_put(job);
}

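/*
 * drm_sched run_job() callback: create the hardware "done" fence for the
 * selected slot and submit the job chain. Returning NULL tells the
 * scheduler there is nothing to wait for (dependency error or empty chain).
 */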
static struct dma_fence *panfrost_job_run(struct drm_sched_job *sched_job)
{
	struct panfrost_job *job = to_panfrost_job(sched_job);
	struct panfrost_device *pfdev = job->pfdev;
	int slot = panfrost_job_get_slot(job);
	struct dma_fence *fence = NULL;

	if (unlikely(job->base.s_fence->finished.error))
		return NULL;

	/* Nothing to execute: can happen if the job has finished while
	 * we were resetting the GPU.
	 */
	if (!job->jc)
		return NULL;

	fence = panfrost_fence_create(pfdev, slot);
	if (IS_ERR(fence))
		return fence;

	if (job->done_fence)
		dma_fence_put(job->done_fence);
	job->done_fence = dma_fence_get(fence);

	panfrost_job_hw_submit(job, slot);

	return fence;
}

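/*
 * Unmask the DONE and ERR interrupt bits for every job slot (MK_JS_MASK()
 * covers both), clearing any stale state first.
 */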
void panfrost_job_enable_interrupts(struct panfrost_device *pfdev)
{
	int j;
	u32 irq_mask = 0;

	for (j = 0; j < NUM_JOB_SLOTS; j++) {
		irq_mask |= MK_JS_MASK(j);
	}

	job_write(pfdev, JOB_INT_CLEAR, irq_mask);
	job_write(pfdev, JOB_INT_MASK, irq_mask);
}

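/*
 * A job raised the ERR interrupt: decode JS_STATUS, either fix up ->jc so
 * the job can be resumed (soft-stopped case) or mark its fence with an
 * error, then release the AS, devfreq and pm_runtime references. A reset is
 * scheduled through drm_sched_fault() when the exception requires one.
 */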
static void panfrost_job_handle_err(struct panfrost_device *pfdev,
				    struct panfrost_job *job,
				    unsigned int js)
{
	u32 js_status = job_read(pfdev, JS_STATUS(js));
	const char *exception_name = panfrost_exception_name(js_status);
	bool signal_fence = true;

	if (!panfrost_exception_is_fault(js_status)) {
		dev_dbg(pfdev->dev, "js event, js=%d, status=%s, head=0x%x, tail=0x%x",
			js, exception_name,
			job_read(pfdev, JS_HEAD_LO(js)),
			job_read(pfdev, JS_TAIL_LO(js)));
	} else {
		dev_err(pfdev->dev, "js fault, js=%d, status=%s, head=0x%x, tail=0x%x",
			js, exception_name,
			job_read(pfdev, JS_HEAD_LO(js)),
			job_read(pfdev, JS_TAIL_LO(js)));
	}

	if (js_status == DRM_PANFROST_EXCEPTION_STOPPED) {
		/* Update the job head so we can resume */
		job->jc = job_read(pfdev, JS_TAIL_LO(js)) |
			  ((u64)job_read(pfdev, JS_TAIL_HI(js)) << 32);

		/* The job will be resumed, don't signal the fence */
		signal_fence = false;
	} else if (js_status == DRM_PANFROST_EXCEPTION_TERMINATED) {
		/* Job has been hard-stopped, flag it as canceled */
		dma_fence_set_error(job->done_fence, -ECANCELED);
		job->jc = 0;
	} else if (panfrost_exception_is_fault(js_status)) {
		/* We might want to provide finer-grained error code based on
		 * the exception type, but unconditionally setting to EINVAL
		 * is good enough for now.
		 */
		dma_fence_set_error(job->done_fence, -EINVAL);
		job->jc = 0;
	}

	panfrost_mmu_as_put(pfdev, job->mmu);
	panfrost_devfreq_record_idle(&pfdev->pfdevfreq);

	if (signal_fence)
		dma_fence_signal_locked(job->done_fence);

	pm_runtime_put_autosuspend(pfdev->dev);

	if (panfrost_exception_needs_reset(pfdev, js_status)) {
		atomic_set(&pfdev->reset.pending, 1);
		drm_sched_fault(&pfdev->js->queue[js].sched);
	}
}

static void panfrost_job_handle_done(struct panfrost_device *pfdev,
				     struct panfrost_job *job)
{
	/* Set ->jc to 0 to avoid re-submitting an already finished job (can
	 * happen when we receive the DONE interrupt while doing a GPU reset).
	 */
	job->jc = 0;
	panfrost_mmu_as_put(pfdev, job->mmu);
	panfrost_devfreq_record_idle(&pfdev->pfdevfreq);

	dma_fence_signal_locked(job->done_fence);
	pm_runtime_put_autosuspend(pfdev->dev);
}

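/*
 * Main interrupt processing, called with pfdev->js->job_lock held. It loops
 * on JOB_INT_RAWSTAT so that events raised while we are processing are
 * folded into the same pass, then signals done jobs, handles failed ones
 * and restarts any job still waiting in the _NEXT registers.
 */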
static void panfrost_job_handle_irq(struct panfrost_device *pfdev, u32 status)
{
	struct panfrost_job *done[NUM_JOB_SLOTS][2] = {};
	struct panfrost_job *failed[NUM_JOB_SLOTS] = {};
	u32 js_state = 0, js_events = 0;
	unsigned int i, j;

	/* First we collect all failed/done jobs. */
	while (status) {
		u32 js_state_mask = 0;

		for (j = 0; j < NUM_JOB_SLOTS; j++) {
			if (status & MK_JS_MASK(j))
				js_state_mask |= MK_JS_MASK(j);

			if (status & JOB_INT_MASK_DONE(j)) {
				if (done[j][0])
					done[j][1] = panfrost_dequeue_job(pfdev, j);
				else
					done[j][0] = panfrost_dequeue_job(pfdev, j);
			}

			if (status & JOB_INT_MASK_ERR(j)) {
				/* Cancel the next submission. Will be submitted
				 * after we're done handling this failure if
				 * there's no reset pending.
				 */
				job_write(pfdev, JS_COMMAND_NEXT(j), JS_COMMAND_NOP);
				failed[j] = panfrost_dequeue_job(pfdev, j);
			}
		}

		/* JS_STATE is sampled when JOB_INT_CLEAR is written.
		 * For each BIT(slot) or BIT(slot + 16) bit written to
		 * JOB_INT_CLEAR, the corresponding bits in JS_STATE
		 * (BIT(slot) and BIT(slot + 16)) are updated, but this
		 * is racy. If we only have one job done at the time we
		 * read JOB_INT_RAWSTAT but the second job fails before we
		 * clear the status, we end up with a status containing
		 * only the DONE bit and consider both jobs as DONE since
		 * JS_STATE reports both NEXT and CURRENT as inactive.
		 * To prevent that, let's repeat this clear+read steps
		 * until status is 0.
		 */
		job_write(pfdev, JOB_INT_CLEAR, status);
		js_state &= ~js_state_mask;
		js_state |= job_read(pfdev, JOB_INT_JS_STATE) & js_state_mask;
		js_events |= status;
		status = job_read(pfdev, JOB_INT_RAWSTAT);
	}

	/* Then we handle the dequeued jobs. */
	for (j = 0; j < NUM_JOB_SLOTS; j++) {
		if (!(js_events & MK_JS_MASK(j)))
			continue;

		if (failed[j]) {
			panfrost_job_handle_err(pfdev, failed[j], j);
		} else if (pfdev->jobs[j][0] && !(js_state & MK_JS_MASK(j))) {
			/* When the current job doesn't fail, the JM dequeues
			 * the next job without waiting for an ACK, this means
			 * we can have 2 jobs dequeued and only catch the
			 * interrupt when the second one is done. If both slots
			 * are inactive, but one job remains in pfdev->jobs[j],
			 * consider it done. Of course that doesn't apply if a
			 * failure happened since we cancelled execution of the
			 * job in _NEXT (see above).
			 */
			if (WARN_ON(!done[j][0]))
				done[j][0] = panfrost_dequeue_job(pfdev, j);
			else
				done[j][1] = panfrost_dequeue_job(pfdev, j);
		}

		for (i = 0; i < ARRAY_SIZE(done[0]) && done[j][i]; i++)
			panfrost_job_handle_done(pfdev, done[j][i]);
	}

	/* And finally we requeue jobs that were waiting in the second slot
	 * and have been stopped if we detected a failure on the first slot.
	 */
	for (j = 0; j < NUM_JOB_SLOTS; j++) {
		if (!(js_events & MK_JS_MASK(j)))
			continue;

		if (!failed[j] || !pfdev->jobs[j][0])
			continue;

		if (pfdev->jobs[j][0]->jc == 0) {
			/* The job was cancelled, signal the fence now */
			struct panfrost_job *canceled = panfrost_dequeue_job(pfdev, j);

			dma_fence_set_error(canceled->done_fence, -ECANCELED);
			panfrost_job_handle_done(pfdev, canceled);
		} else if (!atomic_read(&pfdev->reset.pending)) {
			/* Requeue the job we removed if no reset is pending */
			job_write(pfdev, JS_COMMAND_NEXT(j), JS_COMMAND_START);
		}
	}
}

static void panfrost_job_handle_irqs(struct panfrost_device *pfdev)
{
	u32 status = job_read(pfdev, JOB_INT_RAWSTAT);

	while (status) {
		pm_runtime_mark_last_busy(pfdev->dev);

		spin_lock(&pfdev->js->job_lock);
		panfrost_job_handle_irq(pfdev, status);
		spin_unlock(&pfdev->js->job_lock);
		status = job_read(pfdev, JOB_INT_RAWSTAT);
	}
}

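/*
 * Helper for the soft-stop poll loop in panfrost_reset(): a slot is still
 * considered active as long as JOB_INT_JS_STATE reports activity for it and
 * no DONE/ERR interrupt is pending. Returns the remaining active slots.
 */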
static u32 panfrost_active_slots(struct panfrost_device *pfdev,
				 u32 *js_state_mask, u32 js_state)
{
	u32 rawstat;

	if (!(js_state & *js_state_mask))
		return 0;

	rawstat = job_read(pfdev, JOB_INT_RAWSTAT);
	if (rawstat) {
		unsigned int i;

		for (i = 0; i < NUM_JOB_SLOTS; i++) {
			if (rawstat & MK_JS_MASK(i))
				*js_state_mask &= ~MK_JS_MASK(i);
		}
	}

	return js_state & *js_state_mask;
}

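/*
 * Full reset sequence: stop the schedulers, mask and drain job interrupts,
 * soft-stop anything still running, handle the remaining interrupts,
 * rebalance the pm_runtime/devfreq counters for stuck jobs, reset the GPU,
 * then resubmit pending jobs and restart the schedulers.
 */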
static void
panfrost_reset(struct panfrost_device *pfdev,
	       struct drm_sched_job *bad)
{
	u32 js_state, js_state_mask = 0xffffffff;
	unsigned int i, j;
	bool cookie;
	int ret;

	if (!atomic_read(&pfdev->reset.pending))
		return;

	/* Stop the schedulers.
	 *
	 * FIXME: We temporarily get out of the dma_fence_signalling section
	 * because the cleanup path generate lockdep splats when taking locks
	 * to release job resources. We should rework the code to follow this
	 * pattern:
	 *
	 *	try_lock
	 *	if (locked)
	 *		release
	 *	else
	 *		schedule_work_to_release_later
	 */
	for (i = 0; i < NUM_JOB_SLOTS; i++)
		drm_sched_stop(&pfdev->js->queue[i].sched, bad);

	cookie = dma_fence_begin_signalling();

	if (bad)
		drm_sched_increase_karma(bad);

	/* Mask job interrupts and synchronize to make sure we won't be
	 * interrupted during our reset.
	 */
	job_write(pfdev, JOB_INT_MASK, 0);
	synchronize_irq(pfdev->js->irq);

	for (i = 0; i < NUM_JOB_SLOTS; i++) {
		/* Cancel the next job and soft-stop the running job. */
		job_write(pfdev, JS_COMMAND_NEXT(i), JS_COMMAND_NOP);
		job_write(pfdev, JS_COMMAND(i), JS_COMMAND_SOFT_STOP);
	}

	/* Wait at most 10ms for soft-stops to complete */
	ret = readl_poll_timeout(pfdev->iomem + JOB_INT_JS_STATE, js_state,
				 !panfrost_active_slots(pfdev, &js_state_mask, js_state),
				 10, 10000);

	if (ret)
		dev_err(pfdev->dev, "Soft-stop failed\n");

	/* Handle the remaining interrupts before we reset. */
	panfrost_job_handle_irqs(pfdev);

	/* Remaining interrupts have been handled, but we might still have
	 * stuck jobs. Let's make sure the PM counters stay balanced by
	 * manually calling pm_runtime_put_noidle() and
	 * panfrost_devfreq_record_idle() for each stuck job.
	 */
	spin_lock(&pfdev->js->job_lock);
	for (i = 0; i < NUM_JOB_SLOTS; i++) {
		for (j = 0; j < ARRAY_SIZE(pfdev->jobs[0]) && pfdev->jobs[i][j]; j++) {
			pm_runtime_put_noidle(pfdev->dev);
			panfrost_devfreq_record_idle(&pfdev->pfdevfreq);
		}
	}
	memset(pfdev->jobs, 0, sizeof(pfdev->jobs));
	spin_unlock(&pfdev->js->job_lock);

	/* Proceed with reset now. */
	panfrost_device_reset(pfdev);

	/* panfrost_device_reset() unmasks job interrupts, but we want to
	 * keep them masked a bit longer.
	 */
	job_write(pfdev, JOB_INT_MASK, 0);

	/* GPU has been reset, we can clear the reset pending bit. */
	atomic_set(&pfdev->reset.pending, 0);

	/* Now resubmit jobs that were previously queued but didn't have a
	 * chance to finish.
	 * FIXME: We temporarily get out of the DMA fence signalling section
	 * while resubmitting jobs because the job submission logic will
	 * allocate memory with the GFP_KERNEL flag which can trigger memory
	 * reclaim and exposes a lock ordering issue.
	 */
	dma_fence_end_signalling(cookie);
	for (i = 0; i < NUM_JOB_SLOTS; i++)
		drm_sched_resubmit_jobs(&pfdev->js->queue[i].sched);
	cookie = dma_fence_begin_signalling();

	/* Restart the schedulers */
	for (i = 0; i < NUM_JOB_SLOTS; i++)
		drm_sched_start(&pfdev->js->queue[i].sched, true);

	/* Re-enable job interrupts now that everything has been restarted. */
	job_write(pfdev, JOB_INT_MASK,
		  GENMASK(16 + NUM_JOB_SLOTS - 1, 16) |
		  GENMASK(NUM_JOB_SLOTS - 1, 0));

	dma_fence_end_signalling(cookie);
}

static enum drm_gpu_sched_stat panfrost_job_timedout(struct drm_sched_job
						     *sched_job)
{
	struct panfrost_job *job = to_panfrost_job(sched_job);
	struct panfrost_device *pfdev = job->pfdev;
	int js = panfrost_job_get_slot(job);

	/*
	 * If the GPU managed to complete this job's fence, the timeout is
	 * spurious. Bail out.
	 */
	if (dma_fence_is_signaled(job->done_fence))
		return DRM_GPU_SCHED_STAT_NOMINAL;

	dev_err(pfdev->dev, "gpu sched timeout, js=%d, config=0x%x, status=0x%x, head=0x%x, tail=0x%x, sched_job=%p",
		js,
		job_read(pfdev, JS_CONFIG(js)),
		job_read(pfdev, JS_STATUS(js)),
		job_read(pfdev, JS_HEAD_LO(js)),
		job_read(pfdev, JS_TAIL_LO(js)),
		sched_job);

	panfrost_core_dump(job);

	atomic_set(&pfdev->reset.pending, 1);
	panfrost_reset(pfdev, sched_job);

	return DRM_GPU_SCHED_STAT_NOMINAL;
}

static void panfrost_reset_work(struct work_struct *work)
{
	struct panfrost_device *pfdev;

	pfdev = container_of(work, struct panfrost_device, reset.work);
	panfrost_reset(pfdev, NULL);
}

static const struct drm_sched_backend_ops panfrost_sched_ops = {
	.run_job = panfrost_job_run,
	.timedout_job = panfrost_job_timedout,
	.free_job = panfrost_job_free
};

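/*
 * Threaded IRQ pair: the hard handler only masks job interrupts and wakes
 * the thread, the threaded handler does the actual processing and unmasks
 * the interrupts again when it is done.
 */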
static irqreturn_t panfrost_job_irq_handler_thread(int irq, void *data)
{
	struct panfrost_device *pfdev = data;

	panfrost_job_handle_irqs(pfdev);
	job_write(pfdev, JOB_INT_MASK,
		  GENMASK(16 + NUM_JOB_SLOTS - 1, 16) |
		  GENMASK(NUM_JOB_SLOTS - 1, 0));
	return IRQ_HANDLED;
}

static irqreturn_t panfrost_job_irq_handler(int irq, void *data)
{
	struct panfrost_device *pfdev = data;
	u32 status = job_read(pfdev, JOB_INT_STAT);

	if (!status)
		return IRQ_NONE;

	job_write(pfdev, JOB_INT_MASK, 0);
	return IRQ_WAKE_THREAD;
}

int panfrost_job_init(struct panfrost_device *pfdev)
{
	struct panfrost_job_slot *js;
	unsigned int nentries = 2;
	int ret, j;

	/* All GPUs have two entries per queue, but without jobchain
	 * disambiguation stopping the right job in the close path is tricky,
	 * so let's just advertise one entry in that case.
	 */
	if (!panfrost_has_hw_feature(pfdev, HW_FEATURE_JOBCHAIN_DISAMBIGUATION))
		nentries = 1;

	pfdev->js = js = devm_kzalloc(pfdev->dev, sizeof(*js), GFP_KERNEL);
	if (!js)
		return -ENOMEM;

	INIT_WORK(&pfdev->reset.work, panfrost_reset_work);
	spin_lock_init(&js->job_lock);

	js->irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "job");
	if (js->irq <= 0)
		return -ENODEV;

	ret = devm_request_threaded_irq(pfdev->dev, js->irq,
					panfrost_job_irq_handler,
					panfrost_job_irq_handler_thread,
					IRQF_SHARED, KBUILD_MODNAME "-job",
					pfdev);
	if (ret) {
		dev_err(pfdev->dev, "failed to request job irq");
		return ret;
	}

	pfdev->reset.wq = alloc_ordered_workqueue("panfrost-reset", 0);
	if (!pfdev->reset.wq)
		return -ENOMEM;

	for (j = 0; j < NUM_JOB_SLOTS; j++) {
		js->queue[j].fence_context = dma_fence_context_alloc(1);

		ret = drm_sched_init(&js->queue[j].sched,
				     &panfrost_sched_ops,
				     nentries, 0,
				     msecs_to_jiffies(JOB_TIMEOUT_MS),
				     pfdev->reset.wq,
				     NULL, "pan_js", pfdev->dev);
		if (ret) {
			dev_err(pfdev->dev, "Failed to create scheduler: %d.", ret);
			goto err_sched;
		}
	}

	panfrost_job_enable_interrupts(pfdev);

	return 0;

err_sched:
	for (j--; j >= 0; j--)
		drm_sched_fini(&js->queue[j].sched);

	destroy_workqueue(pfdev->reset.wq);
	return ret;
}

void panfrost_job_fini(struct panfrost_device *pfdev)
{
	struct panfrost_job_slot *js = pfdev->js;
	int j;

	job_write(pfdev, JOB_INT_MASK, 0);

	for (j = 0; j < NUM_JOB_SLOTS; j++) {
		drm_sched_fini(&js->queue[j].sched);
	}

	cancel_work_sync(&pfdev->reset.work);
	destroy_workqueue(pfdev->reset.wq);
}

int panfrost_job_open(struct panfrost_file_priv *panfrost_priv)
{
	struct panfrost_device *pfdev = panfrost_priv->pfdev;
	struct panfrost_job_slot *js = pfdev->js;
	struct drm_gpu_scheduler *sched;
	int ret, i;

	for (i = 0; i < NUM_JOB_SLOTS; i++) {
		sched = &js->queue[i].sched;
		ret = drm_sched_entity_init(&panfrost_priv->sched_entity[i],
					    DRM_SCHED_PRIORITY_NORMAL, &sched,
					    1, NULL);
		if (WARN_ON(ret))
			return ret;
	}
	return 0;
}

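/*
 * File-close path: destroy the scheduler entities, then hard-stop any job
 * from this file that already reached the hardware. With jobchain
 * disambiguation the odd/even chain flag selects which of the two queued
 * jobs gets stopped; otherwise a plain HARD_STOP is issued.
 */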
void panfrost_job_close(struct panfrost_file_priv *panfrost_priv)
{
	struct panfrost_device *pfdev = panfrost_priv->pfdev;
	int i;

	for (i = 0; i < NUM_JOB_SLOTS; i++)
		drm_sched_entity_destroy(&panfrost_priv->sched_entity[i]);

	/* Kill in-flight jobs */
	spin_lock(&pfdev->js->job_lock);
	for (i = 0; i < NUM_JOB_SLOTS; i++) {
		struct drm_sched_entity *entity = &panfrost_priv->sched_entity[i];
		int j;

		for (j = ARRAY_SIZE(pfdev->jobs[0]) - 1; j >= 0; j--) {
			struct panfrost_job *job = pfdev->jobs[i][j];
			u32 cmd;

			if (!job || job->base.entity != entity)
				continue;

			if (j == 1) {
				/* Try to cancel the job before it starts */
				job_write(pfdev, JS_COMMAND_NEXT(i), JS_COMMAND_NOP);
				/* Reset the job head so it doesn't get restarted if
				 * the job in the first slot failed.
				 */
				job->jc = 0;
			}

			if (panfrost_has_hw_feature(pfdev, HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
				cmd = panfrost_get_job_chain_flag(job) ?
				      JS_COMMAND_HARD_STOP_1 :
				      JS_COMMAND_HARD_STOP_0;
			} else {
				cmd = JS_COMMAND_HARD_STOP;
			}

			job_write(pfdev, JS_COMMAND(i), cmd);
		}
	}
	spin_unlock(&pfdev->js->job_lock);
}

int panfrost_job_is_idle(struct panfrost_device *pfdev)
{
	struct panfrost_job_slot *js = pfdev->js;
	int i;

	for (i = 0; i < NUM_JOB_SLOTS; i++) {
		/* If there are any jobs in the HW queue, we're not idle */
		if (atomic_read(&js->queue[i].sched.hw_rq_count))
			return false;
	}

	return true;
}