radeon_fence.c 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107
  1. /*
  2. * Copyright 2009 Jerome Glisse.
  3. * All Rights Reserved.
  4. *
  5. * Permission is hereby granted, free of charge, to any person obtaining a
  6. * copy of this software and associated documentation files (the
  7. * "Software"), to deal in the Software without restriction, including
  8. * without limitation the rights to use, copy, modify, merge, publish,
  9. * distribute, sub license, and/or sell copies of the Software, and to
  10. * permit persons to whom the Software is furnished to do so, subject to
  11. * the following conditions:
  12. *
  13. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15. * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  16. * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
  17. * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  18. * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  19. * USE OR OTHER DEALINGS IN THE SOFTWARE.
  20. *
  21. * The above copyright notice and this permission notice (including the
  22. * next paragraph) shall be included in all copies or substantial portions
  23. * of the Software.
  24. *
  25. */
  26. /*
  27. * Authors:
  28. * Jerome Glisse <[email protected]>
  29. * Dave Airlie
  30. */
  31. #include <linux/atomic.h>
  32. #include <linux/firmware.h>
  33. #include <linux/kref.h>
  34. #include <linux/sched/signal.h>
  35. #include <linux/seq_file.h>
  36. #include <linux/slab.h>
  37. #include <linux/wait.h>
  38. #include <drm/drm_device.h>
  39. #include <drm/drm_file.h>
  40. #include "radeon.h"
  41. #include "radeon_reg.h"
  42. #include "radeon_trace.h"
  43. /*
  44. * Fences
  45. * Fences mark an event in the GPU's pipeline and are used
  46. * for GPU/CPU synchronization. When the fence is written,
  47. * it is expected that all buffers associated with that fence
  48. * are no longer in use by the associated ring on the GPU and
  49. * that the relevant GPU caches have been flushed. Whether
  50. * we use a scratch register or memory location depends on the asic
  51. * and whether writeback is enabled.
  52. */
  53. /**
  54. * radeon_fence_write - write a fence value
  55. *
  56. * @rdev: radeon_device pointer
  57. * @seq: sequence number to write
  58. * @ring: ring index the fence is associated with
  59. *
  60. * Writes a fence value to memory or a scratch register (all asics).
  61. */
  62. static void radeon_fence_write(struct radeon_device *rdev, u32 seq, int ring)
  63. {
  64. struct radeon_fence_driver *drv = &rdev->fence_drv[ring];
  65. if (likely(rdev->wb.enabled || !drv->scratch_reg)) {
  66. if (drv->cpu_addr) {
  67. *drv->cpu_addr = cpu_to_le32(seq);
  68. }
  69. } else {
  70. WREG32(drv->scratch_reg, seq);
  71. }
  72. }
  73. /**
  74. * radeon_fence_read - read a fence value
  75. *
  76. * @rdev: radeon_device pointer
  77. * @ring: ring index the fence is associated with
  78. *
  79. * Reads a fence value from memory or a scratch register (all asics).
  80. * Returns the value of the fence read from memory or register.
  81. */
  82. static u32 radeon_fence_read(struct radeon_device *rdev, int ring)
  83. {
  84. struct radeon_fence_driver *drv = &rdev->fence_drv[ring];
  85. u32 seq = 0;
  86. if (likely(rdev->wb.enabled || !drv->scratch_reg)) {
  87. if (drv->cpu_addr) {
  88. seq = le32_to_cpu(*drv->cpu_addr);
  89. } else {
  90. seq = lower_32_bits(atomic64_read(&drv->last_seq));
  91. }
  92. } else {
  93. seq = RREG32(drv->scratch_reg);
  94. }
  95. return seq;
  96. }
  97. /**
  98. * radeon_fence_schedule_check - schedule lockup check
  99. *
  100. * @rdev: radeon_device pointer
  101. * @ring: ring index we should work with
  102. *
  103. * Queues a delayed work item to check for lockups.
  104. */
  105. static void radeon_fence_schedule_check(struct radeon_device *rdev, int ring)
  106. {
  107. /*
  108. * Do not reset the timer here with mod_delayed_work,
  109. * this can livelock in an interaction with TTM delayed destroy.
  110. */
  111. queue_delayed_work(system_power_efficient_wq,
  112. &rdev->fence_drv[ring].lockup_work,
  113. RADEON_FENCE_JIFFIES_TIMEOUT);
  114. }
  115. /**
  116. * radeon_fence_emit - emit a fence on the requested ring
  117. *
  118. * @rdev: radeon_device pointer
  119. * @fence: radeon fence object
  120. * @ring: ring index the fence is associated with
  121. *
  122. * Emits a fence command on the requested ring (all asics).
  123. * Returns 0 on success, -ENOMEM on failure.
  124. */
  125. int radeon_fence_emit(struct radeon_device *rdev,
  126. struct radeon_fence **fence,
  127. int ring)
  128. {
  129. u64 seq;
  130. /* we are protected by the ring emission mutex */
  131. *fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL);
  132. if ((*fence) == NULL) {
  133. return -ENOMEM;
  134. }
  135. (*fence)->rdev = rdev;
  136. (*fence)->seq = seq = ++rdev->fence_drv[ring].sync_seq[ring];
  137. (*fence)->ring = ring;
  138. (*fence)->is_vm_update = false;
  139. dma_fence_init(&(*fence)->base, &radeon_fence_ops,
  140. &rdev->fence_queue.lock,
  141. rdev->fence_context + ring,
  142. seq);
  143. radeon_fence_ring_emit(rdev, ring, *fence);
  144. trace_radeon_fence_emit(rdev->ddev, ring, (*fence)->seq);
  145. radeon_fence_schedule_check(rdev, ring);
  146. return 0;
  147. }
  148. /*
  149. * radeon_fence_check_signaled - callback from fence_queue
  150. *
  151. * this function is called with fence_queue lock held, which is also used
  152. * for the fence locking itself, so unlocked variants are used for
  153. * fence_signal, and remove_wait_queue.
  154. */
  155. static int radeon_fence_check_signaled(wait_queue_entry_t *wait, unsigned mode, int flags, void *key)
  156. {
  157. struct radeon_fence *fence;
  158. u64 seq;
  159. fence = container_of(wait, struct radeon_fence, fence_wake);
  160. /*
  161. * We cannot use radeon_fence_process here because we're already
  162. * in the waitqueue, in a call from wake_up_all.
  163. */
  164. seq = atomic64_read(&fence->rdev->fence_drv[fence->ring].last_seq);
  165. if (seq >= fence->seq) {
  166. dma_fence_signal_locked(&fence->base);
  167. radeon_irq_kms_sw_irq_put(fence->rdev, fence->ring);
  168. __remove_wait_queue(&fence->rdev->fence_queue, &fence->fence_wake);
  169. dma_fence_put(&fence->base);
  170. }
  171. return 0;
  172. }
  173. /**
  174. * radeon_fence_activity - check for fence activity
  175. *
  176. * @rdev: radeon_device pointer
  177. * @ring: ring index the fence is associated with
  178. *
  179. * Checks the current fence value and calculates the last
  180. * signalled fence value. Returns true if activity occured
  181. * on the ring, and the fence_queue should be waken up.
  182. */
  183. static bool radeon_fence_activity(struct radeon_device *rdev, int ring)
  184. {
  185. uint64_t seq, last_seq, last_emitted;
  186. unsigned count_loop = 0;
  187. bool wake = false;
  188. /* Note there is a scenario here for an infinite loop but it's
  189. * very unlikely to happen. For it to happen, the current polling
  190. * process need to be interrupted by another process and another
  191. * process needs to update the last_seq btw the atomic read and
  192. * xchg of the current process.
  193. *
  194. * More over for this to go in infinite loop there need to be
  195. * continuously new fence signaled ie radeon_fence_read needs
  196. * to return a different value each time for both the currently
  197. * polling process and the other process that xchg the last_seq
  198. * btw atomic read and xchg of the current process. And the
  199. * value the other process set as last seq must be higher than
  200. * the seq value we just read. Which means that current process
  201. * need to be interrupted after radeon_fence_read and before
  202. * atomic xchg.
  203. *
  204. * To be even more safe we count the number of time we loop and
  205. * we bail after 10 loop just accepting the fact that we might
  206. * have temporarly set the last_seq not to the true real last
  207. * seq but to an older one.
  208. */
  209. last_seq = atomic64_read(&rdev->fence_drv[ring].last_seq);
  210. do {
  211. last_emitted = rdev->fence_drv[ring].sync_seq[ring];
  212. seq = radeon_fence_read(rdev, ring);
  213. seq |= last_seq & 0xffffffff00000000LL;
  214. if (seq < last_seq) {
  215. seq &= 0xffffffff;
  216. seq |= last_emitted & 0xffffffff00000000LL;
  217. }
  218. if (seq <= last_seq || seq > last_emitted) {
  219. break;
  220. }
  221. /* If we loop over we don't want to return without
  222. * checking if a fence is signaled as it means that the
  223. * seq we just read is different from the previous on.
  224. */
  225. wake = true;
  226. last_seq = seq;
  227. if ((count_loop++) > 10) {
  228. /* We looped over too many time leave with the
  229. * fact that we might have set an older fence
  230. * seq then the current real last seq as signaled
  231. * by the hw.
  232. */
  233. break;
  234. }
  235. } while (atomic64_xchg(&rdev->fence_drv[ring].last_seq, seq) > seq);
  236. if (seq < last_emitted)
  237. radeon_fence_schedule_check(rdev, ring);
  238. return wake;
  239. }
  240. /**
  241. * radeon_fence_check_lockup - check for hardware lockup
  242. *
  243. * @work: delayed work item
  244. *
  245. * Checks for fence activity and if there is none probe
  246. * the hardware if a lockup occured.
  247. */
  248. static void radeon_fence_check_lockup(struct work_struct *work)
  249. {
  250. struct radeon_fence_driver *fence_drv;
  251. struct radeon_device *rdev;
  252. int ring;
  253. fence_drv = container_of(work, struct radeon_fence_driver,
  254. lockup_work.work);
  255. rdev = fence_drv->rdev;
  256. ring = fence_drv - &rdev->fence_drv[0];
  257. if (!down_read_trylock(&rdev->exclusive_lock)) {
  258. /* just reschedule the check if a reset is going on */
  259. radeon_fence_schedule_check(rdev, ring);
  260. return;
  261. }
  262. if (fence_drv->delayed_irq && rdev->irq.installed) {
  263. unsigned long irqflags;
  264. fence_drv->delayed_irq = false;
  265. spin_lock_irqsave(&rdev->irq.lock, irqflags);
  266. radeon_irq_set(rdev);
  267. spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
  268. }
  269. if (radeon_fence_activity(rdev, ring))
  270. wake_up_all(&rdev->fence_queue);
  271. else if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) {
  272. /* good news we believe it's a lockup */
  273. dev_warn(rdev->dev, "GPU lockup (current fence id "
  274. "0x%016llx last fence id 0x%016llx on ring %d)\n",
  275. (uint64_t)atomic64_read(&fence_drv->last_seq),
  276. fence_drv->sync_seq[ring], ring);
  277. /* remember that we need an reset */
  278. rdev->needs_reset = true;
  279. wake_up_all(&rdev->fence_queue);
  280. }
  281. up_read(&rdev->exclusive_lock);
  282. }
  283. /**
  284. * radeon_fence_process - process a fence
  285. *
  286. * @rdev: radeon_device pointer
  287. * @ring: ring index the fence is associated with
  288. *
  289. * Checks the current fence value and wakes the fence queue
  290. * if the sequence number has increased (all asics).
  291. */
  292. void radeon_fence_process(struct radeon_device *rdev, int ring)
  293. {
  294. if (radeon_fence_activity(rdev, ring))
  295. wake_up_all(&rdev->fence_queue);
  296. }
  297. /**
  298. * radeon_fence_seq_signaled - check if a fence sequence number has signaled
  299. *
  300. * @rdev: radeon device pointer
  301. * @seq: sequence number
  302. * @ring: ring index the fence is associated with
  303. *
  304. * Check if the last signaled fence sequnce number is >= the requested
  305. * sequence number (all asics).
  306. * Returns true if the fence has signaled (current fence value
  307. * is >= requested value) or false if it has not (current fence
  308. * value is < the requested value. Helper function for
  309. * radeon_fence_signaled().
  310. */
  311. static bool radeon_fence_seq_signaled(struct radeon_device *rdev,
  312. u64 seq, unsigned ring)
  313. {
  314. if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
  315. return true;
  316. }
  317. /* poll new last sequence at least once */
  318. radeon_fence_process(rdev, ring);
  319. if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
  320. return true;
  321. }
  322. return false;
  323. }
  324. static bool radeon_fence_is_signaled(struct dma_fence *f)
  325. {
  326. struct radeon_fence *fence = to_radeon_fence(f);
  327. struct radeon_device *rdev = fence->rdev;
  328. unsigned ring = fence->ring;
  329. u64 seq = fence->seq;
  330. if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
  331. return true;
  332. }
  333. if (down_read_trylock(&rdev->exclusive_lock)) {
  334. radeon_fence_process(rdev, ring);
  335. up_read(&rdev->exclusive_lock);
  336. if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
  337. return true;
  338. }
  339. }
  340. return false;
  341. }
  342. /**
  343. * radeon_fence_enable_signaling - enable signalling on fence
  344. * @f: fence
  345. *
  346. * This function is called with fence_queue lock held, and adds a callback
  347. * to fence_queue that checks if this fence is signaled, and if so it
  348. * signals the fence and removes itself.
  349. */
  350. static bool radeon_fence_enable_signaling(struct dma_fence *f)
  351. {
  352. struct radeon_fence *fence = to_radeon_fence(f);
  353. struct radeon_device *rdev = fence->rdev;
  354. if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq)
  355. return false;
  356. if (down_read_trylock(&rdev->exclusive_lock)) {
  357. radeon_irq_kms_sw_irq_get(rdev, fence->ring);
  358. if (radeon_fence_activity(rdev, fence->ring))
  359. wake_up_all_locked(&rdev->fence_queue);
  360. /* did fence get signaled after we enabled the sw irq? */
  361. if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq) {
  362. radeon_irq_kms_sw_irq_put(rdev, fence->ring);
  363. up_read(&rdev->exclusive_lock);
  364. return false;
  365. }
  366. up_read(&rdev->exclusive_lock);
  367. } else {
  368. /* we're probably in a lockup, lets not fiddle too much */
  369. if (radeon_irq_kms_sw_irq_get_delayed(rdev, fence->ring))
  370. rdev->fence_drv[fence->ring].delayed_irq = true;
  371. radeon_fence_schedule_check(rdev, fence->ring);
  372. }
  373. fence->fence_wake.flags = 0;
  374. fence->fence_wake.private = NULL;
  375. fence->fence_wake.func = radeon_fence_check_signaled;
  376. __add_wait_queue(&rdev->fence_queue, &fence->fence_wake);
  377. dma_fence_get(f);
  378. return true;
  379. }
  380. /**
  381. * radeon_fence_signaled - check if a fence has signaled
  382. *
  383. * @fence: radeon fence object
  384. *
  385. * Check if the requested fence has signaled (all asics).
  386. * Returns true if the fence has signaled or false if it has not.
  387. */
  388. bool radeon_fence_signaled(struct radeon_fence *fence)
  389. {
  390. if (!fence)
  391. return true;
  392. if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) {
  393. dma_fence_signal(&fence->base);
  394. return true;
  395. }
  396. return false;
  397. }
  398. /**
  399. * radeon_fence_any_seq_signaled - check if any sequence number is signaled
  400. *
  401. * @rdev: radeon device pointer
  402. * @seq: sequence numbers
  403. *
  404. * Check if the last signaled fence sequnce number is >= the requested
  405. * sequence number (all asics).
  406. * Returns true if any has signaled (current value is >= requested value)
  407. * or false if it has not. Helper function for radeon_fence_wait_seq.
  408. */
  409. static bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq)
  410. {
  411. unsigned i;
  412. for (i = 0; i < RADEON_NUM_RINGS; ++i) {
  413. if (seq[i] && radeon_fence_seq_signaled(rdev, seq[i], i))
  414. return true;
  415. }
  416. return false;
  417. }
  418. /**
  419. * radeon_fence_wait_seq_timeout - wait for a specific sequence numbers
  420. *
  421. * @rdev: radeon device pointer
  422. * @target_seq: sequence number(s) we want to wait for
  423. * @intr: use interruptable sleep
  424. * @timeout: maximum time to wait, or MAX_SCHEDULE_TIMEOUT for infinite wait
  425. *
  426. * Wait for the requested sequence number(s) to be written by any ring
  427. * (all asics). Sequnce number array is indexed by ring id.
  428. * @intr selects whether to use interruptable (true) or non-interruptable
  429. * (false) sleep when waiting for the sequence number. Helper function
  430. * for radeon_fence_wait_*().
  431. * Returns remaining time if the sequence number has passed, 0 when
  432. * the wait timeout, or an error for all other cases.
  433. * -EDEADLK is returned when a GPU lockup has been detected.
  434. */
  435. static long radeon_fence_wait_seq_timeout(struct radeon_device *rdev,
  436. u64 *target_seq, bool intr,
  437. long timeout)
  438. {
  439. long r;
  440. int i;
  441. if (radeon_fence_any_seq_signaled(rdev, target_seq))
  442. return timeout;
  443. /* enable IRQs and tracing */
  444. for (i = 0; i < RADEON_NUM_RINGS; ++i) {
  445. if (!target_seq[i])
  446. continue;
  447. trace_radeon_fence_wait_begin(rdev->ddev, i, target_seq[i]);
  448. radeon_irq_kms_sw_irq_get(rdev, i);
  449. }
  450. if (intr) {
  451. r = wait_event_interruptible_timeout(rdev->fence_queue, (
  452. radeon_fence_any_seq_signaled(rdev, target_seq)
  453. || rdev->needs_reset), timeout);
  454. } else {
  455. r = wait_event_timeout(rdev->fence_queue, (
  456. radeon_fence_any_seq_signaled(rdev, target_seq)
  457. || rdev->needs_reset), timeout);
  458. }
  459. if (rdev->needs_reset)
  460. r = -EDEADLK;
  461. for (i = 0; i < RADEON_NUM_RINGS; ++i) {
  462. if (!target_seq[i])
  463. continue;
  464. radeon_irq_kms_sw_irq_put(rdev, i);
  465. trace_radeon_fence_wait_end(rdev->ddev, i, target_seq[i]);
  466. }
  467. return r;
  468. }
  469. /**
  470. * radeon_fence_wait_timeout - wait for a fence to signal with timeout
  471. *
  472. * @fence: radeon fence object
  473. * @intr: use interruptible sleep
  474. *
  475. * Wait for the requested fence to signal (all asics).
  476. * @intr selects whether to use interruptable (true) or non-interruptable
  477. * (false) sleep when waiting for the fence.
  478. * @timeout: maximum time to wait, or MAX_SCHEDULE_TIMEOUT for infinite wait
  479. * Returns remaining time if the sequence number has passed, 0 when
  480. * the wait timeout, or an error for all other cases.
  481. */
  482. long radeon_fence_wait_timeout(struct radeon_fence *fence, bool intr, long timeout)
  483. {
  484. uint64_t seq[RADEON_NUM_RINGS] = {};
  485. long r;
  486. /*
  487. * This function should not be called on !radeon fences.
  488. * If this is the case, it would mean this function can
  489. * also be called on radeon fences belonging to another card.
  490. * exclusive_lock is not held in that case.
  491. */
  492. if (WARN_ON_ONCE(!to_radeon_fence(&fence->base)))
  493. return dma_fence_wait(&fence->base, intr);
  494. seq[fence->ring] = fence->seq;
  495. r = radeon_fence_wait_seq_timeout(fence->rdev, seq, intr, timeout);
  496. if (r <= 0) {
  497. return r;
  498. }
  499. dma_fence_signal(&fence->base);
  500. return r;
  501. }
  502. /**
  503. * radeon_fence_wait - wait for a fence to signal
  504. *
  505. * @fence: radeon fence object
  506. * @intr: use interruptible sleep
  507. *
  508. * Wait for the requested fence to signal (all asics).
  509. * @intr selects whether to use interruptable (true) or non-interruptable
  510. * (false) sleep when waiting for the fence.
  511. * Returns 0 if the fence has passed, error for all other cases.
  512. */
  513. int radeon_fence_wait(struct radeon_fence *fence, bool intr)
  514. {
  515. long r = radeon_fence_wait_timeout(fence, intr, MAX_SCHEDULE_TIMEOUT);
  516. if (r > 0) {
  517. return 0;
  518. } else {
  519. return r;
  520. }
  521. }
  522. /**
  523. * radeon_fence_wait_any - wait for a fence to signal on any ring
  524. *
  525. * @rdev: radeon device pointer
  526. * @fences: radeon fence object(s)
  527. * @intr: use interruptable sleep
  528. *
  529. * Wait for any requested fence to signal (all asics). Fence
  530. * array is indexed by ring id. @intr selects whether to use
  531. * interruptable (true) or non-interruptable (false) sleep when
  532. * waiting for the fences. Used by the suballocator.
  533. * Returns 0 if any fence has passed, error for all other cases.
  534. */
  535. int radeon_fence_wait_any(struct radeon_device *rdev,
  536. struct radeon_fence **fences,
  537. bool intr)
  538. {
  539. uint64_t seq[RADEON_NUM_RINGS];
  540. unsigned i, num_rings = 0;
  541. long r;
  542. for (i = 0; i < RADEON_NUM_RINGS; ++i) {
  543. seq[i] = 0;
  544. if (!fences[i]) {
  545. continue;
  546. }
  547. seq[i] = fences[i]->seq;
  548. ++num_rings;
  549. }
  550. /* nothing to wait for ? */
  551. if (num_rings == 0)
  552. return -ENOENT;
  553. r = radeon_fence_wait_seq_timeout(rdev, seq, intr, MAX_SCHEDULE_TIMEOUT);
  554. if (r < 0) {
  555. return r;
  556. }
  557. return 0;
  558. }
  559. /**
  560. * radeon_fence_wait_next - wait for the next fence to signal
  561. *
  562. * @rdev: radeon device pointer
  563. * @ring: ring index the fence is associated with
  564. *
  565. * Wait for the next fence on the requested ring to signal (all asics).
  566. * Returns 0 if the next fence has passed, error for all other cases.
  567. * Caller must hold ring lock.
  568. */
  569. int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
  570. {
  571. uint64_t seq[RADEON_NUM_RINGS] = {};
  572. long r;
  573. seq[ring] = atomic64_read(&rdev->fence_drv[ring].last_seq) + 1ULL;
  574. if (seq[ring] >= rdev->fence_drv[ring].sync_seq[ring]) {
  575. /* nothing to wait for, last_seq is
  576. already the last emited fence */
  577. return -ENOENT;
  578. }
  579. r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT);
  580. if (r < 0)
  581. return r;
  582. return 0;
  583. }
  584. /**
  585. * radeon_fence_wait_empty - wait for all fences to signal
  586. *
  587. * @rdev: radeon device pointer
  588. * @ring: ring index the fence is associated with
  589. *
  590. * Wait for all fences on the requested ring to signal (all asics).
  591. * Returns 0 if the fences have passed, error for all other cases.
  592. * Caller must hold ring lock.
  593. */
  594. int radeon_fence_wait_empty(struct radeon_device *rdev, int ring)
  595. {
  596. uint64_t seq[RADEON_NUM_RINGS] = {};
  597. long r;
  598. seq[ring] = rdev->fence_drv[ring].sync_seq[ring];
  599. if (!seq[ring])
  600. return 0;
  601. r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT);
  602. if (r < 0) {
  603. if (r == -EDEADLK)
  604. return -EDEADLK;
  605. dev_err(rdev->dev, "error waiting for ring[%d] to become idle (%ld)\n",
  606. ring, r);
  607. }
  608. return 0;
  609. }
  610. /**
  611. * radeon_fence_ref - take a ref on a fence
  612. *
  613. * @fence: radeon fence object
  614. *
  615. * Take a reference on a fence (all asics).
  616. * Returns the fence.
  617. */
  618. struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence)
  619. {
  620. dma_fence_get(&fence->base);
  621. return fence;
  622. }
  623. /**
  624. * radeon_fence_unref - remove a ref on a fence
  625. *
  626. * @fence: radeon fence object
  627. *
  628. * Remove a reference on a fence (all asics).
  629. */
  630. void radeon_fence_unref(struct radeon_fence **fence)
  631. {
  632. struct radeon_fence *tmp = *fence;
  633. *fence = NULL;
  634. if (tmp) {
  635. dma_fence_put(&tmp->base);
  636. }
  637. }
  638. /**
  639. * radeon_fence_count_emitted - get the count of emitted fences
  640. *
  641. * @rdev: radeon device pointer
  642. * @ring: ring index the fence is associated with
  643. *
  644. * Get the number of fences emitted on the requested ring (all asics).
  645. * Returns the number of emitted fences on the ring. Used by the
  646. * dynpm code to ring track activity.
  647. */
  648. unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring)
  649. {
  650. uint64_t emitted;
  651. /* We are not protected by ring lock when reading the last sequence
  652. * but it's ok to report slightly wrong fence count here.
  653. */
  654. radeon_fence_process(rdev, ring);
  655. emitted = rdev->fence_drv[ring].sync_seq[ring]
  656. - atomic64_read(&rdev->fence_drv[ring].last_seq);
  657. /* to avoid 32bits warp around */
  658. if (emitted > 0x10000000) {
  659. emitted = 0x10000000;
  660. }
  661. return (unsigned)emitted;
  662. }
  663. /**
  664. * radeon_fence_need_sync - do we need a semaphore
  665. *
  666. * @fence: radeon fence object
  667. * @dst_ring: which ring to check against
  668. *
  669. * Check if the fence needs to be synced against another ring
  670. * (all asics). If so, we need to emit a semaphore.
  671. * Returns true if we need to sync with another ring, false if
  672. * not.
  673. */
  674. bool radeon_fence_need_sync(struct radeon_fence *fence, int dst_ring)
  675. {
  676. struct radeon_fence_driver *fdrv;
  677. if (!fence) {
  678. return false;
  679. }
  680. if (fence->ring == dst_ring) {
  681. return false;
  682. }
  683. /* we are protected by the ring mutex */
  684. fdrv = &fence->rdev->fence_drv[dst_ring];
  685. if (fence->seq <= fdrv->sync_seq[fence->ring]) {
  686. return false;
  687. }
  688. return true;
  689. }
  690. /**
  691. * radeon_fence_note_sync - record the sync point
  692. *
  693. * @fence: radeon fence object
  694. * @dst_ring: which ring to check against
  695. *
  696. * Note the sequence number at which point the fence will
  697. * be synced with the requested ring (all asics).
  698. */
  699. void radeon_fence_note_sync(struct radeon_fence *fence, int dst_ring)
  700. {
  701. struct radeon_fence_driver *dst, *src;
  702. unsigned i;
  703. if (!fence) {
  704. return;
  705. }
  706. if (fence->ring == dst_ring) {
  707. return;
  708. }
  709. /* we are protected by the ring mutex */
  710. src = &fence->rdev->fence_drv[fence->ring];
  711. dst = &fence->rdev->fence_drv[dst_ring];
  712. for (i = 0; i < RADEON_NUM_RINGS; ++i) {
  713. if (i == dst_ring) {
  714. continue;
  715. }
  716. dst->sync_seq[i] = max(dst->sync_seq[i], src->sync_seq[i]);
  717. }
  718. }
  719. /**
  720. * radeon_fence_driver_start_ring - make the fence driver
  721. * ready for use on the requested ring.
  722. *
  723. * @rdev: radeon device pointer
  724. * @ring: ring index to start the fence driver on
  725. *
  726. * Make the fence driver ready for processing (all asics).
  727. * Not all asics have all rings, so each asic will only
  728. * start the fence driver on the rings it has.
  729. * Returns 0 for success, errors for failure.
  730. */
  731. int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
  732. {
  733. uint64_t index;
  734. int r;
  735. radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
  736. if (rdev->wb.use_event || !radeon_ring_supports_scratch_reg(rdev, &rdev->ring[ring])) {
  737. rdev->fence_drv[ring].scratch_reg = 0;
  738. if (ring != R600_RING_TYPE_UVD_INDEX) {
  739. index = R600_WB_EVENT_OFFSET + ring * 4;
  740. rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
  741. rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr +
  742. index;
  743. } else {
  744. /* put fence directly behind firmware */
  745. index = ALIGN(rdev->uvd_fw->size, 8);
  746. rdev->fence_drv[ring].cpu_addr = rdev->uvd.cpu_addr + index;
  747. rdev->fence_drv[ring].gpu_addr = rdev->uvd.gpu_addr + index;
  748. }
  749. } else {
  750. r = radeon_scratch_get(rdev, &rdev->fence_drv[ring].scratch_reg);
  751. if (r) {
  752. dev_err(rdev->dev, "fence failed to get scratch register\n");
  753. return r;
  754. }
  755. index = RADEON_WB_SCRATCH_OFFSET +
  756. rdev->fence_drv[ring].scratch_reg -
  757. rdev->scratch.reg_base;
  758. rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
  759. rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
  760. }
  761. radeon_fence_write(rdev, atomic64_read(&rdev->fence_drv[ring].last_seq), ring);
  762. rdev->fence_drv[ring].initialized = true;
  763. dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016llx\n",
  764. ring, rdev->fence_drv[ring].gpu_addr);
  765. return 0;
  766. }
  767. /**
  768. * radeon_fence_driver_init_ring - init the fence driver
  769. * for the requested ring.
  770. *
  771. * @rdev: radeon device pointer
  772. * @ring: ring index to start the fence driver on
  773. *
  774. * Init the fence driver for the requested ring (all asics).
  775. * Helper function for radeon_fence_driver_init().
  776. */
  777. static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring)
  778. {
  779. int i;
  780. rdev->fence_drv[ring].scratch_reg = -1;
  781. rdev->fence_drv[ring].cpu_addr = NULL;
  782. rdev->fence_drv[ring].gpu_addr = 0;
  783. for (i = 0; i < RADEON_NUM_RINGS; ++i)
  784. rdev->fence_drv[ring].sync_seq[i] = 0;
  785. atomic64_set(&rdev->fence_drv[ring].last_seq, 0);
  786. rdev->fence_drv[ring].initialized = false;
  787. INIT_DELAYED_WORK(&rdev->fence_drv[ring].lockup_work,
  788. radeon_fence_check_lockup);
  789. rdev->fence_drv[ring].rdev = rdev;
  790. }
  791. /**
  792. * radeon_fence_driver_init - init the fence driver
  793. * for all possible rings.
  794. *
  795. * @rdev: radeon device pointer
  796. *
  797. * Init the fence driver for all possible rings (all asics).
  798. * Not all asics have all rings, so each asic will only
  799. * start the fence driver on the rings it has using
  800. * radeon_fence_driver_start_ring().
  801. */
  802. void radeon_fence_driver_init(struct radeon_device *rdev)
  803. {
  804. int ring;
  805. init_waitqueue_head(&rdev->fence_queue);
  806. for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
  807. radeon_fence_driver_init_ring(rdev, ring);
  808. }
  809. radeon_debugfs_fence_init(rdev);
  810. }
  811. /**
  812. * radeon_fence_driver_fini - tear down the fence driver
  813. * for all possible rings.
  814. *
  815. * @rdev: radeon device pointer
  816. *
  817. * Tear down the fence driver for all possible rings (all asics).
  818. */
  819. void radeon_fence_driver_fini(struct radeon_device *rdev)
  820. {
  821. int ring, r;
  822. mutex_lock(&rdev->ring_lock);
  823. for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
  824. if (!rdev->fence_drv[ring].initialized)
  825. continue;
  826. r = radeon_fence_wait_empty(rdev, ring);
  827. if (r) {
  828. /* no need to trigger GPU reset as we are unloading */
  829. radeon_fence_driver_force_completion(rdev, ring);
  830. }
  831. cancel_delayed_work_sync(&rdev->fence_drv[ring].lockup_work);
  832. wake_up_all(&rdev->fence_queue);
  833. radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
  834. rdev->fence_drv[ring].initialized = false;
  835. }
  836. mutex_unlock(&rdev->ring_lock);
  837. }
  838. /**
  839. * radeon_fence_driver_force_completion - force all fence waiter to complete
  840. *
  841. * @rdev: radeon device pointer
  842. * @ring: the ring to complete
  843. *
  844. * In case of GPU reset failure make sure no process keep waiting on fence
  845. * that will never complete.
  846. */
  847. void radeon_fence_driver_force_completion(struct radeon_device *rdev, int ring)
  848. {
  849. if (rdev->fence_drv[ring].initialized) {
  850. radeon_fence_write(rdev, rdev->fence_drv[ring].sync_seq[ring], ring);
  851. cancel_delayed_work_sync(&rdev->fence_drv[ring].lockup_work);
  852. }
  853. }
  854. /*
  855. * Fence debugfs
  856. */
  857. #if defined(CONFIG_DEBUG_FS)
  858. static int radeon_debugfs_fence_info_show(struct seq_file *m, void *data)
  859. {
  860. struct radeon_device *rdev = (struct radeon_device *)m->private;
  861. int i, j;
  862. for (i = 0; i < RADEON_NUM_RINGS; ++i) {
  863. if (!rdev->fence_drv[i].initialized)
  864. continue;
  865. radeon_fence_process(rdev, i);
  866. seq_printf(m, "--- ring %d ---\n", i);
  867. seq_printf(m, "Last signaled fence 0x%016llx\n",
  868. (unsigned long long)atomic64_read(&rdev->fence_drv[i].last_seq));
  869. seq_printf(m, "Last emitted 0x%016llx\n",
  870. rdev->fence_drv[i].sync_seq[i]);
  871. for (j = 0; j < RADEON_NUM_RINGS; ++j) {
  872. if (i != j && rdev->fence_drv[j].initialized)
  873. seq_printf(m, "Last sync to ring %d 0x%016llx\n",
  874. j, rdev->fence_drv[i].sync_seq[j]);
  875. }
  876. }
  877. return 0;
  878. }
  879. /*
  880. * radeon_debugfs_gpu_reset - manually trigger a gpu reset
  881. *
  882. * Manually trigger a gpu reset at the next fence wait.
  883. */
  884. static int radeon_debugfs_gpu_reset(void *data, u64 *val)
  885. {
  886. struct radeon_device *rdev = (struct radeon_device *)data;
  887. down_read(&rdev->exclusive_lock);
  888. *val = rdev->needs_reset;
  889. rdev->needs_reset = true;
  890. wake_up_all(&rdev->fence_queue);
  891. up_read(&rdev->exclusive_lock);
  892. return 0;
  893. }
  894. DEFINE_SHOW_ATTRIBUTE(radeon_debugfs_fence_info);
  895. DEFINE_DEBUGFS_ATTRIBUTE(radeon_debugfs_gpu_reset_fops,
  896. radeon_debugfs_gpu_reset, NULL, "%lld\n");
  897. #endif
  /*
   * radeon_debugfs_fence_init - register the fence debugfs files
   *
   * Creates the read-only "radeon_gpu_reset" and "radeon_fence_info" files
   * under the primary DRM minor's debugfs root. Compiles to an empty
   * function when CONFIG_DEBUG_FS is not set.
   */
  void radeon_debugfs_fence_init(struct radeon_device *rdev)
  {
  #ifdef CONFIG_DEBUG_FS
          struct dentry *debugfs_dir = rdev->ddev->primary->debugfs_root;

          debugfs_create_file("radeon_gpu_reset", 0444, debugfs_dir, rdev,
                              &radeon_debugfs_gpu_reset_fops);
          debugfs_create_file("radeon_fence_info", 0444, debugfs_dir, rdev,
                              &radeon_debugfs_fence_info_fops);
  #endif
  }
  /* dma_fence_ops.get_driver_name: every radeon fence reports the same driver name. */
  static const char *radeon_fence_get_driver_name(struct dma_fence *fence)
  {
          static const char driver_name[] = "radeon";

          return driver_name;
  }
  912. static const char *radeon_fence_get_timeline_name(struct dma_fence *f)
  913. {
  914. struct radeon_fence *fence = to_radeon_fence(f);
  915. switch (fence->ring) {
  916. case RADEON_RING_TYPE_GFX_INDEX: return "radeon.gfx";
  917. case CAYMAN_RING_TYPE_CP1_INDEX: return "radeon.cp1";
  918. case CAYMAN_RING_TYPE_CP2_INDEX: return "radeon.cp2";
  919. case R600_RING_TYPE_DMA_INDEX: return "radeon.dma";
  920. case CAYMAN_RING_TYPE_DMA1_INDEX: return "radeon.dma1";
  921. case R600_RING_TYPE_UVD_INDEX: return "radeon.uvd";
  922. case TN_RING_TYPE_VCE1_INDEX: return "radeon.vce1";
  923. case TN_RING_TYPE_VCE2_INDEX: return "radeon.vce2";
  924. default: WARN_ON_ONCE(1); return "radeon.unk";
  925. }
  926. }
  927. static inline bool radeon_test_signaled(struct radeon_fence *fence)
  928. {
  929. return test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->base.flags);
  930. }
  931. struct radeon_wait_cb {
  932. struct dma_fence_cb base;
  933. struct task_struct *task;
  934. };
  935. static void
  936. radeon_fence_wait_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
  937. {
  938. struct radeon_wait_cb *wait =
  939. container_of(cb, struct radeon_wait_cb, base);
  940. wake_up_process(wait->task);
  941. }
  942. static signed long radeon_fence_default_wait(struct dma_fence *f, bool intr,
  943. signed long t)
  944. {
  945. struct radeon_fence *fence = to_radeon_fence(f);
  946. struct radeon_device *rdev = fence->rdev;
  947. struct radeon_wait_cb cb;
  948. cb.task = current;
  949. if (dma_fence_add_callback(f, &cb.base, radeon_fence_wait_cb))
  950. return t;
  951. while (t > 0) {
  952. if (intr)
  953. set_current_state(TASK_INTERRUPTIBLE);
  954. else
  955. set_current_state(TASK_UNINTERRUPTIBLE);
  956. /*
  957. * radeon_test_signaled must be called after
  958. * set_current_state to prevent a race with wake_up_process
  959. */
  960. if (radeon_test_signaled(fence))
  961. break;
  962. if (rdev->needs_reset) {
  963. t = -EDEADLK;
  964. break;
  965. }
  966. t = schedule_timeout(t);
  967. if (t > 0 && intr && signal_pending(current))
  968. t = -ERESTARTSYS;
  969. }
  970. __set_current_state(TASK_RUNNING);
  971. dma_fence_remove_callback(f, &cb.base);
  972. return t;
  973. }
  974. const struct dma_fence_ops radeon_fence_ops = {
  975. .get_driver_name = radeon_fence_get_driver_name,
  976. .get_timeline_name = radeon_fence_get_timeline_name,
  977. .enable_signaling = radeon_fence_enable_signaling,
  978. .signaled = radeon_fence_is_signaled,
  979. .wait = radeon_fence_default_wait,
  980. .release = NULL,
  981. };