amdgpu_gfx.c

/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_rlc.h"
#include "amdgpu_ras.h"

/* delay 0.1 second to enable gfx off feature */
#define GFX_OFF_DELAY_ENABLE		msecs_to_jiffies(100)

/*
 * GPU GFX IP block helper functions.
 */

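/**
 * amdgpu_gfx_mec_queue_to_bit - Map a MEC/pipe/queue triple to a flat bit index
 *
 * @adev: amdgpu_device pointer
 * @mec: MEC (MicroEngine Compute) index
 * @pipe: pipe index within the MEC
 * @queue: queue index within the pipe
 *
 * Linearizes the triple as mec * pipes_per_mec * queues_per_pipe +
 * pipe * queues_per_pipe + queue, matching the layout of the MEC queue
 * bitmap. For example, with 4 pipes per MEC and 8 queues per pipe,
 * (mec 1, pipe 2, queue 3) maps to bit 1*32 + 2*8 + 3 = 51.
 */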
int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec,
                                int pipe, int queue)
{
        int bit = 0;

        bit += mec * adev->gfx.mec.num_pipe_per_mec
                * adev->gfx.mec.num_queue_per_pipe;
        bit += pipe * adev->gfx.mec.num_queue_per_pipe;
        bit += queue;

        return bit;
}

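/**
 * amdgpu_queue_mask_bit_to_mec_queue - Inverse of amdgpu_gfx_mec_queue_to_bit
 *
 * @adev: amdgpu_device pointer
 * @bit: flat bit index into the MEC queue bitmap
 * @mec: returned MEC index
 * @pipe: returned pipe index within the MEC
 * @queue: returned queue index within the pipe
 *
 * Decomposes a flat queue bitmap index back into its (mec, pipe, queue)
 * coordinates using the same ordering as amdgpu_gfx_mec_queue_to_bit().
 */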
void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
                                        int *mec, int *pipe, int *queue)
{
        *queue = bit % adev->gfx.mec.num_queue_per_pipe;
        *pipe = (bit / adev->gfx.mec.num_queue_per_pipe)
                % adev->gfx.mec.num_pipe_per_mec;
        *mec = (bit / adev->gfx.mec.num_queue_per_pipe)
               / adev->gfx.mec.num_pipe_per_mec;
}

bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
                                     int mec, int pipe, int queue)
{
        return test_bit(amdgpu_gfx_mec_queue_to_bit(adev, mec, pipe, queue),
                        adev->gfx.mec.queue_bitmap);
}

int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev,
                               int me, int pipe, int queue)
{
        int bit = 0;

        bit += me * adev->gfx.me.num_pipe_per_me
                * adev->gfx.me.num_queue_per_pipe;
        bit += pipe * adev->gfx.me.num_queue_per_pipe;
        bit += queue;

        return bit;
}

void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit,
                                int *me, int *pipe, int *queue)
{
        *queue = bit % adev->gfx.me.num_queue_per_pipe;
        *pipe = (bit / adev->gfx.me.num_queue_per_pipe)
                % adev->gfx.me.num_pipe_per_me;
        *me = (bit / adev->gfx.me.num_queue_per_pipe)
              / adev->gfx.me.num_pipe_per_me;
}

bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev,
                                    int me, int pipe, int queue)
{
        return test_bit(amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue),
                        adev->gfx.me.queue_bitmap);
}

/**
 * amdgpu_gfx_scratch_get - Allocate a scratch register
 *
 * @adev: amdgpu_device pointer
 * @reg: scratch register mmio offset
 *
 * Allocate a CP scratch register for use by the driver (all asics).
 * Returns 0 on success or -EINVAL on failure.
 */
int amdgpu_gfx_scratch_get(struct amdgpu_device *adev, uint32_t *reg)
{
        int i;

        i = ffs(adev->gfx.scratch.free_mask);
        if (i != 0 && i <= adev->gfx.scratch.num_reg) {
                i--;
                adev->gfx.scratch.free_mask &= ~(1u << i);
                *reg = adev->gfx.scratch.reg_base + i;
                return 0;
        }
        return -EINVAL;
}

/**
 * amdgpu_gfx_scratch_free - Free a scratch register
 *
 * @adev: amdgpu_device pointer
 * @reg: scratch register mmio offset
 *
 * Free a CP scratch register allocated for use by the driver (all asics)
 */
void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, uint32_t reg)
{
        adev->gfx.scratch.free_mask |= 1u << (reg - adev->gfx.scratch.reg_base);
}

/**
 * amdgpu_gfx_parse_disable_cu - Parse the disable_cu module parameter
 *
 * @mask: array in which the per-shader array disable masks will be stored
 * @max_se: number of SEs
 * @max_sh: number of SHs
 *
 * The bitmask of CUs to be disabled in the shader array determined by se and
 * sh is stored in mask[se * max_sh + sh].
 */
void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_sh)
{
        unsigned se, sh, cu;
        const char *p;

        memset(mask, 0, sizeof(*mask) * max_se * max_sh);

        if (!amdgpu_disable_cu || !*amdgpu_disable_cu)
                return;

        p = amdgpu_disable_cu;
        for (;;) {
                char *next;
                int ret = sscanf(p, "%u.%u.%u", &se, &sh, &cu);

                if (ret < 3) {
                        DRM_ERROR("amdgpu: could not parse disable_cu\n");
                        return;
                }

                if (se < max_se && sh < max_sh && cu < 16) {
                        DRM_INFO("amdgpu: disabling CU %u.%u.%u\n", se, sh, cu);
                        mask[se * max_sh + sh] |= 1u << cu;
                } else {
                        DRM_ERROR("amdgpu: disable_cu %u.%u.%u is out of range\n",
                                  se, sh, cu);
                }

                next = strchr(p, ',');
                if (!next)
                        break;
                p = next + 1;
        }
}

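/*
 * amdgpu_gfx_is_multipipe_capable - Decide whether compute queues should be
 * spread across multiple pipes. Honors the amdgpu_compute_multipipe module
 * parameter when it is set, otherwise falls back to a per-ASIC default.
 */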
static bool amdgpu_gfx_is_multipipe_capable(struct amdgpu_device *adev)
{
        if (amdgpu_compute_multipipe != -1) {
                DRM_INFO("amdgpu: forcing compute pipe policy %d\n",
                         amdgpu_compute_multipipe);
                return amdgpu_compute_multipipe == 1;
        }

        /* FIXME: spreading the queues across pipes causes perf regressions
         * on POLARIS11 compute workloads */
        if (adev->asic_type == CHIP_POLARIS11)
                return false;

        return adev->gfx.mec.num_mec > 1;
}

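/*
 * amdgpu_gfx_is_high_priority_compute_queue - Report whether the given
 * compute queue belongs to the high-priority pool. With the multipipe
 * policy the priority alternates per pipe, otherwise per queue.
 */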
bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
                                               int pipe, int queue)
{
        bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
        int cond;

        /* Policy: alternate between normal and high priority */
        cond = multipipe_policy ? pipe : queue;

        return ((cond % 2) != 0);
}

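/*
 * amdgpu_gfx_compute_queue_acquire - Mark the compute queues the driver will
 * own in adev->gfx.mec.queue_bitmap, either spread evenly across the pipes
 * of MEC1 (multipipe policy) or packed into the lowest-numbered queues.
 */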
void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
{
        int i, queue, pipe;
        bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
        int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
                                     adev->gfx.mec.num_queue_per_pipe,
                                     adev->gfx.num_compute_rings);

        if (multipipe_policy) {
                /* policy: make queues evenly cross all pipes on MEC1 only */
                for (i = 0; i < max_queues_per_mec; i++) {
                        pipe = i % adev->gfx.mec.num_pipe_per_mec;
                        queue = (i / adev->gfx.mec.num_pipe_per_mec) %
                                adev->gfx.mec.num_queue_per_pipe;

                        set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
                                adev->gfx.mec.queue_bitmap);
                }
        } else {
                /* policy: amdgpu owns all queues in the given pipe */
                for (i = 0; i < max_queues_per_mec; ++i)
                        set_bit(i, adev->gfx.mec.queue_bitmap);
        }

        dev_dbg(adev->dev, "mec queue bitmap weight=%d\n",
                bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
}

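/*
 * amdgpu_gfx_graphics_queue_acquire - Mark the graphics (GFX) queues the
 * driver will own in adev->gfx.me.queue_bitmap and update
 * adev->gfx.num_gfx_rings accordingly.
 */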
void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
{
        int i, queue, me;

        for (i = 0; i < AMDGPU_MAX_GFX_QUEUES; ++i) {
                queue = i % adev->gfx.me.num_queue_per_pipe;
                me = (i / adev->gfx.me.num_queue_per_pipe)
                      / adev->gfx.me.num_pipe_per_me;

                if (me >= adev->gfx.me.num_me)
                        break;

                /* policy: amdgpu owns the first queue per pipe at this stage;
                 * this will be extended to multiple queues per pipe later */
                if (me == 0 && queue < 1)
                        set_bit(i, adev->gfx.me.queue_bitmap);
        }

        /* update the number of active graphics rings */
        adev->gfx.num_gfx_rings =
                bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
}

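/*
 * amdgpu_gfx_kiq_acquire - Pick a free compute queue for the KIQ (kernel
 * interface queue) and record it in the ring's me/pipe/queue fields.
 * Returns 0 on success or -EINVAL if no suitable queue is available.
 */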
static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
                                  struct amdgpu_ring *ring)
{
        int queue_bit;
        int mec, pipe, queue;

        queue_bit = adev->gfx.mec.num_mec
                    * adev->gfx.mec.num_pipe_per_mec
                    * adev->gfx.mec.num_queue_per_pipe;

        while (--queue_bit >= 0) {
                if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap))
                        continue;

                amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);

                /*
                 * 1. Using pipes 2/3 from MEC 2 seems to cause problems.
                 * 2. It must use queue id 0, because CGPG_IDLE/SAVE/LOAD/RUN
                 *    can only be issued on queue 0.
                 */
                if ((mec == 1 && pipe > 1) || queue != 0)
                        continue;

                ring->me = mec + 1;
                ring->pipe = pipe;
                ring->queue = queue;

                return 0;
        }

        dev_err(adev->dev, "Failed to find a queue for KIQ\n");
        return -EINVAL;
}

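/*
 * amdgpu_gfx_kiq_init_ring - Initialize the KIQ ring: acquire a queue slot,
 * set up its doorbell and EOP address, and register it with the ring core.
 */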
int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
                             struct amdgpu_ring *ring,
                             struct amdgpu_irq_src *irq)
{
        struct amdgpu_kiq *kiq = &adev->gfx.kiq;
        int r = 0;

        spin_lock_init(&kiq->ring_lock);

        ring->adev = NULL;
        ring->ring_obj = NULL;
        ring->use_doorbell = true;
        ring->doorbell_index = adev->doorbell_index.kiq;

        r = amdgpu_gfx_kiq_acquire(adev, ring);
        if (r)
                return r;

        ring->eop_gpu_addr = kiq->eop_gpu_addr;
        ring->no_scheduler = true;
        sprintf(ring->name, "kiq_%d.%d.%d", ring->me, ring->pipe, ring->queue);
        r = amdgpu_ring_init(adev, ring, 1024,
                             irq, AMDGPU_CP_KIQ_IRQ_DRIVER0,
                             AMDGPU_RING_PRIO_DEFAULT);
        if (r)
                dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);

        return r;
}

void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
{
        amdgpu_ring_fini(ring);
}

void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev)
{
        struct amdgpu_kiq *kiq = &adev->gfx.kiq;

        amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
}

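/*
 * amdgpu_gfx_kiq_init - Allocate and zero the KIQ EOP (end-of-pipe) buffer
 * of hpd_size bytes in GTT and store its GPU address in the kiq struct.
 */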
int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
                        unsigned hpd_size)
{
        int r;
        u32 *hpd;
        struct amdgpu_kiq *kiq = &adev->gfx.kiq;

        r = amdgpu_bo_create_kernel(adev, hpd_size, PAGE_SIZE,
                                    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
                                    &kiq->eop_gpu_addr, (void **)&hpd);
        if (r) {
                dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
                return r;
        }

        memset(hpd, 0, hpd_size);

        r = amdgpu_bo_reserve(kiq->eop_obj, true);
        if (unlikely(r != 0))
                dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
        amdgpu_bo_kunmap(kiq->eop_obj);
        amdgpu_bo_unreserve(kiq->eop_obj);

        return 0;
}

/* create MQD for each compute/gfx queue */
int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
                           unsigned mqd_size)
{
        struct amdgpu_ring *ring = NULL;
        int r, i;

        /* create MQD for KIQ */
        ring = &adev->gfx.kiq.ring;
        if (!ring->mqd_obj) {
                /* Originally the KIQ MQD was placed in the GTT domain, but for
                 * SRIOV the VRAM domain is a must, otherwise the hypervisor's
                 * SAVE_VF fails after the driver is unloaded, since by then the
                 * MQD has been deallocated and gart_unbind called. To avoid
                 * that divergence, use the VRAM domain for the KIQ MQD on both
                 * SRIOV and bare-metal.
                 */
                r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
                                            AMDGPU_GEM_DOMAIN_VRAM, &ring->mqd_obj,
                                            &ring->mqd_gpu_addr, &ring->mqd_ptr);
                if (r) {
                        dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
                        return r;
                }

                /* prepare MQD backup */
                adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(mqd_size, GFP_KERNEL);
                if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS])
                        dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
        }

        if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
                /* create MQD for each KGQ */
                for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
                        ring = &adev->gfx.gfx_ring[i];
                        if (!ring->mqd_obj) {
                                r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
                                                            AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
                                                            &ring->mqd_gpu_addr, &ring->mqd_ptr);
                                if (r) {
                                        dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
                                        return r;
                                }

                                /* prepare MQD backup */
                                adev->gfx.me.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
                                if (!adev->gfx.me.mqd_backup[i])
                                        dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
                        }
                }
        }

        /* create MQD for each KCQ */
        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                ring = &adev->gfx.compute_ring[i];
                if (!ring->mqd_obj) {
                        r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
                                                    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
                                                    &ring->mqd_gpu_addr, &ring->mqd_ptr);
                        if (r) {
                                dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
                                return r;
                        }

                        /* prepare MQD backup */
                        adev->gfx.mec.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
                        if (!adev->gfx.mec.mqd_backup[i])
                                dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
                }
        }

        return 0;
}

void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring = NULL;
        int i;

        if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
                for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
                        ring = &adev->gfx.gfx_ring[i];
                        kfree(adev->gfx.me.mqd_backup[i]);
                        amdgpu_bo_free_kernel(&ring->mqd_obj,
                                              &ring->mqd_gpu_addr,
                                              &ring->mqd_ptr);
                }
        }

        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                ring = &adev->gfx.compute_ring[i];
                kfree(adev->gfx.mec.mqd_backup[i]);
                amdgpu_bo_free_kernel(&ring->mqd_obj,
                                      &ring->mqd_gpu_addr,
                                      &ring->mqd_ptr);
        }

        ring = &adev->gfx.kiq.ring;
        kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
        amdgpu_bo_free_kernel(&ring->mqd_obj,
                              &ring->mqd_gpu_addr,
                              &ring->mqd_ptr);
}

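/*
 * amdgpu_gfx_disable_kcq - Ask the KIQ to unmap all kernel compute queues
 * (KCQs) and wait for the KIQ ring test to confirm completion.
 */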
int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev)
{
        struct amdgpu_kiq *kiq = &adev->gfx.kiq;
        struct amdgpu_ring *kiq_ring = &kiq->ring;
        int i;

        if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
                return -EINVAL;

        if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
                              adev->gfx.num_compute_rings))
                return -ENOMEM;

        for (i = 0; i < adev->gfx.num_compute_rings; i++)
                kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.compute_ring[i],
                                           RESET_QUEUES, 0, 0);

        return amdgpu_ring_test_helper(kiq_ring);
}

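/*
 * amdgpu_queue_mask_bit_to_set_resource_bit - Convert a driver queue bitmap
 * index into the bit position used when building the queue mask passed to
 * kiq_set_resources(), which assumes a fixed layout of 4 pipes per MEC and
 * 8 queues per pipe.
 */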
int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
                                              int queue_bit)
{
        int mec, pipe, queue;
        int set_resource_bit = 0;

        amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);

        set_resource_bit = mec * 4 * 8 + pipe * 8 + queue;

        return set_resource_bit;
}

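/*
 * amdgpu_gfx_enable_kcq - Build the queue mask from the MEC queue bitmap,
 * hand it to kiq_set_resources(), ask the KIQ to map every kernel compute
 * queue, then run the KIQ ring test to confirm the packets were processed.
 */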
int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev)
{
        struct amdgpu_kiq *kiq = &adev->gfx.kiq;
        struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
        uint64_t queue_mask = 0;
        int r, i;

        if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources)
                return -EINVAL;

        for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
                if (!test_bit(i, adev->gfx.mec.queue_bitmap))
                        continue;

                /* This situation may be hit in the future if a new HW
                 * generation exposes more than 64 queues. If so, the
                 * definition of queue_mask needs updating */
                if (WARN_ON(i >= (sizeof(queue_mask) * 8))) {
                        DRM_ERROR("Invalid KCQ enabled: %d\n", i);
                        break;
                }

                queue_mask |= (1ull << amdgpu_queue_mask_bit_to_set_resource_bit(adev, i));
        }

        DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe,
                 kiq_ring->queue);

        r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
                              adev->gfx.num_compute_rings +
                              kiq->pmf->set_resources_size);
        if (r) {
                DRM_ERROR("Failed to lock KIQ (%d).\n", r);
                return r;
        }

        kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
        for (i = 0; i < adev->gfx.num_compute_rings; i++)
                kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.compute_ring[i]);

        r = amdgpu_ring_test_helper(kiq_ring);
        if (r)
                DRM_ERROR("KCQ enable failed\n");

        return r;
}

/**
 * amdgpu_gfx_off_ctrl - Handle gfx off feature enable/disable
 *
 * @adev: amdgpu_device pointer
 * @enable: true to enable the gfx off feature, false to disable it
 *
 * 1. gfx off feature will be enabled by the gfx ip after gfx cg/pg is enabled.
 * 2. other clients can send a request to disable the gfx off feature; the
 *    request should be honored.
 * 3. other clients can cancel their request to disable the gfx off feature.
 * 4. other clients should not send a request to enable the gfx off feature
 *    before disabling it.
 */
void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
{
        if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
                return;

        mutex_lock(&adev->gfx.gfx_off_mutex);

        if (enable) {
                /* If the count is already 0, it means there's an imbalance bug
                 * somewhere. Note that the bug may be in a different caller
                 * than the one which triggers the WARN_ON_ONCE.
                 */
                if (WARN_ON_ONCE(adev->gfx.gfx_off_req_count == 0))
                        goto unlock;

                adev->gfx.gfx_off_req_count--;

                if (adev->gfx.gfx_off_req_count == 0 && !adev->gfx.gfx_off_state)
                        schedule_delayed_work(&adev->gfx.gfx_off_delay_work, GFX_OFF_DELAY_ENABLE);
        } else {
                if (adev->gfx.gfx_off_req_count == 0) {
                        cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);

                        if (adev->gfx.gfx_off_state &&
                            !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false)) {
                                adev->gfx.gfx_off_state = false;

                                if (adev->gfx.funcs->init_spm_golden) {
                                        dev_dbg(adev->dev,
                                                "GFXOFF is disabled, re-init SPM golden settings\n");
                                        amdgpu_gfx_init_spm_golden(adev);
                                }
                        }
                }

                adev->gfx.gfx_off_req_count++;
        }

unlock:
        mutex_unlock(&adev->gfx.gfx_off_mutex);
}

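/*
 * amdgpu_get_gfx_off_status - Query the current GFXOFF status from the SMU
 * into @value, serialized against GFXOFF state changes by gfx_off_mutex.
 */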
int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value)
{
        int r = 0;

        mutex_lock(&adev->gfx.gfx_off_mutex);

        r = smu_get_status_gfxoff(adev, value);

        mutex_unlock(&adev->gfx.gfx_off_mutex);

        return r;
}

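/*
 * amdgpu_gfx_ras_late_init - Register the GFX block with the RAS framework
 * (sysfs error-count node and interrupt handler) and enable the CP ECC error
 * interrupt when RAS is supported; otherwise free the ras_if again.
 */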
int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev)
{
        int r;
        struct ras_fs_if fs_info = {
                .sysfs_name = "gfx_err_count",
        };
        struct ras_ih_if ih_info = {
                .cb = amdgpu_gfx_process_ras_data_cb,
        };

        if (!adev->gfx.ras_if) {
                adev->gfx.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
                if (!adev->gfx.ras_if)
                        return -ENOMEM;
                adev->gfx.ras_if->block = AMDGPU_RAS_BLOCK__GFX;
                adev->gfx.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
                adev->gfx.ras_if->sub_block_index = 0;
                strcpy(adev->gfx.ras_if->name, "gfx");
        }
        fs_info.head = ih_info.head = *adev->gfx.ras_if;

        r = amdgpu_ras_late_init(adev, adev->gfx.ras_if,
                                 &fs_info, &ih_info);
        if (r)
                goto free;

        if (amdgpu_ras_is_supported(adev, adev->gfx.ras_if->block)) {
                r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
                if (r)
                        goto late_fini;
        } else {
                /* free gfx ras_if if ras is not supported */
                r = 0;
                goto free;
        }

        return 0;
late_fini:
        amdgpu_ras_late_fini(adev, adev->gfx.ras_if, &ih_info);
free:
        kfree(adev->gfx.ras_if);
        adev->gfx.ras_if = NULL;
        return r;
}

void amdgpu_gfx_ras_fini(struct amdgpu_device *adev)
{
        if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
            adev->gfx.ras_if) {
                struct ras_common_if *ras_if = adev->gfx.ras_if;
                struct ras_ih_if ih_info = {
                        .head = *ras_if,
                        .cb = amdgpu_gfx_process_ras_data_cb,
                };

                amdgpu_ras_late_fini(adev, ras_if, &ih_info);
                kfree(ras_if);
        }
}

int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
                                   void *err_data,
                                   struct amdgpu_iv_entry *entry)
{
        /* TODO: a UE (uncorrectable error) will trigger an interrupt.
         *
         * When "Full RAS" is enabled, the per-IP interrupt sources should
         * be disabled and the driver should only look for the aggregated
         * interrupt via sync flood.
         */
        if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
                kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
                if (adev->gfx.funcs->query_ras_error_count)
                        adev->gfx.funcs->query_ras_error_count(adev, err_data);
                amdgpu_ras_reset_gpu(adev);
        }
        return AMDGPU_RAS_SUCCESS;
}

int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
                                struct amdgpu_irq_src *source,
                                struct amdgpu_iv_entry *entry)
{
        struct ras_common_if *ras_if = adev->gfx.ras_if;
        struct ras_dispatch_if ih_data = {
                .entry = entry,
        };

        if (!ras_if)
                return 0;

        ih_data.head = *ras_if;

        DRM_ERROR("CP ECC ERROR IRQ\n");
        amdgpu_ras_interrupt_dispatch(adev, &ih_data);
        return 0;
}

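/*
 * amdgpu_kiq_rreg - Read a register through the KIQ. A read packet is
 * emitted on the KIQ ring and the result is fetched from a writeback slot
 * once the accompanying fence signals; returns ~0 on failure.
 */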
uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
{
        signed long r, cnt = 0;
        unsigned long flags;
        uint32_t seq, reg_val_offs = 0, value = 0;
        struct amdgpu_kiq *kiq = &adev->gfx.kiq;
        struct amdgpu_ring *ring = &kiq->ring;

        if (adev->in_pci_err_recovery)
                return 0;

        BUG_ON(!ring->funcs->emit_rreg);

        spin_lock_irqsave(&kiq->ring_lock, flags);
        if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
                pr_err("critical bug! too many kiq readers\n");
                goto failed_unlock;
        }
        amdgpu_ring_alloc(ring, 32);
        amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
        r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
        if (r)
                goto failed_undo;

        amdgpu_ring_commit(ring);
        spin_unlock_irqrestore(&kiq->ring_lock, flags);

        r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);

        /* Don't keep waiting in the GPU reset case, because doing so may
         * block the gpu_recover() routine forever: e.g. this KIQ read can be
         * triggered from TTM, and ttm_bo_lock_delayed_workqueue() will never
         * return if we keep waiting here, which makes gpu_recover() hang.
         *
         * Also don't keep waiting when called from IRQ context.
         */
        if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
                goto failed_kiq_read;

        might_sleep();
        while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
                msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
                r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
        }

        if (cnt > MAX_KIQ_REG_TRY)
                goto failed_kiq_read;

        mb();
        value = adev->wb.wb[reg_val_offs];
        amdgpu_device_wb_free(adev, reg_val_offs);
        return value;

failed_undo:
        amdgpu_ring_undo(ring);
failed_unlock:
        spin_unlock_irqrestore(&kiq->ring_lock, flags);
failed_kiq_read:
        if (reg_val_offs)
                amdgpu_device_wb_free(adev, reg_val_offs);
        dev_err(adev->dev, "failed to read reg:%x\n", reg);
        return ~0;
}

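/*
 * amdgpu_kiq_wreg - Write a register through the KIQ by emitting a write
 * packet on the KIQ ring and polling its fence, mirroring amdgpu_kiq_rreg().
 */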
void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
        signed long r, cnt = 0;
        unsigned long flags;
        uint32_t seq;
        struct amdgpu_kiq *kiq = &adev->gfx.kiq;
        struct amdgpu_ring *ring = &kiq->ring;

        BUG_ON(!ring->funcs->emit_wreg);

        if (adev->in_pci_err_recovery)
                return;

        spin_lock_irqsave(&kiq->ring_lock, flags);
        amdgpu_ring_alloc(ring, 32);
        amdgpu_ring_emit_wreg(ring, reg, v);
        r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
        if (r)
                goto failed_undo;

        amdgpu_ring_commit(ring);
        spin_unlock_irqrestore(&kiq->ring_lock, flags);

        r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);

        /* Don't keep waiting in the GPU reset case, because doing so may
         * block the gpu_recover() routine forever: e.g. this KIQ write can be
         * triggered from TTM, and ttm_bo_lock_delayed_workqueue() will never
         * return if we keep waiting here, which makes gpu_recover() hang.
         *
         * Also don't keep waiting when called from IRQ context.
         */
        if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
                goto failed_kiq_write;

        might_sleep();
        while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
                msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
                r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
        }

        if (cnt > MAX_KIQ_REG_TRY)
                goto failed_kiq_write;

        return;

failed_undo:
        amdgpu_ring_undo(ring);
        spin_unlock_irqrestore(&kiq->ring_lock, flags);
failed_kiq_write:
        dev_err(adev->dev, "failed to write reg:%x\n", reg);
}