adreno_gen8_ringbuffer.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2021, The Linux Foundation. All rights reserved.
 * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved.
 */

#include "adreno.h"
#include "adreno_gen8.h"
#include "adreno_pm4types.h"
#include "adreno_ringbuffer.h"
#include "adreno_trace.h"
#include "kgsl_trace.h"
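/*
 * Concurrent binning (running the BV and BR pipes in parallel) is used for
 * every context except secure ones; internal (NULL drawctxt) submissions
 * also skip it.
 */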
static bool is_concurrent_binning(struct adreno_context *drawctxt)
{
	if (!drawctxt)
		return false;

	return !(drawctxt->base.flags & KGSL_CONTEXT_SECURE);
}
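/*
 * Emit the PM4 packets that switch the pagetable from the GPU:
 * CP_SMMU_TABLE_UPDATE performs the TTBR0 switch itself, the CP_MEM_WRITE
 * mirrors the new TTBR0 value into this ringbuffer's scratch slot, and a
 * final CP_THREAD_CONTROL keeps BV from racing ahead of BR. Returns the
 * number of dwords written to 'cmds'.
 */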
static int gen8_rb_pagetable_switch(struct adreno_device *adreno_dev,
		struct adreno_ringbuffer *rb, struct adreno_context *drawctxt,
		struct kgsl_pagetable *pagetable, u32 *cmds)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	u64 ttbr0 = kgsl_mmu_pagetable_get_ttbr0(pagetable);
	int count = 0;
	u32 id = drawctxt ? drawctxt->base.id : 0;

	if (pagetable == device->mmu.defaultpagetable)
		return 0;

	/* CP switches the pagetable and flushes the caches */
	cmds[count++] = cp_type7_packet(CP_SMMU_TABLE_UPDATE, 3);
	cmds[count++] = lower_32_bits(ttbr0);
	cmds[count++] = upper_32_bits(ttbr0);
	cmds[count++] = id;

	cmds[count++] = cp_type7_packet(CP_MEM_WRITE, 5);
	cmds[count++] = lower_32_bits(SCRATCH_RB_GPU_ADDR(device,
			rb->id, ttbr0));
	cmds[count++] = upper_32_bits(SCRATCH_RB_GPU_ADDR(device,
			rb->id, ttbr0));
	cmds[count++] = lower_32_bits(ttbr0);
	cmds[count++] = upper_32_bits(ttbr0);
	cmds[count++] = id;

	/*
	 * Sync both threads after switching pagetables and enable BR only
	 * to make sure BV doesn't race ahead while BR is still switching
	 * pagetables.
	 */
	cmds[count++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
	cmds[count++] = CP_SYNC_THREADS | CP_SET_THREAD_BR;

	return count;
}
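/*
 * Build and submit the ringbuffer commands for a context switch: sync the
 * BV/BR threads, reset CP context state, switch the pagetable (or just
 * update CONTEXTIDR when the pagetable is unchanged), record the new
 * context id in memstore and, when preemption is enabled, point the CP at
 * the context's non-privileged save/restore buffer.
 */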
static int gen8_rb_context_switch(struct adreno_device *adreno_dev,
		struct adreno_ringbuffer *rb,
		struct adreno_context *drawctxt)
{
	struct kgsl_pagetable *pagetable =
		adreno_drawctxt_get_pagetable(drawctxt);
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	int count = 0;
	u32 cmds[57];

	/* Sync both threads */
	cmds[count++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
	cmds[count++] = CP_SYNC_THREADS | CP_SET_THREAD_BOTH;

	/* Reset context state */
	cmds[count++] = cp_type7_packet(CP_RESET_CONTEXT_STATE, 1);
	cmds[count++] = CP_RESET_GLOBAL_LOCAL_TS | CP_CLEAR_BV_BR_COUNTER |
		CP_CLEAR_RESOURCE_TABLE | CP_CLEAR_ON_CHIP_TS;

	/*
	 * Enable/disable concurrent binning for pagetable switch and
	 * set the thread to BR since only BR can execute the pagetable
	 * switch packets.
	 */

	/* Sync both threads and enable BR only */
	cmds[count++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
	cmds[count++] = CP_SYNC_THREADS | CP_SET_THREAD_BR;

	if (adreno_drawctxt_get_pagetable(rb->drawctxt_active) != pagetable) {
		/* Clear performance counters during context switches */
		if (!adreno_dev->perfcounter) {
			cmds[count++] = cp_type4_packet(GEN8_RBBM_PERFCTR_SRAM_INIT_CMD, 1);
			cmds[count++] = 0x1;
			cmds[count++] = cp_type4_packet(GEN8_RBBM_SLICE_PERFCTR_SRAM_INIT_CMD, 1);
			cmds[count++] = 0x1;
		}

		count += gen8_rb_pagetable_switch(adreno_dev, rb,
			drawctxt, pagetable, &cmds[count]);

		/* Wait for performance counter clear to finish */
		if (!adreno_dev->perfcounter) {
			cmds[count++] = cp_type7_packet(CP_WAIT_REG_MEM, 6);
			cmds[count++] = 0x3;
			cmds[count++] = GEN8_RBBM_PERFCTR_SRAM_INIT_STATUS;
			cmds[count++] = 0x0;
			cmds[count++] = 0x1;
			cmds[count++] = 0x1;
			cmds[count++] = 0x0;
		}
	} else {
		struct kgsl_iommu *iommu = KGSL_IOMMU(device);
		u32 offset = GEN8_SMMU_BASE + (iommu->cb0_offset >> 2) + 0x0d;

		/*
		 * Set the CONTEXTIDR register to the current context id so we
		 * can use it in pagefault debugging. Unlike TTBR0 we don't
		 * need any special sequence or locking to change it.
		 */
		cmds[count++] = cp_type4_packet(offset, 1);
		cmds[count++] = drawctxt->base.id;
	}

	cmds[count++] = cp_type7_packet(CP_NOP, 1);
	cmds[count++] = CONTEXT_TO_MEM_IDENTIFIER;

	cmds[count++] = cp_type7_packet(CP_MEM_WRITE, 3);
	cmds[count++] = lower_32_bits(MEMSTORE_RB_GPU_ADDR(device, rb,
		current_context));
	cmds[count++] = upper_32_bits(MEMSTORE_RB_GPU_ADDR(device, rb,
		current_context));
	cmds[count++] = drawctxt->base.id;

	cmds[count++] = cp_type7_packet(CP_MEM_WRITE, 3);
	cmds[count++] = lower_32_bits(MEMSTORE_ID_GPU_ADDR(device,
		KGSL_MEMSTORE_GLOBAL, current_context));
	cmds[count++] = upper_32_bits(MEMSTORE_ID_GPU_ADDR(device,
		KGSL_MEMSTORE_GLOBAL, current_context));
	cmds[count++] = drawctxt->base.id;

	cmds[count++] = cp_type7_packet(CP_EVENT_WRITE, 1);
	cmds[count++] = 0x31;

	if (adreno_is_preemption_enabled(adreno_dev)) {
		u64 gpuaddr = drawctxt->base.user_ctxt_record->memdesc.gpuaddr;

		cmds[count++] = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 3);
		cmds[count++] = SET_PSEUDO_NON_PRIV_SAVE_ADDR;
		cmds[count++] = lower_32_bits(gpuaddr);
		cmds[count++] = upper_32_bits(gpuaddr);
	}

	return gen8_ringbuffer_addcmds(adreno_dev, rb, NULL, F_NOTPROTECTED,
		cmds, count, 0, NULL);
}
#define RB_SOPTIMESTAMP(device, rb) \
	MEMSTORE_RB_GPU_ADDR(device, rb, soptimestamp)
#define CTXT_SOPTIMESTAMP(device, drawctxt) \
	MEMSTORE_ID_GPU_ADDR(device, (drawctxt)->base.id, soptimestamp)

#define RB_EOPTIMESTAMP(device, rb) \
	MEMSTORE_RB_GPU_ADDR(device, rb, eoptimestamp)
#define CTXT_EOPTIMESTAMP(device, drawctxt) \
	MEMSTORE_ID_GPU_ADDR(device, (drawctxt)->base.id, eoptimestamp)
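/*
 * Commit whatever has been written to the ringbuffer by moving the
 * hardware write pointer. The WPTR register is only written when no
 * preemption is in flight and this ringbuffer is the one currently
 * executing; otherwise skip_inline_wptr is noted so the deferred write
 * can be handled once the preemption state settles.
 */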
int gen8_ringbuffer_submit(struct adreno_ringbuffer *rb,
		struct adreno_submit_time *time)
{
	struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb);
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	int ret = 0;
	unsigned long flags;

	adreno_get_submit_time(adreno_dev, rb, time);
	adreno_profile_submit_time(time);

	spin_lock_irqsave(&rb->preempt_lock, flags);

	if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE)) {
		if (adreno_dev->cur_rb == rb) {
			kgsl_pwrscale_busy(device);
			ret = gen8_fenced_write(adreno_dev,
				GEN8_CP_RB_WPTR_GC, rb->_wptr,
				FENCE_STATUS_WRITEDROPPED0_MASK);
			rb->skip_inline_wptr = false;
		}
	} else {
		if (adreno_dev->cur_rb == rb)
			rb->skip_inline_wptr = true;
	}

	rb->wptr = rb->_wptr;

	spin_unlock_irqrestore(&rb->preempt_lock, flags);

	if (ret) {
		/*
		 * If WPTR update fails, take inline snapshot and trigger
		 * recovery.
		 */
		gmu_core_fault_snapshot(device);
		adreno_dispatcher_fault(adreno_dev,
			ADRENO_GMU_FAULT_SKIP_SNAPSHOT);
	}

	return ret;
}
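/*
 * One-time ringbuffer setup: allocate the privileged scratch buffer and
 * configure either a single ringbuffer or, when the preemption feature is
 * set, the full array of ringbuffers plus the preemption timer.
 */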
int gen8_ringbuffer_init(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	int i, ret;

	ret = adreno_allocate_global(device, &device->scratch, PAGE_SIZE,
		0, 0, KGSL_MEMDESC_RANDOM | KGSL_MEMDESC_PRIVILEGED,
		"scratch");
	if (ret)
		return ret;

	adreno_dev->cur_rb = &(adreno_dev->ringbuffers[0]);

	if (!adreno_preemption_feature_set(adreno_dev)) {
		adreno_dev->num_ringbuffers = 1;
		return adreno_ringbuffer_setup(adreno_dev,
			&adreno_dev->ringbuffers[0], 0);
	}

	adreno_dev->num_ringbuffers = ARRAY_SIZE(adreno_dev->ringbuffers);

	for (i = 0; i < adreno_dev->num_ringbuffers; i++) {
		int ret;

		ret = adreno_ringbuffer_setup(adreno_dev,
			&adreno_dev->ringbuffers[i], i);
		if (ret)
			return ret;
	}

	timer_setup(&adreno_dev->preempt.timer, adreno_preemption_timer, 0);

	gen8_preemption_init(adreno_dev);

	return 0;
}
#define GEN8_SUBMIT_MAX 104
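/*
 * gen8_ringbuffer_addcmds() wraps a caller-supplied command stream with
 * the ringbuffer boilerplate (identifiers, IFPC markers, BV/BR thread
 * control, SOP/EOP timestamps, secure mode transitions and preemption
 * packets) and submits it. GEN8_SUBMIT_MAX above is the worst-case dword
 * count of that wrapper; any unused space is handed back to the
 * ringbuffer before submission.
 */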
int gen8_ringbuffer_addcmds(struct adreno_device *adreno_dev,
		struct adreno_ringbuffer *rb, struct adreno_context *drawctxt,
		u32 flags, u32 *in, u32 dwords, u32 timestamp,
		struct adreno_submit_time *time)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	u32 size = GEN8_SUBMIT_MAX + dwords;
	u32 *cmds, index = 0;
	u64 profile_gpuaddr;
	u32 profile_dwords;

	if (adreno_drawctxt_detached(drawctxt))
		return -ENOENT;

	if (adreno_gpu_fault(adreno_dev) != 0)
		return -EPROTO;

	rb->timestamp++;

	if (drawctxt)
		drawctxt->internal_timestamp = rb->timestamp;

	/* All submissions are run with protected mode off due to APRIV */
	flags &= ~F_NOTPROTECTED;

	cmds = adreno_ringbuffer_allocspace(rb, size);
	if (IS_ERR(cmds))
		return PTR_ERR(cmds);

	/* Identify the start of a command */
	cmds[index++] = cp_type7_packet(CP_NOP, 1);
	cmds[index++] = drawctxt ? CMD_IDENTIFIER : CMD_INTERNAL_IDENTIFIER;

	/* This is 25 dwords when drawctxt is not NULL and perfcounter needs to be zapped */
	index += gen8_preemption_pre_ibsubmit(adreno_dev, rb, drawctxt,
		&cmds[index]);

	cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
	cmds[index++] = CP_SET_THREAD_BOTH;

	cmds[index++] = cp_type7_packet(CP_SET_MARKER, 1);
	cmds[index++] = 0x101; /* IFPC disable */

	cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
	cmds[index++] = CP_SET_THREAD_BR;

	profile_gpuaddr = adreno_profile_preib_processing(adreno_dev,
		drawctxt, &profile_dwords);

	if (profile_gpuaddr) {
		cmds[index++] = cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3);
		cmds[index++] = lower_32_bits(profile_gpuaddr);
		cmds[index++] = upper_32_bits(profile_gpuaddr);
		cmds[index++] = profile_dwords;
	}

	if (drawctxt) {
		cmds[index++] = cp_type7_packet(CP_MEM_WRITE, 3);
		cmds[index++] = lower_32_bits(CTXT_SOPTIMESTAMP(device,
			drawctxt));
		cmds[index++] = upper_32_bits(CTXT_SOPTIMESTAMP(device,
			drawctxt));
		cmds[index++] = timestamp;
	}

	cmds[index++] = cp_type7_packet(CP_MEM_WRITE, 3);
	cmds[index++] = lower_32_bits(RB_SOPTIMESTAMP(device, rb));
	cmds[index++] = upper_32_bits(RB_SOPTIMESTAMP(device, rb));
	cmds[index++] = rb->timestamp;

	if (IS_SECURE(flags)) {
		/* Sync BV and BR if entering secure mode */
		cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
		cmds[index++] = CP_SYNC_THREADS | CP_CONCURRENT_BIN_DISABLE;
		cmds[index++] = cp_type7_packet(CP_SET_SECURE_MODE, 1);
		cmds[index++] = 1;
	}

	memcpy(&cmds[index], in, dwords << 2);
	index += dwords;

	profile_gpuaddr = adreno_profile_postib_processing(adreno_dev,
		drawctxt, &dwords);

	if (profile_gpuaddr) {
		cmds[index++] = cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3);
		cmds[index++] = lower_32_bits(profile_gpuaddr);
		cmds[index++] = upper_32_bits(profile_gpuaddr);
		cmds[index++] = profile_dwords;
	}

	if (test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, &device->mmu.pfpolicy))
		cmds[index++] = cp_type7_packet(CP_WAIT_MEM_WRITES, 0);

	if (is_concurrent_binning(drawctxt)) {
		u64 addr = SCRATCH_RB_GPU_ADDR(device, rb->id, bv_ts);

		cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
		cmds[index++] = CP_SET_THREAD_BV;

		/*
		 * Make sure the timestamp is committed once BV pipe is
		 * completely done with this submission.
		 */
		cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4);
		cmds[index++] = CACHE_CLEAN | BIT(27);
		cmds[index++] = lower_32_bits(addr);
		cmds[index++] = upper_32_bits(addr);
		cmds[index++] = rb->timestamp;

		cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
		cmds[index++] = CP_SET_THREAD_BR;

		/*
		 * This makes sure that BR doesn't race ahead and commit
		 * timestamp to memstore while BV is still processing
		 * this submission.
		 */
		cmds[index++] = cp_type7_packet(CP_WAIT_TIMESTAMP, 4);
		cmds[index++] = 0;
		cmds[index++] = lower_32_bits(addr);
		cmds[index++] = upper_32_bits(addr);
		cmds[index++] = rb->timestamp;
	}

	/*
	 * If this is an internal command, just write the ringbuffer timestamp,
	 * otherwise, write both
	 */
	if (!drawctxt) {
		cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4);
		cmds[index++] = CACHE_CLEAN | BIT(31) | BIT(27);
		cmds[index++] = lower_32_bits(RB_EOPTIMESTAMP(device, rb));
		cmds[index++] = upper_32_bits(RB_EOPTIMESTAMP(device, rb));
		cmds[index++] = rb->timestamp;
	} else {
		cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4);
		cmds[index++] = CACHE_CLEAN | BIT(31) | BIT(27);
		cmds[index++] = lower_32_bits(CTXT_EOPTIMESTAMP(device,
			drawctxt));
		cmds[index++] = upper_32_bits(CTXT_EOPTIMESTAMP(device,
			drawctxt));
		cmds[index++] = timestamp;

		cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4);
		cmds[index++] = CACHE_CLEAN | BIT(27);
		cmds[index++] = lower_32_bits(RB_EOPTIMESTAMP(device, rb));
		cmds[index++] = upper_32_bits(RB_EOPTIMESTAMP(device, rb));
		cmds[index++] = rb->timestamp;
	}

	if (IS_WFI(flags))
		cmds[index++] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0);

	if (IS_SECURE(flags)) {
		cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
		cmds[index++] = CP_CONCURRENT_BIN_DISABLE;
		cmds[index++] = cp_type7_packet(CP_SET_SECURE_MODE, 1);
		cmds[index++] = 0;
		cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
		cmds[index++] = CP_SYNC_THREADS;
	}

	cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
	cmds[index++] = CP_SET_THREAD_BOTH;

	cmds[index++] = cp_type7_packet(CP_SET_MARKER, 1);
	cmds[index++] = 0x100; /* IFPC enable */

	cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
	cmds[index++] = CP_SET_THREAD_BR;

	/* 10 dwords */
	index += gen8_preemption_post_ibsubmit(adreno_dev, &cmds[index]);

	/* Adjust the thing for the number of dwords we actually wrote */
	rb->_wptr -= (size - index);

	return gen8_ringbuffer_submit(rb, time);
}
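/*
 * Emit a CP_REG_TO_MEM that dumps the CP always-on counter (or the
 * per-context always-on counter) to the given GPU address. Both helpers
 * return the number of dwords written (4) and back the kernel/user
 * profiling paths below.
 */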
static u32 gen8_get_alwayson_counter(u32 *cmds, u64 gpuaddr)
{
	cmds[0] = cp_type7_packet(CP_REG_TO_MEM, 3);
	cmds[1] = GEN8_CP_ALWAYS_ON_COUNTER_LO | (1 << 30) | (2 << 18);
	cmds[2] = lower_32_bits(gpuaddr);
	cmds[3] = upper_32_bits(gpuaddr);

	return 4;
}

static u32 gen8_get_alwayson_context(u32 *cmds, u64 gpuaddr)
{
	cmds[0] = cp_type7_packet(CP_REG_TO_MEM, 3);
	cmds[1] = GEN8_CP_ALWAYS_ON_CONTEXT_LO | (1 << 30) | (2 << 18);
	cmds[2] = lower_32_bits(gpuaddr);
	cmds[3] = upper_32_bits(gpuaddr);

	return 4;
}
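/*
 * User profiling goes through a small per-ringbuffer indirect buffer: each
 * slot in profile_desc holds a four-dword always-on counter capture aimed
 * at the caller's profiling buffer, and the returned commands simply call
 * that IB. Slots are recycled round-robin across PROFILE_IB_SLOTS.
 */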
#define PROFILE_IB_DWORDS 4
#define PROFILE_IB_SLOTS (PAGE_SIZE / (PROFILE_IB_DWORDS << 2))

static u64 gen8_get_user_profiling_ib(struct adreno_ringbuffer *rb,
		struct kgsl_drawobj_cmd *cmdobj, u32 target_offset, u32 *cmds)
{
	u32 offset, *ib, dwords;

	if (IS_ERR(rb->profile_desc))
		return 0;

	offset = rb->profile_index * (PROFILE_IB_DWORDS << 2);
	ib = rb->profile_desc->hostptr + offset;
	dwords = gen8_get_alwayson_counter(ib,
		cmdobj->profiling_buffer_gpuaddr + target_offset);

	cmds[0] = cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3);
	cmds[1] = lower_32_bits(rb->profile_desc->gpuaddr + offset);
	cmds[2] = upper_32_bits(rb->profile_desc->gpuaddr + offset);
	cmds[3] = dwords;

	rb->profile_index = (rb->profile_index + 1) % PROFILE_IB_SLOTS;

	return 4;
}
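/*
 * Switch the ringbuffer to a new draw context: take a reference on the
 * incoming context, emit the context switch commands, and queue the
 * outgoing context's reference to be dropped against the current
 * ringbuffer timestamp.
 */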
static int gen8_drawctxt_switch(struct adreno_device *adreno_dev,
		struct adreno_ringbuffer *rb,
		struct adreno_context *drawctxt)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	int ret;

	if (rb->drawctxt_active == drawctxt)
		return 0;

	if (kgsl_context_detached(&drawctxt->base))
		return -ENOENT;

	if (!_kgsl_context_get(&drawctxt->base))
		return -ENOENT;

	ret = gen8_rb_context_switch(adreno_dev, rb, drawctxt);
	if (ret) {
		kgsl_context_put(&drawctxt->base);
		return ret;
	}

	trace_adreno_drawctxt_switch(rb, drawctxt);

	/* Release the current drawctxt as soon as the new one is switched */
	adreno_put_drawctxt_on_timestamp(device, rb->drawctxt_active,
		rb, rb->timestamp);

	rb->drawctxt_active = drawctxt;

	return 0;
}
#define GEN8_USER_PROFILE_IB(rb, cmdobj, cmds, field) \
	gen8_get_user_profiling_ib((rb), (cmdobj), \
		offsetof(struct kgsl_drawobj_profiling_buffer, field), \
		(cmds))

#define GEN8_KERNEL_PROFILE(dev, cmdobj, cmds, field) \
	gen8_get_alwayson_counter((cmds), \
		(dev)->profile_buffer->gpuaddr + \
		ADRENO_DRAWOBJ_PROFILE_OFFSET((cmdobj)->profile_index, \
			field))

#define GEN8_KERNEL_PROFILE_CONTEXT(dev, cmdobj, cmds, field) \
	gen8_get_alwayson_context((cmds), \
		(dev)->profile_buffer->gpuaddr + \
		ADRENO_DRAWOBJ_PROFILE_OFFSET((cmdobj)->profile_index, \
			field))

#define GEN8_COMMAND_DWORDS 60
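/*
 * Top-level submission path for a command object: build the IB1LIST
 * (plus optional kernel/user profiling commands and the CCU invalidates),
 * switch draw contexts if needed, then hand everything to
 * gen8_ringbuffer_addcmds() for wrapping and submission.
 */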
int gen8_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
		struct kgsl_drawobj_cmd *cmdobj, u32 flags,
		struct adreno_submit_time *time)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
	struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context);
	struct adreno_ringbuffer *rb = drawctxt->rb;
	int ret = 0, numibs = 0, index = 0;
	u32 *cmds;

	/* Count the number of IBs (if we are not skipping) */
	if (!IS_SKIP(flags)) {
		struct list_head *tmp;

		list_for_each(tmp, &cmdobj->cmdlist)
			numibs++;
	}

	cmds = kvmalloc((GEN8_COMMAND_DWORDS + (numibs * 5)) << 2, GFP_KERNEL);
	if (!cmds) {
		ret = -ENOMEM;
		goto done;
	}

	cmds[index++] = cp_type7_packet(CP_NOP, 1);
	cmds[index++] = START_IB_IDENTIFIER;

	/* Kernel profiling: 8 dwords */
	if (IS_KERNEL_PROFILE(flags)) {
		index += GEN8_KERNEL_PROFILE(adreno_dev, cmdobj, &cmds[index],
			started);
		index += GEN8_KERNEL_PROFILE_CONTEXT(adreno_dev, cmdobj, &cmds[index],
			ctx_start);
	}

	/* User profiling: 4 dwords */
	if (IS_USER_PROFILE(flags))
		index += GEN8_USER_PROFILE_IB(rb, cmdobj, &cmds[index],
			gpu_ticks_submitted);

	if (is_concurrent_binning(drawctxt)) {
		cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
		cmds[index++] = CP_SET_THREAD_BOTH;
	}

	if (numibs) {
		struct kgsl_memobj_node *ib;

		cmds[index++] = cp_type7_packet(CP_SET_MARKER, 1);
		cmds[index++] = 0x00d; /* IB1LIST start */

		list_for_each_entry(ib, &cmdobj->cmdlist, node) {
			if (ib->priv & MEMOBJ_SKIP ||
				(ib->flags & KGSL_CMDLIST_CTXTSWITCH_PREAMBLE &&
				 !IS_PREAMBLE(flags)))
				cmds[index++] = cp_type7_packet(CP_NOP, 4);

			cmds[index++] =
				cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3);
			cmds[index++] = lower_32_bits(ib->gpuaddr);
			cmds[index++] = upper_32_bits(ib->gpuaddr);

			/* Double check that IB_PRIV is never set */
			cmds[index++] = (ib->size >> 2) & 0xfffff;
		}

		cmds[index++] = cp_type7_packet(CP_SET_MARKER, 1);
		cmds[index++] = 0x00e; /* IB1LIST end */
	}

	if (is_concurrent_binning(drawctxt)) {
		cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
		cmds[index++] = CP_SET_THREAD_BR;
	}

	/* CCU invalidate depth */
	cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 1);
	cmds[index++] = 24;

	/* CCU invalidate color */
	cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 1);
	cmds[index++] = 25;

	/* 8 dwords */
	if (IS_KERNEL_PROFILE(flags)) {
		index += GEN8_KERNEL_PROFILE(adreno_dev, cmdobj, &cmds[index],
			retired);
		index += GEN8_KERNEL_PROFILE_CONTEXT(adreno_dev, cmdobj, &cmds[index],
			ctx_end);
	}

	/* 4 dwords */
	if (IS_USER_PROFILE(flags))
		index += GEN8_USER_PROFILE_IB(rb, cmdobj, &cmds[index],
			gpu_ticks_retired);

	cmds[index++] = cp_type7_packet(CP_NOP, 1);
	cmds[index++] = END_IB_IDENTIFIER;

	ret = gen8_drawctxt_switch(adreno_dev, rb, drawctxt);

	/*
	 * In the unlikely event of an error in the drawctxt switch,
	 * treat it like a hang
	 */
	if (ret) {
		/*
		 * It is "normal" to get a -ENOSPC or a -ENOENT. Don't log it,
		 * the upper layers know how to handle it
		 */
		if (ret != -ENOSPC && ret != -ENOENT)
			dev_err(device->dev,
				"Unable to switch draw context: %d\n", ret);
		goto done;
	}

	adreno_drawobj_set_constraint(device, drawobj);

	ret = gen8_ringbuffer_addcmds(adreno_dev, drawctxt->rb, drawctxt,
		flags, cmds, index, drawobj->timestamp, time);

done:
	trace_kgsl_issueibcmds(device, drawctxt->base.id, numibs,
		drawobj->timestamp, drawobj->flags, ret, drawctxt->type);

	kvfree(cmds);

	return ret;
}