// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
 * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
 */

#include "adreno.h"
#include "adreno_a5xx.h"
#include "adreno_pm4types.h"
#include "adreno_ringbuffer.h"
#include "adreno_trace.h"
#include "kgsl_trace.h"

static int a5xx_rb_pagetable_switch(struct kgsl_device *device,
		struct adreno_context *drawctxt,
		struct adreno_ringbuffer *rb,
		struct kgsl_pagetable *pagetable, u32 *cmds)
{
	u64 ttbr0 = kgsl_mmu_pagetable_get_ttbr0(pagetable);
	u32 id = drawctxt ? drawctxt->base.id : 0;

	if (pagetable == device->mmu.defaultpagetable)
		return 0;

	cmds[0] = cp_type7_packet(CP_SMMU_TABLE_UPDATE, 3);
	cmds[1] = lower_32_bits(ttbr0);
	cmds[2] = upper_32_bits(ttbr0);
	cmds[3] = id;

	cmds[4] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0);
	cmds[5] = cp_type7_packet(CP_WAIT_FOR_ME, 0);
	cmds[6] = cp_type4_packet(A5XX_CP_CNTL, 1);
	cmds[7] = 1;

	cmds[8] = cp_type7_packet(CP_MEM_WRITE, 5);
	cmds[9] = lower_32_bits(SCRATCH_RB_GPU_ADDR(device, rb->id, ttbr0));
	cmds[10] = upper_32_bits(SCRATCH_RB_GPU_ADDR(device, rb->id, ttbr0));
	cmds[11] = lower_32_bits(ttbr0);
	cmds[12] = upper_32_bits(ttbr0);
	cmds[13] = id;

	cmds[14] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0);
	cmds[15] = cp_type7_packet(CP_WAIT_FOR_ME, 0);
	cmds[16] = cp_type4_packet(A5XX_CP_CNTL, 1);
	cmds[17] = 0;

	return 18;
}

#define RB_SOPTIMESTAMP(device, rb) \
	MEMSTORE_RB_GPU_ADDR(device, rb, soptimestamp)
#define CTXT_SOPTIMESTAMP(device, drawctxt) \
	MEMSTORE_ID_GPU_ADDR(device, (drawctxt)->base.id, soptimestamp)

#define RB_EOPTIMESTAMP(device, rb) \
	MEMSTORE_RB_GPU_ADDR(device, rb, eoptimestamp)
#define CTXT_EOPTIMESTAMP(device, drawctxt) \
	MEMSTORE_ID_GPU_ADDR(device, (drawctxt)->base.id, eoptimestamp)

int a5xx_ringbuffer_submit(struct adreno_ringbuffer *rb,
		struct adreno_submit_time *time, bool sync)
{
	struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb);
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	unsigned long flags;

	adreno_get_submit_time(adreno_dev, rb, time);
	adreno_profile_submit_time(time);

	if (sync) {
		u32 *cmds = adreno_ringbuffer_allocspace(rb, 3);

		if (IS_ERR(cmds))
			return PTR_ERR(cmds);

		cmds[0] = cp_type7_packet(CP_WHERE_AM_I, 2);
		cmds[1] = lower_32_bits(SCRATCH_RB_GPU_ADDR(device,
			rb->id, rptr));
		cmds[2] = upper_32_bits(SCRATCH_RB_GPU_ADDR(device,
			rb->id, rptr));
	}

	spin_lock_irqsave(&rb->preempt_lock, flags);

	if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE)) {
		if (adreno_dev->cur_rb == rb) {
			kgsl_pwrscale_busy(device);
			kgsl_regwrite(device, A5XX_CP_RB_WPTR, rb->_wptr);
		}
	}

	rb->wptr = rb->_wptr;

	spin_unlock_irqrestore(&rb->preempt_lock, flags);

	return 0;
}

int a5xx_ringbuffer_init(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	int i;

	if (IS_ERR_OR_NULL(device->scratch))
		device->scratch = kgsl_allocate_global(device, PAGE_SIZE,
			0, 0, KGSL_MEMDESC_RANDOM | KGSL_MEMDESC_PRIVILEGED,
			"scratch");

	if (IS_ERR(device->scratch))
		return PTR_ERR(device->scratch);

	adreno_dev->cur_rb = &(adreno_dev->ringbuffers[0]);

	if (!adreno_preemption_feature_set(adreno_dev)) {
		adreno_dev->num_ringbuffers = 1;
		return adreno_ringbuffer_setup(adreno_dev,
			&adreno_dev->ringbuffers[0], 0);
	}

	adreno_dev->num_ringbuffers = ARRAY_SIZE(adreno_dev->ringbuffers);

	for (i = 0; i < adreno_dev->num_ringbuffers; i++) {
		int ret;

		ret = adreno_ringbuffer_setup(adreno_dev,
			&adreno_dev->ringbuffers[i], i);
		if (ret)
			return ret;
	}

	timer_setup(&adreno_dev->preempt.timer, adreno_preemption_timer, 0);

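	/*
	 * Preemption is in play: with every ringbuffer set up, initialize
	 * the preemption state (a5xx_preemption_init() sets up the
	 * per-ringbuffer preemption records)
	 */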
	a5xx_preemption_init(adreno_dev);
	return 0;
}

#define A5XX_SUBMIT_MAX 64

int a5xx_ringbuffer_addcmds(struct adreno_device *adreno_dev,
		struct adreno_ringbuffer *rb, struct adreno_context *drawctxt,
		u32 flags, u32 *in, u32 dwords, u32 timestamp,
		struct adreno_submit_time *time)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	static u32 sequence;
	u32 size = A5XX_SUBMIT_MAX + dwords;
	u32 *cmds, index = 0;
	u64 profile_gpuaddr;
	u32 profile_dwords;

	if (adreno_drawctxt_detached(drawctxt))
		return -ENOENT;

	if (adreno_gpu_fault(adreno_dev) != 0)
		return -EPROTO;

	rb->timestamp++;

	if (drawctxt)
		drawctxt->internal_timestamp = rb->timestamp;

	cmds = adreno_ringbuffer_allocspace(rb, size);
	if (IS_ERR(cmds))
		return PTR_ERR(cmds);

	/* Identify the start of a command */
	cmds[index++] = cp_type7_packet(CP_NOP, 1);
	cmds[index++] = drawctxt ? CMD_IDENTIFIER : CMD_INTERNAL_IDENTIFIER;

	/* 14 dwords */
	index += a5xx_preemption_pre_ibsubmit(adreno_dev, rb, drawctxt,
		&cmds[index]);

	profile_gpuaddr = adreno_profile_preib_processing(adreno_dev,
		drawctxt, &profile_dwords);

	if (profile_gpuaddr) {
		cmds[index++] = cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3);
		cmds[index++] = lower_32_bits(profile_gpuaddr);
		cmds[index++] = upper_32_bits(profile_gpuaddr);
		cmds[index++] = profile_dwords;
	}

	if (drawctxt) {
		cmds[index++] = cp_type7_packet(CP_MEM_WRITE, 3);
		cmds[index++] = lower_32_bits(CTXT_SOPTIMESTAMP(device,
			drawctxt));
		cmds[index++] = upper_32_bits(CTXT_SOPTIMESTAMP(device,
			drawctxt));
		cmds[index++] = timestamp;
	}

	cmds[index++] = cp_type7_packet(CP_MEM_WRITE, 3);
	cmds[index++] = lower_32_bits(RB_SOPTIMESTAMP(device, rb));
	cmds[index++] = upper_32_bits(RB_SOPTIMESTAMP(device, rb));
	cmds[index++] = rb->timestamp;

	if (IS_SECURE(flags)) {
		cmds[index++] = cp_type7_packet(CP_SET_SECURE_MODE, 1);
		cmds[index++] = 1;
	}

	if (IS_NOTPROTECTED(flags)) {
		cmds[index++] = cp_type7_packet(CP_SET_PROTECTED_MODE, 1);
		cmds[index++] = 0;
	}

	memcpy(&cmds[index], in, dwords << 2);
	index += dwords;

	if (IS_NOTPROTECTED(flags)) {
		cmds[index++] = cp_type7_packet(CP_SET_PROTECTED_MODE, 1);
		cmds[index++] = 1;
	}

	/* 4 dwords */
	profile_gpuaddr = adreno_profile_postib_processing(adreno_dev,
		drawctxt, &profile_dwords);

	if (profile_gpuaddr) {
		cmds[index++] = cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3);
		cmds[index++] = lower_32_bits(profile_gpuaddr);
		cmds[index++] = upper_32_bits(profile_gpuaddr);
		cmds[index++] = profile_dwords;
	}

	if (!adreno_is_a510(adreno_dev) &&
		test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE,
			&device->mmu.pfpolicy))
		cmds[index++] = cp_type7_packet(CP_WAIT_MEM_WRITES, 0);

	/*
	 * Do a unique memory write from the GPU to assist in early detection
	 * of interrupt storms
	 */
	cmds[index++] = cp_type7_packet(CP_MEM_WRITE, 3);
	cmds[index++] = lower_32_bits(MEMSTORE_ID_GPU_ADDR(device,
		KGSL_MEMSTORE_GLOBAL, ref_wait_ts));
	cmds[index++] = upper_32_bits(MEMSTORE_ID_GPU_ADDR(device,
		KGSL_MEMSTORE_GLOBAL, ref_wait_ts));
	cmds[index++] = ++sequence;

	/*
	 * If this is an internal command, just write the ringbuffer
	 * timestamp; otherwise write both the context and the ringbuffer
	 * timestamps
	 */
	if (!drawctxt) {
		cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4);
		cmds[index++] = CACHE_FLUSH_TS | (1 << 31);
		cmds[index++] = lower_32_bits(RB_EOPTIMESTAMP(device, rb));
		cmds[index++] = upper_32_bits(RB_EOPTIMESTAMP(device, rb));
		cmds[index++] = rb->timestamp;
	} else {
		cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4);
		cmds[index++] = CACHE_FLUSH_TS | (1 << 31);
		cmds[index++] = lower_32_bits(CTXT_EOPTIMESTAMP(device,
			drawctxt));
		cmds[index++] = upper_32_bits(CTXT_EOPTIMESTAMP(device,
			drawctxt));
		cmds[index++] = timestamp;
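		/*
		 * Write the ringbuffer timestamp as well, but without
		 * bit 31 set: only the context timestamp event above
		 * needs to raise a CACHE_FLUSH_TS interrupt for this
		 * submission
		 */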
		cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4);
		cmds[index++] = CACHE_FLUSH_TS;
		cmds[index++] = lower_32_bits(RB_EOPTIMESTAMP(device, rb));
		cmds[index++] = upper_32_bits(RB_EOPTIMESTAMP(device, rb));
		cmds[index++] = rb->timestamp;
	}

	if (IS_WFI(flags))
		cmds[index++] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0);

	if (IS_SECURE(flags)) {
		cmds[index++] = cp_type7_packet(CP_SET_SECURE_MODE, 1);
		cmds[index++] = 0;
	}

	/* 5 dwords */
	index += a5xx_preemption_post_ibsubmit(adreno_dev, &cmds[index]);

	/* Adjust the wptr for the number of dwords we actually wrote */
	rb->_wptr -= (size - index);

	a5xx_ringbuffer_submit(rb, time,
		!adreno_is_preemption_enabled(adreno_dev));

	return 0;
}

static u32 a5xx_get_alwayson_counter(struct adreno_device *adreno_dev,
		u32 *cmds, u64 gpuaddr)
{
	cmds[0] = cp_type7_packet(CP_REG_TO_MEM, 3);
	cmds[1] = A5XX_RBBM_ALWAYSON_COUNTER_LO;

	/* On some targets the upper 32 bits are not reliable */
	if (ADRENO_GPUREV(adreno_dev) > ADRENO_REV_A530)
		cmds[1] |= (1 << 30) | (2 << 18);

	cmds[2] = lower_32_bits(gpuaddr);
	cmds[3] = upper_32_bits(gpuaddr);

	return 4;
}

/* This is the maximum possible size for 64 bit targets */
#define PROFILE_IB_DWORDS 4
#define PROFILE_IB_SLOTS (PAGE_SIZE / (PROFILE_IB_DWORDS << 2))

static u64 a5xx_get_user_profiling_ib(struct adreno_device *adreno_dev,
		struct adreno_ringbuffer *rb, struct kgsl_drawobj_cmd *cmdobj,
		u32 target_offset, u32 *cmds)
{
	u32 offset, *ib, dwords;
	u64 gpuaddr;

	if (IS_ERR(rb->profile_desc))
		return 0;

	offset = rb->profile_index * (PROFILE_IB_DWORDS << 2);
	ib = rb->profile_desc->hostptr + offset;
	gpuaddr = rb->profile_desc->gpuaddr + offset;

	dwords = a5xx_get_alwayson_counter(adreno_dev, ib,
		cmdobj->profiling_buffer_gpuaddr + target_offset);

	cmds[0] = cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3);
	cmds[1] = lower_32_bits(gpuaddr);
	cmds[2] = upper_32_bits(gpuaddr);
	cmds[3] = dwords;

	rb->profile_index = (rb->profile_index + 1) % PROFILE_IB_SLOTS;

	return 4;
}

static int a5xx_rb_context_switch(struct adreno_device *adreno_dev,
		struct adreno_ringbuffer *rb,
		struct adreno_context *drawctxt)
{
	struct kgsl_pagetable *pagetable =
		adreno_drawctxt_get_pagetable(drawctxt);
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	int count = 0;
	u32 cmds[32];

	if (adreno_drawctxt_get_pagetable(rb->drawctxt_active) != pagetable)
		count += a5xx_rb_pagetable_switch(device, drawctxt,
			rb, pagetable, cmds);

	cmds[count++] = cp_type7_packet(CP_NOP, 1);
	cmds[count++] = CONTEXT_TO_MEM_IDENTIFIER;

	cmds[count++] = cp_type7_packet(CP_MEM_WRITE, 3);
	cmds[count++] = lower_32_bits(MEMSTORE_RB_GPU_ADDR(device, rb,
		current_context));
	cmds[count++] = upper_32_bits(MEMSTORE_RB_GPU_ADDR(device, rb,
		current_context));
	cmds[count++] = drawctxt->base.id;

	cmds[count++] = cp_type7_packet(CP_MEM_WRITE, 3);
	cmds[count++] = lower_32_bits(MEMSTORE_ID_GPU_ADDR(device,
		KGSL_MEMSTORE_GLOBAL, current_context));
	cmds[count++] = upper_32_bits(MEMSTORE_ID_GPU_ADDR(device,
		KGSL_MEMSTORE_GLOBAL, current_context));
	cmds[count++] = drawctxt->base.id;

	cmds[count++] = cp_type4_packet(A5XX_UCHE_INVALIDATE0, 1);
	cmds[count++] = 0x12;

	return a5xx_ringbuffer_addcmds(adreno_dev, rb, NULL, F_NOTPROTECTED,
		cmds, count, 0, NULL);
}
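/*
 * The switch commands above are submitted with a NULL drawctxt, so
 * a5xx_ringbuffer_addcmds() tags them with CMD_INTERNAL_IDENTIFIER and
 * retires them on the ringbuffer timestamp alone; F_NOTPROTECTED drops CP
 * protected mode around them, presumably so the protected-register writes
 * (such as the UCHE invalidate) are not rejected
 */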
static int a5xx_drawctxt_switch(struct adreno_device *adreno_dev,
		struct adreno_ringbuffer *rb,
		struct adreno_context *drawctxt)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);

	if (rb->drawctxt_active == drawctxt)
		return 0;

	if (kgsl_context_detached(&drawctxt->base))
		return -ENOENT;

	if (!_kgsl_context_get(&drawctxt->base))
		return -ENOENT;

	trace_adreno_drawctxt_switch(rb, drawctxt);

	a5xx_rb_context_switch(adreno_dev, rb, drawctxt);

	/* Release the current drawctxt as soon as the new one is switched */
	adreno_put_drawctxt_on_timestamp(device, rb->drawctxt_active,
		rb, rb->timestamp);

	rb->drawctxt_active = drawctxt;

	return 0;
}

#define A5XX_USER_PROFILE_IB(dev, rb, cmdobj, cmds, field) \
	a5xx_get_user_profiling_ib((dev), (rb), (cmdobj), \
		offsetof(struct kgsl_drawobj_profiling_buffer, field), \
		(cmds))

#define A5XX_KERNEL_PROFILE(dev, cmdobj, cmds, field) \
	a5xx_get_alwayson_counter((dev), (cmds), \
		(dev)->profile_buffer->gpuaddr + \
		ADRENO_DRAWOBJ_PROFILE_OFFSET((cmdobj)->profile_index, \
			field))

#define A5XX_COMMAND_DWORDS 32

int a5xx_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
		struct kgsl_drawobj_cmd *cmdobj, u32 flags,
		struct adreno_submit_time *time)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
	struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context);
	struct adreno_ringbuffer *rb = drawctxt->rb;
	int ret = 0, numibs = 0, index = 0;
	u32 *cmds;

	/* Count the number of IBs (if we are not skipping) */
	if (!IS_SKIP(flags)) {
		struct list_head *tmp;

		list_for_each(tmp, &cmdobj->cmdlist)
			numibs++;
	}

	cmds = kmalloc((A5XX_COMMAND_DWORDS + (numibs * 5)) << 2, GFP_KERNEL);
	if (!cmds) {
		ret = -ENOMEM;
		goto done;
	}

	cmds[index++] = cp_type7_packet(CP_NOP, 1);
	cmds[index++] = START_IB_IDENTIFIER;

	/* Kernel profiling: 4 dwords */
	if (IS_KERNEL_PROFILE(flags))
		index += A5XX_KERNEL_PROFILE(adreno_dev, cmdobj, &cmds[index],
			started);

	/* User profiling: 4 dwords */
	if (IS_USER_PROFILE(flags))
		index += A5XX_USER_PROFILE_IB(adreno_dev, rb, cmdobj,
			&cmds[index], gpu_ticks_submitted);

	if (numibs) {
		struct kgsl_memobj_node *ib;

		list_for_each_entry(ib, &cmdobj->cmdlist, node) {
			/*
			 * If the IB is being skipped, emit a NOP with a
			 * four dword payload so the CP jumps over the
			 * indirect buffer packet that follows
			 */
			if (ib->priv & MEMOBJ_SKIP ||
				(ib->flags & KGSL_CMDLIST_CTXTSWITCH_PREAMBLE
				 && !IS_PREAMBLE(flags)))
				cmds[index++] = cp_type7_packet(CP_NOP, 4);

			cmds[index++] =
				cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3);
			cmds[index++] = lower_32_bits(ib->gpuaddr);
			cmds[index++] = upper_32_bits(ib->gpuaddr);

			/* Double check that IB_PRIV is never set */
			cmds[index++] = (ib->size >> 2) & 0xfffff;
		}
	}

	/*
	 * SRM -- set render mode (e.g. binning, direct render, etc).
	 * SRM is usually set by the UMD at the start of an IB to tell the
	 * CP the type of preemption.
	 * The KMD needs to set SRM to NULL to indicate to the CP that
	 * rendering by the IB is done.
	 */
	cmds[index++] = cp_type7_packet(CP_SET_RENDER_MODE, 5);
	cmds[index++] = 0;
	cmds[index++] = 0;
	cmds[index++] = 0;
	cmds[index++] = 0;
	cmds[index++] = 0;

	cmds[index++] = cp_type7_packet(CP_YIELD_ENABLE, 1);
	cmds[index++] = 1;

	/* 4 dwords */
	if (IS_KERNEL_PROFILE(flags))
		index += A5XX_KERNEL_PROFILE(adreno_dev, cmdobj, &cmds[index],
			retired);

	/* 4 dwords */
	if (IS_USER_PROFILE(flags))
		index += A5XX_USER_PROFILE_IB(adreno_dev, rb, cmdobj,
			&cmds[index], gpu_ticks_retired);

	cmds[index++] = cp_type7_packet(CP_NOP, 1);
	cmds[index++] = END_IB_IDENTIFIER;

	ret = a5xx_drawctxt_switch(adreno_dev, rb, drawctxt);

	/*
	 * In the unlikely event of an error in the drawctxt switch,
	 * treat it like a hang
	 */
	if (ret) {
		/*
		 * It is "normal" to get a -ENOSPC or a -ENOENT. Don't log it,
		 * the upper layers know how to handle it
		 */
		if (ret != -ENOSPC && ret != -ENOENT)
			dev_err(device->dev,
				"Unable to switch draw context: %d\n", ret);
		goto done;
	}
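	/* Apply any power constraint tied to this command before submitting */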
	adreno_drawobj_set_constraint(device, drawobj);

	ret = a5xx_ringbuffer_addcmds(adreno_dev, drawctxt->rb, drawctxt,
		flags, cmds, index, drawobj->timestamp, time);

done:
	trace_kgsl_issueibcmds(device, drawctxt->base.id, numibs,
		drawobj->timestamp, drawobj->flags, ret, drawctxt->type);

	kfree(cmds);
	return ret;
}