// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2014-2017,2021 The Linux Foundation. All rights reserved.
 * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
 */

#include "adreno.h"
#include "adreno_a5xx.h"
#include "adreno_pm4types.h"
#include "adreno_trace.h"

#define PREEMPT_RECORD(_field) \
		offsetof(struct a5xx_cp_preemption_record, _field)

#define PREEMPT_SMMU_RECORD(_field) \
		offsetof(struct a5xx_cp_smmu_info, _field)

static void _update_wptr(struct adreno_device *adreno_dev, bool reset_timer)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	struct adreno_ringbuffer *rb = adreno_dev->cur_rb;
	unsigned int wptr;
	unsigned long flags;

	spin_lock_irqsave(&rb->preempt_lock, flags);

	kgsl_regread(device, A5XX_CP_RB_WPTR, &wptr);

	if (wptr != rb->wptr) {
		kgsl_regwrite(device, A5XX_CP_RB_WPTR, rb->wptr);
		/*
		 * In case something got submitted while preemption was
		 * ongoing, reset the timer.
		 */
		reset_timer = true;
	}

	if (reset_timer)
		rb->dispatch_q.expires = jiffies +
			msecs_to_jiffies(adreno_drawobj_timeout);

	spin_unlock_irqrestore(&rb->preempt_lock, flags);
}

static void _a5xx_preemption_done(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	unsigned int status;

	/*
	 * In the very unlikely case that the power is off, do nothing - the
	 * state will be reset on power up and everybody will be happy
	 */
	if (!kgsl_state_is_awake(device))
		return;

	kgsl_regread(device, A5XX_CP_CONTEXT_SWITCH_CNTL, &status);

	if (status != 0) {
		dev_err(device->dev,
			"Preemption not complete: status=%X cur=%d R/W=%X/%X next=%d R/W=%X/%X\n",
			status, adreno_dev->cur_rb->id,
			adreno_get_rptr(adreno_dev->cur_rb),
			adreno_dev->cur_rb->wptr, adreno_dev->next_rb->id,
			adreno_get_rptr(adreno_dev->next_rb),
			adreno_dev->next_rb->wptr);

		/* Set a fault and restart */
		adreno_dispatcher_fault(adreno_dev, ADRENO_PREEMPT_FAULT);

		return;
	}

	del_timer_sync(&adreno_dev->preempt.timer);

	trace_adreno_preempt_done(adreno_dev->cur_rb->id,
		adreno_dev->next_rb->id, 0, 0);

	/* Clean up all the bits */
	adreno_dev->prev_rb = adreno_dev->cur_rb;
	adreno_dev->cur_rb = adreno_dev->next_rb;
	adreno_dev->next_rb = NULL;

	/* Update the wptr for the new command queue */
	_update_wptr(adreno_dev, true);

	/* Update the dispatcher timer for the new command queue */
	mod_timer(&adreno_dev->dispatcher.timer,
		adreno_dev->cur_rb->dispatch_q.expires);

	/* Clear the preempt state */
	adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
}

static void _a5xx_preemption_fault(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	unsigned int status;

	/*
	 * If the power is on check the preemption status one more time - if it
	 * was successful then just transition to the complete state
	 */
	if (kgsl_state_is_awake(device)) {
		kgsl_regread(device, A5XX_CP_CONTEXT_SWITCH_CNTL, &status);

		if (status == 0) {
			adreno_set_preempt_state(adreno_dev,
				ADRENO_PREEMPT_COMPLETE);

			adreno_dispatcher_schedule(device);
			return;
		}
	}

	dev_err(device->dev,
		"Preemption timed out: cur=%d R/W=%X/%X, next=%d R/W=%X/%X\n",
		adreno_dev->cur_rb->id,
		adreno_get_rptr(adreno_dev->cur_rb), adreno_dev->cur_rb->wptr,
		adreno_dev->next_rb->id,
		adreno_get_rptr(adreno_dev->next_rb),
		adreno_dev->next_rb->wptr);

	adreno_dispatcher_fault(adreno_dev, ADRENO_PREEMPT_FAULT);
}
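
/*
 * Runs from the preemption workqueue after the preempt timer fires. The
 * device mutex is held so the GPU stays powered while the CP status is
 * re-checked; if the preemption really did stall, _a5xx_preemption_fault()
 * raises a dispatcher fault to recover.
 */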
static void _a5xx_preemption_worker(struct work_struct *work)
{
	struct adreno_preemption *preempt = container_of(work,
		struct adreno_preemption, work);
	struct adreno_device *adreno_dev = container_of(preempt,
		struct adreno_device, preempt);
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);

	/* Need to take the mutex to make sure that the power stays on */
	mutex_lock(&device->mutex);

	if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_FAULTED))
		_a5xx_preemption_fault(adreno_dev);

	mutex_unlock(&device->mutex);
}

/* Find the highest priority active ringbuffer */
static struct adreno_ringbuffer *a5xx_next_ringbuffer(
		struct adreno_device *adreno_dev)
{
	struct adreno_ringbuffer *rb;
	unsigned long flags;
	unsigned int i;

	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
		bool empty;

		spin_lock_irqsave(&rb->preempt_lock, flags);
		empty = adreno_rb_empty(rb);
		spin_unlock_irqrestore(&rb->preempt_lock, flags);

		if (!empty)
			return rb;
	}

	return NULL;
}
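
/*
 * Pick the next non-empty ringbuffer and give the CP everything it needs to
 * switch to it: the target wptr in the preemption record, the pagetable
 * (TTBR0/CONTEXTIDR) in the SMMU info block when per-process pagetables are
 * in use, and the restore address registers. Writing
 * A5XX_CP_CONTEXT_SWITCH_CNTL then kicks off the switch; completion arrives
 * via a5xx_preempt_callback() or, on timeout, the preempt timer.
 */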
void a5xx_preemption_trigger(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	struct kgsl_iommu *iommu = KGSL_IOMMU(device);
	struct adreno_ringbuffer *next;
	uint64_t ttbr0;
	unsigned int contextidr;
	unsigned long flags;

	/* Put ourselves into a possible trigger state */
	if (!adreno_move_preempt_state(adreno_dev,
		ADRENO_PREEMPT_NONE, ADRENO_PREEMPT_START))
		return;

	/* Get the next ringbuffer to preempt in */
	next = a5xx_next_ringbuffer(adreno_dev);

	/*
	 * Nothing to do if every ringbuffer is empty or if the current
	 * ringbuffer is the only active one
	 */
	if (next == NULL || next == adreno_dev->cur_rb) {
		/*
		 * Update any critical things that might have been skipped
		 * while we were looking for a new ringbuffer
		 */
		if (next != NULL) {
			_update_wptr(adreno_dev, false);

			mod_timer(&adreno_dev->dispatcher.timer,
				adreno_dev->cur_rb->dispatch_q.expires);
		}

		adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
		return;
	}

	/* Turn off the dispatcher timer */
	del_timer(&adreno_dev->dispatcher.timer);

	/*
	 * This is the most critical section - we need to take care not to race
	 * until we have programmed the CP for the switch
	 */
	spin_lock_irqsave(&next->preempt_lock, flags);

	/* Get the pagetable from the pagetable info */
	kgsl_sharedmem_readq(device->scratch, &ttbr0,
		SCRATCH_RB_OFFSET(next->id, ttbr0));
	kgsl_sharedmem_readl(device->scratch, &contextidr,
		SCRATCH_RB_OFFSET(next->id, contextidr));

	kgsl_sharedmem_writel(next->preemption_desc,
		PREEMPT_RECORD(wptr), next->wptr);

	spin_unlock_irqrestore(&next->preempt_lock, flags);

	/* And write it to the smmu info */
	if (kgsl_mmu_is_perprocess(&device->mmu)) {
		kgsl_sharedmem_writeq(iommu->smmu_info,
			PREEMPT_SMMU_RECORD(ttbr0), ttbr0);
		kgsl_sharedmem_writel(iommu->smmu_info,
			PREEMPT_SMMU_RECORD(context_idr), contextidr);
	}

	kgsl_regwrite(device, A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO,
		lower_32_bits(next->preemption_desc->gpuaddr));
	kgsl_regwrite(device, A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_HI,
		upper_32_bits(next->preemption_desc->gpuaddr));

	adreno_dev->next_rb = next;

	/* Start the timer to detect a stuck preemption */
	mod_timer(&adreno_dev->preempt.timer,
		jiffies + msecs_to_jiffies(ADRENO_PREEMPT_TIMEOUT));

	trace_adreno_preempt_trigger(adreno_dev->cur_rb->id,
		adreno_dev->next_rb->id, 1, 0);

	adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_TRIGGERED);

	/* Trigger the preemption */
	kgsl_regwrite(device, A5XX_CP_CONTEXT_SWITCH_CNTL, 1);
}

void a5xx_preempt_callback(struct adreno_device *adreno_dev, int bit)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	unsigned int status;

	if (!adreno_move_preempt_state(adreno_dev,
		ADRENO_PREEMPT_TRIGGERED, ADRENO_PREEMPT_PENDING))
		return;

	kgsl_regread(device, A5XX_CP_CONTEXT_SWITCH_CNTL, &status);

	if (status != 0) {
		dev_err(KGSL_DEVICE(adreno_dev)->dev,
			"preempt interrupt with non-zero status: %X\n",
			status);

		/*
		 * Under the assumption that this is a race between the
		 * interrupt and the register, schedule the worker to clean up.
		 * If the status still hasn't resolved itself by the time we
		 * get there then we have to assume something bad happened
		 */
		adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_COMPLETE);
		adreno_dispatcher_schedule(device);
		return;
	}

	del_timer(&adreno_dev->preempt.timer);

	trace_adreno_preempt_done(adreno_dev->cur_rb->id,
		adreno_dev->next_rb->id, 0, 0);

	adreno_dev->prev_rb = adreno_dev->cur_rb;
	adreno_dev->cur_rb = adreno_dev->next_rb;
	adreno_dev->next_rb = NULL;

	/* Update the wptr if it changed while preemption was ongoing */
	_update_wptr(adreno_dev, true);

	/* Update the dispatcher timer for the new command queue */
	mod_timer(&adreno_dev->dispatcher.timer,
		adreno_dev->cur_rb->dispatch_q.expires);

	adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);

	a5xx_preemption_trigger(adreno_dev);
}

void a5xx_preemption_schedule(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);

	if (!adreno_is_preemption_enabled(adreno_dev))
		return;

	mutex_lock(&device->mutex);

	if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_COMPLETE))
		_a5xx_preemption_done(adreno_dev);

	a5xx_preemption_trigger(adreno_dev);

	mutex_unlock(&device->mutex);
}
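
/*
 * Emitted at the start of each ringbuffer submission when preemption is
 * enabled: select the preemption style for the draw context, point the CP
 * at this ringbuffer's context save record, program local preemption for
 * fine-grain contexts and enable yield points so the CP knows where it is
 * allowed to switch.
 */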
u32 a5xx_preemption_pre_ibsubmit(struct adreno_device *adreno_dev,
		struct adreno_ringbuffer *rb,
		struct adreno_context *drawctxt, u32 *cmds)
{
	unsigned int *cmds_orig = cmds;
	uint64_t gpuaddr = rb->preemption_desc->gpuaddr;
	unsigned int preempt_style = 0;

	if (!adreno_is_preemption_enabled(adreno_dev))
		return 0;

	if (drawctxt) {
		/*
		 * Preemption from secure to non-secure needs the zap shader
		 * to be run to clear all secure content. The CP does not know
		 * during preemption whether it is switching between secure
		 * and non-secure contexts, so restrict secure contexts to
		 * ringbuffer-level preemption.
		 */
		if (drawctxt->base.flags & KGSL_CONTEXT_SECURE)
			preempt_style = KGSL_CONTEXT_PREEMPT_STYLE_RINGBUFFER;
		else
			preempt_style = FIELD_GET(KGSL_CONTEXT_PREEMPT_STYLE_MASK,
					drawctxt->base.flags);
	}

	/*
	 * CP_PREEMPT_ENABLE_GLOBAL (global preemption) can only be set by KMD
	 * in the ringbuffer:
	 * 1) Set global preemption to 0x0 to disable global preemption.
	 *    Only RB-level preemption is allowed in this mode.
	 * 2) Set global preemption to defer (0x2) for fine-grain preemption.
	 *    When global preemption is set to defer (0x2),
	 *    CP_PREEMPT_ENABLE_LOCAL (local preemption) determines the
	 *    preemption point. Local preemption can be enabled by both UMD
	 *    (within an IB) and KMD.
	 */
	*cmds++ = cp_type7_packet(CP_PREEMPT_ENABLE_GLOBAL, 1);
	*cmds++ = ((preempt_style == KGSL_CONTEXT_PREEMPT_STYLE_FINEGRAIN)
				? 2 : 0);

	/* Turn CP protection OFF */
	cmds += cp_protected_mode(adreno_dev, cmds, 0);

	/*
	 * CP during context switch will save context switch info to
	 * a5xx_cp_preemption_record pointed by CONTEXT_SWITCH_SAVE_ADDR
	 */
	*cmds++ = cp_type4_packet(A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 1);
	*cmds++ = lower_32_bits(gpuaddr);
	*cmds++ = cp_type4_packet(A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_HI, 1);
	*cmds++ = upper_32_bits(gpuaddr);

	/* Turn CP protection ON */
	cmds += cp_protected_mode(adreno_dev, cmds, 1);

	/*
	 * Enable local preemption for finegrain preemption in case of
	 * a misbehaving IB
	 */
	if (preempt_style == KGSL_CONTEXT_PREEMPT_STYLE_FINEGRAIN) {
		*cmds++ = cp_type7_packet(CP_PREEMPT_ENABLE_LOCAL, 1);
		*cmds++ = 1;
	} else {
		*cmds++ = cp_type7_packet(CP_PREEMPT_ENABLE_LOCAL, 1);
		*cmds++ = 0;
	}

	/* Enable CP_CONTEXT_SWITCH_YIELD packets in the IB2s */
	*cmds++ = cp_type7_packet(CP_YIELD_ENABLE, 1);
	*cmds++ = 2;

	return (unsigned int) (cmds - cmds_orig);
}

unsigned int a5xx_preemption_post_ibsubmit(struct adreno_device *adreno_dev,
	unsigned int *cmds)
{
	int dwords = 0;

	if (!adreno_is_preemption_enabled(adreno_dev))
		return 0;

	cmds[dwords++] = cp_type7_packet(CP_CONTEXT_SWITCH_YIELD, 4);
	/* Write NULL to the address to skip the data write */
	dwords += cp_gpuaddr(adreno_dev, &cmds[dwords], 0x0);
	cmds[dwords++] = 1;
	/* generate interrupt on preemption completion */
	cmds[dwords++] = 1;

	return dwords;
}

void a5xx_preemption_start(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	struct kgsl_iommu *iommu = KGSL_IOMMU(device);
	struct adreno_ringbuffer *rb;
	unsigned int i;

	if (!adreno_is_preemption_enabled(adreno_dev))
		return;

	/* Force the state to be clear */
	adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);

	/* Only set up smmu info when per-process pagetables are enabled */
	if (kgsl_mmu_is_perprocess(&device->mmu)) {
		/* smmu_info is allocated and mapped in a5xx_preemption_iommu_init */
		kgsl_sharedmem_writel(iommu->smmu_info,
			PREEMPT_SMMU_RECORD(magic), A5XX_CP_SMMU_INFO_MAGIC_REF);
		kgsl_sharedmem_writeq(iommu->smmu_info,
			PREEMPT_SMMU_RECORD(ttbr0), MMU_DEFAULT_TTBR0(device));

		/* The CP doesn't use the asid record, so poison it */
		kgsl_sharedmem_writel(iommu->smmu_info,
			PREEMPT_SMMU_RECORD(asid), 0xDECAFBAD);
		kgsl_sharedmem_writel(iommu->smmu_info,
			PREEMPT_SMMU_RECORD(context_idr), 0);

		kgsl_regwrite(device, A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO,
			lower_32_bits(iommu->smmu_info->gpuaddr));
		kgsl_regwrite(device, A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI,
			upper_32_bits(iommu->smmu_info->gpuaddr));
	}

	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
		/*
		 * preemption_desc is allocated and mapped at init time,
		 * so no need to check sharedmem_writel return value
		 */
		kgsl_sharedmem_writel(rb->preemption_desc,
			PREEMPT_RECORD(rptr), 0);
		kgsl_sharedmem_writel(rb->preemption_desc,
			PREEMPT_RECORD(wptr), 0);

		adreno_ringbuffer_set_pagetable(device, rb,
			device->mmu.defaultpagetable);
	}
}
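
/*
 * Initialize one ringbuffer's CP context record: allocate the privileged
 * record buffer on first use and seed the magic, ringbuffer base/control,
 * rptr shadow address and the per-ringbuffer preemption counter address
 * that the CP saves to and restores from during a switch.
 */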
static int a5xx_preemption_ringbuffer_init(struct adreno_device *adreno_dev,
	struct adreno_ringbuffer *rb, uint64_t counteraddr)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);

	if (IS_ERR_OR_NULL(rb->preemption_desc))
		rb->preemption_desc = kgsl_allocate_global(device,
			A5XX_CP_CTXRECORD_SIZE_IN_BYTES, SZ_16K, 0,
			KGSL_MEMDESC_PRIVILEGED, "preemption_desc");

	if (IS_ERR(rb->preemption_desc))
		return PTR_ERR(rb->preemption_desc);

	kgsl_sharedmem_writel(rb->preemption_desc,
		PREEMPT_RECORD(magic), A5XX_CP_CTXRECORD_MAGIC_REF);
	kgsl_sharedmem_writel(rb->preemption_desc,
		PREEMPT_RECORD(info), 0);
	kgsl_sharedmem_writel(rb->preemption_desc,
		PREEMPT_RECORD(data), 0);
	kgsl_sharedmem_writel(rb->preemption_desc,
		PREEMPT_RECORD(cntl), A5XX_CP_RB_CNTL_DEFAULT);
	kgsl_sharedmem_writel(rb->preemption_desc,
		PREEMPT_RECORD(rptr), 0);
	kgsl_sharedmem_writel(rb->preemption_desc,
		PREEMPT_RECORD(wptr), 0);
	kgsl_sharedmem_writeq(rb->preemption_desc,
		PREEMPT_RECORD(rptr_addr),
		SCRATCH_RB_GPU_ADDR(device, rb->id, rptr));
	kgsl_sharedmem_writeq(rb->preemption_desc,
		PREEMPT_RECORD(rbase), rb->buffer_desc->gpuaddr);
	kgsl_sharedmem_writeq(rb->preemption_desc,
		PREEMPT_RECORD(counter), counteraddr);

	return 0;
}

int a5xx_preemption_init(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	struct kgsl_iommu *iommu = KGSL_IOMMU(device);
	struct adreno_preemption *preempt = &adreno_dev->preempt;
	struct adreno_ringbuffer *rb;
	int ret;
	unsigned int i;
	uint64_t addr;

	/* We are dependent on IOMMU to make preemption go on the CP side */
	if (kgsl_mmu_get_mmutype(device) != KGSL_MMU_TYPE_IOMMU)
		return -ENODEV;

	INIT_WORK(&preempt->work, _a5xx_preemption_worker);

	/* Allocate mem for storing preemption counters */
	if (IS_ERR_OR_NULL(preempt->scratch))
		preempt->scratch = kgsl_allocate_global(device,
			adreno_dev->num_ringbuffers *
			A5XX_CP_CTXRECORD_PREEMPTION_COUNTER_SIZE, 0, 0, 0,
			"preemption_counters");

	ret = PTR_ERR_OR_ZERO(preempt->scratch);
	if (ret)
		return ret;

	addr = preempt->scratch->gpuaddr;

	/* Allocate mem for storing preemption switch record */
	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
		ret = a5xx_preemption_ringbuffer_init(adreno_dev, rb, addr);
		if (ret)
			return ret;

		addr += A5XX_CP_CTXRECORD_PREEMPTION_COUNTER_SIZE;
	}

	/* Allocate mem for storing preemption smmu record */
	if (kgsl_mmu_is_perprocess(&device->mmu) &&
		IS_ERR_OR_NULL(iommu->smmu_info))
		iommu->smmu_info = kgsl_allocate_global(device, PAGE_SIZE, 0,
			KGSL_MEMFLAGS_GPUREADONLY, KGSL_MEMDESC_PRIVILEGED,
			"smmu_info");

	if (IS_ERR(iommu->smmu_info))
		return PTR_ERR(iommu->smmu_info);

	set_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv);

	return 0;
}