// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021, The Linux Foundation. All rights reserved. * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved. */ #include #include #include #include #include #include #include #include #include "adreno.h" #include "adreno_gen8.h" #include "adreno_gen8_hwsched.h" #include "adreno_pm4types.h" #include "adreno_trace.h" #include "kgsl_pwrscale.h" #include "kgsl_trace.h" #include "kgsl_util.h" /* IFPC & Preemption static powerup restore list */ static const u32 gen8_3_0_pwrup_reglist[] = { GEN8_UCHE_MODE_CNTL, GEN8_UCHE_VARB_IDLE_TIMEOUT, GEN8_UCHE_GBIF_GX_CONFIG, GEN8_UCHE_CACHE_WAYS, GEN8_UCHE_CCHE_MODE_CNTL, GEN8_UCHE_CCHE_CACHE_WAYS, GEN8_UCHE_CCHE_GC_GMEM_RANGE_MIN_LO, GEN8_UCHE_CCHE_GC_GMEM_RANGE_MIN_HI, GEN8_UCHE_WRITE_THRU_BASE_LO, GEN8_UCHE_WRITE_THRU_BASE_HI, GEN8_UCHE_TRAP_BASE_LO, GEN8_UCHE_TRAP_BASE_HI, GEN8_UCHE_CLIENT_PF, GEN8_VSC_BIN_SIZE, GEN8_RB_CMP_NC_MODE_CNTL, GEN8_SP_HLSQ_TIMEOUT_THRESHOLD_DP, GEN8_SP_HLSQ_GC_GMEM_RANGE_MIN_LO, GEN8_SP_HLSQ_GC_GMEM_RANGE_MIN_HI, GEN8_SP_READ_SEL, }; /* IFPC only static powerup restore list */ static const u32 gen8_3_0_ifpc_pwrup_reglist[] = { GEN8_RBBM_NC_MODE_CNTL, GEN8_RBBM_SLICE_INTERFACE_HANG_INT_CNTL, GEN8_RBBM_SLICE_NC_MODE_CNTL, GEN8_SP_NC_MODE_CNTL, GEN8_SP_CHICKEN_BITS_2, GEN8_SP_CHICKEN_BITS_3, GEN8_SP_PERFCTR_SHADER_MASK, GEN8_TPL1_NC_MODE_CNTL, GEN8_TPL1_DBG_ECO_CNTL, GEN8_TPL1_DBG_ECO_CNTL1, GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_1, GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_2, GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_3, GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_4, GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_5, GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_6, GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_7, GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_8, GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_9, GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_10, GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_11, GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_12, GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_13, GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_14, GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_15, GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_16, GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_17, GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_18, GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_19, GEN8_CP_PROTECT_REG_GLOBAL, GEN8_CP_PROTECT_REG_GLOBAL + 1, GEN8_CP_PROTECT_REG_GLOBAL + 2, GEN8_CP_PROTECT_REG_GLOBAL + 3, GEN8_CP_PROTECT_REG_GLOBAL + 4, GEN8_CP_PROTECT_REG_GLOBAL + 5, GEN8_CP_PROTECT_REG_GLOBAL + 6, GEN8_CP_PROTECT_REG_GLOBAL + 7, GEN8_CP_PROTECT_REG_GLOBAL + 8, GEN8_CP_PROTECT_REG_GLOBAL + 9, GEN8_CP_PROTECT_REG_GLOBAL + 10, GEN8_CP_PROTECT_REG_GLOBAL + 11, GEN8_CP_PROTECT_REG_GLOBAL + 12, GEN8_CP_PROTECT_REG_GLOBAL + 13, GEN8_CP_PROTECT_REG_GLOBAL + 14, GEN8_CP_PROTECT_REG_GLOBAL + 15, GEN8_CP_PROTECT_REG_GLOBAL + 16, GEN8_CP_PROTECT_REG_GLOBAL + 17, GEN8_CP_PROTECT_REG_GLOBAL + 18, GEN8_CP_PROTECT_REG_GLOBAL + 19, GEN8_CP_PROTECT_REG_GLOBAL + 20, GEN8_CP_PROTECT_REG_GLOBAL + 21, GEN8_CP_PROTECT_REG_GLOBAL + 22, GEN8_CP_PROTECT_REG_GLOBAL + 23, GEN8_CP_PROTECT_REG_GLOBAL + 24, GEN8_CP_PROTECT_REG_GLOBAL + 25, GEN8_CP_PROTECT_REG_GLOBAL + 26, GEN8_CP_PROTECT_REG_GLOBAL + 27, GEN8_CP_PROTECT_REG_GLOBAL + 28, GEN8_CP_PROTECT_REG_GLOBAL + 29, GEN8_CP_PROTECT_REG_GLOBAL + 30, GEN8_CP_PROTECT_REG_GLOBAL + 31, GEN8_CP_PROTECT_REG_GLOBAL + 32, GEN8_CP_PROTECT_REG_GLOBAL + 33, GEN8_CP_PROTECT_REG_GLOBAL + 34, GEN8_CP_PROTECT_REG_GLOBAL + 35, GEN8_CP_PROTECT_REG_GLOBAL + 36, GEN8_CP_PROTECT_REG_GLOBAL + 37, GEN8_CP_PROTECT_REG_GLOBAL + 38, GEN8_CP_PROTECT_REG_GLOBAL + 39, GEN8_CP_PROTECT_REG_GLOBAL + 40, GEN8_CP_PROTECT_REG_GLOBAL + 41, GEN8_CP_PROTECT_REG_GLOBAL + 42, 
GEN8_CP_PROTECT_REG_GLOBAL + 43, GEN8_CP_PROTECT_REG_GLOBAL + 44, GEN8_CP_PROTECT_REG_GLOBAL + 45, GEN8_CP_PROTECT_REG_GLOBAL + 63, }; static const struct gen8_pwrup_extlist gen8_3_0_pwrup_extlist[] = { { GEN8_CP_PROTECT_CNTL_PIPE, BIT(PIPE_BR) | BIT(PIPE_BV) }, { GEN8_CP_PROTECT_REG_PIPE + 15, BIT(PIPE_BR) | BIT(PIPE_BV) }, { GEN8_GRAS_TSEFE_DBG_ECO_CNTL, BIT(PIPE_BV) | BIT(PIPE_BR)}, { GEN8_GRAS_NC_MODE_CNTL, BIT(PIPE_BV) | BIT(PIPE_BR)}, { GEN8_GRAS_DBG_ECO_CNTL, BIT(PIPE_BV) | BIT(PIPE_BR)}, { GEN8_RB_CCU_CNTL, BIT(PIPE_BR)}, { GEN8_RB_CCU_NC_MODE_CNTL, BIT(PIPE_BR)}, { GEN8_RB_CMP_NC_MODE_CNTL, BIT(PIPE_BR)}, { GEN8_RB_RESOLVE_PREFETCH_CNTL, BIT(PIPE_BR)}, { GEN8_RB_CMP_DBG_ECO_CNTL, BIT(PIPE_BR)}, { GEN8_RB_GC_GMEM_PROTECT, BIT(PIPE_BR)}, { GEN8_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE, BIT(PIPE_BR)}, { GEN8_VPC_FLATSHADE_MODE_CNTL, BIT(PIPE_BV) | BIT(PIPE_BR)}, { GEN8_PC_CHICKEN_BITS_1, BIT(PIPE_BV) | BIT(PIPE_BR)}, { GEN8_PC_CHICKEN_BITS_2, BIT(PIPE_BV) | BIT(PIPE_BR)}, { GEN8_PC_CHICKEN_BITS_3, BIT(PIPE_BV) | BIT(PIPE_BR)}, { GEN8_PC_CHICKEN_BITS_4, BIT(PIPE_BV) | BIT(PIPE_BR)}, { GEN8_PC_AUTO_VERTEX_STRIDE, BIT(PIPE_BR) | BIT(PIPE_BV)}, { GEN8_PC_VIS_STREAM_CNTL, BIT(PIPE_BR) | BIT(PIPE_BV)}, { GEN8_PC_CONTEXT_SWITCH_STABILIZE_CNTL_1, BIT(PIPE_BR) | BIT(PIPE_BV)}, { GEN8_VFD_CB_BV_THRESHOLD, BIT(PIPE_BV) | BIT(PIPE_BR)}, { GEN8_VFD_CB_BR_THRESHOLD, BIT(PIPE_BV) | BIT(PIPE_BR)}, { GEN8_VFD_CB_BUSY_REQ_CNT, BIT(PIPE_BV) | BIT(PIPE_BR)}, { GEN8_VFD_CB_LP_REQ_CNT, BIT(PIPE_BV) | BIT(PIPE_BR)}, { GEN8_VFD_DBG_ECO_CNTL, BIT(PIPE_BR) | BIT(PIPE_BV)}, }; struct gen8_nonctxt_overrides gen8_nc_overrides[] = { { GEN8_UCHE_MODE_CNTL, BIT(PIPE_NONE), 0, 0, 0, }, { GEN8_UCHE_CACHE_WAYS, BIT(PIPE_NONE), 0, 0, 0, }, { GEN8_UCHE_CLIENT_PF, BIT(PIPE_NONE), 0, 0, 0, }, { GEN8_UCHE_DBG_ECO_CNTL_0, BIT(PIPE_NONE), 0, 0, 2, }, { GEN8_UCHE_HW_DBG_CNTL, BIT(PIPE_NONE), 0, 0, 2, }, { GEN8_UCHE_CCHE_HW_DBG_CNTL, BIT(PIPE_NONE), 0, 0, 2, }, { GEN8_GRAS_NC_MODE_CNTL, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 0, }, { GEN8_GRAS_DBG_ECO_CNTL, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 0, }, { GEN8_RB_DBG_ECO_CNTL, BIT(PIPE_BR), 0, 0, 3, }, { GEN8_RB_CCU_DBG_ECO_CNTL, BIT(PIPE_BR), 0, 0, 3, }, { GEN8_RB_CCU_CNTL, BIT(PIPE_BR), 0, 0, 0, }, { GEN8_RB_CCU_NC_MODE_CNTL, BIT(PIPE_BR), 0, 0, 0, }, { GEN8_RB_SLICE_UFC_PREFETCH_CNTL, BIT(PIPE_BR), 0, 0, 3, }, { GEN8_RB_SLICE_UFC_DBG_CNTL, BIT(PIPE_BR), 0, 0, 3, }, { GEN8_RB_CMP_NC_MODE_CNTL, BIT(PIPE_BR), 0, 0, 0, }, { GEN8_RB_RESOLVE_PREFETCH_CNTL, BIT(PIPE_BR), 0, 0, 0, }, { GEN8_RB_CMP_DBG_ECO_CNTL, BIT(PIPE_BR), 0, 0, 0, }, { GEN8_RB_UFC_DBG_CNTL, BIT(PIPE_BR), 0, 0, 3, }, { GEN8_PC_CHICKEN_BITS_1, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 0, }, { GEN8_PC_CHICKEN_BITS_2, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 0, }, { GEN8_PC_CHICKEN_BITS_3, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 0, }, { GEN8_PC_CHICKEN_BITS_4, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 0, }, { GEN8_PC_CHICKEN_BITS_5, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 2, }, { GEN8_PC_DBG_ECO_CNTL, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 3, }, { GEN8_VFD_DBG_ECO_CNTL, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 0, }, { GEN8_VFD_CB_BV_THRESHOLD, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 0, }, { GEN8_VFD_CB_BR_THRESHOLD, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 0, }, { GEN8_VFD_CB_LP_REQ_CNT, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 0, }, { GEN8_VFD_CB_BUSY_REQ_CNT, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 0, }, { GEN8_VPC_DBG_ECO_CNTL, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 2, }, { GEN8_VPC_DBG_ECO_CNTL_1, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 2, }, { GEN8_VPC_DBG_ECO_CNTL_2, BIT(PIPE_BV) | BIT(PIPE_BR), 
0, 0, 1, }, { GEN8_VPC_DBG_ECO_CNTL_3, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 2, }, { GEN8_VPC_FLATSHADE_MODE_CNTL, BIT(PIPE_BV) | BIT(PIPE_BR), 0, 0, 0, }, { GEN8_SP_DBG_ECO_CNTL, BIT(PIPE_NONE), 0, 0, 1, }, { GEN8_SP_NC_MODE_CNTL, BIT(PIPE_NONE), 0, 0, 0, }, { GEN8_SP_CHICKEN_BITS, BIT(PIPE_NONE), 0, 0, 1, }, { GEN8_SP_NC_MODE_CNTL_2, BIT(PIPE_NONE), 0, 0, 1, }, { GEN8_SP_CHICKEN_BITS_1, BIT(PIPE_NONE), 0, 0, 0, }, { GEN8_SP_CHICKEN_BITS_2, BIT(PIPE_NONE), 0, 0, 0, }, { GEN8_SP_CHICKEN_BITS_3, BIT(PIPE_NONE), 0, 0, 0, }, { GEN8_SP_CHICKEN_BITS_4, BIT(PIPE_NONE), 0, 0, 1, }, { GEN8_SP_DISPATCH_CNTL, BIT(PIPE_NONE), 0, 0, 1, }, { GEN8_SP_HLSQ_DBG_ECO_CNTL, BIT(PIPE_NONE), 0, 0, 1, }, { GEN8_SP_DBG_CNTL, BIT(PIPE_NONE), 0, 0, 1, }, { GEN8_TPL1_NC_MODE_CNTL, BIT(PIPE_NONE), 0, 0, 1, }, { GEN8_TPL1_DBG_ECO_CNTL, BIT(PIPE_NONE), 0, 0, 0, }, { GEN8_TPL1_DBG_ECO_CNTL1, BIT(PIPE_NONE), 0, 0, 0, }, { 0 } }; static int acd_calibrate_set(void *data, u64 val) { struct kgsl_device *device = data; struct adreno_device *adreno_dev = ADRENO_DEVICE(device); struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); u32 debug_val = (u32) val; int ret; mutex_lock(&device->mutex); ret = adreno_active_count_get(adreno_dev); if (ret) goto err; ret = gen8_hfi_send_set_value(adreno_dev, HFI_VALUE_DBG, F_PWR_ACD_CALIBRATE, debug_val); if (!ret) gmu->acd_debug_val = debug_val; adreno_active_count_put(adreno_dev); err: mutex_unlock(&device->mutex); return ret; } static int acd_calibrate_get(void *data, u64 *val) { struct kgsl_device *device = data; struct adreno_device *adreno_dev = ADRENO_DEVICE(device); struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev); *val = (u64) gmu->acd_debug_val; return 0; } DEFINE_DEBUGFS_ATTRIBUTE(acd_cal_fops, acd_calibrate_get, acd_calibrate_set, "%llu\n"); static ssize_t nc_override_get(struct file *filep, char __user *user_buf, size_t len, loff_t *off) { struct kgsl_device *device = (struct kgsl_device *) filep->private_data; struct adreno_device *adreno_dev = ADRENO_DEVICE(device); struct gen8_device *gen8_dev = container_of(adreno_dev, struct gen8_device, adreno_dev); struct gen8_nonctxt_overrides *nc_overrides = gen8_dev->nc_overrides; u32 i, max_size = PAGE_SIZE; char *buf, *pos; ssize_t size = 0; if (!gen8_dev->nc_overrides_enabled || !nc_overrides) return 0; buf = kzalloc(max_size, GFP_KERNEL); if (!buf) return -ENOMEM; pos = buf; mutex_lock(&gen8_dev->nc_mutex); /* Copy all assignments from list to str */ for (i = 0; nc_overrides[i].offset; i++) { if (nc_overrides[i].set) { len = scnprintf(pos, max_size, "0x%x:0x%8.8x\n", nc_overrides[i].offset, nc_overrides[i].val); /* If we run out of space len will be zero */ if (len == 0) break; max_size -= len; pos += len; } } mutex_unlock(&gen8_dev->nc_mutex); size = simple_read_from_buffer(user_buf, len, off, buf, pos - buf); kfree(buf); return size; } static void nc_override_cb(struct adreno_device *adreno_dev, void *priv) { struct gen8_device *gen8_dev = container_of(adreno_dev, struct gen8_device, adreno_dev); gen8_dev->nc_overrides_enabled = true; /* Force to update and make new patched reglist */ adreno_dev->patch_reglist = false; } static ssize_t nc_override_set(struct file *filep, const char __user *user_buf, size_t len, loff_t *off) { struct kgsl_device *device = (struct kgsl_device *) filep->private_data; struct adreno_device *adreno_dev = ADRENO_DEVICE(device); struct gen8_device *gen8_dev = container_of(adreno_dev, struct gen8_device, adreno_dev); struct gen8_nonctxt_overrides *nc_overrides = gen8_dev->nc_overrides; u32 i, offset, 
val; int ret = -EINVAL; ssize_t size = 0; char *buf; if (!nc_overrides) return 0; if ((len >= PAGE_SIZE) || (len == 0)) return -EINVAL; buf = kzalloc(len + 1, GFP_KERNEL); if (buf == NULL) return -ENOMEM; if (copy_from_user(buf, user_buf, len)) { ret = -EFAULT; goto err; } /* For sanity and parsing, ensure it is null terminated */ buf[len] = '\0'; size = sscanf(buf, "0x%x:0x%x", &offset, &val); if (size == 0) goto err; size = 0; mutex_lock(&gen8_dev->nc_mutex); for (i = 0; nc_overrides[i].offset; i++) { if (nc_overrides[i].offset == offset) { nc_overrides[i].val = val; nc_overrides[i].set = true; size = len; break; } } mutex_unlock(&gen8_dev->nc_mutex); if (size > 0) { ret = adreno_power_cycle(ADRENO_DEVICE(device), nc_override_cb, NULL); if (!ret) ret = size; } err: kfree(buf); return ret; } static const struct file_operations nc_override_fops = { .owner = THIS_MODULE, .open = simple_open, .read = nc_override_get, .write = nc_override_set, .llseek = noop_llseek, }; void gen8_cp_init_cmds(struct adreno_device *adreno_dev, u32 *cmds) { u32 i = 0, mask = 0; /* Disable concurrent binning before sending CP init */ cmds[i++] = cp_type7_packet(CP_THREAD_CONTROL, 1); cmds[i++] = BIT(27); /* Use multiple HW contexts */ mask |= BIT(0); /* Enable error detection */ mask |= BIT(1); /* Set default reset state */ mask |= BIT(3); /* Disable save/restore of performance counters across preemption */ mask |= BIT(6); /* Enable the register init list with the spinlock */ mask |= BIT(8); cmds[i++] = cp_type7_packet(CP_ME_INIT, 7); /* Enabled ordinal mask */ cmds[i++] = mask; cmds[i++] = 0x00000003; /* Set number of HW contexts */ cmds[i++] = 0x20000000; /* Enable error detection */ cmds[i++] = 0x00000002; /* Operation mode mask */ /* Register initialization list with spinlock */ cmds[i++] = lower_32_bits(adreno_dev->pwrup_reglist->gpuaddr); cmds[i++] = upper_32_bits(adreno_dev->pwrup_reglist->gpuaddr); /* * Gen8 targets with concurrent binning are expected to have a dynamic * power up list with triplets which contains the pipe id in it. * Bit 31 of POWER_UP_REGISTER_LIST_LENGTH is reused here to let CP * know if the power up contains the triplets. If * REGISTER_INIT_LIST_WITH_SPINLOCK is set and bit 31 below is set, * CP expects a dynamic list with triplets. 
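	 *
	 * For reference, the ordinal enable mask assembled above
	 * (BIT(0) | BIT(1) | BIT(3) | BIT(6) | BIT(8)) evaluates to 0x14b,
	 * and the CP_ME_INIT packet built here carries seven payload dwords.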
*/ cmds[i++] = BIT(31); } int gen8_fenced_write(struct adreno_device *adreno_dev, u32 offset, u32 value, u32 mask) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); u32 status, i; u64 ts1, ts2; kgsl_regwrite(device, offset, value); ts1 = gpudev->read_alwayson(adreno_dev); for (i = 0; i < GMU_CORE_LONG_WAKEUP_RETRY_LIMIT; i++) { /* * Make sure the previous register write is posted before * checking the fence status */ mb(); gmu_core_regread(device, GEN8_GMUAO_AHB_FENCE_STATUS, &status); /* * If !writedropped0/1, then the write to fenced register * was successful */ if (!(status & mask)) break; /* Wait a small amount of time before trying again */ udelay(GMU_CORE_WAKEUP_DELAY_US); /* Try to write the fenced register again */ kgsl_regwrite(device, offset, value); } if (i < GMU_CORE_SHORT_WAKEUP_RETRY_LIMIT) return 0; if (i == GMU_CORE_LONG_WAKEUP_RETRY_LIMIT) { ts2 = gpudev->read_alwayson(adreno_dev); dev_err(device->dev, "Timed out waiting %d usecs to write fenced register 0x%x, timestamps: %llx %llx\n", i * GMU_CORE_WAKEUP_DELAY_US, offset, ts1, ts2); return -ETIMEDOUT; } dev_info(device->dev, "Waited %d usecs to write fenced register 0x%x\n", i * GMU_CORE_WAKEUP_DELAY_US, offset); return 0; } int gen8_init(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct gen8_device *gen8_dev = container_of(adreno_dev, struct gen8_device, adreno_dev); const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev); u64 freq = gen8_core->gmu_hub_clk_freq; adreno_dev->highest_bank_bit = gen8_core->highest_bank_bit; adreno_dev->gmu_hub_clk_freq = freq ? freq : 150000000; adreno_dev->ahb_timeout_val = adreno_get_ahb_timeout_val(adreno_dev, gen8_core->noc_timeout_us); adreno_dev->bcl_data = gen8_core->bcl_data; adreno_dev->cooperative_reset = ADRENO_FEATURE(adreno_dev, ADRENO_COOP_RESET); /* If the memory type is DDR 4, override the existing configuration */ if (of_fdt_get_ddrtype() == 0x7) adreno_dev->highest_bank_bit = 14; gen8_crashdump_init(adreno_dev); gen8_dev->nc_overrides = gen8_nc_overrides; mutex_init(&gen8_dev->nc_mutex); /* Debugfs node for noncontext registers override */ debugfs_create_file("nc_override", 0644, device->d_debugfs, device, &nc_override_fops); return adreno_allocate_global(device, &adreno_dev->pwrup_reglist, PAGE_SIZE, 0, 0, KGSL_MEMDESC_PRIVILEGED, "powerup_register_list"); } #define CX_TIMER_INIT_SAMPLES 16 void gen8_cx_timer_init(struct adreno_device *adreno_dev) { u64 seed_val, tmr, skew = 0; int i; unsigned long flags; /* Set it up during first boot or after suspend resume */ if (test_bit(ADRENO_DEVICE_CX_TIMER_INITIALIZED, &adreno_dev->priv)) return; /* Disable irqs to get accurate timings */ local_irq_save(flags); /* Calculate the overhead of timer reads and register writes */ for (i = 0; i < CX_TIMER_INIT_SAMPLES; i++) { u64 tmr1, tmr2, tmr3; /* Measure time for two reads of the CPU timer */ tmr1 = arch_timer_read_counter(); tmr2 = arch_timer_read_counter(); /* Write to the register and time it */ adreno_cx_misc_regwrite(adreno_dev, GEN8_GPU_CX_MISC_AO_COUNTER_LO, lower_32_bits(tmr2)); adreno_cx_misc_regwrite(adreno_dev, GEN8_GPU_CX_MISC_AO_COUNTER_HI, upper_32_bits(tmr2)); /* Barrier to make sure the write completes before timing it */ mb(); tmr3 = arch_timer_read_counter(); /* Calculate difference between register write and CPU timer */ skew += (tmr3 - tmr2) - (tmr2 - tmr1); } local_irq_restore(flags); /* Get the average over all our readings, to the 
closest integer */ skew = (skew + CX_TIMER_INIT_SAMPLES / 2) / CX_TIMER_INIT_SAMPLES; local_irq_save(flags); tmr = arch_timer_read_counter(); seed_val = tmr + skew; /* Seed the GPU CX counter with the adjusted timer */ adreno_cx_misc_regwrite(adreno_dev, GEN8_GPU_CX_MISC_AO_COUNTER_LO, lower_32_bits(seed_val)); adreno_cx_misc_regwrite(adreno_dev, GEN8_GPU_CX_MISC_AO_COUNTER_HI, upper_32_bits(seed_val)); local_irq_restore(flags); set_bit(ADRENO_DEVICE_CX_TIMER_INITIALIZED, &adreno_dev->priv); } void gen8_get_gpu_feature_info(struct adreno_device *adreno_dev) { u32 feature_fuse = 0; /* Get HW feature soft fuse value */ adreno_cx_misc_regread(adreno_dev, GEN8_GPU_CX_MISC_SW_FUSE_VALUE, &feature_fuse); adreno_dev->fastblend_enabled = feature_fuse & BIT(GEN8_FASTBLEND_SW_FUSE); adreno_dev->raytracing_enabled = feature_fuse & BIT(GEN8_RAYTRACING_SW_FUSE); /* If software enables LPAC without HW support, disable it */ if (ADRENO_FEATURE(adreno_dev, ADRENO_LPAC)) adreno_dev->lpac_enabled = feature_fuse & BIT(GEN8_LPAC_SW_FUSE); adreno_dev->feature_fuse = feature_fuse; } void gen8_host_aperture_set(struct adreno_device *adreno_dev, u32 pipe_id, u32 slice_id, u32 use_slice_id) { struct gen8_device *gen8_dev = container_of(adreno_dev, struct gen8_device, adreno_dev); u32 aperture_val = (FIELD_PREP(GENMASK(15, 12), pipe_id) | FIELD_PREP(GENMASK(18, 16), slice_id) | FIELD_PREP(GENMASK(23, 23), use_slice_id)); /* Check if we already set the aperture */ if (gen8_dev->aperture == aperture_val) return; kgsl_regwrite(KGSL_DEVICE(adreno_dev), GEN8_CP_APERTURE_CNTL_HOST, aperture_val); /* Make sure the aperture write goes through before reading the registers */ mb(); gen8_dev->aperture = aperture_val; } void gen8_regread64_aperture(struct kgsl_device *device, u32 offsetwords_lo, u32 offsetwords_hi, u64 *value, u32 pipe, u32 slice_id, u32 use_slice_id) { u32 val_lo = 0, val_hi = 0; gen8_host_aperture_set(ADRENO_DEVICE(device), pipe, slice_id, use_slice_id); val_lo = kgsl_regmap_read(&device->regmap, offsetwords_lo); val_hi = kgsl_regmap_read(&device->regmap, offsetwords_hi); *value = (((u64)val_hi << 32) | val_lo); } void gen8_regread_aperture(struct kgsl_device *device, u32 offsetwords, u32 *value, u32 pipe, u32 slice_id, u32 use_slice_id) { gen8_host_aperture_set(ADRENO_DEVICE(device), pipe, slice_id, use_slice_id); *value = kgsl_regmap_read(&device->regmap, offsetwords); } static inline void gen8_regwrite_aperture(struct kgsl_device *device, u32 offsetwords, u32 value, u32 pipe, u32 slice_id, u32 use_slice_id) { gen8_host_aperture_set(ADRENO_DEVICE(device), pipe, slice_id, use_slice_id); kgsl_regmap_write(&device->regmap, value, offsetwords); } #define GEN8_CP_PROTECT_DEFAULT (FIELD_PREP(GENMASK(31, 16), 0xffff) | BIT(0) | BIT(1) | BIT(3)) static void gen8_protect_init(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev); const struct gen8_protected_regs *regs = gen8_core->protected_regs; u32 count = 0; int i; /* * Enable access protection to privileged registers, fault on an access * protect violation and select the last span to protect from the start * address all the way to the end of the register address space */ gen8_regwrite_aperture(device, GEN8_CP_PROTECT_CNTL_PIPE, GEN8_CP_PROTECT_DEFAULT, PIPE_BR, 0, 0); gen8_regwrite_aperture(device, GEN8_CP_PROTECT_CNTL_PIPE, GEN8_CP_PROTECT_DEFAULT, PIPE_BV, 0, 0); if (adreno_dev->lpac_enabled) gen8_regwrite_aperture(device, GEN8_CP_PROTECT_CNTL_PIPE, 
GEN8_CP_PROTECT_DEFAULT, PIPE_LPAC, 0, 0); /* Clear aperture register */ gen8_host_aperture_set(adreno_dev, 0, 0, 0); /* Program each register defined by the core definition */ for (i = 0; regs[i].reg; i++) { /* * This is the offset of the end register as counted from the * start, i.e. # of registers in the range - 1 */ count = regs[i].end - regs[i].start; kgsl_regwrite(device, regs[i].reg, FIELD_PREP(GENMASK(17, 0), regs[i].start) | FIELD_PREP(GENMASK(30, 18), count) | FIELD_PREP(BIT(31), regs[i].noaccess)); } /* * Last span setting is only being applied to the last pipe specific * register. Hence duplicate the last span from protect reg into the * BR, BV and LPAC protect reg pipe 15. */ i--; gen8_regwrite_aperture(device, GEN8_CP_PROTECT_REG_PIPE + 15, FIELD_PREP(GENMASK(17, 0), regs[i].start) | FIELD_PREP(GENMASK(30, 18), count) | FIELD_PREP(BIT(31), regs[i].noaccess), PIPE_BR, 0, 0); gen8_regwrite_aperture(device, GEN8_CP_PROTECT_REG_PIPE + 15, FIELD_PREP(GENMASK(17, 0), regs[i].start) | FIELD_PREP(GENMASK(30, 18), count) | FIELD_PREP(BIT(31), regs[i].noaccess), PIPE_BV, 0, 0); if (adreno_dev->lpac_enabled) gen8_regwrite_aperture(device, GEN8_CP_PROTECT_REG_PIPE + 15, FIELD_PREP(GENMASK(17, 0), regs[i].start) | FIELD_PREP(GENMASK(30, 18), count) | FIELD_PREP(BIT(31), regs[i].noaccess), PIPE_LPAC, 0, 0); /* Clear aperture register */ gen8_host_aperture_set(adreno_dev, 0, 0, 0); } static void gen8_nonctxt_regconfig(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev); const struct gen8_nonctxt_regs *regs = gen8_core->nonctxt_regs; struct gen8_device *gen8_dev = container_of(adreno_dev, struct gen8_device, adreno_dev); u32 i, pipe_id; unsigned long pipe; /* Program non context registers for all pipes */ for (pipe_id = PIPE_NONE; pipe_id <= PIPE_AQE1; pipe_id++) { if ((pipe_id == PIPE_LPAC) && !ADRENO_FEATURE(adreno_dev, ADRENO_LPAC)) continue; else if (((pipe_id == PIPE_AQE0) || (pipe_id == PIPE_AQE1)) && !ADRENO_FEATURE(adreno_dev, ADRENO_AQE)) continue; for (i = 0; regs[i].offset; i++) { pipe = (unsigned long)regs[i].pipelines; if (test_bit(pipe_id, &pipe)) gen8_regwrite_aperture(device, regs[i].offset, regs[i].val, pipe_id, 0, 0); } } /* Program non context registers overrides for all pipes */ if (gen8_dev->nc_overrides_enabled) { struct gen8_nonctxt_overrides *nc_overrides = gen8_dev->nc_overrides; mutex_lock(&gen8_dev->nc_mutex); for (pipe_id = PIPE_NONE; pipe_id <= PIPE_AQE1; pipe_id++) { if ((pipe_id == PIPE_LPAC) && !ADRENO_FEATURE(adreno_dev, ADRENO_LPAC)) continue; else if (((pipe_id == PIPE_AQE0) || (pipe_id == PIPE_AQE1)) && !ADRENO_FEATURE(adreno_dev, ADRENO_AQE)) continue; for (i = 0; nc_overrides[i].offset; i++) { if (!nc_overrides[i].set) continue; pipe = (unsigned long)nc_overrides[i].pipelines; if (test_bit(pipe_id, &pipe)) gen8_regwrite_aperture(device, nc_overrides[i].offset, nc_overrides[i].val, pipe_id, 0, 0); } } mutex_unlock(&gen8_dev->nc_mutex); } /* Clear aperture register */ gen8_host_aperture_set(adreno_dev, 0, 0, 0); } #define RBBM_CLOCK_CNTL_ON 0x8aa8aa82 static void gen8_hwcg_set(struct adreno_device *adreno_dev, bool on) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev); u32 value; int i; /* Increase clock keep-on hysteresis from 5 cycles to 8 cycles */ if (!adreno_is_gen8_3_0(adreno_dev) && on) kgsl_regwrite(device, GEN8_RBBM_CGC_0_PC, 0x00000702); if (!adreno_dev->hwcg_enabled) 
on = false; for (i = 0; i < gen8_core->ao_hwcg_count; i++) gmu_core_regwrite(device, gen8_core->ao_hwcg[i].offset, on ? gen8_core->ao_hwcg[i].val : 0); kgsl_regwrite(device, GEN8_RBBM_CLOCK_CNTL_GLOBAL, 1); kgsl_regwrite(device, GEN8_RBBM_CGC_GLOBAL_LOAD_CMD, on ? 1 : 0); if (on) { u32 retry = 3; kgsl_regwrite(device, GEN8_RBBM_CGC_P2S_TRIG_CMD, 1); /* Poll for the TXDONE:BIT(0) status */ do { /* Wait for small amount of time for TXDONE status*/ udelay(1); kgsl_regread(device, GEN8_RBBM_CGC_P2S_STATUS, &value); } while (!(value & BIT(0)) && --retry); if (!(value & BIT(0))) { dev_err(device->dev, "RBBM_CGC_P2S_STATUS:TXDONE Poll failed\n"); kgsl_device_snapshot(device, NULL, NULL, false); return; } kgsl_regwrite(device, GEN8_RBBM_CLOCK_CNTL_GLOBAL, 0); } } static void gen8_patch_pwrup_reglist(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct gen8_device *gen8_dev = container_of(adreno_dev, struct gen8_device, adreno_dev); struct adreno_reglist_list reglist[3]; void *ptr = adreno_dev->pwrup_reglist->hostptr; struct cpu_gpu_lock *lock = ptr; u32 items = 0, i, j, pipe_id; u32 *dest = ptr + sizeof(*lock); struct gen8_nonctxt_overrides *nc_overrides = gen8_dev->nc_overrides; /* Static IFPC restore only registers */ reglist[items].regs = gen8_3_0_ifpc_pwrup_reglist; reglist[items].count = ARRAY_SIZE(gen8_3_0_ifpc_pwrup_reglist); lock->ifpc_list_len = reglist[items].count; items++; /* Static IFPC + preemption registers */ reglist[items].regs = gen8_3_0_pwrup_reglist; reglist[items].count = ARRAY_SIZE(gen8_3_0_pwrup_reglist); lock->preemption_list_len = reglist[items].count; items++; /* * For each entry in each of the lists, write the offset and the current * register value into the GPU buffer */ for (i = 0; i < items; i++) { const u32 *r = reglist[i].regs; for (j = 0; j < reglist[i].count; j++) { *dest++ = r[j]; kgsl_regread(device, r[j], dest++); } mutex_lock(&gen8_dev->nc_mutex); for (j = 0; j < nc_overrides[j].offset; j++) { unsigned long pipe = (unsigned long)nc_overrides[j].pipelines; if (!(test_bit(PIPE_NONE, &pipe) && nc_overrides[j].set && nc_overrides[j].list_type)) continue; if ((reglist[i].regs == gen8_3_0_ifpc_pwrup_reglist) && (nc_overrides[j].list_type == 1)) { *dest++ = nc_overrides[j].offset; kgsl_regread(device, nc_overrides[j].offset, dest++); lock->ifpc_list_len++; } else if ((reglist[i].regs == gen8_3_0_pwrup_reglist) && (nc_overrides[j].list_type == 2)) { *dest++ = nc_overrides[j].offset; kgsl_regread(device, nc_overrides[j].offset, dest++); lock->preemption_list_len++; } } mutex_unlock(&gen8_dev->nc_mutex); } /* * The overall register list is composed of * 1. Static IFPC-only registers * 2. Static IFPC + preemption registers * 3. Dynamic IFPC + preemption registers (ex: perfcounter selects) * * The first two lists are static. Size of these lists are stored as * number of pairs in ifpc_list_len and preemption_list_len * respectively. With concurrent binning, Some of the perfcounter * registers being virtualized, CP needs to know the pipe id to program * the aperture inorder to restore the same. Thus, third list is a * dynamic list with triplets as * (
<aperture, shifted 12 bits> <address> <data>), and the length is
	 * stored as the number of triplets in dynamic_list_len.
	 *
	 * Starting with Gen8, some of the registers that are initialized statically
	 * by the kernel are pipe-specific. Because only the dynamic list is able to
	 * support specifying a pipe ID, these registers are bundled along with any
	 * dynamic entries such as perf counter selects into a single dynamic list.
	 */
	gen8_dev->ext_pwrup_list_len = 0;

	/*
	 * Write external pipe specific regs (<aperture> <offset> <value> - triplets)
	 * offset and the current value into GPU buffer
	 */
	for (pipe_id = PIPE_BR; pipe_id <= PIPE_LPAC; pipe_id++) {
		for (i = 0; i < ARRAY_SIZE(gen8_3_0_pwrup_extlist); i++) {
			unsigned long pipe = (unsigned long)gen8_3_0_pwrup_extlist[i].pipelines;

			if (!test_bit(pipe_id, &pipe))
				continue;

			*dest++ = FIELD_PREP(GENMASK(15, 12), pipe_id);
			*dest++ = gen8_3_0_pwrup_extlist[i].offset;
			gen8_regread_aperture(device, gen8_3_0_pwrup_extlist[i].offset,
				dest++, pipe_id, 0, 0);
			gen8_dev->ext_pwrup_list_len++;
		}
	}

	/*
	 * Write noncontext override pipe specific regs (<aperture> <offset> <value>
- triplets) * offset and the current value into GPU buffer */ mutex_lock(&gen8_dev->nc_mutex); for (pipe_id = PIPE_BR; pipe_id <= PIPE_BV; pipe_id++) { for (i = 0; i < nc_overrides[i].offset; i++) { unsigned long pipe = (unsigned long)nc_overrides[i].pipelines; if (!(test_bit(pipe_id, &pipe) && nc_overrides[i].set && nc_overrides[i].list_type)) continue; *dest++ = FIELD_PREP(GENMASK(15, 12), pipe_id); *dest++ = nc_overrides[i].offset; gen8_regread_aperture(device, nc_overrides[i].offset, dest++, pipe_id, 0, 0); gen8_dev->ext_pwrup_list_len++; } } mutex_unlock(&gen8_dev->nc_mutex); /* Clear aperture register */ gen8_host_aperture_set(adreno_dev, 0, 0, 0); lock->dynamic_list_len = gen8_dev->ext_pwrup_list_len; } /* _llc_configure_gpu_scid() - Program the sub-cache ID for all GPU blocks */ static void _llc_configure_gpu_scid(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); u32 gpu_scid; if (IS_ERR_OR_NULL(adreno_dev->gpu_llc_slice) || !adreno_dev->gpu_llc_slice_enable) return; if (llcc_slice_activate(adreno_dev->gpu_llc_slice)) return; gpu_scid = llcc_get_slice_id(adreno_dev->gpu_llc_slice); /* 5 blocks at 6 bits per block */ kgsl_regwrite(device, GEN8_GBIF_SCACHE_CNTL1, FIELD_PREP(GENMASK(29, 24), gpu_scid) | FIELD_PREP(GENMASK(23, 18), gpu_scid) | FIELD_PREP(GENMASK(17, 12), gpu_scid) | FIELD_PREP(GENMASK(11, 6), gpu_scid) | FIELD_PREP(GENMASK(5, 0), gpu_scid)); kgsl_regwrite(device, GEN8_GBIF_SCACHE_CNTL0, FIELD_PREP(GENMASK(15, 10), gpu_scid) | FIELD_PREP(GENMASK(21, 16), gpu_scid) | FIELD_PREP(GENMASK(27, 22), gpu_scid) | BIT(8)); } static void _llc_gpuhtw_slice_activate(struct adreno_device *adreno_dev) { if (IS_ERR_OR_NULL(adreno_dev->gpuhtw_llc_slice) || !adreno_dev->gpuhtw_llc_slice_enable) return; llcc_slice_activate(adreno_dev->gpuhtw_llc_slice); } static void _set_secvid(struct kgsl_device *device) { kgsl_regwrite(device, GEN8_RBBM_SECVID_TSB_CNTL, 0x0); kgsl_regwrite(device, GEN8_RBBM_SECVID_TSB_TRUSTED_BASE_LO, lower_32_bits(KGSL_IOMMU_SECURE_BASE32)); kgsl_regwrite(device, GEN8_RBBM_SECVID_TSB_TRUSTED_BASE_HI, upper_32_bits(KGSL_IOMMU_SECURE_BASE32)); kgsl_regwrite(device, GEN8_RBBM_SECVID_TSB_TRUSTED_SIZE, FIELD_PREP(GENMASK(31, 12), (KGSL_IOMMU_SECURE_SIZE(&device->mmu) / SZ_4K))); } /* Set UCHE_TRAP_BASE to a page below the top of the memory space */ #define GEN8_UCHE_TRAP_BASE 0x1FFFFFFFFF000ULL static u64 gen8_get_uche_trap_base(void) { return GEN8_UCHE_TRAP_BASE; } /* * All Gen8 targets support marking certain transactions as always privileged * which allows us to mark more memory as privileged without having to * explicitly set the APRIV bit. 
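 * Only the BR pipe additionally gets the crashdumper read/write bits
 * (see GEN8_BR_APRIV_DEFAULT below); BV, LPAC and the AQE pipes are
 * programmed with GEN8_APRIV_DEFAULT in gen8_start().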
Choose the following transactions to be
 * privileged by default:
 * CDWRITE [6:6] - Crashdumper writes
 * CDREAD [5:5] - Crashdumper reads
 * RBRPWB [3:3] - RPTR shadow writes
 * RBPRIVLEVEL [2:2] - Memory accesses from PM4 packets in the ringbuffer
 * RBFETCH [1:1] - Ringbuffer reads
 * ICACHE [0:0] - Instruction cache fetches
 */
#define GEN8_APRIV_DEFAULT (BIT(3) | BIT(2) | BIT(1) | BIT(0))

/* Add crashdumper permissions for the BR APRIV */
#define GEN8_BR_APRIV_DEFAULT (GEN8_APRIV_DEFAULT | BIT(6) | BIT(5))

static const struct kgsl_regmap_list gen8_3_0_bicubic_regs[] = {
	/* GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_0 default and recommended values are the same */
	{ GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_1, 0x3fe05ff4 },
	{ GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_2, 0x3fa0ebee },
	{ GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_3, 0x3f5193ed },
	{ GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_4, 0x3f0243f0 },
	{ GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_5, 0x00000000 },
	{ GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_6, 0x3fd093e8 },
	{ GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_7, 0x3f4133dc },
	{ GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_8, 0x3ea1dfdb },
	{ GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_9, 0x3e0283e0 },
	{ GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_10, 0x0000ac2b },
	{ GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_11, 0x0000f01d },
	{ GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_12, 0x00114412 },
	{ GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_13, 0x0021980a },
	{ GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_14, 0x0051ec05 },
	{ GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_15, 0x0000380e },
	{ GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_16, 0x3ff09001 },
	{ GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_17, 0x3fc10bfa },
	{ GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_18, 0x3f9193f7 },
	{ GEN8_TPL1_BICUBIC_WEIGHTS_TABLE_19, 0x3f7227f7 },
};

void gen8_enable_ahb_timeout_detection(struct adreno_device *adreno_dev)
{
	u32 val;

	if (!adreno_dev->ahb_timeout_val)
		return;

	val = (ADRENO_AHB_CNTL_DEFAULT |
		FIELD_PREP(GENMASK(4, 0), adreno_dev->ahb_timeout_val));
	adreno_cx_misc_regwrite(adreno_dev, GEN8_GPU_CX_MISC_CX_AHB_AON_CNTL, val);
	adreno_cx_misc_regwrite(adreno_dev, GEN8_GPU_CX_MISC_CX_AHB_GMU_CNTL, val);
	adreno_cx_misc_regwrite(adreno_dev, GEN8_GPU_CX_MISC_CX_AHB_CP_CNTL, val);
	adreno_cx_misc_regwrite(adreno_dev, GEN8_GPU_CX_MISC_CX_AHB_VBIF_SMMU_CNTL, val);
	adreno_cx_misc_regwrite(adreno_dev, GEN8_GPU_CX_MISC_CX_AHB_HOST_CNTL, val);
}

#define MIN_HBB 13

int gen8_start(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev);
	u32 mal, mode = 0, mode2 = 0, rgb565_predicator = 0, amsbc = 0;
	struct gen8_device *gen8_dev = container_of(adreno_dev,
			struct gen8_device, adreno_dev);
	/*
	 * HBB values 13 to 16 are represented by an HBB LSB of 0 to 3.
	 * Any HBB value beyond 16 also requires programming the HBB MSB.
	 * The default highest bank bit is 14, hence set the default HBB LSB
	 * to "1" and the MSB to "0".
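	 *
	 * For example, the default highest_bank_bit of 14 gives
	 * hbb = 14 - MIN_HBB = 1, so hbb_lo = 1 and hbb_hi = 0, matching the
	 * defaults below; a part with highest_bank_bit 16 would give hbb = 3
	 * (hbb_lo = 3, hbb_hi = 0).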
*/ u32 hbb_lo = 1, hbb_hi = 0, hbb = 1; struct cpu_gpu_lock *pwrup_lock = adreno_dev->pwrup_reglist->hostptr; u64 uche_trap_base = gen8_get_uche_trap_base(); u32 rgba8888_lossless = 0, fp16compoptdis = 0; /* Reset aperture fields to go through first aperture write check */ gen8_dev->aperture = UINT_MAX; /* Make all blocks contribute to the GPU BUSY perf counter */ kgsl_regwrite(device, GEN8_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xffffffff); kgsl_regwrite(device, GEN8_UCHE_CCHE_GC_GMEM_RANGE_MIN_LO, lower_32_bits(adreno_dev->uche_gmem_base)); kgsl_regwrite(device, GEN8_UCHE_CCHE_GC_GMEM_RANGE_MIN_HI, upper_32_bits(adreno_dev->uche_gmem_base)); kgsl_regwrite(device, GEN8_SP_HLSQ_GC_GMEM_RANGE_MIN_LO, lower_32_bits(adreno_dev->uche_gmem_base)); kgsl_regwrite(device, GEN8_SP_HLSQ_GC_GMEM_RANGE_MIN_HI, upper_32_bits(adreno_dev->uche_gmem_base)); if (adreno_dev->lpac_enabled) { gen8_regwrite_aperture(device, GEN8_RB_LPAC_GMEM_PROTECT, 0x0c000000, PIPE_BR, 0, 0); /* Clear aperture register */ gen8_host_aperture_set(adreno_dev, 0, 0, 0); kgsl_regwrite(device, GEN8_UCHE_CCHE_LPAC_GMEM_RANGE_MIN_LO, lower_32_bits(adreno_dev->uche_gmem_base)); kgsl_regwrite(device, GEN8_UCHE_CCHE_LPAC_GMEM_RANGE_MIN_HI, upper_32_bits(adreno_dev->uche_gmem_base)); kgsl_regwrite(device, GEN8_SP_HLSQ_LPAC_GMEM_RANGE_MIN_LO, lower_32_bits(adreno_dev->uche_gmem_base)); kgsl_regwrite(device, GEN8_SP_HLSQ_LPAC_GMEM_RANGE_MIN_HI, upper_32_bits(adreno_dev->uche_gmem_base)); } /* * Set UCHE_WRITE_THRU_BASE to the UCHE_TRAP_BASE effectively * disabling L2 bypass */ kgsl_regwrite(device, GEN8_UCHE_TRAP_BASE_LO, lower_32_bits(uche_trap_base)); kgsl_regwrite(device, GEN8_UCHE_TRAP_BASE_HI, upper_32_bits(uche_trap_base)); kgsl_regwrite(device, GEN8_UCHE_WRITE_THRU_BASE_LO, lower_32_bits(uche_trap_base)); kgsl_regwrite(device, GEN8_UCHE_WRITE_THRU_BASE_HI, upper_32_bits(uche_trap_base)); /* * CP takes care of the restore during IFPC exit. We need to restore at slumber * boundary as well */ if (pwrup_lock->dynamic_list_len - gen8_dev->ext_pwrup_list_len > 0) { kgsl_regwrite(device, GEN8_RBBM_PERFCTR_CNTL, 0x1); kgsl_regwrite(device, GEN8_RBBM_SLICE_PERFCTR_CNTL, 0x1); } /* Turn on the IFPC counter (countable 4 on XOCLK4) */ kgsl_regwrite(device, GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_1, FIELD_PREP(GENMASK(7, 0), 0x4)); /* Turn on counter to count total time spent in BCL throttle */ if (adreno_dev->bcl_enabled) kgsl_regrmw(device, GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_1, GENMASK(15, 8), FIELD_PREP(GENMASK(15, 8), 0x26)); if (of_property_read_u32(device->pdev->dev.of_node, "qcom,min-access-length", &mal)) mal = 32; of_property_read_u32(device->pdev->dev.of_node, "qcom,ubwc-mode", &mode); switch (mode) { case KGSL_UBWC_5_0: amsbc = 1; rgb565_predicator = 1; mode2 = 4; break; case KGSL_UBWC_4_0: amsbc = 1; rgb565_predicator = 1; fp16compoptdis = 1; rgba8888_lossless = 1; mode2 = 2; break; case KGSL_UBWC_3_0: amsbc = 1; mode2 = 1; break; default: break; } if (!WARN_ON(!adreno_dev->highest_bank_bit)) { hbb = adreno_dev->highest_bank_bit - MIN_HBB; hbb_lo = hbb & 3; hbb_hi = (hbb >> 2) & 1; } mal = (mal == 64) ? 
1 : 0; gen8_regwrite_aperture(device, GEN8_GRAS_NC_MODE_CNTL, FIELD_PREP(GENMASK(8, 5), hbb), PIPE_BV, 0, 0); gen8_regwrite_aperture(device, GEN8_GRAS_NC_MODE_CNTL, FIELD_PREP(GENMASK(8, 5), hbb), PIPE_BR, 0, 0); gen8_regwrite_aperture(device, GEN8_RB_CCU_NC_MODE_CNTL, FIELD_PREP(GENMASK(3, 3), hbb_hi) | FIELD_PREP(GENMASK(2, 1), hbb_lo), PIPE_BR, 0, 0); gen8_regwrite_aperture(device, GEN8_RB_CMP_NC_MODE_CNTL, FIELD_PREP(GENMASK(17, 15), mode2) | FIELD_PREP(GENMASK(4, 4), rgba8888_lossless) | FIELD_PREP(GENMASK(3, 3), fp16compoptdis) | FIELD_PREP(GENMASK(2, 2), rgb565_predicator) | FIELD_PREP(GENMASK(1, 1), amsbc) | FIELD_PREP(GENMASK(0, 0), mal), PIPE_BR, 0, 0); /* Clear aperture register */ gen8_host_aperture_set(adreno_dev, 0, 0, 0); kgsl_regwrite(device, GEN8_SP_NC_MODE_CNTL, FIELD_PREP(GENMASK(11, 10), hbb_hi) | FIELD_PREP(GENMASK(5, 4), 2) | FIELD_PREP(GENMASK(3, 3), mal) | FIELD_PREP(GENMASK(2, 1), hbb_lo)); kgsl_regwrite(device, GEN8_TPL1_NC_MODE_CNTL, FIELD_PREP(GENMASK(4, 4), hbb_hi) | FIELD_PREP(GENMASK(3, 3), mal) | FIELD_PREP(GENMASK(2, 1), hbb_lo)); /* Configure TP bicubic registers */ kgsl_regmap_multi_write(&device->regmap, gen8_3_0_bicubic_regs, ARRAY_SIZE(gen8_3_0_bicubic_regs)); /* Program noncontext registers */ gen8_nonctxt_regconfig(adreno_dev); /* Enable hardware hang detection */ kgsl_regwrite(device, GEN8_RBBM_INTERFACE_HANG_INT_CNTL, BIT(30) | FIELD_PREP(GENMASK(27, 0), gen8_core->hang_detect_cycles)); kgsl_regwrite(device, GEN8_RBBM_SLICE_INTERFACE_HANG_INT_CNTL, BIT(30)); kgsl_regwrite(device, GEN8_UCHE_CLIENT_PF, BIT(7) | FIELD_PREP(GENMASK(6, 0), adreno_dev->uche_client_pf)); /* Enable the GMEM save/restore feature for preemption */ if (adreno_is_preemption_enabled(adreno_dev)) { gen8_regwrite_aperture(device, GEN8_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE, 0x1, PIPE_BR, 0, 0); /* Clear aperture register */ gen8_host_aperture_set(adreno_dev, 0, 0, 0); } /* Enable GMU power counter 0 to count GPU busy */ kgsl_regwrite(device, GEN8_GMUAO_GPU_CX_BUSY_MASK, 0xff000000); kgsl_regrmw(device, GEN8_GMUCX_POWER_COUNTER_SELECT_XOCLK_0, 0xFF, 0x20); kgsl_regwrite(device, GEN8_GMUCX_POWER_COUNTER_ENABLE, 0x1); gen8_protect_init(adreno_dev); /* Configure LLCC */ _llc_configure_gpu_scid(adreno_dev); _llc_gpuhtw_slice_activate(adreno_dev); gen8_regwrite_aperture(device, GEN8_CP_APRIV_CNTL_PIPE, GEN8_BR_APRIV_DEFAULT, PIPE_BR, 0, 0); gen8_regwrite_aperture(device, GEN8_CP_APRIV_CNTL_PIPE, GEN8_APRIV_DEFAULT, PIPE_BV, 0, 0); if (adreno_dev->lpac_enabled) gen8_regwrite_aperture(device, GEN8_CP_APRIV_CNTL_PIPE, GEN8_APRIV_DEFAULT, PIPE_LPAC, 0, 0); if (ADRENO_FEATURE(adreno_dev, ADRENO_AQE)) { gen8_regwrite_aperture(device, GEN8_CP_APRIV_CNTL_PIPE, GEN8_APRIV_DEFAULT, PIPE_AQE0, 0, 0); gen8_regwrite_aperture(device, GEN8_CP_APRIV_CNTL_PIPE, GEN8_APRIV_DEFAULT, PIPE_AQE1, 0, 0); } /* Clear aperture register */ gen8_host_aperture_set(adreno_dev, 0, 0, 0); _set_secvid(device); /* * Enable hardware clock gating here to prevent any register access * issue due to internal clock gating. */ gen8_hwcg_set(adreno_dev, true); /* * All registers must be written before this point so that we don't * miss any register programming when we patch the power up register * list. 
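	 *
	 * The list is patched only once: patch_reglist is set right after
	 * gen8_patch_pwrup_reglist() below, and is cleared again from
	 * nc_override_cb() when a noncontext override is written through
	 * debugfs so that the list is rebuilt on the next power cycle.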
*/ if (!adreno_dev->patch_reglist && (adreno_dev->pwrup_reglist->gpuaddr != 0)) { gen8_patch_pwrup_reglist(adreno_dev); adreno_dev->patch_reglist = true; } return 0; } /* Offsets into the MX/CX mapped register regions */ #define GEN8_RDPM_MX_OFFSET 0xf00 #define GEN8_RDPM_CX_OFFSET 0xf14 void gen8_rdpm_mx_freq_update(struct gen8_gmu_device *gmu, u32 freq) { if (gmu->rdpm_mx_virt) { writel_relaxed(freq/1000, (gmu->rdpm_mx_virt + GEN8_RDPM_MX_OFFSET)); /* * ensure previous writes post before this one, * i.e. act like normal writel() */ wmb(); } } void gen8_rdpm_cx_freq_update(struct gen8_gmu_device *gmu, u32 freq) { if (gmu->rdpm_cx_virt) { writel_relaxed(freq/1000, (gmu->rdpm_cx_virt + GEN8_RDPM_CX_OFFSET)); /* * ensure previous writes post before this one, * i.e. act like normal writel() */ wmb(); } } int gen8_scm_gpu_init_cx_regs(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); u32 gpu_req = GPU_ALWAYS_EN_REQ; int ret; if (ADRENO_FEATURE(adreno_dev, ADRENO_BCL)) gpu_req |= GPU_BCL_EN_REQ; if (ADRENO_FEATURE(adreno_dev, ADRENO_CLX)) gpu_req |= GPU_CLX_EN_REQ; gpu_req |= GPU_TSENSE_EN_REQ; ret = kgsl_scm_gpu_init_regs(&device->pdev->dev, gpu_req); /* * For targets that support this scm call to program BCL id , enable BCL. * For other targets, BCL is enabled after first GMU boot. */ if (!ret && ADRENO_FEATURE(adreno_dev, ADRENO_BCL)) adreno_dev->bcl_enabled = true; /* If programming TZ CLX was successful, then program KMD owned CLX regs */ if (!ret && ADRENO_FEATURE(adreno_dev, ADRENO_CLX)) adreno_dev->clx_enabled = true; /* * If scm call returned EOPNOTSUPP, either we are on a kernel version * lesser than 6.1 where scm call is not supported or we are sending an * empty request. Ignore the error in such cases. */ return (ret == -EOPNOTSUPP) ? 0 : ret; } void gen8_spin_idle_debug(struct adreno_device *adreno_dev, const char *str) { struct kgsl_device *device = &adreno_dev->dev; u32 rptr, wptr, status, intstatus, global_status; dev_err(device->dev, str); kgsl_regread(device, GEN8_CP_RB_RPTR_BR, &rptr); kgsl_regread(device, GEN8_CP_RB_WPTR_GC, &wptr); kgsl_regread(device, GEN8_RBBM_STATUS, &status); kgsl_regread(device, GEN8_RBBM_INT_0_STATUS, &intstatus); kgsl_regread(device, GEN8_CP_INTERRUPT_STATUS_GLOBAL, &global_status); dev_err(device->dev, "rb=%d pos=%X/%X rbbm_status=%8.8X int_0_status=%8.8X global_status=%8.8X\n", adreno_dev->cur_rb ? 
adreno_dev->cur_rb->id : -1, rptr, wptr, status, intstatus, global_status); kgsl_device_snapshot(device, NULL, NULL, false); } /* * gen8_send_cp_init() - Initialize ringbuffer * @adreno_dev: Pointer to adreno device * @rb: Pointer to the ringbuffer of device * * Submit commands for ME initialization, */ static int gen8_send_cp_init(struct adreno_device *adreno_dev, struct adreno_ringbuffer *rb) { u32 *cmds; int ret; cmds = adreno_ringbuffer_allocspace(rb, GEN8_CP_INIT_DWORDS); if (IS_ERR(cmds)) return PTR_ERR(cmds); gen8_cp_init_cmds(adreno_dev, cmds); ret = gen8_ringbuffer_submit(rb, NULL); if (ret) return ret; ret = adreno_spin_idle(adreno_dev, 2000); if (ret) { gen8_spin_idle_debug(adreno_dev, "CP initialization failed to idle\n"); rb->wptr = 0; rb->_wptr = 0; } return ret; } static int gen8_post_start(struct adreno_device *adreno_dev) { int ret; u32 *cmds; struct adreno_ringbuffer *rb = adreno_dev->cur_rb; struct adreno_preemption *preempt = &adreno_dev->preempt; u64 kmd_postamble_addr; if (!adreno_is_preemption_enabled(adreno_dev)) return 0; kmd_postamble_addr = SCRATCH_POSTAMBLE_ADDR(KGSL_DEVICE(adreno_dev)); gen8_preemption_prepare_postamble(adreno_dev); cmds = adreno_ringbuffer_allocspace(rb, (preempt->postamble_bootup_len ? 16 : 12)); if (IS_ERR(cmds)) return PTR_ERR(cmds); *cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 6); *cmds++ = SET_PSEUDO_PRIV_NON_SECURE_SAVE_ADDR; *cmds++ = lower_32_bits(rb->preemption_desc->gpuaddr); *cmds++ = upper_32_bits(rb->preemption_desc->gpuaddr); *cmds++ = SET_PSEUDO_PRIV_SECURE_SAVE_ADDR; *cmds++ = lower_32_bits(rb->secure_preemption_desc->gpuaddr); *cmds++ = upper_32_bits(rb->secure_preemption_desc->gpuaddr); if (preempt->postamble_bootup_len) { *cmds++ = cp_type7_packet(CP_SET_AMBLE, 3); *cmds++ = lower_32_bits(kmd_postamble_addr); *cmds++ = upper_32_bits(kmd_postamble_addr); *cmds++ = FIELD_PREP(GENMASK(22, 20), CP_KMD_AMBLE_TYPE) | (FIELD_PREP(GENMASK(19, 0), adreno_dev->preempt.postamble_bootup_len)); } *cmds++ = cp_type7_packet(CP_CONTEXT_SWITCH_YIELD, 4); *cmds++ = 0; *cmds++ = 0; *cmds++ = 0; /* generate interrupt on preemption completion */ *cmds++ = 0; ret = gen8_ringbuffer_submit(rb, NULL); if (!ret) { ret = adreno_spin_idle(adreno_dev, 2000); if (ret) gen8_spin_idle_debug(adreno_dev, "hw preemption initialization failed to idle\n"); } return ret; } int gen8_rb_start(struct adreno_device *adreno_dev) { const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev); struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_SQE); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct adreno_ringbuffer *rb; u64 addr; int ret, i; u32 *cmds; /* Clear all the ringbuffers */ FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { memset(rb->buffer_desc->hostptr, 0xaa, KGSL_RB_SIZE); kgsl_sharedmem_writel(device->scratch, SCRATCH_RB_OFFSET(rb->id, rptr), 0); kgsl_sharedmem_writel(device->scratch, SCRATCH_RB_OFFSET(rb->id, bv_rptr), 0); rb->wptr = 0; rb->_wptr = 0; rb->wptr_preempt_end = UINT_MAX; } gen8_preemption_start(adreno_dev); /* Set up the current ringbuffer */ rb = ADRENO_CURRENT_RINGBUFFER(adreno_dev); addr = SCRATCH_RB_GPU_ADDR(device, rb->id, rptr); kgsl_regwrite(device, GEN8_CP_RB_RPTR_ADDR_LO_BR, lower_32_bits(addr)); kgsl_regwrite(device, GEN8_CP_RB_RPTR_ADDR_HI_BR, upper_32_bits(addr)); addr = SCRATCH_RB_GPU_ADDR(device, rb->id, bv_rptr); kgsl_regwrite(device, GEN8_CP_RB_RPTR_ADDR_LO_BV, lower_32_bits(addr)); kgsl_regwrite(device, GEN8_CP_RB_RPTR_ADDR_HI_BV, upper_32_bits(addr)); kgsl_regwrite(device, GEN8_CP_RB_CNTL_GC, 
GEN8_CP_RB_CNTL_DEFAULT); kgsl_regwrite(device, GEN8_CP_RB_BASE_LO_GC, lower_32_bits(rb->buffer_desc->gpuaddr)); kgsl_regwrite(device, GEN8_CP_RB_BASE_HI_GC, upper_32_bits(rb->buffer_desc->gpuaddr)); /* Program the ucode base for CP */ kgsl_regwrite(device, GEN8_CP_SQE_INSTR_BASE_LO, lower_32_bits(fw->memdesc->gpuaddr)); kgsl_regwrite(device, GEN8_CP_SQE_INSTR_BASE_HI, upper_32_bits(fw->memdesc->gpuaddr)); /* Clear the SQE_HALT to start the CP engine */ kgsl_regwrite(device, GEN8_CP_SQE_CNTL, 1); ret = gen8_send_cp_init(adreno_dev, rb); if (ret) return ret; ret = adreno_zap_shader_load(adreno_dev, gen8_core->zap_name); if (ret) return ret; /* * Take the GPU out of secure mode. Try the zap shader if it is loaded, * otherwise just try to write directly to the secure control register */ if (!adreno_dev->zap_loaded) kgsl_regwrite(device, GEN8_RBBM_SECVID_TRUST_CNTL, 0); else { cmds = adreno_ringbuffer_allocspace(rb, 2); if (IS_ERR(cmds)) return PTR_ERR(cmds); *cmds++ = cp_type7_packet(CP_SET_SECURE_MODE, 1); *cmds++ = 0; ret = gen8_ringbuffer_submit(rb, NULL); if (!ret) { ret = adreno_spin_idle(adreno_dev, 2000); if (ret) { gen8_spin_idle_debug(adreno_dev, "Switch to unsecure failed to idle\n"); return ret; } } } return gen8_post_start(adreno_dev); } /* * gen8_gpu_keepalive() - GMU reg write to request GPU stays on * @adreno_dev: Pointer to the adreno device that has the GMU * @state: State to set: true is ON, false is OFF */ static void gen8_gpu_keepalive(struct adreno_device *adreno_dev, bool state) { gmu_core_regwrite(KGSL_DEVICE(adreno_dev), GEN8_GMUCX_PWR_COL_KEEPALIVE, state); } bool gen8_hw_isidle(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); u32 reg; gmu_core_regread(device, GEN8_GMUAO_GPU_CX_BUSY_STATUS, ®); /* Bit 23 is GPUBUSYIGNAHB */ return (reg & BIT(23)) ? 
false : true; } int gen8_microcode_read(struct adreno_device *adreno_dev) { struct adreno_firmware *sqe_fw = ADRENO_FW(adreno_dev, ADRENO_FW_SQE); const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev); return adreno_get_firmware(adreno_dev, gen8_core->sqefw_name, sqe_fw); } /* CP Interrupt bits */ #define GEN8_CP_GLOBAL_INT_HWFAULTBR 0 #define GEN8_CP_GLOBAL_INT_HWFAULTBV 1 #define GEN8_CP_GLOBAL_INT_HWFAULTLPAC 2 #define GEN8_CP_GLOBAL_INT_HWFAULTAQE0 3 #define GEN8_CP_GLOBAL_INT_HWFAULTAQE1 4 #define GEN8_CP_GLOBAL_INT_HWFAULTDDEBR 5 #define GEN8_CP_GLOBAL_INT_HWFAULTDDEBV 6 #define GEN8_CP_GLOBAL_INT_SWFAULTBR 16 #define GEN8_CP_GLOBAL_INT_SWFAULTBV 17 #define GEN8_CP_GLOBAL_INT_SWFAULTLPAC 18 #define GEN8_CP_GLOBAL_INT_SWFAULTAQE0 19 #define GEN8_CP_GLOBAL_INT_SWFAULTAQE1 20 #define GEN8_CP_GLOBAL_INT_SWFAULTDDEBR 21 #define GEN8_CP_GLOBAL_INT_SWFAULTDDEBV 22 /* CP HW Fault status bits */ #define CP_HW_RBFAULT 0 #define CP_HW_IB1FAULT 1 #define CP_HW_IB2FAULT 2 #define CP_HW_IB3FAULT 3 #define CP_HW_SDSFAULT 4 #define CP_HW_MRBFAULT 5 #define CP_HW_VSDFAULT 6 #define CP_HW_SQEREADBRUSTOVF 8 #define CP_HW_EVENTENGINEOVF 9 #define CP_HW_UCODEERROR 10 /* CP SW Fault status bits */ #define CP_SW_CSFRBWRAP 0 #define CP_SW_CSFIB1WRAP 1 #define CP_SW_CSFIB2WRAP 2 #define CP_SW_CSFIB3WRAP 3 #define CP_SW_SDSWRAP 4 #define CP_SW_MRBWRAP 5 #define CP_SW_VSDWRAP 6 #define CP_SW_OPCODEERROR 8 #define CP_SW_VSDPARITYERROR 9 #define CP_SW_REGISTERPROTECTIONERROR 10 #define CP_SW_ILLEGALINSTRUCTION 11 #define CP_SW_SMMUFAULT 12 #define CP_SW_VBIFRESPCLIENT 13 #define CP_SW_VBIFRESPTYPE 19 #define CP_SW_VBIFRESPREAD 21 #define CP_SW_VBIFRESP 22 #define CP_SW_RTWROVF 23 #define CP_SW_LRZRTWROVF 24 #define CP_SW_LRZRTREFCNTOVF 25 #define CP_SW_LRZRTCLRRESMISS 26 static void gen8_get_cp_hwfault_status(struct adreno_device *adreno_dev, u32 status) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); u32 hw_status; u32 pipe_id = PIPE_NONE; const char * const table[] = { [CP_HW_RBFAULT] = "RBFAULT", [CP_HW_IB1FAULT] = "IB1FAULT", [CP_HW_IB2FAULT] = "IB2FAULT", [CP_HW_SDSFAULT] = "SDSFAULT", [CP_HW_MRBFAULT] = "MRGFAULT", [CP_HW_VSDFAULT] = "VSDFAULT", [CP_HW_SQEREADBRUSTOVF] = "SQEREADBRUSTOVF", [CP_HW_EVENTENGINEOVF] = "EVENTENGINEOVF", [CP_HW_UCODEERROR] = "UCODEERROR", }; switch (status) { case BIT(GEN8_CP_GLOBAL_INT_HWFAULTBR): pipe_id = PIPE_BR; break; case BIT(GEN8_CP_GLOBAL_INT_HWFAULTBV): pipe_id = PIPE_BV; break; case BIT(GEN8_CP_GLOBAL_INT_HWFAULTLPAC): pipe_id = PIPE_LPAC; break; case BIT(GEN8_CP_GLOBAL_INT_HWFAULTAQE0): pipe_id = PIPE_AQE0; break; case BIT(GEN8_CP_GLOBAL_INT_HWFAULTAQE1): pipe_id = PIPE_AQE1; break; case BIT(GEN8_CP_GLOBAL_INT_HWFAULTDDEBR): pipe_id = PIPE_DDE_BR; break; case BIT(GEN8_CP_GLOBAL_INT_HWFAULTDDEBV): pipe_id = PIPE_DDE_BV; break; } gen8_regread_aperture(device, GEN8_CP_HW_FAULT_STATUS_PIPE, &hw_status, pipe_id, 0, 0); /* Clear aperture register */ gen8_host_aperture_set(adreno_dev, 0, 0, 0); dev_crit_ratelimited(device->dev, "CP HW Fault pipe_id:%u %s\n", pipe_id, hw_status < ARRAY_SIZE(table) ? 
table[hw_status] : "UNKNOWN"); } static void gen8_get_cp_swfault_status(struct adreno_device *adreno_dev, u32 status) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); u32 sw_status, status1; u32 opcode, pipe_id = PIPE_NONE; const char * const table[] = { [CP_SW_CSFRBWRAP] = "CSFRBWRAP", [CP_SW_CSFIB1WRAP] = "CSFIB1WRAP", [CP_SW_CSFIB2WRAP] = "CSFIB2WRAP", [CP_SW_CSFIB3WRAP] = "CSFIB3WRAP", [CP_SW_SDSWRAP] = "SDSWRAP", [CP_SW_MRBWRAP] = "MRBWRAP", [CP_SW_VSDWRAP] = "VSDWRAP", [CP_SW_OPCODEERROR] = "OPCODEERROR", [CP_SW_VSDPARITYERROR] = "VSDPARITYERROR", [CP_SW_REGISTERPROTECTIONERROR] = "REGISTERPROTECTIONERROR", [CP_SW_ILLEGALINSTRUCTION] = "ILLEGALINSTRUCTION", [CP_SW_SMMUFAULT] = "SMMUFAULT", [CP_SW_VBIFRESPCLIENT] = "VBIFRESPCLIENT", [CP_SW_VBIFRESPTYPE] = "VBIFRESPTYPE", [CP_SW_VBIFRESPREAD] = "VBIFRESPREAD", [CP_SW_VBIFRESP] = "VBIFRESP", [CP_SW_RTWROVF] = "RTWROVF", [CP_SW_LRZRTWROVF] = "LRZRTWROVF", [CP_SW_LRZRTREFCNTOVF] = "LRZRTREFCNTOVF", [CP_SW_LRZRTCLRRESMISS] = "LRZRTCLRRESMISS", }; switch (status) { case BIT(GEN8_CP_GLOBAL_INT_SWFAULTBR): pipe_id = PIPE_BR; break; case BIT(GEN8_CP_GLOBAL_INT_SWFAULTBV): pipe_id = PIPE_BV; break; case BIT(GEN8_CP_GLOBAL_INT_SWFAULTLPAC): pipe_id = PIPE_LPAC; break; case BIT(GEN8_CP_GLOBAL_INT_SWFAULTAQE0): pipe_id = PIPE_AQE0; break; case BIT(GEN8_CP_GLOBAL_INT_SWFAULTAQE1): pipe_id = PIPE_AQE1; break; case BIT(GEN8_CP_GLOBAL_INT_SWFAULTDDEBR): pipe_id = PIPE_DDE_BR; break; case BIT(GEN8_CP_GLOBAL_INT_SWFAULTDDEBV): pipe_id = PIPE_DDE_BV; break; } gen8_regread_aperture(device, GEN8_CP_INTERRUPT_STATUS_PIPE, &sw_status, pipe_id, 0, 0); dev_crit_ratelimited(device->dev, "CP SW Fault pipe_id: %u %s\n", pipe_id, sw_status < ARRAY_SIZE(table) ? table[sw_status] : "UNKNOWN"); if (sw_status & BIT(CP_SW_OPCODEERROR)) { gen8_regwrite_aperture(device, GEN8_CP_SQE_STAT_ADDR_PIPE, 1, pipe_id, 0, 0); gen8_regread_aperture(device, GEN8_CP_SQE_STAT_DATA_PIPE, &opcode, pipe_id, 0, 0); dev_crit_ratelimited(device->dev, "CP opcode error interrupt | opcode=0x%8.8x\n", opcode); } if (sw_status & BIT(CP_SW_REGISTERPROTECTIONERROR)) { gen8_regread_aperture(device, GEN8_CP_PROTECT_STATUS_PIPE, &status1, pipe_id, 0, 0); dev_crit_ratelimited(device->dev, "CP | Protected mode error | %s | addr=%lx | status=%x\n", FIELD_GET(GENMASK(20, 20), status1) ? 
"READ" : "WRITE", FIELD_GET(GENMASK(17, 0), status1), status1); } /* Clear aperture register */ gen8_host_aperture_set(adreno_dev, 0, 0, 0); } static void gen8_cp_hw_err_callback(struct adreno_device *adreno_dev, int bit) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); u32 global_status; u32 hw_fault, sw_fault; kgsl_regread(device, GEN8_CP_INTERRUPT_STATUS_GLOBAL, &global_status); dev_crit_ratelimited(device->dev, "CP fault int_status_global=0x%x\n", global_status); hw_fault = FIELD_GET(GENMASK(6, 0), global_status); sw_fault = FIELD_GET(GENMASK(22, 16), global_status); if (hw_fault) gen8_get_cp_hwfault_status(adreno_dev, hw_fault); else if (sw_fault) gen8_get_cp_swfault_status(adreno_dev, sw_fault); } static void gen8_err_callback(struct adreno_device *adreno_dev, int bit) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); switch (bit) { case GEN8_INT_AHBERROR: { u32 err_details_0, err_details_1; kgsl_regread(device, GEN8_CP_RL_ERROR_DETAILS_0, &err_details_0); kgsl_regread(device, GEN8_CP_RL_ERROR_DETAILS_1, &err_details_1); dev_crit_ratelimited(device->dev, "CP: AHB bus error, CP_RL_ERROR_DETAILS_0:0x%x CP_RL_ERROR_DETAILS_1:0x%x\n", err_details_0, err_details_1); break; } case GEN8_INT_ATBASYNCFIFOOVERFLOW: dev_crit_ratelimited(device->dev, "RBBM: ATB ASYNC overflow\n"); break; case GEN8_INT_ATBBUSOVERFLOW: dev_crit_ratelimited(device->dev, "RBBM: ATB bus overflow\n"); break; case GEN8_INT_OUTOFBOUNDACCESS: dev_crit_ratelimited(device->dev, "UCHE: Out of bounds access\n"); break; case GEN8_INT_UCHETRAPINTERRUPT: dev_crit_ratelimited(device->dev, "UCHE: Trap interrupt\n"); break; case GEN8_INT_TSBWRITEERROR: { u32 lo, hi; kgsl_regread(device, GEN8_RBBM_SECVID_TSB_STATUS_LO, &lo); kgsl_regread(device, GEN8_RBBM_SECVID_TSB_STATUS_HI, &hi); dev_crit_ratelimited(device->dev, "TSB: Write error interrupt: Address: 0x%lx MID: %lu\n", FIELD_GET(GENMASK(16, 0), hi) << 32 | lo, FIELD_GET(GENMASK(31, 23), hi)); break; } default: dev_crit_ratelimited(device->dev, "Unknown interrupt %d\n", bit); } } static const char *const uche_client[] = { "BR_VFD", "BR_SP", "BR_VSC", "BR_VPC", "BR_HLSQ", "BR_PC", "BR_LRZ", "BR_TP", "BV_VFD", "BV_SP", "BV_VSC", "BV_VPC", "BV_HLSQ", "BV_PC", "BV_LRZ", "BV_TP", "STCHE", }; static const char *const uche_lpac_client[] = { "-", "SP_LPAC", "-", "-", "HLSQ_LPAC", "-", "-", "TP_LPAC" }; #define SCOOBYDOO 0x5c00bd00 static const char *gen8_fault_block_uche(struct kgsl_device *device, char *str, int size, bool lpac) { struct adreno_device *adreno_dev = ADRENO_DEVICE(device); u32 uche_client_id = adreno_dev->uche_client_pf; const char *uche_client_str, *fault_block; /* * Smmu driver takes a vote on CX gdsc before calling the kgsl * pagefault handler. If there is contention for device mutex in this * path and the dispatcher fault handler is holding this lock, trying * to turn off CX gdsc will fail during the reset. So to avoid blocking * here, try to lock device mutex and return if it fails. 
*/ if (!mutex_trylock(&device->mutex)) goto regread_fail; if (!kgsl_state_is_awake(device)) { mutex_unlock(&device->mutex); goto regread_fail; } kgsl_regread(device, GEN8_UCHE_CLIENT_PF, &uche_client_id); mutex_unlock(&device->mutex); /* Ignore the value if the gpu is in IFPC */ if (uche_client_id == SCOOBYDOO) { uche_client_id = adreno_dev->uche_client_pf; goto regread_fail; } /* UCHE client id mask is bits [6:0] */ uche_client_id &= GENMASK(6, 0); regread_fail: if (lpac) { fault_block = "UCHE_LPAC"; if (uche_client_id >= ARRAY_SIZE(uche_lpac_client)) goto fail; uche_client_str = uche_lpac_client[uche_client_id]; } else { fault_block = "UCHE"; if (uche_client_id >= ARRAY_SIZE(uche_client)) goto fail; uche_client_str = uche_client[uche_client_id]; } snprintf(str, size, "%s: %s", fault_block, uche_client_str); return str; fail: snprintf(str, size, "%s: Unknown (client_id: %u)", fault_block, uche_client_id); return str; } static const char *gen8_iommu_fault_block(struct kgsl_device *device, u32 fsynr1) { u32 mid = fsynr1 & 0xff; static char str[36]; switch (mid) { case 0x0: return "CP"; case 0x1: return "UCHE: Unknown"; case 0x2: return "UCHE_LPAC: Unknown"; case 0x3: return gen8_fault_block_uche(device, str, sizeof(str), false); case 0x4: return "CCU"; case 0x5: return "Flag cache"; case 0x6: return "PREFETCH"; case 0x7: return "GMU"; case 0x8: return gen8_fault_block_uche(device, str, sizeof(str), true); case 0x9: return "UCHE_HPAC"; } snprintf(str, sizeof(str), "Unknown (mid: %u)", mid); return str; } static void gen8_cp_callback(struct adreno_device *adreno_dev, int bit) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); if (adreno_is_preemption_enabled(adreno_dev)) gen8_preemption_trigger(adreno_dev, true); adreno_dispatcher_schedule(device); } /* * gen8_gpc_err_int_callback() - Isr for GPC error interrupts * @adreno_dev: Pointer to device * @bit: Interrupt bit */ static void gen8_gpc_err_int_callback(struct adreno_device *adreno_dev, int bit) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); /* * GPC error is typically the result of mistake SW programming. * Force GPU fault for this interrupt so that we can debug it * with help of register dump. */ dev_crit(device->dev, "RBBM: GPC error\n"); adreno_irqctrl(adreno_dev, 0); /* Trigger a fault in the dispatcher - this will effect a restart */ adreno_dispatcher_fault(adreno_dev, ADRENO_SOFT_FAULT); } /* * gen8_swfuse_violation_callback() - ISR for software fuse violation interrupt * @adreno_dev: Pointer to device * @bit: Interrupt bit */ static void gen8_swfuse_violation_callback(struct adreno_device *adreno_dev, int bit) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); u32 status; /* * SWFUSEVIOLATION error is typically the result of enabling software * feature which is not supported by the hardware. Following are the * Feature violation will be reported * 1) FASTBLEND (BIT:0): NO Fault, RB will send the workload to legacy * blender HW pipeline. * 2) LPAC (BIT:1): Fault * 3) RAYTRACING (BIT:2): Fault */ kgsl_regread(device, GEN8_RBBM_SW_FUSE_INT_STATUS, &status); /* * RBBM_INT_CLEAR_CMD will not clear SWFUSEVIOLATION interrupt. Hence * do explicit swfuse irq clear. 
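	 *
	 * Masking the interrupt by writing 0 to GEN8_RBBM_SW_FUSE_INT_MASK
	 * below keeps it from firing again.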
*/ kgsl_regwrite(device, GEN8_RBBM_SW_FUSE_INT_MASK, 0); dev_crit_ratelimited(device->dev, "RBBM: SW Feature Fuse violation status=0x%8.8x\n", status); /* Trigger a fault in the dispatcher for LPAC and RAYTRACING violation */ if (status & GENMASK(GEN8_RAYTRACING_SW_FUSE, GEN8_LPAC_SW_FUSE)) { adreno_irqctrl(adreno_dev, 0); adreno_dispatcher_fault(adreno_dev, ADRENO_HARD_FAULT); } } static const struct adreno_irq_funcs gen8_irq_funcs[32] = { ADRENO_IRQ_CALLBACK(NULL), /* 0 - RBBM_GPU_IDLE */ ADRENO_IRQ_CALLBACK(gen8_err_callback), /* 1 - RBBM_AHB_ERROR */ ADRENO_IRQ_CALLBACK(NULL), /* 2 - UNUSED */ ADRENO_IRQ_CALLBACK(NULL), /* 3 - UNUSED */ ADRENO_IRQ_CALLBACK(NULL), /* 4 - CPIPCINT0 */ ADRENO_IRQ_CALLBACK(NULL), /* 5 - CPIPCINT1 */ ADRENO_IRQ_CALLBACK(gen8_err_callback), /* 6 - ATBASYNCOVERFLOW */ ADRENO_IRQ_CALLBACK(gen8_gpc_err_int_callback), /* 7 - GPC_ERR */ ADRENO_IRQ_CALLBACK(gen8_preemption_callback),/* 8 - CP_SW */ ADRENO_IRQ_CALLBACK(gen8_cp_hw_err_callback), /* 9 - CP_HW_ERROR */ ADRENO_IRQ_CALLBACK(NULL), /* 10 - CP_CCU_FLUSH_DEPTH_TS */ ADRENO_IRQ_CALLBACK(NULL), /* 11 - CP_CCU_FLUSH_COLOR_TS */ ADRENO_IRQ_CALLBACK(NULL), /* 12 - CP_CCU_RESOLVE_TS */ ADRENO_IRQ_CALLBACK(NULL), /* 13 - UNUSED */ ADRENO_IRQ_CALLBACK(NULL), /* 14 - UNUSED */ ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 15 - CP_RB_INT */ ADRENO_IRQ_CALLBACK(NULL), /* 16 - CP_RB_INT_LPAC*/ ADRENO_IRQ_CALLBACK(NULL), /* 17 - CP_RB_DONE_TS */ ADRENO_IRQ_CALLBACK(NULL), /* 18 - UNUSED */ ADRENO_IRQ_CALLBACK(NULL), /* 19 - UNUSED */ ADRENO_IRQ_CALLBACK(gen8_cp_callback), /* 20 - CP_CACHE_FLUSH_TS */ ADRENO_IRQ_CALLBACK(NULL), /* 21 - CP_CACHE_TS_LPAC */ ADRENO_IRQ_CALLBACK(gen8_err_callback), /* 22 - RBBM_ATB_BUS_OVERFLOW */ ADRENO_IRQ_CALLBACK(adreno_hang_int_callback), /* 23 - MISHANGDETECT */ ADRENO_IRQ_CALLBACK(gen8_err_callback), /* 24 - UCHE_OOB_ACCESS */ ADRENO_IRQ_CALLBACK(gen8_err_callback), /* 25 - UCHE_TRAP_INTR */ ADRENO_IRQ_CALLBACK(NULL), /* 26 - DEBBUS_INTR_0 */ ADRENO_IRQ_CALLBACK(NULL), /* 27 - DEBBUS_INTR_1 */ ADRENO_IRQ_CALLBACK(gen8_err_callback), /* 28 - TSBWRITEERROR */ ADRENO_IRQ_CALLBACK(gen8_swfuse_violation_callback), /* 29 - SWFUSEVIOLATION */ ADRENO_IRQ_CALLBACK(NULL), /* 30 - ISDB_CPU_IRQ */ ADRENO_IRQ_CALLBACK(NULL), /* 31 - ISDB_UNDER_DEBUG */ }; /* * If the AHB fence is not in ALLOW mode when we receive an RBBM * interrupt, something went wrong. This means that we cannot proceed * since the IRQ status and clear registers are not accessible. * This is usually harmless because the GMU will abort power collapse * and change the fence back to ALLOW. Poll so that this can happen. 
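 * gen8_irq_poll_fence() below gives the fence roughly 100 retries with a
 * 1 us delay between reads (about 100 us) before reporting a GMU fault.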
*/ static int gen8_irq_poll_fence(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); u32 status, fence, fence_retries = 0; u64 a, b, c; a = gpudev->read_alwayson(adreno_dev); kgsl_regread(device, GEN8_GMUAO_AHB_FENCE_CTRL, &fence); while (fence != 0) { b = gpudev->read_alwayson(adreno_dev); /* Wait for small time before trying again */ udelay(1); kgsl_regread(device, GEN8_GMUAO_AHB_FENCE_CTRL, &fence); if (fence_retries == 100 && fence != 0) { c = gpudev->read_alwayson(adreno_dev); kgsl_regread(device, GEN8_GMUAO_RBBM_INT_UNMASKED_STATUS_SHADOW, &status); dev_crit_ratelimited(device->dev, "status=0x%x Unmasked status=0x%x Mask=0x%x timestamps: %llx %llx %llx\n", status & adreno_dev->irq_mask, status, adreno_dev->irq_mask, a, b, c); return -ETIMEDOUT; } fence_retries++; } return 0; } static irqreturn_t gen8_irq_handler(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); irqreturn_t ret = IRQ_NONE; u32 status; /* * GPU can power down once the INT_0_STATUS is read below. * But there still might be some register reads required so * force the GMU/GPU into KEEPALIVE mode until done with the ISR. */ gen8_gpu_keepalive(adreno_dev, true); if (gen8_irq_poll_fence(adreno_dev)) { adreno_dispatcher_fault(adreno_dev, ADRENO_GMU_FAULT); goto done; } kgsl_regread(device, GEN8_RBBM_INT_0_STATUS, &status); kgsl_regwrite(device, GEN8_RBBM_INT_CLEAR_CMD, status); ret = adreno_irq_callbacks(adreno_dev, gen8_irq_funcs, status); trace_kgsl_gen8_irq_status(adreno_dev, status); done: /* If hard fault, then let snapshot turn off the keepalive */ if (!(adreno_gpu_fault(adreno_dev) & ADRENO_HARD_FAULT)) gen8_gpu_keepalive(adreno_dev, false); return ret; } static irqreturn_t gen8_cx_host_irq_handler(int irq, void *data) { struct kgsl_device *device = data; struct adreno_device *adreno_dev = ADRENO_DEVICE(device); u32 status; adreno_cx_misc_regread(adreno_dev, GEN8_GPU_CX_MISC_INT_0_STATUS, &status); adreno_cx_misc_regwrite(adreno_dev, GEN8_GPU_CX_MISC_INT_CLEAR_CMD, status); if (status & BIT(GEN8_CX_MISC_GPU_CC_IRQ)) KGSL_PWRCTRL_LOG_FREQLIM(device); if (status & ~GEN8_CX_MISC_INT_MASK) dev_err_ratelimited(device->dev, "Unhandled CX MISC interrupts 0x%lx\n", status & ~GEN8_CX_MISC_INT_MASK); return IRQ_HANDLED; } int gen8_probe_common(struct platform_device *pdev, struct adreno_device *adreno_dev, u32 chipid, const struct adreno_gpu_core *gpucore) { const struct adreno_gpudev *gpudev = gpucore->gpudev; struct kgsl_device *device = KGSL_DEVICE(adreno_dev); const struct adreno_gen8_core *gen8_core = container_of(gpucore, struct adreno_gen8_core, base); int ret; adreno_dev->gpucore = gpucore; adreno_dev->chipid = chipid; adreno_reg_offset_init(gpudev->reg_offsets); adreno_dev->hwcg_enabled = true; adreno_dev->uche_client_pf = 1; kgsl_pwrscale_fast_bus_hint(gen8_core->fast_bus_hint); device->pwrctrl.cx_cfg_gdsc_offset = GEN8_GPU_CC_CX_CFG_GDSCR; device->pwrctrl.rt_bus_hint = gen8_core->rt_bus_hint; device->cx_host_irq_num = kgsl_request_irq_optional(pdev, "cx_host_irq", gen8_cx_host_irq_handler, device); ret = adreno_device_probe(pdev, adreno_dev); if (ret) return ret; if (adreno_preemption_feature_set(adreno_dev)) { adreno_dev->preempt.preempt_level = gen8_core->preempt_level; adreno_dev->preempt.skipsaverestore = true; adreno_dev->preempt.usesgmem = true; set_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv); } /* debugfs node for ACD calibration */ 
debugfs_create_file("acd_calibrate", 0644, device->d_debugfs, device, &acd_cal_fops); /* Dump additional AQE 16KB data on top of default 128KB(64(BR)+64(BV)) */ device->snapshot_ctxt_record_size = ADRENO_FEATURE(adreno_dev, ADRENO_AQE) ? (GEN8_SNAPSHOT_CTXRECORD_SIZE_IN_BYTES + SZ_16K) : GEN8_SNAPSHOT_CTXRECORD_SIZE_IN_BYTES; return 0; } /* Register offset defines for Gen8, in order of enum adreno_regs */ static u32 gen8_register_offsets[ADRENO_REG_REGISTER_MAX] = { ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE, GEN8_CP_RB_BASE_LO_GC), ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE_HI, GEN8_CP_RB_BASE_HI_GC), ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR, GEN8_CP_RB_RPTR_BR), ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_WPTR, GEN8_CP_RB_WPTR_GC), ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_CNTL, GEN8_CP_SQE_CNTL), ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE, GEN8_CP_IB1_BASE_LO_PIPE), ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE_HI, GEN8_CP_IB1_BASE_HI_PIPE), ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BUFSZ, GEN8_CP_IB1_REM_SIZE_PIPE), ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE, GEN8_CP_IB2_BASE_LO_PIPE), ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE_HI, GEN8_CP_IB2_BASE_HI_PIPE), ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BUFSZ, GEN8_CP_IB2_REM_SIZE_PIPE), ADRENO_REG_DEFINE(ADRENO_REG_RBBM_STATUS, GEN8_RBBM_STATUS), ADRENO_REG_DEFINE(ADRENO_REG_RBBM_INT_0_MASK, GEN8_RBBM_INT_0_MASK), ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SW_RESET_CMD, GEN8_RBBM_SW_RESET_CMD), ADRENO_REG_DEFINE(ADRENO_REG_GMU_AO_HOST_INTERRUPT_MASK, GEN8_GMUAO_AO_HOST_INTERRUPT_MASK), ADRENO_REG_DEFINE(ADRENO_REG_GMU_GMU2HOST_INTR_MASK, GEN8_GMUCX_GMU2HOST_INTR_MASK), }; static u32 _get_pipeid(u32 groupid) { switch (groupid) { case KGSL_PERFCOUNTER_GROUP_BV_PC: fallthrough; case KGSL_PERFCOUNTER_GROUP_BV_VFD: fallthrough; case KGSL_PERFCOUNTER_GROUP_BV_VPC: fallthrough; case KGSL_PERFCOUNTER_GROUP_BV_TSE: fallthrough; case KGSL_PERFCOUNTER_GROUP_BV_RAS: fallthrough; case KGSL_PERFCOUNTER_GROUP_BV_LRZ: fallthrough; case KGSL_PERFCOUNTER_GROUP_BV_HLSQ: return PIPE_BV; case KGSL_PERFCOUNTER_GROUP_PC: fallthrough; case KGSL_PERFCOUNTER_GROUP_VFD: fallthrough; case KGSL_PERFCOUNTER_GROUP_HLSQ: fallthrough; case KGSL_PERFCOUNTER_GROUP_VPC: fallthrough; case KGSL_PERFCOUNTER_GROUP_CCU: fallthrough; case KGSL_PERFCOUNTER_GROUP_CMP: fallthrough; case KGSL_PERFCOUNTER_GROUP_TSE: fallthrough; case KGSL_PERFCOUNTER_GROUP_RAS: fallthrough; case KGSL_PERFCOUNTER_GROUP_LRZ: fallthrough; case KGSL_PERFCOUNTER_GROUP_RB: return PIPE_BR; default: return PIPE_NONE; } } int gen8_perfcounter_remove(struct adreno_device *adreno_dev, struct adreno_perfcount_register *reg, u32 groupid) { const struct adreno_perfcounters *counters = ADRENO_PERFCOUNTERS(adreno_dev); struct gen8_device *gen8_dev = container_of(adreno_dev, struct gen8_device, adreno_dev); const struct adreno_perfcount_group *group; void *ptr = adreno_dev->pwrup_reglist->hostptr; struct cpu_gpu_lock *lock = ptr; u32 offset = ((lock->ifpc_list_len + lock->preemption_list_len) * 2) + (gen8_dev->ext_pwrup_list_len * 3); int i, last_offset, num_removed, start_offset = -1; u32 *data = ptr + sizeof(*lock), pipe = FIELD_PREP(GENMASK(13, 12), _get_pipeid(groupid)); u16 perfcntr_list_len = lock->dynamic_list_len - gen8_dev->ext_pwrup_list_len; if (!perfcntr_list_len) return -EINVAL; group = &(counters->groups[groupid]); if (!(group->flags & ADRENO_PERFCOUNTER_GROUP_RESTORE)) { if (perfcntr_list_len != 2) return 0; if (kgsl_hwlock(lock)) { kgsl_hwunlock(lock); return -EBUSY; } goto disable_perfcounter; } last_offset = offset + (perfcntr_list_len * 3); /* Look for 
the perfcounter to remove in the list */
	for (i = 0; i < perfcntr_list_len - 2; i++) {
		if ((data[offset + 1] == reg->select) && (data[offset] == pipe)) {
			start_offset = offset;
			break;
		}
		offset += 3;
	}

	if (start_offset == -1)
		return -ENOENT;

	for (i = 0; i < PERFCOUNTER_REG_DEPENDENCY_LEN && reg->reg_dependency[i]; i++)
		offset += 3;

	if (kgsl_hwlock(lock)) {
		kgsl_hwunlock(lock);
		return -EBUSY;
	}

	/* Let offset point to the first entry that is going to be retained */
	offset += 3;

	memcpy(&data[start_offset], &data[offset], (last_offset - offset) * sizeof(u32));
	memset(&data[start_offset + (last_offset - offset)], 0,
		(offset - start_offset) * sizeof(u32));

	num_removed = offset - start_offset;
	do_div(num_removed, 3);
	lock->dynamic_list_len -= num_removed;

disable_perfcounter:
	/*
	 * If only the two perfcounter control entries remain and no_restore_count
	 * is 0, then we can remove the perfcounter controls from the list.
	 */
	if (perfcntr_list_len == 2 && !adreno_dev->no_restore_count) {
		memset(&data[offset], 0, 6 * sizeof(u32));
		lock->dynamic_list_len = gen8_dev->ext_pwrup_list_len;
	}

	kgsl_hwunlock(lock);
	return 0;
}

int gen8_perfcounter_update(struct adreno_device *adreno_dev,
	struct adreno_perfcount_register *reg, bool update_reg, u32 pipe,
	unsigned long flags)
{
	struct gen8_device *gen8_dev = container_of(adreno_dev,
					struct gen8_device, adreno_dev);
	void *ptr = adreno_dev->pwrup_reglist->hostptr;
	struct cpu_gpu_lock *lock = ptr;
	u32 offset = ((lock->ifpc_list_len + lock->preemption_list_len) * 2) +
			(gen8_dev->ext_pwrup_list_len * 3);
	u32 *data = ptr + sizeof(*lock);
	int i, start_offset = -1;
	u16 perfcntr_list_len = lock->dynamic_list_len - gen8_dev->ext_pwrup_list_len;

	if (flags & ADRENO_PERFCOUNTER_GROUP_RESTORE) {
		for (i = 0; i < perfcntr_list_len - 2; i++) {
			if ((data[offset + 1] == reg->select) && (data[offset] == pipe)) {
				start_offset = offset;
				break;
			}
			offset += 3;
		}
	} else if (perfcntr_list_len) {
		goto update;
	}

	if (kgsl_hwlock(lock)) {
		kgsl_hwunlock(lock);
		return -EBUSY;
	}

	/*
	 * If the perfcounter select register is already present in the reglist
	 * update it, otherwise append the triplet to the end of the list.
	 */
	if (start_offset != -1) {
		data[offset + 2] = reg->countable;
		for (i = 0; i < PERFCOUNTER_REG_DEPENDENCY_LEN && reg->reg_dependency[i]; i++) {
			offset += 3;
			data[offset + 2] = reg->countable;
		}
		kgsl_hwunlock(lock);
		goto update;
	}

	/* Initialize the lock->dynamic_list_len to account for perfcounter controls */
	if (!perfcntr_list_len)
		lock->dynamic_list_len = gen8_dev->ext_pwrup_list_len + 2;

	/*
	 * For all targets the GEN8_RBBM_PERFCTR_CNTL and GEN8_RBBM_SLICE_PERFCTR_CNTL
	 * control entries need to be the last entries, so overwrite the existing
	 * control entries and add them back at the end.
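	 *
	 * Each entry in the dynamic list is a triplet of u32s: the packed pipe
	 * id, the counter select register offset and the countable. Offsets and
	 * list lengths above are therefore stepped in multiples of three.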
	 */
	if (flags & ADRENO_PERFCOUNTER_GROUP_RESTORE) {
		data[offset++] = pipe;
		data[offset++] = reg->select;
		data[offset++] = reg->countable;
		lock->dynamic_list_len++;

		for (i = 0; i < PERFCOUNTER_REG_DEPENDENCY_LEN && reg->reg_dependency[i]; i++) {
			data[offset++] = pipe;
			data[offset++] = reg->reg_dependency[i];
			data[offset++] = reg->countable;
			lock->dynamic_list_len++;
		}
	}

	data[offset++] = FIELD_PREP(GENMASK(15, 12), PIPE_NONE);
	data[offset++] = GEN8_RBBM_PERFCTR_CNTL;
	data[offset++] = 1;

	data[offset++] = FIELD_PREP(GENMASK(15, 12), PIPE_NONE);
	data[offset++] = GEN8_RBBM_SLICE_PERFCTR_CNTL;
	data[offset++] = 1;

	kgsl_hwunlock(lock);

update:
	if (update_reg) {
		struct kgsl_device *device = KGSL_DEVICE(adreno_dev);

		kgsl_regwrite(device, reg->select, reg->countable);
		for (i = 0; i < PERFCOUNTER_REG_DEPENDENCY_LEN && reg->reg_dependency[i]; i++)
			kgsl_regwrite(device, reg->reg_dependency[i], reg->countable);
	}

	return 0;
}

static u64 gen8_read_alwayson(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	u32 lo = 0, hi = 0, tmp = 0;

	/* Always use the GMU AO counter when doing an AHB read */
	gmu_core_regread(device, GEN8_GMUCX_AO_COUNTER_HI, &hi);
	gmu_core_regread(device, GEN8_GMUCX_AO_COUNTER_LO, &lo);

	/* Check for overflow */
	gmu_core_regread(device, GEN8_GMUCX_AO_COUNTER_HI, &tmp);

	if (hi != tmp) {
		gmu_core_regread(device, GEN8_GMUCX_AO_COUNTER_LO, &lo);
		hi = tmp;
	}

	return (((u64) hi) << 32) | lo;
}

static int gen8_lpac_store(struct adreno_device *adreno_dev, bool enable)
{
	if (!ADRENO_FEATURE(adreno_dev, ADRENO_LPAC))
		return -EINVAL;

	if (!(adreno_dev->feature_fuse & BIT(GEN8_LPAC_SW_FUSE)) ||
		(adreno_dev->lpac_enabled == enable))
		return 0;

	/* Power down the GPU before changing the lpac setting */
	return adreno_power_cycle_bool(adreno_dev, &adreno_dev->lpac_enabled, enable);
}

static void gen8_remove(struct adreno_device *adreno_dev)
{
	if (adreno_preemption_feature_set(adreno_dev))
		del_timer(&adreno_dev->preempt.timer);
}

static void gen8_read_bus_stats(struct kgsl_device *device,
		struct kgsl_power_stats *stats, struct adreno_busy_data *busy)
{
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
	u64 ram_cycles, starved_ram;

	ram_cycles = counter_delta(device, adreno_dev->ram_cycles_lo,
		&busy->bif_ram_cycles);

	starved_ram = counter_delta(device, adreno_dev->starved_ram_lo,
		&busy->bif_starved_ram);

	ram_cycles += counter_delta(device, adreno_dev->ram_cycles_lo_ch1_read,
		&busy->bif_ram_cycles_read_ch1);

	ram_cycles += counter_delta(device, adreno_dev->ram_cycles_lo_ch0_write,
		&busy->bif_ram_cycles_write_ch0);

	ram_cycles += counter_delta(device, adreno_dev->ram_cycles_lo_ch1_write,
		&busy->bif_ram_cycles_write_ch1);

	starved_ram += counter_delta(device, adreno_dev->starved_ram_lo_ch1,
		&busy->bif_starved_ram_ch1);

	stats->ram_time = ram_cycles;
	stats->ram_wait = starved_ram;
}

static void gen8_power_stats(struct adreno_device *adreno_dev,
		struct kgsl_power_stats *stats)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	struct adreno_busy_data *busy = &adreno_dev->busy_data;
	u64 gpu_busy;

	/* Set the GPU busy counter for frequency scaling */
	gpu_busy = counter_delta(device, GEN8_GMUCX_POWER_COUNTER_XOCLK_L_0,
		&busy->gpu_busy);

	stats->busy_time = gpu_busy * 10;
	do_div(stats->busy_time, 192);

	if (ADRENO_FEATURE(adreno_dev, ADRENO_IFPC)) {
		u32 ifpc = counter_delta(device, GEN8_GMUCX_POWER_COUNTER_XOCLK_L_4,
			&busy->num_ifpc);

		adreno_dev->ifpc_count += ifpc;
		if (ifpc > 0)
			trace_adreno_ifpc_count(adreno_dev->ifpc_count);
	}

	if (device->pwrctrl.bus_control)
gen8_read_bus_stats(device, stats, busy); if (adreno_dev->bcl_enabled) { u32 a, b, c, bcl_throttle; a = counter_delta(device, GEN8_GMUCX_POWER_COUNTER_XOCLK_L_1, &busy->throttle_cycles[0]); b = counter_delta(device, GEN8_GMUCX_POWER_COUNTER_XOCLK_L_2, &busy->throttle_cycles[1]); c = counter_delta(device, GEN8_GMUCX_POWER_COUNTER_XOCLK_L_3, &busy->throttle_cycles[2]); if (a || b || c) trace_kgsl_bcl_clock_throttling(a, b, c); bcl_throttle = counter_delta(device, GEN8_GMUCX_POWER_COUNTER_XOCLK_L_5, &busy->bcl_throttle); /* * This counts number of cycles throttled in XO cycles. Convert it to * micro seconds by dividing by XO freq which is 19.2MHz. */ adreno_dev->bcl_throttle_time_us += ((bcl_throttle * 10) / 192); } } static int gen8_setproperty(struct kgsl_device_private *dev_priv, u32 type, void __user *value, u32 sizebytes) { struct kgsl_device *device = dev_priv->device; struct adreno_device *adreno_dev = ADRENO_DEVICE(device); u32 enable; if (type != KGSL_PROP_PWRCTRL) return -ENODEV; if (sizebytes != sizeof(enable)) return -EINVAL; if (copy_from_user(&enable, value, sizeof(enable))) return -EFAULT; mutex_lock(&device->mutex); if (enable) { clear_bit(GMU_DISABLE_SLUMBER, &device->gmu_core.flags); kgsl_pwrscale_enable(device); } else { set_bit(GMU_DISABLE_SLUMBER, &device->gmu_core.flags); if (!adreno_active_count_get(adreno_dev)) adreno_active_count_put(adreno_dev); kgsl_pwrscale_disable(device, true); } mutex_unlock(&device->mutex); return 0; } static void gen8_set_isdb_breakpoint_registers(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct clk *clk; int ret; if (!device->set_isdb_breakpoint || device->ftbl->is_hwcg_on(device) || device->qdss_gfx_virt == NULL || !device->force_panic) return; clk = clk_get(&device->pdev->dev, "apb_pclk"); if (IS_ERR(clk)) { dev_err(device->dev, "Unable to get QDSS clock\n"); goto err; } ret = clk_prepare_enable(clk); if (ret) { dev_err(device->dev, "QDSS Clock enable error: %d\n", ret); clk_put(clk); goto err; } /* Issue break command for SPs */ isdb_write(device->qdss_gfx_virt, 0x0000); isdb_write(device->qdss_gfx_virt, 0x1000); isdb_write(device->qdss_gfx_virt, 0x2000); isdb_write(device->qdss_gfx_virt, 0x3000); isdb_write(device->qdss_gfx_virt, 0x4000); isdb_write(device->qdss_gfx_virt, 0x5000); isdb_write(device->qdss_gfx_virt, 0x6000); isdb_write(device->qdss_gfx_virt, 0x7000); isdb_write(device->qdss_gfx_virt, 0x8000); isdb_write(device->qdss_gfx_virt, 0x9000); isdb_write(device->qdss_gfx_virt, 0xa000); isdb_write(device->qdss_gfx_virt, 0xb000); clk_disable_unprepare(clk); clk_put(clk); return; err: /* Do not force kernel panic if isdb writes did not go through */ device->force_panic = false; } static void gen8_swfuse_irqctrl(struct adreno_device *adreno_dev, bool state) { kgsl_regwrite(KGSL_DEVICE(adreno_dev), GEN8_RBBM_SW_FUSE_INT_MASK, state ? 
GEN8_SW_FUSE_INT_MASK : 0); } static void gen8_lpac_fault_header(struct adreno_device *adreno_dev, struct kgsl_drawobj *drawobj) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct adreno_context *drawctxt; u32 status = 0, rptr = 0, wptr = 0, ib1sz = 0, ib2sz = 0, ib3sz = 0; u64 ib1base = 0, ib2base = 0, ib3base = 0; bool gx_on = adreno_gx_is_on(adreno_dev); drawctxt = ADRENO_CONTEXT(drawobj->context); drawobj->context->last_faulted_cmd_ts = drawobj->timestamp; drawobj->context->total_fault_count++; pr_context(device, drawobj->context, "LPAC ctx %u ctx_type %s ts %u policy %lX dispatch_queue=%d\n", drawobj->context->id, kgsl_context_type(drawctxt->type), drawobj->timestamp, CMDOBJ(drawobj)->fault_recovery, drawobj->context->gmu_dispatch_queue); pr_context(device, drawobj->context, "lpac cmdline: %s\n", drawctxt->base.proc_priv->cmdline); if (!gen8_gmu_rpmh_pwr_state_is_active(device) || !gx_on) goto done; kgsl_regread(device, GEN8_RBBM_LPAC_STATUS, &status); kgsl_regread(device, GEN8_CP_RB_RPTR_LPAC, &rptr); kgsl_regread(device, GEN8_CP_RB_WPTR_LPAC, &wptr); gen8_regread64_aperture(device, GEN8_CP_IB1_BASE_LO_PIPE, GEN8_CP_IB1_BASE_HI_PIPE, &ib1base, PIPE_LPAC, 0, 0); gen8_regread_aperture(device, GEN8_CP_IB1_REM_SIZE_PIPE, &ib1sz, PIPE_LPAC, 0, 0); gen8_regread64_aperture(device, GEN8_CP_IB2_BASE_LO_PIPE, GEN8_CP_IB2_BASE_HI_PIPE, &ib2base, PIPE_LPAC, 0, 0); gen8_regread_aperture(device, GEN8_CP_IB2_REM_SIZE_PIPE, &ib2sz, PIPE_LPAC, 0, 0); gen8_regread64_aperture(device, GEN8_CP_IB3_BASE_LO_PIPE, GEN8_CP_IB3_BASE_HI_PIPE, &ib3base, PIPE_LPAC, 0, 0); gen8_regread_aperture(device, GEN8_CP_IB3_REM_SIZE_PIPE, &ib3sz, PIPE_LPAC, 0, 0); gen8_host_aperture_set(adreno_dev, 0, 0, 0); pr_context(device, drawobj->context, "LPAC: status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x ib3 %16.16llX/%4.4x\n", status, rptr, wptr, ib1base, ib1sz, ib2base, ib2sz, ib3base, ib3sz); done: trace_adreno_gpu_fault(drawobj->context->id, drawobj->timestamp, status, rptr, wptr, ib1base, ib1sz, ib2base, ib2sz, adreno_get_level(drawobj->context)); } static void gen8_fault_header(struct adreno_device *adreno_dev, struct kgsl_drawobj *drawobj) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct adreno_context *drawctxt; u32 status = 0, rptr = 0, wptr = 0, ib1sz = 0, ib2sz = 0, ib3sz, rptr_bv = 0; u32 ib1sz_bv = 0, ib2sz_bv = 0, ib3sz_bv, gfx_status, gfx_br_status, gfx_bv_status; u64 ib1base = 0, ib2base = 0, ib3base, ib1base_bv = 0, ib2base_bv, ib3base_bv; u32 ctxt_id = 0, ts = 0; int rb_id = -1; bool gx_on = adreno_gx_is_on(adreno_dev); if (drawobj) { drawctxt = ADRENO_CONTEXT(drawobj->context); drawobj->context->last_faulted_cmd_ts = drawobj->timestamp; drawobj->context->total_fault_count++; ctxt_id = drawobj->context->id; ts = drawobj->timestamp; rb_id = adreno_get_level(drawobj->context); pr_context(device, drawobj->context, "ctx %u ctx_type %s ts %u policy %lX\n", drawobj->context->id, kgsl_context_type(drawctxt->type), drawobj->timestamp, CMDOBJ(drawobj)->fault_recovery); pr_context(device, drawobj->context, "cmdline: %s\n", drawctxt->base.proc_priv->cmdline); } if (!gen8_gmu_rpmh_pwr_state_is_active(device) || !gx_on) goto done; kgsl_regread(device, GEN8_RBBM_STATUS, &status); kgsl_regread(device, GEN8_RBBM_GFX_STATUS, &gfx_status); kgsl_regread(device, GEN8_RBBM_GFX_BV_STATUS, &gfx_bv_status); kgsl_regread(device, GEN8_RBBM_GFX_BR_STATUS, &gfx_br_status); kgsl_regread(device, GEN8_CP_RB_RPTR_BR, &rptr); kgsl_regread(device, GEN8_CP_RB_WPTR_GC, &wptr); kgsl_regread(device, 
GEN8_CP_RB_RPTR_BV, &rptr_bv); gen8_regread64_aperture(device, GEN8_CP_IB1_BASE_LO_PIPE, GEN8_CP_IB1_BASE_HI_PIPE, &ib1base, PIPE_BR, 0, 0); gen8_regread_aperture(device, GEN8_CP_IB1_REM_SIZE_PIPE, &ib1sz, PIPE_BR, 0, 0); gen8_regread64_aperture(device, GEN8_CP_IB2_BASE_LO_PIPE, GEN8_CP_IB2_BASE_HI_PIPE, &ib2base, PIPE_BR, 0, 0); gen8_regread_aperture(device, GEN8_CP_IB2_REM_SIZE_PIPE, &ib2sz, PIPE_BR, 0, 0); gen8_regread64_aperture(device, GEN8_CP_IB3_BASE_LO_PIPE, GEN8_CP_IB3_BASE_HI_PIPE, &ib3base, PIPE_BR, 0, 0); gen8_regread_aperture(device, GEN8_CP_IB3_REM_SIZE_PIPE, &ib3sz, PIPE_BR, 0, 0); gen8_regread64_aperture(device, GEN8_CP_IB1_BASE_LO_PIPE, GEN8_CP_IB1_BASE_HI_PIPE, &ib1base_bv, PIPE_BV, 0, 0); gen8_regread_aperture(device, GEN8_CP_IB1_REM_SIZE_PIPE, &ib1sz_bv, PIPE_BV, 0, 0); gen8_regread64_aperture(device, GEN8_CP_IB2_BASE_LO_PIPE, GEN8_CP_IB2_BASE_HI_PIPE, &ib2base_bv, PIPE_BV, 0, 0); gen8_regread_aperture(device, GEN8_CP_IB2_REM_SIZE_PIPE, &ib2sz_bv, PIPE_BV, 0, 0); gen8_regread64_aperture(device, GEN8_CP_IB3_BASE_LO_PIPE, GEN8_CP_IB3_BASE_HI_PIPE, &ib3base_bv, PIPE_BV, 0, 0); gen8_regread_aperture(device, GEN8_CP_IB3_REM_SIZE_PIPE, &ib3sz_bv, PIPE_BV, 0, 0); gen8_host_aperture_set(adreno_dev, 0, 0, 0); dev_err(device->dev, "status %8.8X gfx_status %8.8X gfx_br_status %8.8X gfx_bv_status %8.8X\n", status, gfx_status, gfx_br_status, gfx_bv_status); dev_err(device->dev, "BR: rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x ib3 %16.16llX/%4.4x\n", rptr, wptr, ib1base, ib1sz, ib2base, ib2sz, ib3base, ib3sz); dev_err(device->dev, "BV: rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x ib3 %16.16llX/%4.4x\n", rptr_bv, wptr, ib1base_bv, ib1sz_bv, ib2base_bv, ib2sz_bv, ib3base_bv, ib3sz_bv); done: trace_adreno_gpu_fault(ctxt_id, ts, status, rptr, wptr, ib1base, ib1sz, ib2base, ib2sz, rb_id); } const struct gen8_gpudev adreno_gen8_hwsched_gpudev = { .base = { .reg_offsets = gen8_register_offsets, .probe = gen8_hwsched_probe, .snapshot = gen8_hwsched_snapshot, .irq_handler = gen8_irq_handler, .iommu_fault_block = gen8_iommu_fault_block, .preemption_context_init = gen8_preemption_context_init, .context_detach = gen8_hwsched_context_detach, .read_alwayson = gen8_read_alwayson, .reset = gen8_hwsched_reset_replay, .power_ops = &gen8_hwsched_power_ops, .power_stats = gen8_power_stats, .setproperty = gen8_setproperty, .hw_isidle = gen8_hw_isidle, .add_to_va_minidump = gen8_hwsched_add_to_minidump, .gx_is_on = gen8_gmu_gx_is_on, .send_recurring_cmdobj = gen8_hwsched_send_recurring_cmdobj, .perfcounter_remove = gen8_perfcounter_remove, .set_isdb_breakpoint_registers = gen8_set_isdb_breakpoint_registers, .context_destroy = gen8_hwsched_context_destroy, .lpac_store = gen8_lpac_store, .get_uche_trap_base = gen8_get_uche_trap_base, .fault_header = gen8_fault_header, .lpac_fault_header = gen8_lpac_fault_header, }, .hfi_probe = gen8_hwsched_hfi_probe, .hfi_remove = gen8_hwsched_hfi_remove, .handle_watchdog = gen8_hwsched_handle_watchdog, }; const struct gen8_gpudev adreno_gen8_gmu_gpudev = { .base = { .reg_offsets = gen8_register_offsets, .probe = gen8_gmu_device_probe, .snapshot = gen8_gmu_snapshot, .irq_handler = gen8_irq_handler, .rb_start = gen8_rb_start, .gpu_keepalive = gen8_gpu_keepalive, .hw_isidle = gen8_hw_isidle, .iommu_fault_block = gen8_iommu_fault_block, .reset = gen8_gmu_reset, .preemption_schedule = gen8_preemption_schedule, .preemption_context_init = gen8_preemption_context_init, .read_alwayson = gen8_read_alwayson, .power_ops = &gen8_gmu_power_ops, .remove = 
gen8_remove, .ringbuffer_submitcmd = gen8_ringbuffer_submitcmd, .power_stats = gen8_power_stats, .setproperty = gen8_setproperty, .add_to_va_minidump = gen8_gmu_add_to_minidump, .gx_is_on = gen8_gmu_gx_is_on, .perfcounter_remove = gen8_perfcounter_remove, .set_isdb_breakpoint_registers = gen8_set_isdb_breakpoint_registers, .swfuse_irqctrl = gen8_swfuse_irqctrl, .get_uche_trap_base = gen8_get_uche_trap_base, .fault_header = gen8_fault_header, }, .hfi_probe = gen8_gmu_hfi_probe, .handle_watchdog = gen8_gmu_handle_watchdog, };
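
/*
 * Note on the XO-clock counter math used in gen8_power_stats() above: the GMU
 * power counters tick at the 19.2 MHz XO clock, so a delta of N ticks
 * corresponds to N / 19.2 microseconds, which the driver computes in integer
 * math as (N * 10) / 192. A minimal standalone sketch of the same conversion
 * (the helper name is illustrative only and is not part of this driver):
 *
 *	static u64 xo_ticks_to_us(u64 ticks)
 *	{
 *		u64 us = ticks * 10;
 *
 *		do_div(us, 192);
 *		return us;
 *	}
 *
 * For example, a delta of 19200 XO ticks converts to 1000 us (1 ms).
 */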