From ae6b281a9ff75c23b2e92c6f6e71e52fde3c1b7a Mon Sep 17 00:00:00 2001 From: Git User Date: Wed, 25 Aug 2021 03:13:00 -0700 Subject: [PATCH 001/750] Initial empty repository From a38e9d3e98ca3459667bb4fed56d5d537a2ce0c6 Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Thu, 19 Aug 2021 11:19:53 -0700 Subject: [PATCH 002/750] msm: kgsl: Initial KGSL commit Snapshot of the Adreno GPU (KGSL) driver from msm-5.10 commit b809380cd817 (Merge "defconfig: enable new touch NT36XXX_SPI"). Add in the Makefile and other changes required to compile kgsl code outside the kernel tree. Change-Id: I63801a9c9ebcd6e2dbd20f023b664caf7255818c Signed-off-by: Lynus Vaz --- Android.bp | 35 + Android.mk | 29 + Kbuild | 95 + Kconfig | 89 + Makefile | 16 + a3xx_reg.h | 564 ++++ a5xx_reg.h | 902 +++++++ a6xx_reg.h | 1197 ++++++++ adreno-gpulist.h | 1915 +++++++++++++ adreno.c | 3214 ++++++++++++++++++++++ adreno.h | 1781 ++++++++++++ adreno_a3xx.c | 1569 +++++++++++ adreno_a3xx.h | 69 + adreno_a3xx_perfcounter.c | 411 +++ adreno_a3xx_ringbuffer.c | 454 ++++ adreno_a3xx_snapshot.c | 448 +++ adreno_a5xx.c | 2726 +++++++++++++++++++ adreno_a5xx.h | 307 +++ adreno_a5xx_packets.h | 1406 ++++++++++ adreno_a5xx_perfcounter.c | 695 +++++ adreno_a5xx_preempt.c | 552 ++++ adreno_a5xx_ringbuffer.c | 521 ++++ adreno_a5xx_snapshot.c | 1220 +++++++++ adreno_a6xx.c | 2774 +++++++++++++++++++ adreno_a6xx.h | 458 ++++ adreno_a6xx_gmu.c | 3459 ++++++++++++++++++++++++ adreno_a6xx_gmu.h | 425 +++ adreno_a6xx_gmu_snapshot.c | 431 +++ adreno_a6xx_hfi.c | 860 ++++++ adreno_a6xx_hfi.h | 184 ++ adreno_a6xx_hwsched.c | 1178 ++++++++ adreno_a6xx_hwsched.h | 84 + adreno_a6xx_hwsched_hfi.c | 1569 +++++++++++ adreno_a6xx_hwsched_hfi.h | 141 + adreno_a6xx_perfcounter.c | 952 +++++++ adreno_a6xx_preempt.c | 768 ++++++ adreno_a6xx_rgmu.c | 1443 ++++++++++ adreno_a6xx_rgmu.h | 99 + adreno_a6xx_ringbuffer.c | 523 ++++ adreno_a6xx_rpmh.c | 478 ++++ adreno_a6xx_snapshot.c | 2262 ++++++++++++++++ adreno_compat.c | 185 ++ adreno_compat.h | 46 + adreno_coresight.c | 407 +++ adreno_coresight.h | 132 + adreno_cp_parser.c | 1047 +++++++ adreno_cp_parser.h | 175 ++ adreno_debugfs.c | 416 +++ adreno_dispatch.c | 2976 ++++++++++++++++++++ adreno_dispatch.h | 112 + adreno_drawctxt.c | 586 ++++ adreno_drawctxt.h | 178 ++ adreno_gen7.c | 1318 +++++++++ adreno_gen7.h | 436 +++ adreno_gen7_gmu.c | 2708 +++++++++++++++++++ adreno_gen7_gmu.h | 422 +++ adreno_gen7_gmu_snapshot.c | 326 +++ adreno_gen7_hfi.c | 636 +++++ adreno_gen7_hfi.h | 185 ++ adreno_gen7_hwsched.c | 1161 ++++++++ adreno_gen7_hwsched.h | 84 + adreno_gen7_hwsched_hfi.c | 1606 +++++++++++ adreno_gen7_hwsched_hfi.h | 155 ++ adreno_gen7_perfcounter.c | 896 ++++++ adreno_gen7_preempt.c | 746 +++++ adreno_gen7_ringbuffer.c | 556 ++++ adreno_gen7_rpmh.c | 469 ++++ adreno_gen7_snapshot.c | 1254 +++++++++ adreno_gen7_snapshot.h | 1311 +++++++++ adreno_hfi.h | 869 ++++++ adreno_hwsched.c | 1714 ++++++++++++ adreno_hwsched.h | 129 + adreno_ioctl.c | 227 ++ adreno_perfcounter.c | 580 ++++ adreno_perfcounter.h | 137 + adreno_pm4types.h | 404 +++ adreno_profile.c | 1130 ++++++++ adreno_profile.h | 107 + adreno_ringbuffer.c | 435 +++ adreno_ringbuffer.h | 247 ++ adreno_snapshot.c | 1134 ++++++++ adreno_snapshot.h | 85 + adreno_sysfs.c | 337 +++ adreno_sysfs.h | 78 + adreno_trace.c | 36 + adreno_trace.h | 786 ++++++ build.config.msm_kgsl | 1 + config/gki_waipiodisp.conf | 15 + gen7_reg.h | 1158 ++++++++ gfx_driver_product.mk | 4 + gfx_kernel_board.mk | 10 + gfx_kernel_headers.py | 96 + governor_gpubw_mon.c | 318 +++ 
governor_msm_adreno_tz.c | 563 ++++ include/linux/msm_kgsl.h | 52 + include/uapi/linux/msm_kgsl.h | 2001 ++++++++++++++ kgsl.c | 4809 +++++++++++++++++++++++++++++++++ kgsl.h | 606 +++++ kgsl_bus.c | 179 ++ kgsl_bus.h | 25 + kgsl_compat.c | 392 +++ kgsl_compat.h | 243 ++ kgsl_debugfs.c | 406 +++ kgsl_debugfs.h | 36 + kgsl_device.h | 962 +++++++ kgsl_drawobj.c | 1489 ++++++++++ kgsl_drawobj.h | 332 +++ kgsl_eventlog.c | 232 ++ kgsl_eventlog.h | 20 + kgsl_events.c | 434 +++ kgsl_gmu_core.c | 213 ++ kgsl_gmu_core.h | 321 +++ kgsl_ioctl.c | 189 ++ kgsl_iommu.c | 2419 +++++++++++++++++ kgsl_iommu.h | 180 ++ kgsl_mmu.c | 618 +++++ kgsl_mmu.h | 393 +++ kgsl_pool.c | 641 +++++ kgsl_pool.h | 68 + kgsl_pwrctrl.c | 2329 ++++++++++++++++ kgsl_pwrctrl.h | 265 ++ kgsl_pwrscale.c | 805 ++++++ kgsl_pwrscale.h | 110 + kgsl_reclaim.c | 422 +++ kgsl_reclaim.h | 52 + kgsl_regmap.c | 328 +++ kgsl_regmap.h | 265 ++ kgsl_sharedmem.c | 1605 +++++++++++ kgsl_sharedmem.h | 463 ++++ kgsl_snapshot.c | 1273 +++++++++ kgsl_snapshot.h | 314 +++ kgsl_sync.c | 884 ++++++ kgsl_sync.h | 184 ++ kgsl_sysfs.h | 31 + kgsl_timeline.c | 551 ++++ kgsl_timeline.h | 115 + kgsl_trace.c | 12 + kgsl_trace.h | 1522 +++++++++++ kgsl_util.c | 350 +++ kgsl_util.h | 150 + kgsl_vbo.c | 627 +++++ msm_adreno_devfreq.h | 77 + 142 files changed, 99426 insertions(+) create mode 100644 Android.bp create mode 100644 Android.mk create mode 100644 Kbuild create mode 100644 Kconfig create mode 100644 Makefile create mode 100644 a3xx_reg.h create mode 100644 a5xx_reg.h create mode 100644 a6xx_reg.h create mode 100644 adreno-gpulist.h create mode 100644 adreno.c create mode 100644 adreno.h create mode 100644 adreno_a3xx.c create mode 100644 adreno_a3xx.h create mode 100644 adreno_a3xx_perfcounter.c create mode 100644 adreno_a3xx_ringbuffer.c create mode 100644 adreno_a3xx_snapshot.c create mode 100644 adreno_a5xx.c create mode 100644 adreno_a5xx.h create mode 100644 adreno_a5xx_packets.h create mode 100644 adreno_a5xx_perfcounter.c create mode 100644 adreno_a5xx_preempt.c create mode 100644 adreno_a5xx_ringbuffer.c create mode 100644 adreno_a5xx_snapshot.c create mode 100644 adreno_a6xx.c create mode 100644 adreno_a6xx.h create mode 100644 adreno_a6xx_gmu.c create mode 100644 adreno_a6xx_gmu.h create mode 100644 adreno_a6xx_gmu_snapshot.c create mode 100644 adreno_a6xx_hfi.c create mode 100644 adreno_a6xx_hfi.h create mode 100644 adreno_a6xx_hwsched.c create mode 100644 adreno_a6xx_hwsched.h create mode 100644 adreno_a6xx_hwsched_hfi.c create mode 100644 adreno_a6xx_hwsched_hfi.h create mode 100644 adreno_a6xx_perfcounter.c create mode 100644 adreno_a6xx_preempt.c create mode 100644 adreno_a6xx_rgmu.c create mode 100644 adreno_a6xx_rgmu.h create mode 100644 adreno_a6xx_ringbuffer.c create mode 100644 adreno_a6xx_rpmh.c create mode 100644 adreno_a6xx_snapshot.c create mode 100644 adreno_compat.c create mode 100644 adreno_compat.h create mode 100644 adreno_coresight.c create mode 100644 adreno_coresight.h create mode 100644 adreno_cp_parser.c create mode 100644 adreno_cp_parser.h create mode 100644 adreno_debugfs.c create mode 100644 adreno_dispatch.c create mode 100644 adreno_dispatch.h create mode 100644 adreno_drawctxt.c create mode 100644 adreno_drawctxt.h create mode 100644 adreno_gen7.c create mode 100644 adreno_gen7.h create mode 100644 adreno_gen7_gmu.c create mode 100644 adreno_gen7_gmu.h create mode 100644 adreno_gen7_gmu_snapshot.c create mode 100644 adreno_gen7_hfi.c create mode 100644 adreno_gen7_hfi.h create mode 100644 adreno_gen7_hwsched.c 
create mode 100644 adreno_gen7_hwsched.h create mode 100644 adreno_gen7_hwsched_hfi.c create mode 100644 adreno_gen7_hwsched_hfi.h create mode 100644 adreno_gen7_perfcounter.c create mode 100644 adreno_gen7_preempt.c create mode 100644 adreno_gen7_ringbuffer.c create mode 100644 adreno_gen7_rpmh.c create mode 100644 adreno_gen7_snapshot.c create mode 100644 adreno_gen7_snapshot.h create mode 100644 adreno_hfi.h create mode 100644 adreno_hwsched.c create mode 100644 adreno_hwsched.h create mode 100644 adreno_ioctl.c create mode 100644 adreno_perfcounter.c create mode 100644 adreno_perfcounter.h create mode 100644 adreno_pm4types.h create mode 100644 adreno_profile.c create mode 100644 adreno_profile.h create mode 100644 adreno_ringbuffer.c create mode 100644 adreno_ringbuffer.h create mode 100644 adreno_snapshot.c create mode 100644 adreno_snapshot.h create mode 100644 adreno_sysfs.c create mode 100644 adreno_sysfs.h create mode 100644 adreno_trace.c create mode 100644 adreno_trace.h create mode 100644 build.config.msm_kgsl create mode 100644 config/gki_waipiodisp.conf create mode 100644 gen7_reg.h create mode 100644 gfx_driver_product.mk create mode 100644 gfx_kernel_board.mk create mode 100644 gfx_kernel_headers.py create mode 100644 governor_gpubw_mon.c create mode 100644 governor_msm_adreno_tz.c create mode 100644 include/linux/msm_kgsl.h create mode 100644 include/uapi/linux/msm_kgsl.h create mode 100644 kgsl.c create mode 100644 kgsl.h create mode 100644 kgsl_bus.c create mode 100644 kgsl_bus.h create mode 100644 kgsl_compat.c create mode 100644 kgsl_compat.h create mode 100644 kgsl_debugfs.c create mode 100644 kgsl_debugfs.h create mode 100644 kgsl_device.h create mode 100644 kgsl_drawobj.c create mode 100644 kgsl_drawobj.h create mode 100644 kgsl_eventlog.c create mode 100644 kgsl_eventlog.h create mode 100644 kgsl_events.c create mode 100644 kgsl_gmu_core.c create mode 100644 kgsl_gmu_core.h create mode 100644 kgsl_ioctl.c create mode 100644 kgsl_iommu.c create mode 100644 kgsl_iommu.h create mode 100644 kgsl_mmu.c create mode 100644 kgsl_mmu.h create mode 100644 kgsl_pool.c create mode 100644 kgsl_pool.h create mode 100644 kgsl_pwrctrl.c create mode 100644 kgsl_pwrctrl.h create mode 100644 kgsl_pwrscale.c create mode 100644 kgsl_pwrscale.h create mode 100644 kgsl_reclaim.c create mode 100644 kgsl_reclaim.h create mode 100644 kgsl_regmap.c create mode 100644 kgsl_regmap.h create mode 100644 kgsl_sharedmem.c create mode 100644 kgsl_sharedmem.h create mode 100644 kgsl_snapshot.c create mode 100644 kgsl_snapshot.h create mode 100644 kgsl_sync.c create mode 100644 kgsl_sync.h create mode 100644 kgsl_sysfs.h create mode 100644 kgsl_timeline.c create mode 100644 kgsl_timeline.h create mode 100644 kgsl_trace.c create mode 100644 kgsl_trace.h create mode 100644 kgsl_util.c create mode 100644 kgsl_util.h create mode 100644 kgsl_vbo.c create mode 100644 msm_adreno_devfreq.h diff --git a/Android.bp b/Android.bp new file mode 100644 index 0000000000..44160ae331 --- /dev/null +++ b/Android.bp @@ -0,0 +1,35 @@ +headers_src = [ + "include/uapi/linux/*.h", +] + +gfx_headers_out = [ + "linux/msm_kgsl.h", +] + +gfx_kernel_headers_verbose = "--verbose " +genrule { + name: "qti_generate_gfx_kernel_headers", + tools: ["headers_install.sh", + "unifdef" + ], + tool_files: [ + "gfx_kernel_headers.py", + ], + srcs: headers_src, + cmd: "python3 -u $(location gfx_kernel_headers.py) " + + gfx_kernel_headers_verbose + + "--header_arch arm64 " + + "--gen_dir $(genDir) " + + "--gfx_include_uapi $(locations 
include/uapi/linux/*.h) " + + "--unifdef $(location unifdef) " + + "--headers_install $(location headers_install.sh)", + out: gfx_headers_out, +} + +cc_library_headers { + name: "qti_gfx_kernel_uapi", + generated_headers: ["qti_generate_gfx_kernel_headers"], + export_generated_headers: ["qti_generate_gfx_kernel_headers"], + vendor: true, + recovery_available: true +} diff --git a/Android.mk b/Android.mk new file mode 100644 index 0000000000..cad91bfab6 --- /dev/null +++ b/Android.mk @@ -0,0 +1,29 @@ +# Test dlkm +DLKM_DIR := device/qcom/common/dlkm +KGSL_SELECT := CONFIG_QCOM_KGSL=m +KERN_SRC := $(ANDROID_TOP)/kernel_platform/msm-kernel + +LOCAL_PATH := $(call my-dir) + +KBUILD_OPTIONS += BOARD_PLATFORM=$(TARGET_BOARD_PLATFORM) +KBUILD_OPTIONS += $(KGSL_SELECT) +KBUILD_OPTIONS += MODNAME=msm_kgsl +KBUILD_OPTIONS += KERN_SRC=$(KERN_SRC) + +KBUILD_OPTIONS += KBUILD_EXTRA_SYMBOLS=$(PWD)/$(call intermediates-dir-for,DLKM,mmrm-module-symvers)/Module.symvers + +include $(CLEAR_VARS) +# For incremental compilation +LOCAL_SRC_FILES := $(wildcard $(LOCAL_PATH)/**/*) $(wildcard $(LOCAL_PATH)/*) +LOCAL_MODULE := msm_kgsl.ko +LOCAL_MODULE_KBUILD_NAME := msm_kgsl.ko +LOCAL_MODULE_TAGS := optional +LOCAL_MODULE_DEBUG_ENABLE := true +LOCAL_MODULE_PATH := $(KERNEL_MODULES_OUT) +#LOCAL_REQUIRED_MODULES := mmrm-module-symvers +#LOCAL_ADDITIONAL_DEPENDENCIES := $(call intermediates-dir-for,DLKM,mmrm-module-symvers)/Module.symvers + +# Include msm_kgsl.ko in the /vendor/lib/modules (vendor.img) +BOARD_VENDOR_KERNEL_MODULES += $(LOCAL_MODULE_PATH)/$(LOCAL_MODULE) +include $(DLKM_DIR)/Build_external_kernelmodule.mk + diff --git a/Kbuild b/Kbuild new file mode 100644 index 0000000000..b1212a31a7 --- /dev/null +++ b/Kbuild @@ -0,0 +1,95 @@ +# SPDX-License-Identifier: GPL-2.0-only + +KDIR := $(TOP)/kernel_platform/common + +ifeq ($(KGSL_PATH),) +KGSL_PATH=$(src) +endif + +ifeq ($(CONFIG_ARCH_WAIPIO), y) + include $(KGSL_PATH)/config/gki_waipiodisp.conf +endif + +ccflags-y += -I$(KGSL_PATH) -I$(KGSL_PATH)/include/linux -I$(KGSL_PATH)/include -I$(KERN_SRC)/drivers/devfreq + +obj-$(CONFIG_QCOM_KGSL) += msm_kgsl.o + +msm_kgsl-y = \ + kgsl.o \ + kgsl_bus.o \ + kgsl_drawobj.o \ + kgsl_events.o \ + kgsl_eventlog.o \ + kgsl_gmu_core.o \ + kgsl_ioctl.o \ + kgsl_mmu.o \ + kgsl_pwrctrl.o \ + kgsl_pwrscale.o \ + kgsl_regmap.o \ + kgsl_sharedmem.o \ + kgsl_snapshot.o \ + kgsl_timeline.o \ + kgsl_trace.o \ + kgsl_util.o \ + kgsl_vbo.o + +msm_kgsl-$(CONFIG_COMPAT) += kgsl_compat.o +msm_kgsl-$(CONFIG_DEBUG_FS) += kgsl_debugfs.o +msm_kgsl-$(CONFIG_ARM_SMMU) += kgsl_iommu.o +msm_kgsl-$(CONFIG_SYNC_FILE) += kgsl_sync.o +msm_kgsl-$(CONFIG_QCOM_KGSL_PROCESS_RECLAIM) += kgsl_reclaim.o + +ifndef CONFIG_QCOM_KGSL_USE_SHMEM + msm_kgsl-y += kgsl_pool.o +endif + +msm_kgsl-y += \ + adreno.o \ + adreno_a3xx.o \ + adreno_a3xx_perfcounter.o \ + adreno_a3xx_ringbuffer.o \ + adreno_a3xx_snapshot.o \ + adreno_a5xx.o \ + adreno_a5xx_perfcounter.o \ + adreno_a5xx_preempt.o \ + adreno_a5xx_ringbuffer.o \ + adreno_a5xx_snapshot.o \ + adreno_a6xx.o \ + adreno_a6xx_gmu.o \ + adreno_a6xx_gmu_snapshot.o \ + adreno_a6xx_hfi.o \ + adreno_a6xx_hwsched.o \ + adreno_a6xx_hwsched_hfi.o \ + adreno_a6xx_perfcounter.o \ + adreno_a6xx_preempt.o \ + adreno_a6xx_rgmu.o \ + adreno_a6xx_ringbuffer.o \ + adreno_a6xx_rpmh.o \ + adreno_a6xx_snapshot.o \ + adreno_cp_parser.o \ + adreno_dispatch.o \ + adreno_drawctxt.o \ + adreno_gen7.o \ + adreno_gen7_gmu.o \ + adreno_gen7_gmu_snapshot.o \ + adreno_gen7_hfi.o \ + adreno_gen7_hwsched.o \ + adreno_gen7_hwsched_hfi.o \ + 
adreno_gen7_perfcounter.o \ + adreno_gen7_preempt.o \ + adreno_gen7_ringbuffer.o \ + adreno_gen7_rpmh.o \ + adreno_gen7_snapshot.o \ + adreno_hwsched.o \ + adreno_ioctl.o \ + adreno_perfcounter.o \ + adreno_ringbuffer.o \ + adreno_snapshot.o \ + adreno_sysfs.o \ + adreno_trace.o \ + governor_msm_adreno_tz.o \ + governor_gpubw_mon.o + +msm_kgsl-$(CONFIG_COMPAT) += adreno_compat.o +msm_kgsl-$(CONFIG_QCOM_KGSL_CORESIGHT) += adreno_coresight.o +msm_kgsl-$(CONFIG_DEBUG_FS) += adreno_debugfs.o adreno_profile.o diff --git a/Kconfig b/Kconfig new file mode 100644 index 0000000000..0c04a88e74 --- /dev/null +++ b/Kconfig @@ -0,0 +1,89 @@ +# SPDX-License-Identifier: GPL-2.0-only +config QCOM_KGSL + tristate "Qualcomm Technologies, Inc. 3D Graphics driver" + depends on ARCH_QCOM + depends on QCOM_QFPROM + select QCOM_MDT_LOADER + select INTERVAL_TREE + select TRACE_GPU_MEM + help + 3D graphics driver for the Adreno family of GPUs from QTI. + Required to use hardware accelerated OpenGL, compute and Vulkan + on QTI targets. This includes power management, memory management, + and scheduling for the Adreno GPUs. + +config DEVFREQ_GOV_QCOM_ADRENO_TZ + tristate "Qualcomm Technologies, Inc. GPU frequency governor" + depends on PM_DEVFREQ && QCOM_KGSL + help + GPU frequency governor for the Adreno GPU. Sets the frequency + using an "on demand" algorithm in conjunction with other + components on Adreno platforms. This is not useful for non-Adreno + devices. + +config DEVFREQ_GOV_QCOM_GPUBW_MON + tristate "Qualcomm Technologies, Inc. GPU bandwidth governor" + depends on DEVFREQ_GOV_QCOM_ADRENO_TZ + help + This governor works together with the Adreno GPU governor to + select bus frequency votes using an "on-demand" algorithm. + This governor will not be useful for non-Adreno based + targets. + +config QCOM_ADRENO_DEFAULT_GOVERNOR + string "devfreq governor for the adreno core" + default "msm-adreno-tz" + depends on QCOM_KGSL + +config QCOM_KGSL_CORESIGHT + bool "Enable coresight support for the Adreno GPU" + depends on QCOM_KGSL && CORESIGHT + help + When enabled, the Adreno GPU is available as a source for Coresight + data. On a6xx targets there are two sources available for the GX and + CX domains respectively. Debug kernels should say 'Y' here. + +config QCOM_KGSL_IOCOHERENCY_DEFAULT + bool "Enable I/O coherency on cached GPU memory by default" + depends on QCOM_KGSL + default y if ARCH_LAHAINA + help + Say 'Y' here to enable I/O cache coherency by default on targets that + support hardware I/O coherency. If enabled all cached GPU memory + will use I/O coherency regardless of the user flags. If not enabled + the user can still selectively enable I/O coherency with a flag. + +config QCOM_KGSL_IDLE_TIMEOUT + int + depends on QCOM_KGSL + default 80 + help + GPU idle timeout for Adreno GPU. This value decides after how + long the GPU will go into slumber. A higher value will mean that + the GPU is powered ON for a longer duration which will have + power costs. + +config QCOM_KGSL_CONTEXT_DEBUG + bool "Log kgsl context information for all processes" + depends on QCOM_KGSL + help + When enabled, total number of KGSL contexts, number of attached and + detached contexts are dumped into kernel log for all the processes. + This gives insight about the number of contexts held by each process. + +config QCOM_KGSL_SORT_POOL + bool "Sort pool page list based on physical address" + depends on QCOM_KGSL + default y + help + When enabled, the pool page list is sorted based on physical + addresses. 
This can be turned on for targets where better DDR + efficiency is attained on accesses for adjacent memory. + +config QCOM_KGSL_QDSS_STM + bool "Enable support for QDSS STM for Adreno GPU" + depends on QCOM_KGSL && CORESIGHT + help + When enabled, the Adreno GPU QDSS STM support is enabled. GPU QDSS STM + memory will be mapped to GPU and QDSS clock needed to access this memory + is voted. Debug kernels should say 'Y' here. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000000..9743341b60 --- /dev/null +++ b/Makefile @@ -0,0 +1,16 @@ +ifeq ($(KGSL_MODULE_ROOT),) +KGSL_MODULE_ROOT=$(KERNEL_SRC)/$(M) +endif + +KBUILD_OPTIONS+=KGSL_PATH=$(KGSL_MODULE_ROOT) + +all: modules + +modules_install: + $(MAKE) INSTALL_MOD_STRIP=1 -C $(KERNEL_SRC) M=$(M) modules_install + +clean: + $(MAKE) -C $(KERNEL_SRC) M=$(M) clean + +%: + $(MAKE) -C $(KERNEL_SRC) M=$(M) $@ $(KBUILD_OPTIONS) diff --git a/a3xx_reg.h b/a3xx_reg.h new file mode 100644 index 0000000000..ab5079aa45 --- /dev/null +++ b/a3xx_reg.h @@ -0,0 +1,564 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2012-2017,2019-2020, The Linux Foundation. All rights reserved. + */ + +#ifndef _A300_REG_H +#define _A300_REG_H + +/* Interrupt bit positions within RBBM_INT_0 */ + +#define A3XX_INT_RBBM_GPU_IDLE 0 +#define A3XX_INT_RBBM_AHB_ERROR 1 +#define A3XX_INT_RBBM_REG_TIMEOUT 2 +#define A3XX_INT_RBBM_ME_MS_TIMEOUT 3 +#define A3XX_INT_RBBM_PFP_MS_TIMEOUT 4 +#define A3XX_INT_RBBM_ATB_BUS_OVERFLOW 5 +#define A3XX_INT_VFD_ERROR 6 +#define A3XX_INT_CP_SW_INT 7 +#define A3XX_INT_CP_T0_PACKET_IN_IB 8 +#define A3XX_INT_CP_OPCODE_ERROR 9 +#define A3XX_INT_CP_RESERVED_BIT_ERROR 10 +#define A3XX_INT_CP_HW_FAULT 11 +#define A3XX_INT_CP_DMA 12 +#define A3XX_INT_CP_IB2_INT 13 +#define A3XX_INT_CP_IB1_INT 14 +#define A3XX_INT_CP_RB_INT 15 +#define A3XX_INT_CP_REG_PROTECT_FAULT 16 +#define A3XX_INT_CP_RB_DONE_TS 17 +#define A3XX_INT_CP_VS_DONE_TS 18 +#define A3XX_INT_CP_PS_DONE_TS 19 +#define A3XX_INT_CACHE_FLUSH_TS 20 +#define A3XX_INT_CP_AHB_ERROR_HALT 21 +#define A3XX_INT_MISC_HANG_DETECT 24 +#define A3XX_INT_UCHE_OOB_ACCESS 25 + +/* Register definitions */ + +#define A3XX_RBBM_CLOCK_CTL 0x010 +#define A3XX_RBBM_SP_HYST_CNT 0x012 +#define A3XX_RBBM_SW_RESET_CMD 0x018 +#define A3XX_RBBM_AHB_CTL0 0x020 +#define A3XX_RBBM_AHB_CTL1 0x021 +#define A3XX_RBBM_AHB_CMD 0x022 +#define A3XX_RBBM_AHB_ERROR_STATUS 0x027 +#define A3XX_RBBM_GPR0_CTL 0x02E +/* This the same register as on A2XX, just in a different place */ +#define A3XX_RBBM_STATUS 0x030 +#define A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL 0x33 +#define A3XX_RBBM_INTERFACE_HANG_INT_CTL 0x50 +#define A3XX_RBBM_INT_CLEAR_CMD 0x061 +#define A3XX_RBBM_INT_0_MASK 0x063 +#define A3XX_RBBM_INT_0_STATUS 0x064 +#define A3XX_RBBM_PERFCTR_CTL 0x80 +#define A3XX_RBBM_PERFCTR_LOAD_CMD0 0x81 +#define A3XX_RBBM_PERFCTR_LOAD_CMD1 0x82 +#define A3XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x84 +#define A3XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x85 +#define A3XX_RBBM_PERFCOUNTER0_SELECT 0x86 +#define A3XX_RBBM_PERFCOUNTER1_SELECT 0x87 +#define A3XX_RBBM_GPU_BUSY_MASKED 0x88 +#define A3XX_RBBM_PERFCTR_CP_0_LO 0x90 +#define A3XX_RBBM_PERFCTR_CP_0_HI 0x91 +#define A3XX_RBBM_PERFCTR_RBBM_0_LO 0x92 +#define A3XX_RBBM_PERFCTR_RBBM_0_HI 0x93 +#define A3XX_RBBM_PERFCTR_RBBM_1_LO 0x94 +#define A3XX_RBBM_PERFCTR_RBBM_1_HI 0x95 +#define A3XX_RBBM_PERFCTR_PC_0_LO 0x96 +#define A3XX_RBBM_PERFCTR_PC_0_HI 0x97 +#define A3XX_RBBM_PERFCTR_PC_1_LO 0x98 +#define A3XX_RBBM_PERFCTR_PC_1_HI 0x99 +#define A3XX_RBBM_PERFCTR_PC_2_LO 0x9A +#define 
A3XX_RBBM_PERFCTR_PC_2_HI 0x9B +#define A3XX_RBBM_PERFCTR_PC_3_LO 0x9C +#define A3XX_RBBM_PERFCTR_PC_3_HI 0x9D +#define A3XX_RBBM_PERFCTR_VFD_0_LO 0x9E +#define A3XX_RBBM_PERFCTR_VFD_0_HI 0x9F +#define A3XX_RBBM_PERFCTR_VFD_1_LO 0xA0 +#define A3XX_RBBM_PERFCTR_VFD_1_HI 0xA1 +#define A3XX_RBBM_PERFCTR_HLSQ_0_LO 0xA2 +#define A3XX_RBBM_PERFCTR_HLSQ_0_HI 0xA3 +#define A3XX_RBBM_PERFCTR_HLSQ_1_LO 0xA4 +#define A3XX_RBBM_PERFCTR_HLSQ_1_HI 0xA5 +#define A3XX_RBBM_PERFCTR_HLSQ_2_LO 0xA6 +#define A3XX_RBBM_PERFCTR_HLSQ_2_HI 0xA7 +#define A3XX_RBBM_PERFCTR_HLSQ_3_LO 0xA8 +#define A3XX_RBBM_PERFCTR_HLSQ_3_HI 0xA9 +#define A3XX_RBBM_PERFCTR_HLSQ_4_LO 0xAA +#define A3XX_RBBM_PERFCTR_HLSQ_4_HI 0xAB +#define A3XX_RBBM_PERFCTR_HLSQ_5_LO 0xAC +#define A3XX_RBBM_PERFCTR_HLSQ_5_HI 0xAD +#define A3XX_RBBM_PERFCTR_VPC_0_LO 0xAE +#define A3XX_RBBM_PERFCTR_VPC_0_HI 0xAF +#define A3XX_RBBM_PERFCTR_VPC_1_LO 0xB0 +#define A3XX_RBBM_PERFCTR_VPC_1_HI 0xB1 +#define A3XX_RBBM_PERFCTR_TSE_0_LO 0xB2 +#define A3XX_RBBM_PERFCTR_TSE_0_HI 0xB3 +#define A3XX_RBBM_PERFCTR_TSE_1_LO 0xB4 +#define A3XX_RBBM_PERFCTR_TSE_1_HI 0xB5 +#define A3XX_RBBM_PERFCTR_RAS_0_LO 0xB6 +#define A3XX_RBBM_PERFCTR_RAS_0_HI 0xB7 +#define A3XX_RBBM_PERFCTR_RAS_1_LO 0xB8 +#define A3XX_RBBM_PERFCTR_RAS_1_HI 0xB9 +#define A3XX_RBBM_PERFCTR_UCHE_0_LO 0xBA +#define A3XX_RBBM_PERFCTR_UCHE_0_HI 0xBB +#define A3XX_RBBM_PERFCTR_UCHE_1_LO 0xBC +#define A3XX_RBBM_PERFCTR_UCHE_1_HI 0xBD +#define A3XX_RBBM_PERFCTR_UCHE_2_LO 0xBE +#define A3XX_RBBM_PERFCTR_UCHE_2_HI 0xBF +#define A3XX_RBBM_PERFCTR_UCHE_3_LO 0xC0 +#define A3XX_RBBM_PERFCTR_UCHE_3_HI 0xC1 +#define A3XX_RBBM_PERFCTR_UCHE_4_LO 0xC2 +#define A3XX_RBBM_PERFCTR_UCHE_4_HI 0xC3 +#define A3XX_RBBM_PERFCTR_UCHE_5_LO 0xC4 +#define A3XX_RBBM_PERFCTR_UCHE_5_HI 0xC5 +#define A3XX_RBBM_PERFCTR_TP_0_LO 0xC6 +#define A3XX_RBBM_PERFCTR_TP_0_HI 0xC7 +#define A3XX_RBBM_PERFCTR_TP_1_LO 0xC8 +#define A3XX_RBBM_PERFCTR_TP_1_HI 0xC9 +#define A3XX_RBBM_PERFCTR_TP_2_LO 0xCA +#define A3XX_RBBM_PERFCTR_TP_2_HI 0xCB +#define A3XX_RBBM_PERFCTR_TP_3_LO 0xCC +#define A3XX_RBBM_PERFCTR_TP_3_HI 0xCD +#define A3XX_RBBM_PERFCTR_TP_4_LO 0xCE +#define A3XX_RBBM_PERFCTR_TP_4_HI 0xCF +#define A3XX_RBBM_PERFCTR_TP_5_LO 0xD0 +#define A3XX_RBBM_PERFCTR_TP_5_HI 0xD1 +#define A3XX_RBBM_PERFCTR_SP_0_LO 0xD2 +#define A3XX_RBBM_PERFCTR_SP_0_HI 0xD3 +#define A3XX_RBBM_PERFCTR_SP_1_LO 0xD4 +#define A3XX_RBBM_PERFCTR_SP_1_HI 0xD5 +#define A3XX_RBBM_PERFCTR_SP_2_LO 0xD6 +#define A3XX_RBBM_PERFCTR_SP_2_HI 0xD7 +#define A3XX_RBBM_PERFCTR_SP_3_LO 0xD8 +#define A3XX_RBBM_PERFCTR_SP_3_HI 0xD9 +#define A3XX_RBBM_PERFCTR_SP_4_LO 0xDA +#define A3XX_RBBM_PERFCTR_SP_4_HI 0xDB +#define A3XX_RBBM_PERFCTR_SP_5_LO 0xDC +#define A3XX_RBBM_PERFCTR_SP_5_HI 0xDD +#define A3XX_RBBM_PERFCTR_SP_6_LO 0xDE +#define A3XX_RBBM_PERFCTR_SP_6_HI 0xDF +#define A3XX_RBBM_PERFCTR_SP_7_LO 0xE0 +#define A3XX_RBBM_PERFCTR_SP_7_HI 0xE1 +#define A3XX_RBBM_PERFCTR_RB_0_LO 0xE2 +#define A3XX_RBBM_PERFCTR_RB_0_HI 0xE3 +#define A3XX_RBBM_PERFCTR_RB_1_LO 0xE4 +#define A3XX_RBBM_PERFCTR_RB_1_HI 0xE5 + +#define A3XX_RBBM_RBBM_CTL 0x100 +#define A3XX_RBBM_PERFCTR_PWR_0_LO 0x0EA +#define A3XX_RBBM_PERFCTR_PWR_0_HI 0x0EB +#define A3XX_RBBM_PERFCTR_PWR_1_LO 0x0EC +#define A3XX_RBBM_PERFCTR_PWR_1_HI 0x0ED +#define A3XX_RBBM_DEBUG_BUS_CTL 0x111 +#define A3XX_RBBM_DEBUG_BUS_DATA_STATUS 0x112 +#define A3XX_RBBM_DEBUG_BUS_STB_CTL0 0x11B +#define A3XX_RBBM_DEBUG_BUS_STB_CTL1 0x11C +#define A3XX_RBBM_INT_TRACE_BUS_CTL 0x11D +#define A3XX_RBBM_EXT_TRACE_BUS_CTL 0x11E +#define 
A3XX_RBBM_EXT_TRACE_STOP_CNT 0x11F +#define A3XX_RBBM_EXT_TRACE_START_CNT 0x120 +#define A3XX_RBBM_EXT_TRACE_PERIOD_CNT 0x121 +#define A3XX_RBBM_EXT_TRACE_CMD 0x122 +#define A3XX_CP_RB_BASE 0x01C0 +#define A3XX_CP_RB_CNTL 0x01C1 +#define A3XX_CP_RB_RPTR 0x01C4 +#define A3XX_CP_RB_WPTR 0x01C5 +/* Following two are same as on A2XX, just in a different place */ +#define A3XX_CP_PFP_UCODE_ADDR 0x1C9 +#define A3XX_CP_PFP_UCODE_DATA 0x1CA +#define A3XX_CP_ROQ_ADDR 0x1CC +#define A3XX_CP_ROQ_DATA 0x1CD +#define A3XX_CP_MERCIU_ADDR 0x1D1 +#define A3XX_CP_MERCIU_DATA 0x1D2 +#define A3XX_CP_MERCIU_DATA2 0x1D3 +#define A3XX_CP_QUEUE_THRESHOLDS 0x01D5 +#define A3XX_CP_MEQ_ADDR 0x1DA +#define A3XX_CP_MEQ_DATA 0x1DB +#define A3XX_CP_STATE_DEBUG_INDEX 0x01EC +#define A3XX_CP_STATE_DEBUG_DATA 0x01ED +#define A3XX_CP_CNTL 0x01F4 +#define A3XX_CP_WFI_PEND_CTR 0x01F5 +#define A3XX_CP_ME_CNTL 0x01F6 +#define A3XX_CP_ME_STATUS 0x01F7 +#define A3XX_CP_ME_RAM_WADDR 0x01F8 +#define A3XX_CP_ME_RAM_RADDR 0x01F9 +#define A3XX_CP_ME_RAM_DATA 0x01FA +#define A3XX_CP_DEBUG 0x01FC + +#define A3XX_RBBM_PM_OVERRIDE2 0x039D + +#define A3XX_CP_PERFCOUNTER_SELECT 0x445 +#define A3XX_CP_IB1_BASE 0x0458 +#define A3XX_CP_IB1_BUFSZ 0x0459 +#define A3XX_CP_IB2_BASE 0x045A +#define A3XX_CP_IB2_BUFSZ 0x045B + +#define A3XX_CP_HW_FAULT 0x45C +#define A3XX_CP_PROTECT_CTRL 0x45E +#define A3XX_CP_PROTECT_STATUS 0x45F +#define A3XX_CP_PROTECT_REG_0 0x460 +#define A3XX_CP_STAT 0x047F +#define A3XX_CP_SCRATCH_REG0 0x578 +#define A3XX_CP_SCRATCH_REG6 0x57E +#define A3XX_CP_SCRATCH_REG7 0x57F +#define A3XX_VSC_SIZE_ADDRESS 0xC02 +#define A3XX_VSC_PIPE_DATA_ADDRESS_0 0xC07 +#define A3XX_VSC_PIPE_DATA_LENGTH_0 0xC08 +#define A3XX_VSC_PIPE_DATA_ADDRESS_1 0xC0A +#define A3XX_VSC_PIPE_DATA_LENGTH_1 0xC0B +#define A3XX_VSC_PIPE_DATA_ADDRESS_2 0xC0D +#define A3XX_VSC_PIPE_DATA_LENGTH_2 0xC0E +#define A3XX_VSC_PIPE_DATA_ADDRESS_3 0xC10 +#define A3XX_VSC_PIPE_DATA_LENGTH_3 0xC11 +#define A3XX_VSC_PIPE_DATA_ADDRESS_4 0xC13 +#define A3XX_VSC_PIPE_DATA_LENGTH_4 0xC14 +#define A3XX_VSC_PIPE_DATA_ADDRESS_5 0xC16 +#define A3XX_VSC_PIPE_DATA_LENGTH_5 0xC17 +#define A3XX_VSC_PIPE_DATA_ADDRESS_6 0xC19 +#define A3XX_VSC_PIPE_DATA_LENGTH_6 0xC1A +#define A3XX_VSC_PIPE_DATA_ADDRESS_7 0xC1C +#define A3XX_VSC_PIPE_DATA_LENGTH_7 0xC1D +#define A3XX_PC_PERFCOUNTER0_SELECT 0xC48 +#define A3XX_PC_PERFCOUNTER1_SELECT 0xC49 +#define A3XX_PC_PERFCOUNTER2_SELECT 0xC4A +#define A3XX_PC_PERFCOUNTER3_SELECT 0xC4B +#define A3XX_GRAS_TSE_DEBUG_ECO 0xC81 +#define A3XX_GRAS_PERFCOUNTER0_SELECT 0xC88 +#define A3XX_GRAS_PERFCOUNTER1_SELECT 0xC89 +#define A3XX_GRAS_PERFCOUNTER2_SELECT 0xC8A +#define A3XX_GRAS_PERFCOUNTER3_SELECT 0xC8B +#define A3XX_GRAS_CL_USER_PLANE_X0 0xCA0 +#define A3XX_GRAS_CL_USER_PLANE_Y0 0xCA1 +#define A3XX_GRAS_CL_USER_PLANE_Z0 0xCA2 +#define A3XX_GRAS_CL_USER_PLANE_W0 0xCA3 +#define A3XX_GRAS_CL_USER_PLANE_X1 0xCA4 +#define A3XX_GRAS_CL_USER_PLANE_Y1 0xCA5 +#define A3XX_GRAS_CL_USER_PLANE_Z1 0xCA6 +#define A3XX_GRAS_CL_USER_PLANE_W1 0xCA7 +#define A3XX_GRAS_CL_USER_PLANE_X2 0xCA8 +#define A3XX_GRAS_CL_USER_PLANE_Y2 0xCA9 +#define A3XX_GRAS_CL_USER_PLANE_Z2 0xCAA +#define A3XX_GRAS_CL_USER_PLANE_W2 0xCAB +#define A3XX_GRAS_CL_USER_PLANE_X3 0xCAC +#define A3XX_GRAS_CL_USER_PLANE_Y3 0xCAD +#define A3XX_GRAS_CL_USER_PLANE_Z3 0xCAE +#define A3XX_GRAS_CL_USER_PLANE_W3 0xCAF +#define A3XX_GRAS_CL_USER_PLANE_X4 0xCB0 +#define A3XX_GRAS_CL_USER_PLANE_Y4 0xCB1 +#define A3XX_GRAS_CL_USER_PLANE_Z4 0xCB2 +#define A3XX_GRAS_CL_USER_PLANE_W4 0xCB3 +#define 
A3XX_GRAS_CL_USER_PLANE_X5 0xCB4 +#define A3XX_GRAS_CL_USER_PLANE_Y5 0xCB5 +#define A3XX_GRAS_CL_USER_PLANE_Z5 0xCB6 +#define A3XX_GRAS_CL_USER_PLANE_W5 0xCB7 +#define A3XX_RB_GMEM_BASE_ADDR 0xCC0 +#define A3XX_RB_DEBUG_ECO_CONTROLS_ADDR 0xCC1 +#define A3XX_RB_PERFCOUNTER0_SELECT 0xCC6 +#define A3XX_RB_PERFCOUNTER1_SELECT 0xCC7 +#define A3XX_RB_FRAME_BUFFER_DIMENSION 0xCE0 +#define A3XX_SQ_GPR_MANAGEMENT 0x0D00 +#define A3XX_SQ_INST_STORE_MANAGEMENT 0x0D02 +#define A3XX_HLSQ_PERFCOUNTER0_SELECT 0xE00 +#define A3XX_HLSQ_PERFCOUNTER1_SELECT 0xE01 +#define A3XX_HLSQ_PERFCOUNTER2_SELECT 0xE02 +#define A3XX_HLSQ_PERFCOUNTER3_SELECT 0xE03 +#define A3XX_HLSQ_PERFCOUNTER4_SELECT 0xE04 +#define A3XX_HLSQ_PERFCOUNTER5_SELECT 0xE05 +#define A3XX_TP0_CHICKEN 0x0E1E +#define A3XX_VFD_PERFCOUNTER0_SELECT 0xE44 +#define A3XX_VFD_PERFCOUNTER1_SELECT 0xE45 +#define A3XX_VPC_VPC_DEBUG_RAM_SEL 0xE61 +#define A3XX_VPC_VPC_DEBUG_RAM_READ 0xE62 +#define A3XX_VPC_PERFCOUNTER0_SELECT 0xE64 +#define A3XX_VPC_PERFCOUNTER1_SELECT 0xE65 +#define A3XX_UCHE_CACHE_MODE_CONTROL_REG 0xE82 +#define A3XX_UCHE_PERFCOUNTER0_SELECT 0xE84 +#define A3XX_UCHE_PERFCOUNTER1_SELECT 0xE85 +#define A3XX_UCHE_PERFCOUNTER2_SELECT 0xE86 +#define A3XX_UCHE_PERFCOUNTER3_SELECT 0xE87 +#define A3XX_UCHE_PERFCOUNTER4_SELECT 0xE88 +#define A3XX_UCHE_PERFCOUNTER5_SELECT 0xE89 +#define A3XX_UCHE_CACHE_INVALIDATE0_REG 0xEA0 +#define A3XX_UCHE_CACHE_INVALIDATE1_REG 0xEA1 +#define A3XX_UCHE_CACHE_WAYS_VFD 0xEA6 +#define A3XX_SP_PERFCOUNTER0_SELECT 0xEC4 +#define A3XX_SP_PERFCOUNTER1_SELECT 0xEC5 +#define A3XX_SP_PERFCOUNTER2_SELECT 0xEC6 +#define A3XX_SP_PERFCOUNTER3_SELECT 0xEC7 +#define A3XX_SP_PERFCOUNTER4_SELECT 0xEC8 +#define A3XX_SP_PERFCOUNTER5_SELECT 0xEC9 +#define A3XX_SP_PERFCOUNTER6_SELECT 0xECA +#define A3XX_SP_PERFCOUNTER7_SELECT 0xECB +#define A3XX_TP_PERFCOUNTER0_SELECT 0xF04 +#define A3XX_TP_PERFCOUNTER1_SELECT 0xF05 +#define A3XX_TP_PERFCOUNTER2_SELECT 0xF06 +#define A3XX_TP_PERFCOUNTER3_SELECT 0xF07 +#define A3XX_TP_PERFCOUNTER4_SELECT 0xF08 +#define A3XX_TP_PERFCOUNTER5_SELECT 0xF09 +#define A3XX_GRAS_CL_CLIP_CNTL 0x2040 +#define A3XX_GRAS_CL_GB_CLIP_ADJ 0x2044 +#define A3XX_GRAS_CL_VPORT_XOFFSET 0x2048 +#define A3XX_GRAS_CL_VPORT_XSCALE 0x2049 +#define A3XX_GRAS_CL_VPORT_YOFFSET 0x204A +#define A3XX_GRAS_CL_VPORT_YSCALE 0x204B +#define A3XX_GRAS_CL_VPORT_ZOFFSET 0x204C +#define A3XX_GRAS_CL_VPORT_ZSCALE 0x204D +#define A3XX_GRAS_SU_POINT_MINMAX 0x2068 +#define A3XX_GRAS_SU_POINT_SIZE 0x2069 +#define A3XX_GRAS_SU_POLY_OFFSET_SCALE 0x206C +#define A3XX_GRAS_SU_POLY_OFFSET_OFFSET 0x206D +#define A3XX_GRAS_SU_MODE_CONTROL 0x2070 +#define A3XX_GRAS_SC_CONTROL 0x2072 +#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL 0x2074 +#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR 0x2075 +#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL 0x2079 +#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR 0x207A +#define A3XX_RB_MODE_CONTROL 0x20C0 +#define A3XX_RB_RENDER_CONTROL 0x20C1 +#define A3XX_RB_MSAA_CONTROL 0x20C2 +#define A3XX_RB_ALPHA_REFERENCE 0x20C3 +#define A3XX_RB_MRT_CONTROL0 0x20C4 +#define A3XX_RB_MRT_BUF_INFO0 0x20C5 +#define A3XX_RB_MRT_BUF_BASE0 0x20C6 +#define A3XX_RB_MRT_BLEND_CONTROL0 0x20C7 +#define A3XX_RB_MRT_CONTROL1 0x20C8 +#define A3XX_RB_MRT_BUF_INFO1 0x20C9 +#define A3XX_RB_MRT_BUF_BASE1 0x20CA +#define A3XX_RB_MRT_BLEND_CONTROL1 0x20CB +#define A3XX_RB_MRT_CONTROL2 0x20CC +#define A3XX_RB_MRT_BUF_INFO2 0x20CD +#define A3XX_RB_MRT_BUF_BASE2 0x20CE +#define A3XX_RB_MRT_BLEND_CONTROL2 0x20CF +#define A3XX_RB_MRT_CONTROL3 0x20D0 +#define A3XX_RB_MRT_BUF_INFO3 0x20D1 
+#define A3XX_RB_MRT_BUF_BASE3 0x20D2 +#define A3XX_RB_MRT_BLEND_CONTROL3 0x20D3 +#define A3XX_RB_BLEND_RED 0x20E4 +#define A3XX_RB_BLEND_GREEN 0x20E5 +#define A3XX_RB_BLEND_BLUE 0x20E6 +#define A3XX_RB_BLEND_ALPHA 0x20E7 +#define A3XX_RB_CLEAR_COLOR_DW0 0x20E8 +#define A3XX_RB_CLEAR_COLOR_DW1 0x20E9 +#define A3XX_RB_CLEAR_COLOR_DW2 0x20EA +#define A3XX_RB_CLEAR_COLOR_DW3 0x20EB +#define A3XX_RB_COPY_CONTROL 0x20EC +#define A3XX_RB_COPY_DEST_BASE 0x20ED +#define A3XX_RB_COPY_DEST_PITCH 0x20EE +#define A3XX_RB_COPY_DEST_INFO 0x20EF +#define A3XX_RB_DEPTH_CONTROL 0x2100 +#define A3XX_RB_DEPTH_CLEAR 0x2101 +#define A3XX_RB_DEPTH_BUF_INFO 0x2102 +#define A3XX_RB_DEPTH_BUF_PITCH 0x2103 +#define A3XX_RB_STENCIL_CONTROL 0x2104 +#define A3XX_RB_STENCIL_CLEAR 0x2105 +#define A3XX_RB_STENCIL_BUF_INFO 0x2106 +#define A3XX_RB_STENCIL_BUF_PITCH 0x2107 +#define A3XX_RB_STENCIL_REF_MASK 0x2108 +#define A3XX_RB_STENCIL_REF_MASK_BF 0x2109 +#define A3XX_RB_LRZ_VSC_CONTROL 0x210C +#define A3XX_RB_WINDOW_OFFSET 0x210E +#define A3XX_RB_SAMPLE_COUNT_CONTROL 0x2110 +#define A3XX_RB_SAMPLE_COUNT_ADDR 0x2111 +#define A3XX_RB_Z_CLAMP_MIN 0x2114 +#define A3XX_RB_Z_CLAMP_MAX 0x2115 +#define A3XX_HLSQ_CONTROL_0_REG 0x2200 +#define A3XX_HLSQ_CONTROL_1_REG 0x2201 +#define A3XX_HLSQ_CONTROL_2_REG 0x2202 +#define A3XX_HLSQ_CONTROL_3_REG 0x2203 +#define A3XX_HLSQ_VS_CONTROL_REG 0x2204 +#define A3XX_HLSQ_FS_CONTROL_REG 0x2205 +#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG 0x2206 +#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG 0x2207 +#define A3XX_HLSQ_CL_NDRANGE_0_REG 0x220A +#define A3XX_HLSQ_CL_NDRANGE_1_REG 0x220B +#define A3XX_HLSQ_CL_NDRANGE_2_REG 0x220C +#define A3XX_HLSQ_CL_NDRANGE_3_REG 0x220D +#define A3XX_HLSQ_CL_NDRANGE_4_REG 0x220E +#define A3XX_HLSQ_CL_NDRANGE_5_REG 0x220F +#define A3XX_HLSQ_CL_NDRANGE_6_REG 0x2210 +#define A3XX_HLSQ_CL_CONTROL_0_REG 0x2211 +#define A3XX_HLSQ_CL_CONTROL_1_REG 0x2212 +#define A3XX_HLSQ_CL_KERNEL_CONST_REG 0x2214 +#define A3XX_HLSQ_CL_KERNEL_GROUP_X_REG 0x2215 +#define A3XX_HLSQ_CL_KERNEL_GROUP_Y_REG 0x2216 +#define A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG 0x2217 +#define A3XX_HLSQ_CL_WG_OFFSET_REG 0x221A +#define A3XX_VFD_FETCH_INSTR_1_0 0x2247 +#define A3XX_VFD_FETCH_INSTR_1_1 0x2249 +#define A3XX_VFD_FETCH_INSTR_1_2 0x224B +#define A3XX_VFD_FETCH_INSTR_1_3 0x224D +#define A3XX_VFD_FETCH_INSTR_1_4 0x224F +#define A3XX_VFD_FETCH_INSTR_1_5 0x2251 +#define A3XX_VFD_FETCH_INSTR_1_6 0x2253 +#define A3XX_VFD_FETCH_INSTR_1_7 0x2255 +#define A3XX_VFD_FETCH_INSTR_1_8 0x2257 +#define A3XX_VFD_FETCH_INSTR_1_9 0x2259 +#define A3XX_VFD_FETCH_INSTR_1_A 0x225B +#define A3XX_VFD_FETCH_INSTR_1_B 0x225D +#define A3XX_VFD_FETCH_INSTR_1_C 0x225F +#define A3XX_VFD_FETCH_INSTR_1_D 0x2261 +#define A3XX_VFD_FETCH_INSTR_1_E 0x2263 +#define A3XX_VFD_FETCH_INSTR_1_F 0x2265 +#define A3XX_SP_SP_CTRL_REG 0x22C0 +#define A3XX_SP_VS_CTRL_REG0 0x22C4 +#define A3XX_SP_VS_CTRL_REG1 0x22C5 +#define A3XX_SP_VS_PARAM_REG 0x22C6 +#define A3XX_SP_VS_OUT_REG_0 0x22C7 +#define A3XX_SP_VS_OUT_REG_1 0x22C8 +#define A3XX_SP_VS_OUT_REG_2 0x22C9 +#define A3XX_SP_VS_OUT_REG_3 0x22CA +#define A3XX_SP_VS_OUT_REG_4 0x22CB +#define A3XX_SP_VS_OUT_REG_5 0x22CC +#define A3XX_SP_VS_OUT_REG_6 0x22CD +#define A3XX_SP_VS_OUT_REG_7 0x22CE +#define A3XX_SP_VS_VPC_DST_REG_0 0x22D0 +#define A3XX_SP_VS_VPC_DST_REG_1 0x22D1 +#define A3XX_SP_VS_VPC_DST_REG_2 0x22D2 +#define A3XX_SP_VS_VPC_DST_REG_3 0x22D3 +#define A3XX_SP_VS_OBJ_OFFSET_REG 0x22D4 +#define A3XX_SP_VS_OBJ_START_REG 0x22D5 +#define A3XX_SP_VS_PVT_MEM_PARAM_REG 0x22D6 +#define 
A3XX_SP_VS_PVT_MEM_ADDR_REG 0x22D7 +#define A3XX_SP_VS_PVT_MEM_SIZE_REG 0x22D8 +#define A3XX_SP_VS_LENGTH_REG 0x22DF +#define A3XX_SP_FS_CTRL_REG0 0x22E0 +#define A3XX_SP_FS_CTRL_REG1 0x22E1 +#define A3XX_SP_FS_OBJ_OFFSET_REG 0x22E2 +#define A3XX_SP_FS_OBJ_START_REG 0x22E3 +#define A3XX_SP_FS_PVT_MEM_PARAM_REG 0x22E4 +#define A3XX_SP_FS_PVT_MEM_ADDR_REG 0x22E5 +#define A3XX_SP_FS_PVT_MEM_SIZE_REG 0x22E6 +#define A3XX_SP_FS_FLAT_SHAD_MODE_REG_0 0x22E8 +#define A3XX_SP_FS_FLAT_SHAD_MODE_REG_1 0x22E9 +#define A3XX_SP_FS_OUTPUT_REG 0x22EC +#define A3XX_SP_FS_MRT_REG_0 0x22F0 +#define A3XX_SP_FS_MRT_REG_1 0x22F1 +#define A3XX_SP_FS_MRT_REG_2 0x22F2 +#define A3XX_SP_FS_MRT_REG_3 0x22F3 +#define A3XX_SP_FS_IMAGE_OUTPUT_REG_0 0x22F4 +#define A3XX_SP_FS_IMAGE_OUTPUT_REG_1 0x22F5 +#define A3XX_SP_FS_IMAGE_OUTPUT_REG_2 0x22F6 +#define A3XX_SP_FS_IMAGE_OUTPUT_REG_3 0x22F7 +#define A3XX_SP_FS_LENGTH_REG 0x22FF +#define A3XX_PA_SC_AA_CONFIG 0x2301 +#define A3XX_VBIF_CLKON 0x3001 +#define A3XX_VBIF_ABIT_SORT 0x301C +#define A3XX_VBIF_ABIT_SORT_CONF 0x301D +#define A3XX_VBIF_GATE_OFF_WRREQ_EN 0x302A +#define A3XX_VBIF_IN_RD_LIM_CONF0 0x302C +#define A3XX_VBIF_IN_RD_LIM_CONF1 0x302D +#define A3XX_VBIF_IN_WR_LIM_CONF0 0x3030 +#define A3XX_VBIF_IN_WR_LIM_CONF1 0x3031 +#define A3XX_VBIF_OUT_RD_LIM_CONF0 0x3034 +#define A3XX_VBIF_OUT_WR_LIM_CONF0 0x3035 +#define A3XX_VBIF_DDR_OUT_MAX_BURST 0x3036 +#define A3XX_VBIF_ARB_CTL 0x303C +#define A3XX_VBIF_ROUND_ROBIN_QOS_ARB 0x3049 +#define A3XX_VBIF_OUT_AXI_AOOO_EN 0x305E +#define A3XX_VBIF_OUT_AXI_AOOO 0x305F +#define A3XX_VBIF_PERF_CNT0_LO 0x3073 +#define A3XX_VBIF_PERF_CNT0_HI 0x3074 +#define A3XX_VBIF_PERF_CNT1_LO 0x3075 +#define A3XX_VBIF_PERF_CNT1_HI 0x3076 +#define A3XX_VBIF_PERF_PWR_CNT0_LO 0x3077 +#define A3XX_VBIF_PERF_PWR_CNT0_HI 0x3078 +#define A3XX_VBIF_PERF_PWR_CNT1_LO 0x3079 +#define A3XX_VBIF_PERF_PWR_CNT1_HI 0x307a +#define A3XX_VBIF_PERF_PWR_CNT2_LO 0x307b +#define A3XX_VBIF_PERF_PWR_CNT2_HI 0x307c + +#define A3XX_VBIF_XIN_HALT_CTRL0 0x3080 +#define A3XX_VBIF_XIN_HALT_CTRL0_MASK 0x3F +#define A30X_VBIF_XIN_HALT_CTRL0_MASK 0x7 + +#define A3XX_VBIF_XIN_HALT_CTRL1 0x3081 + +/* VBIF register offsets for A306 */ +#define A3XX_VBIF2_PERF_CNT_SEL0 0x30d0 +#define A3XX_VBIF2_PERF_CNT_SEL1 0x30d1 +#define A3XX_VBIF2_PERF_CNT_SEL2 0x30d2 +#define A3XX_VBIF2_PERF_CNT_SEL3 0x30d3 +#define A3XX_VBIF2_PERF_CNT_LOW0 0x30d8 +#define A3XX_VBIF2_PERF_CNT_LOW1 0x30d9 +#define A3XX_VBIF2_PERF_CNT_LOW2 0x30da +#define A3XX_VBIF2_PERF_CNT_LOW3 0x30db +#define A3XX_VBIF2_PERF_CNT_HIGH0 0x30e0 +#define A3XX_VBIF2_PERF_CNT_HIGH1 0x30e1 +#define A3XX_VBIF2_PERF_CNT_HIGH2 0x30e2 +#define A3XX_VBIF2_PERF_CNT_HIGH3 0x30e3 + +#define A3XX_VBIF2_PERF_PWR_CNT_EN0 0x3100 +#define A3XX_VBIF2_PERF_PWR_CNT_EN1 0x3101 +#define A3XX_VBIF2_PERF_PWR_CNT_EN2 0x3102 +#define A3XX_VBIF2_PERF_PWR_CNT_LOW0 0x3110 +#define A3XX_VBIF2_PERF_PWR_CNT_LOW1 0x3111 +#define A3XX_VBIF2_PERF_PWR_CNT_LOW2 0x3112 +#define A3XX_VBIF2_PERF_PWR_CNT_HIGH0 0x3118 +#define A3XX_VBIF2_PERF_PWR_CNT_HIGH1 0x3119 +#define A3XX_VBIF2_PERF_PWR_CNT_HIGH2 0x311a + +#define A3XX_VBIF_DDR_OUTPUT_RECOVERABLE_HALT_CTRL0 0x3800 +#define A3XX_VBIF_DDR_OUTPUT_RECOVERABLE_HALT_CTRL1 0x3801 + +/* RBBM Debug bus block IDs */ +#define RBBM_BLOCK_ID_CP 0x1 +#define RBBM_BLOCK_ID_RBBM 0x2 +#define RBBM_BLOCK_ID_VBIF 0x3 +#define RBBM_BLOCK_ID_HLSQ 0x4 +#define RBBM_BLOCK_ID_UCHE 0x5 +#define RBBM_BLOCK_ID_PC 0x8 +#define RBBM_BLOCK_ID_VFD 0x9 +#define RBBM_BLOCK_ID_VPC 0xa +#define RBBM_BLOCK_ID_TSE 0xb +#define RBBM_BLOCK_ID_RAS 
0xc +#define RBBM_BLOCK_ID_VSC 0xd +#define RBBM_BLOCK_ID_SP_0 0x10 +#define RBBM_BLOCK_ID_SP_1 0x11 +#define RBBM_BLOCK_ID_SP_2 0x12 +#define RBBM_BLOCK_ID_SP_3 0x13 +#define RBBM_BLOCK_ID_TPL1_0 0x18 +#define RBBM_BLOCK_ID_TPL1_1 0x19 +#define RBBM_BLOCK_ID_TPL1_2 0x1a +#define RBBM_BLOCK_ID_TPL1_3 0x1b +#define RBBM_BLOCK_ID_RB_0 0x20 +#define RBBM_BLOCK_ID_RB_1 0x21 +#define RBBM_BLOCK_ID_RB_2 0x22 +#define RBBM_BLOCK_ID_RB_3 0x23 +#define RBBM_BLOCK_ID_MARB_0 0x28 +#define RBBM_BLOCK_ID_MARB_1 0x29 +#define RBBM_BLOCK_ID_MARB_2 0x2a +#define RBBM_BLOCK_ID_MARB_3 0x2b + +/* RBBM_CLOCK_CTL default value */ +#define A3XX_RBBM_CLOCK_CTL_DEFAULT 0xAAAAAAAA +#define A320_RBBM_CLOCK_CTL_DEFAULT 0xBFFFFFFF +#define A330_RBBM_CLOCK_CTL_DEFAULT 0xBFFCFFFF + +#define A330_RBBM_GPR0_CTL_DEFAULT 0x00000000 +#define A330v2_RBBM_GPR0_CTL_DEFAULT 0x05515455 +#define A310_RBBM_GPR0_CTL_DEFAULT 0x000000AA + +/* COUNTABLE FOR SP PERFCOUNTER */ +#define SP_ALU_ACTIVE_CYCLES 0x1D +#define SP0_ICL1_MISSES 0x1A +#define SP_FS_CFLOW_INSTRUCTIONS 0x0C + +/* COUNTABLE FOR TSE PERFCOUNTER */ +#define TSE_INPUT_PRIM_NUM 0x0 + +/* VBIF countables */ +#define VBIF_AXI_TOTAL_BEATS 85 + +/* VBIF Recoverable HALT bit value */ +#define VBIF_RECOVERABLE_HALT_CTRL 0x1 + +/* + * CP DEBUG settings for A3XX core: + * DYNAMIC_CLK_DISABLE [27] - turn off the dynamic clock control + * MIU_128BIT_WRITE_ENABLE [25] - Allow 128 bit writes to the VBIF + */ +#define A3XX_CP_DEBUG_DEFAULT ((1 << 27) | (1 << 25)) + + +#endif diff --git a/a5xx_reg.h b/a5xx_reg.h new file mode 100644 index 0000000000..137a11c3d9 --- /dev/null +++ b/a5xx_reg.h @@ -0,0 +1,902 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2014-2016,2019, The Linux Foundation. All rights reserved. + */ + +#ifndef _A5XX_REG_H +#define _A5XX_REG_H + +/* A5XX interrupt bits */ +#define A5XX_INT_RBBM_GPU_IDLE 0 +#define A5XX_INT_RBBM_AHB_ERROR 1 +#define A5XX_INT_RBBM_TRANSFER_TIMEOUT 2 +#define A5XX_INT_RBBM_ME_MS_TIMEOUT 3 +#define A5XX_INT_RBBM_PFP_MS_TIMEOUT 4 +#define A5XX_INT_RBBM_ETS_MS_TIMEOUT 5 +#define A5XX_INT_RBBM_ATB_ASYNC_OVERFLOW 6 +#define A5XX_INT_RBBM_GPC_ERROR 7 +#define A5XX_INT_CP_SW 8 +#define A5XX_INT_CP_HW_ERROR 9 +#define A5XX_INT_CP_CCU_FLUSH_DEPTH_TS 10 +#define A5XX_INT_CP_CCU_FLUSH_COLOR_TS 11 +#define A5XX_INT_CP_CCU_RESOLVE_TS 12 +#define A5XX_INT_CP_IB2 13 +#define A5XX_INT_CP_IB1 14 +#define A5XX_INT_CP_RB 15 +#define A5XX_INT_CP_UNUSED_1 16 +#define A5XX_INT_CP_RB_DONE_TS 17 +#define A5XX_INT_CP_WT_DONE_TS 18 +#define A5XX_INT_UNKNOWN_1 19 +#define A5XX_INT_CP_CACHE_FLUSH_TS 20 +#define A5XX_INT_UNUSED_2 21 +#define A5XX_INT_RBBM_ATB_BUS_OVERFLOW 22 +#define A5XX_INT_MISC_HANG_DETECT 23 +#define A5XX_INT_UCHE_OOB_ACCESS 24 +#define A5XX_INT_UCHE_TRAP_INTR 25 +#define A5XX_INT_DEBBUS_INTR_0 26 +#define A5XX_INT_DEBBUS_INTR_1 27 +#define A5XX_INT_GPMU_VOLTAGE_DROOP 28 +#define A5XX_INT_GPMU_FIRMWARE 29 +#define A5XX_INT_ISDB_CPU_IRQ 30 +#define A5XX_INT_ISDB_UNDER_DEBUG 31 + +/* CP Interrupt bits */ +#define A5XX_CP_OPCODE_ERROR 0 +#define A5XX_CP_RESERVED_BIT_ERROR 1 +#define A5XX_CP_HW_FAULT_ERROR 2 +#define A5XX_CP_DMA_ERROR 3 +#define A5XX_CP_REGISTER_PROTECTION_ERROR 4 +#define A5XX_CP_AHB_ERROR 5 + +/* CP registers */ +#define A5XX_CP_RB_BASE 0x800 +#define A5XX_CP_RB_BASE_HI 0x801 +#define A5XX_CP_RB_CNTL 0x802 +#define A5XX_CP_RB_RPTR_ADDR_LO 0x804 +#define A5XX_CP_RB_RPTR_ADDR_HI 0x805 +#define A5XX_CP_RB_RPTR 0x806 +#define A5XX_CP_RB_WPTR 0x807 +#define A5XX_CP_PFP_STAT_ADDR 0x808 +#define 
A5XX_CP_PFP_STAT_DATA 0x809 +#define A5XX_CP_DRAW_STATE_ADDR 0x80B +#define A5XX_CP_DRAW_STATE_DATA 0x80C +#define A5XX_CP_CRASH_SCRIPT_BASE_LO 0x817 +#define A5XX_CP_CRASH_SCRIPT_BASE_HI 0x818 +#define A5XX_CP_CRASH_DUMP_CNTL 0x819 +#define A5XX_CP_ME_STAT_ADDR 0x81A +#define A5XX_CP_ROQ_THRESHOLDS_1 0x81F +#define A5XX_CP_ROQ_THRESHOLDS_2 0x820 +#define A5XX_CP_ROQ_DBG_ADDR 0x821 +#define A5XX_CP_ROQ_DBG_DATA 0x822 +#define A5XX_CP_MEQ_DBG_ADDR 0x823 +#define A5XX_CP_MEQ_DBG_DATA 0x824 +#define A5XX_CP_MEQ_THRESHOLDS 0x825 +#define A5XX_CP_MERCIU_SIZE 0x826 +#define A5XX_CP_MERCIU_DBG_ADDR 0x827 +#define A5XX_CP_MERCIU_DBG_DATA_1 0x828 +#define A5XX_CP_MERCIU_DBG_DATA_2 0x829 +#define A5XX_CP_PFP_UCODE_DBG_ADDR 0x82A +#define A5XX_CP_PFP_UCODE_DBG_DATA 0x82B +#define A5XX_CP_ME_UCODE_DBG_ADDR 0x82F +#define A5XX_CP_ME_UCODE_DBG_DATA 0x830 +#define A5XX_CP_CNTL 0x831 +#define A5XX_CP_ME_CNTL 0x832 +#define A5XX_CP_CHICKEN_DBG 0x833 +#define A5XX_CP_PFP_INSTR_BASE_LO 0x835 +#define A5XX_CP_PFP_INSTR_BASE_HI 0x836 +#define A5XX_CP_PM4_INSTR_BASE_LO 0x838 +#define A5XX_CP_PM4_INSTR_BASE_HI 0x839 +#define A5XX_CP_CONTEXT_SWITCH_CNTL 0x83B +#define A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO 0x83C +#define A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_HI 0x83D +#define A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO 0x83E +#define A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_HI 0x83F +#define A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO 0x840 +#define A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI 0x841 +#define A5XX_CP_ADDR_MODE_CNTL 0x860 +#define A5XX_CP_ME_STAT_DATA 0xB14 +#define A5XX_CP_WFI_PEND_CTR 0xB15 +#define A5XX_CP_INTERRUPT_STATUS 0xB18 +#define A5XX_CP_HW_FAULT 0xB1A +#define A5XX_CP_PROTECT_STATUS 0xB1C +#define A5XX_CP_IB1_BASE 0xB1F +#define A5XX_CP_IB1_BASE_HI 0xB20 +#define A5XX_CP_IB1_BUFSZ 0xB21 +#define A5XX_CP_IB2_BASE 0xB22 +#define A5XX_CP_IB2_BASE_HI 0xB23 +#define A5XX_CP_IB2_BUFSZ 0xB24 +#define A5XX_CP_PROTECT_REG_0 0x880 +#define A5XX_CP_PROTECT_CNTL 0x8A0 +#define A5XX_CP_AHB_FAULT 0xB1B +#define A5XX_CP_PERFCTR_CP_SEL_0 0xBB0 +#define A5XX_CP_PERFCTR_CP_SEL_1 0xBB1 +#define A5XX_CP_PERFCTR_CP_SEL_2 0xBB2 +#define A5XX_CP_PERFCTR_CP_SEL_3 0xBB3 +#define A5XX_CP_PERFCTR_CP_SEL_4 0xBB4 +#define A5XX_CP_PERFCTR_CP_SEL_5 0xBB5 +#define A5XX_CP_PERFCTR_CP_SEL_6 0xBB6 +#define A5XX_CP_PERFCTR_CP_SEL_7 0xBB7 + +#define A5XX_VSC_ADDR_MODE_CNTL 0xBC1 + +/* CP Power Counter Registers Select */ +#define A5XX_CP_POWERCTR_CP_SEL_0 0xBBA +#define A5XX_CP_POWERCTR_CP_SEL_1 0xBBB +#define A5XX_CP_POWERCTR_CP_SEL_2 0xBBC +#define A5XX_CP_POWERCTR_CP_SEL_3 0xBBD + +/* RBBM registers */ +#define A5XX_RBBM_CFG_DBGBUS_SEL_A 0x4 +#define A5XX_RBBM_CFG_DBGBUS_SEL_B 0x5 +#define A5XX_RBBM_CFG_DBGBUS_SEL_C 0x6 +#define A5XX_RBBM_CFG_DBGBUS_SEL_D 0x7 +#define A5XX_RBBM_CFG_DBGBUS_SEL_PING_INDEX_SHIFT 0x0 +#define A5XX_RBBM_CFG_DBGBUS_SEL_PING_BLK_SEL_SHIFT 0x8 + +#define A5XX_RBBM_CFG_DBGBUS_CNTLT 0x8 +#define A5XX_RBBM_CFG_DBGBUS_CNTLM 0x9 +#define A5XX_RBBM_CFG_DEBBUS_CTLTM_ENABLE_SHIFT 0x18 +#define A5XX_RBBM_CFG_DBGBUS_OPL 0xA +#define A5XX_RBBM_CFG_DBGBUS_OPE 0xB +#define A5XX_RBBM_CFG_DBGBUS_IVTL_0 0xC +#define A5XX_RBBM_CFG_DBGBUS_IVTL_1 0xD +#define A5XX_RBBM_CFG_DBGBUS_IVTL_2 0xE +#define A5XX_RBBM_CFG_DBGBUS_IVTL_3 0xF +#define A5XX_RBBM_CFG_DBGBUS_MASKL_0 0x10 +#define A5XX_RBBM_CFG_DBGBUS_MASKL_1 0x11 +#define A5XX_RBBM_CFG_DBGBUS_MASKL_2 0x12 +#define A5XX_RBBM_CFG_DBGBUS_MASKL_3 0x13 +#define A5XX_RBBM_CFG_DBGBUS_BYTEL_0 0x14 +#define A5XX_RBBM_CFG_DBGBUS_BYTEL_1 0x15 +#define A5XX_RBBM_CFG_DBGBUS_IVTE_0 0x16 +#define 
A5XX_RBBM_CFG_DBGBUS_IVTE_1 0x17 +#define A5XX_RBBM_CFG_DBGBUS_IVTE_2 0x18 +#define A5XX_RBBM_CFG_DBGBUS_IVTE_3 0x19 +#define A5XX_RBBM_CFG_DBGBUS_MASKE_0 0x1A +#define A5XX_RBBM_CFG_DBGBUS_MASKE_1 0x1B +#define A5XX_RBBM_CFG_DBGBUS_MASKE_2 0x1C +#define A5XX_RBBM_CFG_DBGBUS_MASKE_3 0x1D +#define A5XX_RBBM_CFG_DBGBUS_NIBBLEE 0x1E +#define A5XX_RBBM_CFG_DBGBUS_PTRC0 0x1F +#define A5XX_RBBM_CFG_DBGBUS_PTRC1 0x20 +#define A5XX_RBBM_CFG_DBGBUS_LOADREG 0x21 +#define A5XX_RBBM_CFG_DBGBUS_IDX 0x22 +#define A5XX_RBBM_CFG_DBGBUS_CLRC 0x23 +#define A5XX_RBBM_CFG_DBGBUS_LOADIVT 0x24 +#define A5XX_RBBM_INTERFACE_HANG_INT_CNTL 0x2F +#define A5XX_RBBM_INT_CLEAR_CMD 0x37 +#define A5XX_RBBM_INT_0_MASK 0x38 +#define A5XX_RBBM_AHB_DBG_CNTL 0x3F +#define A5XX_RBBM_EXT_VBIF_DBG_CNTL 0x41 +#define A5XX_RBBM_SW_RESET_CMD 0x43 +#define A5XX_RBBM_BLOCK_SW_RESET_CMD 0x45 +#define A5XX_RBBM_BLOCK_SW_RESET_CMD2 0x46 +#define A5XX_RBBM_DBG_LO_HI_GPIO 0x48 +#define A5XX_RBBM_EXT_TRACE_BUS_CNTL 0x49 +#define A5XX_RBBM_CLOCK_CNTL_TP0 0x4A +#define A5XX_RBBM_CLOCK_CNTL_TP1 0x4B +#define A5XX_RBBM_CLOCK_CNTL_TP2 0x4C +#define A5XX_RBBM_CLOCK_CNTL_TP3 0x4D +#define A5XX_RBBM_CLOCK_CNTL2_TP0 0x4E +#define A5XX_RBBM_CLOCK_CNTL2_TP1 0x4F +#define A5XX_RBBM_CLOCK_CNTL2_TP2 0x50 +#define A5XX_RBBM_CLOCK_CNTL2_TP3 0x51 +#define A5XX_RBBM_CLOCK_CNTL3_TP0 0x52 +#define A5XX_RBBM_CLOCK_CNTL3_TP1 0x53 +#define A5XX_RBBM_CLOCK_CNTL3_TP2 0x54 +#define A5XX_RBBM_CLOCK_CNTL3_TP3 0x55 +#define A5XX_RBBM_READ_AHB_THROUGH_DBG 0x59 +#define A5XX_RBBM_CLOCK_CNTL_UCHE 0x5A +#define A5XX_RBBM_CLOCK_CNTL2_UCHE 0x5B +#define A5XX_RBBM_CLOCK_CNTL3_UCHE 0x5C +#define A5XX_RBBM_CLOCK_CNTL4_UCHE 0x5D +#define A5XX_RBBM_CLOCK_HYST_UCHE 0x5E +#define A5XX_RBBM_CLOCK_DELAY_UCHE 0x5F +#define A5XX_RBBM_CLOCK_MODE_GPC 0x60 +#define A5XX_RBBM_CLOCK_DELAY_GPC 0x61 +#define A5XX_RBBM_CLOCK_HYST_GPC 0x62 +#define A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM 0x63 +#define A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM 0x64 +#define A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM 0x65 +#define A5XX_RBBM_CLOCK_DELAY_HLSQ 0x66 +#define A5XX_RBBM_CLOCK_CNTL 0x67 +#define A5XX_RBBM_CLOCK_CNTL_SP0 0x68 +#define A5XX_RBBM_CLOCK_CNTL_SP1 0x69 +#define A5XX_RBBM_CLOCK_CNTL_SP2 0x6A +#define A5XX_RBBM_CLOCK_CNTL_SP3 0x6B +#define A5XX_RBBM_CLOCK_CNTL2_SP0 0x6C +#define A5XX_RBBM_CLOCK_CNTL2_SP1 0x6D +#define A5XX_RBBM_CLOCK_CNTL2_SP2 0x6E +#define A5XX_RBBM_CLOCK_CNTL2_SP3 0x6F +#define A5XX_RBBM_CLOCK_HYST_SP0 0x70 +#define A5XX_RBBM_CLOCK_HYST_SP1 0x71 +#define A5XX_RBBM_CLOCK_HYST_SP2 0x72 +#define A5XX_RBBM_CLOCK_HYST_SP3 0x73 +#define A5XX_RBBM_CLOCK_DELAY_SP0 0x74 +#define A5XX_RBBM_CLOCK_DELAY_SP1 0x75 +#define A5XX_RBBM_CLOCK_DELAY_SP2 0x76 +#define A5XX_RBBM_CLOCK_DELAY_SP3 0x77 +#define A5XX_RBBM_CLOCK_CNTL_RB0 0x78 +#define A5XX_RBBM_CLOCK_CNTL_RB1 0x79 +#define A5XX_RBBM_CLOCK_CNTL_RB2 0x7a +#define A5XX_RBBM_CLOCK_CNTL_RB3 0x7B +#define A5XX_RBBM_CLOCK_CNTL2_RB0 0x7C +#define A5XX_RBBM_CLOCK_CNTL2_RB1 0x7D +#define A5XX_RBBM_CLOCK_CNTL2_RB2 0x7E +#define A5XX_RBBM_CLOCK_CNTL2_RB3 0x7F +#define A5XX_RBBM_CLOCK_HYST_RAC 0x80 +#define A5XX_RBBM_CLOCK_DELAY_RAC 0x81 +#define A5XX_RBBM_CLOCK_CNTL_CCU0 0x82 +#define A5XX_RBBM_CLOCK_CNTL_CCU1 0x83 +#define A5XX_RBBM_CLOCK_CNTL_CCU2 0x84 +#define A5XX_RBBM_CLOCK_CNTL_CCU3 0x85 +#define A5XX_RBBM_CLOCK_HYST_RB_CCU0 0x86 +#define A5XX_RBBM_CLOCK_HYST_RB_CCU1 0x87 +#define A5XX_RBBM_CLOCK_HYST_RB_CCU2 0x88 +#define A5XX_RBBM_CLOCK_HYST_RB_CCU3 0x89 +#define A5XX_RBBM_CLOCK_CNTL_RAC 0x8A +#define A5XX_RBBM_CLOCK_CNTL2_RAC 0x8B +#define 
A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0 0x8C +#define A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1 0x8D +#define A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2 0x8E +#define A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3 0x8F +#define A5XX_RBBM_CLOCK_HYST_VFD 0x90 +#define A5XX_RBBM_CLOCK_MODE_VFD 0x91 +#define A5XX_RBBM_CLOCK_DELAY_VFD 0x92 +#define A5XX_RBBM_AHB_CNTL0 0x93 +#define A5XX_RBBM_AHB_CNTL1 0x94 +#define A5XX_RBBM_AHB_CNTL2 0x95 +#define A5XX_RBBM_AHB_CMD 0x96 +#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11 0x9C +#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12 0x9D +#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13 0x9E +#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14 0x9F +#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15 0xA0 +#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16 0xA1 +#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17 0xA2 +#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18 0xA3 +#define A5XX_RBBM_CLOCK_DELAY_TP0 0xA4 +#define A5XX_RBBM_CLOCK_DELAY_TP1 0xA5 +#define A5XX_RBBM_CLOCK_DELAY_TP2 0xA6 +#define A5XX_RBBM_CLOCK_DELAY_TP3 0xA7 +#define A5XX_RBBM_CLOCK_DELAY2_TP0 0xA8 +#define A5XX_RBBM_CLOCK_DELAY2_TP1 0xA9 +#define A5XX_RBBM_CLOCK_DELAY2_TP2 0xAA +#define A5XX_RBBM_CLOCK_DELAY2_TP3 0xAB +#define A5XX_RBBM_CLOCK_DELAY3_TP0 0xAC +#define A5XX_RBBM_CLOCK_DELAY3_TP1 0xAD +#define A5XX_RBBM_CLOCK_DELAY3_TP2 0xAE +#define A5XX_RBBM_CLOCK_DELAY3_TP3 0xAF +#define A5XX_RBBM_CLOCK_HYST_TP0 0xB0 +#define A5XX_RBBM_CLOCK_HYST_TP1 0xB1 +#define A5XX_RBBM_CLOCK_HYST_TP2 0xB2 +#define A5XX_RBBM_CLOCK_HYST_TP3 0xB3 +#define A5XX_RBBM_CLOCK_HYST2_TP0 0xB4 +#define A5XX_RBBM_CLOCK_HYST2_TP1 0xB5 +#define A5XX_RBBM_CLOCK_HYST2_TP2 0xB6 +#define A5XX_RBBM_CLOCK_HYST2_TP3 0xB7 +#define A5XX_RBBM_CLOCK_HYST3_TP0 0xB8 +#define A5XX_RBBM_CLOCK_HYST3_TP1 0xB9 +#define A5XX_RBBM_CLOCK_HYST3_TP2 0xBA +#define A5XX_RBBM_CLOCK_HYST3_TP3 0xBB +#define A5XX_RBBM_CLOCK_CNTL_GPMU 0xC8 +#define A5XX_RBBM_CLOCK_DELAY_GPMU 0xC9 +#define A5XX_RBBM_CLOCK_HYST_GPMU 0xCA +#define A5XX_RBBM_PERFCTR_CP_0_LO 0x3A0 +#define A5XX_RBBM_PERFCTR_CP_0_HI 0x3A1 +#define A5XX_RBBM_PERFCTR_CP_1_LO 0x3A2 +#define A5XX_RBBM_PERFCTR_CP_1_HI 0x3A3 +#define A5XX_RBBM_PERFCTR_CP_2_LO 0x3A4 +#define A5XX_RBBM_PERFCTR_CP_2_HI 0x3A5 +#define A5XX_RBBM_PERFCTR_CP_3_LO 0x3A6 +#define A5XX_RBBM_PERFCTR_CP_3_HI 0x3A7 +#define A5XX_RBBM_PERFCTR_CP_4_LO 0x3A8 +#define A5XX_RBBM_PERFCTR_CP_4_HI 0x3A9 +#define A5XX_RBBM_PERFCTR_CP_5_LO 0x3AA +#define A5XX_RBBM_PERFCTR_CP_5_HI 0x3AB +#define A5XX_RBBM_PERFCTR_CP_6_LO 0x3AC +#define A5XX_RBBM_PERFCTR_CP_6_HI 0x3AD +#define A5XX_RBBM_PERFCTR_CP_7_LO 0x3AE +#define A5XX_RBBM_PERFCTR_CP_7_HI 0x3AF +#define A5XX_RBBM_PERFCTR_RBBM_0_LO 0x3B0 +#define A5XX_RBBM_PERFCTR_RBBM_0_HI 0x3B1 +#define A5XX_RBBM_PERFCTR_RBBM_1_LO 0x3B2 +#define A5XX_RBBM_PERFCTR_RBBM_1_HI 0x3B3 +#define A5XX_RBBM_PERFCTR_RBBM_2_LO 0x3B4 +#define A5XX_RBBM_PERFCTR_RBBM_2_HI 0x3B5 +#define A5XX_RBBM_PERFCTR_RBBM_3_LO 0x3B6 +#define A5XX_RBBM_PERFCTR_RBBM_3_HI 0x3B7 +#define A5XX_RBBM_PERFCTR_PC_0_LO 0x3B8 +#define A5XX_RBBM_PERFCTR_PC_0_HI 0x3B9 +#define A5XX_RBBM_PERFCTR_PC_1_LO 0x3BA +#define A5XX_RBBM_PERFCTR_PC_1_HI 0x3BB +#define A5XX_RBBM_PERFCTR_PC_2_LO 0x3BC +#define A5XX_RBBM_PERFCTR_PC_2_HI 0x3BD +#define A5XX_RBBM_PERFCTR_PC_3_LO 0x3BE +#define A5XX_RBBM_PERFCTR_PC_3_HI 0x3BF +#define A5XX_RBBM_PERFCTR_PC_4_LO 0x3C0 +#define A5XX_RBBM_PERFCTR_PC_4_HI 0x3C1 +#define A5XX_RBBM_PERFCTR_PC_5_LO 0x3C2 +#define A5XX_RBBM_PERFCTR_PC_5_HI 0x3C3 +#define A5XX_RBBM_PERFCTR_PC_6_LO 0x3C4 +#define A5XX_RBBM_PERFCTR_PC_6_HI 0x3C5 +#define A5XX_RBBM_PERFCTR_PC_7_LO 0x3C6 +#define 
A5XX_RBBM_PERFCTR_PC_7_HI 0x3C7 +#define A5XX_RBBM_PERFCTR_VFD_0_LO 0x3C8 +#define A5XX_RBBM_PERFCTR_VFD_0_HI 0x3C9 +#define A5XX_RBBM_PERFCTR_VFD_1_LO 0x3CA +#define A5XX_RBBM_PERFCTR_VFD_1_HI 0x3CB +#define A5XX_RBBM_PERFCTR_VFD_2_LO 0x3CC +#define A5XX_RBBM_PERFCTR_VFD_2_HI 0x3CD +#define A5XX_RBBM_PERFCTR_VFD_3_LO 0x3CE +#define A5XX_RBBM_PERFCTR_VFD_3_HI 0x3CF +#define A5XX_RBBM_PERFCTR_VFD_4_LO 0x3D0 +#define A5XX_RBBM_PERFCTR_VFD_4_HI 0x3D1 +#define A5XX_RBBM_PERFCTR_VFD_5_LO 0x3D2 +#define A5XX_RBBM_PERFCTR_VFD_5_HI 0x3D3 +#define A5XX_RBBM_PERFCTR_VFD_6_LO 0x3D4 +#define A5XX_RBBM_PERFCTR_VFD_6_HI 0x3D5 +#define A5XX_RBBM_PERFCTR_VFD_7_LO 0x3D6 +#define A5XX_RBBM_PERFCTR_VFD_7_HI 0x3D7 +#define A5XX_RBBM_PERFCTR_HLSQ_0_LO 0x3D8 +#define A5XX_RBBM_PERFCTR_HLSQ_0_HI 0x3D9 +#define A5XX_RBBM_PERFCTR_HLSQ_1_LO 0x3DA +#define A5XX_RBBM_PERFCTR_HLSQ_1_HI 0x3DB +#define A5XX_RBBM_PERFCTR_HLSQ_2_LO 0x3DC +#define A5XX_RBBM_PERFCTR_HLSQ_2_HI 0x3DD +#define A5XX_RBBM_PERFCTR_HLSQ_3_LO 0x3DE +#define A5XX_RBBM_PERFCTR_HLSQ_3_HI 0x3DF +#define A5XX_RBBM_PERFCTR_HLSQ_4_LO 0x3E0 +#define A5XX_RBBM_PERFCTR_HLSQ_4_HI 0x3E1 +#define A5XX_RBBM_PERFCTR_HLSQ_5_LO 0x3E2 +#define A5XX_RBBM_PERFCTR_HLSQ_5_HI 0x3E3 +#define A5XX_RBBM_PERFCTR_HLSQ_6_LO 0x3E4 +#define A5XX_RBBM_PERFCTR_HLSQ_6_HI 0x3E5 +#define A5XX_RBBM_PERFCTR_HLSQ_7_LO 0x3E6 +#define A5XX_RBBM_PERFCTR_HLSQ_7_HI 0x3E7 +#define A5XX_RBBM_PERFCTR_VPC_0_LO 0x3E8 +#define A5XX_RBBM_PERFCTR_VPC_0_HI 0x3E9 +#define A5XX_RBBM_PERFCTR_VPC_1_LO 0x3EA +#define A5XX_RBBM_PERFCTR_VPC_1_HI 0x3EB +#define A5XX_RBBM_PERFCTR_VPC_2_LO 0x3EC +#define A5XX_RBBM_PERFCTR_VPC_2_HI 0x3ED +#define A5XX_RBBM_PERFCTR_VPC_3_LO 0x3EE +#define A5XX_RBBM_PERFCTR_VPC_3_HI 0x3EF +#define A5XX_RBBM_PERFCTR_CCU_0_LO 0x3F0 +#define A5XX_RBBM_PERFCTR_CCU_0_HI 0x3F1 +#define A5XX_RBBM_PERFCTR_CCU_1_LO 0x3F2 +#define A5XX_RBBM_PERFCTR_CCU_1_HI 0x3F3 +#define A5XX_RBBM_PERFCTR_CCU_2_LO 0x3F4 +#define A5XX_RBBM_PERFCTR_CCU_2_HI 0x3F5 +#define A5XX_RBBM_PERFCTR_CCU_3_LO 0x3F6 +#define A5XX_RBBM_PERFCTR_CCU_3_HI 0x3F7 +#define A5XX_RBBM_PERFCTR_TSE_0_LO 0x3F8 +#define A5XX_RBBM_PERFCTR_TSE_0_HI 0x3F9 +#define A5XX_RBBM_PERFCTR_TSE_1_LO 0x3FA +#define A5XX_RBBM_PERFCTR_TSE_1_HI 0x3FB +#define A5XX_RBBM_PERFCTR_TSE_2_LO 0x3FC +#define A5XX_RBBM_PERFCTR_TSE_2_HI 0x3FD +#define A5XX_RBBM_PERFCTR_TSE_3_LO 0x3FE +#define A5XX_RBBM_PERFCTR_TSE_3_HI 0x3FF +#define A5XX_RBBM_PERFCTR_RAS_0_LO 0x400 +#define A5XX_RBBM_PERFCTR_RAS_0_HI 0x401 +#define A5XX_RBBM_PERFCTR_RAS_1_LO 0x402 +#define A5XX_RBBM_PERFCTR_RAS_1_HI 0x403 +#define A5XX_RBBM_PERFCTR_RAS_2_LO 0x404 +#define A5XX_RBBM_PERFCTR_RAS_2_HI 0x405 +#define A5XX_RBBM_PERFCTR_RAS_3_LO 0x406 +#define A5XX_RBBM_PERFCTR_RAS_3_HI 0x407 +#define A5XX_RBBM_PERFCTR_UCHE_0_LO 0x408 +#define A5XX_RBBM_PERFCTR_UCHE_0_HI 0x409 +#define A5XX_RBBM_PERFCTR_UCHE_1_LO 0x40A +#define A5XX_RBBM_PERFCTR_UCHE_1_HI 0x40B +#define A5XX_RBBM_PERFCTR_UCHE_2_LO 0x40C +#define A5XX_RBBM_PERFCTR_UCHE_2_HI 0x40D +#define A5XX_RBBM_PERFCTR_UCHE_3_LO 0x40E +#define A5XX_RBBM_PERFCTR_UCHE_3_HI 0x40F +#define A5XX_RBBM_PERFCTR_UCHE_4_LO 0x410 +#define A5XX_RBBM_PERFCTR_UCHE_4_HI 0x411 +#define A5XX_RBBM_PERFCTR_UCHE_5_LO 0x412 +#define A5XX_RBBM_PERFCTR_UCHE_5_HI 0x413 +#define A5XX_RBBM_PERFCTR_UCHE_6_LO 0x414 +#define A5XX_RBBM_PERFCTR_UCHE_6_HI 0x415 +#define A5XX_RBBM_PERFCTR_UCHE_7_LO 0x416 +#define A5XX_RBBM_PERFCTR_UCHE_7_HI 0x417 +#define A5XX_RBBM_PERFCTR_TP_0_LO 0x418 +#define A5XX_RBBM_PERFCTR_TP_0_HI 0x419 +#define A5XX_RBBM_PERFCTR_TP_1_LO 0x41A 
+#define A5XX_RBBM_PERFCTR_TP_1_HI 0x41B +#define A5XX_RBBM_PERFCTR_TP_2_LO 0x41C +#define A5XX_RBBM_PERFCTR_TP_2_HI 0x41D +#define A5XX_RBBM_PERFCTR_TP_3_LO 0x41E +#define A5XX_RBBM_PERFCTR_TP_3_HI 0x41F +#define A5XX_RBBM_PERFCTR_TP_4_LO 0x420 +#define A5XX_RBBM_PERFCTR_TP_4_HI 0x421 +#define A5XX_RBBM_PERFCTR_TP_5_LO 0x422 +#define A5XX_RBBM_PERFCTR_TP_5_HI 0x423 +#define A5XX_RBBM_PERFCTR_TP_6_LO 0x424 +#define A5XX_RBBM_PERFCTR_TP_6_HI 0x425 +#define A5XX_RBBM_PERFCTR_TP_7_LO 0x426 +#define A5XX_RBBM_PERFCTR_TP_7_HI 0x427 +#define A5XX_RBBM_PERFCTR_SP_0_LO 0x428 +#define A5XX_RBBM_PERFCTR_SP_0_HI 0x429 +#define A5XX_RBBM_PERFCTR_SP_1_LO 0x42A +#define A5XX_RBBM_PERFCTR_SP_1_HI 0x42B +#define A5XX_RBBM_PERFCTR_SP_2_LO 0x42C +#define A5XX_RBBM_PERFCTR_SP_2_HI 0x42D +#define A5XX_RBBM_PERFCTR_SP_3_LO 0x42E +#define A5XX_RBBM_PERFCTR_SP_3_HI 0x42F +#define A5XX_RBBM_PERFCTR_SP_4_LO 0x430 +#define A5XX_RBBM_PERFCTR_SP_4_HI 0x431 +#define A5XX_RBBM_PERFCTR_SP_5_LO 0x432 +#define A5XX_RBBM_PERFCTR_SP_5_HI 0x433 +#define A5XX_RBBM_PERFCTR_SP_6_LO 0x434 +#define A5XX_RBBM_PERFCTR_SP_6_HI 0x435 +#define A5XX_RBBM_PERFCTR_SP_7_LO 0x436 +#define A5XX_RBBM_PERFCTR_SP_7_HI 0x437 +#define A5XX_RBBM_PERFCTR_SP_8_LO 0x438 +#define A5XX_RBBM_PERFCTR_SP_8_HI 0x439 +#define A5XX_RBBM_PERFCTR_SP_9_LO 0x43A +#define A5XX_RBBM_PERFCTR_SP_9_HI 0x43B +#define A5XX_RBBM_PERFCTR_SP_10_LO 0x43C +#define A5XX_RBBM_PERFCTR_SP_10_HI 0x43D +#define A5XX_RBBM_PERFCTR_SP_11_LO 0x43E +#define A5XX_RBBM_PERFCTR_SP_11_HI 0x43F +#define A5XX_RBBM_PERFCTR_RB_0_LO 0x440 +#define A5XX_RBBM_PERFCTR_RB_0_HI 0x441 +#define A5XX_RBBM_PERFCTR_RB_1_LO 0x442 +#define A5XX_RBBM_PERFCTR_RB_1_HI 0x443 +#define A5XX_RBBM_PERFCTR_RB_2_LO 0x444 +#define A5XX_RBBM_PERFCTR_RB_2_HI 0x445 +#define A5XX_RBBM_PERFCTR_RB_3_LO 0x446 +#define A5XX_RBBM_PERFCTR_RB_3_HI 0x447 +#define A5XX_RBBM_PERFCTR_RB_4_LO 0x448 +#define A5XX_RBBM_PERFCTR_RB_4_HI 0x449 +#define A5XX_RBBM_PERFCTR_RB_5_LO 0x44A +#define A5XX_RBBM_PERFCTR_RB_5_HI 0x44B +#define A5XX_RBBM_PERFCTR_RB_6_LO 0x44C +#define A5XX_RBBM_PERFCTR_RB_6_HI 0x44D +#define A5XX_RBBM_PERFCTR_RB_7_LO 0x44E +#define A5XX_RBBM_PERFCTR_RB_7_HI 0x44F +#define A5XX_RBBM_PERFCTR_VSC_0_LO 0x450 +#define A5XX_RBBM_PERFCTR_VSC_0_HI 0x451 +#define A5XX_RBBM_PERFCTR_VSC_1_LO 0x452 +#define A5XX_RBBM_PERFCTR_VSC_1_HI 0x453 +#define A5XX_RBBM_PERFCTR_LRZ_0_LO 0x454 +#define A5XX_RBBM_PERFCTR_LRZ_0_HI 0x455 +#define A5XX_RBBM_PERFCTR_LRZ_1_LO 0x456 +#define A5XX_RBBM_PERFCTR_LRZ_1_HI 0x457 +#define A5XX_RBBM_PERFCTR_LRZ_2_LO 0x458 +#define A5XX_RBBM_PERFCTR_LRZ_2_HI 0x459 +#define A5XX_RBBM_PERFCTR_LRZ_3_LO 0x45A +#define A5XX_RBBM_PERFCTR_LRZ_3_HI 0x45B +#define A5XX_RBBM_PERFCTR_CMP_0_LO 0x45C +#define A5XX_RBBM_PERFCTR_CMP_0_HI 0x45D +#define A5XX_RBBM_PERFCTR_CMP_1_LO 0x45E +#define A5XX_RBBM_PERFCTR_CMP_1_HI 0x45F +#define A5XX_RBBM_PERFCTR_CMP_2_LO 0x460 +#define A5XX_RBBM_PERFCTR_CMP_2_HI 0x461 +#define A5XX_RBBM_PERFCTR_CMP_3_LO 0x462 +#define A5XX_RBBM_PERFCTR_CMP_3_HI 0x463 +#define A5XX_RBBM_PERFCTR_RBBM_SEL_0 0x46B +#define A5XX_RBBM_PERFCTR_RBBM_SEL_1 0x46C +#define A5XX_RBBM_PERFCTR_RBBM_SEL_2 0x46D +#define A5XX_RBBM_PERFCTR_RBBM_SEL_3 0x46E +#define A5XX_RBBM_ALWAYSON_COUNTER_LO 0x4D2 +#define A5XX_RBBM_ALWAYSON_COUNTER_HI 0x4D3 +#define A5XX_RBBM_STATUS 0x4F5 +#define A5XX_RBBM_STATUS3 0x530 +#define A5XX_RBBM_INT_0_STATUS 0x4E1 +#define A5XX_RBBM_AHB_ME_SPLIT_STATUS 0x4F0 +#define A5XX_RBBM_AHB_PFP_SPLIT_STATUS 0x4F1 +#define A5XX_RBBM_AHB_ERROR_STATUS 0x4F4 +#define A5XX_RBBM_PERFCTR_CNTL 
0x464 +#define A5XX_RBBM_PERFCTR_LOAD_CMD0 0x465 +#define A5XX_RBBM_PERFCTR_LOAD_CMD1 0x466 +#define A5XX_RBBM_PERFCTR_LOAD_CMD2 0x467 +#define A5XX_RBBM_PERFCTR_LOAD_CMD3 0x468 +#define A5XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x469 +#define A5XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x46A +#define A5XX_RBBM_PERFCTR_RBBM_SEL_0 0x46B +#define A5XX_RBBM_PERFCTR_RBBM_SEL_1 0x46C +#define A5XX_RBBM_PERFCTR_RBBM_SEL_2 0x46D +#define A5XX_RBBM_PERFCTR_RBBM_SEL_3 0x46E +#define A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED 0x46F +#define A5XX_RBBM_CFG_DBGBUS_EVENT_LOGIC 0x504 +#define A5XX_RBBM_CFG_DBGBUS_OVER 0x505 +#define A5XX_RBBM_CFG_DBGBUS_COUNT0 0x506 +#define A5XX_RBBM_CFG_DBGBUS_COUNT1 0x507 +#define A5XX_RBBM_CFG_DBGBUS_COUNT2 0x508 +#define A5XX_RBBM_CFG_DBGBUS_COUNT3 0x509 +#define A5XX_RBBM_CFG_DBGBUS_COUNT4 0x50A +#define A5XX_RBBM_CFG_DBGBUS_COUNT5 0x50B +#define A5XX_RBBM_CFG_DBGBUS_TRACE_ADDR 0x50C +#define A5XX_RBBM_CFG_DBGBUS_TRACE_BUF0 0x50D +#define A5XX_RBBM_CFG_DBGBUS_TRACE_BUF1 0x50E +#define A5XX_RBBM_CFG_DBGBUS_TRACE_BUF2 0x50F +#define A5XX_RBBM_CFG_DBGBUS_TRACE_BUF3 0x510 +#define A5XX_RBBM_CFG_DBGBUS_TRACE_BUF4 0x511 +#define A5XX_RBBM_CFG_DBGBUS_MISR0 0x512 +#define A5XX_RBBM_CFG_DBGBUS_MISR1 0x513 +#define A5XX_RBBM_ISDB_CNT 0x533 +#define A5XX_RBBM_SECVID_TRUST_CONFIG 0xF000 +#define A5XX_RBBM_SECVID_TRUST_CNTL 0xF400 +#define A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO 0xF800 +#define A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI 0xF801 +#define A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE 0xF802 +#define A5XX_RBBM_SECVID_TSB_CNTL 0xF803 +#define A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL 0xF810 + +/* VSC registers */ +#define A5XX_VSC_PERFCTR_VSC_SEL_0 0xC60 +#define A5XX_VSC_PERFCTR_VSC_SEL_1 0xC61 + +#define A5XX_GRAS_ADDR_MODE_CNTL 0xC81 + +/* TSE registers */ +#define A5XX_GRAS_PERFCTR_TSE_SEL_0 0xC90 +#define A5XX_GRAS_PERFCTR_TSE_SEL_1 0xC91 +#define A5XX_GRAS_PERFCTR_TSE_SEL_2 0xC92 +#define A5XX_GRAS_PERFCTR_TSE_SEL_3 0xC93 + +/* RAS registers */ +#define A5XX_GRAS_PERFCTR_RAS_SEL_0 0xC94 +#define A5XX_GRAS_PERFCTR_RAS_SEL_1 0xC95 +#define A5XX_GRAS_PERFCTR_RAS_SEL_2 0xC96 +#define A5XX_GRAS_PERFCTR_RAS_SEL_3 0xC97 + +/* LRZ registers */ +#define A5XX_GRAS_PERFCTR_LRZ_SEL_0 0xC98 +#define A5XX_GRAS_PERFCTR_LRZ_SEL_1 0xC99 +#define A5XX_GRAS_PERFCTR_LRZ_SEL_2 0xC9A +#define A5XX_GRAS_PERFCTR_LRZ_SEL_3 0xC9B + + +/* RB registers */ +#define A5XX_RB_DBG_ECO_CNT 0xCC4 +#define A5XX_RB_ADDR_MODE_CNTL 0xCC5 +#define A5XX_RB_MODE_CNTL 0xCC6 +#define A5XX_RB_PERFCTR_RB_SEL_0 0xCD0 +#define A5XX_RB_PERFCTR_RB_SEL_1 0xCD1 +#define A5XX_RB_PERFCTR_RB_SEL_2 0xCD2 +#define A5XX_RB_PERFCTR_RB_SEL_3 0xCD3 +#define A5XX_RB_PERFCTR_RB_SEL_4 0xCD4 +#define A5XX_RB_PERFCTR_RB_SEL_5 0xCD5 +#define A5XX_RB_PERFCTR_RB_SEL_6 0xCD6 +#define A5XX_RB_PERFCTR_RB_SEL_7 0xCD7 + +/* CCU registers */ +#define A5XX_RB_PERFCTR_CCU_SEL_0 0xCD8 +#define A5XX_RB_PERFCTR_CCU_SEL_1 0xCD9 +#define A5XX_RB_PERFCTR_CCU_SEL_2 0xCDA +#define A5XX_RB_PERFCTR_CCU_SEL_3 0xCDB + +/* RB Power Counter RB Registers Select */ +#define A5XX_RB_POWERCTR_RB_SEL_0 0xCE0 +#define A5XX_RB_POWERCTR_RB_SEL_1 0xCE1 +#define A5XX_RB_POWERCTR_RB_SEL_2 0xCE2 +#define A5XX_RB_POWERCTR_RB_SEL_3 0xCE3 + +/* RB Power Counter CCU Registers Select */ +#define A5XX_RB_POWERCTR_CCU_SEL_0 0xCE4 +#define A5XX_RB_POWERCTR_CCU_SEL_1 0xCE5 + +/* CMP registers */ +#define A5XX_RB_PERFCTR_CMP_SEL_0 0xCEC +#define A5XX_RB_PERFCTR_CMP_SEL_1 0xCED +#define A5XX_RB_PERFCTR_CMP_SEL_2 0xCEE +#define A5XX_RB_PERFCTR_CMP_SEL_3 0xCEF + +/* PC registers */ +#define A5XX_PC_DBG_ECO_CNTL 0xD00 +#define 
A5XX_PC_ADDR_MODE_CNTL 0xD01 +#define A5XX_PC_PERFCTR_PC_SEL_0 0xD10 +#define A5XX_PC_PERFCTR_PC_SEL_1 0xD11 +#define A5XX_PC_PERFCTR_PC_SEL_2 0xD12 +#define A5XX_PC_PERFCTR_PC_SEL_3 0xD13 +#define A5XX_PC_PERFCTR_PC_SEL_4 0xD14 +#define A5XX_PC_PERFCTR_PC_SEL_5 0xD15 +#define A5XX_PC_PERFCTR_PC_SEL_6 0xD16 +#define A5XX_PC_PERFCTR_PC_SEL_7 0xD17 + +/* HLSQ registers */ +#define A5XX_HLSQ_DBG_ECO_CNTL 0xE04 +#define A5XX_HLSQ_ADDR_MODE_CNTL 0xE05 +#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_0 0xE10 +#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_1 0xE11 +#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_2 0xE12 +#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_3 0xE13 +#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_4 0xE14 +#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_5 0xE15 +#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_6 0xE16 +#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_7 0xE17 +#define A5XX_HLSQ_DBG_READ_SEL 0xBC00 +#define A5XX_HLSQ_DBG_AHB_READ_APERTURE 0xA000 + +/* VFD registers */ +#define A5XX_VFD_ADDR_MODE_CNTL 0xE41 +#define A5XX_VFD_PERFCTR_VFD_SEL_0 0xE50 +#define A5XX_VFD_PERFCTR_VFD_SEL_1 0xE51 +#define A5XX_VFD_PERFCTR_VFD_SEL_2 0xE52 +#define A5XX_VFD_PERFCTR_VFD_SEL_3 0xE53 +#define A5XX_VFD_PERFCTR_VFD_SEL_4 0xE54 +#define A5XX_VFD_PERFCTR_VFD_SEL_5 0xE55 +#define A5XX_VFD_PERFCTR_VFD_SEL_6 0xE56 +#define A5XX_VFD_PERFCTR_VFD_SEL_7 0xE57 + +/* VPC registers */ +#define A5XX_VPC_DBG_ECO_CNTL 0xE60 +#define A5XX_VPC_ADDR_MODE_CNTL 0xE61 +#define A5XX_VPC_PERFCTR_VPC_SEL_0 0xE64 +#define A5XX_VPC_PERFCTR_VPC_SEL_1 0xE65 +#define A5XX_VPC_PERFCTR_VPC_SEL_2 0xE66 +#define A5XX_VPC_PERFCTR_VPC_SEL_3 0xE67 + +/* UCHE registers */ +#define A5XX_UCHE_ADDR_MODE_CNTL 0xE80 +#define A5XX_UCHE_MODE_CNTL 0xE81 +#define A5XX_UCHE_WRITE_THRU_BASE_LO 0xE87 +#define A5XX_UCHE_WRITE_THRU_BASE_HI 0xE88 +#define A5XX_UCHE_TRAP_BASE_LO 0xE89 +#define A5XX_UCHE_TRAP_BASE_HI 0xE8A +#define A5XX_UCHE_GMEM_RANGE_MIN_LO 0xE8B +#define A5XX_UCHE_GMEM_RANGE_MIN_HI 0xE8C +#define A5XX_UCHE_GMEM_RANGE_MAX_LO 0xE8D +#define A5XX_UCHE_GMEM_RANGE_MAX_HI 0xE8E +#define A5XX_UCHE_DBG_ECO_CNTL_2 0xE8F +#define A5XX_UCHE_INVALIDATE0 0xE95 +#define A5XX_UCHE_CACHE_WAYS 0xE96 +#define A5XX_UCHE_PERFCTR_UCHE_SEL_0 0xEA0 +#define A5XX_UCHE_PERFCTR_UCHE_SEL_1 0xEA1 +#define A5XX_UCHE_PERFCTR_UCHE_SEL_2 0xEA2 +#define A5XX_UCHE_PERFCTR_UCHE_SEL_3 0xEA3 +#define A5XX_UCHE_PERFCTR_UCHE_SEL_4 0xEA4 +#define A5XX_UCHE_PERFCTR_UCHE_SEL_5 0xEA5 +#define A5XX_UCHE_PERFCTR_UCHE_SEL_6 0xEA6 +#define A5XX_UCHE_PERFCTR_UCHE_SEL_7 0xEA7 + +/* UCHE Power Counter UCHE Registers Select */ +#define A5XX_UCHE_POWERCTR_UCHE_SEL_0 0xEA8 +#define A5XX_UCHE_POWERCTR_UCHE_SEL_1 0xEA9 +#define A5XX_UCHE_POWERCTR_UCHE_SEL_2 0xEAA +#define A5XX_UCHE_POWERCTR_UCHE_SEL_3 0xEAB + +/* SP registers */ +#define A5XX_SP_DBG_ECO_CNTL 0xEC0 +#define A5XX_SP_ADDR_MODE_CNTL 0xEC1 +#define A5XX_SP_PERFCTR_SP_SEL_0 0xED0 +#define A5XX_SP_PERFCTR_SP_SEL_1 0xED1 +#define A5XX_SP_PERFCTR_SP_SEL_2 0xED2 +#define A5XX_SP_PERFCTR_SP_SEL_3 0xED3 +#define A5XX_SP_PERFCTR_SP_SEL_4 0xED4 +#define A5XX_SP_PERFCTR_SP_SEL_5 0xED5 +#define A5XX_SP_PERFCTR_SP_SEL_6 0xED6 +#define A5XX_SP_PERFCTR_SP_SEL_7 0xED7 +#define A5XX_SP_PERFCTR_SP_SEL_8 0xED8 +#define A5XX_SP_PERFCTR_SP_SEL_9 0xED9 +#define A5XX_SP_PERFCTR_SP_SEL_10 0xEDA +#define A5XX_SP_PERFCTR_SP_SEL_11 0xEDB + +/* SP Power Counter SP Registers Select */ +#define A5XX_SP_POWERCTR_SP_SEL_0 0xEDC +#define A5XX_SP_POWERCTR_SP_SEL_1 0xEDD +#define A5XX_SP_POWERCTR_SP_SEL_2 0xEDE +#define A5XX_SP_POWERCTR_SP_SEL_3 0xEDF + +/* TP registers */ +#define A5XX_TPL1_ADDR_MODE_CNTL 0xF01 +#define 
A5XX_TPL1_MODE_CNTL 0xF02 +#define A5XX_TPL1_PERFCTR_TP_SEL_0 0xF10 +#define A5XX_TPL1_PERFCTR_TP_SEL_1 0xF11 +#define A5XX_TPL1_PERFCTR_TP_SEL_2 0xF12 +#define A5XX_TPL1_PERFCTR_TP_SEL_3 0xF13 +#define A5XX_TPL1_PERFCTR_TP_SEL_4 0xF14 +#define A5XX_TPL1_PERFCTR_TP_SEL_5 0xF15 +#define A5XX_TPL1_PERFCTR_TP_SEL_6 0xF16 +#define A5XX_TPL1_PERFCTR_TP_SEL_7 0xF17 + +/* TP Power Counter TP Registers Select */ +#define A5XX_TPL1_POWERCTR_TP_SEL_0 0xF18 +#define A5XX_TPL1_POWERCTR_TP_SEL_1 0xF19 +#define A5XX_TPL1_POWERCTR_TP_SEL_2 0xF1A +#define A5XX_TPL1_POWERCTR_TP_SEL_3 0xF1B + +/* VBIF registers */ +#define A5XX_VBIF_VERSION 0x3000 +#define A5XX_VBIF_CLKON 0x3001 +#define A5XX_VBIF_CLKON_FORCE_ON_TESTBUS_MASK 0x1 +#define A5XX_VBIF_CLKON_FORCE_ON_TESTBUS_SHIFT 0x1 + +#define A5XX_VBIF_ROUND_ROBIN_QOS_ARB 0x3049 +#define A5XX_VBIF_GATE_OFF_WRREQ_EN 0x302A + +#define A5XX_VBIF_XIN_HALT_CTRL0 0x3080 +#define A5XX_VBIF_XIN_HALT_CTRL0_MASK 0xF +#define A510_VBIF_XIN_HALT_CTRL0_MASK 0x7 +#define A5XX_VBIF_XIN_HALT_CTRL1 0x3081 + +#define A5XX_VBIF_TEST_BUS_OUT_CTRL 0x3084 +#define A5XX_VBIF_TEST_BUS_OUT_CTRL_EN_MASK 0x1 +#define A5XX_VBIF_TEST_BUS_OUT_CTRL_EN_SHIFT 0x0 + +#define A5XX_VBIF_TEST_BUS1_CTRL0 0x3085 +#define A5XX_VBIF_TEST_BUS1_CTRL1 0x3086 +#define A5XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_MASK 0xF +#define A5XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_SHIFT 0x0 + +#define A5XX_VBIF_TEST_BUS2_CTRL0 0x3087 +#define A5XX_VBIF_TEST_BUS2_CTRL1 0x3088 +#define A5XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_MASK 0x1FF +#define A5XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_SHIFT 0x0 + +#define A5XX_VBIF_TEST_BUS_OUT 0x308c + +#define A5XX_VBIF_PERF_CNT_SEL0 0x30D0 +#define A5XX_VBIF_PERF_CNT_SEL1 0x30D1 +#define A5XX_VBIF_PERF_CNT_SEL2 0x30D2 +#define A5XX_VBIF_PERF_CNT_SEL3 0x30D3 +#define A5XX_VBIF_PERF_CNT_LOW0 0x30D8 +#define A5XX_VBIF_PERF_CNT_LOW1 0x30D9 +#define A5XX_VBIF_PERF_CNT_LOW2 0x30DA +#define A5XX_VBIF_PERF_CNT_LOW3 0x30DB +#define A5XX_VBIF_PERF_CNT_HIGH0 0x30E0 +#define A5XX_VBIF_PERF_CNT_HIGH1 0x30E1 +#define A5XX_VBIF_PERF_CNT_HIGH2 0x30E2 +#define A5XX_VBIF_PERF_CNT_HIGH3 0x30E3 + +#define A5XX_VBIF_PERF_PWR_CNT_EN0 0x3100 +#define A5XX_VBIF_PERF_PWR_CNT_EN1 0x3101 +#define A5XX_VBIF_PERF_PWR_CNT_EN2 0x3102 + +#define A5XX_VBIF_PERF_PWR_CNT_LOW0 0x3110 +#define A5XX_VBIF_PERF_PWR_CNT_LOW1 0x3111 +#define A5XX_VBIF_PERF_PWR_CNT_LOW2 0x3112 + +#define A5XX_VBIF_PERF_PWR_CNT_HIGH0 0x3118 +#define A5XX_VBIF_PERF_PWR_CNT_HIGH1 0x3119 +#define A5XX_VBIF_PERF_PWR_CNT_HIGH2 0x311A + +/* GPMU registers */ +#define A5XX_GPMU_INST_RAM_BASE 0x8800 +#define A5XX_GPMU_DATA_RAM_BASE 0x9800 +#define A5XX_GPMU_SP_POWER_CNTL 0xA881 +#define A5XX_GPMU_RBCCU_CLOCK_CNTL 0xA886 +#define A5XX_GPMU_RBCCU_POWER_CNTL 0xA887 +#define A5XX_GPMU_SP_PWR_CLK_STATUS 0xA88B +#define A5XX_GPMU_RBCCU_PWR_CLK_STATUS 0xA88D +#define A5XX_GPMU_PWR_COL_STAGGER_DELAY 0xA891 +#define A5XX_GPMU_PWR_COL_INTER_FRAME_CTRL 0xA892 +#define A5XX_GPMU_PWR_COL_INTER_FRAME_HYST 0xA893 +#define A5XX_GPMU_PWR_COL_BINNING_CTRL 0xA894 +#define A5XX_GPMU_CLOCK_THROTTLE_CTRL 0xA8A3 +#define A5XX_GPMU_WFI_CONFIG 0xA8C1 +#define A5XX_GPMU_RBBM_INTR_INFO 0xA8D6 +#define A5XX_GPMU_CM3_SYSRESET 0xA8D8 +#define A5XX_GPMU_GENERAL_0 0xA8E0 +#define A5XX_GPMU_GENERAL_1 0xA8E1 + +/* COUNTABLE FOR SP PERFCOUNTER */ +#define A5XX_SP_ALU_ACTIVE_CYCLES 0x1 +#define A5XX_SP0_ICL1_MISSES 0x35 +#define A5XX_SP_FS_CFLOW_INSTRUCTIONS 0x27 + +/* COUNTABLE FOR TSE PERFCOUNTER */ +#define A5XX_TSE_INPUT_PRIM_NUM 0x6 + +/* COUNTABLE FOR RBBM PERFCOUNTER */ +#define A5XX_RBBM_ALWAYS_COUNT 
0x0 + +/* GPMU POWER COUNTERS */ +#define A5XX_SP_POWER_COUNTER_0_LO 0xA840 +#define A5XX_SP_POWER_COUNTER_0_HI 0xA841 +#define A5XX_SP_POWER_COUNTER_1_LO 0xA842 +#define A5XX_SP_POWER_COUNTER_1_HI 0xA843 +#define A5XX_SP_POWER_COUNTER_2_LO 0xA844 +#define A5XX_SP_POWER_COUNTER_2_HI 0xA845 +#define A5XX_SP_POWER_COUNTER_3_LO 0xA846 +#define A5XX_SP_POWER_COUNTER_3_HI 0xA847 + +#define A5XX_TP_POWER_COUNTER_0_LO 0xA848 +#define A5XX_TP_POWER_COUNTER_0_HI 0xA849 +#define A5XX_TP_POWER_COUNTER_1_LO 0xA84A +#define A5XX_TP_POWER_COUNTER_1_HI 0xA84B +#define A5XX_TP_POWER_COUNTER_2_LO 0xA84C +#define A5XX_TP_POWER_COUNTER_2_HI 0xA84D +#define A5XX_TP_POWER_COUNTER_3_LO 0xA84E +#define A5XX_TP_POWER_COUNTER_3_HI 0xA84F + +#define A5XX_RB_POWER_COUNTER_0_LO 0xA850 +#define A5XX_RB_POWER_COUNTER_0_HI 0xA851 +#define A5XX_RB_POWER_COUNTER_1_LO 0xA852 +#define A5XX_RB_POWER_COUNTER_1_HI 0xA853 +#define A5XX_RB_POWER_COUNTER_2_LO 0xA854 +#define A5XX_RB_POWER_COUNTER_2_HI 0xA855 +#define A5XX_RB_POWER_COUNTER_3_LO 0xA856 +#define A5XX_RB_POWER_COUNTER_3_HI 0xA857 + +#define A5XX_CCU_POWER_COUNTER_0_LO 0xA858 +#define A5XX_CCU_POWER_COUNTER_0_HI 0xA859 +#define A5XX_CCU_POWER_COUNTER_1_LO 0xA85A +#define A5XX_CCU_POWER_COUNTER_1_HI 0xA85B + +#define A5XX_UCHE_POWER_COUNTER_0_LO 0xA85C +#define A5XX_UCHE_POWER_COUNTER_0_HI 0xA85D +#define A5XX_UCHE_POWER_COUNTER_1_LO 0xA85E +#define A5XX_UCHE_POWER_COUNTER_1_HI 0xA85F +#define A5XX_UCHE_POWER_COUNTER_2_LO 0xA860 +#define A5XX_UCHE_POWER_COUNTER_2_HI 0xA861 +#define A5XX_UCHE_POWER_COUNTER_3_LO 0xA862 +#define A5XX_UCHE_POWER_COUNTER_3_HI 0xA863 + +#define A5XX_CP_POWER_COUNTER_0_LO 0xA864 +#define A5XX_CP_POWER_COUNTER_0_HI 0xA865 +#define A5XX_CP_POWER_COUNTER_1_LO 0xA866 +#define A5XX_CP_POWER_COUNTER_1_HI 0xA867 +#define A5XX_CP_POWER_COUNTER_2_LO 0xA868 +#define A5XX_CP_POWER_COUNTER_2_HI 0xA869 +#define A5XX_CP_POWER_COUNTER_3_LO 0xA86A +#define A5XX_CP_POWER_COUNTER_3_HI 0xA86B + +#define A5XX_GPMU_POWER_COUNTER_0_LO 0xA86C +#define A5XX_GPMU_POWER_COUNTER_0_HI 0xA86D +#define A5XX_GPMU_POWER_COUNTER_1_LO 0xA86E +#define A5XX_GPMU_POWER_COUNTER_1_HI 0xA86F +#define A5XX_GPMU_POWER_COUNTER_2_LO 0xA870 +#define A5XX_GPMU_POWER_COUNTER_2_HI 0xA871 +#define A5XX_GPMU_POWER_COUNTER_3_LO 0xA872 +#define A5XX_GPMU_POWER_COUNTER_3_HI 0xA873 +#define A5XX_GPMU_POWER_COUNTER_4_LO 0xA874 +#define A5XX_GPMU_POWER_COUNTER_4_HI 0xA875 +#define A5XX_GPMU_POWER_COUNTER_5_LO 0xA876 +#define A5XX_GPMU_POWER_COUNTER_5_HI 0xA877 + +#define A5XX_GPMU_POWER_COUNTER_ENABLE 0xA878 +#define A5XX_GPMU_ALWAYS_ON_COUNTER_LO 0xA879 +#define A5XX_GPMU_ALWAYS_ON_COUNTER_HI 0xA87A +#define A5XX_GPMU_ALWAYS_ON_COUNTER_RESET 0xA87B +#define A5XX_GPMU_POWER_COUNTER_SELECT_0 0xA87C +#define A5XX_GPMU_POWER_COUNTER_SELECT_1 0xA87D +#define A5XX_GPMU_GPMU_SP_CLOCK_CONTROL 0xA880 + +#define A5XX_GPMU_CLOCK_THROTTLE_CTRL 0xA8A3 +#define A5XX_GPMU_THROTTLE_UNMASK_FORCE_CTRL 0xA8A8 + +#define A5XX_GPMU_TEMP_SENSOR_ID 0xAC00 +#define A5XX_GPMU_TEMP_SENSOR_CONFIG 0xAC01 +#define A5XX_GPMU_DELTA_TEMP_THRESHOLD 0xAC03 +#define A5XX_GPMU_TEMP_THRESHOLD_INTR_EN_MASK 0xAC06 + +#define A5XX_GPMU_LEAKAGE_TEMP_COEFF_0_1 0xAC40 +#define A5XX_GPMU_LEAKAGE_TEMP_COEFF_2_3 0xAC41 +#define A5XX_GPMU_LEAKAGE_VTG_COEFF_0_1 0xAC42 +#define A5XX_GPMU_LEAKAGE_VTG_COEFF_2_3 0xAC43 +#define A5XX_GPMU_BASE_LEAKAGE 0xAC46 + +#define A5XX_GPMU_GPMU_VOLTAGE 0xAC60 +#define A5XX_GPMU_GPMU_VOLTAGE_INTR_STATUS 0xAC61 +#define A5XX_GPMU_GPMU_VOLTAGE_INTR_EN_MASK 0xAC62 +#define A5XX_GPMU_GPMU_PWR_THRESHOLD 0xAC80 
+#define A5XX_GPMU_GPMU_LLM_GLM_SLEEP_CTRL 0xACC4 +#define A5XX_GPMU_GPMU_LLM_GLM_SLEEP_STATUS 0xACC5 +#define A5XX_GPMU_GPMU_ISENSE_CTRL 0xACD0 + +#define A5XX_GDPM_CONFIG1 0xB80C +#define A5XX_GDPM_INT_EN 0xB80F +#define A5XX_GDPM_INT_MASK 0xB811 +#define A5XX_GPMU_BEC_ENABLE 0xB9A0 + +/* ISENSE registers */ +#define A5XX_GPU_CS_DECIMAL_ALIGN 0xC16A +#define A5XX_GPU_CS_SENSOR_PARAM_CORE_1 0xC126 +#define A5XX_GPU_CS_SENSOR_PARAM_CORE_2 0xC127 +#define A5XX_GPU_CS_SW_OV_FUSE_EN 0xC168 +#define A5XX_GPU_CS_SENSOR_GENERAL_STATUS 0xC41A +#define A5XX_GPU_CS_AMP_CALIBRATION_STATUS1_0 0xC41D +#define A5XX_GPU_CS_AMP_CALIBRATION_STATUS1_2 0xC41F +#define A5XX_GPU_CS_AMP_CALIBRATION_STATUS1_4 0xC421 +#define A5XX_GPU_CS_ENABLE_REG 0xC520 +#define A5XX_GPU_CS_AMP_CALIBRATION_CONTROL1 0xC557 +#define A5XX_GPU_CS_AMP_CALIBRATION_DONE 0xC565 +#define A5XX_GPU_CS_ENDPOINT_CALIBRATION_DONE 0xC556 +#endif /* _A5XX_REG_H */ + diff --git a/a6xx_reg.h b/a6xx_reg.h new file mode 100644 index 0000000000..f6b7dcde8a --- /dev/null +++ b/a6xx_reg.h @@ -0,0 +1,1197 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2017-2020, The Linux Foundation. All rights reserved. + */ + +#ifndef _A6XX_REG_H +#define _A6XX_REG_H + +/* A6XX interrupt bits */ +#define A6XX_INT_RBBM_GPU_IDLE 0 +#define A6XX_INT_CP_AHB_ERROR 1 +#define A6XX_INT_ATB_ASYNCFIFO_OVERFLOW 6 +#define A6XX_INT_RBBM_GPC_ERROR 7 +#define A6XX_INT_CP_SW 8 +#define A6XX_INT_CP_HW_ERROR 9 +#define A6XX_INT_CP_CCU_FLUSH_DEPTH_TS 10 +#define A6XX_INT_CP_CCU_FLUSH_COLOR_TS 11 +#define A6XX_INT_CP_CCU_RESOLVE_TS 12 +#define A6XX_INT_CP_IB2 13 +#define A6XX_INT_CP_IB1 14 +#define A6XX_INT_CP_RB 15 +#define A6XX_INT_CP_RB_DONE_TS 17 +#define A6XX_INT_CP_WT_DONE_TS 18 +#define A6XX_INT_CP_CACHE_FLUSH_TS 20 +#define A6XX_INT_RBBM_ATB_BUS_OVERFLOW 22 +#define A6XX_INT_RBBM_HANG_DETECT 23 +#define A6XX_INT_UCHE_OOB_ACCESS 24 +#define A6XX_INT_UCHE_TRAP_INTR 25 +#define A6XX_INT_DEBBUS_INTR_0 26 +#define A6XX_INT_DEBBUS_INTR_1 27 +#define A6XX_INT_TSB_WRITE_ERROR 28 +#define A6XX_INT_ISDB_CPU_IRQ 30 +#define A6XX_INT_ISDB_UNDER_DEBUG 31 + +/* CP Interrupt bits */ +#define A6XX_CP_OPCODE_ERROR 0 +#define A6XX_CP_UCODE_ERROR 1 +#define A6XX_CP_HW_FAULT_ERROR 2 +#define A6XX_CP_REGISTER_PROTECTION_ERROR 4 +#define A6XX_CP_AHB_ERROR 5 +#define A6XX_CP_VSD_PARITY_ERROR 6 +#define A6XX_CP_ILLEGAL_INSTR_ERROR 7 + +/* CP registers */ +#define A6XX_CP_RB_BASE 0x800 +#define A6XX_CP_RB_BASE_HI 0x801 +#define A6XX_CP_RB_CNTL 0x802 +#define A6XX_CP_RB_RPTR_ADDR_LO 0x804 +#define A6XX_CP_RB_RPTR_ADDR_HI 0x805 +#define A6XX_CP_RB_RPTR 0x806 +#define A6XX_CP_RB_WPTR 0x807 +#define A6XX_CP_SQE_CNTL 0x808 +#define A6XX_CP_CP2GMU_STATUS 0x812 +#define A6XX_CP_HW_FAULT 0x821 +#define A6XX_CP_INTERRUPT_STATUS 0x823 +#define A6XX_CP_PROTECT_STATUS 0x824 +#define A6XX_CP_STATUS_1 0x825 +#define A6XX_CP_SQE_INSTR_BASE_LO 0x830 +#define A6XX_CP_SQE_INSTR_BASE_HI 0x831 +#define A6XX_CP_MISC_CNTL 0x840 +#define A6XX_CP_APRIV_CNTL 0X844 +#define A6XX_CP_ROQ_THRESHOLDS_1 0x8C1 +#define A6XX_CP_ROQ_THRESHOLDS_2 0x8C2 +#define A6XX_CP_MEM_POOL_SIZE 0x8C3 +#define A6XX_CP_CHICKEN_DBG 0x841 +#define A6XX_CP_ADDR_MODE_CNTL 0x842 +#define A6XX_CP_DBG_ECO_CNTL 0x843 +#define A6XX_CP_PROTECT_CNTL 0x84F +#define A6XX_CP_PROTECT_REG 0x850 +#define A6XX_CP_CONTEXT_SWITCH_CNTL 0x8A0 +#define A6XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO 0x8A1 +#define A6XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI 0x8A2 +#define A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_LO 0x8A3 +#define 
A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_HI 0x8A4 +#define A6XX_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_LO 0x8A5 +#define A6XX_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_HI 0x8A6 +#define A6XX_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_LO 0x8A7 +#define A6XX_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_HI 0x8A8 +#define A6XX_CP_CONTEXT_SWITCH_LEVEL_STATUS 0x8AB +#define A6XX_CP_PERFCTR_CP_SEL_0 0x8D0 +#define A6XX_CP_PERFCTR_CP_SEL_1 0x8D1 +#define A6XX_CP_PERFCTR_CP_SEL_2 0x8D2 +#define A6XX_CP_PERFCTR_CP_SEL_3 0x8D3 +#define A6XX_CP_PERFCTR_CP_SEL_4 0x8D4 +#define A6XX_CP_PERFCTR_CP_SEL_5 0x8D5 +#define A6XX_CP_PERFCTR_CP_SEL_6 0x8D6 +#define A6XX_CP_PERFCTR_CP_SEL_7 0x8D7 +#define A6XX_CP_PERFCTR_CP_SEL_8 0x8D8 +#define A6XX_CP_PERFCTR_CP_SEL_9 0x8D9 +#define A6XX_CP_PERFCTR_CP_SEL_10 0x8DA +#define A6XX_CP_PERFCTR_CP_SEL_11 0x8DB +#define A6XX_CP_PERFCTR_CP_SEL_12 0x8DC +#define A6XX_CP_PERFCTR_CP_SEL_13 0x8DD +#define A6XX_CP_CRASH_SCRIPT_BASE_LO 0x900 +#define A6XX_CP_CRASH_SCRIPT_BASE_HI 0x901 +#define A6XX_CP_CRASH_DUMP_CNTL 0x902 +#define A6XX_CP_CRASH_DUMP_STATUS 0x903 +#define A6XX_CP_SQE_STAT_ADDR 0x908 +#define A6XX_CP_SQE_STAT_DATA 0x909 +#define A6XX_CP_DRAW_STATE_ADDR 0x90A +#define A6XX_CP_DRAW_STATE_DATA 0x90B +#define A6XX_CP_ROQ_DBG_ADDR 0x90C +#define A6XX_CP_ROQ_DBG_DATA 0x90D +#define A6XX_CP_MEM_POOL_DBG_ADDR 0x90E +#define A6XX_CP_MEM_POOL_DBG_DATA 0x90F +#define A6XX_CP_SQE_UCODE_DBG_ADDR 0x910 +#define A6XX_CP_SQE_UCODE_DBG_DATA 0x911 +#define A6XX_CP_IB1_BASE 0x928 +#define A6XX_CP_IB1_BASE_HI 0x929 +#define A6XX_CP_IB1_REM_SIZE 0x92A +#define A6XX_CP_IB2_BASE 0x92B +#define A6XX_CP_IB2_BASE_HI 0x92C +#define A6XX_CP_IB2_REM_SIZE 0x92D +#define A6XX_CP_ALWAYS_ON_COUNTER_LO 0x980 +#define A6XX_CP_ALWAYS_ON_COUNTER_HI 0x981 +#define A6XX_CP_AHB_CNTL 0x98D +#define A6XX_CP_APERTURE_CNTL_HOST 0xA00 +#define A6XX_CP_APERTURE_CNTL_CD 0xA03 +#define A6XX_VSC_ADDR_MODE_CNTL 0xC01 + +/* LPAC registers */ +#define A6XX_CP_LPAC_DRAW_STATE_ADDR 0xB0A +#define A6XX_CP_LPAC_DRAW_STATE_DATA 0xB0B +#define A6XX_CP_LPAC_ROQ_DBG_ADDR 0xB0C +#define A6XX_CP_SQE_AC_UCODE_DBG_ADDR 0xB27 +#define A6XX_CP_SQE_AC_UCODE_DBG_DATA 0xB28 +#define A6XX_CP_SQE_AC_STAT_ADDR 0xB29 +#define A6XX_CP_SQE_AC_STAT_DATA 0xB2A +#define A6XX_CP_LPAC_ROQ_THRESHOLDS_1 0xB32 +#define A6XX_CP_LPAC_ROQ_THRESHOLDS_2 0xB33 +#define A6XX_CP_LPAC_PROG_FIFO_SIZE 0xB34 +#define A6XX_CP_LPAC_ROQ_DBG_DATA 0xB35 +#define A6XX_CP_LPAC_FIFO_DBG_DATA 0xB36 +#define A6XX_CP_LPAC_FIFO_DBG_ADDR 0xB40 + +/* RBBM registers */ +#define A6XX_RBBM_INT_0_STATUS 0x201 +#define A6XX_RBBM_STATUS 0x210 +#define A6XX_RBBM_STATUS3 0x213 +#define A6XX_RBBM_VBIF_GX_RESET_STATUS 0x215 +#define A6XX_RBBM_PERFCTR_CP_0_LO 0x400 +#define A6XX_RBBM_PERFCTR_CP_0_HI 0x401 +#define A6XX_RBBM_PERFCTR_CP_1_LO 0x402 +#define A6XX_RBBM_PERFCTR_CP_1_HI 0x403 +#define A6XX_RBBM_PERFCTR_CP_2_LO 0x404 +#define A6XX_RBBM_PERFCTR_CP_2_HI 0x405 +#define A6XX_RBBM_PERFCTR_CP_3_LO 0x406 +#define A6XX_RBBM_PERFCTR_CP_3_HI 0x407 +#define A6XX_RBBM_PERFCTR_CP_4_LO 0x408 +#define A6XX_RBBM_PERFCTR_CP_4_HI 0x409 +#define A6XX_RBBM_PERFCTR_CP_5_LO 0x40a +#define A6XX_RBBM_PERFCTR_CP_5_HI 0x40b +#define A6XX_RBBM_PERFCTR_CP_6_LO 0x40c +#define A6XX_RBBM_PERFCTR_CP_6_HI 0x40d +#define A6XX_RBBM_PERFCTR_CP_7_LO 0x40e +#define A6XX_RBBM_PERFCTR_CP_7_HI 0x40f +#define A6XX_RBBM_PERFCTR_CP_8_LO 0x410 +#define A6XX_RBBM_PERFCTR_CP_8_HI 0x411 +#define A6XX_RBBM_PERFCTR_CP_9_LO 0x412 +#define A6XX_RBBM_PERFCTR_CP_9_HI 0x413 +#define A6XX_RBBM_PERFCTR_CP_10_LO 0x414 
+#define A6XX_RBBM_PERFCTR_CP_10_HI 0x415 +#define A6XX_RBBM_PERFCTR_CP_11_LO 0x416 +#define A6XX_RBBM_PERFCTR_CP_11_HI 0x417 +#define A6XX_RBBM_PERFCTR_CP_12_LO 0x418 +#define A6XX_RBBM_PERFCTR_CP_12_HI 0x419 +#define A6XX_RBBM_PERFCTR_CP_13_LO 0x41a +#define A6XX_RBBM_PERFCTR_CP_13_HI 0x41b +#define A6XX_RBBM_PERFCTR_RBBM_0_LO 0x41c +#define A6XX_RBBM_PERFCTR_RBBM_0_HI 0x41d +#define A6XX_RBBM_PERFCTR_RBBM_1_LO 0x41e +#define A6XX_RBBM_PERFCTR_RBBM_1_HI 0x41f +#define A6XX_RBBM_PERFCTR_RBBM_2_LO 0x420 +#define A6XX_RBBM_PERFCTR_RBBM_2_HI 0x421 +#define A6XX_RBBM_PERFCTR_RBBM_3_LO 0x422 +#define A6XX_RBBM_PERFCTR_RBBM_3_HI 0x423 +#define A6XX_RBBM_PERFCTR_PC_0_LO 0x424 +#define A6XX_RBBM_PERFCTR_PC_0_HI 0x425 +#define A6XX_RBBM_PERFCTR_PC_1_LO 0x426 +#define A6XX_RBBM_PERFCTR_PC_1_HI 0x427 +#define A6XX_RBBM_PERFCTR_PC_2_LO 0x428 +#define A6XX_RBBM_PERFCTR_PC_2_HI 0x429 +#define A6XX_RBBM_PERFCTR_PC_3_LO 0x42a +#define A6XX_RBBM_PERFCTR_PC_3_HI 0x42b +#define A6XX_RBBM_PERFCTR_PC_4_LO 0x42c +#define A6XX_RBBM_PERFCTR_PC_4_HI 0x42d +#define A6XX_RBBM_PERFCTR_PC_5_LO 0x42e +#define A6XX_RBBM_PERFCTR_PC_5_HI 0x42f +#define A6XX_RBBM_PERFCTR_PC_6_LO 0x430 +#define A6XX_RBBM_PERFCTR_PC_6_HI 0x431 +#define A6XX_RBBM_PERFCTR_PC_7_LO 0x432 +#define A6XX_RBBM_PERFCTR_PC_7_HI 0x433 +#define A6XX_RBBM_PERFCTR_VFD_0_LO 0x434 +#define A6XX_RBBM_PERFCTR_VFD_0_HI 0x435 +#define A6XX_RBBM_PERFCTR_VFD_1_LO 0x436 +#define A6XX_RBBM_PERFCTR_VFD_1_HI 0x437 +#define A6XX_RBBM_PERFCTR_VFD_2_LO 0x438 +#define A6XX_RBBM_PERFCTR_VFD_2_HI 0x439 +#define A6XX_RBBM_PERFCTR_VFD_3_LO 0x43a +#define A6XX_RBBM_PERFCTR_VFD_3_HI 0x43b +#define A6XX_RBBM_PERFCTR_VFD_4_LO 0x43c +#define A6XX_RBBM_PERFCTR_VFD_4_HI 0x43d +#define A6XX_RBBM_PERFCTR_VFD_5_LO 0x43e +#define A6XX_RBBM_PERFCTR_VFD_5_HI 0x43f +#define A6XX_RBBM_PERFCTR_VFD_6_LO 0x440 +#define A6XX_RBBM_PERFCTR_VFD_6_HI 0x441 +#define A6XX_RBBM_PERFCTR_VFD_7_LO 0x442 +#define A6XX_RBBM_PERFCTR_VFD_7_HI 0x443 +#define A6XX_RBBM_PERFCTR_HLSQ_0_LO 0x444 +#define A6XX_RBBM_PERFCTR_HLSQ_0_HI 0x445 +#define A6XX_RBBM_PERFCTR_HLSQ_1_LO 0x446 +#define A6XX_RBBM_PERFCTR_HLSQ_1_HI 0x447 +#define A6XX_RBBM_PERFCTR_HLSQ_2_LO 0x448 +#define A6XX_RBBM_PERFCTR_HLSQ_2_HI 0x449 +#define A6XX_RBBM_PERFCTR_HLSQ_3_LO 0x44a +#define A6XX_RBBM_PERFCTR_HLSQ_3_HI 0x44b +#define A6XX_RBBM_PERFCTR_HLSQ_4_LO 0x44c +#define A6XX_RBBM_PERFCTR_HLSQ_4_HI 0x44d +#define A6XX_RBBM_PERFCTR_HLSQ_5_LO 0x44e +#define A6XX_RBBM_PERFCTR_HLSQ_5_HI 0x44f +#define A6XX_RBBM_PERFCTR_VPC_0_LO 0x450 +#define A6XX_RBBM_PERFCTR_VPC_0_HI 0x451 +#define A6XX_RBBM_PERFCTR_VPC_1_LO 0x452 +#define A6XX_RBBM_PERFCTR_VPC_1_HI 0x453 +#define A6XX_RBBM_PERFCTR_VPC_2_LO 0x454 +#define A6XX_RBBM_PERFCTR_VPC_2_HI 0x455 +#define A6XX_RBBM_PERFCTR_VPC_3_LO 0x456 +#define A6XX_RBBM_PERFCTR_VPC_3_HI 0x457 +#define A6XX_RBBM_PERFCTR_VPC_4_LO 0x458 +#define A6XX_RBBM_PERFCTR_VPC_4_HI 0x459 +#define A6XX_RBBM_PERFCTR_VPC_5_LO 0x45a +#define A6XX_RBBM_PERFCTR_VPC_5_HI 0x45b +#define A6XX_RBBM_PERFCTR_CCU_0_LO 0x45c +#define A6XX_RBBM_PERFCTR_CCU_0_HI 0x45d +#define A6XX_RBBM_PERFCTR_CCU_1_LO 0x45e +#define A6XX_RBBM_PERFCTR_CCU_1_HI 0x45f +#define A6XX_RBBM_PERFCTR_CCU_2_LO 0x460 +#define A6XX_RBBM_PERFCTR_CCU_2_HI 0x461 +#define A6XX_RBBM_PERFCTR_CCU_3_LO 0x462 +#define A6XX_RBBM_PERFCTR_CCU_3_HI 0x463 +#define A6XX_RBBM_PERFCTR_CCU_4_LO 0x464 +#define A6XX_RBBM_PERFCTR_CCU_4_HI 0x465 +#define A6XX_RBBM_PERFCTR_TSE_0_LO 0x466 +#define A6XX_RBBM_PERFCTR_TSE_0_HI 0x467 +#define A6XX_RBBM_PERFCTR_TSE_1_LO 0x468 +#define 
A6XX_RBBM_PERFCTR_TSE_1_HI 0x469 +#define A6XX_RBBM_PERFCTR_TSE_2_LO 0x46a +#define A6XX_RBBM_PERFCTR_TSE_2_HI 0x46b +#define A6XX_RBBM_PERFCTR_TSE_3_LO 0x46c +#define A6XX_RBBM_PERFCTR_TSE_3_HI 0x46d +#define A6XX_RBBM_PERFCTR_RAS_0_LO 0x46e +#define A6XX_RBBM_PERFCTR_RAS_0_HI 0x46f +#define A6XX_RBBM_PERFCTR_RAS_1_LO 0x470 +#define A6XX_RBBM_PERFCTR_RAS_1_HI 0x471 +#define A6XX_RBBM_PERFCTR_RAS_2_LO 0x472 +#define A6XX_RBBM_PERFCTR_RAS_2_HI 0x473 +#define A6XX_RBBM_PERFCTR_RAS_3_LO 0x474 +#define A6XX_RBBM_PERFCTR_RAS_3_HI 0x475 +#define A6XX_RBBM_PERFCTR_UCHE_0_LO 0x476 +#define A6XX_RBBM_PERFCTR_UCHE_0_HI 0x477 +#define A6XX_RBBM_PERFCTR_UCHE_1_LO 0x478 +#define A6XX_RBBM_PERFCTR_UCHE_1_HI 0x479 +#define A6XX_RBBM_PERFCTR_UCHE_2_LO 0x47a +#define A6XX_RBBM_PERFCTR_UCHE_2_HI 0x47b +#define A6XX_RBBM_PERFCTR_UCHE_3_LO 0x47c +#define A6XX_RBBM_PERFCTR_UCHE_3_HI 0x47d +#define A6XX_RBBM_PERFCTR_UCHE_4_LO 0x47e +#define A6XX_RBBM_PERFCTR_UCHE_4_HI 0x47f +#define A6XX_RBBM_PERFCTR_UCHE_5_LO 0x480 +#define A6XX_RBBM_PERFCTR_UCHE_5_HI 0x481 +#define A6XX_RBBM_PERFCTR_UCHE_6_LO 0x482 +#define A6XX_RBBM_PERFCTR_UCHE_6_HI 0x483 +#define A6XX_RBBM_PERFCTR_UCHE_7_LO 0x484 +#define A6XX_RBBM_PERFCTR_UCHE_7_HI 0x485 +#define A6XX_RBBM_PERFCTR_UCHE_8_LO 0x486 +#define A6XX_RBBM_PERFCTR_UCHE_8_HI 0x487 +#define A6XX_RBBM_PERFCTR_UCHE_9_LO 0x488 +#define A6XX_RBBM_PERFCTR_UCHE_9_HI 0x489 +#define A6XX_RBBM_PERFCTR_UCHE_10_LO 0x48a +#define A6XX_RBBM_PERFCTR_UCHE_10_HI 0x48b +#define A6XX_RBBM_PERFCTR_UCHE_11_LO 0x48c +#define A6XX_RBBM_PERFCTR_UCHE_11_HI 0x48d +#define A6XX_RBBM_PERFCTR_TP_0_LO 0x48e +#define A6XX_RBBM_PERFCTR_TP_0_HI 0x48f +#define A6XX_RBBM_PERFCTR_TP_1_LO 0x490 +#define A6XX_RBBM_PERFCTR_TP_1_HI 0x491 +#define A6XX_RBBM_PERFCTR_TP_2_LO 0x492 +#define A6XX_RBBM_PERFCTR_TP_2_HI 0x493 +#define A6XX_RBBM_PERFCTR_TP_3_LO 0x494 +#define A6XX_RBBM_PERFCTR_TP_3_HI 0x495 +#define A6XX_RBBM_PERFCTR_TP_4_LO 0x496 +#define A6XX_RBBM_PERFCTR_TP_4_HI 0x497 +#define A6XX_RBBM_PERFCTR_TP_5_LO 0x498 +#define A6XX_RBBM_PERFCTR_TP_5_HI 0x499 +#define A6XX_RBBM_PERFCTR_TP_6_LO 0x49a +#define A6XX_RBBM_PERFCTR_TP_6_HI 0x49b +#define A6XX_RBBM_PERFCTR_TP_7_LO 0x49c +#define A6XX_RBBM_PERFCTR_TP_7_HI 0x49d +#define A6XX_RBBM_PERFCTR_TP_8_LO 0x49e +#define A6XX_RBBM_PERFCTR_TP_8_HI 0x49f +#define A6XX_RBBM_PERFCTR_TP_9_LO 0x4a0 +#define A6XX_RBBM_PERFCTR_TP_9_HI 0x4a1 +#define A6XX_RBBM_PERFCTR_TP_10_LO 0x4a2 +#define A6XX_RBBM_PERFCTR_TP_10_HI 0x4a3 +#define A6XX_RBBM_PERFCTR_TP_11_LO 0x4a4 +#define A6XX_RBBM_PERFCTR_TP_11_HI 0x4a5 +#define A6XX_RBBM_PERFCTR_SP_0_LO 0x4a6 +#define A6XX_RBBM_PERFCTR_SP_0_HI 0x4a7 +#define A6XX_RBBM_PERFCTR_SP_1_LO 0x4a8 +#define A6XX_RBBM_PERFCTR_SP_1_HI 0x4a9 +#define A6XX_RBBM_PERFCTR_SP_2_LO 0x4aa +#define A6XX_RBBM_PERFCTR_SP_2_HI 0x4ab +#define A6XX_RBBM_PERFCTR_SP_3_LO 0x4ac +#define A6XX_RBBM_PERFCTR_SP_3_HI 0x4ad +#define A6XX_RBBM_PERFCTR_SP_4_LO 0x4ae +#define A6XX_RBBM_PERFCTR_SP_4_HI 0x4af +#define A6XX_RBBM_PERFCTR_SP_5_LO 0x4b0 +#define A6XX_RBBM_PERFCTR_SP_5_HI 0x4b1 +#define A6XX_RBBM_PERFCTR_SP_6_LO 0x4b2 +#define A6XX_RBBM_PERFCTR_SP_6_HI 0x4b3 +#define A6XX_RBBM_PERFCTR_SP_7_LO 0x4b4 +#define A6XX_RBBM_PERFCTR_SP_7_HI 0x4b5 +#define A6XX_RBBM_PERFCTR_SP_8_LO 0x4b6 +#define A6XX_RBBM_PERFCTR_SP_8_HI 0x4b7
+#define A6XX_RBBM_PERFCTR_SP_9_LO 0x4b8 +#define A6XX_RBBM_PERFCTR_SP_9_HI 0x4b9 +#define A6XX_RBBM_PERFCTR_SP_10_LO 0x4ba +#define A6XX_RBBM_PERFCTR_SP_10_HI 0x4bb +#define A6XX_RBBM_PERFCTR_SP_11_LO 0x4bc +#define A6XX_RBBM_PERFCTR_SP_11_HI 0x4bd +#define A6XX_RBBM_PERFCTR_SP_12_LO 0x4be +#define A6XX_RBBM_PERFCTR_SP_12_HI 0x4bf +#define A6XX_RBBM_PERFCTR_SP_13_LO 0x4c0 +#define A6XX_RBBM_PERFCTR_SP_13_HI 0x4c1 +#define A6XX_RBBM_PERFCTR_SP_14_LO 0x4c2 +#define A6XX_RBBM_PERFCTR_SP_14_HI 0x4c3 +#define A6XX_RBBM_PERFCTR_SP_15_LO 0x4c4 +#define A6XX_RBBM_PERFCTR_SP_15_HI 0x4c5 +#define A6XX_RBBM_PERFCTR_SP_16_LO 0x4c6 +#define A6XX_RBBM_PERFCTR_SP_16_HI 0x4c7 +#define A6XX_RBBM_PERFCTR_SP_17_LO 0x4c8 +#define A6XX_RBBM_PERFCTR_SP_17_HI 0x4c9 +#define A6XX_RBBM_PERFCTR_SP_18_LO 0x4ca +#define A6XX_RBBM_PERFCTR_SP_18_HI 0x4cb +#define A6XX_RBBM_PERFCTR_SP_19_LO 0x4cc +#define A6XX_RBBM_PERFCTR_SP_19_HI 0x4cd +#define A6XX_RBBM_PERFCTR_SP_20_LO 0x4ce +#define A6XX_RBBM_PERFCTR_SP_20_HI 0x4cf +#define A6XX_RBBM_PERFCTR_SP_21_LO 0x4d0 +#define A6XX_RBBM_PERFCTR_SP_21_HI 0x4d1 +#define A6XX_RBBM_PERFCTR_SP_22_LO 0x4d2 +#define A6XX_RBBM_PERFCTR_SP_22_HI 0x4d3 +#define A6XX_RBBM_PERFCTR_SP_23_LO 0x4d4 +#define A6XX_RBBM_PERFCTR_SP_23_HI 0x4d5 +#define A6XX_RBBM_PERFCTR_RB_0_LO 0x4d6 +#define A6XX_RBBM_PERFCTR_RB_0_HI 0x4d7 +#define A6XX_RBBM_PERFCTR_RB_1_LO 0x4d8 +#define A6XX_RBBM_PERFCTR_RB_1_HI 0x4d9 +#define A6XX_RBBM_PERFCTR_RB_2_LO 0x4da +#define A6XX_RBBM_PERFCTR_RB_2_HI 0x4db +#define A6XX_RBBM_PERFCTR_RB_3_LO 0x4dc +#define A6XX_RBBM_PERFCTR_RB_3_HI 0x4dd +#define A6XX_RBBM_PERFCTR_RB_4_LO 0x4de +#define A6XX_RBBM_PERFCTR_RB_4_HI 0x4df +#define A6XX_RBBM_PERFCTR_RB_5_LO 0x4e0 +#define A6XX_RBBM_PERFCTR_RB_5_HI 0x4e1 +#define A6XX_RBBM_PERFCTR_RB_6_LO 0x4e2 +#define A6XX_RBBM_PERFCTR_RB_6_HI 0x4e3 +#define A6XX_RBBM_PERFCTR_RB_7_LO 0x4e4 +#define A6XX_RBBM_PERFCTR_RB_7_HI 0x4e5 +#define A6XX_RBBM_PERFCTR_VSC_0_LO 0x4e6 +#define A6XX_RBBM_PERFCTR_VSC_0_HI 0x4e7 +#define A6XX_RBBM_PERFCTR_VSC_1_LO 0x4e8 +#define A6XX_RBBM_PERFCTR_VSC_1_HI 0x4e9 +#define A6XX_RBBM_PERFCTR_LRZ_0_LO 0x4ea +#define A6XX_RBBM_PERFCTR_LRZ_0_HI 0x4eb +#define A6XX_RBBM_PERFCTR_LRZ_1_LO 0x4ec +#define A6XX_RBBM_PERFCTR_LRZ_1_HI 0x4ed +#define A6XX_RBBM_PERFCTR_LRZ_2_LO 0x4ee +#define A6XX_RBBM_PERFCTR_LRZ_2_HI 0x4ef +#define A6XX_RBBM_PERFCTR_LRZ_3_LO 0x4f0 +#define A6XX_RBBM_PERFCTR_LRZ_3_HI 0x4f1 +#define A6XX_RBBM_PERFCTR_CMP_0_LO 0x4f2 +#define A6XX_RBBM_PERFCTR_CMP_0_HI 0x4f3 +#define A6XX_RBBM_PERFCTR_CMP_1_LO 0x4f4 +#define A6XX_RBBM_PERFCTR_CMP_1_HI 0x4f5 +#define A6XX_RBBM_PERFCTR_CMP_2_LO 0x4f6 +#define A6XX_RBBM_PERFCTR_CMP_2_HI 0x4f7 +#define A6XX_RBBM_PERFCTR_CMP_3_LO 0x4f8 +#define A6XX_RBBM_PERFCTR_CMP_3_HI 0x4f9 +#define A6XX_RBBM_PERFCTR_CNTL 0x500 +#define A6XX_RBBM_PERFCTR_LOAD_CMD0 0x501 +#define A6XX_RBBM_PERFCTR_LOAD_CMD1 0x502 +#define A6XX_RBBM_PERFCTR_LOAD_CMD2 0x503 +#define A6XX_RBBM_PERFCTR_LOAD_CMD3 0x504 +#define A6XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x505 +#define A6XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x506 +#define A6XX_RBBM_PERFCTR_RBBM_SEL_0 0x507 +#define A6XX_RBBM_PERFCTR_RBBM_SEL_1 0x508 +#define A6XX_RBBM_PERFCTR_RBBM_SEL_2 0x509 +#define A6XX_RBBM_PERFCTR_RBBM_SEL_3 0x50A +#define A6XX_RBBM_PERFCTR_GPU_BUSY_MASKED 0x50B + +#define A6XX_RBBM_ISDB_CNT 0x533 +#define A6XX_RBBM_NC_MODE_CNTL 0X534 +#define A6XX_RBBM_SNAPSHOT_STATUS 0x535 + +#define A6XX_RBBM_SECVID_TRUST_CNTL 0xF400 +#define A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO 0xF800 +#define A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI 0xF801 
+#define A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE 0xF802 +#define A6XX_RBBM_SECVID_TSB_CNTL 0xF803 +#define A6XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL 0xF810 + +#define A6XX_RBBM_VBIF_CLIENT_QOS_CNTL 0x00010 +#define A6XX_RBBM_GBIF_CLIENT_QOS_CNTL 0x00011 +#define A6XX_RBBM_GBIF_HALT 0x00016 +#define A6XX_RBBM_GBIF_HALT_ACK 0x00017 +#define A6XX_RBBM_GPR0_CNTL 0x00018 +#define A6XX_RBBM_INTERFACE_HANG_INT_CNTL 0x0001f +#define A6XX_RBBM_INT_CLEAR_CMD 0x00037 +#define A6XX_RBBM_INT_0_MASK 0x00038 +#define A6XX_RBBM_INT_2_MASK 0x0003A +#define A6XX_RBBM_SP_HYST_CNT 0x00042 +#define A6XX_RBBM_SW_RESET_CMD 0x00043 +#define A6XX_RBBM_RAC_THRESHOLD_CNT 0x00044 +#define A6XX_RBBM_BLOCK_SW_RESET_CMD 0x00045 +#define A6XX_RBBM_BLOCK_SW_RESET_CMD2 0x00046 +#define A6XX_RBBM_BLOCK_GX_RETENTION_CNTL 0x00050 +#define A6XX_RBBM_CLOCK_CNTL 0x000ae +#define A6XX_RBBM_CLOCK_CNTL_SP0 0x000b0 +#define A6XX_RBBM_CLOCK_CNTL_SP1 0x000b1 +#define A6XX_RBBM_CLOCK_CNTL_SP2 0x000b2 +#define A6XX_RBBM_CLOCK_CNTL_SP3 0x000b3 +#define A6XX_RBBM_CLOCK_CNTL2_SP0 0x000b4 +#define A6XX_RBBM_CLOCK_CNTL2_SP1 0x000b5 +#define A6XX_RBBM_CLOCK_CNTL2_SP2 0x000b6 +#define A6XX_RBBM_CLOCK_CNTL2_SP3 0x000b7 +#define A6XX_RBBM_CLOCK_DELAY_SP0 0x000b8 +#define A6XX_RBBM_CLOCK_DELAY_SP1 0x000b9 +#define A6XX_RBBM_CLOCK_DELAY_SP2 0x000ba +#define A6XX_RBBM_CLOCK_DELAY_SP3 0x000bb +#define A6XX_RBBM_CLOCK_HYST_SP0 0x000bc +#define A6XX_RBBM_CLOCK_HYST_SP1 0x000bd +#define A6XX_RBBM_CLOCK_HYST_SP2 0x000be +#define A6XX_RBBM_CLOCK_HYST_SP3 0x000bf +#define A6XX_RBBM_CLOCK_CNTL_TP0 0x000c0 +#define A6XX_RBBM_CLOCK_CNTL_TP1 0x000c1 +#define A6XX_RBBM_CLOCK_CNTL_TP2 0x000c2 +#define A6XX_RBBM_CLOCK_CNTL_TP3 0x000c3 +#define A6XX_RBBM_CLOCK_CNTL2_TP0 0x000c4 +#define A6XX_RBBM_CLOCK_CNTL2_TP1 0x000c5 +#define A6XX_RBBM_CLOCK_CNTL2_TP2 0x000c6 +#define A6XX_RBBM_CLOCK_CNTL2_TP3 0x000c7 +#define A6XX_RBBM_CLOCK_CNTL3_TP0 0x000c8 +#define A6XX_RBBM_CLOCK_CNTL3_TP1 0x000c9 +#define A6XX_RBBM_CLOCK_CNTL3_TP2 0x000ca +#define A6XX_RBBM_CLOCK_CNTL3_TP3 0x000cb +#define A6XX_RBBM_CLOCK_CNTL4_TP0 0x000cc +#define A6XX_RBBM_CLOCK_CNTL4_TP1 0x000cd +#define A6XX_RBBM_CLOCK_CNTL4_TP2 0x000ce +#define A6XX_RBBM_CLOCK_CNTL4_TP3 0x000cf +#define A6XX_RBBM_CLOCK_DELAY_TP0 0x000d0 +#define A6XX_RBBM_CLOCK_DELAY_TP1 0x000d1 +#define A6XX_RBBM_CLOCK_DELAY_TP2 0x000d2 +#define A6XX_RBBM_CLOCK_DELAY_TP3 0x000d3 +#define A6XX_RBBM_CLOCK_DELAY2_TP0 0x000d4 +#define A6XX_RBBM_CLOCK_DELAY2_TP1 0x000d5 +#define A6XX_RBBM_CLOCK_DELAY2_TP2 0x000d6 +#define A6XX_RBBM_CLOCK_DELAY2_TP3 0x000d7 +#define A6XX_RBBM_CLOCK_DELAY3_TP0 0x000d8 +#define A6XX_RBBM_CLOCK_DELAY3_TP1 0x000d9 +#define A6XX_RBBM_CLOCK_DELAY3_TP2 0x000da +#define A6XX_RBBM_CLOCK_DELAY3_TP3 0x000db +#define A6XX_RBBM_CLOCK_DELAY4_TP0 0x000dc +#define A6XX_RBBM_CLOCK_DELAY4_TP1 0x000dd +#define A6XX_RBBM_CLOCK_DELAY4_TP2 0x000de +#define A6XX_RBBM_CLOCK_DELAY4_TP3 0x000df +#define A6XX_RBBM_CLOCK_HYST_TP0 0x000e0 +#define A6XX_RBBM_CLOCK_HYST_TP1 0x000e1 +#define A6XX_RBBM_CLOCK_HYST_TP2 0x000e2 +#define A6XX_RBBM_CLOCK_HYST_TP3 0x000e3 +#define A6XX_RBBM_CLOCK_HYST2_TP0 0x000e4 +#define A6XX_RBBM_CLOCK_HYST2_TP1 0x000e5 +#define A6XX_RBBM_CLOCK_HYST2_TP2 0x000e6 +#define A6XX_RBBM_CLOCK_HYST2_TP3 0x000e7 +#define A6XX_RBBM_CLOCK_HYST3_TP0 0x000e8 +#define A6XX_RBBM_CLOCK_HYST3_TP1 0x000e9 +#define A6XX_RBBM_CLOCK_HYST3_TP2 0x000ea +#define A6XX_RBBM_CLOCK_HYST3_TP3 0x000eb +#define A6XX_RBBM_CLOCK_HYST4_TP0 0x000ec +#define A6XX_RBBM_CLOCK_HYST4_TP1 0x000ed +#define A6XX_RBBM_CLOCK_HYST4_TP2 0x000ee +#define 
A6XX_RBBM_CLOCK_HYST4_TP3 0x000ef +#define A6XX_RBBM_CLOCK_CNTL_RB0 0x000f0 +#define A6XX_RBBM_CLOCK_CNTL_RB1 0x000f1 +#define A6XX_RBBM_CLOCK_CNTL_RB2 0x000f2 +#define A6XX_RBBM_CLOCK_CNTL_RB3 0x000f3 +#define A6XX_RBBM_CLOCK_CNTL2_RB0 0x000f4 +#define A6XX_RBBM_CLOCK_CNTL2_RB1 0x000f5 +#define A6XX_RBBM_CLOCK_CNTL2_RB2 0x000f6 +#define A6XX_RBBM_CLOCK_CNTL2_RB3 0x000f7 +#define A6XX_RBBM_CLOCK_CNTL_CCU0 0x000f8 +#define A6XX_RBBM_CLOCK_CNTL_CCU1 0x000f9 +#define A6XX_RBBM_CLOCK_CNTL_CCU2 0x000fa +#define A6XX_RBBM_CLOCK_CNTL_CCU3 0x000fb +#define A6XX_RBBM_CLOCK_HYST_RB_CCU0 0x00100 +#define A6XX_RBBM_CLOCK_HYST_RB_CCU1 0x00101 +#define A6XX_RBBM_CLOCK_HYST_RB_CCU2 0x00102 +#define A6XX_RBBM_CLOCK_HYST_RB_CCU3 0x00103 +#define A6XX_RBBM_CLOCK_CNTL_RAC 0x00104 +#define A6XX_RBBM_CLOCK_CNTL2_RAC 0x00105 +#define A6XX_RBBM_CLOCK_DELAY_RAC 0x00106 +#define A6XX_RBBM_CLOCK_HYST_RAC 0x00107 +#define A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM 0x00108 +#define A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM 0x00109 +#define A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM 0x0010a +#define A6XX_RBBM_CLOCK_CNTL_UCHE 0x0010b +#define A6XX_RBBM_CLOCK_CNTL2_UCHE 0x0010c +#define A6XX_RBBM_CLOCK_CNTL3_UCHE 0x0010d +#define A6XX_RBBM_CLOCK_CNTL4_UCHE 0x0010e +#define A6XX_RBBM_CLOCK_DELAY_UCHE 0x0010f +#define A6XX_RBBM_CLOCK_HYST_UCHE 0x00110 +#define A6XX_RBBM_CLOCK_MODE_VFD 0x00111 +#define A6XX_RBBM_CLOCK_DELAY_VFD 0x00112 +#define A6XX_RBBM_CLOCK_HYST_VFD 0x00113 +#define A6XX_RBBM_CLOCK_MODE_GPC 0x00114 +#define A6XX_RBBM_CLOCK_DELAY_GPC 0x00115 +#define A6XX_RBBM_CLOCK_HYST_GPC 0x00116 +#define A6XX_RBBM_CLOCK_DELAY_HLSQ_2 0x00117 +#define A6XX_RBBM_CLOCK_CNTL_GMU_GX 0x00118 +#define A6XX_RBBM_CLOCK_DELAY_GMU_GX 0x00119 +#define A6XX_RBBM_CLOCK_CNTL_TEX_FCHE 0x00120 +#define A6XX_RBBM_CLOCK_DELAY_TEX_FCHE 0x00121 +#define A6XX_RBBM_CLOCK_HYST_TEX_FCHE 0x00122 +#define A6XX_RBBM_CLOCK_HYST_GMU_GX 0x0011a +#define A6XX_RBBM_CLOCK_MODE_HLSQ 0x0011b +#define A6XX_RBBM_CLOCK_DELAY_HLSQ 0x0011c +#define A6XX_RBBM_CLOCK_HYST_HLSQ 0x0011d + +/* DBGC_CFG registers */ +#define A6XX_DBGC_CFG_DBGBUS_SEL_A 0x600 +#define A6XX_DBGC_CFG_DBGBUS_SEL_B 0x601 +#define A6XX_DBGC_CFG_DBGBUS_SEL_C 0x602 +#define A6XX_DBGC_CFG_DBGBUS_SEL_D 0x603 +#define A6XX_DBGC_CFG_DBGBUS_SEL_PING_INDEX_SHIFT 0x0 +#define A6XX_DBGC_CFG_DBGBUS_SEL_PING_BLK_SEL_SHIFT 0x8 +#define A6XX_DBGC_CFG_DBGBUS_CNTLT 0x604 +#define A6XX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN_SHIFT 0x0 +#define A6XX_DBGC_CFG_DBGBUS_CNTLT_GRANU_SHIFT 0xC +#define A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT_SHIFT 0x1C +#define A6XX_DBGC_CFG_DBGBUS_CNTLM 0x605 +#define A6XX_DBGC_CFG_DBGBUS_CTLTM_ENABLE_SHIFT 0x18 +#define A6XX_DBGC_CFG_DBGBUS_OPL 0x606 +#define A6XX_DBGC_CFG_DBGBUS_OPE 0x607 +#define A6XX_DBGC_CFG_DBGBUS_IVTL_0 0x608 +#define A6XX_DBGC_CFG_DBGBUS_IVTL_1 0x609 +#define A6XX_DBGC_CFG_DBGBUS_IVTL_2 0x60a +#define A6XX_DBGC_CFG_DBGBUS_IVTL_3 0x60b +#define A6XX_DBGC_CFG_DBGBUS_MASKL_0 0x60c +#define A6XX_DBGC_CFG_DBGBUS_MASKL_1 0x60d +#define A6XX_DBGC_CFG_DBGBUS_MASKL_2 0x60e +#define A6XX_DBGC_CFG_DBGBUS_MASKL_3 0x60f +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0 0x610 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1 0x611 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL0_SHIFT 0x0 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL1_SHIFT 0x4 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL2_SHIFT 0x8 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL3_SHIFT 0xC +#define A6XX_DBGC_CFG_DBGBUS_BYTEL4_SHIFT 0x10 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL5_SHIFT 0x14 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL6_SHIFT 0x18 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL7_SHIFT 0x1C +#define 
A6XX_DBGC_CFG_DBGBUS_BYTEL8_SHIFT 0x0 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL9_SHIFT 0x4 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL10_SHIFT 0x8 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL11_SHIFT 0xC +#define A6XX_DBGC_CFG_DBGBUS_BYTEL12_SHIFT 0x10 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL13_SHIFT 0x14 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL14_SHIFT 0x18 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL15_SHIFT 0x1C +#define A6XX_DBGC_CFG_DBGBUS_IVTE_0 0x612 +#define A6XX_DBGC_CFG_DBGBUS_IVTE_1 0x613 +#define A6XX_DBGC_CFG_DBGBUS_IVTE_2 0x614 +#define A6XX_DBGC_CFG_DBGBUS_IVTE_3 0x615 +#define A6XX_DBGC_CFG_DBGBUS_MASKE_0 0x616 +#define A6XX_DBGC_CFG_DBGBUS_MASKE_1 0x617 +#define A6XX_DBGC_CFG_DBGBUS_MASKE_2 0x618 +#define A6XX_DBGC_CFG_DBGBUS_MASKE_3 0x619 +#define A6XX_DBGC_CFG_DBGBUS_NIBBLEE 0x61a +#define A6XX_DBGC_CFG_DBGBUS_PTRC0 0x61b +#define A6XX_DBGC_CFG_DBGBUS_PTRC1 0x61c +#define A6XX_DBGC_CFG_DBGBUS_LOADREG 0x61d +#define A6XX_DBGC_CFG_DBGBUS_IDX 0x61e +#define A6XX_DBGC_CFG_DBGBUS_CLRC 0x61f +#define A6XX_DBGC_CFG_DBGBUS_LOADIVT 0x620 +#define A6XX_DBGC_VBIF_DBG_CNTL 0x621 +#define A6XX_DBGC_DBG_LO_HI_GPIO 0x622 +#define A6XX_DBGC_EXT_TRACE_BUS_CNTL 0x623 +#define A6XX_DBGC_READ_AHB_THROUGH_DBG 0x624 +#define A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1 0x62f +#define A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2 0x630 +#define A6XX_DBGC_EVT_CFG 0x640 +#define A6XX_DBGC_EVT_INTF_SEL_0 0x641 +#define A6XX_DBGC_EVT_INTF_SEL_1 0x642 +#define A6XX_DBGC_PERF_ATB_CFG 0x643 +#define A6XX_DBGC_PERF_ATB_COUNTER_SEL_0 0x644 +#define A6XX_DBGC_PERF_ATB_COUNTER_SEL_1 0x645 +#define A6XX_DBGC_PERF_ATB_COUNTER_SEL_2 0x646 +#define A6XX_DBGC_PERF_ATB_COUNTER_SEL_3 0x647 +#define A6XX_DBGC_PERF_ATB_TRIG_INTF_SEL_0 0x648 +#define A6XX_DBGC_PERF_ATB_TRIG_INTF_SEL_1 0x649 +#define A6XX_DBGC_PERF_ATB_DRAIN_CMD 0x64a +#define A6XX_DBGC_ECO_CNTL 0x650 +#define A6XX_DBGC_AHB_DBG_CNTL 0x651 + +/* VSC registers */ +#define A6XX_VSC_PERFCTR_VSC_SEL_0 0xCD8 +#define A6XX_VSC_PERFCTR_VSC_SEL_1 0xCD9 + +/* GRAS registers */ +#define A6XX_GRAS_ADDR_MODE_CNTL 0x8601 +#define A6XX_GRAS_PERFCTR_TSE_SEL_0 0x8610 +#define A6XX_GRAS_PERFCTR_TSE_SEL_1 0x8611 +#define A6XX_GRAS_PERFCTR_TSE_SEL_2 0x8612 +#define A6XX_GRAS_PERFCTR_TSE_SEL_3 0x8613 +#define A6XX_GRAS_PERFCTR_RAS_SEL_0 0x8614 +#define A6XX_GRAS_PERFCTR_RAS_SEL_1 0x8615 +#define A6XX_GRAS_PERFCTR_RAS_SEL_2 0x8616 +#define A6XX_GRAS_PERFCTR_RAS_SEL_3 0x8617 +#define A6XX_GRAS_PERFCTR_LRZ_SEL_0 0x8618 +#define A6XX_GRAS_PERFCTR_LRZ_SEL_1 0x8619 +#define A6XX_GRAS_PERFCTR_LRZ_SEL_2 0x861A +#define A6XX_GRAS_PERFCTR_LRZ_SEL_3 0x861B + +/* RB registers */ +#define A6XX_RB_ADDR_MODE_CNTL 0x8E05 +#define A6XX_RB_NC_MODE_CNTL 0x8E08 +#define A6XX_RB_PERFCTR_RB_SEL_0 0x8E10 +#define A6XX_RB_PERFCTR_RB_SEL_1 0x8E11 +#define A6XX_RB_PERFCTR_RB_SEL_2 0x8E12 +#define A6XX_RB_PERFCTR_RB_SEL_3 0x8E13 +#define A6XX_RB_PERFCTR_RB_SEL_4 0x8E14 +#define A6XX_RB_PERFCTR_RB_SEL_5 0x8E15 +#define A6XX_RB_PERFCTR_RB_SEL_6 0x8E16 +#define A6XX_RB_PERFCTR_RB_SEL_7 0x8E17 +#define A6XX_RB_PERFCTR_CCU_SEL_0 0x8E18 +#define A6XX_RB_PERFCTR_CCU_SEL_1 0x8E19 +#define A6XX_RB_PERFCTR_CCU_SEL_2 0x8E1A +#define A6XX_RB_PERFCTR_CCU_SEL_3 0x8E1B +#define A6XX_RB_PERFCTR_CCU_SEL_4 0x8E1C +#define A6XX_RB_PERFCTR_CMP_SEL_0 0x8E2C +#define A6XX_RB_PERFCTR_CMP_SEL_1 0x8E2D +#define A6XX_RB_PERFCTR_CMP_SEL_2 0x8E2E +#define A6XX_RB_PERFCTR_CMP_SEL_3 0x8E2F +#define A6XX_RB_RB_SUB_BLOCK_SEL_CNTL_HOST 0x8E3B +#define A6XX_RB_RB_SUB_BLOCK_SEL_CNTL_CD 0x8E3D +#define A6XX_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE 0x8E50 + +/* PC registers */ +#define 
A6XX_PC_DBG_ECO_CNTL 0x9E00 +#define A6XX_PC_ADDR_MODE_CNTL 0x9E01 +#define A6XX_PC_PERFCTR_PC_SEL_0 0x9E34 +#define A6XX_PC_PERFCTR_PC_SEL_1 0x9E35 +#define A6XX_PC_PERFCTR_PC_SEL_2 0x9E36 +#define A6XX_PC_PERFCTR_PC_SEL_3 0x9E37 +#define A6XX_PC_PERFCTR_PC_SEL_4 0x9E38 +#define A6XX_PC_PERFCTR_PC_SEL_5 0x9E39 +#define A6XX_PC_PERFCTR_PC_SEL_6 0x9E3A +#define A6XX_PC_PERFCTR_PC_SEL_7 0x9E3B + +/* HLSQ registers */ +#define A6XX_HLSQ_ADDR_MODE_CNTL 0xBE05 +#define A6XX_HLSQ_PERFCTR_HLSQ_SEL_0 0xBE10 +#define A6XX_HLSQ_PERFCTR_HLSQ_SEL_1 0xBE11 +#define A6XX_HLSQ_PERFCTR_HLSQ_SEL_2 0xBE12 +#define A6XX_HLSQ_PERFCTR_HLSQ_SEL_3 0xBE13 +#define A6XX_HLSQ_PERFCTR_HLSQ_SEL_4 0xBE14 +#define A6XX_HLSQ_PERFCTR_HLSQ_SEL_5 0xBE15 +#define A6XX_HLSQ_DBG_AHB_READ_APERTURE 0xC800 +#define A6XX_HLSQ_DBG_READ_SEL 0xD000 + +/* VFD registers */ +#define A6XX_VFD_ADDR_MODE_CNTL 0xA601 +#define A6XX_VFD_PERFCTR_VFD_SEL_0 0xA610 +#define A6XX_VFD_PERFCTR_VFD_SEL_1 0xA611 +#define A6XX_VFD_PERFCTR_VFD_SEL_2 0xA612 +#define A6XX_VFD_PERFCTR_VFD_SEL_3 0xA613 +#define A6XX_VFD_PERFCTR_VFD_SEL_4 0xA614 +#define A6XX_VFD_PERFCTR_VFD_SEL_5 0xA615 +#define A6XX_VFD_PERFCTR_VFD_SEL_6 0xA616 +#define A6XX_VFD_PERFCTR_VFD_SEL_7 0xA617 + +/* VPC registers */ +#define A6XX_VPC_ADDR_MODE_CNTL 0x9601 +#define A6XX_VPC_PERFCTR_VPC_SEL_0 0x9604 +#define A6XX_VPC_PERFCTR_VPC_SEL_1 0x9605 +#define A6XX_VPC_PERFCTR_VPC_SEL_2 0x9606 +#define A6XX_VPC_PERFCTR_VPC_SEL_3 0x9607 +#define A6XX_VPC_PERFCTR_VPC_SEL_4 0x9608 +#define A6XX_VPC_PERFCTR_VPC_SEL_5 0x9609 + +/* UCHE registers */ +#define A6XX_UCHE_ADDR_MODE_CNTL 0xE00 +#define A6XX_UCHE_MODE_CNTL 0xE01 +#define A6XX_UCHE_WRITE_RANGE_MAX_LO 0xE05 +#define A6XX_UCHE_WRITE_RANGE_MAX_HI 0xE06 +#define A6XX_UCHE_WRITE_THRU_BASE_LO 0xE07 +#define A6XX_UCHE_WRITE_THRU_BASE_HI 0xE08 +#define A6XX_UCHE_TRAP_BASE_LO 0xE09 +#define A6XX_UCHE_TRAP_BASE_HI 0xE0A +#define A6XX_UCHE_GMEM_RANGE_MIN_LO 0xE0B +#define A6XX_UCHE_GMEM_RANGE_MIN_HI 0xE0C +#define A6XX_UCHE_GMEM_RANGE_MAX_LO 0xE0D +#define A6XX_UCHE_GMEM_RANGE_MAX_HI 0xE0E +#define A6XX_UCHE_CACHE_WAYS 0xE17 +#define A6XX_UCHE_FILTER_CNTL 0xE18 +#define A6XX_UCHE_CLIENT_PF 0xE19 +#define A6XX_UCHE_CLIENT_PF_CLIENT_ID_MASK 0x7 +#define A6XX_UCHE_PERFCTR_UCHE_SEL_0 0xE1C +#define A6XX_UCHE_PERFCTR_UCHE_SEL_1 0xE1D +#define A6XX_UCHE_PERFCTR_UCHE_SEL_2 0xE1E +#define A6XX_UCHE_PERFCTR_UCHE_SEL_3 0xE1F +#define A6XX_UCHE_PERFCTR_UCHE_SEL_4 0xE20 +#define A6XX_UCHE_PERFCTR_UCHE_SEL_5 0xE21 +#define A6XX_UCHE_PERFCTR_UCHE_SEL_6 0xE22 +#define A6XX_UCHE_PERFCTR_UCHE_SEL_7 0xE23 +#define A6XX_UCHE_PERFCTR_UCHE_SEL_8 0xE24 +#define A6XX_UCHE_PERFCTR_UCHE_SEL_9 0xE25 +#define A6XX_UCHE_PERFCTR_UCHE_SEL_10 0xE26 +#define A6XX_UCHE_PERFCTR_UCHE_SEL_11 0xE27 +#define A6XX_UCHE_GBIF_GX_CONFIG 0xE3A +#define A6XX_UCHE_CMDQ_CONFIG 0xE3C + +/* SP registers */ +#define A6XX_SP_ADDR_MODE_CNTL 0xAE01 +#define A6XX_SP_NC_MODE_CNTL 0xAE02 +#define A6XX_SP_PERFCTR_SP_SEL_0 0xAE10 +#define A6XX_SP_PERFCTR_SP_SEL_1 0xAE11 +#define A6XX_SP_PERFCTR_SP_SEL_2 0xAE12 +#define A6XX_SP_PERFCTR_SP_SEL_3 0xAE13 +#define A6XX_SP_PERFCTR_SP_SEL_4 0xAE14 +#define A6XX_SP_PERFCTR_SP_SEL_5 0xAE15 +#define A6XX_SP_PERFCTR_SP_SEL_6 0xAE16 +#define A6XX_SP_PERFCTR_SP_SEL_7 0xAE17 +#define A6XX_SP_PERFCTR_SP_SEL_8 0xAE18 +#define A6XX_SP_PERFCTR_SP_SEL_9 0xAE19 +#define A6XX_SP_PERFCTR_SP_SEL_10 0xAE1A +#define A6XX_SP_PERFCTR_SP_SEL_11 0xAE1B +#define A6XX_SP_PERFCTR_SP_SEL_12 0xAE1C +#define A6XX_SP_PERFCTR_SP_SEL_13 0xAE1D +#define A6XX_SP_PERFCTR_SP_SEL_14 0xAE1E 
+#define A6XX_SP_PERFCTR_SP_SEL_15 0xAE1F +#define A6XX_SP_PERFCTR_SP_SEL_16 0xAE20 +#define A6XX_SP_PERFCTR_SP_SEL_17 0xAE21 +#define A6XX_SP_PERFCTR_SP_SEL_18 0xAE22 +#define A6XX_SP_PERFCTR_SP_SEL_19 0xAE23 +#define A6XX_SP_PERFCTR_SP_SEL_20 0xAE24 +#define A6XX_SP_PERFCTR_SP_SEL_21 0xAE25 +#define A6XX_SP_PERFCTR_SP_SEL_22 0xAE26 +#define A6XX_SP_PERFCTR_SP_SEL_23 0xAE27 + +/* TP registers */ +#define A6XX_TPL1_ADDR_MODE_CNTL 0xB601 +#define A6XX_TPL1_NC_MODE_CNTL 0xB604 +#define A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_0 0xB608 +#define A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_1 0xB609 +#define A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_2 0xB60A +#define A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_3 0xB60B +#define A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_4 0xB60C +#define A6XX_TPL1_PERFCTR_TP_SEL_0 0xB610 +#define A6XX_TPL1_PERFCTR_TP_SEL_1 0xB611 +#define A6XX_TPL1_PERFCTR_TP_SEL_2 0xB612 +#define A6XX_TPL1_PERFCTR_TP_SEL_3 0xB613 +#define A6XX_TPL1_PERFCTR_TP_SEL_4 0xB614 +#define A6XX_TPL1_PERFCTR_TP_SEL_5 0xB615 +#define A6XX_TPL1_PERFCTR_TP_SEL_6 0xB616 +#define A6XX_TPL1_PERFCTR_TP_SEL_7 0xB617 +#define A6XX_TPL1_PERFCTR_TP_SEL_8 0xB618 +#define A6XX_TPL1_PERFCTR_TP_SEL_9 0xB619 +#define A6XX_TPL1_PERFCTR_TP_SEL_10 0xB61A +#define A6XX_TPL1_PERFCTR_TP_SEL_11 0xB61B + +/* VBIF registers */ +#define A6XX_VBIF_VERSION 0x3000 +#define A6XX_VBIF_CLKON 0x3001 +#define A6XX_VBIF_CLKON_FORCE_ON_TESTBUS_MASK 0x1 +#define A6XX_VBIF_CLKON_FORCE_ON_TESTBUS_SHIFT 0x1 +#define A6XX_VBIF_GATE_OFF_WRREQ_EN 0x302A +#define A6XX_VBIF_XIN_HALT_CTRL0 0x3080 +#define A6XX_VBIF_XIN_HALT_CTRL0_MASK 0xF +#define A6XX_VBIF_XIN_HALT_CTRL1 0x3081 +#define A6XX_VBIF_TEST_BUS_OUT_CTRL 0x3084 +#define A6XX_VBIF_TEST_BUS_OUT_CTRL_EN_MASK 0x1 +#define A6XX_VBIF_TEST_BUS_OUT_CTRL_EN_SHIFT 0x0 +#define A6XX_VBIF_TEST_BUS1_CTRL0 0x3085 +#define A6XX_VBIF_TEST_BUS1_CTRL1 0x3086 +#define A6XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_MASK 0xF +#define A6XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_SHIFT 0x0 +#define A6XX_VBIF_TEST_BUS2_CTRL0 0x3087 +#define A6XX_VBIF_TEST_BUS2_CTRL1 0x3088 +#define A6XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_MASK 0x1FF +#define A6XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_SHIFT 0x0 +#define A6XX_VBIF_TEST_BUS_OUT 0x308C +#define A6XX_VBIF_PERF_CNT_SEL0 0x30d0 +#define A6XX_VBIF_PERF_CNT_SEL1 0x30d1 +#define A6XX_VBIF_PERF_CNT_SEL2 0x30d2 +#define A6XX_VBIF_PERF_CNT_SEL3 0x30d3 +#define A6XX_VBIF_PERF_CNT_LOW0 0x30d8 +#define A6XX_VBIF_PERF_CNT_LOW1 0x30d9 +#define A6XX_VBIF_PERF_CNT_LOW2 0x30da +#define A6XX_VBIF_PERF_CNT_LOW3 0x30db +#define A6XX_VBIF_PERF_CNT_HIGH0 0x30e0 +#define A6XX_VBIF_PERF_CNT_HIGH1 0x30e1 +#define A6XX_VBIF_PERF_CNT_HIGH2 0x30e2 +#define A6XX_VBIF_PERF_CNT_HIGH3 0x30e3 +#define A6XX_VBIF_PERF_PWR_CNT_EN0 0x3100 +#define A6XX_VBIF_PERF_PWR_CNT_EN1 0x3101 +#define A6XX_VBIF_PERF_PWR_CNT_EN2 0x3102 +#define A6XX_VBIF_PERF_PWR_CNT_LOW0 0x3110 +#define A6XX_VBIF_PERF_PWR_CNT_LOW1 0x3111 +#define A6XX_VBIF_PERF_PWR_CNT_LOW2 0x3112 +#define A6XX_VBIF_PERF_PWR_CNT_HIGH0 0x3118 +#define A6XX_VBIF_PERF_PWR_CNT_HIGH1 0x3119 +#define A6XX_VBIF_PERF_PWR_CNT_HIGH2 0x311a + +/* GBIF countables */ +#define GBIF_AXI0_READ_DATA_TOTAL_BEATS 34 +#define GBIF_AXI1_READ_DATA_TOTAL_BEATS 35 +#define GBIF_AXI0_WRITE_DATA_TOTAL_BEATS 46 +#define GBIF_AXI1_WRITE_DATA_TOTAL_BEATS 47 + +/* GBIF registers */ +#define A6XX_GBIF_SCACHE_CNTL0 0x3c01 +#define A6XX_GBIF_SCACHE_CNTL1 0x3c02 +#define A6XX_GBIF_QSB_SIDE0 0x3c03 +#define A6XX_GBIF_QSB_SIDE1 0x3c04 +#define A6XX_GBIF_QSB_SIDE2 0x3c05 +#define A6XX_GBIF_QSB_SIDE3 0x3c06 +#define A6XX_GBIF_HALT 0x3c45 +#define 
A6XX_GBIF_HALT_ACK 0x3c46 + +#define A6XX_GBIF_CLIENT_HALT_MASK BIT(0) +#define A6XX_GBIF_ARB_HALT_MASK BIT(1) +#define A6XX_GBIF_GX_HALT_MASK BIT(0) + +#define A6XX_GBIF_PERF_PWR_CNT_EN 0x3cc0 +#define A6XX_GBIF_PERF_CNT_SEL 0x3cc2 +#define A6XX_GBIF_PERF_PWR_CNT_SEL 0x3cc3 +#define A6XX_GBIF_PERF_CNT_LOW0 0x3cc4 +#define A6XX_GBIF_PERF_CNT_LOW1 0x3cc5 +#define A6XX_GBIF_PERF_CNT_LOW2 0x3cc6 +#define A6XX_GBIF_PERF_CNT_LOW3 0x3cc7 +#define A6XX_GBIF_PERF_CNT_HIGH0 0x3cc8 +#define A6XX_GBIF_PERF_CNT_HIGH1 0x3cc9 +#define A6XX_GBIF_PERF_CNT_HIGH2 0x3cca +#define A6XX_GBIF_PERF_CNT_HIGH3 0x3ccb +#define A6XX_GBIF_PWR_CNT_LOW0 0x3ccc +#define A6XX_GBIF_PWR_CNT_LOW1 0x3ccd +#define A6XX_GBIF_PWR_CNT_LOW2 0x3cce +#define A6XX_GBIF_PWR_CNT_HIGH0 0x3ccf +#define A6XX_GBIF_PWR_CNT_HIGH1 0x3cd0 +#define A6XX_GBIF_PWR_CNT_HIGH2 0x3cd1 + + +/* CX_DBGC_CFG registers */ +#define A6XX_CX_DBGC_CFG_DBGBUS_SEL_A 0x18400 +#define A6XX_CX_DBGC_CFG_DBGBUS_SEL_B 0x18401 +#define A6XX_CX_DBGC_CFG_DBGBUS_SEL_C 0x18402 +#define A6XX_CX_DBGC_CFG_DBGBUS_SEL_D 0x18403 +#define A6XX_CX_DBGC_CFG_DBGBUS_SEL_PING_INDEX_SHIFT 0x0 +#define A6XX_CX_DBGC_CFG_DBGBUS_SEL_PING_BLK_SEL_SHIFT 0x8 +#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLT 0x18404 +#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN_SHIFT 0x0 +#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_GRANU_SHIFT 0xC +#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_SEGT_SHIFT 0x1C +#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLM 0x18405 +#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLM_ENABLE_SHIFT 0x18 +#define A6XX_CX_DBGC_CFG_DBGBUS_OPL 0x18406 +#define A6XX_CX_DBGC_CFG_DBGBUS_OPE 0x18407 +#define A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0 0x18408 +#define A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1 0x18409 +#define A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2 0x1840A +#define A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3 0x1840B +#define A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0 0x1840C +#define A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1 0x1840D +#define A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2 0x1840E +#define A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3 0x1840F +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0 0x18410 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1 0x18411 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL0_SHIFT 0x0 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL1_SHIFT 0x4 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL2_SHIFT 0x8 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL3_SHIFT 0xC +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL4_SHIFT 0x10 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL5_SHIFT 0x14 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL6_SHIFT 0x18 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL7_SHIFT 0x1C +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL8_SHIFT 0x0 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL9_SHIFT 0x4 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL10_SHIFT 0x8 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL11_SHIFT 0xC +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL12_SHIFT 0x10 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL13_SHIFT 0x14 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL14_SHIFT 0x18 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL15_SHIFT 0x1C +#define A6XX_CX_DBGC_CFG_DBGBUS_IVTE_0 0x18412 +#define A6XX_CX_DBGC_CFG_DBGBUS_IVTE_1 0x18413 +#define A6XX_CX_DBGC_CFG_DBGBUS_IVTE_2 0x18414 +#define A6XX_CX_DBGC_CFG_DBGBUS_IVTE_3 0x18415 +#define A6XX_CX_DBGC_CFG_DBGBUS_MASKE_0 0x18416 +#define A6XX_CX_DBGC_CFG_DBGBUS_MASKE_1 0x18417 +#define A6XX_CX_DBGC_CFG_DBGBUS_MASKE_2 0x18418 +#define A6XX_CX_DBGC_CFG_DBGBUS_MASKE_3 0x18419 +#define A6XX_CX_DBGC_CFG_DBGBUS_NIBBLEE 0x1841A +#define A6XX_CX_DBGC_CFG_DBGBUS_PTRC0 0x1841B +#define A6XX_CX_DBGC_CFG_DBGBUS_PTRC1 0x1841C +#define A6XX_CX_DBGC_CFG_DBGBUS_LOADREG 0x1841D +#define A6XX_CX_DBGC_CFG_DBGBUS_IDX 0x1841E +#define A6XX_CX_DBGC_CFG_DBGBUS_CLRC 0x1841F 
+#define A6XX_CX_DBGC_CFG_DBGBUS_LOADIVT 0x18420 +#define A6XX_CX_DBGC_VBIF_DBG_CNTL 0x18421 +#define A6XX_CX_DBGC_DBG_LO_HI_GPIO 0x18422 +#define A6XX_CX_DBGC_EXT_TRACE_BUS_CNTL 0x18423 +#define A6XX_CX_DBGC_READ_AHB_THROUGH_DBG 0x18424 +#define A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1 0x1842F +#define A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2 0x18430 +#define A6XX_CX_DBGC_EVT_CFG 0x18440 +#define A6XX_CX_DBGC_EVT_INTF_SEL_0 0x18441 +#define A6XX_CX_DBGC_EVT_INTF_SEL_1 0x18442 +#define A6XX_CX_DBGC_PERF_ATB_CFG 0x18443 +#define A6XX_CX_DBGC_PERF_ATB_COUNTER_SEL_0 0x18444 +#define A6XX_CX_DBGC_PERF_ATB_COUNTER_SEL_1 0x18445 +#define A6XX_CX_DBGC_PERF_ATB_COUNTER_SEL_2 0x18446 +#define A6XX_CX_DBGC_PERF_ATB_COUNTER_SEL_3 0x18447 +#define A6XX_CX_DBGC_PERF_ATB_TRIG_INTF_SEL_0 0x18448 +#define A6XX_CX_DBGC_PERF_ATB_TRIG_INTF_SEL_1 0x18449 +#define A6XX_CX_DBGC_PERF_ATB_DRAIN_CMD 0x1844A +#define A6XX_CX_DBGC_ECO_CNTL 0x18450 +#define A6XX_CX_DBGC_AHB_DBG_CNTL 0x18451 + +/* GMU control registers */ +#define A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL 0x1A880 +#define A6XX_GMU_GX_SPTPRAC_POWER_CONTROL 0x1A881 +#define A6XX_GMU_CM3_ITCM_START 0x1B400 +#define A6XX_GMU_CM3_DTCM_START 0x1C400 +#define A6XX_GMU_NMI_CONTROL_STATUS 0x1CBF0 +#define A6XX_GMU_BOOT_SLUMBER_OPTION 0x1CBF8 +#define A6XX_GMU_GX_VOTE_IDX 0x1CBF9 +#define A6XX_GMU_MX_VOTE_IDX 0x1CBFA +#define A6XX_GMU_DCVS_ACK_OPTION 0x1CBFC +#define A6XX_GMU_DCVS_PERF_SETTING 0x1CBFD +#define A6XX_GMU_DCVS_BW_SETTING 0x1CBFE +#define A6XX_GMU_DCVS_RETURN 0x1CBFF +#define A6XX_GMU_ICACHE_CONFIG 0x1F400 +#define A6XX_GMU_DCACHE_CONFIG 0x1F401 +#define A6XX_GMU_SYS_BUS_CONFIG 0x1F40F +#define A6XX_GMU_CM3_SYSRESET 0x1F800 +#define A6XX_GMU_CM3_BOOT_CONFIG 0x1F801 +#define A6XX_GMU_CX_GMU_WFI_CONFIG 0x1F802 +#define A6XX_GMU_CX_GMU_WDOG_CTRL 0x1F813 +#define A6XX_GMU_CM3_FW_BUSY 0x1F81A +#define A6XX_GMU_CM3_FW_INIT_RESULT 0x1F81C +#define A6XX_GMU_CM3_CFG 0x1F82D +#define A6XX_GMU_CX_GMU_POWER_COUNTER_ENABLE 0x1F840 +#define A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_0 0x1F841 +#define A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_1 0x1F842 +#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L 0x1F844 +#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H 0x1F845 +#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_1_L 0x1F846 +#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_1_H 0x1F847 +#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_2_L 0x1F848 +#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_2_H 0x1F849 +#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_3_L 0x1F84A +#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_3_H 0x1F84B +#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_4_L 0x1F84C +#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_4_H 0x1F84D +#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_5_L 0x1F84E +#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_5_H 0x1F84F +#define A6XX_GMU_CX_GMU_POWER_COUNTER_GMUCLK_0_L 0x1F850 +#define A6XX_GMU_CX_GMU_POWER_COUNTER_GMUCLK_0_H 0x1F851 +#define A6XX_GMU_CX_GMU_POWER_COUNTER_GMUCLK_1_L 0x1F852 +#define A6XX_GMU_CX_GMU_POWER_COUNTER_GMUCLK_1_H 0x1F853 +#define A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_2 0x1F860 +#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_6_L 0x1F870 +#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_6_H 0x1F871 +#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_7_L 0x1F872 +#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_7_H 0x1F843 +#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_8_L 0x1F874 +#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_8_H 0x1F875 +#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_9_L 0x1F876 +#define A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_9_H 0x1F877 +#define 
A6XX_GMU_CX_GMU_ALWAYS_ON_COUNTER_L 0x1F888 +#define A6XX_GMU_CX_GMU_ALWAYS_ON_COUNTER_H 0x1F889 +#define A6XX_GMU_CX_GMU_PERF_COUNTER_ENABLE 0x1F8A0 +#define A6XX_GMU_CX_GMU_PERF_COUNTER_SELECT_0 0x1F8A1 +#define A6XX_GMU_CX_GMU_PERF_COUNTER_SELECT_1 0x1F8A2 +#define A6XX_GMU_CX_GMU_PERF_COUNTER_0_L 0x1F8A4 +#define A6XX_GMU_CX_GMU_PERF_COUNTER_0_H 0x1F8A5 +#define A6XX_GMU_CX_GMU_PERF_COUNTER_1_L 0x1F8A6 +#define A6XX_GMU_CX_GMU_PERF_COUNTER_1_H 0x1F8A7 +#define A6XX_GMU_CX_GMU_PERF_COUNTER_2_L 0x1F8A8 +#define A6XX_GMU_CX_GMU_PERF_COUNTER_2_H 0x1F8A9 +#define A6XX_GMU_CX_GMU_PERF_COUNTER_3_L 0x1F8AA +#define A6XX_GMU_CX_GMU_PERF_COUNTER_3_H 0x1F8AB +#define A6XX_GMU_CX_GMU_PERF_COUNTER_4_L 0x1F8AC +#define A6XX_GMU_CX_GMU_PERF_COUNTER_4_H 0x1F8AD +#define A6XX_GMU_CX_GMU_PERF_COUNTER_5_L 0x1F8AE +#define A6XX_GMU_CX_GMU_PERF_COUNTER_5_H 0x1F8AF +#define A6XX_GMU_PWR_COL_INTER_FRAME_CTRL 0x1F8C0 +#define A6XX_GMU_PWR_COL_INTER_FRAME_HYST 0x1F8C1 +#define A6XX_GMU_PWR_COL_SPTPRAC_HYST 0x1F8C2 +#define A6XX_GMU_SPTPRAC_PWR_CLK_STATUS 0x1F8D0 +#define A6XX_GMU_GPU_NAP_CTRL 0x1F8E4 +#define A6XX_GMU_RPMH_CTRL 0x1F8E8 +#define A6XX_GMU_RPMH_HYST_CTRL 0x1F8E9 +#define A6XX_GPU_GMU_CX_GMU_RPMH_POWER_STATE 0x1F8EC +#define A6XX_GPU_GMU_CX_GMU_PWR_COL_CP_MSG 0x1F900 +#define A6XX_GPU_GMU_CX_GMU_PWR_COL_CP_RESP 0x1F901 +#define A6XX_GMU_BOOT_KMD_LM_HANDSHAKE 0x1F9F0 +#define A6XX_GMU_LLM_GLM_SLEEP_CTRL 0x1F957 +#define A6XX_GMU_LLM_GLM_SLEEP_STATUS 0x1F958 + +/* HFI registers*/ +#define A6XX_GMU_ALWAYS_ON_COUNTER_L 0x1F888 +#define A6XX_GMU_ALWAYS_ON_COUNTER_H 0x1F889 +#define A6XX_GMU_GMU_PWR_COL_KEEPALIVE 0x1F8C3 +#define A6XX_GMU_PWR_COL_PREEMPT_KEEPALIVE 0x1F8C4 +#define A6XX_GMU_HFI_CTRL_STATUS 0x1F980 +#define A6XX_GMU_HFI_VERSION_INFO 0x1F981 +#define A6XX_GMU_HFI_SFR_ADDR 0x1F982 +#define A6XX_GMU_HFI_MMAP_ADDR 0x1F983 +#define A6XX_GMU_HFI_QTBL_INFO 0x1F984 +#define A6XX_GMU_HFI_QTBL_ADDR 0x1F985 +#define A6XX_GMU_HFI_CTRL_INIT 0x1F986 +#define A6XX_GMU_GMU2HOST_INTR_SET 0x1F990 +#define A6XX_GMU_GMU2HOST_INTR_CLR 0x1F991 +#define A6XX_GMU_GMU2HOST_INTR_INFO 0x1F992 +#define A6XX_GMU_GMU2HOST_INTR_MASK 0x1F993 +#define A6XX_GMU_HOST2GMU_INTR_SET 0x1F994 +#define A6XX_GMU_HOST2GMU_INTR_CLR 0x1F995 +#define A6XX_GMU_HOST2GMU_INTR_RAW_INFO 0x1F996 +#define A6XX_GMU_HOST2GMU_INTR_EN_0 0x1F997 +#define A6XX_GMU_HOST2GMU_INTR_EN_1 0x1F998 +#define A6XX_GMU_HOST2GMU_INTR_EN_2 0x1F999 +#define A6XX_GMU_HOST2GMU_INTR_EN_3 0x1F99A +#define A6XX_GMU_HOST2GMU_INTR_INFO_0 0x1F99B +#define A6XX_GMU_HOST2GMU_INTR_INFO_1 0x1F99C +#define A6XX_GMU_HOST2GMU_INTR_INFO_2 0x1F99D +#define A6XX_GMU_HOST2GMU_INTR_INFO_3 0x1F99E +#define A6XX_GMU_GENERAL_0 0x1F9C5 +#define A6XX_GMU_GENERAL_1 0x1F9C6 +#define A6XX_GMU_GENERAL_6 0x1F9CB +#define A6XX_GMU_GENERAL_7 0x1F9CC + +/* ISENSE registers */ +#define A6XX_GMU_ISENSE_CTRL 0x1F95D +#define A6XX_GPU_GMU_CX_GMU_ISENSE_CTRL 0x1f95d +#define A6XX_GPU_CS_ENABLE_REG 0x23120 + +/* LM registers */ +#define A6XX_GPU_GMU_CX_GMU_PWR_THRESHOLD 0x1F94D + +/* FAL10 veto register */ +#define A6XX_GPU_GMU_CX_GMU_CX_FAL_INTF 0x1F8F0 +#define A6XX_GPU_GMU_CX_GMU_CX_FALNEXT_INTF 0x1F8F1 + +#define A6XX_GMU_AO_INTERRUPT_EN 0x23B03 +#define A6XX_GMU_AO_HOST_INTERRUPT_CLR 0x23B04 +#define A6XX_GMU_AO_HOST_INTERRUPT_STATUS 0x23B05 +#define A6XX_GMU_AO_HOST_INTERRUPT_MASK 0x23B06 +#define A6XX_GPU_GMU_AO_GMU_CGC_MODE_CNTL 0x23B09 +#define A6XX_GPU_GMU_AO_GMU_CGC_DELAY_CNTL 0x23B0A +#define A6XX_GPU_GMU_AO_GMU_CGC_HYST_CNTL 0x23B0B +#define A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS 0x23B0C 
+#define A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS2 0x23B0D +#define A6XX_GPU_GMU_AO_GPU_CX_BUSY_MASK 0x23B0E +#define A6XX_GMU_AO_AHB_FENCE_CTRL 0x23B10 +#define A6XX_GMU_AHB_FENCE_STATUS 0x23B13 +#define A6XX_GMU_AHB_FENCE_STATUS_CLR 0x23B14 +#define A6XX_GMU_RBBM_INT_UNMASKED_STATUS 0x23B15 +#define A6XX_GMU_AO_SPARE_CNTL 0x23B16 + +/* RGMU GLM registers */ +#define A6XX_GMU_AO_RGMU_GLM_SLEEP_CTRL 0x23B80 +#define A6XX_GMU_AO_RGMU_GLM_SLEEP_STATUS 0x23B81 +#define A6XX_GMU_AO_RGMU_GLM_HW_CRC_DISABLE 0x23B82 + +/* GMU RSC control registers */ +#define A6XX_GMU_RSCC_CONTROL_REQ 0x23B07 +#define A6XX_GMU_RSCC_CONTROL_ACK 0x23B08 + +/* FENCE control registers */ +#define A6XX_GMU_AHB_FENCE_RANGE_0 0x23B11 +#define A6XX_GMU_AHB_FENCE_RANGE_1 0x23B12 + +/* GPUCC registers */ +#define A6XX_GPU_CC_GX_GDSCR 0x24403 +#define A6XX_GPU_CC_GX_DOMAIN_MISC 0x24542 +#define A6XX_GPU_CC_GX_DOMAIN_MISC3 0x24563 +#define A6XX_GPU_CC_CX_GDSCR 0x2441B + +/* GPU CPR registers */ +#define A6XX_GPU_CPR_FSM_CTL 0x26801 + +/* GPU RSC sequencer registers */ +#define A6XX_GPU_RSCC_RSC_STATUS0_DRV0 0x00004 +#define A6XX_RSCC_PDC_SEQ_START_ADDR 0x00008 +#define A6XX_RSCC_PDC_MATCH_VALUE_LO 0x00009 +#define A6XX_RSCC_PDC_MATCH_VALUE_HI 0x0000A +#define A6XX_RSCC_PDC_SLAVE_ID_DRV0 0x0000B +#define A6XX_RSCC_HIDDEN_TCS_CMD0_ADDR 0x0000D +#define A6XX_RSCC_HIDDEN_TCS_CMD0_DATA 0x0000E +#define A6XX_RSCC_TIMESTAMP_UNIT0_TIMESTAMP_L_DRV0 0x00082 +#define A6XX_RSCC_TIMESTAMP_UNIT0_TIMESTAMP_H_DRV0 0x00083 +#define A6XX_RSCC_TIMESTAMP_UNIT1_EN_DRV0 0x00089 +#define A6XX_RSCC_TIMESTAMP_UNIT1_OUTPUT_DRV0 0x0008C +#define A6XX_RSCC_OVERRIDE_START_ADDR 0x00100 +#define A6XX_RSCC_SEQ_BUSY_DRV0 0x00101 +#define A6XX_RSCC_SEQ_MEM_0_DRV0 0x00180 +#define A6XX_RSCC_TCS0_DRV0_STATUS 0x00346 +#define A6XX_RSCC_TCS1_DRV0_STATUS 0x003EE +#define A6XX_RSCC_TCS2_DRV0_STATUS 0x00496 +#define A6XX_RSCC_TCS3_DRV0_STATUS 0x0053E + +/* GPU PDC sequencer registers in AOSS.RPMh domain */ +#define PDC_GPU_ENABLE_PDC 0x1140 +#define PDC_GPU_SEQ_START_ADDR 0x1148 +#define PDC_GPU_TCS0_CONTROL 0x1540 +#define PDC_GPU_TCS0_CMD_ENABLE_BANK 0x1541 +#define PDC_GPU_TCS0_CMD_WAIT_FOR_CMPL_BANK 0x1542 +#define PDC_GPU_TCS0_CMD0_MSGID 0x1543 +#define PDC_GPU_TCS0_CMD0_ADDR 0x1544 +#define PDC_GPU_TCS0_CMD0_DATA 0x1545 +#define PDC_GPU_TCS1_CONTROL 0x1572 +#define PDC_GPU_TCS1_CMD_ENABLE_BANK 0x1573 +#define PDC_GPU_TCS1_CMD_WAIT_FOR_CMPL_BANK 0x1574 +#define PDC_GPU_TCS1_CMD0_MSGID 0x1575 +#define PDC_GPU_TCS1_CMD0_ADDR 0x1576 +#define PDC_GPU_TCS1_CMD0_DATA 0x1577 +#define PDC_GPU_TCS2_CONTROL 0x15A4 +#define PDC_GPU_TCS2_CMD_ENABLE_BANK 0x15A5 +#define PDC_GPU_TCS2_CMD_WAIT_FOR_CMPL_BANK 0x15A6 +#define PDC_GPU_TCS2_CMD0_MSGID 0x15A7 +#define PDC_GPU_TCS2_CMD0_ADDR 0x15A8 +#define PDC_GPU_TCS2_CMD0_DATA 0x15A9 +#define PDC_GPU_TCS3_CONTROL 0x15D6 +#define PDC_GPU_TCS3_CMD_ENABLE_BANK 0x15D7 +#define PDC_GPU_TCS3_CMD_WAIT_FOR_CMPL_BANK 0x15D8 +#define PDC_GPU_TCS3_CMD0_MSGID 0x15D9 +#define PDC_GPU_TCS3_CMD0_ADDR 0x15DA +#define PDC_GPU_TCS3_CMD0_DATA 0x15DB + +/* + * Legacy DTSI used an offset from the start of the PDC resource + * for PDC SEQ programming. We are now using PDC subsections so + * start the PDC SEQ offset at zero. + */ +#define PDC_GPU_SEQ_MEM_0 0x0 + +/* + * Legacy RSCC register range was a part of the GMU register space; + * now we are using a separate section for RSCC registers. Add the + * offset for backward compatibility.
+ */ +#define RSCC_OFFSET_LEGACY 0x23400 + +/* RGMU(PCC) registers in A6X_GMU_CX_0_NON_CONTEXT_DEC domain */ +#define A6XX_RGMU_CX_INTR_GEN_EN 0x1F80F +#define A6XX_RGMU_CX_RGMU_TIMER0 0x1F834 +#define A6XX_RGMU_CX_RGMU_TIMER1 0x1F835 +#define A6XX_RGMU_CX_PCC_CTRL 0x1F838 +#define A6XX_RGMU_CX_PCC_INIT_RESULT 0x1F839 +#define A6XX_RGMU_CX_PCC_BKPT_CFG 0x1F83A +#define A6XX_RGMU_CX_PCC_BKPT_ADDR 0x1F83B +#define A6XX_RGMU_CX_PCC_STATUS 0x1F83C +#define A6XX_RGMU_CX_PCC_DEBUG 0x1F83D + +/* GPU CX_MISC registers */ +#define A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_0 0x1 +#define A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_1 0x2 +#define A6XX_LLC_NUM_GPU_SCIDS 5 +#define A6XX_GPU_LLC_SCID_NUM_BITS 5 +#define A6XX_GPU_LLC_SCID_MASK \ + ((1 << (A6XX_LLC_NUM_GPU_SCIDS * A6XX_GPU_LLC_SCID_NUM_BITS)) - 1) +#define A6XX_GPUHTW_LLC_SCID_SHIFT 25 +#define A6XX_GPUHTW_LLC_SCID_MASK \ + (((1 << A6XX_GPU_LLC_SCID_NUM_BITS) - 1) << A6XX_GPUHTW_LLC_SCID_SHIFT) + +#endif /* _A6XX_REG_H */ + diff --git a/adreno-gpulist.h b/adreno-gpulist.h new file mode 100644 index 0000000000..e7e061f76f --- /dev/null +++ b/adreno-gpulist.h @@ -0,0 +1,1915 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. + */ + +#define ANY_ID (~0) + +#define DEFINE_ADRENO_REV(_rev, _core, _major, _minor, _patchid) \ + .gpurev = _rev, .core = _core, .major = _major, .minor = _minor, \ + .patchid = _patchid + +#define DEFINE_DEPRECATED_CORE(_name, _rev, _core, _major, _minor, _patchid) \ +static const struct adreno_gpu_core adreno_gpu_core_##_name = { \ + DEFINE_ADRENO_REV(_rev, _core, _major, _minor, _patchid), \ + .features = ADRENO_DEPRECATED, \ +} + +static const struct kgsl_regmap_list a306_vbif_regs[] = { + { A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003 }, + { A3XX_VBIF_OUT_RD_LIM_CONF0, 0x0000000A }, + { A3XX_VBIF_OUT_WR_LIM_CONF0, 0x0000000A }, +}; + +static const struct adreno_a3xx_core adreno_gpu_core_a306 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_A306, 3, 0, 6, 0), + .features = ADRENO_SOFT_FAULT_DETECT, + .gpudev = &adreno_a3xx_gpudev, + .perfcounters = &adreno_a3xx_perfcounters, + .gmem_base = 0, + .gmem_size = SZ_128K, + .bus_width = 0, + .snapshot_size = 600 * SZ_1K, + }, + .pm4fw_name = "a300_pm4.fw", + .pfpfw_name = "a300_pfp.fw", + .vbif = a306_vbif_regs, + .vbif_count = ARRAY_SIZE(a306_vbif_regs), +}; + +static const struct kgsl_regmap_list a306a_vbif_regs[] = { + { A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003 }, + { A3XX_VBIF_OUT_RD_LIM_CONF0, 0x00000010 }, + { A3XX_VBIF_OUT_WR_LIM_CONF0, 0x00000010 }, +}; + +static const struct adreno_a3xx_core adreno_gpu_core_a306a = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_A306A, 3, 0, 6, 0x20), + .features = ADRENO_SOFT_FAULT_DETECT, + .gpudev = &adreno_a3xx_gpudev, + .perfcounters = &adreno_a3xx_perfcounters, + .gmem_base = 0, + .gmem_size = SZ_128K, + .bus_width = 16, + .snapshot_size = 600 * SZ_1K, + }, + .pm4fw_name = "a300_pm4.fw", + .pfpfw_name = "a300_pfp.fw", + .vbif = a306a_vbif_regs, + .vbif_count = ARRAY_SIZE(a306a_vbif_regs), +}; + +static const struct kgsl_regmap_list a304_vbif_regs[] = { + { A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003 }, +}; + +static const struct adreno_a3xx_core adreno_gpu_core_a304 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_A304, 3, 0, 4, 0), + .features = ADRENO_SOFT_FAULT_DETECT, + .gpudev = &adreno_a3xx_gpudev, + .perfcounters = &adreno_a3xx_perfcounters, + .gmem_base = 0, + .gmem_size = (SZ_64K + SZ_32K), + .bus_width = 0, + .snapshot_size = 600 * SZ_1K, + }, + .pm4fw_name = "a300_pm4.fw", 
+ .pfpfw_name = "a300_pfp.fw", + .vbif = a304_vbif_regs, + .vbif_count = ARRAY_SIZE(a304_vbif_regs), +}; + +DEFINE_DEPRECATED_CORE(a405, ADRENO_REV_A405, 4, 0, 5, ANY_ID); +DEFINE_DEPRECATED_CORE(a418, ADRENO_REV_A418, 4, 1, 8, ANY_ID); +DEFINE_DEPRECATED_CORE(a420, ADRENO_REV_A420, 4, 2, 0, ANY_ID); +DEFINE_DEPRECATED_CORE(a430, ADRENO_REV_A430, 4, 3, 0, ANY_ID); +DEFINE_DEPRECATED_CORE(a530v1, ADRENO_REV_A530, 5, 3, 0, 0); + +static const struct kgsl_regmap_list a530_hwcg_regs[] = { + {A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222}, + {A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222}, + {A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222}, + {A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222}, + {A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220}, + {A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220}, + {A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220}, + {A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220}, + {A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF}, + {A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF}, + {A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF}, + {A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF}, + {A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080}, + {A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080}, + {A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080}, + {A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080}, + {A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222}, + {A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222}, + {A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222}, + {A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222}, + {A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777}, + {A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777}, + {A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777}, + {A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777}, + {A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111}, + {A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111}, + {A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111}, + {A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111}, + {A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222}, + {A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444}, + {A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002}, + {A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222}, + {A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222}, + {A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222}, + {A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222}, + {A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220}, + {A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220}, + {A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220}, + {A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220}, + {A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222}, + 
{A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555}, + {A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404}, + {A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404}, + {A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404}, + {A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404}, + {A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044}, + {A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002}, + {A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002}, + {A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002}, + {A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002}, + {A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011}, + {A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222}, + {A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222}, + {A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222}, + {A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000}, + {A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004}, + {A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000}, + {A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000}, + {A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000}, + {A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200}, + {A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}, +}; + +/* VBIF control registers for a530, a510, a508, a505 and a506 */ +static const struct kgsl_regmap_list a530_vbif_regs[] = { + {A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003}, +}; + +static const struct adreno_a5xx_core adreno_gpu_core_a530v2 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_A530, 5, 3, 0, 1), + .features = ADRENO_SPTP_PC | ADRENO_LM | + ADRENO_PREEMPTION | + ADRENO_CONTENT_PROTECTION, + .gpudev = &adreno_a5xx_gpudev, + .perfcounters = &adreno_a5xx_perfcounters, + .gmem_base = 0x100000, + .gmem_size = SZ_1M, + .bus_width = 32, + .snapshot_size = SZ_1M, + }, + .gpmu_tsens = 0x00060007, + .max_power = 5448, + .pm4fw_name = "a530_pm4.fw", + .pfpfw_name = "a530_pfp.fw", + .gpmufw_name = "a530_gpmu.fw2", + .regfw_name = "a530v2_seq.fw2", + .zap_name = "a530_zap", + .hwcg = a530_hwcg_regs, + .hwcg_count = ARRAY_SIZE(a530_hwcg_regs), + .vbif = a530_vbif_regs, + .vbif_count = ARRAY_SIZE(a530_vbif_regs), + .highest_bank_bit = 15, +}; + +static const struct adreno_a5xx_core adreno_gpu_core_a530v3 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_A530, 5, 3, 0, ANY_ID), + .features = ADRENO_SPTP_PC | ADRENO_LM | + ADRENO_PREEMPTION | + ADRENO_CONTENT_PROTECTION, + .gpudev = &adreno_a5xx_gpudev, + .perfcounters = &adreno_a5xx_perfcounters, + .gmem_base = 0x100000, + .gmem_size = SZ_1M, + .bus_width = 32, + .snapshot_size = SZ_1M, + }, + .gpmu_tsens = 0x00060007, + .max_power = 5448, + .pm4fw_name = "a530_pm4.fw", + .pfpfw_name = "a530_pfp.fw", + .gpmufw_name = "a530v3_gpmu.fw2", + .regfw_name = "a530v3_seq.fw2", + .zap_name = "a530_zap", + .hwcg = a530_hwcg_regs, + .hwcg_count = ARRAY_SIZE(a530_hwcg_regs), + .vbif = a530_vbif_regs, + .vbif_count = ARRAY_SIZE(a530_vbif_regs), + .highest_bank_bit = 15, +}; + +/* For a505, a506 and a508 */ +static const struct kgsl_regmap_list a50x_hwcg_regs[] = { + {A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222}, + {A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220}, + {A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF}, + {A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080}, + {A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222}, + {A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777}, + {A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111}, + {A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222}, + {A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222}, + {A5XX_RBBM_CLOCK_HYST_UCHE, 
0x00FFFFF4}, + {A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002}, + {A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222}, + {A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220}, + {A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222}, + {A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555}, + {A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404}, + {A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044}, + {A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002}, + {A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011}, + {A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222}, + {A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222}, + {A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222}, + {A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000}, + {A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004}, + {A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000}, + {A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000}, + {A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000}, + {A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200}, + {A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222} +}; + +static const struct adreno_a5xx_core adreno_gpu_core_a505 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_A505, 5, 0, 5, ANY_ID), + .features = ADRENO_PREEMPTION, + .gpudev = &adreno_a5xx_gpudev, + .perfcounters = &adreno_a5xx_perfcounters, + .gmem_base = 0x100000, + .gmem_size = (SZ_128K + SZ_8K), + .bus_width = 16, + .snapshot_size = SZ_1M, + }, + .pm4fw_name = "a530_pm4.fw", + .pfpfw_name = "a530_pfp.fw", + .hwcg = a50x_hwcg_regs, + .hwcg_count = ARRAY_SIZE(a50x_hwcg_regs), + .vbif = a530_vbif_regs, + .vbif_count = ARRAY_SIZE(a530_vbif_regs), +}; + +static const struct adreno_a5xx_core adreno_gpu_core_a506 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_A506, 5, 0, 6, ANY_ID), + .features = ADRENO_PREEMPTION | + ADRENO_CONTENT_PROTECTION | ADRENO_CPZ_RETENTION, + .gpudev = &adreno_a5xx_gpudev, + .perfcounters = &adreno_a5xx_perfcounters, + .gmem_base = 0x100000, + .gmem_size = (SZ_128K + SZ_8K), + .bus_width = 16, + .snapshot_size = SZ_1M, + }, + .pm4fw_name = "a530_pm4.fw", + .pfpfw_name = "a530_pfp.fw", + .zap_name = "a506_zap", + .hwcg = a50x_hwcg_regs, + .hwcg_count = ARRAY_SIZE(a50x_hwcg_regs), + .vbif = a530_vbif_regs, + .vbif_count = ARRAY_SIZE(a530_vbif_regs), + .highest_bank_bit = 14, +}; + +static const struct kgsl_regmap_list a510_hwcg_regs[] = { + {A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222}, + {A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222}, + {A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220}, + {A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220}, + {A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF}, + {A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF}, + {A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080}, + {A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080}, + {A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222}, + {A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222}, + {A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777}, + {A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777}, + {A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111}, + {A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111}, + {A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222}, + {A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444}, + {A5XX_RBBM_CLOCK_DELAY_UCHE, 
0x00000002}, + {A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222}, + {A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222}, + {A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220}, + {A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220}, + {A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222}, + {A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555}, + {A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404}, + {A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404}, + {A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044}, + {A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002}, + {A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002}, + {A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011}, + {A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222}, + {A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222}, + {A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222}, + {A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000}, + {A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004}, + {A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000}, + {A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000}, + {A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000}, + {A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200}, + {A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}, +}; + +static const struct adreno_a5xx_core adreno_gpu_core_a510 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_A510, 5, 1, 0, ANY_ID), + .gpudev = &adreno_a5xx_gpudev, + .perfcounters = &adreno_a5xx_perfcounters, + .gmem_base = 0x100000, + .gmem_size = SZ_256K, + .bus_width = 16, + .snapshot_size = SZ_1M, + }, + .pm4fw_name = "a530_pm4.fw", + .pfpfw_name = "a530_pfp.fw", + .hwcg = a510_hwcg_regs, + .hwcg_count = ARRAY_SIZE(a510_hwcg_regs), + .vbif = a530_vbif_regs, + .vbif_count = ARRAY_SIZE(a530_vbif_regs), +}; + +DEFINE_DEPRECATED_CORE(a540v1, ADRENO_REV_A540, 5, 4, 0, 0); + +static const struct kgsl_regmap_list a540_hwcg_regs[] = { + {A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222}, + {A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222}, + {A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222}, + {A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222}, + {A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220}, + {A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220}, + {A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220}, + {A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220}, + {A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF}, + {A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF}, + {A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF}, + {A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF}, + {A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080}, + {A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080}, + {A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080}, + {A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080}, + {A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222}, + {A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222}, + {A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222}, + {A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222}, + {A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777}, + {A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777}, + {A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777}, + {A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777}, + {A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY_TP2, 
0x11111111}, + {A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111}, + {A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111}, + {A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111}, + {A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111}, + {A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222}, + {A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444}, + {A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002}, + {A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222}, + {A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222}, + {A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222}, + {A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222}, + {A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220}, + {A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220}, + {A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220}, + {A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220}, + {A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222}, + {A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555}, + {A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404}, + {A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404}, + {A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404}, + {A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404}, + {A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044}, + {A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002}, + {A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002}, + {A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002}, + {A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002}, + {A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011}, + {A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222}, + {A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222}, + {A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222}, + {A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000}, + {A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004}, + {A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000}, + {A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000}, + {A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000}, + {A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200}, + {A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}, + {A5XX_RBBM_CLOCK_HYST_GPMU, 0x00000222}, + {A5XX_RBBM_CLOCK_DELAY_GPMU, 0x00000770}, + {A5XX_RBBM_CLOCK_HYST_GPMU, 0x00000004}, +}; + +static const struct kgsl_regmap_list a540_vbif_regs[] = { + {A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003}, + {A5XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009}, +}; + +static const struct adreno_a5xx_core adreno_gpu_core_a540v2 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_A540, 5, 4, 0, ANY_ID), + .features = ADRENO_PREEMPTION | + ADRENO_CONTENT_PROTECTION | + ADRENO_SPTP_PC, + .gpudev = &adreno_a5xx_gpudev, + .perfcounters = &adreno_a5xx_perfcounters, + .gmem_base = 0x100000, + .gmem_size = SZ_1M, + .bus_width = 32, + .snapshot_size = SZ_1M, + }, + .gpmu_tsens = 0x000c000d, + .max_power = 5448, + .pm4fw_name = "a530_pm4.fw", + .pfpfw_name = "a530_pfp.fw", + .gpmufw_name = "a540_gpmu.fw2", + .zap_name = "a540_zap", + .hwcg = a540_hwcg_regs, + .hwcg_count = ARRAY_SIZE(a540_hwcg_regs), + .vbif = a540_vbif_regs, + .vbif_count = ARRAY_SIZE(a540_vbif_regs), + .highest_bank_bit = 15, +}; + +static const struct kgsl_regmap_list a512_hwcg_regs[] = { + {A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222}, + {A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222}, + {A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220}, + {A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220}, + {A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF}, + {A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF}, + 
{A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080}, + {A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080}, + {A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222}, + {A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222}, + {A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777}, + {A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777}, + {A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111}, + {A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111}, + {A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222}, + {A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444}, + {A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002}, + {A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222}, + {A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222}, + {A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220}, + {A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220}, + {A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222}, + {A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555}, + {A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404}, + {A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404}, + {A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044}, + {A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002}, + {A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002}, + {A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011}, + {A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222}, + {A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222}, + {A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222}, + {A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000}, + {A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004}, + {A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000}, + {A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000}, + {A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000}, + {A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200}, + {A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}, +}; + +static const struct adreno_a5xx_core adreno_gpu_core_a512 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_A512, 5, 1, 2, ANY_ID), + .features = ADRENO_PREEMPTION | + ADRENO_CONTENT_PROTECTION | ADRENO_CPZ_RETENTION, + .gpudev = &adreno_a5xx_gpudev, + .perfcounters = &adreno_a5xx_perfcounters, + .gmem_base = 0x100000, + .gmem_size = (SZ_256K + SZ_16K), + .bus_width = 32, + .snapshot_size = SZ_1M, + }, + .pm4fw_name = "a530_pm4.fw", + .pfpfw_name = "a530_pfp.fw", + .zap_name = "a512_zap", + .hwcg = a512_hwcg_regs, + .hwcg_count = ARRAY_SIZE(a512_hwcg_regs), + .highest_bank_bit = 14, +}; + +static const struct adreno_a5xx_core adreno_gpu_core_a508 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_A508, 5, 0, 8, ANY_ID), + .features = ADRENO_PREEMPTION | + ADRENO_CONTENT_PROTECTION | ADRENO_CPZ_RETENTION, + .gpudev = &adreno_a5xx_gpudev, + .perfcounters = &adreno_a5xx_perfcounters, + .gmem_base = 0x100000, + .gmem_size = (SZ_128K + SZ_8K), + .bus_width = 32, + .snapshot_size = SZ_1M, + }, + .pm4fw_name = "a530_pm4.fw", + .pfpfw_name = "a530_pfp.fw", + .zap_name = "a508_zap", + .hwcg = a50x_hwcg_regs, + .hwcg_count = ARRAY_SIZE(a50x_hwcg_regs), + .vbif = a530_vbif_regs, + .vbif_count = ARRAY_SIZE(a530_vbif_regs), + .highest_bank_bit = 14, +}; + +DEFINE_DEPRECATED_CORE(a630v1, ADRENO_REV_A630, 6, 3, 0, 0); + +static const struct 
kgsl_regmap_list a630_hwcg_regs[] = { + {A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222}, + {A6XX_RBBM_CLOCK_CNTL_SP1, 0x02222222}, + {A6XX_RBBM_CLOCK_CNTL_SP2, 0x02222222}, + {A6XX_RBBM_CLOCK_CNTL_SP3, 0x02222222}, + {A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02022220}, + {A6XX_RBBM_CLOCK_CNTL2_SP1, 0x02022220}, + {A6XX_RBBM_CLOCK_CNTL2_SP2, 0x02022220}, + {A6XX_RBBM_CLOCK_CNTL2_SP3, 0x02022220}, + {A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080}, + {A6XX_RBBM_CLOCK_DELAY_SP1, 0x00000080}, + {A6XX_RBBM_CLOCK_DELAY_SP2, 0x00000080}, + {A6XX_RBBM_CLOCK_DELAY_SP3, 0x00000080}, + {A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF}, + {A6XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF}, + {A6XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF}, + {A6XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF}, + {A6XX_RBBM_CLOCK_CNTL_TP0, 0x02222222}, + {A6XX_RBBM_CLOCK_CNTL_TP1, 0x02222222}, + {A6XX_RBBM_CLOCK_CNTL_TP2, 0x02222222}, + {A6XX_RBBM_CLOCK_CNTL_TP3, 0x02222222}, + {A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL3_TP1, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL3_TP2, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL3_TP3, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222}, + {A6XX_RBBM_CLOCK_CNTL4_TP1, 0x00022222}, + {A6XX_RBBM_CLOCK_CNTL4_TP2, 0x00022222}, + {A6XX_RBBM_CLOCK_CNTL4_TP3, 0x00022222}, + {A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777}, + {A6XX_RBBM_CLOCK_HYST_TP1, 0x77777777}, + {A6XX_RBBM_CLOCK_HYST_TP2, 0x77777777}, + {A6XX_RBBM_CLOCK_HYST_TP3, 0x77777777}, + {A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777}, + {A6XX_RBBM_CLOCK_HYST2_TP1, 0x77777777}, + {A6XX_RBBM_CLOCK_HYST2_TP2, 0x77777777}, + {A6XX_RBBM_CLOCK_HYST2_TP3, 0x77777777}, + {A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777}, + {A6XX_RBBM_CLOCK_HYST3_TP1, 0x77777777}, + {A6XX_RBBM_CLOCK_HYST3_TP2, 0x77777777}, + {A6XX_RBBM_CLOCK_HYST3_TP3, 0x77777777}, + {A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777}, + {A6XX_RBBM_CLOCK_HYST4_TP1, 0x00077777}, + {A6XX_RBBM_CLOCK_HYST4_TP2, 0x00077777}, + {A6XX_RBBM_CLOCK_HYST4_TP3, 0x00077777}, + {A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111}, + {A6XX_RBBM_CLOCK_DELAY_TP1, 0x11111111}, + {A6XX_RBBM_CLOCK_DELAY_TP2, 0x11111111}, + {A6XX_RBBM_CLOCK_DELAY_TP3, 0x11111111}, + {A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111}, + {A6XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111}, + {A6XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111}, + {A6XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111}, + {A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111}, + {A6XX_RBBM_CLOCK_DELAY3_TP1, 0x11111111}, + {A6XX_RBBM_CLOCK_DELAY3_TP2, 0x11111111}, + {A6XX_RBBM_CLOCK_DELAY3_TP3, 0x11111111}, + {A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111}, + {A6XX_RBBM_CLOCK_DELAY4_TP1, 0x00011111}, + {A6XX_RBBM_CLOCK_DELAY4_TP2, 0x00011111}, + {A6XX_RBBM_CLOCK_DELAY4_TP3, 0x00011111}, + {A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222}, + {A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004}, + {A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002}, + {A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL_RB1, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL_RB2, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL_RB3, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL2_RB0, 0x00002222}, + {A6XX_RBBM_CLOCK_CNTL2_RB1, 0x00002222}, + {A6XX_RBBM_CLOCK_CNTL2_RB2, 0x00002222}, + {A6XX_RBBM_CLOCK_CNTL2_RB3, 0x00002222}, + {A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220}, + {A6XX_RBBM_CLOCK_CNTL_CCU1, 0x00002220}, + {A6XX_RBBM_CLOCK_CNTL_CCU2, 0x00002220}, + {A6XX_RBBM_CLOCK_CNTL_CCU3, 
0x00002220}, + {A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00}, + {A6XX_RBBM_CLOCK_HYST_RB_CCU1, 0x00040F00}, + {A6XX_RBBM_CLOCK_HYST_RB_CCU2, 0x00040F00}, + {A6XX_RBBM_CLOCK_HYST_RB_CCU3, 0x00040F00}, + {A6XX_RBBM_CLOCK_CNTL_RAC, 0x05022022}, + {A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555}, + {A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011}, + {A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044}, + {A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222}, + {A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222}, + {A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222}, + {A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000}, + {A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004}, + {A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000}, + {A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000}, + {A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000}, + {A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200}, + {A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}, + {A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002}, + {A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222}, + {A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222}, + {A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111}, + {A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555}, +}; + +static const struct kgsl_regmap_list a630_vbif_regs[] = { + {A6XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009}, + {A6XX_RBBM_VBIF_CLIENT_QOS_CNTL, 0x3}, +}; + + +/* For a615, a616, a618, A619, a630, a640 and a680 */ +static const struct adreno_protected_regs a630_protected_regs[] = { + { A6XX_CP_PROTECT_REG + 0, 0x00000, 0x004ff, 0 }, + { A6XX_CP_PROTECT_REG + 1, 0x00501, 0x00506, 0 }, + { A6XX_CP_PROTECT_REG + 2, 0x0050b, 0x007ff, 0 }, + { A6XX_CP_PROTECT_REG + 3, 0x0050e, 0x0050e, 1 }, + { A6XX_CP_PROTECT_REG + 4, 0x00510, 0x00510, 1 }, + { A6XX_CP_PROTECT_REG + 5, 0x00534, 0x00534, 1 }, + { A6XX_CP_PROTECT_REG + 6, 0x00800, 0x00882, 1 }, + { A6XX_CP_PROTECT_REG + 7, 0x008a0, 0x008a8, 1 }, + { A6XX_CP_PROTECT_REG + 8, 0x008ab, 0x008cf, 1 }, + { A6XX_CP_PROTECT_REG + 9, 0x008d0, 0x0098c, 0 }, + { A6XX_CP_PROTECT_REG + 10, 0x00900, 0x0094d, 1 }, + { A6XX_CP_PROTECT_REG + 11, 0x0098d, 0x00bff, 1 }, + { A6XX_CP_PROTECT_REG + 12, 0x00e00, 0x00e01, 1 }, + { A6XX_CP_PROTECT_REG + 13, 0x00e03, 0x00e0f, 1 }, + { A6XX_CP_PROTECT_REG + 14, 0x03c00, 0x03cc3, 1 }, + { A6XX_CP_PROTECT_REG + 15, 0x03cc4, 0x05cc3, 0 }, + { A6XX_CP_PROTECT_REG + 16, 0x08630, 0x087ff, 1 }, + { A6XX_CP_PROTECT_REG + 17, 0x08e00, 0x08e00, 1 }, + { A6XX_CP_PROTECT_REG + 18, 0x08e08, 0x08e08, 1 }, + { A6XX_CP_PROTECT_REG + 19, 0x08e50, 0x08e6f, 1 }, + { A6XX_CP_PROTECT_REG + 20, 0x09624, 0x097ff, 1 }, + { A6XX_CP_PROTECT_REG + 21, 0x09e70, 0x09e71, 1 }, + { A6XX_CP_PROTECT_REG + 22, 0x09e78, 0x09fff, 1 }, + { A6XX_CP_PROTECT_REG + 23, 0x0a630, 0x0a7ff, 1 }, + { A6XX_CP_PROTECT_REG + 24, 0x0ae02, 0x0ae02, 1 }, + { A6XX_CP_PROTECT_REG + 25, 0x0ae50, 0x0b17f, 1 }, + { A6XX_CP_PROTECT_REG + 26, 0x0b604, 0x0b604, 1 }, + { A6XX_CP_PROTECT_REG + 27, 0x0be02, 0x0be03, 1 }, + { A6XX_CP_PROTECT_REG + 28, 0x0be20, 0x0d5ff, 1 }, + { A6XX_CP_PROTECT_REG + 29, 0x0f000, 0x0fbff, 1 }, + { A6XX_CP_PROTECT_REG + 30, 0x0fc00, 0x11bff, 0 }, + { A6XX_CP_PROTECT_REG + 31, 0x11c00, 0x11c00, 1 }, + { 0 }, +}; + +static const struct adreno_a6xx_core adreno_gpu_core_a630v2 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_A630, 6, 3, 0, ANY_ID), + .features = ADRENO_IFPC | ADRENO_CONTENT_PROTECTION | + ADRENO_IOCOHERENT | ADRENO_PREEMPTION, + .gpudev = &adreno_a630_gpudev.base, + .perfcounters = &adreno_a630_perfcounters, + .gmem_base = 0x100000, + .gmem_size = SZ_1M, + .bus_width = 32, + .snapshot_size = SZ_1M, + }, + .prim_fifo_threshold = 0x0018000, + .gmu_major = 1, + .gmu_minor = 3, + .sqefw_name = "a630_sqe.fw", + .gmufw_name 
= "a630_gmu.bin", + .zap_name = "a630_zap", + .hwcg = a630_hwcg_regs, + .hwcg_count = ARRAY_SIZE(a630_hwcg_regs), + .vbif = a630_vbif_regs, + .vbif_count = ARRAY_SIZE(a630_vbif_regs), + .hang_detect_cycles = 0xcfffff, + .protected_regs = a630_protected_regs, + .highest_bank_bit = 15, +}; + +/* For a615, a616, a618 and a619 */ +static const struct kgsl_regmap_list a615_hwcg_regs[] = { + {A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222}, + {A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220}, + {A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080}, + {A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF}, + {A6XX_RBBM_CLOCK_CNTL_TP0, 0x02222222}, + {A6XX_RBBM_CLOCK_CNTL_TP1, 0x02222222}, + {A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL3_TP1, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222}, + {A6XX_RBBM_CLOCK_CNTL4_TP1, 0x00022222}, + {A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777}, + {A6XX_RBBM_CLOCK_HYST_TP1, 0x77777777}, + {A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777}, + {A6XX_RBBM_CLOCK_HYST2_TP1, 0x77777777}, + {A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777}, + {A6XX_RBBM_CLOCK_HYST3_TP1, 0x77777777}, + {A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777}, + {A6XX_RBBM_CLOCK_HYST4_TP1, 0x00077777}, + {A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111}, + {A6XX_RBBM_CLOCK_DELAY_TP1, 0x11111111}, + {A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111}, + {A6XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111}, + {A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111}, + {A6XX_RBBM_CLOCK_DELAY3_TP1, 0x11111111}, + {A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111}, + {A6XX_RBBM_CLOCK_DELAY4_TP1, 0x00011111}, + {A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222}, + {A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004}, + {A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002}, + {A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL2_RB0, 0x00002222}, + {A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220}, + {A6XX_RBBM_CLOCK_CNTL_CCU1, 0x00002220}, + {A6XX_RBBM_CLOCK_CNTL_CCU2, 0x00002220}, + {A6XX_RBBM_CLOCK_CNTL_CCU3, 0x00002220}, + {A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00}, + {A6XX_RBBM_CLOCK_HYST_RB_CCU1, 0x00040F00}, + {A6XX_RBBM_CLOCK_HYST_RB_CCU2, 0x00040F00}, + {A6XX_RBBM_CLOCK_HYST_RB_CCU3, 0x00040F00}, + {A6XX_RBBM_CLOCK_CNTL_RAC, 0x05022022}, + {A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555}, + {A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011}, + {A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044}, + {A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222}, + {A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222}, + {A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222}, + {A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000}, + {A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004}, + {A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000}, + {A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000}, + {A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000}, + {A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200}, + {A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}, + {A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002}, + {A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222}, + {A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222}, + {A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111}, + {A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555} +}; + +/* For a615, a616, a618 and a619 */ +static const struct kgsl_regmap_list a615_gbif_regs[] = { + {A6XX_RBBM_VBIF_CLIENT_QOS_CNTL, 0x3}, +}; + +static const struct adreno_a6xx_core adreno_gpu_core_a615 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_A615, 6, 1, 5, ANY_ID), + .features = ADRENO_PREEMPTION | + ADRENO_CONTENT_PROTECTION | ADRENO_IFPC | + ADRENO_IOCOHERENT, + .gpudev = 
&adreno_a630_gpudev.base, + .perfcounters = &adreno_a6xx_legacy_perfcounters, + .gmem_base = 0x100000, + .gmem_size = SZ_512K, + .bus_width = 32, + .snapshot_size = 600 * SZ_1K, + }, + .prim_fifo_threshold = 0x0018000, + .gmu_major = 1, + .gmu_minor = 3, + .sqefw_name = "a630_sqe.fw", + .gmufw_name = "a630_gmu.bin", + .zap_name = "a615_zap", + .hwcg = a615_hwcg_regs, + .hwcg_count = ARRAY_SIZE(a615_hwcg_regs), + .vbif = a615_gbif_regs, + .vbif_count = ARRAY_SIZE(a615_gbif_regs), + .hang_detect_cycles = 0xcfffff, + .protected_regs = a630_protected_regs, + .highest_bank_bit = 14, +}; + +static const struct adreno_a6xx_core adreno_gpu_core_a618 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_A618, 6, 1, 8, ANY_ID), + .features = ADRENO_PREEMPTION | + ADRENO_CONTENT_PROTECTION | ADRENO_IFPC | + ADRENO_IOCOHERENT, + .gpudev = &adreno_a630_gpudev.base, + .perfcounters = &adreno_a6xx_legacy_perfcounters, + .gmem_base = 0x100000, + .gmem_size = SZ_512K, + .bus_width = 32, + .snapshot_size = SZ_1M, + }, + .prim_fifo_threshold = 0x0018000, + .gmu_major = 1, + .gmu_minor = 7, + .sqefw_name = "a630_sqe.fw", + .gmufw_name = "a630_gmu.bin", + .zap_name = "a615_zap", + .hwcg = a615_hwcg_regs, + .hwcg_count = ARRAY_SIZE(a615_hwcg_regs), + .vbif = a615_gbif_regs, + .vbif_count = ARRAY_SIZE(a615_gbif_regs), + .hang_detect_cycles = 0x3fffff, + .protected_regs = a630_protected_regs, + .highest_bank_bit = 14, +}; + +static const struct adreno_a6xx_core adreno_gpu_core_a619 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_A619, 6, 1, 9, ANY_ID), + .features = ADRENO_PREEMPTION | + ADRENO_CONTENT_PROTECTION | ADRENO_IFPC | + ADRENO_IOCOHERENT, + .gpudev = &adreno_a630_gpudev.base, + .perfcounters = &adreno_a6xx_legacy_perfcounters, + .gmem_base = 0x100000, + .gmem_size = SZ_512K, + .bus_width = 32, + .snapshot_size = SZ_2M, + }, + .prim_fifo_threshold = 0x0018000, + .gmu_major = 1, + .gmu_minor = 9, + .sqefw_name = "a630_sqe.fw", + .gmufw_name = "a619_gmu.bin", + .zap_name = "a615_zap", + .hwcg = a615_hwcg_regs, + .hwcg_count = ARRAY_SIZE(a615_hwcg_regs), + .vbif = a615_gbif_regs, + .vbif_count = ARRAY_SIZE(a615_gbif_regs), + .hang_detect_cycles = 0x3fffff, + .protected_regs = a630_protected_regs, + .highest_bank_bit = 14, +}; + +static const struct adreno_a6xx_core adreno_gpu_core_a619_variant = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_A619, 6, 1, 9, ANY_ID), + .compatible = "qcom,adreno-gpu-a619-holi", + .features = ADRENO_PREEMPTION | ADRENO_CONTENT_PROTECTION, + .gpudev = &adreno_a619_holi_gpudev, + .perfcounters = &adreno_a6xx_legacy_perfcounters, + .gmem_base = 0x100000, + .gmem_size = SZ_512K, + .bus_width = 32, + .snapshot_size = SZ_2M, + }, + .prim_fifo_threshold = 0x0018000, + .sqefw_name = "a630_sqe.fw", + .zap_name = "a615_zap", + .hwcg = a615_hwcg_regs, + .hwcg_count = ARRAY_SIZE(a615_hwcg_regs), + .vbif = a615_gbif_regs, + .vbif_count = ARRAY_SIZE(a615_gbif_regs), + .hang_detect_cycles = 0x3fffff, + .protected_regs = a630_protected_regs, + .gx_cpr_toggle = true, + .highest_bank_bit = 14, +}; + +static const struct kgsl_regmap_list a620_hwcg_regs[] = { + {A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222}, + {A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220}, + {A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080}, + {A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF}, + {A6XX_RBBM_CLOCK_CNTL_TP0, 0x02222222}, + {A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222}, + {A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111}, + {A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111}, + 
{A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111}, + {A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111}, + {A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777}, + {A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777}, + {A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777}, + {A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777}, + {A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01002222}, + {A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220}, + {A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00}, + {A6XX_RBBM_CLOCK_CNTL_RAC, 0x25222022}, + {A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555}, + {A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011}, + {A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044}, + {A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222}, + {A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222}, + {A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222}, + {A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002}, + {A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222}, + {A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000}, + {A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}, + {A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200}, + {A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000}, + {A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000}, + {A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000}, + {A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004}, + {A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000}, + {A6XX_RBBM_CLOCK_CNTL_TEX_FCHE, 0x00000222}, + {A6XX_RBBM_CLOCK_DELAY_TEX_FCHE, 0x00000111}, + {A6XX_RBBM_CLOCK_HYST_TEX_FCHE, 0x00000777}, + {A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222}, + {A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004}, + {A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002}, + {A6XX_RBBM_ISDB_CNT, 0x00000182}, + {A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000}, + {A6XX_RBBM_SP_HYST_CNT, 0x00000000}, + {A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222}, + {A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111}, + {A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555}, +}; + +/* a620 and a650 */ +static const struct kgsl_regmap_list a650_gbif_regs[] = { + {A6XX_GBIF_QSB_SIDE0, 0x00071620}, + {A6XX_GBIF_QSB_SIDE1, 0x00071620}, + {A6XX_GBIF_QSB_SIDE2, 0x00071620}, + {A6XX_GBIF_QSB_SIDE3, 0x00071620}, + {A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x3}, +}; + +/* These are for a620 and a650 */ +static const struct adreno_protected_regs a620_protected_regs[] = { + { A6XX_CP_PROTECT_REG + 0, 0x00000, 0x004ff, 0 }, + { A6XX_CP_PROTECT_REG + 1, 0x00501, 0x00506, 0 }, + { A6XX_CP_PROTECT_REG + 2, 0x0050b, 0x007ff, 0 }, + { A6XX_CP_PROTECT_REG + 3, 0x0050e, 0x0050e, 1 }, + { A6XX_CP_PROTECT_REG + 4, 0x00510, 0x00510, 1 }, + { A6XX_CP_PROTECT_REG + 5, 0x00534, 0x00534, 1 }, + { A6XX_CP_PROTECT_REG + 6, 0x00800, 0x00882, 1 }, + { A6XX_CP_PROTECT_REG + 7, 0x008a0, 0x008a8, 1 }, + { A6XX_CP_PROTECT_REG + 8, 0x008ab, 0x008cf, 1 }, + { A6XX_CP_PROTECT_REG + 9, 0x008d0, 0x0098c, 0 }, + { A6XX_CP_PROTECT_REG + 10, 0x00900, 0x0094d, 1 }, + { A6XX_CP_PROTECT_REG + 11, 0x0098d, 0x00bff, 1 }, + { A6XX_CP_PROTECT_REG + 12, 0x00e00, 0x00e01, 1 }, + { A6XX_CP_PROTECT_REG + 13, 0x00e03, 0x00e0f, 1 }, + { A6XX_CP_PROTECT_REG + 14, 0x03c00, 0x03cc3, 1 }, + { A6XX_CP_PROTECT_REG + 15, 0x03cc4, 0x05cc3, 0 }, + { A6XX_CP_PROTECT_REG + 16, 0x08630, 0x087ff, 1 }, + { A6XX_CP_PROTECT_REG + 17, 0x08e00, 0x08e00, 1 }, + { A6XX_CP_PROTECT_REG + 18, 0x08e08, 0x08e08, 1 }, + { A6XX_CP_PROTECT_REG + 19, 0x08e50, 0x08e6f, 1 }, + { A6XX_CP_PROTECT_REG + 20, 0x08e80, 0x090ff, 1 }, + { A6XX_CP_PROTECT_REG + 21, 0x09624, 0x097ff, 1 }, + { A6XX_CP_PROTECT_REG + 22, 0x09e60, 0x09e71, 1 }, + { A6XX_CP_PROTECT_REG + 23, 0x09e78, 0x09fff, 1 }, + { A6XX_CP_PROTECT_REG + 24, 0x0a630, 0x0a7ff, 1 }, + { A6XX_CP_PROTECT_REG + 25, 0x0ae02, 0x0ae02, 1 }, + { A6XX_CP_PROTECT_REG + 26, 0x0ae50, 0x0b17f, 1 }, + { A6XX_CP_PROTECT_REG + 27, 
0x0b604, 0x0b604, 1 }, + { A6XX_CP_PROTECT_REG + 28, 0x0b608, 0x0b60f, 1 }, + { A6XX_CP_PROTECT_REG + 29, 0x0be02, 0x0be03, 1 }, + { A6XX_CP_PROTECT_REG + 30, 0x0be20, 0x0d5ff, 1 }, + { A6XX_CP_PROTECT_REG + 31, 0x0f000, 0x0fbff, 1 }, + { A6XX_CP_PROTECT_REG + 32, 0x0fc00, 0x11bff, 0 }, + { A6XX_CP_PROTECT_REG + 33, 0x18400, 0x1a3ff, 1 }, + { A6XX_CP_PROTECT_REG + 34, 0x1a800, 0x1c7ff, 1 }, + { A6XX_CP_PROTECT_REG + 35, 0x1f400, 0x1f843, 1 }, + { A6XX_CP_PROTECT_REG + 36, 0x1f844, 0x1f8bf, 0 }, + { A6XX_CP_PROTECT_REG + 37, 0x1f887, 0x1f8a2, 1 }, + { A6XX_CP_PROTECT_REG + 47, 0x1f8c0, 0x1f8c0, 1 }, + { 0 }, +}; + +static const struct adreno_a6xx_core adreno_gpu_core_a620 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_A620, 6, 2, 0, ANY_ID), + .features = ADRENO_CONTENT_PROTECTION | ADRENO_IOCOHERENT | + ADRENO_IFPC | ADRENO_PREEMPTION | ADRENO_ACD | + ADRENO_APRIV, + .gpudev = &adreno_a630_gpudev.base, + .perfcounters = &adreno_a6xx_perfcounters, + .gmem_base = 0, + .gmem_size = SZ_512K, + .bus_width = 32, + .snapshot_size = 2 * SZ_1M, + }, + .prim_fifo_threshold = 0x0010000, + .gmu_major = 2, + .gmu_minor = 0, + .sqefw_name = "a650_sqe.fw", + .gmufw_name = "a650_gmu.bin", + .zap_name = "a620_zap", + .hwcg = a620_hwcg_regs, + .hwcg_count = ARRAY_SIZE(a620_hwcg_regs), + .vbif = a650_gbif_regs, + .vbif_count = ARRAY_SIZE(a650_gbif_regs), + .veto_fal10 = true, + .hang_detect_cycles = 0x3ffff, + .protected_regs = a620_protected_regs, + .disable_tseskip = true, + .highest_bank_bit = 14, +}; + +static const struct kgsl_regmap_list a640_hwcg_regs[] = { + {A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222}, + {A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220}, + {A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080}, + {A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF}, + {A6XX_RBBM_CLOCK_CNTL_TP0, 0x02222222}, + {A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222}, + {A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111}, + {A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111}, + {A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111}, + {A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111}, + {A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777}, + {A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777}, + {A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777}, + {A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777}, + {A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01002222}, + {A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220}, + {A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00}, + {A6XX_RBBM_CLOCK_CNTL_RAC, 0x05222022}, + {A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555}, + {A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011}, + {A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044}, + {A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222}, + {A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222}, + {A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222}, + {A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002}, + {A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222}, + {A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000}, + {A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}, + {A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200}, + {A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000}, + {A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000}, + {A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000}, + {A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004}, + {A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000}, + {A6XX_RBBM_CLOCK_CNTL_TEX_FCHE, 0x00000222}, + {A6XX_RBBM_CLOCK_DELAY_TEX_FCHE, 0x00000111}, + {A6XX_RBBM_CLOCK_HYST_TEX_FCHE, 0x00000000}, + {A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222}, + {A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004}, + {A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002}, + {A6XX_RBBM_ISDB_CNT, 0x00000182}, + {A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000}, + 
{A6XX_RBBM_SP_HYST_CNT, 0x00000000}, + {A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222}, + {A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111}, + {A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555}, +}; + +/* These apply to a640, a680, a612 and a610 */ +static const struct kgsl_regmap_list a640_vbif_regs[] = { + {A6XX_GBIF_QSB_SIDE0, 0x00071620}, + {A6XX_GBIF_QSB_SIDE1, 0x00071620}, + {A6XX_GBIF_QSB_SIDE2, 0x00071620}, + {A6XX_GBIF_QSB_SIDE3, 0x00071620}, + {A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x3}, +}; + +static const struct adreno_a6xx_core adreno_gpu_core_a640 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_A640, 6, 4, 0, ANY_ID), + .features = ADRENO_CONTENT_PROTECTION | ADRENO_IOCOHERENT | + ADRENO_IFPC | ADRENO_PREEMPTION | ADRENO_L3_VOTE, + .gpudev = &adreno_a6xx_gmu_gpudev.base, + .perfcounters = &adreno_a6xx_legacy_perfcounters, + .gmem_base = 0x100000, + .gmem_size = SZ_1M, //Verified 1MB + .bus_width = 32, + .snapshot_size = 2 * SZ_1M, + }, + .prim_fifo_threshold = 0x00200000, + .gmu_major = 2, + .gmu_minor = 0, + .sqefw_name = "a630_sqe.fw", + .gmufw_name = "a640_gmu.bin", + .zap_name = "a640_zap", + .hwcg = a640_hwcg_regs, + .hwcg_count = ARRAY_SIZE(a640_hwcg_regs), + .vbif = a640_vbif_regs, + .vbif_count = ARRAY_SIZE(a640_vbif_regs), + .hang_detect_cycles = 0xcfffff, + .protected_regs = a630_protected_regs, + .disable_tseskip = true, + .highest_bank_bit = 15, +}; + +static const struct kgsl_regmap_list a650_hwcg_regs[] = { + {A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222}, + {A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220}, + {A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080}, + {A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF}, + {A6XX_RBBM_CLOCK_CNTL_TP0, 0x02222222}, + {A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222}, + {A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111}, + {A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111}, + {A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111}, + {A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111}, + {A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777}, + {A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777}, + {A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777}, + {A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777}, + {A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01002222}, + {A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220}, + {A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00}, + {A6XX_RBBM_CLOCK_CNTL_RAC, 0x25222022}, + {A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555}, + {A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011}, + {A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044}, + {A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222}, + {A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222}, + {A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222}, + {A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002}, + {A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222}, + {A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000}, + {A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}, + {A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200}, + {A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000}, + {A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000}, + {A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000}, + {A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004}, + {A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000}, + {A6XX_RBBM_CLOCK_CNTL_TEX_FCHE, 0x00000222}, + {A6XX_RBBM_CLOCK_DELAY_TEX_FCHE, 0x00000111}, + {A6XX_RBBM_CLOCK_HYST_TEX_FCHE, 0x00000777}, + {A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222}, + {A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004}, + {A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002}, + {A6XX_RBBM_ISDB_CNT, 0x00000182}, + {A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000}, + {A6XX_RBBM_SP_HYST_CNT, 0x00000000}, + {A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222}, + {A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111}, + 
{A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555}, +}; + +static const struct adreno_a6xx_core adreno_gpu_core_a650 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_A650, 6, 5, 0, 0), + .features = ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION | + ADRENO_IFPC | ADRENO_APRIV | ADRENO_L3_VOTE, + .gpudev = &adreno_a6xx_gmu_gpudev.base, + .perfcounters = &adreno_a6xx_perfcounters, + .gmem_base = 0, + .gmem_size = SZ_1M + SZ_128K, /* verified 1152kB */ + .bus_width = 32, + .snapshot_size = 2 * SZ_1M, + }, + .prim_fifo_threshold = 0x00300000, + .gmu_major = 2, + .gmu_minor = 0, + .sqefw_name = "a650_sqe.fw", + .gmufw_name = "a650_gmu.bin", + .zap_name = "a650_zap", + .hwcg = a650_hwcg_regs, + .hwcg_count = ARRAY_SIZE(a650_hwcg_regs), + .vbif = a650_gbif_regs, + .vbif_count = ARRAY_SIZE(a650_gbif_regs), + .veto_fal10 = true, + .pdc_in_aop = true, + .hang_detect_cycles = 0xcfffff, + .protected_regs = a620_protected_regs, + .disable_tseskip = true, + .highest_bank_bit = 16, +}; + +static const struct adreno_a6xx_core adreno_gpu_core_a650v2 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_A650, 6, 5, 0, ANY_ID), + .features = ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION | + ADRENO_IFPC | ADRENO_PREEMPTION | ADRENO_ACD | + ADRENO_LM | ADRENO_APRIV | ADRENO_L3_VOTE, + .gpudev = &adreno_a6xx_gmu_gpudev.base, + .perfcounters = &adreno_a6xx_perfcounters, + .gmem_base = 0, + .gmem_size = SZ_1M + SZ_128K, /* verified 1152kB */ + .bus_width = 32, + .snapshot_size = 2 * SZ_1M, + }, + .prim_fifo_threshold = 0x00300000, + .gmu_major = 2, + .gmu_minor = 0, + .sqefw_name = "a650_sqe.fw", + .gmufw_name = "a650_gmu.bin", + .zap_name = "a650_zap", + .hwcg = a650_hwcg_regs, + .hwcg_count = ARRAY_SIZE(a650_hwcg_regs), + .vbif = a650_gbif_regs, + .vbif_count = ARRAY_SIZE(a650_gbif_regs), + .veto_fal10 = true, + .pdc_in_aop = true, + .hang_detect_cycles = 0x3ffff, + .protected_regs = a620_protected_regs, + .disable_tseskip = true, + .highest_bank_bit = 16, +}; + +static const struct adreno_a6xx_core adreno_gpu_core_a680 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_A680, 6, 8, 0, ANY_ID), + .gpudev = &adreno_a6xx_gmu_gpudev.base, + .perfcounters = &adreno_a6xx_legacy_perfcounters, + .gmem_base = 0x100000, + .gmem_size = SZ_2M, + .bus_width = 32, + .snapshot_size = SZ_1M, + }, + .prim_fifo_threshold = 0x00400000, + .gmu_major = 2, + .gmu_minor = 0, + .sqefw_name = "a630_sqe.fw", + .gmufw_name = "a640_gmu.bin", + .zap_name = "a640_zap", + .hwcg = a640_hwcg_regs, + .hwcg_count = ARRAY_SIZE(a640_hwcg_regs), + .vbif = a640_vbif_regs, + .vbif_count = ARRAY_SIZE(a640_vbif_regs), + .hang_detect_cycles = 0xcfffff, + .protected_regs = a630_protected_regs, + .disable_tseskip = true, + .highest_bank_bit = 16, +}; + +static const struct kgsl_regmap_list a612_hwcg_regs[] = { + {A6XX_RBBM_CLOCK_CNTL_SP0, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220}, + {A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000081}, + {A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF}, + {A6XX_RBBM_CLOCK_CNTL_TP0, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222}, + {A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111}, + {A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111}, + {A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111}, + {A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111}, + {A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777}, + {A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777}, + {A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777}, + {A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777}, + {A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01202222}, + 
{A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220}, + {A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00}, + {A6XX_RBBM_CLOCK_CNTL_RAC, 0x05522022}, + {A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555}, + {A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011}, + {A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044}, + {A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222}, + {A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222}, + {A6XX_RBBM_CLOCK_MODE_GPC, 0x02222222}, + {A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002}, + {A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222}, + {A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000}, + {A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}, + {A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200}, + {A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000}, + {A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000}, + {A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000}, + {A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004}, + {A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000}, + {A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222}, + {A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004}, + {A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002}, + {A6XX_RBBM_ISDB_CNT, 0x00000182}, + {A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000}, + {A6XX_RBBM_SP_HYST_CNT, 0x00000000}, + {A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222}, + {A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111}, + {A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555}, +}; + +static const struct adreno_a6xx_core adreno_gpu_core_a612 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_A612, 6, 1, 2, ANY_ID), + .features = ADRENO_CONTENT_PROTECTION | + ADRENO_IOCOHERENT | ADRENO_PREEMPTION | ADRENO_IFPC, + .gpudev = &adreno_a6xx_rgmu_gpudev, + .perfcounters = &adreno_a6xx_legacy_perfcounters, + .gmem_base = 0x100000, + .gmem_size = (SZ_128K + SZ_4K), + .bus_width = 32, + .snapshot_size = SZ_1M, + }, + .prim_fifo_threshold = 0x00080000, + .sqefw_name = "a630_sqe.fw", + .gmufw_name = "a612_rgmu.bin", + .zap_name = "a612_zap", + .hwcg = a612_hwcg_regs, + .hwcg_count = ARRAY_SIZE(a612_hwcg_regs), + .vbif = a640_vbif_regs, + .vbif_count = ARRAY_SIZE(a640_vbif_regs), + .hang_detect_cycles = 0x3fffff, + .protected_regs = a630_protected_regs, + .highest_bank_bit = 14, +}; + +static const struct adreno_a6xx_core adreno_gpu_core_a616 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_A616, 6, 1, 6, ANY_ID), + .features = ADRENO_PREEMPTION | + ADRENO_CONTENT_PROTECTION | ADRENO_IFPC | + ADRENO_IOCOHERENT, + .gpudev = &adreno_a630_gpudev.base, + .perfcounters = &adreno_a6xx_legacy_perfcounters, + .gmem_base = 0x100000, + .gmem_size = SZ_512K, + .bus_width = 32, + .snapshot_size = SZ_1M, + }, + .prim_fifo_threshold = 0x0018000, + .gmu_major = 1, + .gmu_minor = 3, + .sqefw_name = "a630_sqe.fw", + .gmufw_name = "a630_gmu.bin", + .zap_name = "a615_zap", + .hwcg = a615_hwcg_regs, + .hwcg_count = ARRAY_SIZE(a615_hwcg_regs), + .vbif = a615_gbif_regs, + .vbif_count = ARRAY_SIZE(a615_gbif_regs), + .hang_detect_cycles = 0xcfffff, + .protected_regs = a630_protected_regs, + .highest_bank_bit = 14, +}; + +static const struct adreno_a6xx_core adreno_gpu_core_a610 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_A610, 6, 1, 0, ANY_ID), + .features = ADRENO_CONTENT_PROTECTION | + ADRENO_PREEMPTION, + .gpudev = &adreno_a6xx_gpudev, + .perfcounters = &adreno_a6xx_legacy_perfcounters, + .gmem_base = 0x100000, + .gmem_size = (SZ_128K + SZ_4K), + .bus_width = 32, + }, + .prim_fifo_threshold = 0x00080000, + .sqefw_name = "a630_sqe.fw", + .zap_name = "a610_zap", + .hwcg = a612_hwcg_regs, + .hwcg_count = ARRAY_SIZE(a612_hwcg_regs), + .vbif = a640_vbif_regs, + .vbif_count = ARRAY_SIZE(a640_vbif_regs), + .hang_detect_cycles = 0x3ffff, + .protected_regs = a630_protected_regs, + .highest_bank_bit = 14, 
+}; + +static const struct kgsl_regmap_list a660_hwcg_regs[] = { + {A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222}, + {A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220}, + {A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080}, + {A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF}, + {A6XX_RBBM_CLOCK_CNTL_TP0, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222}, + {A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111}, + {A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111}, + {A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111}, + {A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111}, + {A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777}, + {A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777}, + {A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777}, + {A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777}, + {A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222}, + {A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01002222}, + {A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220}, + {A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00}, + {A6XX_RBBM_CLOCK_CNTL_RAC, 0x25222022}, + {A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555}, + {A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011}, + {A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044}, + {A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222}, + {A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222}, + {A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222}, + {A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002}, + {A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222}, + {A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000}, + {A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}, + {A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200}, + {A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000}, + {A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000}, + {A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000}, + {A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004}, + {A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000}, + {A6XX_RBBM_CLOCK_CNTL_TEX_FCHE, 0x00000222}, + {A6XX_RBBM_CLOCK_DELAY_TEX_FCHE, 0x00000111}, + {A6XX_RBBM_CLOCK_HYST_TEX_FCHE, 0x00000000}, + {A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222}, + {A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004}, + {A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002}, + {A6XX_RBBM_ISDB_CNT, 0x00000182}, + {A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000}, + {A6XX_RBBM_SP_HYST_CNT, 0x00000000}, + {A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222}, + {A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111}, + {A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555}, +}; + +/* A660 protected register list */ +static const struct adreno_protected_regs a660_protected_regs[] = { + { A6XX_CP_PROTECT_REG + 0, 0x00000, 0x004ff, 0 }, + { A6XX_CP_PROTECT_REG + 1, 0x00501, 0x00506, 0 }, + { A6XX_CP_PROTECT_REG + 2, 0x0050b, 0x007ff, 0 }, + { A6XX_CP_PROTECT_REG + 3, 0x0050e, 0x0050e, 1 }, + { A6XX_CP_PROTECT_REG + 4, 0x00510, 0x00510, 1 }, + { A6XX_CP_PROTECT_REG + 5, 0x00534, 0x00534, 1 }, + { A6XX_CP_PROTECT_REG + 6, 0x00800, 0x00882, 1 }, + { A6XX_CP_PROTECT_REG + 7, 0x008a0, 0x008a8, 1 }, + { A6XX_CP_PROTECT_REG + 8, 0x008ab, 0x008cf, 1 }, + { A6XX_CP_PROTECT_REG + 9, 0x008d0, 0x0098c, 0 }, + { A6XX_CP_PROTECT_REG + 10, 0x00900, 0x0094d, 1 }, + { A6XX_CP_PROTECT_REG + 11, 0x0098d, 0x00bff, 1 }, + { A6XX_CP_PROTECT_REG + 12, 0x00e00, 0x00e01, 1 }, + { A6XX_CP_PROTECT_REG + 13, 0x00e03, 0x00e0f, 1 }, + { A6XX_CP_PROTECT_REG + 14, 0x03c00, 0x03cc3, 1 }, + { A6XX_CP_PROTECT_REG + 15, 0x03cc4, 0x05cc3, 0 }, + { A6XX_CP_PROTECT_REG + 16, 0x08630, 0x087ff, 1 }, + { A6XX_CP_PROTECT_REG + 17, 0x08e00, 0x08e00, 1 }, + { A6XX_CP_PROTECT_REG + 18, 0x08e08, 0x08e08, 1 }, + { A6XX_CP_PROTECT_REG + 19, 0x08e50, 0x08e6f, 1 }, + { A6XX_CP_PROTECT_REG + 20, 0x08e80, 0x090ff, 1 }, + { A6XX_CP_PROTECT_REG + 21, 0x09624, 0x097ff, 1 }, + { A6XX_CP_PROTECT_REG + 22, 0x09e60, 0x09e71, 1 }, + { A6XX_CP_PROTECT_REG + 
23, 0x09e78, 0x09fff, 1 }, + { A6XX_CP_PROTECT_REG + 24, 0x0a630, 0x0a7ff, 1 }, + { A6XX_CP_PROTECT_REG + 25, 0x0ae02, 0x0ae02, 1 }, + { A6XX_CP_PROTECT_REG + 26, 0x0ae50, 0x0af7f, 1 }, + { A6XX_CP_PROTECT_REG + 27, 0x0b604, 0x0b604, 1 }, + { A6XX_CP_PROTECT_REG + 28, 0x0b608, 0x0b60e, 1 }, + { A6XX_CP_PROTECT_REG + 29, 0x0be02, 0x0be03, 1 }, + { A6XX_CP_PROTECT_REG + 30, 0x0be20, 0x0bf7f, 1 }, + { A6XX_CP_PROTECT_REG + 31, 0x0d000, 0x0d5ff, 1 }, + { A6XX_CP_PROTECT_REG + 32, 0x0f000, 0x0fbff, 1 }, + { A6XX_CP_PROTECT_REG + 33, 0x0fc00, 0x11bff, 0 }, + { A6XX_CP_PROTECT_REG + 34, 0x18400, 0x1a3ff, 1 }, + { A6XX_CP_PROTECT_REG + 35, 0x1a400, 0x1c3ff, 1 }, + { A6XX_CP_PROTECT_REG + 36, 0x1f400, 0x1f843, 1 }, + { A6XX_CP_PROTECT_REG + 37, 0x1f844, 0x1f8bf, 0 }, + { A6XX_CP_PROTECT_REG + 38, 0x1f860, 0x1f860, 1 }, + { A6XX_CP_PROTECT_REG + 39, 0x1f887, 0x1f8a2, 1 }, + { A6XX_CP_PROTECT_REG + 47, 0x1f8c0, 0x1f8c0, 1 }, + { 0 }, +}; + +static const struct adreno_a6xx_core adreno_gpu_core_a660 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_A660, 6, 6, 0, 0), + .features = ADRENO_APRIV | + ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION | + ADRENO_IFPC | ADRENO_PREEMPTION | ADRENO_L3_VOTE, + .gpudev = &adreno_a6xx_gmu_gpudev.base, + .perfcounters = &adreno_a6xx_perfcounters, + .gmem_base = 0, + .gmem_size = SZ_1M + SZ_512K, + .bus_width = 32, + .snapshot_size = SZ_2M, + }, + .prim_fifo_threshold = 0x00300000, + .gmu_major = 2, + .gmu_minor = 0, + .sqefw_name = "a660_sqe.fw", + .gmufw_name = "a660_gmu.bin", + .zap_name = "a660_zap", + .hwcg = a660_hwcg_regs, + .hwcg_count = ARRAY_SIZE(a660_hwcg_regs), + .vbif = a650_gbif_regs, + .vbif_count = ARRAY_SIZE(a650_gbif_regs), + .hang_detect_cycles = 0xcfffff, + .veto_fal10 = true, + .protected_regs = a660_protected_regs, + .disable_tseskip = true, + .highest_bank_bit = 16, + .pdc_in_aop = true, + .ctxt_record_size = 2496 * 1024, +}; + +static const struct adreno_a6xx_core adreno_gpu_core_a660v2 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_A660, 6, 6, 0, ANY_ID), + .features = ADRENO_APRIV | + ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION | + ADRENO_IFPC | ADRENO_PREEMPTION | ADRENO_ACD | + ADRENO_L3_VOTE, + .gpudev = &adreno_a6xx_gmu_gpudev.base, + .perfcounters = &adreno_a6xx_perfcounters, + .gmem_base = 0, + .gmem_size = SZ_1M + SZ_512K, + .bus_width = 32, + .snapshot_size = SZ_2M, + }, + .prim_fifo_threshold = 0x00300000, + .gmu_major = 2, + .gmu_minor = 0, + .sqefw_name = "a660_sqe.fw", + .gmufw_name = "a660_gmu.bin", + .zap_name = "a660_zap", + .hwcg = a660_hwcg_regs, + .hwcg_count = ARRAY_SIZE(a660_hwcg_regs), + .vbif = a650_gbif_regs, + .vbif_count = ARRAY_SIZE(a650_gbif_regs), + .hang_detect_cycles = 0xcfffff, + .veto_fal10 = true, + .protected_regs = a660_protected_regs, + .disable_tseskip = true, + .highest_bank_bit = 16, + .pdc_in_aop = true, + .ctxt_record_size = 2496 * 1024, +}; + +static const struct adreno_a6xx_core adreno_gpu_core_a660_shima = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_A660, 6, 6, 0, ANY_ID), + .compatible = "qcom,adreno-gpu-a660-shima", + .features = ADRENO_APRIV | + ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION | + ADRENO_IFPC | ADRENO_PREEMPTION | ADRENO_ACD, + .gpudev = &adreno_a6xx_gmu_gpudev.base, + .perfcounters = &adreno_a6xx_perfcounters, + .gmem_base = 0, + .gmem_size = SZ_1M + SZ_512K, + .bus_width = 32, + .snapshot_size = SZ_2M, + }, + .prim_fifo_threshold = 0x00300000, + .gmu_major = 2, + .gmu_minor = 0, + .sqefw_name = "a660_sqe.fw", + .gmufw_name = "a660_gmu.bin", + .zap_name = "a660_zap", + .hwcg = 
a660_hwcg_regs, + .hwcg_count = ARRAY_SIZE(a660_hwcg_regs), + .vbif = a650_gbif_regs, + .vbif_count = ARRAY_SIZE(a650_gbif_regs), + .hang_detect_cycles = 0x3ffff, + .veto_fal10 = true, + .protected_regs = a660_protected_regs, + .disable_tseskip = true, + .highest_bank_bit = 15, + .pdc_in_aop = true, + .ctxt_record_size = 2496 * 1024, +}; + +static const struct adreno_a6xx_core adreno_gpu_core_a635 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_A635, 6, 3, 5, ANY_ID), + .features = ADRENO_APRIV | ADRENO_IOCOHERENT | + ADRENO_CONTENT_PROTECTION, + .gpudev = &adreno_a6xx_gmu_gpudev.base, + .perfcounters = &adreno_a6xx_perfcounters, + .gmem_base = 0, + .gmem_size = SZ_512K, + .bus_width = 32, + .snapshot_size = SZ_2M, + }, + .prim_fifo_threshold = 0x00200000, + .gmu_major = 2, + .gmu_minor = 0, + .sqefw_name = "a660_sqe.fw", + .gmufw_name = "a660_gmu.bin", + .zap_name = "a660_zap", + .hwcg = a660_hwcg_regs, + .hwcg_count = ARRAY_SIZE(a660_hwcg_regs), + .vbif = a650_gbif_regs, + .vbif_count = ARRAY_SIZE(a650_gbif_regs), + .hang_detect_cycles = 0x3ffff, + .veto_fal10 = true, + .protected_regs = a660_protected_regs, + .disable_tseskip = true, + .highest_bank_bit = 15, + .pdc_in_aop = true, + .ctxt_record_size = 2496 * 1024, +}; + +static const struct kgsl_regmap_list gen7_0_0_gbif_regs[] = { + { GEN7_GBIF_QSB_SIDE0, 0x00071620 }, + { GEN7_GBIF_QSB_SIDE1, 0x00071620 }, + { GEN7_GBIF_QSB_SIDE2, 0x00071620 }, + { GEN7_GBIF_QSB_SIDE3, 0x00071620 }, + { GEN7_RBBM_GBIF_CLIENT_QOS_CNTL, 0x2120212 }, +}; + +static const struct kgsl_regmap_list gen7_0_0_hwcg_regs[] = { + { GEN7_RBBM_CLOCK_CNTL_SP0, 0x02222222 }, + { GEN7_RBBM_CLOCK_CNTL2_SP0, 0x02222222 }, + { GEN7_RBBM_CLOCK_HYST_SP0, 0x0000f3cf }, + { GEN7_RBBM_CLOCK_DELAY_SP0, 0x00000080 }, + { GEN7_RBBM_CLOCK_CNTL_TP0, 0x22222222 }, + { GEN7_RBBM_CLOCK_CNTL2_TP0, 0x22222222 }, + { GEN7_RBBM_CLOCK_CNTL3_TP0, 0x22222222 }, + { GEN7_RBBM_CLOCK_CNTL4_TP0, 0x00222222 }, + { GEN7_RBBM_CLOCK_HYST_TP0, 0x77777777 }, + { GEN7_RBBM_CLOCK_HYST2_TP0, 0x77777777 }, + { GEN7_RBBM_CLOCK_HYST3_TP0, 0x77777777 }, + { GEN7_RBBM_CLOCK_HYST4_TP0, 0x00077777 }, + { GEN7_RBBM_CLOCK_DELAY_TP0, 0x11111111 }, + { GEN7_RBBM_CLOCK_DELAY2_TP0, 0x11111111 }, + { GEN7_RBBM_CLOCK_DELAY3_TP0, 0x11111111 }, + { GEN7_RBBM_CLOCK_DELAY4_TP0, 0x00011111 }, + { GEN7_RBBM_CLOCK_CNTL_UCHE, 0x22222222 }, + { GEN7_RBBM_CLOCK_HYST_UCHE, 0x00000004 }, + { GEN7_RBBM_CLOCK_DELAY_UCHE, 0x00000002 }, + { GEN7_RBBM_CLOCK_CNTL_RB0, 0x22222222 }, + { GEN7_RBBM_CLOCK_CNTL2_RB0, 0x01002222 }, + { GEN7_RBBM_CLOCK_CNTL_CCU0, 0x00002220 }, + { GEN7_RBBM_CLOCK_HYST_RB_CCU0, 0x44000f00 }, + { GEN7_RBBM_CLOCK_CNTL_RAC, 0x25222022 }, + { GEN7_RBBM_CLOCK_CNTL2_RAC, 0x00555555 }, + { GEN7_RBBM_CLOCK_DELAY_RAC, 0x00000011 }, + { GEN7_RBBM_CLOCK_HYST_RAC, 0x00440044 }, + { GEN7_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222 }, + { GEN7_RBBM_CLOCK_MODE2_GRAS, 0x00000222 }, + { GEN7_RBBM_CLOCK_MODE_BV_GRAS, 0x00222222 }, + { GEN7_RBBM_CLOCK_MODE_GPC, 0x02222223 }, + { GEN7_RBBM_CLOCK_MODE_VFD, 0x00002222 }, + { GEN7_RBBM_CLOCK_MODE_BV_GPC, 0x00222223 }, + { GEN7_RBBM_CLOCK_MODE_BV_VFD, 0x00002222 }, + { GEN7_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000 }, + { GEN7_RBBM_CLOCK_HYST_GPC, 0x04104004 }, + { GEN7_RBBM_CLOCK_HYST_VFD, 0x00000000 }, + { GEN7_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000 }, + { GEN7_RBBM_CLOCK_DELAY_GPC, 0x00000200 }, + { GEN7_RBBM_CLOCK_DELAY_VFD, 0x00002222 }, + { GEN7_RBBM_CLOCK_MODE_HLSQ, 0x00002222 }, + { GEN7_RBBM_CLOCK_DELAY_HLSQ, 0x00000000 }, + { GEN7_RBBM_CLOCK_HYST_HLSQ, 0x00000000 }, + { 
GEN7_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002 }, + { GEN7_RBBM_CLOCK_MODE_BV_LRZ, 0x55555552 }, + { GEN7_RBBM_CLOCK_MODE_CP, 0x00000223 }, + { GEN7_RBBM_CLOCK_CNTL, 0x8aa8aa82 }, + { GEN7_RBBM_ISDB_CNT, 0x00000182 }, + { GEN7_RBBM_RAC_THRESHOLD_CNT, 0x00000000 }, + { GEN7_RBBM_SP_HYST_CNT, 0x00000000 }, + { GEN7_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222 }, + { GEN7_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111 }, + { GEN7_RBBM_CLOCK_HYST_GMU_GX, 0x00000555 }, +}; + +/* GEN7_0_0 protected register list */ +static const struct gen7_protected_regs gen7_0_0_protected_regs[] = { + { GEN7_CP_PROTECT_REG + 0, 0x00000, 0x004ff, 0 }, + { GEN7_CP_PROTECT_REG + 1, 0x0050b, 0x00698, 0 }, + { GEN7_CP_PROTECT_REG + 2, 0x0050e, 0x0050e, 1 }, + { GEN7_CP_PROTECT_REG + 3, 0x00510, 0x00510, 1 }, + { GEN7_CP_PROTECT_REG + 4, 0x00534, 0x00534, 1 }, + { GEN7_CP_PROTECT_REG + 5, 0x00699, 0x00882, 1 }, + { GEN7_CP_PROTECT_REG + 6, 0x008a0, 0x008a8, 1 }, + { GEN7_CP_PROTECT_REG + 7, 0x008ab, 0x008cf, 1 }, + { GEN7_CP_PROTECT_REG + 8, 0x008d0, 0x00a40, 0 }, + { GEN7_CP_PROTECT_REG + 9, 0x00900, 0x0094d, 1 }, + { GEN7_CP_PROTECT_REG + 10, 0x0098d, 0x00a3f, 1 }, + { GEN7_CP_PROTECT_REG + 11, 0x00a41, 0x00bff, 1 }, + { GEN7_CP_PROTECT_REG + 12, 0x00df0, 0x00df1, 1 }, + { GEN7_CP_PROTECT_REG + 13, 0x00e01, 0x00e01, 1 }, + { GEN7_CP_PROTECT_REG + 14, 0x00e07, 0x00e0f, 1 }, + { GEN7_CP_PROTECT_REG + 15, 0x03c00, 0x03cc3, 1 }, + { GEN7_CP_PROTECT_REG + 16, 0x03cc4, 0x05cc3, 0 }, + { GEN7_CP_PROTECT_REG + 17, 0x08630, 0x087ff, 1 }, + { GEN7_CP_PROTECT_REG + 18, 0x08e00, 0x08e00, 1 }, + { GEN7_CP_PROTECT_REG + 19, 0x08e08, 0x08e08, 1 }, + { GEN7_CP_PROTECT_REG + 20, 0x08e50, 0x08e6f, 1 }, + { GEN7_CP_PROTECT_REG + 21, 0x08e80, 0x09100, 1 }, + { GEN7_CP_PROTECT_REG + 22, 0x09624, 0x097ff, 1 }, + { GEN7_CP_PROTECT_REG + 23, 0x09e40, 0x09e40, 1 }, + { GEN7_CP_PROTECT_REG + 24, 0x09e64, 0x09e71, 1 }, + { GEN7_CP_PROTECT_REG + 25, 0x09e78, 0x09fff, 1 }, + { GEN7_CP_PROTECT_REG + 26, 0x0a630, 0x0a7ff, 1 }, + { GEN7_CP_PROTECT_REG + 27, 0x0ae02, 0x0ae02, 1 }, + { GEN7_CP_PROTECT_REG + 28, 0x0ae50, 0x0ae5f, 1 }, + { GEN7_CP_PROTECT_REG + 29, 0x0ae66, 0x0ae69, 1 }, + { GEN7_CP_PROTECT_REG + 30, 0x0ae6f, 0x0ae72, 1 }, + { GEN7_CP_PROTECT_REG + 31, 0x0b604, 0x0b607, 1 }, + { GEN7_CP_PROTECT_REG + 32, 0x0ec00, 0x0fbff, 1 }, + { GEN7_CP_PROTECT_REG + 33, 0x0fc00, 0x11bff, 0 }, + { GEN7_CP_PROTECT_REG + 34, 0x18400, 0x18453, 1 }, + { GEN7_CP_PROTECT_REG + 35, 0x18454, 0x18458, 0 }, + { GEN7_CP_PROTECT_REG + 47, 0x18459, 0x18459, 1 }, + { 0 }, +}; + +static const struct adreno_gen7_core adreno_gpu_core_gen7_0_0 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_GEN7_0_0, + UINT_MAX, UINT_MAX, UINT_MAX, 0), + .compatible = "qcom,adreno-gpu-gen7-0-0", + .chipid = 0x07030000, + .features = ADRENO_APRIV | ADRENO_IOCOHERENT | + ADRENO_CONTENT_PROTECTION | ADRENO_IFPC | + ADRENO_ACD | ADRENO_L3_VOTE | ADRENO_BCL, + .gpudev = &adreno_gen7_gmu_gpudev.base, + .perfcounters = &adreno_gen7_perfcounters, + .gmem_base = 0, + .gmem_size = SZ_2M, + .bus_width = 32, + .snapshot_size = SZ_4M, + }, + .sqefw_name = "a730_sqe.fw", + .gmufw_name = "gmu_gen70000.bin", + .gmufw_bak_name = "c500_gmu.bin", + .zap_name = "a730_zap", + .hwcg = gen7_0_0_hwcg_regs, + .hwcg_count = ARRAY_SIZE(gen7_0_0_hwcg_regs), + .gbif = gen7_0_0_gbif_regs, + .gbif_count = ARRAY_SIZE(gen7_0_0_gbif_regs), + .hang_detect_cycles = 0xcfffff, + .protected_regs = gen7_0_0_protected_regs, + .highest_bank_bit = 16, +}; + +static const struct adreno_gen7_core adreno_gpu_core_gen7_0_1 = { + .base = { + 
DEFINE_ADRENO_REV(ADRENO_REV_GEN7_0_1, + UINT_MAX, UINT_MAX, UINT_MAX, ANY_ID), + .compatible = "qcom,adreno-gpu-gen7-0-1", + .chipid = 0x07030001, + .features = ADRENO_APRIV | ADRENO_IOCOHERENT | + ADRENO_CONTENT_PROTECTION | ADRENO_IFPC | + ADRENO_ACD | ADRENO_L3_VOTE | ADRENO_BCL, + .gpudev = &adreno_gen7_gmu_gpudev.base, + .perfcounters = &adreno_gen7_perfcounters, + .gmem_base = 0, + .gmem_size = SZ_2M, + .bus_width = 32, + .snapshot_size = SZ_4M, + }, + .sqefw_name = "a730_sqe.fw", + .gmufw_name = "gmu_gen70000.bin", + .gmufw_bak_name = "c500_gmu.bin", + .zap_name = "a730_zap", + .hwcg = gen7_0_0_hwcg_regs, + .hwcg_count = ARRAY_SIZE(gen7_0_0_hwcg_regs), + .gbif = gen7_0_0_gbif_regs, + .gbif_count = ARRAY_SIZE(gen7_0_0_gbif_regs), + .hang_detect_cycles = 0xcfffff, + .protected_regs = gen7_0_0_protected_regs, + .highest_bank_bit = 16, +}; + +static const struct adreno_gpu_core *adreno_gpulist[] = { + &adreno_gpu_core_a306.base, + &adreno_gpu_core_a306a.base, + &adreno_gpu_core_a304.base, + &adreno_gpu_core_a405, /* Deprecated */ + &adreno_gpu_core_a418, /* Deprecated */ + &adreno_gpu_core_a420, /* Deprecated */ + &adreno_gpu_core_a430, /* Deprecated */ + &adreno_gpu_core_a530v1, /* Deprecated */ + &adreno_gpu_core_a530v2.base, + &adreno_gpu_core_a530v3.base, + &adreno_gpu_core_a505.base, + &adreno_gpu_core_a506.base, + &adreno_gpu_core_a510.base, + &adreno_gpu_core_a540v1, /* Deprecated */ + &adreno_gpu_core_a540v2.base, + &adreno_gpu_core_a512.base, + &adreno_gpu_core_a508.base, + &adreno_gpu_core_a630v1, /* Deprecated */ + &adreno_gpu_core_a630v2.base, + &adreno_gpu_core_a615.base, + &adreno_gpu_core_a618.base, + &adreno_gpu_core_a619.base, + &adreno_gpu_core_a619_variant.base, + &adreno_gpu_core_a620.base, + &adreno_gpu_core_a635.base, + &adreno_gpu_core_a640.base, + &adreno_gpu_core_a650.base, + &adreno_gpu_core_a650v2.base, + &adreno_gpu_core_a660.base, + &adreno_gpu_core_a660v2.base, + &adreno_gpu_core_a680.base, + &adreno_gpu_core_a612.base, + &adreno_gpu_core_a616.base, + &adreno_gpu_core_a610.base, + &adreno_gpu_core_a660_shima.base, + &adreno_gpu_core_gen7_0_0.base, + &adreno_gpu_core_gen7_0_1.base, +}; diff --git a/adreno.c b/adreno.c new file mode 100644 index 0000000000..ed9e778e9f --- /dev/null +++ b/adreno.c @@ -0,0 +1,3214 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. 
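One detail of the adreno_gpulist[] table above that is easy to miss: each family-specific core description (adreno_a6xx_core, adreno_gen7_core, ...) embeds a generic adreno_gpu_core as a member named base, and only the address of that base is stored in the list. Family code can then recover its wrapper with the usual container_of arithmetic; the standalone sketch below shows the idea with heavily trimmed structs and an illustrative helper name.

    #include <stddef.h>
    #include <stdio.h>

    /* Heavily trimmed stand-ins for the structures defined above */
    struct adreno_gpu_core { unsigned int bus_width; };

    struct adreno_a6xx_core {
        struct adreno_gpu_core base;    /* common part, pointed at by the list */
        const char *sqefw_name;         /* family-specific extras follow */
    };

    #define to_a6xx_core(p) \
        ((const struct adreno_a6xx_core *) \
         ((const char *)(p) - offsetof(struct adreno_a6xx_core, base)))

    int main(void)
    {
        static const struct adreno_a6xx_core core = {
            .base = { .bus_width = 32 },
            .sqefw_name = "a630_sqe.fw",
        };
        const struct adreno_gpu_core *entry = &core.base;  /* what gpulist holds */

        /* Recover the family wrapper from the generic list entry */
        printf("%s\n", to_a6xx_core(entry)->sqefw_name);
        return 0;
    }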
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "adreno.h" +#include "adreno_a3xx.h" +#include "adreno_a5xx.h" +#include "adreno_a6xx.h" +#include "adreno_compat.h" +#include "adreno_pm4types.h" +#include "adreno_trace.h" +#include "kgsl_bus.h" +#include "kgsl_trace.h" +#include "kgsl_util.h" + +/* Include the master list of GPU cores that are supported */ +#include "adreno-gpulist.h" + +static void adreno_input_work(struct work_struct *work); +static int adreno_soft_reset(struct kgsl_device *device); +static unsigned int counter_delta(struct kgsl_device *device, + unsigned int reg, unsigned int *counter); +static struct device_node * + adreno_get_gpu_model_node(struct platform_device *pdev); + +static struct adreno_device device_3d0; + +/* Nice level for the higher priority GPU start thread */ +int adreno_wake_nice = -7; + +/* Number of milliseconds to stay active active after a wake on touch */ +unsigned int adreno_wake_timeout = 100; + +bool adreno_regulator_disable_poll(struct kgsl_device *device, + struct regulator *reg, u32 offset, u32 timeout) +{ + u32 val; + int ret; + + if (IS_ERR_OR_NULL(reg)) + return true; + + regulator_disable(reg); + + ret = kgsl_regmap_read_poll_timeout(&device->regmap, offset, + val, !(val & BIT(31)), 100, timeout * 1000); + + return ret ? false : true; +} + +static u32 get_ucode_version(const u32 *data) +{ + u32 version; + + version = data[1]; + + if ((version & 0xf) != 0xa) + return version; + + version &= ~0xfff; + return version | ((data[3] & 0xfff000) >> 12); +} + +int adreno_get_firmware(struct adreno_device *adreno_dev, + const char *fwfile, struct adreno_firmware *firmware) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + const struct firmware *fw = NULL; + int ret; + + if (!IS_ERR_OR_NULL(firmware->memdesc)) + return 0; + + ret = request_firmware(&fw, fwfile, &device->pdev->dev); + + if (ret) { + dev_err(device->dev, "request_firmware(%s) failed: %d\n", + fwfile, ret); + return ret; + } + + firmware->memdesc = kgsl_allocate_global(device, fw->size - 4, 0, + KGSL_MEMFLAGS_GPUREADONLY, KGSL_MEMDESC_UCODE, + "ucode"); + + ret = PTR_ERR_OR_ZERO(firmware->memdesc); + if (!ret) { + memcpy(firmware->memdesc->hostptr, &fw->data[4], fw->size - 4); + firmware->size = (fw->size - 4) / sizeof(u32); + firmware->version = get_ucode_version((u32 *)fw->data); + } + + release_firmware(fw); + return ret; +} + + +int adreno_zap_shader_load(struct adreno_device *adreno_dev, + const char *name) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + + if (!name || adreno_dev->zap_loaded) + return 0; + + ret = kgsl_zap_shader_load(&device->pdev->dev, name); + if (!ret) + adreno_dev->zap_loaded = true; + + return ret; +} + +/** + * adreno_readreg64() - Read a 64bit register by getting its offset from the + * offset array defined in gpudev node + * @adreno_dev: Pointer to the the adreno device + * @lo: lower 32bit register enum that is to be read + * @hi: higher 32bit register enum that is to be read + * @val: 64 bit Register value read is placed here + */ +void adreno_readreg64(struct adreno_device *adreno_dev, + enum adreno_regs lo, enum adreno_regs hi, uint64_t *val) +{ + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + unsigned int val_lo = 0, val_hi = 0; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (adreno_checkreg_off(adreno_dev, lo)) + kgsl_regread(device, gpudev->reg_offsets[lo], 
&val_lo); + if (adreno_checkreg_off(adreno_dev, hi)) + kgsl_regread(device, gpudev->reg_offsets[hi], &val_hi); + + *val = (val_lo | ((uint64_t)val_hi << 32)); +} + +/** + * adreno_get_rptr() - Get the current ringbuffer read pointer + * @rb: Pointer the ringbuffer to query + * + * Get the latest rptr + */ +unsigned int adreno_get_rptr(struct adreno_ringbuffer *rb) +{ + struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 rptr = 0; + + if (adreno_is_a3xx(adreno_dev)) + kgsl_regread(device, A3XX_CP_RB_RPTR, &rptr); + else + kgsl_sharedmem_readl(device->scratch, &rptr, + SCRATCH_RPTR_OFFSET(rb->id)); + + return rptr; +} + +static void adreno_touch_wakeup(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + /* + * Don't schedule adreno_start in a high priority workqueue, we are + * already in a workqueue which should be sufficient + */ + kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE); + + /* + * When waking up from a touch event we want to stay active long enough + * for the user to send a draw command. The default idle timer timeout + * is shorter than we want so go ahead and push the idle timer out + * further for this special case + */ + mod_timer(&device->idle_timer, + jiffies + msecs_to_jiffies(adreno_wake_timeout)); + +} + +/* + * A workqueue callback responsible for actually turning on the GPU after a + * touch event. kgsl_pwrctrl_change_state(ACTIVE) is used without any + * active_count protection to avoid the need to maintain state. Either + * somebody will start using the GPU or the idle timer will fire and put the + * GPU back into slumber. + */ +static void adreno_input_work(struct work_struct *work) +{ + struct adreno_device *adreno_dev = container_of(work, + struct adreno_device, input_work); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + const struct adreno_power_ops *ops = ADRENO_POWER_OPS(adreno_dev); + + mutex_lock(&device->mutex); + + adreno_dev->wake_on_touch = true; + + ops->touch_wakeup(adreno_dev); + + mutex_unlock(&device->mutex); +} + +/* + * Process input events and schedule work if needed. At this point we are only + * interested in groking EV_ABS touchscreen events + */ +static void adreno_input_event(struct input_handle *handle, unsigned int type, + unsigned int code, int value) +{ + struct kgsl_device *device = handle->handler->private; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + /* Only consider EV_ABS (touch) events */ + if (type != EV_ABS) + return; + + /* + * Don't do anything if anything hasn't been rendered since we've been + * here before + */ + + if (adreno_dev->wake_on_touch) + return; + + if (gmu_core_isenabled(device)) { + schedule_work(&adreno_dev->input_work); + return; + } + + /* + * If the device is in nap, kick the idle timer to make sure that we + * don't go into slumber before the first render. If the device is + * already in slumber schedule the wake. 
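The get_ucode_version() helper earlier in this file repays a worked example: when the low nibble of the second firmware dword is 0xa, the bottom 12 bits of the reported version are replaced by bits 23:12 of the fourth dword. The standalone restatement below uses made-up header values purely for illustration.

    #include <stdint.h>
    #include <stdio.h>

    /* Same arithmetic as get_ucode_version() above, lifted out for clarity */
    static uint32_t ucode_version(const uint32_t *data)
    {
        uint32_t version = data[1];

        if ((version & 0xf) != 0xa)     /* legacy layout: use dword 1 as-is */
            return version;

        version &= ~0xfffu;             /* new layout: low 12 bits come from */
        return version | ((data[3] & 0xfff000) >> 12);  /* dword 3, bits 23:12 */
    }

    int main(void)
    {
        /* Made-up firmware header dwords; only indices 1 and 3 matter here */
        const uint32_t data[4] = { 0, 0x016dc00a, 0, 0x00123000 };

        printf("0x%08x\n", (unsigned int)ucode_version(data));  /* 0x016dc123 */
        return 0;
    }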
+ */ + + if (device->state == KGSL_STATE_NAP) { + /* + * Set the wake on touch bit to keep from coming back here and + * keeping the device in nap without rendering + */ + adreno_dev->wake_on_touch = true; + kgsl_start_idle_timer(device); + + } else if (device->state == KGSL_STATE_SLUMBER) { + schedule_work(&adreno_dev->input_work); + } +} + +#ifdef CONFIG_INPUT +static int adreno_input_connect(struct input_handler *handler, + struct input_dev *dev, const struct input_device_id *id) +{ + struct input_handle *handle; + int ret; + + handle = kzalloc(sizeof(*handle), GFP_KERNEL); + if (handle == NULL) + return -ENOMEM; + + handle->dev = dev; + handle->handler = handler; + handle->name = handler->name; + + ret = input_register_handle(handle); + if (ret) { + kfree(handle); + return ret; + } + + ret = input_open_device(handle); + if (ret) { + input_unregister_handle(handle); + kfree(handle); + } + + return ret; +} + +static void adreno_input_disconnect(struct input_handle *handle) +{ + input_close_device(handle); + input_unregister_handle(handle); + kfree(handle); +} +#else +static int adreno_input_connect(struct input_handler *handler, + struct input_dev *dev, const struct input_device_id *id) +{ + return 0; +} +static void adreno_input_disconnect(struct input_handle *handle) {} +#endif + +/* + * We are only interested in EV_ABS events so only register handlers for those + * input devices that have EV_ABS events + */ +static const struct input_device_id adreno_input_ids[] = { + { + .flags = INPUT_DEVICE_ID_MATCH_EVBIT, + .evbit = { BIT_MASK(EV_ABS) }, + /* assumption: MT_.._X & MT_.._Y are in the same long */ + .absbit = { [BIT_WORD(ABS_MT_POSITION_X)] = + BIT_MASK(ABS_MT_POSITION_X) | + BIT_MASK(ABS_MT_POSITION_Y) }, + }, + { }, +}; + +static struct input_handler adreno_input_handler = { + .event = adreno_input_event, + .connect = adreno_input_connect, + .disconnect = adreno_input_disconnect, + .name = "kgsl", + .id_table = adreno_input_ids, +}; + +/* + * _soft_reset() - Soft reset GPU + * @adreno_dev: Pointer to adreno device + * + * Soft reset the GPU by doing a AHB write of value 1 to RBBM_SW_RESET + * register. This is used when we want to reset the GPU without + * turning off GFX power rail. The reset when asserted resets + * all the HW logic, restores GPU registers to default state and + * flushes out pending VBIF transactions. + */ +static void _soft_reset(struct adreno_device *adreno_dev) +{ + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + unsigned int reg; + + adreno_writereg(adreno_dev, ADRENO_REG_RBBM_SW_RESET_CMD, 1); + /* + * Do a dummy read to get a brief read cycle delay for the + * reset to take effect + */ + adreno_readreg(adreno_dev, ADRENO_REG_RBBM_SW_RESET_CMD, ®); + adreno_writereg(adreno_dev, ADRENO_REG_RBBM_SW_RESET_CMD, 0); + + /* The SP/TP regulator gets turned off after a soft reset */ + + clear_bit(ADRENO_DEVICE_GPU_REGULATOR_ENABLED, &adreno_dev->priv); + if (gpudev->regulator_enable) + gpudev->regulator_enable(adreno_dev); +} + +/** + * adreno_irqctrl() - Enables/disables the RBBM interrupt mask + * @adreno_dev: Pointer to an adreno_device + * @state: 1 for masked or 0 for unmasked + * Power: The caller of this function must make sure to use OOBs + * so that we know that the GPU is powered on + */ +void adreno_irqctrl(struct adreno_device *adreno_dev, int state) +{ + adreno_writereg(adreno_dev, ADRENO_REG_RBBM_INT_0_MASK, + state ? 
adreno_dev->irq_mask : 0); +} + +/* + * adreno_hang_int_callback() - Isr for fatal interrupts that hang GPU + * @adreno_dev: Pointer to device + * @bit: Interrupt bit + */ +void adreno_hang_int_callback(struct adreno_device *adreno_dev, int bit) +{ + dev_crit_ratelimited(KGSL_DEVICE(adreno_dev)->dev, + "MISC: GPU hang detected\n"); + adreno_irqctrl(adreno_dev, 0); + + /* Trigger a fault in the dispatcher - this will effect a restart */ + adreno_dispatcher_fault(adreno_dev, ADRENO_HARD_FAULT); +} + +/* + * adreno_cp_callback() - CP interrupt handler + * @adreno_dev: Adreno device pointer + * @irq: irq number + * + * Handle the cp interrupt generated by GPU. + */ +void adreno_cp_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + adreno_dispatcher_schedule(device); +} + +static irqreturn_t adreno_irq_handler(int irq, void *data) +{ + struct kgsl_device *device = data; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + irqreturn_t ret; + + atomic_inc(&adreno_dev->pending_irq_refcnt); + /* Ensure this increment is done before the IRQ status is updated */ + smp_mb__after_atomic(); + + ret = gpudev->irq_handler(adreno_dev); + + /* Make sure the regwrites are done before the decrement */ + smp_mb__before_atomic(); + atomic_dec(&adreno_dev->pending_irq_refcnt); + /* Ensure other CPUs see the decrement */ + smp_mb__after_atomic(); + + return ret; +} + +irqreturn_t adreno_irq_callbacks(struct adreno_device *adreno_dev, + const struct adreno_irq_funcs *funcs, u32 status) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + irqreturn_t ret = IRQ_NONE; + + /* Loop through all set interrupts and call respective handlers */ + while (status) { + int i = fls(status) - 1; + + if (funcs[i].func) { + if (adreno_dev->irq_mask & BIT(i)) + funcs[i].func(adreno_dev, i); + } else + dev_crit_ratelimited(device->dev, + "Unhandled interrupt bit %x\n", i); + + ret = IRQ_HANDLED; + + status &= ~BIT(i); + } + + return ret; +} + +static int adreno_get_chipid(struct platform_device *pdev, u32 *chipid); + +static inline bool _rev_match(unsigned int id, unsigned int entry) +{ + return (entry == ANY_ID || entry == id); +} + +static const struct adreno_gpu_core * +_get_gpu_core(struct platform_device *pdev, u32 *chipid) +{ + int i; + struct device_node *node; + + /* + * When "qcom,gpu-models" is defined, use gpu model node to match + * on a compatible string, otherwise match using legacy way. + */ + node = adreno_get_gpu_model_node(pdev); + if (!node || !of_find_property(node, "compatible", NULL)) + node = pdev->dev.of_node; + + *chipid = 0; + + /* Check to see if any of the entries match on a compatible string */ + for (i = 0; i < ARRAY_SIZE(adreno_gpulist); i++) { + if (adreno_gpulist[i]->compatible && + of_device_is_compatible(node, + adreno_gpulist[i]->compatible)) { + /* + * We matched compat string, set chipid based on + * dtsi, then gpulist, else fail. 
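For the chipid fallback path described above, it helps to see the field packing spelled out. The layout assumed below (one byte per field, core in the most significant byte) is not shown in this hunk but is consistent with the 0x07030000 chipid listed for gen7_0_0; ANY_ID is likewise assumed to be the all-ones wildcard.

    #include <stdint.h>
    #include <stdio.h>

    #define ANY_ID (~0u)   /* wildcard used by the gpulist entries (assumed) */

    /* Assumed packing: core | major | minor | patch, one byte each, MSB first */
    static unsigned int chipid_core(uint32_t id)  { return (id >> 24) & 0xff; }
    static unsigned int chipid_major(uint32_t id) { return (id >> 16) & 0xff; }
    static unsigned int chipid_minor(uint32_t id) { return (id >> 8) & 0xff; }
    static unsigned int chipid_patch(uint32_t id) { return id & 0xff; }

    /* Same idea as _rev_match(): a list entry of ANY_ID matches every value */
    static int rev_match(unsigned int id, unsigned int entry)
    {
        return entry == ANY_ID || entry == id;
    }

    int main(void)
    {
        uint32_t chipid = 0x06010600;   /* would decode as a 6.1.6.0 part */

        printf("%u.%u.%u.%u\n", chipid_core(chipid), chipid_major(chipid),
               chipid_minor(chipid), chipid_patch(chipid));
        /* A gpulist entry of 6, 1, 6, ANY_ID accepts any patch revision */
        printf("patch matches ANY_ID: %d\n",
               rev_match(chipid_patch(chipid), ANY_ID));
        return 0;
    }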
+ */ + if (adreno_get_chipid(pdev, chipid)) + *chipid = adreno_gpulist[i]->chipid; + + if (*chipid) + return adreno_gpulist[i]; + + dev_crit(&pdev->dev, + "No chipid associated with %s\n", + adreno_gpulist[i]->compatible); + return NULL; + } + } + + /* No compatible string so try and match on chipid */ + if (!adreno_get_chipid(pdev, chipid)) { + unsigned int core = ADRENO_CHIPID_CORE(*chipid); + unsigned int major = ADRENO_CHIPID_MAJOR(*chipid); + unsigned int minor = ADRENO_CHIPID_MINOR(*chipid); + unsigned int patchid = ADRENO_CHIPID_PATCH(*chipid); + + for (i = 0; i < ARRAY_SIZE(adreno_gpulist); i++) { + if (core == adreno_gpulist[i]->core && + _rev_match(major, adreno_gpulist[i]->major) && + _rev_match(minor, adreno_gpulist[i]->minor) && + _rev_match(patchid, adreno_gpulist[i]->patchid)) + return adreno_gpulist[i]; + } + } + + dev_crit(&pdev->dev, "Unknown GPU chip ID %8.8x\n", *chipid); + return NULL; +} + +static struct { + unsigned int quirk; + const char *prop; +} adreno_quirks[] = { + { ADRENO_QUIRK_TWO_PASS_USE_WFI, "qcom,gpu-quirk-two-pass-use-wfi" }, + { ADRENO_QUIRK_CRITICAL_PACKETS, "qcom,gpu-quirk-critical-packets" }, + { ADRENO_QUIRK_FAULT_DETECT_MASK, "qcom,gpu-quirk-fault-detect-mask" }, + { ADRENO_QUIRK_DISABLE_RB_DP2CLOCKGATING, + "qcom,gpu-quirk-dp2clockgating-disable" }, + { ADRENO_QUIRK_DISABLE_LMLOADKILL, + "qcom,gpu-quirk-lmloadkill-disable" }, + { ADRENO_QUIRK_HFI_USE_REG, "qcom,gpu-quirk-hfi-use-reg" }, + { ADRENO_QUIRK_SECVID_SET_ONCE, "qcom,gpu-quirk-secvid-set-once" }, + { ADRENO_QUIRK_LIMIT_UCHE_GBIF_RW, + "qcom,gpu-quirk-limit-uche-gbif-rw" }, + { ADRENO_QUIRK_CX_GDSC, "qcom,gpu-quirk-cx-gdsc" }, +}; + +static int adreno_get_chipid(struct platform_device *pdev, u32 *chipid) +{ + return of_property_read_u32(pdev->dev.of_node, "qcom,chipid", chipid); +} + +static void +adreno_update_soc_hw_revision_quirks(struct adreno_device *adreno_dev, + struct platform_device *pdev) +{ + struct device_node *node = pdev->dev.of_node; + int i; + + /* update quirk */ + for (i = 0; i < ARRAY_SIZE(adreno_quirks); i++) { + if (of_property_read_bool(node, adreno_quirks[i].prop)) + adreno_dev->quirks |= adreno_quirks[i].quirk; + } +} + +static const struct adreno_gpu_core * +adreno_identify_gpu(struct platform_device *pdev, u32 *chipid) +{ + const struct adreno_gpu_core *gpucore; + + gpucore = _get_gpu_core(pdev, chipid); + if (!gpucore) + return ERR_PTR(-ENODEV); + + /* + * Identify non-longer supported targets and spins and print a helpful + * message + */ + if (gpucore->features & ADRENO_DEPRECATED) { + if (gpucore->compatible) + dev_err(&pdev->dev, + "Support for GPU %s has been deprecated\n", + gpucore->compatible); + else + dev_err(&pdev->dev, + "Support for GPU %x.%d.%x.%d has been deprecated\n", + gpucore->core, gpucore->major, + gpucore->minor, gpucore->patchid); + return ERR_PTR(-ENODEV); + } + + return gpucore; +} + +static const struct of_device_id adreno_match_table[] = { + { .compatible = "qcom,kgsl-3d0", .data = &device_3d0 }, + { }, +}; + +MODULE_DEVICE_TABLE(of, adreno_match_table); + +/* Dynamically build the OPP table for the GPU device */ +static void adreno_build_opp_table(struct device *dev, struct kgsl_pwrctrl *pwr) +{ + int i; + + /* Skip if the table has already been populated */ + if (dev_pm_opp_get_opp_count(dev) > 0) + return; + + /* Add all the supported frequencies into the tree */ + for (i = 0; i < pwr->num_pwrlevels; i++) + dev_pm_opp_add(dev, pwr->pwrlevels[i].gpu_freq, 0); +} + +static int adreno_of_parse_pwrlevels(struct adreno_device 
*adreno_dev, + struct device_node *node) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct device_node *child; + int ret; + + pwr->num_pwrlevels = 0; + + for_each_child_of_node(node, child) { + u32 index, freq = 0, voltage, bus; + struct kgsl_pwrlevel *level; + + ret = of_property_read_u32(child, "reg", &index); + if (ret) { + dev_err(device->dev, "%pOF: powerlevel index not found\n", + child); + goto out; + } + + ret = of_property_read_u32(child, "qcom,gpu-freq", &freq); + if (ret) { + dev_err(device->dev, "%pOF: Unable to read qcom,gpu-freq\n", + child); + goto out; + } + + /* Ignore "zero" powerlevels */ + if (!freq) + continue; + + ret = of_property_read_u32(child, "qcom,level", &voltage); + if (ret) { + dev_err(device->dev, "%pOF: Unable to read qcom,level\n", + child); + goto out; + } + + ret = kgsl_of_property_read_ddrtype(child, "qcom,bus-freq", + &bus); + if (ret) { + dev_err(device->dev, "%pOF:Unable to read qcom,bus-freq\n", + child); + goto out; + } + + if (index >= ARRAY_SIZE(pwr->pwrlevels)) { + dev_err(device->dev, "%pOF: Pwrlevel index %d is out of range\n", + child, index); + continue; + } + + if (index >= pwr->num_pwrlevels) + pwr->num_pwrlevels = index + 1; + + level = &pwr->pwrlevels[index]; + + level->gpu_freq = freq; + level->bus_freq = bus; + level->voltage_level = voltage; + + of_property_read_u32(child, "qcom,acd-level", + &level->acd_level); + + level->bus_min = level->bus_freq; + kgsl_of_property_read_ddrtype(child, + "qcom,bus-min", &level->bus_min); + + level->bus_max = level->bus_freq; + kgsl_of_property_read_ddrtype(child, + "qcom,bus-max", &level->bus_max); + } + + adreno_build_opp_table(&device->pdev->dev, pwr); + return 0; +out: + of_node_put(child); + return ret; +} + +static void adreno_of_get_initial_pwrlevel(struct kgsl_pwrctrl *pwr, + struct device_node *node) +{ + int init_level = 1; + + of_property_read_u32(node, "qcom,initial-pwrlevel", &init_level); + + if (init_level < 0 || init_level >= pwr->num_pwrlevels) + init_level = 1; + + pwr->active_pwrlevel = init_level; + pwr->default_pwrlevel = init_level; +} + +static void adreno_of_get_limits(struct adreno_device *adreno_dev, + struct device_node *node) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwrctrl = &device->pwrctrl; + unsigned int throttle_level; + + if (!ADRENO_FEATURE(adreno_dev, ADRENO_LM) || of_property_read_u32(node, + "qcom,throttle-pwrlevel", &throttle_level)) + return; + + throttle_level = min(throttle_level, pwrctrl->num_pwrlevels - 1); + + pwrctrl->throttle_mask = GENMASK(pwrctrl->num_pwrlevels - 1, + pwrctrl->num_pwrlevels - 1 - throttle_level); + + adreno_dev->lm_enabled = true; +} + +static int adreno_of_get_legacy_pwrlevels(struct adreno_device *adreno_dev, + struct device_node *parent) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct device_node *node; + int ret; + + node = of_find_node_by_name(parent, "qcom,gpu-pwrlevels"); + + if (node == NULL) { + dev_err(&device->pdev->dev, + "Unable to find 'qcom,gpu-pwrlevels'\n"); + return -EINVAL; + } + + ret = adreno_of_parse_pwrlevels(adreno_dev, node); + + if (!ret) { + adreno_of_get_initial_pwrlevel(&device->pwrctrl, parent); + adreno_of_get_limits(adreno_dev, parent); + } + + of_node_put(node); + return ret; +} + +static int adreno_of_get_pwrlevels(struct adreno_device *adreno_dev, + struct device_node *parent) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct device_node *node, *child; + 
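A few functions back, adreno_of_get_limits() turns qcom,throttle-pwrlevel into a contiguous bitmask over the highest-numbered (slowest) power levels. The standalone arithmetic below, with a locally defined 32-bit GENMASK() stand-in and made-up table sizes, shows what that mask looks like for a typical case.

    #include <stdio.h>

    /* Local 32-bit stand-in for the kernel's GENMASK(): set bits h..l inclusive */
    #define GENMASK(h, l) \
        ((~0u >> (31 - (h))) & (~0u << (l)))

    int main(void)
    {
        unsigned int num_pwrlevels = 8;    /* example power level table size */
        unsigned int throttle_level = 2;   /* from qcom,throttle-pwrlevel */
        unsigned int mask = GENMASK(num_pwrlevels - 1,
                                    num_pwrlevels - 1 - throttle_level);

        /* Bits 5..7 set -> 0xe0: the three highest-indexed (slowest) levels */
        printf("throttle_mask = 0x%02x\n", mask);
        return 0;
    }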
unsigned int bin = 0; + + node = of_find_node_by_name(parent, "qcom,gpu-pwrlevel-bins"); + if (node == NULL) + return adreno_of_get_legacy_pwrlevels(adreno_dev, parent); + + for_each_child_of_node(node, child) { + + if (of_property_read_u32(child, "qcom,speed-bin", &bin)) + continue; + + if (bin == device->speed_bin) { + int ret; + + ret = adreno_of_parse_pwrlevels(adreno_dev, child); + if (ret) { + of_node_put(child); + return ret; + } + + adreno_of_get_initial_pwrlevel(&device->pwrctrl, child); + + /* + * Check for global throttle-pwrlevel first and override + * with speedbin specific one if found. + */ + adreno_of_get_limits(adreno_dev, parent); + adreno_of_get_limits(adreno_dev, child); + + of_node_put(child); + return 0; + } + } + + dev_err(&device->pdev->dev, + "GPU speed_bin:%d mismatch for bin:%d\n", + device->speed_bin, bin); + return -ENODEV; +} + +static int register_l3_voter(struct kgsl_device *device) +{ + int ret = 0; + + mutex_lock(&device->mutex); + + if (!device->l3_vote) + goto done; + + /* This indicates that we are already set up */ + if (device->num_l3_pwrlevels != 0) + goto done; + + memset(device->l3_freq, 0x0, sizeof(device->l3_freq)); + + ret = qcom_dcvs_register_voter(KGSL_L3_DEVICE, DCVS_L3, DCVS_SLOW_PATH); + if (ret) { + dev_err_once(&device->pdev->dev, + "Unable to register l3 dcvs voter: %d\n", ret); + goto done; + } + + ret = qcom_dcvs_hw_minmax_get(DCVS_L3, &device->l3_freq[1], + &device->l3_freq[2]); + if (ret) { + dev_err_once(&device->pdev->dev, + "Unable to get min/max for l3 dcvs: %d\n", ret); + qcom_dcvs_unregister_voter(KGSL_L3_DEVICE, DCVS_L3, + DCVS_SLOW_PATH); + memset(device->l3_freq, 0x0, sizeof(device->l3_freq)); + goto done; + } + + device->num_l3_pwrlevels = 3; + +done: + mutex_unlock(&device->mutex); + + return ret; +} + +static int adreno_of_get_power(struct adreno_device *adreno_dev, + struct platform_device *pdev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + + ret = adreno_of_get_pwrlevels(adreno_dev, pdev->dev.of_node); + if (ret) + return ret; + + device->pwrctrl.interval_timeout = CONFIG_QCOM_KGSL_IDLE_TIMEOUT; + + device->pwrctrl.minbw_timeout = 10; + + /* Set default bus control to true on all targets */ + device->pwrctrl.bus_control = true; + + return 0; +} + +static void adreno_cx_dbgc_probe(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct resource *res; + + res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM, + "cx_dbgc"); + + if (res == NULL) + return; + + adreno_dev->cx_dbgc_base = res->start - device->regmap.base->start; + adreno_dev->cx_dbgc_len = resource_size(res); + adreno_dev->cx_dbgc_virt = devm_ioremap(&device->pdev->dev, + device->regmap.base->start + + adreno_dev->cx_dbgc_base, + adreno_dev->cx_dbgc_len); + + if (adreno_dev->cx_dbgc_virt == NULL) + dev_warn(device->dev, "cx_dbgc ioremap failed\n"); +} + +static void adreno_cx_misc_probe(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct resource *res; + + res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM, + "cx_misc"); + + if (res == NULL) + return; + + adreno_dev->cx_misc_len = resource_size(res); + adreno_dev->cx_misc_virt = devm_ioremap(&device->pdev->dev, + res->start, adreno_dev->cx_misc_len); +} + +static void adreno_isense_probe(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct resource *res; + + res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM, + 
"isense_cntl"); + if (res == NULL) + return; + + adreno_dev->isense_base = res->start - device->regmap.base->start; + adreno_dev->isense_len = resource_size(res); + adreno_dev->isense_virt = devm_ioremap(&device->pdev->dev, res->start, + adreno_dev->isense_len); + if (adreno_dev->isense_virt == NULL) + dev_warn(device->dev, "isense ioremap failed\n"); +} + +/* Read the fuse through the new and fancy nvmem method */ +static int adreno_read_speed_bin(struct platform_device *pdev) +{ + struct nvmem_cell *cell = nvmem_cell_get(&pdev->dev, "speed_bin"); + int ret = PTR_ERR_OR_ZERO(cell); + void *buf; + int val = 0; + size_t len; + + if (ret) { + if (ret == -ENOENT) + return 0; + + return ret; + } + + buf = nvmem_cell_read(cell, &len); + nvmem_cell_put(cell); + + if (IS_ERR(buf)) + return PTR_ERR(buf); + + memcpy(&val, buf, min(len, sizeof(val))); + kfree(buf); + + return val; +} + +static int adreno_read_gpu_model_fuse(struct platform_device *pdev) +{ + struct nvmem_cell *cell = nvmem_cell_get(&pdev->dev, "gpu_model"); + void *buf; + int val = 0; + size_t len; + + if (IS_ERR(cell)) + return PTR_ERR(cell); + + buf = nvmem_cell_read(cell, &len); + nvmem_cell_put(cell); + + if (IS_ERR(buf)) + return PTR_ERR(buf); + + memcpy(&val, buf, min(len, sizeof(val))); + kfree(buf); + + return val; +} + +static struct device_node * +adreno_get_gpu_model_node(struct platform_device *pdev) +{ + struct device_node *node, *child; + int fuse_model = adreno_read_gpu_model_fuse(pdev); + + if (fuse_model < 0) + return NULL; + + node = of_find_node_by_name(pdev->dev.of_node, "qcom,gpu-models"); + if (node == NULL) + return NULL; + + for_each_child_of_node(node, child) { + u32 model; + + if (of_property_read_u32(child, "qcom,gpu-model-id", &model)) + continue; + + if (model == fuse_model) { + of_node_put(node); + return child; + } + } + + of_node_put(node); + + return NULL; +} + +const char *adreno_get_gpu_model(struct kgsl_device *device) +{ + struct device_node *node; + static char gpu_model[32]; + const char *model; + int ret; + + if (strlen(gpu_model)) + return gpu_model; + + node = adreno_get_gpu_model_node(device->pdev); + if (!node) + node = of_node_get(device->pdev->dev.of_node); + + ret = of_property_read_string(node, "qcom,gpu-model", &model); + of_node_put(node); + + if (!ret) + strlcpy(gpu_model, model, sizeof(gpu_model)); + else + scnprintf(gpu_model, sizeof(gpu_model), "Adreno%d%d%dv%d", + ADRENO_CHIPID_CORE(ADRENO_DEVICE(device)->chipid), + ADRENO_CHIPID_MAJOR(ADRENO_DEVICE(device)->chipid), + ADRENO_CHIPID_MINOR(ADRENO_DEVICE(device)->chipid), + ADRENO_CHIPID_PATCH(ADRENO_DEVICE(device)->chipid) + 1); + + return gpu_model; +} + +static u32 adreno_get_vk_device_id(struct kgsl_device *device) +{ + struct device_node *node; + static u32 device_id; + + if (device_id) + return device_id; + + node = adreno_get_gpu_model_node(device->pdev); + if (!node) + node = of_node_get(device->pdev->dev.of_node); + + if (of_property_read_u32(node, "qcom,vk-device-id", &device_id)) + device_id = ADRENO_DEVICE(device)->chipid; + + of_node_put(node); + + return device_id; +} + +#if IS_ENABLED(CONFIG_QCOM_LLCC) +static int adreno_probe_llcc(struct adreno_device *adreno_dev, + struct platform_device *pdev) +{ + int ret; + + /* Get the system cache slice descriptor for GPU */ + adreno_dev->gpu_llc_slice = llcc_slice_getd(LLCC_GPU); + ret = PTR_ERR_OR_ZERO(adreno_dev->gpu_llc_slice); + + if (ret) { + /* Propagate EPROBE_DEFER back to the probe function */ + if (ret == -EPROBE_DEFER) + return ret; + + if (ret != -ENOENT) + 
dev_warn(&pdev->dev, + "Unable to get the GPU LLC slice: %d\n", ret); + } else + adreno_dev->gpu_llc_slice_enable = true; + + /* Get the system cache slice descriptor for GPU pagetables */ + adreno_dev->gpuhtw_llc_slice = llcc_slice_getd(LLCC_GPUHTW); + ret = PTR_ERR_OR_ZERO(adreno_dev->gpuhtw_llc_slice); + if (ret) { + if (ret == -EPROBE_DEFER) { + llcc_slice_putd(adreno_dev->gpu_llc_slice); + return ret; + } + + if (ret != -ENOENT) + dev_warn(&pdev->dev, + "Unable to get GPU HTW LLC slice: %d\n", ret); + } else + adreno_dev->gpuhtw_llc_slice_enable = true; + + return 0; +} +#else +static int adreno_probe_llcc(struct adreno_device *adreno_dev, + struct platform_device *pdev) +{ + return 0; +} +#endif + +static void adreno_regmap_op_preaccess(struct kgsl_regmap_region *region) +{ + struct kgsl_device *device = region->priv; + /* + * kgsl panic notifier will be called in atomic context to get + * GPU snapshot. Also panic handler will skip snapshot dumping + * incase GPU is in SLUMBER state. So we can safely ignore the + * kgsl_pre_hwaccess(). + */ + if (!device->snapshot_atomic && !in_interrupt()) + kgsl_pre_hwaccess(device); +} + +static const struct kgsl_regmap_ops adreno_regmap_ops = { + .preaccess = adreno_regmap_op_preaccess, +}; + +static const struct kgsl_functable adreno_functable; + +static void adreno_setup_device(struct adreno_device *adreno_dev) +{ + u32 i; + + adreno_dev->dev.name = "kgsl-3d0"; + adreno_dev->dev.ftbl = &adreno_functable; + + init_completion(&adreno_dev->dev.hwaccess_gate); + init_completion(&adreno_dev->dev.halt_gate); + + idr_init(&adreno_dev->dev.context_idr); + + mutex_init(&adreno_dev->dev.mutex); + INIT_LIST_HEAD(&adreno_dev->dev.globals); + + /* Set the fault tolerance policy to replay, skip, throttle */ + adreno_dev->ft_policy = BIT(KGSL_FT_REPLAY) | + BIT(KGSL_FT_SKIPCMD) | BIT(KGSL_FT_THROTTLE); + + /* Enable command timeouts by default */ + adreno_dev->long_ib_detect = true; + + INIT_WORK(&adreno_dev->input_work, adreno_input_work); + + INIT_LIST_HEAD(&adreno_dev->active_list); + spin_lock_init(&adreno_dev->active_list_lock); + + for (i = 0; i < ARRAY_SIZE(adreno_dev->ringbuffers); i++) { + struct adreno_ringbuffer *rb = &adreno_dev->ringbuffers[i]; + + INIT_LIST_HEAD(&rb->events.group); + } +} + +static const struct of_device_id adreno_gmu_match[] = { + { .compatible = "qcom,gen7-gmu" }, + { .compatible = "qcom,gpu-gmu" }, + { .compatible = "qcom,gpu-rgmu" }, + {}, +}; + +int adreno_device_probe(struct platform_device *pdev, + struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct device *dev = &pdev->dev; + unsigned int priv = 0; + int status; + u32 size; + + /* Initialize the adreno device structure */ + adreno_setup_device(adreno_dev); + + dev_set_drvdata(dev, device); + + device->pdev = pdev; + + adreno_update_soc_hw_revision_quirks(adreno_dev, pdev); + + status = adreno_read_speed_bin(pdev); + if (status < 0) + return status; + + device->speed_bin = status; + + status = adreno_of_get_power(adreno_dev, pdev); + if (status) + return status; + + status = kgsl_bus_init(device, pdev); + if (status) + goto err; + + status = kgsl_regmap_init(pdev, &device->regmap, "kgsl_3d0_reg_memory", + &adreno_regmap_ops, device); + if (status) + goto err; + + /* + * Bind the GMU components (if applicable) before doing the KGSL + * platform probe + */ + if (of_find_matching_node(dev->of_node, adreno_gmu_match)) { + status = component_bind_all(dev, NULL); + if (status) { + kgsl_bus_close(device); + return status; + } + } 
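The fallback branch of adreno_get_gpu_model() above synthesizes the model name from the chipid fields, printing the patch level one-based. Assuming the same one-byte-per-field chipid packing as in the earlier sketch, a chipid of 0x06030001 comes out as "Adreno630v2":

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t chipid = 0x06030001;   /* example value for illustration */
        char gpu_model[32];

        /* Same format string as the fallback in adreno_get_gpu_model() */
        snprintf(gpu_model, sizeof(gpu_model), "Adreno%d%d%dv%d",
                 (int)((chipid >> 24) & 0xff),   /* core  */
                 (int)((chipid >> 16) & 0xff),   /* major */
                 (int)((chipid >> 8) & 0xff),    /* minor */
                 (int)((chipid & 0xff) + 1));    /* patch, reported one-based */

        puts(gpu_model);                          /* prints "Adreno630v2" */
        return 0;
    }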
+ + /* + * The SMMU APIs use unsigned long for virtual addresses which means + * that we cannot use 64 bit virtual addresses on a 32 bit kernel even + * though the hardware and the rest of the KGSL driver supports it. + */ + if (adreno_support_64bit(adreno_dev)) + kgsl_mmu_set_feature(device, KGSL_MMU_64BIT); + + /* + * Set the SMMU aperture on A6XX/Gen7 targets to use per-process + * pagetables. + */ + if (ADRENO_GPUREV(adreno_dev) >= 600) + kgsl_mmu_set_feature(device, KGSL_MMU_SMMU_APERTURE); + + if (ADRENO_FEATURE(adreno_dev, ADRENO_IOCOHERENT)) + kgsl_mmu_set_feature(device, KGSL_MMU_IO_COHERENT); + + device->pwrctrl.bus_width = adreno_dev->gpucore->bus_width; + + device->mmu.secured = (IS_ENABLED(CONFIG_QCOM_SECURE_BUFFER) && + ADRENO_FEATURE(adreno_dev, ADRENO_CONTENT_PROTECTION)); + + /* Probe the LLCC - this could return -EPROBE_DEFER */ + status = adreno_probe_llcc(adreno_dev, pdev); + if (status) + goto err; + + /* + * IF the GPU HTW slice was successsful set the MMU feature so the + * domain can set the appropriate attributes + */ + if (!IS_ERR_OR_NULL(adreno_dev->gpuhtw_llc_slice)) + kgsl_mmu_set_feature(device, KGSL_MMU_LLCC_ENABLE); + + status = kgsl_request_irq(pdev, "kgsl_3d0_irq", adreno_irq_handler, device); + if (status < 0) + goto err; + + device->pwrctrl.interrupt_num = status; + + status = kgsl_device_platform_probe(device); + if (status) + goto err; + + adreno_fence_trace_array_init(device); + + /* Probe for the optional CX_DBGC block */ + adreno_cx_dbgc_probe(device); + + /* Probe for the optional CX_MISC block */ + adreno_cx_misc_probe(device); + + adreno_isense_probe(device); + + /* Allocate the memstore for storing timestamps and other useful info */ + + if (ADRENO_FEATURE(adreno_dev, ADRENO_APRIV)) + priv |= KGSL_MEMDESC_PRIVILEGED; + + device->memstore = kgsl_allocate_global(device, + KGSL_MEMSTORE_SIZE, 0, 0, priv, "memstore"); + + status = PTR_ERR_OR_ZERO(device->memstore); + if (status) { + kgsl_device_platform_remove(device); + goto err; + } + + /* Initialize the snapshot engine */ + size = adreno_dev->gpucore->snapshot_size; + + /* + * Use a default size if one wasn't specified, but print a warning so + * the developer knows to fix it + */ + + if (WARN(!size, "The snapshot size was not specified in the gpucore\n")) + size = SZ_1M; + + kgsl_device_snapshot_probe(device, size); + + adreno_debugfs_init(adreno_dev); + adreno_profile_init(adreno_dev); + + adreno_sysfs_init(adreno_dev); + + kgsl_pwrscale_init(device, pdev, CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR); + + /* Initialize coresight for the target */ + adreno_coresight_init(adreno_dev); + + if (ADRENO_FEATURE(adreno_dev, ADRENO_L3_VOTE)) + device->l3_vote = true; + +#ifdef CONFIG_INPUT + + if (!of_property_read_bool(pdev->dev.of_node, + "qcom,disable-wake-on-touch")) { + adreno_input_handler.private = device; + /* + * It isn't fatal if we cannot register the input handler. 
Sad, + * perhaps, but not fatal + */ + if (input_register_handler(&adreno_input_handler)) { + adreno_input_handler.private = NULL; + dev_err(device->dev, + "Unable to register the input handler\n"); + } + } +#endif + + kgsl_qcom_va_md_register(device); + + return 0; +err: + device->pdev = NULL; + + if (of_find_matching_node(dev->of_node, adreno_gmu_match)) + component_unbind_all(dev, NULL); + + kgsl_bus_close(device); + + return status; +} + +static int adreno_bind(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + const struct adreno_gpu_core *gpucore; + int ret; + u32 chipid; + + gpucore = adreno_identify_gpu(pdev, &chipid); + if (IS_ERR(gpucore)) + return PTR_ERR(gpucore); + + ret = gpucore->gpudev->probe(pdev, chipid, gpucore); + + if (!ret) { + struct kgsl_device *device = dev_get_drvdata(dev); + + device->pdev_loaded = true; + } + + return ret; +} + +static void adreno_unbind(struct device *dev) +{ + struct adreno_device *adreno_dev; + struct kgsl_device *device; + const struct adreno_gpudev *gpudev; + + device = dev_get_drvdata(dev); + if (!device) + return; + + device->pdev_loaded = false; + + adreno_dev = ADRENO_DEVICE(device); + gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + trace_array_put(device->fence_trace_array); + + if (gpudev->remove != NULL) + gpudev->remove(adreno_dev); + +#ifdef CONFIG_INPUT + if (adreno_input_handler.private) + input_unregister_handler(&adreno_input_handler); +#endif + + adreno_coresight_remove(adreno_dev); + adreno_profile_close(adreno_dev); + + /* Release the system cache slice descriptor */ + if (!IS_ERR_OR_NULL(adreno_dev->gpu_llc_slice)) + llcc_slice_putd(adreno_dev->gpu_llc_slice); + + if (!IS_ERR_OR_NULL(adreno_dev->gpuhtw_llc_slice)) + llcc_slice_putd(adreno_dev->gpuhtw_llc_slice); + + kgsl_pwrscale_close(device); + + if (adreno_dev->dispatch_ops && adreno_dev->dispatch_ops->close) + adreno_dev->dispatch_ops->close(adreno_dev); + + kgsl_device_platform_remove(device); + + if (of_find_matching_node(dev->of_node, adreno_gmu_match)) + component_unbind_all(dev, NULL); + + if (device->num_l3_pwrlevels != 0) + qcom_dcvs_unregister_voter(KGSL_L3_DEVICE, DCVS_L3, + DCVS_SLOW_PATH); + + clear_bit(ADRENO_DEVICE_PWRON_FIXUP, &adreno_dev->priv); + clear_bit(ADRENO_DEVICE_INITIALIZED, &adreno_dev->priv); +} + +static void adreno_resume(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (device->state == KGSL_STATE_SUSPEND) { + adreno_put_gpu_halt(adreno_dev); + kgsl_pwrctrl_change_state(device, KGSL_STATE_SLUMBER); + } else if (device->state != KGSL_STATE_INIT) { + /* + * This is an error situation so wait for the device to idle and + * then put the device in SLUMBER state. This will get us to + * the right place when we resume. 
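The suspend/resume pairing sketched in the comment above is essentially a refcounted halt around the SUSPEND state: entering SUSPEND takes a halt reference that blocks new work, and the matching resume releases it before parking the GPU in SLUMBER. The toy model below is a simplified restatement for illustration, not the driver's real state machine.

    #include <stdio.h>

    enum state { STATE_INIT, STATE_SLUMBER, STATE_ACTIVE, STATE_SUSPEND };

    static int gpu_halt;   /* stand-in for the driver's halt refcount */

    static void do_suspend(enum state *s)
    {
        *s = STATE_SUSPEND;
        gpu_halt++;                     /* mirrors adreno_get_gpu_halt() */
    }

    static void do_resume(enum state *s)
    {
        if (*s == STATE_SUSPEND) {
            gpu_halt--;                 /* mirrors adreno_put_gpu_halt() */
            *s = STATE_SLUMBER;
        } else if (*s != STATE_INIT) {
            /* unbalanced resume: still park the GPU in SLUMBER, but warn */
            *s = STATE_SLUMBER;
            fprintf(stderr, "resume invoked without a suspend\n");
        }
    }

    int main(void)
    {
        enum state s = STATE_ACTIVE;

        do_suspend(&s);
        do_resume(&s);
        printf("state=%d halt=%d\n", s, gpu_halt);   /* SLUMBER, balanced */
        return 0;
    }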
+ */ + if (device->state == KGSL_STATE_ACTIVE) + adreno_idle(device); + kgsl_pwrctrl_change_state(device, KGSL_STATE_SLUMBER); + dev_err(device->dev, "resume invoked without a suspend\n"); + } +} + +static int adreno_pm_resume(struct device *dev) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_power_ops *ops = ADRENO_POWER_OPS(adreno_dev); + + mutex_lock(&device->mutex); + ops->pm_resume(adreno_dev); + mutex_unlock(&device->mutex); + + return 0; +} + +static int adreno_suspend(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int status = kgsl_pwrctrl_change_state(device, KGSL_STATE_SUSPEND); + + if (!status && device->state == KGSL_STATE_SUSPEND) + adreno_get_gpu_halt(adreno_dev); + + return status; +} + +static int adreno_pm_suspend(struct device *dev) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_power_ops *ops = ADRENO_POWER_OPS(adreno_dev); + int status; + + mutex_lock(&device->mutex); + status = ops->pm_suspend(adreno_dev); + mutex_unlock(&device->mutex); + + return status; +} + +void adreno_create_profile_buffer(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned int priv = 0; + + if (ADRENO_FEATURE(adreno_dev, ADRENO_APRIV)) + priv = KGSL_MEMDESC_PRIVILEGED; + + adreno_allocate_global(device, &adreno_dev->profile_buffer, + PAGE_SIZE, 0, 0, priv, "alwayson"); + + adreno_dev->profile_index = 0; + + if (!IS_ERR(adreno_dev->profile_buffer)) + set_bit(ADRENO_DEVICE_DRAWOBJ_PROFILE, + &adreno_dev->priv); +} + +static int adreno_init(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + int ret; + + ret = kgsl_pwrctrl_change_state(device, KGSL_STATE_INIT); + if (ret) + return ret; + + /* + * initialization only needs to be done once initially until + * device is shutdown + */ + if (test_bit(ADRENO_DEVICE_INITIALIZED, &adreno_dev->priv)) + return 0; + + ret = gpudev->init(adreno_dev); + if (ret) + return ret; + + set_bit(ADRENO_DEVICE_INITIALIZED, &adreno_dev->priv); + + return 0; +} + +static bool regulators_left_on(struct kgsl_device *device) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + + if (gmu_core_gpmu_isenabled(device)) + return false; + + if (!IS_ERR_OR_NULL(pwr->cx_gdsc)) + if (regulator_is_enabled(pwr->cx_gdsc)) + return true; + + if (!IS_ERR_OR_NULL(pwr->gx_gdsc)) + return regulator_is_enabled(pwr->gx_gdsc); + + return false; +} + +void adreno_set_active_ctxs_null(struct adreno_device *adreno_dev) +{ + int i; + struct adreno_ringbuffer *rb; + + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + if (rb->drawctxt_active) + kgsl_context_put(&(rb->drawctxt_active->base)); + rb->drawctxt_active = NULL; + + kgsl_sharedmem_writel(rb->pagetable_desc, + PT_INFO_OFFSET(current_rb_ptname), 0); + } +} + +static int adreno_open(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + + /* + * active_cnt special case: we are starting up for the first + * time, so use this sequence instead of the kgsl_pwrctrl_wake() + * which will be called by adreno_active_count_get(). 
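The active_cnt special case noted above fits a wider pattern used throughout this file: the first active-count get is what pulls the GPU to ACTIVE, and the last put is what re-arms the idle path, so adreno_open() bumps the counter by hand before starting the hardware to keep the idle timer from racing the start sequence. A minimal model of that pattern, with hypothetical helper names:

    #include <stdio.h>

    static int active_cnt;   /* stand-in for device->active_cnt */

    static void active_get(void)
    {
        if (active_cnt++ == 0)
            printf("wake: transition to ACTIVE\n");
    }

    static void active_put(void)
    {
        if (--active_cnt == 0)
            printf("idle: start the idle timer\n");
    }

    int main(void)
    {
        active_get();     /* first user wakes the GPU */
        active_get();     /* nested users just bump the count */
        active_put();
        active_put();     /* last user lets it idle again */
        return 0;
    }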
+ */ + atomic_inc(&device->active_cnt); + + memset(device->memstore->hostptr, 0, device->memstore->size); + + ret = adreno_init(device); + if (ret) + goto err; + + ret = adreno_start(device, 0); + if (ret) + goto err; + + complete_all(&device->hwaccess_gate); + kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE); + adreno_active_count_put(adreno_dev); + + return 0; +err: + kgsl_pwrctrl_change_state(device, KGSL_STATE_INIT); + atomic_dec(&device->active_cnt); + + return ret; +} + +static int adreno_first_open(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_power_ops *ops = ADRENO_POWER_OPS(adreno_dev); + + if (!device->pdev_loaded) + return -ENODEV; + + return ops->first_open(adreno_dev); +} + +static int adreno_close(struct adreno_device *adreno_dev) +{ + return kgsl_pwrctrl_change_state(KGSL_DEVICE(adreno_dev), + KGSL_STATE_INIT); +} + +static int adreno_last_close(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_power_ops *ops = ADRENO_POWER_OPS(adreno_dev); + + /* + * Wait up to 1 second for the active count to go low + * and then start complaining about it + */ + if (kgsl_active_count_wait(device, 0, HZ)) { + dev_err(device->dev, + "Waiting for the active count to become 0\n"); + + while (kgsl_active_count_wait(device, 0, HZ)) + dev_err(device->dev, + "Still waiting for the active count\n"); + } + + return ops->last_close(adreno_dev); +} + +static int adreno_pwrctrl_active_count_get(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret = 0; + + if (WARN_ON(!mutex_is_locked(&device->mutex))) + return -EINVAL; + + if ((atomic_read(&device->active_cnt) == 0) && + (device->state != KGSL_STATE_ACTIVE)) { + mutex_unlock(&device->mutex); + wait_for_completion(&device->hwaccess_gate); + mutex_lock(&device->mutex); + device->pwrctrl.superfast = true; + ret = kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE); + } + if (ret == 0) + atomic_inc(&device->active_cnt); + trace_kgsl_active_count(device, + (unsigned long) __builtin_return_address(0)); + return ret; +} + +static void adreno_pwrctrl_active_count_put(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (WARN_ON(!mutex_is_locked(&device->mutex))) + return; + + if (WARN(atomic_read(&device->active_cnt) == 0, + "Unbalanced get/put calls to KGSL active count\n")) + return; + + if (atomic_dec_and_test(&device->active_cnt)) { + bool nap_on = !(device->pwrctrl.ctrl_flags & + BIT(KGSL_PWRFLAGS_NAP_OFF)); + if (nap_on && device->state == KGSL_STATE_ACTIVE && + device->requested_state == KGSL_STATE_NONE) { + kgsl_pwrctrl_request_state(device, KGSL_STATE_NAP); + kgsl_schedule_work(&device->idle_check_ws); + } else if (!nap_on) { + kgsl_pwrscale_update_stats(device); + kgsl_pwrscale_update(device); + } + + kgsl_start_idle_timer(device); + } + + trace_kgsl_active_count(device, + (unsigned long) __builtin_return_address(0)); + + wake_up(&device->active_cnt_wq); +} + +int adreno_active_count_get(struct adreno_device *adreno_dev) +{ + const struct adreno_power_ops *ops = ADRENO_POWER_OPS(adreno_dev); + + return ops->active_count_get(adreno_dev); +} + +void adreno_active_count_put(struct adreno_device *adreno_dev) +{ + const struct adreno_power_ops *ops = ADRENO_POWER_OPS(adreno_dev); + + ops->active_count_put(adreno_dev); +} + +void adreno_get_bus_counters(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = 
KGSL_DEVICE(adreno_dev); + int ret = 0; + + if (!device->pwrctrl.bus_control) + return; + + /* VBIF waiting for RAM */ + ret |= adreno_perfcounter_kernel_get(adreno_dev, + KGSL_PERFCOUNTER_GROUP_VBIF_PWR, 0, + &adreno_dev->starved_ram_lo, NULL); + + /* Target has GBIF */ + if (adreno_is_gen7(adreno_dev) || + (adreno_is_a6xx(adreno_dev) && !adreno_is_a630(adreno_dev))) { + ret |= adreno_perfcounter_kernel_get(adreno_dev, + KGSL_PERFCOUNTER_GROUP_VBIF_PWR, 1, + &adreno_dev->starved_ram_lo_ch1, NULL); + + ret |= adreno_perfcounter_kernel_get(adreno_dev, + KGSL_PERFCOUNTER_GROUP_VBIF, + GBIF_AXI0_READ_DATA_TOTAL_BEATS, + &adreno_dev->ram_cycles_lo, NULL); + + ret |= adreno_perfcounter_kernel_get(adreno_dev, + KGSL_PERFCOUNTER_GROUP_VBIF, + GBIF_AXI1_READ_DATA_TOTAL_BEATS, + &adreno_dev->ram_cycles_lo_ch1_read, NULL); + + ret |= adreno_perfcounter_kernel_get(adreno_dev, + KGSL_PERFCOUNTER_GROUP_VBIF, + GBIF_AXI0_WRITE_DATA_TOTAL_BEATS, + &adreno_dev->ram_cycles_lo_ch0_write, NULL); + + ret |= adreno_perfcounter_kernel_get(adreno_dev, + KGSL_PERFCOUNTER_GROUP_VBIF, + GBIF_AXI1_WRITE_DATA_TOTAL_BEATS, + &adreno_dev->ram_cycles_lo_ch1_write, NULL); + } else { + /* VBIF DDR cycles */ + ret |= adreno_perfcounter_kernel_get(adreno_dev, + KGSL_PERFCOUNTER_GROUP_VBIF, + VBIF_AXI_TOTAL_BEATS, + &adreno_dev->ram_cycles_lo, NULL); + } + + if (ret) + dev_err(KGSL_DEVICE(adreno_dev)->dev, + "Unable to get perf counters for bus DCVS\n"); +} + +/** + * _adreno_start - Power up the GPU and prepare to accept commands + * @adreno_dev: Pointer to an adreno_device structure + * + * The core function that powers up and initalizes the GPU. This function is + * called at init and after coming out of SLUMBER + */ +static int _adreno_start(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + int status; + unsigned int state = device->state; + bool regulator_left_on; + + /* make sure ADRENO_DEVICE_STARTED is not set here */ + WARN_ON(test_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv)); + + regulator_left_on = regulators_left_on(device); + + /* Clear any GPU faults that might have been left over */ + adreno_clear_gpu_fault(adreno_dev); + + /* Put the GPU in a responsive state */ + status = kgsl_pwrctrl_change_state(device, KGSL_STATE_AWARE); + if (status) + goto error_pwr_off; + + /* Set any stale active contexts to NULL */ + adreno_set_active_ctxs_null(adreno_dev); + + /* Set the bit to indicate that we've just powered on */ + set_bit(ADRENO_DEVICE_PWRON, &adreno_dev->priv); + + adreno_ringbuffer_set_global(adreno_dev, 0); + + /* Clear the busy_data stats - we're starting over from scratch */ + memset(&adreno_dev->busy_data, 0, sizeof(adreno_dev->busy_data)); + + /* Soft reset the GPU if a regulator is stuck on*/ + if (regulator_left_on) + _soft_reset(adreno_dev); + + /* Start the GPU */ + status = gpudev->start(adreno_dev); + if (status) + goto error_pwr_off; + + /* Re-initialize the coresight registers if applicable */ + adreno_coresight_start(adreno_dev); + + adreno_irqctrl(adreno_dev, 1); + + adreno_perfcounter_start(adreno_dev); + + /* Clear FSR here in case it is set from a previous pagefault */ + kgsl_mmu_clear_fsr(&device->mmu); + + status = gpudev->rb_start(adreno_dev); + if (status) + goto error_pwr_off; + + /* + * At this point it is safe to assume that we recovered. 
Setting + * this field allows us to take a new snapshot for the next failure + * if we are prioritizing the first unrecoverable snapshot. + */ + if (device->snapshot) + device->snapshot->recovered = true; + + /* Start the dispatcher */ + adreno_dispatcher_start(device); + + device->reset_counter++; + + set_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv); + + /* + * There is a possible deadlock scenario during kgsl firmware reading + * (request_firmware) and devfreq update calls. During first boot, kgsl + * device mutex is held and then request_firmware is called for reading + * firmware. request_firmware internally takes dev_pm_qos_mtx lock. + * Whereas in case of devfreq update calls triggered by thermal/bcl or + * devfreq sysfs, it first takes the same dev_pm_qos_mtx lock and then + * tries to take kgsl device mutex as part of get_dev_status/target + * calls. This results in deadlock when both thread are unable to acquire + * the mutex held by other thread. Enable devfreq updates now as we are + * done reading all firmware files. + */ + device->pwrscale.devfreq_enabled = true; + + return 0; + +error_pwr_off: + /* set the state back to original state */ + kgsl_pwrctrl_change_state(device, state); + + return status; +} + +/** + * adreno_start() - Power up and initialize the GPU + * @device: Pointer to the KGSL device to power up + * @priority: Boolean flag to specify of the start should be scheduled in a low + * latency work queue + * + * Power up the GPU and initialize it. If priority is specified then elevate + * the thread priority for the duration of the start operation + */ +int adreno_start(struct kgsl_device *device, int priority) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + int nice = task_nice(current); + int ret; + + if (priority && (adreno_wake_nice < nice)) + set_user_nice(current, adreno_wake_nice); + + ret = _adreno_start(adreno_dev); + + if (priority) + set_user_nice(current, nice); + + return ret; +} + +static int adreno_stop(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + int error = 0; + + if (!test_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv)) + return 0; + + kgsl_pwrscale_update_stats(device); + + adreno_irqctrl(adreno_dev, 0); + + /* Save active coresight registers if applicable */ + adreno_coresight_stop(adreno_dev); + + /* Save physical performance counter values before GPU power down*/ + adreno_perfcounter_save(adreno_dev); + + if (gpudev->clear_pending_transactions) + gpudev->clear_pending_transactions(adreno_dev); + + adreno_dispatcher_stop(adreno_dev); + + adreno_ringbuffer_stop(adreno_dev); + + if (!IS_ERR_OR_NULL(adreno_dev->gpu_llc_slice)) + llcc_slice_deactivate(adreno_dev->gpu_llc_slice); + + if (!IS_ERR_OR_NULL(adreno_dev->gpuhtw_llc_slice)) + llcc_slice_deactivate(adreno_dev->gpuhtw_llc_slice); + + adreno_set_active_ctxs_null(adreno_dev); + + clear_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv); + + return error; +} + +/** + * adreno_reset() - Helper function to reset the GPU + * @device: Pointer to the KGSL device structure for the GPU + * @fault: Type of fault. Needed to skip soft reset for MMU fault + * + * Try to reset the GPU to recover from a fault. First, try to do a low latency + * soft reset. If the soft reset fails for some reason, then bring out the big + * guns and toggle the footswitch. 
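+ *
+ * In outline (matching the body below): a target-specific
+ * gpudev->reset() is preferred when implemented; otherwise a soft
+ * reset is attempted (skipped for IOMMU faults), and if that fails
+ * the device is powered down to KGSL_STATE_INIT and adreno_start()
+ * is retried with short sleeps in between, after which the device is
+ * put back to ACTIVE (if there are active users) or NAP. A recovery
+ * path might invoke it as, for example,
+ * adreno_reset(device, ADRENO_HARD_FAULT).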
+ */ +int adreno_reset(struct kgsl_device *device, int fault) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + int ret = -EINVAL; + int i; + + if (gpudev->reset) + return gpudev->reset(adreno_dev); + + /* + * Try soft reset first Do not do soft reset for a IOMMU fault (because + * the IOMMU hardware needs a reset too) + */ + + if (!(fault & ADRENO_IOMMU_PAGE_FAULT)) + ret = adreno_soft_reset(device); + + if (ret) { + /* If soft reset failed/skipped, then pull the power */ + kgsl_pwrctrl_change_state(device, KGSL_STATE_INIT); + /* since device is officially off now clear start bit */ + clear_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv); + + /* Try to reset the device */ + ret = adreno_start(device, 0); + + for (i = 0; ret && i < 4; i++) { + msleep(20); + ret = adreno_start(device, 0); + } + + if (ret) + return ret; + + if (i != 0) + dev_warn(device->dev, + "Device hard reset tried %d tries\n", i); + } + + /* + * If active_cnt is non-zero then the system was active before + * going into a reset - put it back in that state + */ + + if (atomic_read(&device->active_cnt)) + kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE); + else + kgsl_pwrctrl_change_state(device, KGSL_STATE_NAP); + + return ret; +} + +static int copy_prop(struct kgsl_device_getproperty *param, + void *src, size_t size) +{ + if (copy_to_user(param->value, src, + min_t(u32, size, param->sizebytes))) + return -EFAULT; + + return 0; +} + +static int adreno_prop_device_info(struct kgsl_device *device, + struct kgsl_device_getproperty *param) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_devinfo devinfo = { + .device_id = device->id + 1, + .chip_id = adreno_dev->chipid, + .mmu_enabled = kgsl_mmu_has_feature(device, KGSL_MMU_PAGED), + .gmem_gpubaseaddr = adreno_dev->gpucore->gmem_base, + .gmem_sizebytes = adreno_dev->gpucore->gmem_size, + }; + + return copy_prop(param, &devinfo, sizeof(devinfo)); +} + +static int adreno_prop_gpu_model(struct kgsl_device *device, + struct kgsl_device_getproperty *param) +{ + struct kgsl_gpu_model model = {0}; + + strlcpy(model.gpu_model, adreno_get_gpu_model(device), + sizeof(model.gpu_model)); + + return copy_prop(param, &model, sizeof(model)); +} + +static int adreno_prop_device_shadow(struct kgsl_device *device, + struct kgsl_device_getproperty *param) +{ + struct kgsl_shadowprop shadowprop = { 0 }; + + if (device->memstore->hostptr) { + /* Pass a dummy address to identify memstore */ + shadowprop.gpuaddr = KGSL_MEMSTORE_TOKEN_ADDRESS; + shadowprop.size = device->memstore->size; + + shadowprop.flags = KGSL_FLAGS_INITIALIZED | + KGSL_FLAGS_PER_CONTEXT_TIMESTAMPS; + } + + return copy_prop(param, &shadowprop, sizeof(shadowprop)); +} + +static int adreno_prop_device_qdss_stm(struct kgsl_device *device, + struct kgsl_device_getproperty *param) +{ + struct kgsl_qdss_stm_prop qdssprop = {0}; + + if (!IS_ERR_OR_NULL(device->qdss_desc)) { + qdssprop.gpuaddr = device->qdss_desc->gpuaddr; + qdssprop.size = device->qdss_desc->size; + } + + return copy_prop(param, &qdssprop, sizeof(qdssprop)); +} + +static int adreno_prop_device_qtimer(struct kgsl_device *device, + struct kgsl_device_getproperty *param) +{ + struct kgsl_qtimer_prop qtimerprop = {0}; + + if (!IS_ERR_OR_NULL(device->qtimer_desc)) { + qtimerprop.gpuaddr = device->qtimer_desc->gpuaddr; + qtimerprop.size = device->qtimer_desc->size; + } + + return copy_prop(param, &qtimerprop, sizeof(qtimerprop)); +} + +static int 
adreno_prop_s32(struct kgsl_device *device, + struct kgsl_device_getproperty *param) +{ + int val = 0; + + if (param->type == KGSL_PROP_MMU_ENABLE) + val = kgsl_mmu_has_feature(device, KGSL_MMU_PAGED); + else if (param->type == KGSL_PROP_INTERRUPT_WAITS) + val = 1; + + return copy_prop(param, &val, sizeof(val)); +} + +static int adreno_prop_uche_gmem_addr(struct kgsl_device *device, + struct kgsl_device_getproperty *param) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + u64 vaddr = adreno_dev->gpucore->gmem_base; + + return copy_prop(param, &vaddr, sizeof(vaddr)); +} + +static int adreno_prop_ucode_version(struct kgsl_device *device, + struct kgsl_device_getproperty *param) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_ucode_version ucode = { + .pfp = adreno_dev->fw[ADRENO_FW_PFP].version, + .pm4 = adreno_dev->fw[ADRENO_FW_PM4].version, + }; + + return copy_prop(param, &ucode, sizeof(ucode)); +} + +static int adreno_prop_gaming_bin(struct kgsl_device *device, + struct kgsl_device_getproperty *param) +{ + void *buf; + size_t len; + int ret; + struct nvmem_cell *cell; + + cell = nvmem_cell_get(&device->pdev->dev, "gaming_bin"); + if (IS_ERR(cell)) + return -EINVAL; + + buf = nvmem_cell_read(cell, &len); + nvmem_cell_put(cell); + + if (!IS_ERR(buf)) { + ret = copy_prop(param, buf, len); + kfree(buf); + return ret; + } + + dev_err(device->dev, "failed to read gaming_bin nvmem cell\n"); + return -EINVAL; +} + +static int adreno_prop_u32(struct kgsl_device *device, + struct kgsl_device_getproperty *param) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + u32 val = 0; + + if (param->type == KGSL_PROP_HIGHEST_BANK_BIT) { + val = adreno_dev->highest_bank_bit; + } else if (param->type == KGSL_PROP_MIN_ACCESS_LENGTH) + of_property_read_u32(device->pdev->dev.of_node, + "qcom,min-access-length", &val); + else if (param->type == KGSL_PROP_UBWC_MODE) + of_property_read_u32(device->pdev->dev.of_node, + "qcom,ubwc-mode", &val); + else if (param->type == KGSL_PROP_DEVICE_BITNESS) + val = adreno_support_64bit(adreno_dev) ? 
48 : 32; + else if (param->type == KGSL_PROP_SPEED_BIN) + val = device->speed_bin; + else if (param->type == KGSL_PROP_VK_DEVICE_ID) + val = adreno_get_vk_device_id(device); + + return copy_prop(param, &val, sizeof(val)); +} + +static const struct { + int type; + int (*func)(struct kgsl_device *device, + struct kgsl_device_getproperty *param); +} adreno_property_funcs[] = { + { KGSL_PROP_DEVICE_INFO, adreno_prop_device_info }, + { KGSL_PROP_DEVICE_SHADOW, adreno_prop_device_shadow }, + { KGSL_PROP_DEVICE_QDSS_STM, adreno_prop_device_qdss_stm }, + { KGSL_PROP_DEVICE_QTIMER, adreno_prop_device_qtimer }, + { KGSL_PROP_MMU_ENABLE, adreno_prop_s32 }, + { KGSL_PROP_INTERRUPT_WAITS, adreno_prop_s32 }, + { KGSL_PROP_UCHE_GMEM_VADDR, adreno_prop_uche_gmem_addr }, + { KGSL_PROP_UCODE_VERSION, adreno_prop_ucode_version }, + { KGSL_PROP_HIGHEST_BANK_BIT, adreno_prop_u32 }, + { KGSL_PROP_MIN_ACCESS_LENGTH, adreno_prop_u32 }, + { KGSL_PROP_UBWC_MODE, adreno_prop_u32 }, + { KGSL_PROP_DEVICE_BITNESS, adreno_prop_u32 }, + { KGSL_PROP_SPEED_BIN, adreno_prop_u32 }, + { KGSL_PROP_GAMING_BIN, adreno_prop_gaming_bin }, + { KGSL_PROP_GPU_MODEL, adreno_prop_gpu_model}, + { KGSL_PROP_VK_DEVICE_ID, adreno_prop_u32}, +}; + +static int adreno_getproperty(struct kgsl_device *device, + struct kgsl_device_getproperty *param) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(adreno_property_funcs); i++) { + if (param->type == adreno_property_funcs[i].type) + return adreno_property_funcs[i].func(device, param); + } + + return -ENODEV; +} + +static int adreno_query_property_list(struct kgsl_device *device, u32 *list, + u32 count) +{ + int i; + + if (!list) + return ARRAY_SIZE(adreno_property_funcs); + + for (i = 0; i < count && i < ARRAY_SIZE(adreno_property_funcs); i++) + list[i] = adreno_property_funcs[i].type; + + return i; +} + +int adreno_set_constraint(struct kgsl_device *device, + struct kgsl_context *context, + struct kgsl_device_constraint *constraint) +{ + int status = 0; + + switch (constraint->type) { + case KGSL_CONSTRAINT_PWRLEVEL: { + struct kgsl_device_constraint_pwrlevel pwr; + + if (constraint->size != sizeof(pwr)) { + status = -EINVAL; + break; + } + + if (copy_from_user(&pwr, + (void __user *)constraint->data, + sizeof(pwr))) { + status = -EFAULT; + break; + } + if (pwr.level >= KGSL_CONSTRAINT_PWR_MAXLEVELS) { + status = -EINVAL; + break; + } + + context->pwr_constraint.type = + KGSL_CONSTRAINT_PWRLEVEL; + context->pwr_constraint.sub_type = pwr.level; + trace_kgsl_user_pwrlevel_constraint(device, + context->id, + context->pwr_constraint.type, + context->pwr_constraint.sub_type); + } + break; + case KGSL_CONSTRAINT_NONE: + if (context->pwr_constraint.type == KGSL_CONSTRAINT_PWRLEVEL) + trace_kgsl_user_pwrlevel_constraint(device, + context->id, + KGSL_CONSTRAINT_NONE, + context->pwr_constraint.sub_type); + context->pwr_constraint.type = KGSL_CONSTRAINT_NONE; + break; + case KGSL_CONSTRAINT_L3_PWRLEVEL: { + struct kgsl_device_constraint_pwrlevel pwr; + + if (constraint->size != sizeof(pwr)) { + status = -EINVAL; + break; + } + + if (copy_from_user(&pwr, constraint->data, sizeof(pwr))) { + status = -EFAULT; + break; + } + + status = register_l3_voter(device); + if (status) + break; + + if (pwr.level >= KGSL_CONSTRAINT_PWR_MAXLEVELS) + pwr.level = KGSL_CONSTRAINT_PWR_MAXLEVELS - 1; + + context->l3_pwr_constraint.type = KGSL_CONSTRAINT_L3_PWRLEVEL; + context->l3_pwr_constraint.sub_type = pwr.level; + trace_kgsl_user_pwrlevel_constraint(device, context->id, + context->l3_pwr_constraint.type, + 
context->l3_pwr_constraint.sub_type); + } + break; + case KGSL_CONSTRAINT_L3_NONE: { + unsigned int type = context->l3_pwr_constraint.type; + + if (type == KGSL_CONSTRAINT_L3_PWRLEVEL) + trace_kgsl_user_pwrlevel_constraint(device, context->id, + KGSL_CONSTRAINT_L3_NONE, + context->l3_pwr_constraint.sub_type); + context->l3_pwr_constraint.type = KGSL_CONSTRAINT_L3_NONE; + } + break; + default: + status = -EINVAL; + break; + } + + /* If a new constraint has been set for a context, cancel the old one */ + if ((status == 0) && + (context->id == device->pwrctrl.constraint.owner_id)) { + trace_kgsl_constraint(device, device->pwrctrl.constraint.type, + device->pwrctrl.active_pwrlevel, 0); + device->pwrctrl.constraint.type = KGSL_CONSTRAINT_NONE; + } + + return status; +} + +static int adreno_setproperty(struct kgsl_device_private *dev_priv, + unsigned int type, + void __user *value, + unsigned int sizebytes) +{ + int status = -EINVAL; + struct kgsl_device *device = dev_priv->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + switch (type) { + case KGSL_PROP_PWR_CONSTRAINT: + case KGSL_PROP_L3_PWR_CONSTRAINT: { + struct kgsl_device_constraint constraint; + struct kgsl_context *context; + + if (sizebytes != sizeof(constraint)) + break; + + if (copy_from_user(&constraint, value, + sizeof(constraint))) { + status = -EFAULT; + break; + } + + context = kgsl_context_get_owner(dev_priv, + constraint.context_id); + + if (context == NULL) + break; + + status = adreno_set_constraint(device, context, + &constraint); + + kgsl_context_put(context); + } + break; + default: + status = gpudev->setproperty(dev_priv, type, value, sizebytes); + break; + } + + return status; +} + +/* + * adreno_soft_reset - Do a soft reset of the GPU hardware + * @device: KGSL device to soft reset + * + * "soft reset" the GPU hardware - this is a fast path GPU reset + * The GPU hardware is reset but we never pull power so we can skip + * a lot of the standard adreno_stop/adreno_start sequence + */ +static int adreno_soft_reset(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + int ret; + + /* + * Don't allow a soft reset for a304 because the SMMU needs to be hard + * reset + */ + if (adreno_is_a304(adreno_dev)) + return -ENODEV; + + if (gpudev->clear_pending_transactions) { + ret = gpudev->clear_pending_transactions(adreno_dev); + if (ret) + return ret; + } + + kgsl_pwrctrl_change_state(device, KGSL_STATE_AWARE); + adreno_set_active_ctxs_null(adreno_dev); + + adreno_irqctrl(adreno_dev, 0); + + adreno_clear_gpu_fault(adreno_dev); + /* since device is oficially off now clear start bit */ + clear_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv); + + /* save physical performance counter values before GPU soft reset */ + adreno_perfcounter_save(adreno_dev); + + _soft_reset(adreno_dev); + + /* Clear the busy_data stats - we're starting over from scratch */ + adreno_dev->busy_data.gpu_busy = 0; + adreno_dev->busy_data.bif_ram_cycles = 0; + adreno_dev->busy_data.bif_ram_cycles_read_ch1 = 0; + adreno_dev->busy_data.bif_ram_cycles_write_ch0 = 0; + adreno_dev->busy_data.bif_ram_cycles_write_ch1 = 0; + adreno_dev->busy_data.bif_starved_ram = 0; + adreno_dev->busy_data.bif_starved_ram_ch1 = 0; + + /* Set the page table back to the default page table */ + adreno_ringbuffer_set_global(adreno_dev, 0); + + /* Reinitialize the GPU */ + 
gpudev->start(adreno_dev); + + /* Re-initialize the coresight registers if applicable */ + adreno_coresight_start(adreno_dev); + + /* Enable IRQ */ + adreno_irqctrl(adreno_dev, 1); + + /* stop all ringbuffers to cancel RB events */ + adreno_ringbuffer_stop(adreno_dev); + + /* Start the ringbuffer(s) again */ + ret = gpudev->rb_start(adreno_dev); + if (ret == 0) { + device->reset_counter++; + set_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv); + } + + /* Restore physical performance counter values after soft reset */ + adreno_perfcounter_restore(adreno_dev); + + if (ret) + dev_err(device->dev, "Device soft reset failed: %d\n", ret); + + return ret; +} + +bool adreno_isidle(struct adreno_device *adreno_dev) +{ + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + struct adreno_ringbuffer *rb; + int i; + + if (!kgsl_state_is_awake(KGSL_DEVICE(adreno_dev))) + return true; + + /* + * wptr is updated when we add commands to ringbuffer, add a barrier + * to make sure updated wptr is compared to rptr + */ + smp_mb(); + + /* + * ringbuffer is truly idle when all ringbuffers read and write + * pointers are equal + */ + + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + if (!adreno_rb_empty(rb)) + return false; + } + + return gpudev->hw_isidle(adreno_dev); +} + +/** + * adreno_spin_idle() - Spin wait for the GPU to idle + * @adreno_dev: Pointer to an adreno device + * @timeout: milliseconds to wait before returning error + * + * Spin the CPU waiting for the RBBM status to return idle + */ +int adreno_spin_idle(struct adreno_device *adreno_dev, unsigned int timeout) +{ + unsigned long wait = jiffies + msecs_to_jiffies(timeout); + + do { + /* + * If we fault, stop waiting and return an error. The dispatcher + * will clean up the fault from the work queue, but we need to + * make sure we don't block it by waiting for an idle that + * will never come. + */ + + if (adreno_gpu_fault(adreno_dev) != 0) + return -EDEADLK; + + if (adreno_isidle(adreno_dev)) + return 0; + + } while (time_before(jiffies, wait)); + + /* + * Under rare conditions, preemption can cause the while loop to exit + * without checking if the gpu is idle. check one last time before we + * return failure. + */ + if (adreno_gpu_fault(adreno_dev) != 0) + return -EDEADLK; + + if (adreno_isidle(adreno_dev)) + return 0; + + return -ETIMEDOUT; +} + +/** + * adreno_idle() - wait for the GPU hardware to go idle + * @device: Pointer to the KGSL device structure for the GPU + * + * Wait up to ADRENO_IDLE_TIMEOUT milliseconds for the GPU hardware to go quiet. + * Caller must hold the device mutex, and must not hold the dispatcher mutex. + */ + +int adreno_idle(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + int ret; + + /* + * Make sure the device mutex is held so the dispatcher can't send any + * more commands to the hardware + */ + + if (WARN_ON(!mutex_is_locked(&device->mutex))) + return -EDEADLK; + + /* Check if we are already idle before idling dispatcher */ + if (adreno_isidle(adreno_dev)) + return 0; + /* + * Wait for dispatcher to finish completing commands + * already submitted + */ + ret = adreno_dispatcher_idle(adreno_dev); + if (ret) + return ret; + + return adreno_spin_idle(adreno_dev, ADRENO_IDLE_TIMEOUT); +} + +static int adreno_drain_and_idle(struct kgsl_device *device) +{ + int ret; + + reinit_completion(&device->halt_gate); + + ret = kgsl_active_count_wait(device, 0, HZ); + if (ret) + return ret; + + return adreno_idle(device); +} + +/* Caller must hold the device mutex. 
*/ +int adreno_suspend_context(struct kgsl_device *device) +{ + /* process any profiling results that are available */ + adreno_profile_process_results(ADRENO_DEVICE(device)); + + /* Wait for the device to go idle */ + return adreno_idle(device); +} + +bool adreno_is_cx_dbgc_register(struct kgsl_device *device, + unsigned int offsetwords) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + return adreno_dev->cx_dbgc_virt && + (offsetwords >= (adreno_dev->cx_dbgc_base >> 2)) && + (offsetwords < (adreno_dev->cx_dbgc_base + + adreno_dev->cx_dbgc_len) >> 2); +} + +void adreno_cx_dbgc_regread(struct kgsl_device *device, + unsigned int offsetwords, unsigned int *value) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + unsigned int cx_dbgc_offset; + + if (!adreno_is_cx_dbgc_register(device, offsetwords)) + return; + + cx_dbgc_offset = (offsetwords << 2) - adreno_dev->cx_dbgc_base; + *value = __raw_readl(adreno_dev->cx_dbgc_virt + cx_dbgc_offset); + + /* + * ensure this read finishes before the next one. + * i.e. act like normal readl() + */ + rmb(); +} + +void adreno_cx_dbgc_regwrite(struct kgsl_device *device, + unsigned int offsetwords, unsigned int value) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + unsigned int cx_dbgc_offset; + + if (!adreno_is_cx_dbgc_register(device, offsetwords)) + return; + + cx_dbgc_offset = (offsetwords << 2) - adreno_dev->cx_dbgc_base; + trace_kgsl_regwrite(offsetwords, value); + + /* + * ensure previous writes post before this one, + * i.e. act like normal writel() + */ + wmb(); + __raw_writel(value, adreno_dev->cx_dbgc_virt + cx_dbgc_offset); +} + +void adreno_cx_misc_regread(struct adreno_device *adreno_dev, + unsigned int offsetwords, unsigned int *value) +{ + unsigned int cx_misc_offset; + + cx_misc_offset = (offsetwords << 2); + if (!adreno_dev->cx_misc_virt || + (cx_misc_offset >= adreno_dev->cx_misc_len)) + return; + + *value = __raw_readl(adreno_dev->cx_misc_virt + cx_misc_offset); + + /* + * ensure this read finishes before the next one. + * i.e. act like normal readl() + */ + rmb(); +} + +void adreno_isense_regread(struct adreno_device *adreno_dev, + unsigned int offsetwords, unsigned int *value) +{ + unsigned int isense_offset; + + isense_offset = (offsetwords << 2); + if (!adreno_dev->isense_virt || + (isense_offset >= adreno_dev->isense_len)) + return; + + *value = __raw_readl(adreno_dev->isense_virt + isense_offset); + + /* + * ensure this read finishes before the next one. + * i.e. act like normal readl() + */ + rmb(); +} + +void adreno_cx_misc_regwrite(struct adreno_device *adreno_dev, + unsigned int offsetwords, unsigned int value) +{ + unsigned int cx_misc_offset; + + cx_misc_offset = (offsetwords << 2); + if (!adreno_dev->cx_misc_virt || + (cx_misc_offset >= adreno_dev->cx_misc_len)) + return; + + /* + * ensure previous writes post before this one, + * i.e. 
act like normal writel() + */ + wmb(); + __raw_writel(value, adreno_dev->cx_misc_virt + cx_misc_offset); +} + +void adreno_cx_misc_regrmw(struct adreno_device *adreno_dev, + unsigned int offsetwords, + unsigned int mask, unsigned int bits) +{ + unsigned int val = 0; + + adreno_cx_misc_regread(adreno_dev, offsetwords, &val); + val &= ~mask; + adreno_cx_misc_regwrite(adreno_dev, offsetwords, val | bits); +} + +void adreno_profile_submit_time(struct adreno_submit_time *time) +{ + struct kgsl_drawobj *drawobj; + struct kgsl_drawobj_cmd *cmdobj; + struct kgsl_mem_entry *entry; + struct kgsl_drawobj_profiling_buffer *profile_buffer; + + if (!time) + return; + + drawobj = time->drawobj; + if (drawobj == NULL) + return; + + cmdobj = CMDOBJ(drawobj); + entry = cmdobj->profiling_buf_entry; + if (!entry) + return; + + profile_buffer = kgsl_gpuaddr_to_vaddr(&entry->memdesc, + cmdobj->profiling_buffer_gpuaddr); + + if (profile_buffer == NULL) + return; + + /* Return kernel clock time to the client if requested */ + if (drawobj->flags & KGSL_DRAWOBJ_PROFILING_KTIME) { + u64 secs = time->ktime; + + profile_buffer->wall_clock_ns = + do_div(secs, NSEC_PER_SEC); + profile_buffer->wall_clock_s = secs; + } else { + profile_buffer->wall_clock_s = time->utime.tv_sec; + profile_buffer->wall_clock_ns = time->utime.tv_nsec; + } + + profile_buffer->gpu_ticks_queued = time->ticks; + + kgsl_memdesc_unmap(&entry->memdesc); +} + +/** + * adreno_waittimestamp - sleep while waiting for the specified timestamp + * @device - pointer to a KGSL device structure + * @context - pointer to the active kgsl context + * @timestamp - GPU timestamp to wait for + * @msecs - amount of time to wait (in milliseconds) + * + * Wait up to 'msecs' milliseconds for the specified timestamp to expire. + */ +static int adreno_waittimestamp(struct kgsl_device *device, + struct kgsl_context *context, + unsigned int timestamp, + unsigned int msecs) +{ + int ret; + + if (context == NULL) { + /* If they are doing then complain once */ + dev_WARN_ONCE(device->dev, 1, + "IOCTL_KGSL_DEVICE_WAITTIMESTAMP is deprecated\n"); + return -ENOTTY; + } + + /* Return -ENOENT if the context has been detached */ + if (kgsl_context_detached(context)) + return -ENOENT; + + ret = adreno_drawctxt_wait(ADRENO_DEVICE(device), context, + timestamp, msecs); + + /* If the context got invalidated then return a specific error */ + if (kgsl_context_invalid(context)) + ret = -EDEADLK; + + /* + * Return -EPROTO if the device has faulted since the last time we + * checked. 
Userspace uses this as a marker for performing post + * fault activities + */ + + if (!ret && test_and_clear_bit(ADRENO_CONTEXT_FAULT, &context->priv)) + ret = -EPROTO; + + return ret; +} + +/** + * __adreno_readtimestamp() - Reads the timestamp from memstore memory + * @adreno_dev: Pointer to an adreno device + * @index: Index into the memstore memory + * @type: Type of timestamp to read + * @timestamp: The out parameter where the timestamp is read + */ +static int __adreno_readtimestamp(struct adreno_device *adreno_dev, int index, + int type, unsigned int *timestamp) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int status = 0; + + switch (type) { + case KGSL_TIMESTAMP_CONSUMED: + kgsl_sharedmem_readl(device->memstore, timestamp, + KGSL_MEMSTORE_OFFSET(index, soptimestamp)); + break; + case KGSL_TIMESTAMP_RETIRED: + kgsl_sharedmem_readl(device->memstore, timestamp, + KGSL_MEMSTORE_OFFSET(index, eoptimestamp)); + break; + default: + status = -EINVAL; + *timestamp = 0; + break; + } + return status; +} + +/** + * adreno_rb_readtimestamp(): Return the value of given type of timestamp + * for a RB + * @adreno_dev: adreno device whose timestamp values are being queried + * @priv: The object being queried for a timestamp (expected to be a rb pointer) + * @type: The type of timestamp (one of 3) to be read + * @timestamp: Pointer to where the read timestamp is to be written to + * + * CONSUMED and RETIRED type timestamps are sorted by id and are constantly + * updated by the GPU through shared memstore memory. QUEUED type timestamps + * are read directly from context struct. + + * The function returns 0 on success and timestamp value at the *timestamp + * address and returns -EINVAL on any read error/invalid type and timestamp = 0. + */ +int adreno_rb_readtimestamp(struct adreno_device *adreno_dev, + void *priv, enum kgsl_timestamp_type type, + unsigned int *timestamp) +{ + int status = 0; + struct adreno_ringbuffer *rb = priv; + + if (type == KGSL_TIMESTAMP_QUEUED) + *timestamp = rb->timestamp; + else + status = __adreno_readtimestamp(adreno_dev, + rb->id + KGSL_MEMSTORE_MAX, + type, timestamp); + + return status; +} + +/** + * adreno_readtimestamp(): Return the value of given type of timestamp + * @device: GPU device whose timestamp values are being queried + * @priv: The object being queried for a timestamp (expected to be a context) + * @type: The type of timestamp (one of 3) to be read + * @timestamp: Pointer to where the read timestamp is to be written to + * + * CONSUMED and RETIRED type timestamps are sorted by id and are constantly + * updated by the GPU through shared memstore memory. QUEUED type timestamps + * are read directly from context struct. + + * The function returns 0 on success and timestamp value at the *timestamp + * address and returns -EINVAL on any read error/invalid type and timestamp = 0. 
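+ *
+ * Illustrative use (this is the .readtimestamp hook in
+ * adreno_functable; the local variable name is only for the example):
+ *
+ *	unsigned int retired;
+ *
+ *	if (!adreno_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED,
+ *			&retired))
+ *		... "retired" holds the last retired timestamp ...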
+ */ +static int adreno_readtimestamp(struct kgsl_device *device, + void *priv, enum kgsl_timestamp_type type, + unsigned int *timestamp) +{ + int status = 0; + struct kgsl_context *context = priv; + + if (type == KGSL_TIMESTAMP_QUEUED) { + struct adreno_context *ctxt = ADRENO_CONTEXT(context); + + *timestamp = ctxt->timestamp; + } else + status = __adreno_readtimestamp(ADRENO_DEVICE(device), + context->id, type, timestamp); + + return status; +} + +/** + * adreno_device_private_create(): Allocate an adreno_device_private structure + */ +static struct kgsl_device_private *adreno_device_private_create(void) +{ + struct adreno_device_private *adreno_priv = + kzalloc(sizeof(*adreno_priv), GFP_KERNEL); + + if (adreno_priv) { + INIT_LIST_HEAD(&adreno_priv->perfcounter_list); + return &adreno_priv->dev_priv; + } + return NULL; +} + +/** + * adreno_device_private_destroy(): Destroy an adreno_device_private structure + * and release the perfcounters held by the kgsl fd. + * @dev_priv: The kgsl device private structure + */ +static void adreno_device_private_destroy(struct kgsl_device_private *dev_priv) +{ + struct kgsl_device *device = dev_priv->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_device_private *adreno_priv = + container_of(dev_priv, struct adreno_device_private, + dev_priv); + struct adreno_perfcounter_list_node *p, *tmp; + + mutex_lock(&device->mutex); + list_for_each_entry_safe(p, tmp, &adreno_priv->perfcounter_list, node) { + adreno_perfcounter_put(adreno_dev, p->groupid, + p->countable, PERFCOUNTER_FLAG_NONE); + list_del(&p->node); + kfree(p); + } + mutex_unlock(&device->mutex); + + kfree(adreno_priv); +} + +/** + * adreno_power_stats() - Reads the counters needed for freq decisions + * @device: Pointer to device whose counters are read + * @stats: Pointer to stats set that needs updating + * Power: The caller is expected to be in a clock enabled state as this + * function does reg reads + */ +static void adreno_power_stats(struct kgsl_device *device, + struct kgsl_power_stats *stats) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + return gpudev->power_stats(adreno_dev, stats); +} + +static int adreno_regulator_enable(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + if (gpudev->regulator_enable) + return gpudev->regulator_enable(adreno_dev); + + return 0; +} + +static bool adreno_is_hw_collapsible(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + if (!gpudev->is_hw_collapsible(adreno_dev)) + return false; + + if (gpudev->clear_pending_transactions(adreno_dev)) + return false; + + adreno_dispatcher_stop_fault_timer(device); + + return true; +} + +static void adreno_regulator_disable(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + if (gpudev->regulator_disable) + gpudev->regulator_disable(adreno_dev); +} + +static void adreno_pwrlevel_change_settings(struct kgsl_device *device, + unsigned int prelevel, unsigned int postlevel, bool post) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + if (gpudev->pwrlevel_change_settings) + 
gpudev->pwrlevel_change_settings(adreno_dev, prelevel, + postlevel, post); +} + +static void adreno_clk_set_options(struct kgsl_device *device, const char *name, + struct clk *clk, bool on) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + if (gpudev->clk_set_options) + gpudev->clk_set_options(adreno_dev, name, clk, on); +} + +static bool adreno_is_hwcg_on(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + return adreno_dev->hwcg_enabled; +} + +static int adreno_queue_cmds(struct kgsl_device_private *dev_priv, + struct kgsl_context *context, struct kgsl_drawobj *drawobj[], + u32 count, u32 *timestamp) +{ + struct kgsl_device *device = dev_priv->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + if (WARN_ON(!adreno_dev->dispatch_ops || !adreno_dev->dispatch_ops->queue_cmds)) + return -ENODEV; + + return adreno_dev->dispatch_ops->queue_cmds(dev_priv, context, drawobj, + count, timestamp); +} + +static void adreno_drawctxt_sched(struct kgsl_device *device, + struct kgsl_context *context) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + if (WARN_ON(!adreno_dev->dispatch_ops || !adreno_dev->dispatch_ops->queue_context)) + return; + + adreno_dev->dispatch_ops->queue_context(adreno_dev, + ADRENO_CONTEXT(context)); +} + +int adreno_power_cycle(struct adreno_device *adreno_dev, + void (*callback)(struct adreno_device *adreno_dev, void *priv), + void *priv) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + const struct adreno_power_ops *ops = ADRENO_POWER_OPS(adreno_dev); + int ret; + + mutex_lock(&device->mutex); + ret = ops->pm_suspend(adreno_dev); + + if (!ret) { + callback(adreno_dev, priv); + ops->pm_resume(adreno_dev); + } + + mutex_unlock(&device->mutex); + + return ret; +} + +struct cycle_data { + void *ptr; + void *val; +}; + +static void cycle_set_bool(struct adreno_device *adreno_dev, void *priv) +{ + struct cycle_data *data = priv; + + *((bool *) data->ptr) = *((bool *) data->val); +} + +int adreno_power_cycle_bool(struct adreno_device *adreno_dev, + bool *flag, bool val) +{ + struct cycle_data data = { .ptr = flag, .val = &val }; + + return adreno_power_cycle(adreno_dev, cycle_set_bool, &data); +} + +static void cycle_set_u32(struct adreno_device *adreno_dev, void *priv) +{ + struct cycle_data *data = priv; + + *((u32 *) data->ptr) = *((u32 *) data->val); +} + +int adreno_power_cycle_u32(struct adreno_device *adreno_dev, + u32 *flag, u32 val) +{ + struct cycle_data data = { .ptr = flag, .val = &val }; + + return adreno_power_cycle(adreno_dev, cycle_set_u32, &data); +} + +static int adreno_gpu_clock_set(struct kgsl_device *device, u32 pwrlevel) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_power_ops *ops = ADRENO_POWER_OPS(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct kgsl_pwrlevel *pl = &pwr->pwrlevels[pwrlevel]; + int ret; + + if (ops->gpu_clock_set) + return ops->gpu_clock_set(adreno_dev, pwrlevel); + + ret = clk_set_rate(pwr->grp_clks[0], pl->gpu_freq); + if (ret) + dev_err(device->dev, "GPU clk freq set failure: %d\n", ret); + + return ret; +} + +static int adreno_interconnect_bus_set(struct adreno_device *adreno_dev, + int level, u32 ab) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + + if ((level == pwr->cur_buslevel) && (ab == pwr->cur_ab)) + return 0; + + pwr->cur_buslevel = level; 
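+ /*
+ * Note on units (inferred from the conversion helpers below): ab is
+ * an absolute bandwidth vote in MB/s and ddr_table[] holds kB/s
+ * values; both are converted to icc units before the bus vote.
+ */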
+ pwr->cur_ab = ab; + + icc_set_bw(pwr->icc_path, MBps_to_icc(ab), + kBps_to_icc(pwr->ddr_table[level])); + + trace_kgsl_buslevel(device, pwr->active_pwrlevel, level); + + return 0; +} + +static int adreno_gpu_bus_set(struct kgsl_device *device, int level, u32 ab) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_power_ops *ops = ADRENO_POWER_OPS(adreno_dev); + + if (ops->gpu_bus_set) + return ops->gpu_bus_set(adreno_dev, level, ab); + + return adreno_interconnect_bus_set(adreno_dev, level, ab); +} + +static void adreno_deassert_gbif_halt(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + if (gpudev->deassert_gbif_halt) + gpudev->deassert_gbif_halt(adreno_dev); +} + +static const struct kgsl_functable adreno_functable = { + /* Mandatory functions */ + .suspend_context = adreno_suspend_context, + .first_open = adreno_first_open, + .start = adreno_start, + .stop = adreno_stop, + .last_close = adreno_last_close, + .getproperty = adreno_getproperty, + .getproperty_compat = adreno_getproperty_compat, + .waittimestamp = adreno_waittimestamp, + .readtimestamp = adreno_readtimestamp, + .queue_cmds = adreno_queue_cmds, + .ioctl = adreno_ioctl, + .compat_ioctl = adreno_compat_ioctl, + .power_stats = adreno_power_stats, + .snapshot = adreno_snapshot, + .drain_and_idle = adreno_drain_and_idle, + .device_private_create = adreno_device_private_create, + .device_private_destroy = adreno_device_private_destroy, + /* Optional functions */ + .drawctxt_create = adreno_drawctxt_create, + .drawctxt_detach = adreno_drawctxt_detach, + .drawctxt_destroy = adreno_drawctxt_destroy, + .drawctxt_dump = adreno_drawctxt_dump, + .setproperty = adreno_setproperty, + .setproperty_compat = adreno_setproperty_compat, + .drawctxt_sched = adreno_drawctxt_sched, + .resume = adreno_dispatcher_start, + .regulator_enable = adreno_regulator_enable, + .is_hw_collapsible = adreno_is_hw_collapsible, + .regulator_disable = adreno_regulator_disable, + .pwrlevel_change_settings = adreno_pwrlevel_change_settings, + .clk_set_options = adreno_clk_set_options, + .query_property_list = adreno_query_property_list, + .is_hwcg_on = adreno_is_hwcg_on, + .gpu_clock_set = adreno_gpu_clock_set, + .gpu_bus_set = adreno_gpu_bus_set, + .deassert_gbif_halt = adreno_deassert_gbif_halt, +}; + +static const struct component_master_ops adreno_ops = { + .bind = adreno_bind, + .unbind = adreno_unbind, +}; + +const struct adreno_power_ops adreno_power_operations = { + .first_open = adreno_open, + .last_close = adreno_close, + .active_count_get = adreno_pwrctrl_active_count_get, + .active_count_put = adreno_pwrctrl_active_count_put, + .pm_suspend = adreno_suspend, + .pm_resume = adreno_resume, + .touch_wakeup = adreno_touch_wakeup, +}; + +static int _compare_of(struct device *dev, void *data) +{ + return (dev->of_node == data); +} + +static void _release_of(struct device *dev, void *data) +{ + of_node_put(data); +} + +static void adreno_add_gmu_components(struct device *dev, + struct component_match **match) +{ + struct device_node *node; + + node = of_find_matching_node(NULL, adreno_gmu_match); + if (!node) + return; + + if (!of_device_is_available(node)) { + of_node_put(node); + return; + } + + component_match_add_release(dev, match, _release_of, + _compare_of, node); +} + +static int adreno_probe(struct platform_device *pdev) +{ + struct component_match *match = NULL; + + adreno_add_gmu_components(&pdev->dev, 
&match); + + if (match) + return component_master_add_with_match(&pdev->dev, + &adreno_ops, match); + else + return adreno_bind(&pdev->dev); +} + +static int adreno_remove(struct platform_device *pdev) +{ + if (of_find_matching_node(NULL, adreno_gmu_match)) + component_master_del(&pdev->dev, &adreno_ops); + else + adreno_unbind(&pdev->dev); + + return 0; +} + +static const struct dev_pm_ops adreno_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(adreno_pm_suspend, adreno_pm_resume) +}; + +static struct platform_driver adreno_platform_driver = { + .probe = adreno_probe, + .remove = adreno_remove, + .driver = { + .name = "kgsl-3d", + .pm = &adreno_pm_ops, + .of_match_table = of_match_ptr(adreno_match_table), + } +}; + +static int __init kgsl_3d_init(void) +{ + int ret; + + ret = kgsl_core_init(); + if (ret) + return ret; + + gmu_core_register(); + ret = platform_driver_register(&adreno_platform_driver); + if (ret) + kgsl_core_exit(); + + return ret; +} + +static void __exit kgsl_3d_exit(void) +{ + platform_driver_unregister(&adreno_platform_driver); + gmu_core_unregister(); + kgsl_core_exit(); +} + +module_init(kgsl_3d_init); +module_exit(kgsl_3d_exit); + +MODULE_DESCRIPTION("3D Graphics driver"); +MODULE_LICENSE("GPL v2"); +MODULE_SOFTDEP("pre: qcom-arm-smmu-mod nvmem_qfprom"); diff --git a/adreno.h b/adreno.h new file mode 100644 index 0000000000..0ed5a5f282 --- /dev/null +++ b/adreno.h @@ -0,0 +1,1781 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2008-2021, The Linux Foundation. All rights reserved. + */ +#ifndef __ADRENO_H +#define __ADRENO_H + +#include +#include +#include "adreno_coresight.h" +#include "adreno_dispatch.h" +#include "adreno_drawctxt.h" +#include "adreno_hwsched.h" +#include "adreno_perfcounter.h" +#include "adreno_profile.h" +#include "adreno_ringbuffer.h" +#include "kgsl_sharedmem.h" + +/* ADRENO_DEVICE - Given a kgsl_device return the adreno device struct */ +#define ADRENO_DEVICE(device) \ + container_of(device, struct adreno_device, dev) + +/* KGSL_DEVICE - given an adreno_device, return the KGSL device struct */ +#define KGSL_DEVICE(_dev) (&((_dev)->dev)) + +/* ADRENO_CONTEXT - Given a context return the adreno context struct */ +#define ADRENO_CONTEXT(context) \ + container_of(context, struct adreno_context, base) + +/* ADRENO_GPU_DEVICE - Given an adreno device return the GPU specific struct */ +#define ADRENO_GPU_DEVICE(_a) ((_a)->gpucore->gpudev) + +/* + * ADRENO_POWER_OPS - Given an adreno device return the GPU specific power + * ops + */ +#define ADRENO_POWER_OPS(_a) ((_a)->gpucore->gpudev->power_ops) + +#define ADRENO_CHIPID_CORE(_id) FIELD_GET(GENMASK(31, 24), _id) +#define ADRENO_CHIPID_MAJOR(_id) FIELD_GET(GENMASK(23, 16), _id) +#define ADRENO_CHIPID_MINOR(_id) FIELD_GET(GENMASK(15, 8), _id) +#define ADRENO_CHIPID_PATCH(_id) FIELD_GET(GENMASK(7, 0), _id) + +#define ADRENO_GMU_CHIPID(_id) \ + (FIELD_PREP(GENMASK(31, 24), ADRENO_CHIPID_CORE(_id)) | \ + FIELD_PREP(GENMASK(23, 16), ADRENO_CHIPID_MAJOR(_id)) | \ + FIELD_PREP(GENMASK(15, 12), ADRENO_CHIPID_MINOR(_id)) | \ + FIELD_PREP(GENMASK(11, 8), ADRENO_CHIPID_PATCH(_id))) + +/* ADRENO_GPUREV - Return the GPU ID for the given adreno_device */ +#define ADRENO_GPUREV(_a) ((_a)->gpucore->gpurev) + +/* + * ADRENO_FEATURE - return true if the specified feature is supported by the GPU + * core + */ +#define ADRENO_FEATURE(_dev, _bit) \ + ((_dev)->gpucore->features & (_bit)) + +/** + * ADRENO_QUIRK - return true if the specified quirk is required by the GPU + */ +#define ADRENO_QUIRK(_dev, _bit) \ + 
((_dev)->quirks & (_bit)) + +#define ADRENO_FW(a, f) (&(a->fw[f])) + +/* Adreno core features */ +/* The core supports SP/TP hw controlled power collapse */ +#define ADRENO_SPTP_PC BIT(0) +/* The GPU supports content protection */ +#define ADRENO_CONTENT_PROTECTION BIT(1) +/* The GPU supports preemption */ +#define ADRENO_PREEMPTION BIT(2) +/* The GPMU supports Limits Management */ +#define ADRENO_LM BIT(3) +/* The GPU supports retention for cpz registers */ +#define ADRENO_CPZ_RETENTION BIT(4) +/* The core has soft fault detection available */ +#define ADRENO_SOFT_FAULT_DETECT BIT(5) +/* The GMU supports IFPC power management*/ +#define ADRENO_IFPC BIT(6) +/* The core supports IO-coherent memory */ +#define ADRENO_IOCOHERENT BIT(7) +/* + * The GMU supports Adaptive Clock Distribution (ACD) + * for droop mitigation + */ +#define ADRENO_ACD BIT(8) +/* Cooperative reset enabled GMU */ +#define ADRENO_COOP_RESET BIT(9) +/* Indicates that the specific target is no longer supported */ +#define ADRENO_DEPRECATED BIT(10) +/* The target supports ringbuffer level APRIV */ +#define ADRENO_APRIV BIT(11) +/* The GMU supports Battery Current Limiting */ +#define ADRENO_BCL BIT(12) +/* L3 voting is supported with L3 constraints */ +#define ADRENO_L3_VOTE BIT(13) + +/* + * Adreno GPU quirks - control bits for various workarounds + */ + +/* Set TWOPASSUSEWFI in PC_DBG_ECO_CNTL (5XX/6XX) */ +#define ADRENO_QUIRK_TWO_PASS_USE_WFI BIT(0) +/* Submit critical packets at GPU wake up */ +#define ADRENO_QUIRK_CRITICAL_PACKETS BIT(1) +/* Mask out RB1-3 activity signals from HW hang detection logic */ +#define ADRENO_QUIRK_FAULT_DETECT_MASK BIT(2) +/* Disable RB sampler datapath clock gating optimization */ +#define ADRENO_QUIRK_DISABLE_RB_DP2CLOCKGATING BIT(3) +/* Disable local memory(LM) feature to avoid corner case error */ +#define ADRENO_QUIRK_DISABLE_LMLOADKILL BIT(4) +/* Allow HFI to use registers to send message to GMU */ +#define ADRENO_QUIRK_HFI_USE_REG BIT(5) +/* Only set protected SECVID registers once */ +#define ADRENO_QUIRK_SECVID_SET_ONCE BIT(6) +/* + * Limit number of read and write transactions from + * UCHE block to GBIF to avoid possible deadlock + * between GBIF, SMMU and MEMNOC. 
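+ *
+ * Like the other quirk bits, this is carried in the adreno_device
+ * quirks mask and tested at runtime with the ADRENO_QUIRK() macro
+ * defined above, e.g. (illustrative):
+ *
+ *	if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_LIMIT_UCHE_GBIF_RW))
+ *		... program the tighter GBIF transaction limits ...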
+ */ +#define ADRENO_QUIRK_LIMIT_UCHE_GBIF_RW BIT(8) +/* Do explicit mode control of cx gdsc */ +#define ADRENO_QUIRK_CX_GDSC BIT(9) + +/* Command identifiers */ +#define CONTEXT_TO_MEM_IDENTIFIER 0x2EADBEEF +#define CMD_IDENTIFIER 0x2EEDFACE +#define CMD_INTERNAL_IDENTIFIER 0x2EEDD00D +#define START_IB_IDENTIFIER 0x2EADEABE +#define END_IB_IDENTIFIER 0x2ABEDEAD +#define START_PROFILE_IDENTIFIER 0x2DEFADE1 +#define END_PROFILE_IDENTIFIER 0x2DEFADE2 +#define PWRON_FIXUP_IDENTIFIER 0x2AFAFAFA + +/* One cannot wait forever for the core to idle, so set an upper limit to the + * amount of time to wait for the core to go idle + */ +#define ADRENO_IDLE_TIMEOUT (20 * 1000) + +#define ADRENO_FW_PFP 0 +#define ADRENO_FW_SQE 0 +#define ADRENO_FW_PM4 1 + +enum adreno_gpurev { + ADRENO_REV_UNKNOWN = 0, + ADRENO_REV_A304 = 304, + ADRENO_REV_A305 = 305, + ADRENO_REV_A305C = 306, + ADRENO_REV_A306 = 307, + ADRENO_REV_A306A = 308, + ADRENO_REV_A310 = 310, + ADRENO_REV_A320 = 320, + ADRENO_REV_A330 = 330, + ADRENO_REV_A305B = 335, + ADRENO_REV_A405 = 405, + ADRENO_REV_A418 = 418, + ADRENO_REV_A420 = 420, + ADRENO_REV_A430 = 430, + ADRENO_REV_A505 = 505, + ADRENO_REV_A506 = 506, + ADRENO_REV_A508 = 508, + ADRENO_REV_A510 = 510, + ADRENO_REV_A512 = 512, + ADRENO_REV_A530 = 530, + ADRENO_REV_A540 = 540, + ADRENO_REV_A610 = 610, + ADRENO_REV_A612 = 612, + ADRENO_REV_A615 = 615, + ADRENO_REV_A616 = 616, + ADRENO_REV_A618 = 618, + ADRENO_REV_A619 = 619, + ADRENO_REV_A620 = 620, + ADRENO_REV_A630 = 630, + ADRENO_REV_A635 = 635, + ADRENO_REV_A640 = 640, + ADRENO_REV_A650 = 650, + ADRENO_REV_A660 = 660, + ADRENO_REV_A680 = 680, + /* + * Gen7 and higher version numbers may exceed 1 digit + * Bits 16-23: Major + * Bits 8-15: Minor + * Bits 0-7: Patch id + */ + ADRENO_REV_GEN7_0_0 = 0x070000, + ADRENO_REV_GEN7_0_1 = 0x070001, +}; + +#define ADRENO_SOFT_FAULT BIT(0) +#define ADRENO_HARD_FAULT BIT(1) +#define ADRENO_TIMEOUT_FAULT BIT(2) +#define ADRENO_IOMMU_PAGE_FAULT BIT(3) +#define ADRENO_PREEMPT_FAULT BIT(4) +#define ADRENO_GMU_FAULT BIT(5) +#define ADRENO_CTX_DETATCH_TIMEOUT_FAULT BIT(6) +#define ADRENO_GMU_FAULT_SKIP_SNAPSHOT BIT(7) + +/* number of throttle counters for DCVS adjustment */ +#define ADRENO_GPMU_THROTTLE_COUNTERS 4 + +struct adreno_gpudev; + +/* Time to allow preemption to complete (in ms) */ +#define ADRENO_PREEMPT_TIMEOUT 10000 + +/** + * enum adreno_preempt_states + * ADRENO_PREEMPT_NONE: No preemption is scheduled + * ADRENO_PREEMPT_START: The S/W has started + * ADRENO_PREEMPT_TRIGGERED: A preeempt has been triggered in the HW + * ADRENO_PREEMPT_FAULTED: The preempt timer has fired + * ADRENO_PREEMPT_PENDING: The H/W has signaled preemption complete + * ADRENO_PREEMPT_COMPLETE: Preemption could not be finished in the IRQ handler, + * worker has been scheduled + */ +enum adreno_preempt_states { + ADRENO_PREEMPT_NONE = 0, + ADRENO_PREEMPT_START, + ADRENO_PREEMPT_TRIGGERED, + ADRENO_PREEMPT_FAULTED, + ADRENO_PREEMPT_PENDING, + ADRENO_PREEMPT_COMPLETE, +}; + +/** + * struct adreno_protected_regs - container for a protect register span + */ +struct adreno_protected_regs { + /** @reg: Physical protected mode register to write to */ + u32 reg; + /** @start: Dword offset of the starting register in the range */ + u32 start; + /** + * @end: Dword offset of the ending register in the range + * (inclusive) + */ + u32 end; + /** + * @noaccess: 1 if the register should not be accessible from + * userspace, 0 if it can be read (but not written) + */ + u32 noaccess; +}; + +/** + * struct adreno_preemption + * 
@state: The current state of preemption + * @scratch: Per-target scratch memory for implementation specific functionality + * @timer: A timer to make sure preemption doesn't stall + * @work: A work struct for the preemption worker (for 5XX) + * preempt_level: The level of preemption (for 6XX) + * skipsaverestore: To skip saverestore during L1 preemption (for 6XX) + * usesgmem: enable GMEM save/restore across preemption (for 6XX) + * count: Track the number of preemptions triggered + */ +struct adreno_preemption { + atomic_t state; + struct kgsl_memdesc *scratch; + struct timer_list timer; + struct work_struct work; + unsigned int preempt_level; + bool skipsaverestore; + bool usesgmem; + unsigned int count; +}; + +struct adreno_busy_data { + unsigned int gpu_busy; + unsigned int bif_ram_cycles; + unsigned int bif_ram_cycles_read_ch1; + unsigned int bif_ram_cycles_write_ch0; + unsigned int bif_ram_cycles_write_ch1; + unsigned int bif_starved_ram; + unsigned int bif_starved_ram_ch1; + unsigned int num_ifpc; + unsigned int throttle_cycles[ADRENO_GPMU_THROTTLE_COUNTERS]; +}; + +/** + * struct adreno_firmware - Struct holding fw details + * @fwvirt: Buffer which holds the ucode + * @size: Size of ucode buffer + * @version: Version of ucode + * @memdesc: Memory descriptor which holds ucode buffer info + */ +struct adreno_firmware { + unsigned int *fwvirt; + size_t size; + unsigned int version; + struct kgsl_memdesc *memdesc; +}; + +/** + * struct adreno_perfcounter_list_node - struct to store perfcounters + * allocated by a process on a kgsl fd. + * @groupid: groupid of the allocated perfcounter + * @countable: countable assigned to the allocated perfcounter + * @node: list node for perfcounter_list of a process + */ +struct adreno_perfcounter_list_node { + unsigned int groupid; + unsigned int countable; + struct list_head node; +}; + +/** + * struct adreno_device_private - Adreno private structure per fd + * @dev_priv: the kgsl device private structure + * @perfcounter_list: list of perfcounters used by the process + */ +struct adreno_device_private { + struct kgsl_device_private dev_priv; + struct list_head perfcounter_list; +}; + +/** + * struct adreno_reglist_list - A container for list of registers and + * number of registers in the list + */ +struct adreno_reglist_list { + /** @reg: List of register **/ + const u32 *regs; + /** @count: Number of registers in the list **/ + u32 count; +}; + +/** + * struct adreno_power_ops - Container for target specific power up/down + * sequences + */ +struct adreno_power_ops { + /** + * @first_open: Target specific function triggered when first kgsl + * instance is opened + */ + int (*first_open)(struct adreno_device *adreno_dev); + /** + * @last_close: Target specific function triggered when last kgsl + * instance is closed + */ + int (*last_close)(struct adreno_device *adreno_dev); + /** + * @active_count_get: Target specific function to keep gpu from power + * collapsing + */ + int (*active_count_get)(struct adreno_device *adreno_dev); + /** + * @active_count_put: Target specific function to allow gpu to power + * collapse + */ + void (*active_count_put)(struct adreno_device *adreno_dev); + /** @pm_suspend: Target specific function to suspend the driver */ + int (*pm_suspend)(struct adreno_device *adreno_dev); + /** @pm_resume: Target specific function to resume the driver */ + void (*pm_resume)(struct adreno_device *adreno_dev); + /** + * @touch_wakeup: Target specific function to start gpu on touch event + */ + void (*touch_wakeup)(struct 
adreno_device *adreno_dev); + /** @gpu_clock_set: Target specific function to set gpu frequency */ + int (*gpu_clock_set)(struct adreno_device *adreno_dev, u32 pwrlevel); + /** @gpu_bus_set: Target specific function to set gpu bandwidth */ + int (*gpu_bus_set)(struct adreno_device *adreno_dev, int bus_level, + u32 ab); +}; + +/** + * struct adreno_gpu_core - A specific GPU core definition + * @gpurev: Unique GPU revision identifier + * @core: Match for the core version of the GPU + * @major: Match for the major version of the GPU + * @minor: Match for the minor version of the GPU + * @patchid: Match for the patch revision of the GPU + * @features: Common adreno features supported by this core + * @gpudev: Pointer to the GPU family specific functions for this core + * @gmem_base: Base address of binning memory (GMEM/OCMEM) + * @gmem_size: Amount of binning memory (GMEM/OCMEM) to reserve for the core + * @bus_width: Bytes transferred in 1 cycle + */ +struct adreno_gpu_core { + enum adreno_gpurev gpurev; + /** @chipid: Unique GPU chipid for external identification */ + u32 chipid; + unsigned int core, major, minor, patchid; + /** + * @compatible: If specified, use the compatible string to match the + * device + */ + const char *compatible; + unsigned long features; + const struct adreno_gpudev *gpudev; + const struct adreno_perfcounters *perfcounters; + unsigned long gmem_base; + size_t gmem_size; + u32 bus_width; + /** @snapshot_size: Size of the static snapshot region in bytes */ + u32 snapshot_size; +}; + +/** + * struct adreno_dispatch_ops - Common functions for dispatcher operations + */ +struct adreno_dispatch_ops { + /* @close: Shut down the dispatcher */ + void (*close)(struct adreno_device *adreno_dev); + /* @queue_cmds: Queue a command on the context */ + int (*queue_cmds)(struct kgsl_device_private *dev_priv, + struct kgsl_context *context, struct kgsl_drawobj *drawobj[], + u32 count, u32 *timestamp); + /* @queue_context: Queue a context to be dispatched */ + void (*queue_context)(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt); + void (*setup_context)(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt); + void (*fault)(struct adreno_device *adreno_dev, u32 fault); + /* @idle: Wait for dipatcher to become idle */ + int (*idle)(struct adreno_device *adreno_dev); +}; + +/** + * struct adreno_device - The mothership structure for all adreno related info + * @dev: Reference to struct kgsl_device + * @priv: Holds the private flags specific to the adreno_device + * @chipid: Chip ID specific to the GPU + * @cx_misc_len: Length of the CX MISC register block + * @cx_misc_virt: Pointer where the CX MISC block is mapped + * @isense_base: Base physical address of isense block + * @isense_len: Length of the isense register block + * @isense_virt: Pointer where isense block is mapped + * @gpucore: Pointer to the adreno_gpu_core structure + * @pfp_fw: Buffer which holds the pfp ucode + * @pfp_fw_size: Size of pfp ucode buffer + * @pfp_fw_version: Version of pfp ucode + * @pfp: Memory descriptor which holds pfp ucode buffer info + * @pm4_fw: Buffer which holds the pm4 ucode + * @pm4_fw_size: Size of pm4 ucode buffer + * @pm4_fw_version: Version of pm4 ucode + * @pm4: Memory descriptor which holds pm4 ucode buffer info + * @gpmu_cmds_size: Length of gpmu cmd stream + * @gpmu_cmds: gpmu cmd stream + * @ringbuffers: Array of pointers to adreno_ringbuffers + * @num_ringbuffers: Number of ringbuffers for the GPU + * @cur_rb: Pointer to the current ringbuffer + * 
@next_rb: Ringbuffer we are switching to during preemption + * @prev_rb: Ringbuffer we are switching from during preemption + * @fast_hang_detect: Software fault detection availability + * @ft_policy: Defines the fault tolerance policy + * @long_ib_detect: Long IB detection availability + * @cooperative_reset: Indicates if graceful death handshake is enabled + * between GMU and GPU + * @profile: Container for adreno profiler information + * @dispatcher: Container for adreno GPU dispatcher + * @pwron_fixup: Command buffer to run a post-power collapse shader workaround + * @pwron_fixup_dwords: Number of dwords in the command buffer + * @input_work: Work struct for turning on the GPU after a touch event + * @busy_data: Struct holding GPU VBIF busy stats + * @ram_cycles_lo: Number of DDR clock cycles for the monitor session (Only + * DDR channel 0 read cycles in case of GBIF) + * @ram_cycles_lo_ch1_read: Number of DDR channel 1 Read clock cycles for + * the monitor session + * @ram_cycles_lo_ch0_write: Number of DDR channel 0 Write clock cycles for + * the monitor session + * @ram_cycles_lo_ch1_write: Number of DDR channel 0 Write clock cycles for + * the monitor session + * @starved_ram_lo: Number of cycles VBIF/GBIF is stalled by DDR (Only channel 0 + * stall cycles in case of GBIF) + * @starved_ram_lo_ch1: Number of cycles GBIF is stalled by DDR channel 1 + * @halt: Atomic variable to check whether the GPU is currently halted + * @pending_irq_refcnt: Atomic variable to keep track of running IRQ handlers + * @ctx_d_debugfs: Context debugfs node + * @profile_buffer: Memdesc holding the drawobj profiling buffer + * @profile_index: Index to store the start/stop ticks in the profiling + * buffer + * @pwrup_reglist: Memdesc holding the power up register list + * which is used by CP during preemption and IFPC + * @lm_sequence: Pointer to the start of the register write sequence for LM + * @lm_size: The dword size of the LM sequence + * @lm_limit: limiting value for LM + * @lm_threshold_count: register value for counter for lm threshold breakin + * @lm_threshold_cross: number of current peaks exceeding threshold + * @ifpc_count: Number of times the GPU went into IFPC + * @highest_bank_bit: Value of the highest bank bit + * @csdev: Pointer to a coresight device (if applicable) + * @gpmu_throttle_counters - counteers for number of throttled clocks + * @irq_storm_work: Worker to handle possible interrupt storms + * @active_list: List to track active contexts + * @active_list_lock: Lock to protect active_list + * @gpu_llc_slice: GPU system cache slice descriptor + * @gpu_llc_slice_enable: To enable the GPU system cache slice or not + * @gpuhtw_llc_slice: GPU pagetables system cache slice descriptor + * @gpuhtw_llc_slice_enable: To enable the GPUHTW system cache slice or not + * @zap_loaded: Used to track if zap was successfully loaded or not + */ +struct adreno_device { + struct kgsl_device dev; /* Must be first field in this struct */ + unsigned long priv; + unsigned int chipid; + unsigned long cx_dbgc_base; + unsigned int cx_dbgc_len; + void __iomem *cx_dbgc_virt; + unsigned int cx_misc_len; + void __iomem *cx_misc_virt; + unsigned long isense_base; + unsigned int isense_len; + void __iomem *isense_virt; + const struct adreno_gpu_core *gpucore; + struct adreno_firmware fw[2]; + size_t gpmu_cmds_size; + unsigned int *gpmu_cmds; + struct adreno_ringbuffer ringbuffers[KGSL_PRIORITY_MAX_RB_LEVELS]; + int num_ringbuffers; + struct adreno_ringbuffer *cur_rb; + struct adreno_ringbuffer *next_rb; + struct 
adreno_ringbuffer *prev_rb; + unsigned int fast_hang_detect; + unsigned long ft_policy; + bool long_ib_detect; + bool cooperative_reset; + struct adreno_profile profile; + struct adreno_dispatcher dispatcher; + struct kgsl_memdesc *pwron_fixup; + unsigned int pwron_fixup_dwords; + struct work_struct input_work; + struct adreno_busy_data busy_data; + unsigned int ram_cycles_lo; + unsigned int ram_cycles_lo_ch1_read; + unsigned int ram_cycles_lo_ch0_write; + unsigned int ram_cycles_lo_ch1_write; + unsigned int starved_ram_lo; + unsigned int starved_ram_lo_ch1; + atomic_t halt; + atomic_t pending_irq_refcnt; + struct dentry *ctx_d_debugfs; + /** @lm_enabled: True if limits management is enabled for this target */ + bool lm_enabled; + /** @acd_enabled: True if acd is enabled for this target */ + bool acd_enabled; + /** @hwcg_enabled: True if hardware clock gating is enabled */ + bool hwcg_enabled; + /** @throttling_enabled: True if LM throttling is enabled on a5xx */ + bool throttling_enabled; + /** @sptp_pc_enabled: True if SPTP power collapse is enabled on a5xx */ + bool sptp_pc_enabled; + /** @bcl_enabled: True if BCL is enabled */ + bool bcl_enabled; + struct kgsl_memdesc *profile_buffer; + unsigned int profile_index; + struct kgsl_memdesc *pwrup_reglist; + uint32_t *lm_sequence; + uint32_t lm_size; + struct adreno_preemption preempt; + struct work_struct gpmu_work; + uint32_t lm_leakage; + uint32_t lm_limit; + uint32_t lm_threshold_count; + uint32_t lm_threshold_cross; + uint32_t ifpc_count; + + unsigned int highest_bank_bit; + unsigned int quirks; + + struct coresight_device *csdev[2]; + uint32_t gpmu_throttle_counters[ADRENO_GPMU_THROTTLE_COUNTERS]; + struct work_struct irq_storm_work; + + struct list_head active_list; + spinlock_t active_list_lock; + + void *gpu_llc_slice; + bool gpu_llc_slice_enable; + void *gpuhtw_llc_slice; + bool gpuhtw_llc_slice_enable; + unsigned int zap_loaded; + /** + * @critpkts: Memory descriptor for 5xx critical packets if applicable + */ + struct kgsl_memdesc *critpkts; + /** + * @critpkts: Memory descriptor for 5xx secure critical packets + */ + struct kgsl_memdesc *critpkts_secure; + /** @irq_mask: The current interrupt mask for the GPU device */ + u32 irq_mask; + /* + * @soft_ft_regs: an array of registers for soft fault detection on a3xx + * targets + */ + u32 *soft_ft_regs; + /* + * @soft_ft_vals: an array of register values for soft fault detection + * on a3xx targets + */ + u32 *soft_ft_vals; + /* + * @soft_ft_vals: number of elements in @soft_ft_regs and @soft_ft_vals + */ + int soft_ft_count; + /** @wake_on_touch: If true our last wakeup was due to a touch event */ + bool wake_on_touch; + /* @dispatch_ops: A pointer to a set of adreno dispatch ops */ + const struct adreno_dispatch_ops *dispatch_ops; + /** @hwsched: Container for the hardware dispatcher */ + struct adreno_hwsched hwsched; +}; + +/** + * enum adreno_device_flags - Private flags for the adreno_device + * @ADRENO_DEVICE_PWRON - Set during init after a power collapse + * @ADRENO_DEVICE_PWRON_FIXUP - Set if the target requires the shader fixup + * after power collapse + * @ADRENO_DEVICE_CORESIGHT - Set if the coresight (trace bus) registers should + * be restored after power collapse + * @ADRENO_DEVICE_STARTED - Set if the device start sequence is in progress + * @ADRENO_DEVICE_FAULT - Set if the device is currently in fault (and shouldn't + * send any more commands to the ringbuffer) + * @ADRENO_DEVICE_DRAWOBJ_PROFILE - Set if the device supports drawobj + * profiling via the ALWAYSON 
counter + * @ADRENO_DEVICE_PREEMPTION - Turn on/off preemption + * @ADRENO_DEVICE_SOFT_FAULT_DETECT - Set if soft fault detect is enabled + * @ADRENO_DEVICE_GPMU_INITIALIZED - Set if GPMU firmware initialization succeed + * @ADRENO_DEVICE_ISDB_ENABLED - Set if the Integrated Shader DeBugger is + * attached and enabled + * @ADRENO_DEVICE_CACHE_FLUSH_TS_SUSPENDED - Set if a CACHE_FLUSH_TS irq storm + * is in progress + */ +enum adreno_device_flags { + ADRENO_DEVICE_PWRON = 0, + ADRENO_DEVICE_PWRON_FIXUP = 1, + ADRENO_DEVICE_INITIALIZED = 2, + ADRENO_DEVICE_CORESIGHT = 3, + ADRENO_DEVICE_STARTED = 5, + ADRENO_DEVICE_FAULT = 6, + ADRENO_DEVICE_DRAWOBJ_PROFILE = 7, + ADRENO_DEVICE_GPU_REGULATOR_ENABLED = 8, + ADRENO_DEVICE_PREEMPTION = 9, + ADRENO_DEVICE_SOFT_FAULT_DETECT = 10, + ADRENO_DEVICE_GPMU_INITIALIZED = 11, + ADRENO_DEVICE_ISDB_ENABLED = 12, + ADRENO_DEVICE_CACHE_FLUSH_TS_SUSPENDED = 13, + ADRENO_DEVICE_CORESIGHT_CX = 14, +}; + +/** + * struct adreno_drawobj_profile_entry - a single drawobj entry in the + * kernel profiling buffer + * @started: Number of GPU ticks at start of the drawobj + * @retired: Number of GPU ticks at the end of the drawobj + */ +struct adreno_drawobj_profile_entry { + uint64_t started; + uint64_t retired; +}; + +#define ADRENO_DRAWOBJ_PROFILE_OFFSET(_index, _member) \ + ((_index) * sizeof(struct adreno_drawobj_profile_entry) \ + + offsetof(struct adreno_drawobj_profile_entry, _member)) + + +/** + * adreno_regs: List of registers that are used in kgsl driver for all + * 3D devices. Each device type has different offset value for the same + * register, so an array of register offsets are declared for every device + * and are indexed by the enumeration values defined in this enum + */ +enum adreno_regs { + ADRENO_REG_CP_ME_RAM_DATA, + ADRENO_REG_CP_RB_BASE, + ADRENO_REG_CP_RB_BASE_HI, + ADRENO_REG_CP_RB_RPTR_ADDR_LO, + ADRENO_REG_CP_RB_RPTR_ADDR_HI, + ADRENO_REG_CP_RB_RPTR, + ADRENO_REG_CP_RB_WPTR, + ADRENO_REG_CP_ME_CNTL, + ADRENO_REG_CP_RB_CNTL, + ADRENO_REG_CP_IB1_BASE, + ADRENO_REG_CP_IB1_BASE_HI, + ADRENO_REG_CP_IB1_BUFSZ, + ADRENO_REG_CP_IB2_BASE, + ADRENO_REG_CP_IB2_BASE_HI, + ADRENO_REG_CP_IB2_BUFSZ, + ADRENO_REG_CP_TIMESTAMP, + ADRENO_REG_CP_SCRATCH_REG6, + ADRENO_REG_CP_SCRATCH_REG7, + ADRENO_REG_CP_PROTECT_STATUS, + ADRENO_REG_CP_PREEMPT, + ADRENO_REG_CP_PREEMPT_DEBUG, + ADRENO_REG_CP_PREEMPT_DISABLE, + ADRENO_REG_CP_PROTECT_REG_0, + ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_LO, + ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_HI, + ADRENO_REG_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_LO, + ADRENO_REG_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_HI, + ADRENO_REG_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_LO, + ADRENO_REG_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_HI, + ADRENO_REG_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_LO, + ADRENO_REG_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_HI, + ADRENO_REG_CP_PREEMPT_LEVEL_STATUS, + ADRENO_REG_RBBM_STATUS, + ADRENO_REG_RBBM_STATUS3, + ADRENO_REG_RBBM_PERFCTR_LOAD_CMD0, + ADRENO_REG_RBBM_PERFCTR_LOAD_CMD1, + ADRENO_REG_RBBM_PERFCTR_LOAD_CMD2, + ADRENO_REG_RBBM_PERFCTR_LOAD_CMD3, + ADRENO_REG_RBBM_PERFCTR_PWR_1_LO, + ADRENO_REG_RBBM_INT_0_MASK, + ADRENO_REG_RBBM_PM_OVERRIDE2, + ADRENO_REG_RBBM_SW_RESET_CMD, + ADRENO_REG_RBBM_CLOCK_CTL, + ADRENO_REG_PA_SC_AA_CONFIG, + ADRENO_REG_SQ_GPR_MANAGEMENT, + ADRENO_REG_SQ_INST_STORE_MANAGEMENT, + ADRENO_REG_TP0_CHICKEN, + ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_LO, + ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_HI, + ADRENO_REG_GMU_AO_HOST_INTERRUPT_MASK, + ADRENO_REG_GMU_AHB_FENCE_STATUS, + 
ADRENO_REG_GMU_GMU2HOST_INTR_MASK, + ADRENO_REG_GPMU_POWER_COUNTER_ENABLE, + ADRENO_REG_REGISTER_MAX, +}; + +#define ADRENO_REG_UNUSED 0xFFFFFFFF +#define ADRENO_REG_SKIP 0xFFFFFFFE +#define ADRENO_REG_DEFINE(_offset, _reg)[_offset] = _reg + +struct adreno_irq_funcs { + void (*func)(struct adreno_device *adreno_dev, int mask); +}; +#define ADRENO_IRQ_CALLBACK(_c) { .func = _c } + +/* + * struct adreno_debugbus_block - Holds info about debug buses of a chip + * @block_id: Bus identifier + * @dwords: Number of dwords of data that this block holds + */ +struct adreno_debugbus_block { + unsigned int block_id; + unsigned int dwords; +}; + +enum adreno_cp_marker_type { + IFPC_DISABLE, + IFPC_ENABLE, + IB1LIST_START, + IB1LIST_END, +}; + +struct adreno_gpudev { + /* + * These registers are in a different location on different devices, + * so define them in the structure and use them as variables. + */ + unsigned int *const reg_offsets; + + struct adreno_coresight *coresight[2]; + + /* GPU specific function hooks */ + int (*probe)(struct platform_device *pdev, u32 chipid, + const struct adreno_gpu_core *gpucore); + void (*snapshot)(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot); + irqreturn_t (*irq_handler)(struct adreno_device *adreno_dev); + int (*init)(struct adreno_device *adreno_dev); + void (*remove)(struct adreno_device *adreno_dev); + int (*rb_start)(struct adreno_device *adreno_dev); + int (*start)(struct adreno_device *adreno_dev); + int (*regulator_enable)(struct adreno_device *adreno_dev); + void (*regulator_disable)(struct adreno_device *adreno_dev); + void (*pwrlevel_change_settings)(struct adreno_device *adreno_dev, + unsigned int prelevel, unsigned int postlevel, + bool post); + void (*preemption_schedule)(struct adreno_device *adreno_dev); + int (*preemption_context_init)(struct kgsl_context *context); + void (*context_detach)(struct adreno_context *drawctxt); + void (*clk_set_options)(struct adreno_device *adreno_dev, + const char *name, struct clk *clk, bool on); + void (*pre_reset)(struct adreno_device *adreno_dev); + void (*gpu_keepalive)(struct adreno_device *adreno_dev, + bool state); + bool (*hw_isidle)(struct adreno_device *adreno_dev); + const char *(*iommu_fault_block)(struct kgsl_device *device, + unsigned int fsynr1); + int (*reset)(struct adreno_device *adreno_dev); + /** @read_alwayson: Return the current value of the alwayson counter */ + u64 (*read_alwayson)(struct adreno_device *adreno_dev); + /** + * @power_ops: Target specific function pointers to power up/down the + * gpu + */ + const struct adreno_power_ops *power_ops; + int (*clear_pending_transactions)(struct adreno_device *adreno_dev); + void (*deassert_gbif_halt)(struct adreno_device *adreno_dev); + int (*ringbuffer_submitcmd)(struct adreno_device *adreno_dev, + struct kgsl_drawobj_cmd *cmdobj, u32 flags, + struct adreno_submit_time *time); + /** + * @is_hw_collapsible: Return true if the hardware can be collapsed. 
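Editor's sketch: the adreno_irq_funcs/ADRENO_IRQ_CALLBACK pair defined earlier on this hunk is meant to be indexed by interrupt bit and walked by adreno_irq_callbacks(); a minimal, purely illustrative table (bit assignments and the handler are made up, not taken from this patch):

	/* Hypothetical placeholder handler with the expected signature */
	static void example_err_callback(struct adreno_device *adreno_dev, int bit)
	{
		/* placeholder handler body */
	}

	/* Hypothetical per-target interrupt table, indexed by IRQ bit */
	static const struct adreno_irq_funcs example_irq_funcs[] = {
		ADRENO_IRQ_CALLBACK(NULL),                 /* bit 0: unused */
		ADRENO_IRQ_CALLBACK(example_err_callback), /* bit 1: hypothetical error IRQ */
	};
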
+ * Only used by non GMU/RGMU targets + */ + bool (*is_hw_collapsible)(struct adreno_device *adreno_dev); + /** + * @power_stats - Return the perfcounter statistics for DCVS + */ + void (*power_stats)(struct adreno_device *adreno_dev, + struct kgsl_power_stats *stats); + int (*setproperty)(struct kgsl_device_private *priv, u32 type, + void __user *value, u32 sizebytes); + int (*add_to_va_minidump)(struct adreno_device *adreno_dev); +}; + +/** + * enum kgsl_ft_policy_bits - KGSL fault tolerance policy bits + * @KGSL_FT_OFF: Disable fault detection (not used) + * @KGSL_FT_REPLAY: Replay the faulting command + * @KGSL_FT_SKIPIB: Skip the faulting indirect buffer + * @KGSL_FT_SKIPFRAME: Skip the frame containing the faulting IB + * @KGSL_FT_DISABLE: Tells the dispatcher to disable FT for the command obj + * @KGSL_FT_TEMP_DISABLE: Disables FT for all commands + * @KGSL_FT_THROTTLE: Disable the context if it faults too often + * @KGSL_FT_SKIPCMD: Skip the command containing the faulting IB + */ +enum kgsl_ft_policy_bits { + KGSL_FT_OFF = 0, + KGSL_FT_REPLAY, + KGSL_FT_SKIPIB, + KGSL_FT_SKIPFRAME, + KGSL_FT_DISABLE, + KGSL_FT_TEMP_DISABLE, + KGSL_FT_THROTTLE, + KGSL_FT_SKIPCMD, + /* KGSL_FT_MAX_BITS is used to calculate the mask */ + KGSL_FT_MAX_BITS, + /* Internal bits - set during GFT */ + /* Skip the PM dump on replayed command obj's */ + KGSL_FT_SKIP_PMDUMP = 31, +}; + +#define KGSL_FT_POLICY_MASK GENMASK(KGSL_FT_MAX_BITS - 1, 0) + +#define FOR_EACH_RINGBUFFER(_dev, _rb, _i) \ + for ((_i) = 0, (_rb) = &((_dev)->ringbuffers[0]); \ + (_i) < (_dev)->num_ringbuffers; \ + (_i)++, (_rb)++) + +extern const struct adreno_power_ops adreno_power_operations; + +extern const struct adreno_gpudev adreno_a3xx_gpudev; +extern const struct adreno_gpudev adreno_a5xx_gpudev; +extern const struct adreno_gpudev adreno_a6xx_gpudev; +extern const struct adreno_gpudev adreno_a6xx_rgmu_gpudev; +extern const struct adreno_gpudev adreno_a619_holi_gpudev; + +extern int adreno_wake_nice; +extern unsigned int adreno_wake_timeout; + +int adreno_start(struct kgsl_device *device, int priority); +long adreno_ioctl(struct kgsl_device_private *dev_priv, + unsigned int cmd, unsigned long arg); + +long adreno_ioctl_helper(struct kgsl_device_private *dev_priv, + unsigned int cmd, unsigned long arg, + const struct kgsl_ioctl *cmds, int len); + +/* + * adreno_switch_to_unsecure_mode - Execute a zap shader + * @adreno_dev: An Adreno GPU handle + * @rb: The ringbuffer to execute on + * + * Execute the zap shader from the CP to take the GPU out of secure mode. 
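Editor's sketch: the FOR_EACH_RINGBUFFER() iterator defined earlier in this hunk walks the per-priority ringbuffer array; a small illustrative use (the helper name is hypothetical):

	/* Hypothetical walk: count the ringbuffers other than the active one */
	static int example_count_inactive_rbs(struct adreno_device *adreno_dev)
	{
		struct adreno_ringbuffer *rb;
		int i, count = 0;

		FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
			if (rb != adreno_dev->cur_rb)
				count++;
		}

		return count;
	}
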
+ * Return: 0 on success or negative on failure + */ +int adreno_switch_to_unsecure_mode(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb); + +int adreno_spin_idle(struct adreno_device *device, unsigned int timeout); +int adreno_idle(struct kgsl_device *device); + +int adreno_set_constraint(struct kgsl_device *device, + struct kgsl_context *context, + struct kgsl_device_constraint *constraint); + +void adreno_snapshot(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, + struct kgsl_context *context); + +int adreno_reset(struct kgsl_device *device, int fault); + +void adreno_fault_skipcmd_detached(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, + struct kgsl_drawobj *drawobj); + +void adreno_hang_int_callback(struct adreno_device *adreno_dev, int bit); +void adreno_cp_callback(struct adreno_device *adreno_dev, int bit); + +int adreno_sysfs_init(struct adreno_device *adreno_dev); + +void adreno_irqctrl(struct adreno_device *adreno_dev, int state); + +long adreno_ioctl_perfcounter_get(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); + +long adreno_ioctl_perfcounter_put(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); + +bool adreno_is_cx_dbgc_register(struct kgsl_device *device, + unsigned int offset); +void adreno_cx_dbgc_regread(struct kgsl_device *adreno_device, + unsigned int offsetwords, unsigned int *value); +void adreno_cx_dbgc_regwrite(struct kgsl_device *device, + unsigned int offsetwords, unsigned int value); +void adreno_cx_misc_regread(struct adreno_device *adreno_dev, + unsigned int offsetwords, unsigned int *value); +void adreno_cx_misc_regwrite(struct adreno_device *adreno_dev, + unsigned int offsetwords, unsigned int value); +void adreno_cx_misc_regrmw(struct adreno_device *adreno_dev, + unsigned int offsetwords, + unsigned int mask, unsigned int bits); +void adreno_isense_regread(struct adreno_device *adreno_dev, + unsigned int offsetwords, unsigned int *value); + +/** + * adreno_active_count_get - Wrapper for target specific active count get + * @adreno_dev: pointer to the adreno device + * + * Increase the active count for the KGSL device and execute slumber exit + * sequence if this is the first reference. Code paths that need to touch the + * hardware or wait for the hardware to complete an operation must hold an + * active count reference until they are finished. The device mutex must be held + * while calling this function. + * + * Return: 0 on success or negative error on failure to wake up the device + */ +int adreno_active_count_get(struct adreno_device *adreno_dev); + +/** + * adreno_active_count_put - Wrapper for target specific active count put + * @adreno_dev: pointer to the adreno device + * + * Decrease the active or the KGSL device and schedule the idle thread to + * execute the slumber sequence if there are no remaining references. The + * device mutex must be held while calling this function. 
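Editor's sketch: a typical caller brackets any hardware access with the get/put pair documented here; a minimal illustration (the helper name and register argument are made up, and the caller is assumed to already hold the device mutex as required above):

	/* Hypothetical helper: read one register while the GPU is guaranteed awake */
	static int example_read_awake(struct adreno_device *adreno_dev,
			u32 offsetwords, u32 *val)
	{
		/* First reference brings the GPU out of slumber */
		int ret = adreno_active_count_get(adreno_dev);

		if (ret)
			return ret;

		kgsl_regread(KGSL_DEVICE(adreno_dev), offsetwords, val);

		/* Dropping the last reference lets the idle path re-enter slumber */
		adreno_active_count_put(adreno_dev);
		return 0;
	}
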
+ */ +void adreno_active_count_put(struct adreno_device *adreno_dev); + +#define ADRENO_TARGET(_name, _id) \ +static inline int adreno_is_##_name(struct adreno_device *adreno_dev) \ +{ \ + return (ADRENO_GPUREV(adreno_dev) == (_id)); \ +} + +static inline int adreno_is_a3xx(struct adreno_device *adreno_dev) +{ + return ((ADRENO_GPUREV(adreno_dev) >= 300) && + (ADRENO_GPUREV(adreno_dev) < 400)); +} + +ADRENO_TARGET(a304, ADRENO_REV_A304) +ADRENO_TARGET(a306, ADRENO_REV_A306) +ADRENO_TARGET(a306a, ADRENO_REV_A306A) + +static inline int adreno_is_a5xx(struct adreno_device *adreno_dev) +{ + return ADRENO_GPUREV(adreno_dev) >= 500 && + ADRENO_GPUREV(adreno_dev) < 600; +} + +ADRENO_TARGET(a505, ADRENO_REV_A505) +ADRENO_TARGET(a506, ADRENO_REV_A506) +ADRENO_TARGET(a508, ADRENO_REV_A508) +ADRENO_TARGET(a510, ADRENO_REV_A510) +ADRENO_TARGET(a512, ADRENO_REV_A512) +ADRENO_TARGET(a530, ADRENO_REV_A530) +ADRENO_TARGET(a540, ADRENO_REV_A540) + +static inline int adreno_is_a530v2(struct adreno_device *adreno_dev) +{ + return (ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A530) && + (ADRENO_CHIPID_PATCH(adreno_dev->chipid) == 1); +} + +static inline int adreno_is_a530v3(struct adreno_device *adreno_dev) +{ + return (ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A530) && + (ADRENO_CHIPID_PATCH(adreno_dev->chipid) == 2); +} + +static inline int adreno_is_a505_or_a506(struct adreno_device *adreno_dev) +{ + return ADRENO_GPUREV(adreno_dev) >= 505 && + ADRENO_GPUREV(adreno_dev) <= 506; +} + +static inline int adreno_is_a6xx(struct adreno_device *adreno_dev) +{ + return ADRENO_GPUREV(adreno_dev) >= 600 && + ADRENO_GPUREV(adreno_dev) < 700; +} + +static inline int adreno_is_a660_shima(struct adreno_device *adreno_dev) +{ + return (ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A660) && + (adreno_dev->gpucore->compatible && + !strcmp(adreno_dev->gpucore->compatible, + "qcom,adreno-gpu-a660-shima")); +} + +ADRENO_TARGET(a610, ADRENO_REV_A610) +ADRENO_TARGET(a612, ADRENO_REV_A612) +ADRENO_TARGET(a618, ADRENO_REV_A618) +ADRENO_TARGET(a619, ADRENO_REV_A619) +ADRENO_TARGET(a620, ADRENO_REV_A620) +ADRENO_TARGET(a630, ADRENO_REV_A630) +ADRENO_TARGET(a635, ADRENO_REV_A635) +ADRENO_TARGET(a640, ADRENO_REV_A640) +ADRENO_TARGET(a650, ADRENO_REV_A650) +ADRENO_TARGET(a680, ADRENO_REV_A680) + +/* A635 is derived from A660 and shares same logic */ +static inline int adreno_is_a660(struct adreno_device *adreno_dev) +{ + unsigned int rev = ADRENO_GPUREV(adreno_dev); + + return (rev == ADRENO_REV_A660 || rev == ADRENO_REV_A635); +} + +/* + * All the derived chipsets from A615 needs to be added to this + * list such as A616, A618, A619 etc. + */ +static inline int adreno_is_a615_family(struct adreno_device *adreno_dev) +{ + unsigned int rev = ADRENO_GPUREV(adreno_dev); + + return (rev == ADRENO_REV_A615 || rev == ADRENO_REV_A616 || + rev == ADRENO_REV_A618 || rev == ADRENO_REV_A619); +} + +/* + * Derived GPUs from A640 needs to be added to this list. + * A640 and A680 belongs to this family. + */ +static inline int adreno_is_a640_family(struct adreno_device *adreno_dev) +{ + unsigned int rev = ADRENO_GPUREV(adreno_dev); + + return (rev == ADRENO_REV_A640 || rev == ADRENO_REV_A680); +} + +/* + * Derived GPUs from A650 needs to be added to this list. + * A650 is derived from A640 but register specs has been + * changed hence do not belongs to A640 family. A620, + * A660, A690 follows the register specs of A650. 
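Editor's note: each ADRENO_TARGET() invocation above stamps out a one-line revision predicate; for reference, the a630 entry expands to:

	static inline int adreno_is_a630(struct adreno_device *adreno_dev)
	{
		return (ADRENO_GPUREV(adreno_dev) == (ADRENO_REV_A630));
	}
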
+ * + */ +static inline int adreno_is_a650_family(struct adreno_device *adreno_dev) +{ + unsigned int rev = ADRENO_GPUREV(adreno_dev); + + return (rev == ADRENO_REV_A650 || rev == ADRENO_REV_A620 || + rev == ADRENO_REV_A660 || rev == ADRENO_REV_A635); +} + +static inline int adreno_is_a619_holi(struct adreno_device *adreno_dev) +{ + return of_device_is_compatible(adreno_dev->dev.pdev->dev.of_node, + "qcom,adreno-gpu-a619-holi"); +} + +static inline int adreno_is_a620v1(struct adreno_device *adreno_dev) +{ + return (ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A620) && + (ADRENO_CHIPID_PATCH(adreno_dev->chipid) == 0); +} + +static inline int adreno_is_a640v2(struct adreno_device *adreno_dev) +{ + return (ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A640) && + (ADRENO_CHIPID_PATCH(adreno_dev->chipid) == 1); +} + +static inline int adreno_is_gen7(struct adreno_device *adreno_dev) +{ + return ADRENO_GPUREV(adreno_dev) >= 0x070000 && + ADRENO_GPUREV(adreno_dev) < 0x080000; +} + +ADRENO_TARGET(gen7_0_0, ADRENO_REV_GEN7_0_0) +ADRENO_TARGET(gen7_0_1, ADRENO_REV_GEN7_0_1) + +/* + * adreno_checkreg_off() - Checks the validity of a register enum + * @adreno_dev: Pointer to adreno device + * @offset_name: The register enum that is checked + */ +static inline bool adreno_checkreg_off(struct adreno_device *adreno_dev, + enum adreno_regs offset_name) +{ + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + if (offset_name >= ADRENO_REG_REGISTER_MAX || + gpudev->reg_offsets[offset_name] == ADRENO_REG_UNUSED) + return false; + + /* + * GPU register programming is kept common as much as possible + * across the cores, Use ADRENO_REG_SKIP when certain register + * programming needs to be skipped for certain GPU cores. + * Example: Certain registers on a5xx like IB1_BASE are 64 bit. + * Common programming programs 64bit register but upper 32 bits + * are skipped in a3xx using ADRENO_REG_SKIP. 
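Editor's sketch: to make the UNUSED/SKIP convention concrete, a per-target offset table might carry entries like the following (the numeric offset is a placeholder, not from this patch):

	/* Hypothetical excerpt of a per-target table: untouched slots become
	 * ADRENO_REG_UNUSED via adreno_reg_offset_init(), while registers that
	 * must not be programmed on this core are pinned to ADRENO_REG_SKIP.
	 */
	static unsigned int example_reg_offsets[ADRENO_REG_REGISTER_MAX] = {
		ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE, 0x0800),              /* made-up offset */
		ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE_HI, ADRENO_REG_SKIP), /* no 64-bit high word */
	};
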
+ */ + if (gpudev->reg_offsets[offset_name] == ADRENO_REG_SKIP) + return false; + + return true; +} + +/* + * adreno_readreg() - Read a register by getting its offset from the + * offset array defined in gpudev node + * @adreno_dev: Pointer to the the adreno device + * @offset_name: The register enum that is to be read + * @val: Register value read is placed here + */ +static inline void adreno_readreg(struct adreno_device *adreno_dev, + enum adreno_regs offset_name, unsigned int *val) +{ + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + if (adreno_checkreg_off(adreno_dev, offset_name)) + kgsl_regread(KGSL_DEVICE(adreno_dev), + gpudev->reg_offsets[offset_name], val); + else + *val = 0; +} + +/* + * adreno_writereg() - Write a register by getting its offset from the + * offset array defined in gpudev node + * @adreno_dev: Pointer to the the adreno device + * @offset_name: The register enum that is to be written + * @val: Value to write + */ +static inline void adreno_writereg(struct adreno_device *adreno_dev, + enum adreno_regs offset_name, unsigned int val) +{ + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + if (adreno_checkreg_off(adreno_dev, offset_name)) + kgsl_regwrite(KGSL_DEVICE(adreno_dev), + gpudev->reg_offsets[offset_name], val); +} + +/* + * adreno_getreg() - Returns the offset value of a register from the + * register offset array in the gpudev node + * @adreno_dev: Pointer to the the adreno device + * @offset_name: The register enum whore offset is returned + */ +static inline unsigned int adreno_getreg(struct adreno_device *adreno_dev, + enum adreno_regs offset_name) +{ + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + if (!adreno_checkreg_off(adreno_dev, offset_name)) + return ADRENO_REG_REGISTER_MAX; + return gpudev->reg_offsets[offset_name]; +} + +/* + * adreno_write_gmureg() - Write a GMU register by getting its offset from the + * offset array defined in gpudev node + * @adreno_dev: Pointer to the the adreno device + * @offset_name: The register enum that is to be written + * @val: Value to write + */ +static inline void adreno_write_gmureg(struct adreno_device *adreno_dev, + enum adreno_regs offset_name, unsigned int val) +{ + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + if (adreno_checkreg_off(adreno_dev, offset_name)) + gmu_core_regwrite(KGSL_DEVICE(adreno_dev), + gpudev->reg_offsets[offset_name], val); +} + +/** + * adreno_gpu_fault() - Return the current state of the GPU + * @adreno_dev: A pointer to the adreno_device to query + * + * Return 0 if there is no fault or positive with the last type of fault that + * occurred + */ +static inline unsigned int adreno_gpu_fault(struct adreno_device *adreno_dev) +{ + /* make sure we're reading the latest value */ + smp_rmb(); + return atomic_read(&adreno_dev->dispatcher.fault); +} + +/** + * adreno_set_gpu_fault() - Set the current fault status of the GPU + * @adreno_dev: A pointer to the adreno_device to set + * @state: fault state to set + * + */ +static inline void adreno_set_gpu_fault(struct adreno_device *adreno_dev, + int state) +{ + /* only set the fault bit w/o overwriting other bits */ + atomic_or(state, &adreno_dev->dispatcher.fault); + + /* make sure other CPUs see the update */ + smp_wmb(); +} + +static inline bool adreno_gmu_gpu_fault(struct adreno_device *adreno_dev) +{ + return adreno_gpu_fault(adreno_dev) & ADRENO_GMU_FAULT; +} + +/** + * adreno_clear_gpu_fault() - Clear the GPU fault register + * @adreno_dev: A 
pointer to an adreno_device structure + * + * Clear the GPU fault status for the adreno device + */ + +static inline void adreno_clear_gpu_fault(struct adreno_device *adreno_dev) +{ + atomic_set(&adreno_dev->dispatcher.fault, 0); + + /* make sure other CPUs see the update */ + smp_wmb(); +} + +/** + * adreno_gpu_halt() - Return the GPU halt refcount + * @adreno_dev: A pointer to the adreno_device + */ +static inline int adreno_gpu_halt(struct adreno_device *adreno_dev) +{ + /* make sure we're reading the latest value */ + smp_rmb(); + return atomic_read(&adreno_dev->halt); +} + + +/** + * adreno_clear_gpu_halt() - Clear the GPU halt refcount + * @adreno_dev: A pointer to the adreno_device + */ +static inline void adreno_clear_gpu_halt(struct adreno_device *adreno_dev) +{ + atomic_set(&adreno_dev->halt, 0); + + /* make sure other CPUs see the update */ + smp_wmb(); +} + +/** + * adreno_get_gpu_halt() - Increment GPU halt refcount + * @adreno_dev: A pointer to the adreno_device + */ +static inline void adreno_get_gpu_halt(struct adreno_device *adreno_dev) +{ + atomic_inc(&adreno_dev->halt); +} + +/** + * adreno_put_gpu_halt() - Decrement GPU halt refcount + * @adreno_dev: A pointer to the adreno_device + */ +static inline void adreno_put_gpu_halt(struct adreno_device *adreno_dev) +{ + /* Make sure the refcount is good */ + int ret = atomic_dec_if_positive(&adreno_dev->halt); + + WARN(ret < 0, "GPU halt refcount unbalanced\n"); +} + + +#ifdef CONFIG_DEBUG_FS +void adreno_debugfs_init(struct adreno_device *adreno_dev); +void adreno_context_debugfs_init(struct adreno_device *adreno_dev, + struct adreno_context *ctx); +#else +static inline void adreno_debugfs_init(struct adreno_device *adreno_dev) { } +static inline void adreno_context_debugfs_init(struct adreno_device *device, + struct adreno_context *context) +{ + context->debug_root = NULL; +} +#endif + +/** + * adreno_compare_pm4_version() - Compare the PM4 microcode version + * @adreno_dev: Pointer to the adreno_device struct + * @version: Version number to compare again + * + * Compare the current version against the specified version and return -1 if + * the current code is older, 0 if equal or 1 if newer. + */ +static inline int adreno_compare_pm4_version(struct adreno_device *adreno_dev, + unsigned int version) +{ + if (adreno_dev->fw[ADRENO_FW_PM4].version == version) + return 0; + + return (adreno_dev->fw[ADRENO_FW_PM4].version > version) ? 1 : -1; +} + +/** + * adreno_compare_pfp_version() - Compare the PFP microcode version + * @adreno_dev: Pointer to the adreno_device struct + * @version: Version number to compare against + * + * Compare the current version against the specified version and return -1 if + * the current code is older, 0 if equal or 1 if newer. + */ +static inline int adreno_compare_pfp_version(struct adreno_device *adreno_dev, + unsigned int version) +{ + if (adreno_dev->fw[ADRENO_FW_PFP].version == version) + return 0; + + return (adreno_dev->fw[ADRENO_FW_PFP].version > version) ? 
1 : -1; +} + +/** + * adreno_in_preempt_state() - Check if preemption state is equal to given state + * @adreno_dev: Device whose preemption state is checked + * @state: State to compare against + */ +static inline bool adreno_in_preempt_state(struct adreno_device *adreno_dev, + enum adreno_preempt_states state) +{ + return atomic_read(&adreno_dev->preempt.state) == state; +} +/** + * adreno_set_preempt_state() - Set the specified preemption state + * @adreno_dev: Device to change preemption state + * @state: State to set + */ +static inline void adreno_set_preempt_state(struct adreno_device *adreno_dev, + enum adreno_preempt_states state) +{ + /* + * atomic_set doesn't use barriers, so we need to do it ourselves. One + * before... + */ + smp_wmb(); + atomic_set(&adreno_dev->preempt.state, state); + + /* ... and one after */ + smp_wmb(); +} + +static inline bool adreno_is_preemption_enabled( + struct adreno_device *adreno_dev) +{ + return test_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv); +} + +/* + * adreno_compare_prio_level() - Compares 2 priority levels based on enum values + * @p1: First priority level + * @p2: Second priority level + * + * Returns greater than 0 if p1 is higher priority, 0 if levels are equal else + * less than 0 + */ +static inline int adreno_compare_prio_level(int p1, int p2) +{ + return p2 - p1; +} + +void adreno_readreg64(struct adreno_device *adreno_dev, + enum adreno_regs lo, enum adreno_regs hi, uint64_t *val); + +void adreno_writereg64(struct adreno_device *adreno_dev, + enum adreno_regs lo, enum adreno_regs hi, uint64_t val); + +unsigned int adreno_get_rptr(struct adreno_ringbuffer *rb); + +static inline bool adreno_rb_empty(struct adreno_ringbuffer *rb) +{ + return (adreno_get_rptr(rb) == rb->wptr); +} + +static inline bool adreno_soft_fault_detect(struct adreno_device *adreno_dev) +{ + return adreno_dev->fast_hang_detect && + !test_bit(ADRENO_DEVICE_ISDB_ENABLED, &adreno_dev->priv); +} + +static inline bool adreno_long_ib_detect(struct adreno_device *adreno_dev) +{ + return adreno_dev->long_ib_detect && + !test_bit(ADRENO_DEVICE_ISDB_ENABLED, &adreno_dev->priv); +} + +/** + * adreno_support_64bit - Return true if the GPU supports 64 bit addressing + * @adreno_dev: An Adreno GPU device handle + * + * Return: True if the device supports 64 bit addressing + */ +static inline bool adreno_support_64bit(struct adreno_device *adreno_dev) +{ + /* + * The IOMMU API takes a unsigned long for the iova so we can't support + * 64 bit addresses when the kernel is in 32 bit mode even if we wanted + * so we need to check that we are using a5xx or newer and that the + * unsigned long is big enough for our purposes. 
+ */ + return (BITS_PER_LONG > 32 && ADRENO_GPUREV(adreno_dev) >= 500); +} + +static inline void adreno_ringbuffer_set_global( + struct adreno_device *adreno_dev, int name) +{ + kgsl_sharedmem_writel(adreno_dev->ringbuffers[0].pagetable_desc, + PT_INFO_OFFSET(current_global_ptname), name); +} + +static inline void adreno_ringbuffer_set_pagetable(struct adreno_ringbuffer *rb, + struct kgsl_pagetable *pt) +{ + unsigned long flags; + + spin_lock_irqsave(&rb->preempt_lock, flags); + + kgsl_sharedmem_writel(rb->pagetable_desc, + PT_INFO_OFFSET(current_rb_ptname), pt->name); + + kgsl_sharedmem_writeq(rb->pagetable_desc, + PT_INFO_OFFSET(ttbr0), kgsl_mmu_pagetable_get_ttbr0(pt)); + + kgsl_sharedmem_writel(rb->pagetable_desc, + PT_INFO_OFFSET(contextidr), 0); + + spin_unlock_irqrestore(&rb->preempt_lock, flags); +} + +static inline u32 counter_delta(struct kgsl_device *device, + unsigned int reg, unsigned int *counter) +{ + u32 val, ret = 0; + + if (!reg) + return 0; + + kgsl_regread(device, reg, &val); + + if (*counter) { + if (val >= *counter) + ret = val - *counter; + else + ret = (UINT_MAX - *counter) + val; + } + + *counter = val; + return ret; +} + +static inline int adreno_perfcntr_active_oob_get( + struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret = adreno_active_count_get(adreno_dev); + + if (!ret) { + ret = gmu_core_dev_oob_set(device, oob_perfcntr); + if (ret) + adreno_active_count_put(adreno_dev); + } + + return ret; +} + +static inline void adreno_perfcntr_active_oob_put( + struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + gmu_core_dev_oob_clear(device, oob_perfcntr); + adreno_active_count_put(adreno_dev); +} + +/** + * adreno_wait_for_halt_ack - wait for acknowlegement for a bus halt request + * @ack_reg: register offset to wait for acknowledge + * @mask: A mask value to wait for + * + * Return: 0 on success or -ETIMEDOUT if the request timed out + */ +static inline int adreno_wait_for_halt_ack(struct kgsl_device *device, + int ack_reg, unsigned int mask) +{ + u32 val; + int ret = kgsl_regmap_read_poll_timeout(&device->regmap, ack_reg, + val, (val & mask) == mask, 100, 100 * 1000); + + if (ret) + dev_err(device->dev, + "GBIF/VBIF Halt ack timeout: reg=%08x mask=%08x status=%08x\n", + ack_reg, mask, val); + + return ret; +} + +/** + * adreno_move_preempt_state - Update the preemption state + * @adreno_dev: An Adreno GPU device handle + * @old: The current state of the preemption + * @new: The new state of the preemption + * + * Return: True if the state was updated or false if not + */ +static inline bool adreno_move_preempt_state(struct adreno_device *adreno_dev, + enum adreno_preempt_states old, enum adreno_preempt_states new) +{ + return (atomic_cmpxchg(&adreno_dev->preempt.state, old, new) == old); +} + +/** + * adreno_reg_offset_init - Helper function to initialize reg_offsets + * @reg_offsets: Pointer to an array of register offsets + * + * Helper function to setup register_offsets for a target. Go through + * and set ADRENO_REG_UNUSED for all unused entries in the list. + */ +static inline void adreno_reg_offset_init(u32 *reg_offsets) +{ + int i; + + /* + * Initialize uninitialzed gpu registers, only needs to be done once. 
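Editor's note: as a worked example of the wraparound handling in counter_delta() a few lines above, with made-up samples:

	/*
	 * Hypothetical samples: previous reading *counter = 0xFFFFFFF0, new raw
	 * value val = 0x00000010. Since val < *counter, the helper returns
	 * (UINT_MAX - 0xFFFFFFF0) + 0x10 = 0xF + 0x10 = 0x1F ticks,
	 * treating the drop in the raw value as a single 32-bit wraparound.
	 */
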
+ * Make all offsets that are not initialized to ADRENO_REG_UNUSED + */ + for (i = 0; i < ADRENO_REG_REGISTER_MAX; i++) { + if (!reg_offsets[i]) + reg_offsets[i] = ADRENO_REG_UNUSED; + } +} + +static inline u32 adreno_get_level(u32 priority) +{ + u32 level = priority / KGSL_PRIORITY_MAX_RB_LEVELS; + + return min_t(u32, level, KGSL_PRIORITY_MAX_RB_LEVELS - 1); +} + + +/** + * adreno_get_firwmare - Load firmware into a adreno_firmware struct + * @adreno_dev: An Adreno GPU device handle + * @fwfile: Firmware file to load + * @firmware: A &struct adreno_firmware container for the firmware. + * + * Load the specified firmware file into the memdesc in &struct adreno_firmware + * and get the size and version from the data. + * + * Return: 0 on success or negative on failure + */ +int adreno_get_firmware(struct adreno_device *adreno_dev, + const char *fwfile, struct adreno_firmware *firmware); +/** + * adreno_zap_shader_load - Helper function for loading the zap shader + * adreno_dev: A handle to an Adreno GPU device + * name: Name of the zap shader to load + * + * A target indepedent helper function for loading the zap shader. + * + * Return: 0 on success or negative on failure. + */ +int adreno_zap_shader_load(struct adreno_device *adreno_dev, + const char *name); + +/** + * adreno_irq_callbacks - Helper function to handle IRQ callbacks + * @adreno_dev: Adreno GPU device handle + * @funcs: List of callback functions + * @status: Interrupt status + * + * Walk the bits in the interrupt status and call any applicable callbacks. + * Return: IRQ_HANDLED if one or more interrupt callbacks were called. + */ +irqreturn_t adreno_irq_callbacks(struct adreno_device *adreno_dev, + const struct adreno_irq_funcs *funcs, u32 status); + + +/** + * adreno_device_probe - Generic adreno device probe function + * @pdev: Pointer to the platform device + * @adreno_dev: Adreno GPU device handle + * + * Do the generic setup for the Adreno device. Called from the target specific + * probe functions. + * + * Return: 0 on success or negative on failure + */ +int adreno_device_probe(struct platform_device *pdev, + struct adreno_device *adreno_dev); + +/** + * adreno_power_cycle - Suspend and resume the device + * @adreno_dev: Pointer to the adreno device + * @callback: Function that needs to be executed + * @priv: Argument to be passed to the callback + * + * Certain properties that can be set via sysfs need to power + * cycle the device to take effect. This function suspends + * the device, executes the callback, and resumes the device. + * + * Return: 0 on success or negative on failure + */ +int adreno_power_cycle(struct adreno_device *adreno_dev, + void (*callback)(struct adreno_device *adreno_dev, void *priv), + void *priv); + +/** + * adreno_power_cycle_bool - Power cycle the device to change device setting + * @adreno_dev: Pointer to the adreno device + * @flag: Flag that needs to be set + * @val: The value flag should be set to + * + * Certain properties that can be set via sysfs need to power cycle the device + * to take effect. This function suspends the device, sets the flag, and + * resumes the device. 
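Editor's sketch: adreno_power_cycle_bool()/_u32() below apply this same suspend-set-resume pattern to a single flag; a hand-rolled equivalent for illustration (the setter and store helper names are hypothetical):

	/* Hypothetical setter executed while the GPU is suspended */
	static void example_set_lm(struct adreno_device *adreno_dev, void *priv)
	{
		adreno_dev->lm_enabled = *((bool *) priv);
	}

	/* Hypothetical sysfs-style store path built on adreno_power_cycle() */
	static int example_store_lm(struct adreno_device *adreno_dev, bool val)
	{
		return adreno_power_cycle(adreno_dev, example_set_lm, &val);
	}
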
+ * + * Return: 0 on success or negative on failure + */ +int adreno_power_cycle_bool(struct adreno_device *adreno_dev, + bool *flag, bool val); + +/** + * adreno_power_cycle_u32 - Power cycle the device to change device setting + * @adreno_dev: Pointer to the adreno device + * @flag: Flag that needs to be set + * @val: The value flag should be set to + * + * Certain properties that can be set via sysfs need to power cycle the device + * to take effect. This function suspends the device, sets the flag, and + * resumes the device. + * + * Return: 0 on success or negative on failure + */ +int adreno_power_cycle_u32(struct adreno_device *adreno_dev, + u32 *flag, u32 val); + +/** + * adreno_set_active_ctxs_null - Give up active context refcount + * @adreno_dev: Adreno GPU device handle + * + * This puts back the reference for that last active context on + * each ringbuffer when going in and out of slumber. + */ +void adreno_set_active_ctxs_null(struct adreno_device *adreno_dev); + +/** + * adreno_get_bus_counters - Allocate the bus dcvs counters + * @adreno_dev: Adreno GPU device handle + * + * This function allocates the various gpu counters to measure + * gpu bus usage for bus dcvs + */ +void adreno_get_bus_counters(struct adreno_device *adreno_dev); + +/** + * adreno_suspend_context - Make sure device is idle + * @device: Pointer to the kgsl device + * + * This function processes the profiling results and checks if the + * device is idle so that it can be turned off safely + * + * Return: 0 on success or negative error on failure + */ +int adreno_suspend_context(struct kgsl_device *device); + +/* + * adreno_profile_submit_time - Populate profiling buffer with timestamps + * @time: Container for the statistics + * + * Populate the draw object user profiling buffer with the timestamps + * recored in the adreno_submit_time structure at the time of draw object + * submission. + */ +void adreno_profile_submit_time(struct adreno_submit_time *time); + +void adreno_preemption_timer(struct timer_list *t); + +/** + * adreno_create_profile_buffer - Create a buffer to store profiling data + * @adreno_dev: Adreno GPU device handle + */ +void adreno_create_profile_buffer(struct adreno_device *adreno_dev); + +/** + * adreno_isidle - return true if the hardware is idle + * @adreno_dev: Adreno GPU device handle + * + * Return: True if the hardware is idle + */ +bool adreno_isidle(struct adreno_device *adreno_dev); + +/** + * adreno_allocate_global - Helper function to allocate a global GPU object + * @device: A GPU device handle + * @memdesc: Pointer to a &struct kgsl_memdesc pointer + * @size: Size of the allocation in bytes + * @padding: Amount of extra adding to add to the VA allocation + * @flags: Control flags for the allocation + * @priv: Internal flags for the allocation + * @name: Name of the allocation (for the debugfs file) + * + * Allocate a global object if it hasn't already been alllocated and put it in + * the pointer pointed to by @memdesc. 
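Editor's sketch: the lazy-allocation idiom enabled by this helper (defined just below) in a minimal form; the field, size, flags, and name used here are purely illustrative:

	/* Hypothetical one-time allocation: a no-op on later calls because the
	 * memdesc pointer is already populated.
	 */
	static int example_init_scratch(struct adreno_device *adreno_dev)
	{
		return adreno_allocate_global(KGSL_DEVICE(adreno_dev),
				&adreno_dev->preempt.scratch, PAGE_SIZE, 0,
				0 /* flags */, 0 /* priv */, "example_scratch");
	}
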
+ * Return: 0 on success or negative on error + */ +static inline int adreno_allocate_global(struct kgsl_device *device, + struct kgsl_memdesc **memdesc, u64 size, u32 padding, u64 flags, + u32 priv, const char *name) +{ + if (!IS_ERR_OR_NULL(*memdesc)) + return 0; + + *memdesc = kgsl_allocate_global(device, size, padding, flags, priv, name); + return PTR_ERR_OR_ZERO(*memdesc); +} + +/** + * adreno_regulator_disable_poll - Disable the regulator and wait for it to + * complete + * @device: A GPU device handle + * @reg: Pointer to the regulator to disable + * @offset: Offset of the register to poll for success + * @timeout: Timeout (in milliseconds) + * + * Return: true if the regulator got disabled or false on timeout + */ +bool adreno_regulator_disable_poll(struct kgsl_device *device, + struct regulator *reg, u32 offset, u32 timeout); + +static inline void adreno_set_dispatch_ops(struct adreno_device *adreno_dev, + const struct adreno_dispatch_ops *ops) +{ + adreno_dev->dispatch_ops = ops; +} + +/** + * adreno_fence_trace_array_init - Initialize an always on trace array + * @device: A GPU device handle + * + * Register an always-on trace array to for fence timeout debugging + */ +void adreno_fence_trace_array_init(struct kgsl_device *device); + +/* + * adreno_drawobj_set_constraint - Set a power constraint + * @device: Pointer to a KGSL device structure + * @drawobj: Draw object for which constraint is to be set + * + * Set the power constraint if requested by this context + */ +void adreno_drawobj_set_constraint(struct kgsl_device *device, + struct kgsl_drawobj *drawobj); + +/** + * adreno_get_gpu_model - Gets gpu model name from device tree (or) chipid + * @device: A GPU device handle + * + * Return: GPU model name string + */ +const char *adreno_get_gpu_model(struct kgsl_device *device); +#endif /*__ADRENO_H */ diff --git a/adreno_a3xx.c b/adreno_a3xx.c new file mode 100644 index 0000000000..c4dbaf2803 --- /dev/null +++ b/adreno_a3xx.c @@ -0,0 +1,1569 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2012-2021, The Linux Foundation. All rights reserved. 
+ */ + +#include +#include +#include +#include +#include + +#include "adreno.h" +#include "adreno_cp_parser.h" +#include "adreno_a3xx.h" +#include "adreno_pm4types.h" +#include "adreno_snapshot.h" +#include "adreno_trace.h" + +/* + * Define registers for a3xx that contain addresses used by the + * cp parser logic + */ +const unsigned int a3xx_cp_addr_regs[ADRENO_CP_ADDR_MAX] = { + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_0, + A3XX_VSC_PIPE_DATA_ADDRESS_0), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_0, + A3XX_VSC_PIPE_DATA_LENGTH_0), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_1, + A3XX_VSC_PIPE_DATA_ADDRESS_1), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_1, + A3XX_VSC_PIPE_DATA_LENGTH_1), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_2, + A3XX_VSC_PIPE_DATA_ADDRESS_2), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_2, + A3XX_VSC_PIPE_DATA_LENGTH_2), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_3, + A3XX_VSC_PIPE_DATA_ADDRESS_3), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_3, + A3XX_VSC_PIPE_DATA_LENGTH_3), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_4, + A3XX_VSC_PIPE_DATA_ADDRESS_4), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_4, + A3XX_VSC_PIPE_DATA_LENGTH_4), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_5, + A3XX_VSC_PIPE_DATA_ADDRESS_5), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_5, + A3XX_VSC_PIPE_DATA_LENGTH_5), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_6, + A3XX_VSC_PIPE_DATA_ADDRESS_6), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_6, + A3XX_VSC_PIPE_DATA_LENGTH_6), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_7, + A3XX_VSC_PIPE_DATA_ADDRESS_7), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_7, + A3XX_VSC_PIPE_DATA_LENGTH_7), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_0, + A3XX_VFD_FETCH_INSTR_1_0), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_1, + A3XX_VFD_FETCH_INSTR_1_1), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_2, + A3XX_VFD_FETCH_INSTR_1_2), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_3, + A3XX_VFD_FETCH_INSTR_1_3), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_4, + A3XX_VFD_FETCH_INSTR_1_4), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_5, + A3XX_VFD_FETCH_INSTR_1_5), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_6, + A3XX_VFD_FETCH_INSTR_1_6), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_7, + A3XX_VFD_FETCH_INSTR_1_7), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_8, + A3XX_VFD_FETCH_INSTR_1_8), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_9, + A3XX_VFD_FETCH_INSTR_1_9), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_10, + A3XX_VFD_FETCH_INSTR_1_A), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_11, + A3XX_VFD_FETCH_INSTR_1_B), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_12, + A3XX_VFD_FETCH_INSTR_1_C), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_13, + A3XX_VFD_FETCH_INSTR_1_D), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_14, + A3XX_VFD_FETCH_INSTR_1_E), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_15, + A3XX_VFD_FETCH_INSTR_1_F), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_SIZE_ADDRESS, + A3XX_VSC_SIZE_ADDRESS), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_SP_VS_PVT_MEM_ADDR, + A3XX_SP_VS_PVT_MEM_ADDR_REG), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_SP_FS_PVT_MEM_ADDR, + A3XX_SP_FS_PVT_MEM_ADDR_REG), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_SP_VS_OBJ_START_REG, + 
A3XX_SP_VS_OBJ_START_REG), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_SP_FS_OBJ_START_REG, + A3XX_SP_FS_OBJ_START_REG), +}; + +static const unsigned int _a3xx_pwron_fixup_fs_instructions[] = { + 0x00000000, 0x302CC300, 0x00000000, 0x302CC304, + 0x00000000, 0x302CC308, 0x00000000, 0x302CC30C, + 0x00000000, 0x302CC310, 0x00000000, 0x302CC314, + 0x00000000, 0x302CC318, 0x00000000, 0x302CC31C, + 0x00000000, 0x302CC320, 0x00000000, 0x302CC324, + 0x00000000, 0x302CC328, 0x00000000, 0x302CC32C, + 0x00000000, 0x302CC330, 0x00000000, 0x302CC334, + 0x00000000, 0x302CC338, 0x00000000, 0x302CC33C, + 0x00000000, 0x00000400, 0x00020000, 0x63808003, + 0x00060004, 0x63828007, 0x000A0008, 0x6384800B, + 0x000E000C, 0x6386800F, 0x00120010, 0x63888013, + 0x00160014, 0x638A8017, 0x001A0018, 0x638C801B, + 0x001E001C, 0x638E801F, 0x00220020, 0x63908023, + 0x00260024, 0x63928027, 0x002A0028, 0x6394802B, + 0x002E002C, 0x6396802F, 0x00320030, 0x63988033, + 0x00360034, 0x639A8037, 0x003A0038, 0x639C803B, + 0x003E003C, 0x639E803F, 0x00000000, 0x00000400, + 0x00000003, 0x80D60003, 0x00000007, 0x80D60007, + 0x0000000B, 0x80D6000B, 0x0000000F, 0x80D6000F, + 0x00000013, 0x80D60013, 0x00000017, 0x80D60017, + 0x0000001B, 0x80D6001B, 0x0000001F, 0x80D6001F, + 0x00000023, 0x80D60023, 0x00000027, 0x80D60027, + 0x0000002B, 0x80D6002B, 0x0000002F, 0x80D6002F, + 0x00000033, 0x80D60033, 0x00000037, 0x80D60037, + 0x0000003B, 0x80D6003B, 0x0000003F, 0x80D6003F, + 0x00000000, 0x03000000, 0x00000000, 0x00000000, +}; + +/** + * _a3xx_pwron_fixup() - Initialize a special command buffer to run a + * post-power collapse shader workaround + * @adreno_dev: Pointer to a adreno_device struct + * + * Some targets require a special workaround shader to be executed after + * power-collapse. Construct the IB once at init time and keep it + * handy + * + * Returns: 0 on success or negative on error + */ +static int _a3xx_pwron_fixup(struct adreno_device *adreno_dev) +{ + unsigned int *cmds; + int count = ARRAY_SIZE(_a3xx_pwron_fixup_fs_instructions); + + /* Return if the fixup is already in place */ + if (test_bit(ADRENO_DEVICE_PWRON_FIXUP, &adreno_dev->priv)) + return 0; + + adreno_dev->pwron_fixup = kgsl_allocate_global(KGSL_DEVICE(adreno_dev), + PAGE_SIZE, 0, KGSL_MEMFLAGS_GPUREADONLY, 0, "pwron_fixup"); + + if (IS_ERR(adreno_dev->pwron_fixup)) + return PTR_ERR(adreno_dev->pwron_fixup); + + cmds = adreno_dev->pwron_fixup->hostptr; + + *cmds++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2); + *cmds++ = 0x00000000; + *cmds++ = 0x90000000; + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type3_packet(CP_REG_RMW, 3); + *cmds++ = A3XX_RBBM_CLOCK_CTL; + *cmds++ = 0xFFFCFFFF; + *cmds++ = 0x00010000; + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_0_REG, 1); + *cmds++ = 0x1E000150; + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG); + *cmds++ = 0x1E000150; + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_0_REG, 1); + *cmds++ = 0x1E000150; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_1_REG, 1); + *cmds++ = 0x00000040; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_2_REG, 1); + *cmds++ = 0x80000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_3_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_VS_CONTROL_REG, 1); + *cmds++ = 0x00000001; + 
*cmds++ = cp_type0_packet(A3XX_HLSQ_FS_CONTROL_REG, 1); + *cmds++ = 0x0D001002; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CONST_VSPRESV_RANGE_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CONST_FSPRESV_RANGE_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_0_REG, 1); + *cmds++ = 0x00401101; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_1_REG, 1); + *cmds++ = 0x00000400; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_2_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_3_REG, 1); + *cmds++ = 0x00000001; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_4_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_5_REG, 1); + *cmds++ = 0x00000001; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_6_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_CONTROL_0_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_CONTROL_1_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_KERNEL_CONST_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_KERNEL_GROUP_X_REG, 1); + *cmds++ = 0x00000010; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_KERNEL_GROUP_Y_REG, 1); + *cmds++ = 0x00000001; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG, 1); + *cmds++ = 0x00000001; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_WG_OFFSET_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_SP_CTRL_REG, 1); + *cmds++ = 0x00040000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_CTRL_REG0, 1); + *cmds++ = 0x0000000A; + *cmds++ = cp_type0_packet(A3XX_SP_VS_CTRL_REG1, 1); + *cmds++ = 0x00000001; + *cmds++ = cp_type0_packet(A3XX_SP_VS_PARAM_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_4, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_5, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_6, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_7, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_VPC_DST_REG_0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_VPC_DST_REG_1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_VPC_DST_REG_2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_VPC_DST_REG_3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_OBJ_OFFSET_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_OBJ_START_REG, 1); + *cmds++ = 0x00000004; + *cmds++ = cp_type0_packet(A3XX_SP_VS_PVT_MEM_PARAM_REG, 1); + *cmds++ = 0x04008001; + *cmds++ = cp_type0_packet(A3XX_SP_VS_PVT_MEM_ADDR_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_PVT_MEM_SIZE_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_LENGTH_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_CTRL_REG0, 1); + *cmds++ = 0x0DB0400A; + *cmds++ = cp_type0_packet(A3XX_SP_FS_CTRL_REG1, 1); + *cmds++ = 0x00300402; + *cmds++ = cp_type0_packet(A3XX_SP_FS_OBJ_OFFSET_REG, 1); + *cmds++ = 0x00010000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_OBJ_START_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = 
cp_type0_packet(A3XX_SP_FS_PVT_MEM_PARAM_REG, 1); + *cmds++ = 0x04008001; + *cmds++ = cp_type0_packet(A3XX_SP_FS_PVT_MEM_ADDR_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_PVT_MEM_SIZE_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_FLAT_SHAD_MODE_REG_1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_OUTPUT_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_MRT_REG_0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_MRT_REG_1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_MRT_REG_2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_MRT_REG_3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_IMAGE_OUTPUT_REG_0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_IMAGE_OUTPUT_REG_1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_IMAGE_OUTPUT_REG_2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_IMAGE_OUTPUT_REG_3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_LENGTH_REG, 1); + *cmds++ = 0x0000000D; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_CLIP_CNTL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_GB_CLIP_ADJ, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_VPORT_XOFFSET, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_VPORT_XSCALE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_VPORT_YOFFSET, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_VPORT_YSCALE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_VPORT_ZOFFSET, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_VPORT_ZSCALE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_X0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Y0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Z0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_W0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_X1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Y1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Z1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_W1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_X2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Y2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Z2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_W2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_X3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Y3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Z3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_W3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_X4, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Y4, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Z4, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_W4, 
1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_X5, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Y5, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Z5, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_W5, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_SU_POINT_MINMAX, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_SU_POINT_SIZE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_SU_POLY_OFFSET_OFFSET, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_SU_POLY_OFFSET_SCALE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_SU_MODE_CONTROL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_SC_CONTROL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_SC_SCREEN_SCISSOR_BR, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_SC_WINDOW_SCISSOR_BR, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_TSE_DEBUG_ECO, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_PERFCOUNTER0_SELECT, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_PERFCOUNTER1_SELECT, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_PERFCOUNTER2_SELECT, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_PERFCOUNTER3_SELECT, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MODE_CONTROL, 1); + *cmds++ = 0x00008000; + *cmds++ = cp_type0_packet(A3XX_RB_RENDER_CONTROL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MSAA_CONTROL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_ALPHA_REFERENCE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_CONTROL0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_CONTROL1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_CONTROL2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_CONTROL3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_INFO0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_INFO1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_INFO2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_INFO3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_BASE0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_BASE1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_BASE2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_BASE3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BLEND_CONTROL0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BLEND_CONTROL1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BLEND_CONTROL2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BLEND_CONTROL3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_BLEND_RED, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_BLEND_GREEN, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_BLEND_BLUE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_BLEND_ALPHA, 1); + *cmds++ = 0x00000000; + 
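The long run of writes above restores each SP, GRAS and RB register to a known power-on default, one type-0 (single register write) packet per register, before the power-on fixup kernel is dispatched via CP_EXEC_CL further below. A minimal sketch of the repeated pattern, assuming a hypothetical emit_reg_default() helper; the real buffer is built inline exactly as above, and the packet header encoding comes from adreno_pm4types.h:

static unsigned int *emit_reg_default(unsigned int *cmds, u32 reg, u32 val)
{
	*cmds++ = cp_type0_packet(reg, 1);	/* header: write 1 dword to reg */
	*cmds++ = val;				/* payload: the default value */
	return cmds;
}

/* e.g. cmds = emit_reg_default(cmds, A3XX_RB_MODE_CONTROL, 0x00008000); */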
*cmds++ = cp_type0_packet(A3XX_RB_CLEAR_COLOR_DW0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_CLEAR_COLOR_DW1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_CLEAR_COLOR_DW2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_CLEAR_COLOR_DW3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_COPY_CONTROL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_COPY_DEST_BASE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_COPY_DEST_PITCH, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_COPY_DEST_INFO, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_DEPTH_CONTROL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_DEPTH_CLEAR, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_DEPTH_BUF_INFO, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_DEPTH_BUF_PITCH, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_STENCIL_CONTROL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_STENCIL_CLEAR, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_STENCIL_BUF_INFO, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_STENCIL_BUF_PITCH, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_STENCIL_REF_MASK, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_STENCIL_REF_MASK_BF, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_LRZ_VSC_CONTROL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_WINDOW_OFFSET, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_SAMPLE_COUNT_CONTROL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_SAMPLE_COUNT_ADDR, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_Z_CLAMP_MIN, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_Z_CLAMP_MAX, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_GMEM_BASE_ADDR, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_DEBUG_ECO_CONTROLS_ADDR, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_PERFCOUNTER0_SELECT, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_PERFCOUNTER1_SELECT, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_FRAME_BUFFER_DIMENSION, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 4); + *cmds++ = (1 << CP_LOADSTATE_DSTOFFSET_SHIFT) | + (0 << CP_LOADSTATE_STATESRC_SHIFT) | + (6 << CP_LOADSTATE_STATEBLOCKID_SHIFT) | + (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = (1 << CP_LOADSTATE_STATETYPE_SHIFT) | + (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT); + *cmds++ = 0x00400000; + *cmds++ = 0x00000000; + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 4); + *cmds++ = (2 << CP_LOADSTATE_DSTOFFSET_SHIFT) | + (6 << CP_LOADSTATE_STATEBLOCKID_SHIFT) | + (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = (1 << CP_LOADSTATE_STATETYPE_SHIFT); + *cmds++ = 0x00400220; + *cmds++ = 0x00000000; + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 4); + *cmds++ = (6 << CP_LOADSTATE_STATEBLOCKID_SHIFT) | + (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = (1 << CP_LOADSTATE_STATETYPE_SHIFT); + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 2 + count); + *cmds++ = (6 << CP_LOADSTATE_STATEBLOCKID_SHIFT) | + (13 << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = 0x00000000; + + memcpy(cmds, _a3xx_pwron_fixup_fs_instructions, count << 2); + + cmds += count; + + *cmds++ = cp_type3_packet(CP_EXEC_CL, 1); + *cmds++ = 
0x00000000; + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_CONTROL_0_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_0_REG, 1); + *cmds++ = 0x1E000150; + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG); + *cmds++ = 0x1E000050; + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type3_packet(CP_REG_RMW, 3); + *cmds++ = A3XX_RBBM_CLOCK_CTL; + *cmds++ = 0xFFFCFFFF; + *cmds++ = 0x00000000; + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0x00000000; + + /* + * Remember the number of dwords in the command buffer for when we + * program the indirect buffer call in the ringbuffer + */ + adreno_dev->pwron_fixup_dwords = + (cmds - (unsigned int *) adreno_dev->pwron_fixup->hostptr); + + /* Mark the flag in ->priv to show that we have the fix */ + set_bit(ADRENO_DEVICE_PWRON_FIXUP, &adreno_dev->priv); + return 0; +} + +static int a3xx_probe(struct platform_device *pdev, + u32 chipid, const struct adreno_gpu_core *gpucore) +{ + struct adreno_device *adreno_dev; + struct kgsl_device *device; + int ret; + + adreno_dev = (struct adreno_device *) + of_device_get_match_data(&pdev->dev); + + memset(adreno_dev, 0, sizeof(*adreno_dev)); + + adreno_dev->gpucore = gpucore; + adreno_dev->chipid = chipid; + + adreno_reg_offset_init(gpucore->gpudev->reg_offsets); + + + device = KGSL_DEVICE(adreno_dev); + + timer_setup(&device->idle_timer, kgsl_timer, 0); + + INIT_WORK(&device->idle_check_ws, kgsl_idle_check); + + ret = adreno_device_probe(pdev, adreno_dev); + if (ret) + return ret; + + return adreno_dispatcher_init(adreno_dev); +} + +static int a3xx_send_me_init(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned int *cmds; + int ret; + + cmds = adreno_ringbuffer_allocspace(rb, 18); + if (IS_ERR(cmds)) + return PTR_ERR(cmds); + + *cmds++ = cp_type3_packet(CP_ME_INIT, 17); + + *cmds++ = 0x000003f7; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000080; + *cmds++ = 0x00000100; + *cmds++ = 0x00000180; + *cmds++ = 0x00006600; + *cmds++ = 0x00000150; + *cmds++ = 0x0000014e; + *cmds++ = 0x00000154; + *cmds++ = 0x00000001; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + + /* Enable protected mode registers for A3XX */ + *cmds++ = 0x20000000; + + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + + /* Submit the command to the ringbuffer */ + kgsl_pwrscale_busy(device); + kgsl_regwrite(device, A3XX_CP_RB_WPTR, rb->_wptr); + rb->wptr = rb->_wptr; + + ret = adreno_spin_idle(adreno_dev, 2000); + if (ret) { + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + dev_err(device->dev, "CP initialization failed to idle\n"); + kgsl_device_snapshot(device, NULL, false); + } + + return ret; +} + +static void a3xx_microcode_load(struct adreno_device *adreno_dev); + +static int a3xx_rb_start(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_ringbuffer *rb = ADRENO_CURRENT_RINGBUFFER(adreno_dev); + + memset(rb->buffer_desc->hostptr, 0xaa, KGSL_RB_SIZE); + rb->wptr = 0; + rb->_wptr = 0; + rb->wptr_preempt_end = ~0; + + /* + * The size of the ringbuffer in the hardware is the log2 + * representation of the size in 
quadwords (sizedwords / 2). + * Also disable the host RPTR shadow register as it might be unreliable + * in certain circumstances. + */ + + kgsl_regwrite(device, A3XX_CP_RB_CNTL, + (ilog2(KGSL_RB_DWORDS >> 1) & 0x3F) | + (1 << 27)); + + kgsl_regwrite(device, A3XX_CP_RB_BASE, rb->buffer_desc->gpuaddr); + + a3xx_microcode_load(adreno_dev); + + /* clear ME_HALT to start micro engine */ + kgsl_regwrite(device, A3XX_CP_ME_CNTL, 0); + + return a3xx_send_me_init(adreno_dev, rb); +} + +/* + * a3xx soft fault detection + * + * a3xx targets do not have hardware fault detection so we need to do it the old + * fashioned way by periodically reading a set of registers and counters and + * checking that they are advancing. There are 6 registers and four 64 bit + * counters that we keep an eye on. + */ + +#define A3XX_SOFT_FAULT_DETECT_REGS 6 +#define A3XX_SOFT_FAULT_DETECT_COUNTERS 4 +#define A3XX_SOFT_FAULT_DETECT_COUNT \ + (A3XX_SOFT_FAULT_DETECT_REGS + (A3XX_SOFT_FAULT_DETECT_COUNTERS * 2)) + +static bool a3xx_soft_fault_detect_isidle(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 reg; + + if (kgsl_state_is_awake(device)) { + if (!adreno_rb_empty(adreno_dev->cur_rb)) + return false; + + /* only check rbbm status to determine if GPU is idle */ + kgsl_regread(device, A3XX_RBBM_STATUS, ®); + + if (reg & 0x7ffffffe) + return false; + } + + memset(adreno_dev->soft_ft_vals, 0, A3XX_SOFT_FAULT_DETECT_COUNT << 2); + return true; +} + +/* Read the fault detect registers and compare them to the stored version */ +static int a3xx_soft_fault_detect_read_compare(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_ringbuffer *rb = ADRENO_CURRENT_RINGBUFFER(adreno_dev); + int i, ret = 0; + unsigned int ts; + + if (!test_bit(ADRENO_DEVICE_SOFT_FAULT_DETECT, &adreno_dev->priv)) + return 1; + + /* Check to see if the device is idle - if so report no hang */ + if (a3xx_soft_fault_detect_isidle(adreno_dev)) + ret = 1; + + for (i = 0; i < A3XX_SOFT_FAULT_DETECT_COUNT; i++) { + unsigned int val; + + if (!adreno_dev->soft_ft_regs[i]) + continue; + + kgsl_regread(device, adreno_dev->soft_ft_regs[i], &val); + if (val != adreno_dev->soft_ft_vals[i]) + ret = 1; + adreno_dev->soft_ft_vals[i] = val; + } + + if (!adreno_rb_readtimestamp(adreno_dev, adreno_dev->cur_rb, + KGSL_TIMESTAMP_RETIRED, &ts)) { + if (ts != rb->fault_detect_ts) + ret = 1; + + rb->fault_detect_ts = ts; + } + + return ret; +} + +/* + * This is called on a regular basis while cmdobjs are inflight. Fault + * detection registers are read and compared to the existing values - if they + * changed then the GPU is still running. If they are the same between + * subsequent calls then the GPU may have faulted + */ +static void a3xx_soft_fault_timer(struct timer_list *t) +{ + struct adreno_dispatcher *dispatcher = from_timer(dispatcher, + t, fault_timer); + struct adreno_device *adreno_dev = container_of(dispatcher, + struct adreno_device, dispatcher); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + /* Leave if the user decided to turn off fast hang detection */ + if (!adreno_soft_fault_detect(adreno_dev)) + return; + + if (adreno_gpu_fault(adreno_dev)) { + adreno_dispatcher_schedule(device); + return; + } + + /* + * Read the fault registers - if it returns 0 then they haven't changed + * so mark the dispatcher as faulted and schedule the work loop. 
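For the A3XX_CP_RB_CNTL programming in a3xx_rb_start() above, the low six bits hold the ring size expressed as log2 of the quadword count and bit 27 disables the host RPTR shadow, per the comment above. A worked example assuming an illustrative 8192-dword (32 KB) ring; the real value comes from KGSL_RB_DWORDS:

/* 8192 dwords -> 4096 quadwords -> ilog2(4096) = 12 */
u32 rb_cntl = (ilog2(8192 >> 1) & 0x3F) | (1 << 27);	/* == 0x0800000C */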
+ */ + + if (!a3xx_soft_fault_detect_read_compare(adreno_dev)) + adreno_dispatcher_fault(adreno_dev, ADRENO_SOFT_FAULT); + else if (dispatcher->inflight > 0) + adreno_dispatcher_start_fault_timer(adreno_dev); +} + +/* + * Start fault detection. The counters are only assigned while fault detection + * is running so that they can be used for other purposes if fault detection is + * disabled + */ +static void a3xx_soft_fault_detect_start(struct adreno_device *adreno_dev) +{ + u32 *regs = &adreno_dev->soft_ft_regs[A3XX_SOFT_FAULT_DETECT_COUNTERS]; + int ret = 0; + + if (!test_bit(ADRENO_DEVICE_SOFT_FAULT_DETECT, &adreno_dev->priv)) + return; + + if (adreno_dev->fast_hang_detect == 1) + return; + + ret |= adreno_perfcounter_kernel_get(adreno_dev, + KGSL_PERFCOUNTER_GROUP_SP, SP_ALU_ACTIVE_CYCLES, + ®s[0], ®s[1]); + + ret |= adreno_perfcounter_kernel_get(adreno_dev, + KGSL_PERFCOUNTER_GROUP_SP, SP0_ICL1_MISSES, + ®s[2], ®s[3]); + + ret |= adreno_perfcounter_kernel_get(adreno_dev, + KGSL_PERFCOUNTER_GROUP_SP, SP_FS_CFLOW_INSTRUCTIONS, + ®s[4], ®s[5]); + + ret |= adreno_perfcounter_kernel_get(adreno_dev, + KGSL_PERFCOUNTER_GROUP_TSE, TSE_INPUT_PRIM_NUM, + ®s[6], ®s[7]); + + WARN(ret, "Unable to allocate one or more fault detect counters\n"); + adreno_dev->fast_hang_detect = 1; +} + +/* Helper function to put back a counter */ +static void put_counter(struct adreno_device *adreno_dev, + int group, int countable, u32 *lo, u32 *hi) +{ + adreno_perfcounter_put(adreno_dev, group, countable, + PERFCOUNTER_FLAG_KERNEL); + + *lo = 0; + *hi = 0; +} + +/* Stop fault detection and return the counters */ +static void a3xx_soft_fault_detect_stop(struct adreno_device *adreno_dev) +{ + u32 *regs = &adreno_dev->soft_ft_regs[A3XX_SOFT_FAULT_DETECT_COUNTERS]; + + if (!test_bit(ADRENO_DEVICE_SOFT_FAULT_DETECT, &adreno_dev->priv)) + return; + + if (!adreno_dev->fast_hang_detect) + return; + + put_counter(adreno_dev, KGSL_PERFCOUNTER_GROUP_SP, SP_ALU_ACTIVE_CYCLES, + ®s[0], ®s[1]); + + put_counter(adreno_dev, KGSL_PERFCOUNTER_GROUP_SP, SP0_ICL1_MISSES, + ®s[2], ®s[3]); + + put_counter(adreno_dev, KGSL_PERFCOUNTER_GROUP_SP, + SP_FS_CFLOW_INSTRUCTIONS, ®s[4], ®s[5]); + + put_counter(adreno_dev, KGSL_PERFCOUNTER_GROUP_TSE, TSE_INPUT_PRIM_NUM, + ®s[6], ®s[7]); + + adreno_dev->fast_hang_detect = 0; +} + +/* Initialize the registers and set up the data structures */ +static void a3xx_soft_fault_detect_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (!ADRENO_FEATURE(adreno_dev, ADRENO_SOFT_FAULT_DETECT)) + return; + + /* Disable the fast hang detect bit until we know its a go */ + adreno_dev->fast_hang_detect = 0; + + adreno_dev->soft_ft_regs = devm_kcalloc(&device->pdev->dev, + A3XX_SOFT_FAULT_DETECT_COUNT, sizeof(u32), GFP_KERNEL); + + adreno_dev->soft_ft_vals = devm_kcalloc(&device->pdev->dev, + A3XX_SOFT_FAULT_DETECT_COUNT, sizeof(u32), GFP_KERNEL); + + if (!adreno_dev->soft_ft_regs || !adreno_dev->soft_ft_vals) + return; + + adreno_dev->soft_ft_count = A3XX_SOFT_FAULT_DETECT_COUNT; + + adreno_dev->soft_ft_regs[0] = A3XX_RBBM_STATUS; + adreno_dev->soft_ft_regs[1] = A3XX_CP_RB_RPTR; + adreno_dev->soft_ft_regs[2] = A3XX_CP_IB1_BASE; + adreno_dev->soft_ft_regs[3] = A3XX_CP_IB1_BUFSZ; + adreno_dev->soft_ft_regs[4] = A3XX_CP_IB2_BASE; + adreno_dev->soft_ft_regs[5] = A3XX_CP_IB2_BUFSZ; + + set_bit(ADRENO_DEVICE_SOFT_FAULT_DETECT, &adreno_dev->priv); + + a3xx_soft_fault_detect_start(adreno_dev); +} + +static void a3xx_remove(struct adreno_device *adreno_dev) +{ + 
a3xx_soft_fault_detect_stop(adreno_dev); +} + +static int a3xx_microcode_read(struct adreno_device *adreno_dev); + +/* + * a3xx_init() - Initialize gpu specific data + * @adreno_dev: Pointer to adreno device + */ +static int a3xx_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_iommu *iommu = KGSL_IOMMU(device); + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + int ret; + + /* + * Set up the a3xx only soft fault timer before heading into the generic + * dispatcher setup + */ + if (ADRENO_FEATURE(adreno_dev, ADRENO_SOFT_FAULT_DETECT)) + timer_setup(&dispatcher->fault_timer, a3xx_soft_fault_timer, 0); + + ret = a3xx_ringbuffer_init(adreno_dev); + if (ret) + return ret; + + ret = a3xx_microcode_read(adreno_dev); + if (ret) + return ret; + + _a3xx_pwron_fixup(adreno_dev); + + ret = adreno_allocate_global(device, &iommu->setstate, PAGE_SIZE, + 0, KGSL_MEMFLAGS_GPUREADONLY, 0, "setstate"); + + if (!ret) + kgsl_sharedmem_writel(iommu->setstate, + KGSL_IOMMU_SETSTATE_NOP_OFFSET, + cp_type3_packet(CP_NOP, 1)); + + kgsl_mmu_set_feature(device, KGSL_MMU_NEED_GUARD_PAGE); + + /* Put the hardware in a responsive state to set up fault detection*/ + ret = kgsl_pwrctrl_change_state(device, KGSL_STATE_AWARE); + if (ret) + return ret; + + a3xx_soft_fault_detect_init(adreno_dev); + + kgsl_pwrctrl_change_state(device, KGSL_STATE_SLUMBER); + return 0; +} + +/* + * a3xx_err_callback() - Call back for a3xx error interrupts + * @adreno_dev: Pointer to device + * @bit: Interrupt bit + */ +static void a3xx_err_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned int reg; + + switch (bit) { + case A3XX_INT_RBBM_AHB_ERROR: { + kgsl_regread(device, A3XX_RBBM_AHB_ERROR_STATUS, ®); + + /* + * Return the word address of the erroring register so that it + * matches the register specification + */ + dev_crit_ratelimited(device->dev, + "RBBM | AHB bus error | %s | addr=%x | ports=%x:%x\n", + reg & (1 << 28) ? "WRITE" : "READ", + (reg & 0xFFFFF) >> 2, + (reg >> 20) & 0x3, + (reg >> 24) & 0xF); + + /* Clear the error */ + kgsl_regwrite(device, A3XX_RBBM_AHB_CMD, (1 << 3)); + break; + } + case A3XX_INT_RBBM_ATB_BUS_OVERFLOW: + dev_crit_ratelimited(device->dev, + "RBBM: ATB bus oveflow\n"); + break; + case A3XX_INT_CP_T0_PACKET_IN_IB: + dev_crit_ratelimited(device->dev, + "ringbuffer TO packet in IB interrupt\n"); + break; + case A3XX_INT_CP_OPCODE_ERROR: + dev_crit_ratelimited(device->dev, + "ringbuffer opcode error interrupt\n"); + break; + case A3XX_INT_CP_RESERVED_BIT_ERROR: + dev_crit_ratelimited(device->dev, + "ringbuffer reserved bit error interrupt\n"); + break; + case A3XX_INT_CP_HW_FAULT: + kgsl_regread(device, A3XX_CP_HW_FAULT, ®); + dev_crit_ratelimited(device->dev, + "CP | Ringbuffer HW fault | status=%x\n", + reg); + break; + case A3XX_INT_CP_REG_PROTECT_FAULT: + kgsl_regread(device, A3XX_CP_PROTECT_STATUS, ®); + dev_crit_ratelimited(device->dev, + "CP | Protected mode error| %s | addr=%x\n", + reg & (1 << 24) ? 
"WRITE" : "READ", + (reg & 0xFFFFF) >> 2); + break; + case A3XX_INT_CP_AHB_ERROR_HALT: + dev_crit_ratelimited(device->dev, + "ringbuffer AHB error interrupt\n"); + break; + case A3XX_INT_UCHE_OOB_ACCESS: + dev_crit_ratelimited(device->dev, + "UCHE: Out of bounds access\n"); + break; + default: + dev_crit_ratelimited(device->dev, "Unknown interrupt\n"); + } +} + +#define A3XX_INT_MASK \ + ((1 << A3XX_INT_RBBM_AHB_ERROR) | \ + (1 << A3XX_INT_RBBM_ATB_BUS_OVERFLOW) | \ + (1 << A3XX_INT_CP_T0_PACKET_IN_IB) | \ + (1 << A3XX_INT_CP_OPCODE_ERROR) | \ + (1 << A3XX_INT_CP_RESERVED_BIT_ERROR) | \ + (1 << A3XX_INT_CP_HW_FAULT) | \ + (1 << A3XX_INT_CP_IB1_INT) | \ + (1 << A3XX_INT_CP_IB2_INT) | \ + (1 << A3XX_INT_CP_RB_INT) | \ + (1 << A3XX_INT_CACHE_FLUSH_TS) | \ + (1 << A3XX_INT_CP_REG_PROTECT_FAULT) | \ + (1 << A3XX_INT_CP_AHB_ERROR_HALT) | \ + (1 << A3XX_INT_UCHE_OOB_ACCESS)) + +static const struct adreno_irq_funcs a3xx_irq_funcs[32] = { + ADRENO_IRQ_CALLBACK(NULL), /* 0 - RBBM_GPU_IDLE */ + ADRENO_IRQ_CALLBACK(a3xx_err_callback), /* 1 - RBBM_AHB_ERROR */ + ADRENO_IRQ_CALLBACK(NULL), /* 2 - RBBM_REG_TIMEOUT */ + ADRENO_IRQ_CALLBACK(NULL), /* 3 - RBBM_ME_MS_TIMEOUT */ + ADRENO_IRQ_CALLBACK(NULL), /* 4 - RBBM_PFP_MS_TIMEOUT */ + ADRENO_IRQ_CALLBACK(a3xx_err_callback), /* 5 - RBBM_ATB_BUS_OVERFLOW */ + ADRENO_IRQ_CALLBACK(NULL), /* 6 - RBBM_VFD_ERROR */ + ADRENO_IRQ_CALLBACK(NULL), /* 7 - CP_SW */ + ADRENO_IRQ_CALLBACK(a3xx_err_callback), /* 8 - CP_T0_PACKET_IN_IB */ + ADRENO_IRQ_CALLBACK(a3xx_err_callback), /* 9 - CP_OPCODE_ERROR */ + /* 10 - CP_RESERVED_BIT_ERROR */ + ADRENO_IRQ_CALLBACK(a3xx_err_callback), + ADRENO_IRQ_CALLBACK(a3xx_err_callback), /* 11 - CP_HW_FAULT */ + ADRENO_IRQ_CALLBACK(NULL), /* 12 - CP_DMA */ + ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 13 - CP_IB2_INT */ + ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 14 - CP_IB1_INT */ + ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 15 - CP_RB_INT */ + /* 16 - CP_REG_PROTECT_FAULT */ + ADRENO_IRQ_CALLBACK(a3xx_err_callback), + ADRENO_IRQ_CALLBACK(NULL), /* 17 - CP_RB_DONE_TS */ + ADRENO_IRQ_CALLBACK(NULL), /* 18 - CP_VS_DONE_TS */ + ADRENO_IRQ_CALLBACK(NULL), /* 19 - CP_PS_DONE_TS */ + ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 20 - CP_CACHE_FLUSH_TS */ + /* 21 - CP_AHB_ERROR_FAULT */ + ADRENO_IRQ_CALLBACK(a3xx_err_callback), + ADRENO_IRQ_CALLBACK(NULL), /* 22 - Unused */ + ADRENO_IRQ_CALLBACK(NULL), /* 23 - Unused */ + /* 24 - MISC_HANG_DETECT */ + ADRENO_IRQ_CALLBACK(adreno_hang_int_callback), + ADRENO_IRQ_CALLBACK(a3xx_err_callback), /* 25 - UCHE_OOB_ACCESS */ +}; + +static struct { + u32 reg; + u32 base; + u32 count; +} a3xx_protected_blocks[] = { + /* RBBM */ + { A3XX_CP_PROTECT_REG_0, 0x0018, 0 }, + { A3XX_CP_PROTECT_REG_0 + 1, 0x0020, 2 }, + { A3XX_CP_PROTECT_REG_0 + 2, 0x0033, 0 }, + { A3XX_CP_PROTECT_REG_0 + 3, 0x0042, 0 }, + { A3XX_CP_PROTECT_REG_0 + 4, 0x0050, 4 }, + { A3XX_CP_PROTECT_REG_0 + 5, 0x0063, 0 }, + { A3XX_CP_PROTECT_REG_0 + 6, 0x0100, 4 }, + /* CP */ + { A3XX_CP_PROTECT_REG_0 + 7, 0x01c0, 5 }, + { A3XX_CP_PROTECT_REG_0 + 8, 0x01ec, 1 }, + { A3XX_CP_PROTECT_REG_0 + 9, 0x01f6, 1 }, + { A3XX_CP_PROTECT_REG_0 + 10, 0x01f8, 2 }, + { A3XX_CP_PROTECT_REG_0 + 11, 0x045e, 2 }, + { A3XX_CP_PROTECT_REG_0 + 12, 0x0460, 4 }, + /* RB */ + { A3XX_CP_PROTECT_REG_0 + 13, 0x0cc0, 0 }, + /* VBIF */ + { A3XX_CP_PROTECT_REG_0 + 14, 0x3000, 6 }, + /* SMMU */ + { A3XX_CP_PROTECT_REG_0 + 15, 0xa000, 12 }, + /* There are no remaining protected mode registers for a3xx */ +}; + +static void a3xx_protect_init(struct kgsl_device *device) +{ + int 
i; + + kgsl_regwrite(device, A3XX_CP_PROTECT_CTRL, 0x00000007); + + for (i = 0; i < ARRAY_SIZE(a3xx_protected_blocks); i++) { + u32 val = 0x60000000 | + (a3xx_protected_blocks[i].count << 24) | + (a3xx_protected_blocks[i].base << 2); + + kgsl_regwrite(device, a3xx_protected_blocks[i].reg, val); + } +} + +static int a3xx_start(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + const struct adreno_a3xx_core *a3xx_core = to_a3xx_core(adreno_dev); + int ret; + + ret = kgsl_mmu_start(device); + if (ret) + return ret; + + adreno_get_bus_counters(adreno_dev); + adreno_perfcounter_restore(adreno_dev); + + if (adreno_dev->soft_ft_regs) + memset(adreno_dev->soft_ft_regs, 0, + adreno_dev->soft_ft_count << 2); + + adreno_dev->irq_mask = A3XX_INT_MASK; + + /* Set up VBIF registers from the GPU core definition */ + kgsl_regmap_multi_write(&device->regmap, a3xx_core->vbif, + a3xx_core->vbif_count); + + /* Make all blocks contribute to the GPU BUSY perf counter */ + kgsl_regwrite(device, A3XX_RBBM_GPU_BUSY_MASKED, 0xFFFFFFFF); + + /* Tune the hystersis counters for SP and CP idle detection */ + kgsl_regwrite(device, A3XX_RBBM_SP_HYST_CNT, 0x10); + kgsl_regwrite(device, A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10); + + /* + * Enable the RBBM error reporting bits. This lets us get + * useful information on failure + */ + + kgsl_regwrite(device, A3XX_RBBM_AHB_CTL0, 0x00000001); + + /* Enable AHB error reporting */ + kgsl_regwrite(device, A3XX_RBBM_AHB_CTL1, 0xA6FFFFFF); + + /* Turn on the power counters */ + kgsl_regwrite(device, A3XX_RBBM_RBBM_CTL, 0x00030000); + + /* + * Turn on hang detection - this spews a lot of useful information + * into the RBBM registers on a hang + */ + kgsl_regwrite(device, A3XX_RBBM_INTERFACE_HANG_INT_CTL, + (1 << 16) | 0xFFF); + + /* Enable 64-byte cacheline size. HW Default is 32-byte (0x000000E0). 
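Each value that a3xx_protect_init() above writes packs one protected range into a single dword: the 0x60000000 mask sets the two top control bits, the count from a3xx_protected_blocks (apparently log2 of the range length, judging by entries such as 0 and 12) lands at bit 24, and the base register offset is shifted left by two. A worked example for the VBIF entry (base 0x3000, count 6), with the field meanings inferred from the code rather than from documentation:

/* 0x60000000 | (6 << 24) | (0x3000 << 2) == 0x6600C000 */
u32 vbif_protect = 0x60000000 | (6 << 24) | (0x3000 << 2);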
*/ + kgsl_regwrite(device, A3XX_UCHE_CACHE_MODE_CONTROL_REG, 0x00000001); + + /* Enable VFD to access most of the UCHE (7 ways out of 8) */ + kgsl_regwrite(device, A3XX_UCHE_CACHE_WAYS_VFD, 0x07); + + /* Enable Clock gating */ + kgsl_regwrite(device, A3XX_RBBM_CLOCK_CTL, A3XX_RBBM_CLOCK_CTL_DEFAULT); + + /* Turn on protection */ + a3xx_protect_init(device); + + /* Turn on performance counters */ + kgsl_regwrite(device, A3XX_RBBM_PERFCTR_CTL, 0x01); + + kgsl_regwrite(device, A3XX_CP_DEBUG, A3XX_CP_DEBUG_DEFAULT); + + /* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */ + kgsl_regwrite(device, A3XX_CP_QUEUE_THRESHOLDS, 0x000E0602); + return 0; +} + +#ifdef CONFIG_QCOM_KGSL_CORESIGHT +static struct adreno_coresight_register a3xx_coresight_registers[] = { + { A3XX_RBBM_DEBUG_BUS_CTL, 0x0001093F }, + { A3XX_RBBM_EXT_TRACE_STOP_CNT, 0x00017fff }, + { A3XX_RBBM_EXT_TRACE_START_CNT, 0x0001000f }, + { A3XX_RBBM_EXT_TRACE_PERIOD_CNT, 0x0001ffff }, + { A3XX_RBBM_EXT_TRACE_CMD, 0x00000001 }, + { A3XX_RBBM_EXT_TRACE_BUS_CTL, 0x89100010 }, + { A3XX_RBBM_DEBUG_BUS_STB_CTL0, 0x00000000 }, + { A3XX_RBBM_DEBUG_BUS_STB_CTL1, 0xFFFFFFFE }, + { A3XX_RBBM_INT_TRACE_BUS_CTL, 0x00201111 }, +}; + +static ADRENO_CORESIGHT_ATTR(config_debug_bus, + &a3xx_coresight_registers[0]); +static ADRENO_CORESIGHT_ATTR(config_trace_stop_cnt, + &a3xx_coresight_registers[1]); +static ADRENO_CORESIGHT_ATTR(config_trace_start_cnt, + &a3xx_coresight_registers[2]); +static ADRENO_CORESIGHT_ATTR(config_trace_period_cnt, + &a3xx_coresight_registers[3]); +static ADRENO_CORESIGHT_ATTR(config_trace_cmd, + &a3xx_coresight_registers[4]); +static ADRENO_CORESIGHT_ATTR(config_trace_bus_ctl, + &a3xx_coresight_registers[5]); + +static struct attribute *a3xx_coresight_attrs[] = { + &coresight_attr_config_debug_bus.attr.attr, + &coresight_attr_config_trace_start_cnt.attr.attr, + &coresight_attr_config_trace_stop_cnt.attr.attr, + &coresight_attr_config_trace_period_cnt.attr.attr, + &coresight_attr_config_trace_cmd.attr.attr, + &coresight_attr_config_trace_bus_ctl.attr.attr, + NULL, +}; + +static const struct attribute_group a3xx_coresight_group = { + .attrs = a3xx_coresight_attrs, +}; + +static const struct attribute_group *a3xx_coresight_groups[] = { + &a3xx_coresight_group, + NULL, +}; + +static struct adreno_coresight a3xx_coresight = { + .registers = a3xx_coresight_registers, + .count = ARRAY_SIZE(a3xx_coresight_registers), + .groups = a3xx_coresight_groups, +}; +#endif + +/* Register offset defines for A3XX */ +static unsigned int a3xx_register_offsets[ADRENO_REG_REGISTER_MAX] = { + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE, A3XX_CP_RB_BASE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE_HI, ADRENO_REG_SKIP), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR, A3XX_CP_RB_RPTR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_WPTR, A3XX_CP_RB_WPTR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_CNTL, A3XX_CP_ME_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_CNTL, A3XX_CP_RB_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE, A3XX_CP_IB1_BASE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE_HI, ADRENO_REG_SKIP), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BUFSZ, A3XX_CP_IB1_BUFSZ), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE, A3XX_CP_IB2_BASE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE_HI, ADRENO_REG_SKIP), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BUFSZ, A3XX_CP_IB2_BUFSZ), + ADRENO_REG_DEFINE(ADRENO_REG_CP_TIMESTAMP, A3XX_CP_SCRATCH_REG0), + ADRENO_REG_DEFINE(ADRENO_REG_CP_SCRATCH_REG6, A3XX_CP_SCRATCH_REG6), + ADRENO_REG_DEFINE(ADRENO_REG_CP_SCRATCH_REG7, A3XX_CP_SCRATCH_REG7), + 
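/*
 * Note on the offset table above: the ADRENO_REG_*_BASE_HI entries map to
 * ADRENO_REG_SKIP because a3xx ringbuffer and IB base addresses are 32-bit,
 * so there is no high-half register to access on this target.
 */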
ADRENO_REG_DEFINE(ADRENO_REG_CP_PROTECT_REG_0, A3XX_CP_PROTECT_REG_0), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_STATUS, A3XX_RBBM_STATUS), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_PWR_1_LO, + A3XX_RBBM_PERFCTR_PWR_1_LO), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_INT_0_MASK, A3XX_RBBM_INT_0_MASK), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_CLOCK_CTL, A3XX_RBBM_CLOCK_CTL), + ADRENO_REG_DEFINE(ADRENO_REG_PA_SC_AA_CONFIG, A3XX_PA_SC_AA_CONFIG), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PM_OVERRIDE2, A3XX_RBBM_PM_OVERRIDE2), + ADRENO_REG_DEFINE(ADRENO_REG_SQ_GPR_MANAGEMENT, A3XX_SQ_GPR_MANAGEMENT), + ADRENO_REG_DEFINE(ADRENO_REG_SQ_INST_STORE_MANAGEMENT, + A3XX_SQ_INST_STORE_MANAGEMENT), + ADRENO_REG_DEFINE(ADRENO_REG_TP0_CHICKEN, A3XX_TP0_CHICKEN), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SW_RESET_CMD, A3XX_RBBM_SW_RESET_CMD), +}; + +static int _load_firmware(struct kgsl_device *device, const char *fwfile, + void **buf, int *len) +{ + const struct firmware *fw = NULL; + int ret; + + ret = request_firmware(&fw, fwfile, &device->pdev->dev); + + if (ret) { + dev_err(&device->pdev->dev, "request_firmware(%s) failed: %d\n", + fwfile, ret); + return ret; + } + + if (!fw) + return -EINVAL; + + *buf = devm_kmemdup(&device->pdev->dev, fw->data, fw->size, GFP_KERNEL); + *len = fw->size; + + release_firmware(fw); + return (*buf) ? 0 : -ENOMEM; +} + +static int a3xx_microcode_read(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_firmware *pm4_fw = ADRENO_FW(adreno_dev, ADRENO_FW_PM4); + struct adreno_firmware *pfp_fw = ADRENO_FW(adreno_dev, ADRENO_FW_PFP); + const struct adreno_a3xx_core *a3xx_core = to_a3xx_core(adreno_dev); + + if (pm4_fw->fwvirt == NULL) { + int len; + void *ptr; + + int ret = _load_firmware(device, + a3xx_core->pm4fw_name, &ptr, &len); + + if (ret) { + dev_err(device->dev, "Failed to read pm4 ucode %s\n", + a3xx_core->pm4fw_name); + return ret; + } + + /* PM4 size is 3 dword aligned plus 1 dword of version */ + if (len % ((sizeof(uint32_t) * 3)) != sizeof(uint32_t)) { + dev_err(device->dev, + "Bad pm4 microcode size: %d\n", + len); + kfree(ptr); + return -ENOMEM; + } + + pm4_fw->size = len / sizeof(uint32_t); + pm4_fw->fwvirt = ptr; + pm4_fw->version = pm4_fw->fwvirt[1]; + } + + if (pfp_fw->fwvirt == NULL) { + int len; + void *ptr; + + int ret = _load_firmware(device, + a3xx_core->pfpfw_name, &ptr, &len); + if (ret) { + dev_err(device->dev, "Failed to read pfp ucode %s\n", + a3xx_core->pfpfw_name); + return ret; + } + + /* PFP size shold be dword aligned */ + if (len % sizeof(uint32_t) != 0) { + dev_err(device->dev, + "Bad PFP microcode size: %d\n", + len); + kfree(ptr); + return -ENOMEM; + } + + pfp_fw->size = len / sizeof(uint32_t); + pfp_fw->fwvirt = ptr; + pfp_fw->version = pfp_fw->fwvirt[1]; + } + + return 0; +} + +static void a3xx_microcode_load(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + size_t pm4_size = adreno_dev->fw[ADRENO_FW_PM4].size; + size_t pfp_size = adreno_dev->fw[ADRENO_FW_PFP].size; + + /* load the CP ucode using AHB writes */ + kgsl_regwrite(device, A3XX_CP_ME_RAM_WADDR, 0); + + kgsl_regmap_bulk_write(&device->regmap, A3XX_CP_ME_RAM_DATA, + &adreno_dev->fw[ADRENO_FW_PM4].fwvirt[1], pm4_size - 1); + + kgsl_regwrite(device, A3XX_CP_PFP_UCODE_ADDR, 0); + + kgsl_regmap_bulk_write(&device->regmap, A3XX_CP_PFP_UCODE_DATA, + &adreno_dev->fw[ADRENO_FW_PFP].fwvirt[1], pfp_size - 1); +} + +#if IS_ENABLED(CONFIG_COMMON_CLK_QCOM) +static void a3xx_clk_set_options(struct adreno_device 
*adreno_dev, + const char *name, struct clk *clk, bool on) +{ + if (!clk || !adreno_is_a306a(adreno_dev)) + return; + + /* Handle clock settings for GFX PSCBCs */ + if (on) { + if (!strcmp(name, "mem_iface_clk")) { + qcom_clk_set_flags(clk, CLKFLAG_NORETAIN_PERIPH); + qcom_clk_set_flags(clk, CLKFLAG_NORETAIN_MEM); + } else if (!strcmp(name, "core_clk")) { + qcom_clk_set_flags(clk, CLKFLAG_RETAIN_PERIPH); + qcom_clk_set_flags(clk, CLKFLAG_RETAIN_MEM); + } + } else { + if (!strcmp(name, "core_clk")) { + qcom_clk_set_flags(clk, CLKFLAG_NORETAIN_PERIPH); + qcom_clk_set_flags(clk, CLKFLAG_NORETAIN_MEM); + } + } +} +#endif + +static u64 a3xx_read_alwayson(struct adreno_device *adreno_dev) +{ + /* A3XX does not have a always on timer */ + return 0; +} + +static irqreturn_t a3xx_irq_handler(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + irqreturn_t ret; + u32 status; + + /* Get the current interrupt status */ + kgsl_regread(device, A3XX_RBBM_INT_0_STATUS, &status); + + /* + * Clear all the interrupt bits except A3XX_INT_RBBM_AHB_ERROR. + * The interrupt will stay asserted until it is cleared by the handler + * so don't touch it yet to avoid a storm + */ + + kgsl_regwrite(device, A3XX_RBBM_INT_CLEAR_CMD, + status & ~A3XX_INT_RBBM_AHB_ERROR); + + /* Call the helper to execute the callbacks */ + ret = adreno_irq_callbacks(adreno_dev, a3xx_irq_funcs, status); + + trace_kgsl_a3xx_irq_status(adreno_dev, status); + + /* Now clear AHB_ERROR if it was set */ + if (status & A3XX_INT_RBBM_AHB_ERROR) + kgsl_regwrite(device, A3XX_RBBM_INT_CLEAR_CMD, + A3XX_INT_RBBM_AHB_ERROR); + + return ret; +} + +static bool a3xx_hw_isidle(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 status; + + kgsl_regread(device, A3XX_RBBM_STATUS, &status); + + if (status & 0x7ffffffe) + return false; + + kgsl_regread(device, A3XX_RBBM_INT_0_STATUS, &status); + + /* Return busy if a interrupt is pending */ + return !((status & adreno_dev->irq_mask) || + atomic_read(&adreno_dev->pending_irq_refcnt)); +} + +static int a3xx_clear_pending_transactions(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 mask = A30X_VBIF_XIN_HALT_CTRL0_MASK; + int ret; + + kgsl_regwrite(device, A3XX_VBIF_XIN_HALT_CTRL0, mask); + ret = adreno_wait_for_halt_ack(device, A3XX_VBIF_XIN_HALT_CTRL1, mask); + kgsl_regwrite(device, A3XX_VBIF_XIN_HALT_CTRL0, 0); + + return ret; +} + +static bool a3xx_is_hw_collapsible(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + /* + * Skip power collapse for A304, if power ctrl flag is set to + * non zero. As A304 soft_reset will not work, power collapse + * needs to disable to avoid soft_reset. 
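A worked example of the PM4 size rule enforced in a3xx_microcode_read() above, using an illustrative image length (the real lengths come from the firmware files named by the a3xx gpucore definition):

/* len = 6160 bytes: 6160 % (3 * sizeof(uint32_t)) == 4, so the image is
 * accepted; pm4_fw->size = 6160 / 4 = 1540 dwords, and a3xx_microcode_load()
 * then programs size - 1 = 1539 dwords starting at fwvirt[1] (dword 0 is
 * skipped and dword 1 carries the version). */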
+ */ + if (adreno_is_a304(adreno_dev) && device->pwrctrl.ctrl_flags) + return false; + + return adreno_isidle(adreno_dev); +} + +static void a3xx_power_stats(struct adreno_device *adreno_dev, + struct kgsl_power_stats *stats) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_busy_data *busy = &adreno_dev->busy_data; + s64 freq = kgsl_pwrctrl_active_freq(&device->pwrctrl) / 1000000; + u64 gpu_busy; + + /* Set the GPU busy counter for frequency scaling */ + gpu_busy = counter_delta(device, A3XX_RBBM_PERFCTR_PWR_1_LO, + &busy->gpu_busy); + + stats->busy_time = gpu_busy / freq; + + if (!device->pwrctrl.bus_control) + return; + + stats->ram_time = counter_delta(device, adreno_dev->ram_cycles_lo, + &busy->bif_ram_cycles); + + stats->ram_wait = counter_delta(device, adreno_dev->starved_ram_lo, + &busy->bif_starved_ram); +} + +static int a3xx_setproperty(struct kgsl_device_private *dev_priv, + u32 type, void __user *value, u32 sizebytes) +{ + struct kgsl_device *device = dev_priv->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + u32 enable; + + if (type != KGSL_PROP_PWRCTRL) + return -ENODEV; + + if (sizebytes != sizeof(enable)) + return -EINVAL; + + if (copy_from_user(&enable, value, sizeof(enable))) + return -EFAULT; + + mutex_lock(&device->mutex); + if (enable) { + device->pwrctrl.ctrl_flags = 0; + + if (!adreno_active_count_get(adreno_dev)) { + a3xx_soft_fault_detect_start(adreno_dev); + adreno_active_count_put(adreno_dev); + } + + kgsl_pwrscale_enable(device); + } else { + kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE); + device->pwrctrl.ctrl_flags = KGSL_PWR_ON; + + a3xx_soft_fault_detect_stop(adreno_dev); + kgsl_pwrscale_disable(device, true); + } + mutex_unlock(&device->mutex); + + return 0; +} + +const struct adreno_gpudev adreno_a3xx_gpudev = { + .reg_offsets = a3xx_register_offsets, + .irq_handler = a3xx_irq_handler, + .probe = a3xx_probe, + .rb_start = a3xx_rb_start, + .init = a3xx_init, + .start = a3xx_start, + .snapshot = a3xx_snapshot, +#ifdef CONFIG_QCOM_KGSL_CORESIGHT + .coresight = {&a3xx_coresight}, +#endif +#if IS_ENABLED(CONFIG_COMMON_CLK_QCOM) + .clk_set_options = a3xx_clk_set_options, +#endif + .read_alwayson = a3xx_read_alwayson, + .hw_isidle = a3xx_hw_isidle, + .power_ops = &adreno_power_operations, + .clear_pending_transactions = a3xx_clear_pending_transactions, + .ringbuffer_submitcmd = a3xx_ringbuffer_submitcmd, + .is_hw_collapsible = a3xx_is_hw_collapsible, + .power_stats = a3xx_power_stats, + .setproperty = a3xx_setproperty, + .remove = a3xx_remove, +}; diff --git a/adreno_a3xx.h b/adreno_a3xx.h new file mode 100644 index 0000000000..d1e2e908a4 --- /dev/null +++ b/adreno_a3xx.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2013-2016, 2019-2020, The Linux Foundation. All rights reserved. 
+ */ +#ifndef __A3XX_H +#define __A3XX_H + +#include "a3xx_reg.h" +/** + * struct adreno_a3xx_core - a3xx specific GPU core definitions + */ +struct adreno_a3xx_core { + /** @base: Container for the generic &struct adreno_gpu_core */ + struct adreno_gpu_core base; + /** pm4fw_name: Name of the PM4 microcode file */ + const char *pm4fw_name; + /** pfpfw_name: Name of the PFP microcode file */ + const char *pfpfw_name; + /** @vbif: List of registers and values to write for VBIF */ + const struct kgsl_regmap_list *vbif; + /** @vbif_count: Number of registers in @vbif */ + u32 vbif_count; +}; + +struct adreno_device; + +/** + * to_a3xx_core - return the a3xx specific GPU core struct + * @adreno_dev: An Adreno GPU device handle + * + * Returns: + * A pointer to the a3xx specific GPU core struct + */ +static inline const struct adreno_a3xx_core * +to_a3xx_core(struct adreno_device *adreno_dev) +{ + const struct adreno_gpu_core *core = adreno_dev->gpucore; + + return container_of(core, struct adreno_a3xx_core, base); +} + +void a3xx_snapshot(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot); + +extern const struct adreno_perfcounters adreno_a3xx_perfcounters; + +/** + * a3xx_ringbuffer_init - Initialize the ringbuffer + * @adreno_dev: An Adreno GPU handle + * + * Initialize the ringbuffer for a3xx. + * Return: 0 on success or negative on failure + */ +int a3xx_ringbuffer_init(struct adreno_device *adreno_dev); + +/** + * a3xx_ringbuffer_submitcmd - Submit a user command to the ringbuffer + * @adreno_dev: An Adreno GPU handle + * @cmdobj: Pointer to a user command object + * @flags: Internal submit flags + * @time: Optional pointer to a adreno_submit_time container + * + * Return: 0 on success or negative on failure + */ +int a3xx_ringbuffer_submitcmd(struct adreno_device *adreno_dev, + struct kgsl_drawobj_cmd *cmdobj, u32 flags, + struct adreno_submit_time *time); + +#endif /*__A3XX_H */ diff --git a/adreno_a3xx_perfcounter.c b/adreno_a3xx_perfcounter.c new file mode 100644 index 0000000000..a525fef97b --- /dev/null +++ b/adreno_a3xx_perfcounter.c @@ -0,0 +1,411 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2020, The Linux Foundation. All rights reserved. 
+ */ + +#include "adreno.h" +#include "adreno_a3xx.h" +#include "adreno_perfcounter.h" +#include "kgsl_device.h" + +/* Bit flag for RBMM_PERFCTR_CTL */ +#define RBBM_PERFCTR_CTL_ENABLE 0x00000001 +#define VBIF2_PERF_CNT_SEL_MASK 0x7F +/* offset of clear register from select register */ +#define VBIF2_PERF_CLR_REG_SEL_OFF 8 +/* offset of enable register from select register */ +#define VBIF2_PERF_EN_REG_SEL_OFF 16 +/* offset of clear register from the enable register */ +#define VBIF2_PERF_PWR_CLR_REG_EN_OFF 8 + +static void a3xx_counter_load(struct adreno_device *adreno_dev, + struct adreno_perfcount_register *reg) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int index = reg->load_bit / 32; + u32 enable = BIT(reg->load_bit & 31); + + kgsl_regwrite(device, A3XX_RBBM_PERFCTR_LOAD_VALUE_LO, + lower_32_bits(reg->value)); + + kgsl_regwrite(device, A3XX_RBBM_PERFCTR_LOAD_VALUE_HI, + upper_32_bits(reg->value)); + + if (index == 0) + kgsl_regwrite(device, A3XX_RBBM_PERFCTR_LOAD_CMD0, enable); + else + kgsl_regwrite(device, A3XX_RBBM_PERFCTR_LOAD_CMD1, enable); +} + +static int a3xx_counter_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter, unsigned int countable) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + + kgsl_regwrite(device, reg->select, countable); + reg->value = 0; + + return 0; +} + +static u64 a3xx_counter_read(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + u32 val, hi, lo; + + kgsl_regread(device, A3XX_RBBM_PERFCTR_CTL, &val); + kgsl_regwrite(device, A3XX_RBBM_PERFCTR_CTL, + val & ~RBBM_PERFCTR_CTL_ENABLE); + + kgsl_regread(device, reg->offset, &lo); + kgsl_regread(device, reg->offset_hi, &hi); + + kgsl_regwrite(device, A3XX_RBBM_PERFCTR_CTL, val); + + return (((u64) hi) << 32) | lo; +} + +static int a3xx_counter_pwr_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter, unsigned int countable) +{ + return 0; +} + +static u64 a3xx_counter_pwr_read(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + u32 val, hi, lo; + + kgsl_regread(device, A3XX_RBBM_RBBM_CTL, &val); + + /* Freeze the counter so we can read it */ + if (!counter) + kgsl_regwrite(device, A3XX_RBBM_RBBM_CTL, val & ~0x10000); + else + kgsl_regwrite(device, A3XX_RBBM_RBBM_CTL, val & ~0x20000); + + kgsl_regread(device, reg->offset, &lo); + kgsl_regread(device, reg->offset_hi, &hi); + + kgsl_regwrite(device, A3XX_RBBM_RBBM_CTL, val); + + return ((((u64) hi) << 32) | lo) + reg->value; +} + +static int a3xx_counter_vbif_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter, unsigned int countable) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + + if (countable > VBIF2_PERF_CNT_SEL_MASK) + return -EINVAL; + + /* + * Write 1, followed by 0 to CLR register for + * clearing the counter + */ + kgsl_regwrite(device, + reg->select - VBIF2_PERF_CLR_REG_SEL_OFF, 1); + kgsl_regwrite(device, + reg->select - VBIF2_PERF_CLR_REG_SEL_OFF, 0); + 
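/*
 * The offsets used in a3xx_counter_vbif_enable() place the clear register at
 * reg->select - VBIF2_PERF_CLR_REG_SEL_OFF (8) and the enable register at
 * reg->select - VBIF2_PERF_EN_REG_SEL_OFF (16): the counter is cleared by
 * pulsing CLR high then low, the countable is programmed into SEL, and EN
 * is then written to 1.
 */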
kgsl_regwrite(device, + reg->select, countable & VBIF2_PERF_CNT_SEL_MASK); + /* enable reg is 8 DWORDS before select reg */ + kgsl_regwrite(device, + reg->select - VBIF2_PERF_EN_REG_SEL_OFF, 1); + + kgsl_regwrite(device, reg->select, countable); + + reg->value = 0; + return 0; +} + +static u64 a3xx_counter_vbif_read(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + u32 hi, lo; + + /* freeze counter */ + kgsl_regwrite(device, reg->select - VBIF2_PERF_EN_REG_SEL_OFF, 0); + + kgsl_regread(device, reg->offset, &lo); + kgsl_regread(device, reg->offset_hi, &hi); + + /* un-freeze counter */ + kgsl_regwrite(device, reg->select - VBIF2_PERF_EN_REG_SEL_OFF, 1); + + return ((((u64) hi) << 32) | lo) + reg->value; +} + +static int a3xx_counter_vbif_pwr_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter, unsigned int countable) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + + /* + * Write 1, followed by 0 to CLR register for + * clearing the counter + */ + kgsl_regwrite(device, reg->select + + VBIF2_PERF_PWR_CLR_REG_EN_OFF, 1); + kgsl_regwrite(device, reg->select + + VBIF2_PERF_PWR_CLR_REG_EN_OFF, 0); + kgsl_regwrite(device, reg->select, 1); + + reg->value = 0; + return 0; +} + +static u64 a3xx_counter_vbif_pwr_read(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + u32 hi, lo; + + /* freeze counter */ + kgsl_regwrite(device, reg->select, 0); + + kgsl_regread(device, reg->offset, &lo); + kgsl_regread(device, reg->offset_hi, &hi); + + /* un-freeze counter */ + kgsl_regwrite(device, reg->select, 1); + + return ((((u64) hi) << 32) | lo) + reg->value; +} + +/* + * Define the available perfcounter groups - these get used by + * adreno_perfcounter_get and adreno_perfcounter_put + */ + +static struct adreno_perfcount_register a3xx_perfcounters_cp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_CP_0_LO, + A3XX_RBBM_PERFCTR_CP_0_HI, 0, A3XX_CP_PERFCOUNTER_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_rbbm[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RBBM_0_LO, + A3XX_RBBM_PERFCTR_RBBM_0_HI, 1, A3XX_RBBM_PERFCOUNTER0_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RBBM_1_LO, + A3XX_RBBM_PERFCTR_RBBM_1_HI, 2, A3XX_RBBM_PERFCOUNTER1_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_pc[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PC_0_LO, + A3XX_RBBM_PERFCTR_PC_0_HI, 3, A3XX_PC_PERFCOUNTER0_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PC_1_LO, + A3XX_RBBM_PERFCTR_PC_1_HI, 4, A3XX_PC_PERFCOUNTER1_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PC_2_LO, + A3XX_RBBM_PERFCTR_PC_2_HI, 5, A3XX_PC_PERFCOUNTER2_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PC_3_LO, + A3XX_RBBM_PERFCTR_PC_3_HI, 6, A3XX_PC_PERFCOUNTER3_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_vfd[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_VFD_0_LO, + A3XX_RBBM_PERFCTR_VFD_0_HI, 7, A3XX_VFD_PERFCOUNTER0_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, 
A3XX_RBBM_PERFCTR_VFD_1_LO, + A3XX_RBBM_PERFCTR_VFD_1_HI, 8, A3XX_VFD_PERFCOUNTER1_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_hlsq[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_0_LO, + A3XX_RBBM_PERFCTR_HLSQ_0_HI, 9, + A3XX_HLSQ_PERFCOUNTER0_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_1_LO, + A3XX_RBBM_PERFCTR_HLSQ_1_HI, 10, + A3XX_HLSQ_PERFCOUNTER1_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_2_LO, + A3XX_RBBM_PERFCTR_HLSQ_2_HI, 11, + A3XX_HLSQ_PERFCOUNTER2_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_3_LO, + A3XX_RBBM_PERFCTR_HLSQ_3_HI, 12, + A3XX_HLSQ_PERFCOUNTER3_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_4_LO, + A3XX_RBBM_PERFCTR_HLSQ_4_HI, 13, + A3XX_HLSQ_PERFCOUNTER4_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_5_LO, + A3XX_RBBM_PERFCTR_HLSQ_5_HI, 14, + A3XX_HLSQ_PERFCOUNTER5_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_vpc[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_VPC_0_LO, + A3XX_RBBM_PERFCTR_VPC_0_HI, 15, A3XX_VPC_PERFCOUNTER0_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_VPC_1_LO, + A3XX_RBBM_PERFCTR_VPC_1_HI, 16, A3XX_VPC_PERFCOUNTER1_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_tse[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TSE_0_LO, + A3XX_RBBM_PERFCTR_TSE_0_HI, 17, A3XX_GRAS_PERFCOUNTER0_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TSE_1_LO, + A3XX_RBBM_PERFCTR_TSE_1_HI, 18, A3XX_GRAS_PERFCOUNTER1_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_ras[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RAS_0_LO, + A3XX_RBBM_PERFCTR_RAS_0_HI, 19, A3XX_GRAS_PERFCOUNTER2_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RAS_1_LO, + A3XX_RBBM_PERFCTR_RAS_1_HI, 20, A3XX_GRAS_PERFCOUNTER3_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_uche[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_0_LO, + A3XX_RBBM_PERFCTR_UCHE_0_HI, 21, + A3XX_UCHE_PERFCOUNTER0_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_1_LO, + A3XX_RBBM_PERFCTR_UCHE_1_HI, 22, + A3XX_UCHE_PERFCOUNTER1_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_2_LO, + A3XX_RBBM_PERFCTR_UCHE_2_HI, 23, + A3XX_UCHE_PERFCOUNTER2_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_3_LO, + A3XX_RBBM_PERFCTR_UCHE_3_HI, 24, + A3XX_UCHE_PERFCOUNTER3_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_4_LO, + A3XX_RBBM_PERFCTR_UCHE_4_HI, 25, + A3XX_UCHE_PERFCOUNTER4_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_5_LO, + A3XX_RBBM_PERFCTR_UCHE_5_HI, 26, + A3XX_UCHE_PERFCOUNTER5_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_tp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_0_LO, + A3XX_RBBM_PERFCTR_TP_0_HI, 27, A3XX_TP_PERFCOUNTER0_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_1_LO, + A3XX_RBBM_PERFCTR_TP_1_HI, 28, A3XX_TP_PERFCOUNTER1_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_2_LO, + A3XX_RBBM_PERFCTR_TP_2_HI, 29, A3XX_TP_PERFCOUNTER2_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_3_LO, + A3XX_RBBM_PERFCTR_TP_3_HI, 30, A3XX_TP_PERFCOUNTER3_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_4_LO, + 
A3XX_RBBM_PERFCTR_TP_4_HI, 31, A3XX_TP_PERFCOUNTER4_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_5_LO, + A3XX_RBBM_PERFCTR_TP_5_HI, 32, A3XX_TP_PERFCOUNTER5_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_sp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_0_LO, + A3XX_RBBM_PERFCTR_SP_0_HI, 33, A3XX_SP_PERFCOUNTER0_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_1_LO, + A3XX_RBBM_PERFCTR_SP_1_HI, 34, A3XX_SP_PERFCOUNTER1_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_2_LO, + A3XX_RBBM_PERFCTR_SP_2_HI, 35, A3XX_SP_PERFCOUNTER2_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_3_LO, + A3XX_RBBM_PERFCTR_SP_3_HI, 36, A3XX_SP_PERFCOUNTER3_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_4_LO, + A3XX_RBBM_PERFCTR_SP_4_HI, 37, A3XX_SP_PERFCOUNTER4_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_5_LO, + A3XX_RBBM_PERFCTR_SP_5_HI, 38, A3XX_SP_PERFCOUNTER5_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_6_LO, + A3XX_RBBM_PERFCTR_SP_6_HI, 39, A3XX_SP_PERFCOUNTER6_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_7_LO, + A3XX_RBBM_PERFCTR_SP_7_HI, 40, A3XX_SP_PERFCOUNTER7_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_rb[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RB_0_LO, + A3XX_RBBM_PERFCTR_RB_0_HI, 41, A3XX_RB_PERFCOUNTER0_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RB_1_LO, + A3XX_RBBM_PERFCTR_RB_1_HI, 42, A3XX_RB_PERFCOUNTER1_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_pwr[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PWR_0_LO, + A3XX_RBBM_PERFCTR_PWR_0_HI, -1, 0 }, + /* + * A3XX_RBBM_PERFCTR_PWR_1_LO is used for frequency scaling and removed + * from the pool of available counters + */ +}; + +static struct adreno_perfcount_register a3xx_perfcounters_vbif2[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF2_PERF_CNT_LOW0, + A3XX_VBIF2_PERF_CNT_HIGH0, -1, A3XX_VBIF2_PERF_CNT_SEL0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF2_PERF_CNT_LOW1, + A3XX_VBIF2_PERF_CNT_HIGH1, -1, A3XX_VBIF2_PERF_CNT_SEL1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF2_PERF_CNT_LOW2, + A3XX_VBIF2_PERF_CNT_HIGH2, -1, A3XX_VBIF2_PERF_CNT_SEL2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF2_PERF_CNT_LOW3, + A3XX_VBIF2_PERF_CNT_HIGH3, -1, A3XX_VBIF2_PERF_CNT_SEL3 }, +}; +/* + * Placing EN register in select field since vbif perf counters + * don't have select register to program + */ +static struct adreno_perfcount_register a3xx_perfcounters_vbif2_pwr[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, + 0, A3XX_VBIF2_PERF_PWR_CNT_LOW0, + A3XX_VBIF2_PERF_PWR_CNT_HIGH0, -1, + A3XX_VBIF2_PERF_PWR_CNT_EN0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, + 0, A3XX_VBIF2_PERF_PWR_CNT_LOW1, + A3XX_VBIF2_PERF_PWR_CNT_HIGH1, -1, + A3XX_VBIF2_PERF_PWR_CNT_EN1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, + 0, A3XX_VBIF2_PERF_PWR_CNT_LOW2, + A3XX_VBIF2_PERF_PWR_CNT_HIGH2, -1, + A3XX_VBIF2_PERF_PWR_CNT_EN2 }, +}; + +#define A3XX_PERFCOUNTER_GROUP(offset, name, enable, read, load) \ + ADRENO_PERFCOUNTER_GROUP(a3xx, offset, name, enable, read, load) + +#define A3XX_PERFCOUNTER_GROUP_FLAGS(offset, name, flags, enable, read, load) \ + ADRENO_PERFCOUNTER_GROUP_FLAGS(a3xx, offset, name, flags, enable, read, load) + +#define A3XX_REGULAR_PERFCOUNTER_GROUP(offset, name) \ + A3XX_PERFCOUNTER_GROUP(offset, name, a3xx_counter_enable,\ + a3xx_counter_read, 
a3xx_counter_load) + +static const struct adreno_perfcount_group +a3xx_perfcounter_groups[KGSL_PERFCOUNTER_GROUP_MAX] = { + A3XX_REGULAR_PERFCOUNTER_GROUP(CP, cp), + A3XX_REGULAR_PERFCOUNTER_GROUP(RBBM, rbbm), + A3XX_REGULAR_PERFCOUNTER_GROUP(PC, pc), + A3XX_REGULAR_PERFCOUNTER_GROUP(VFD, vfd), + A3XX_REGULAR_PERFCOUNTER_GROUP(HLSQ, hlsq), + A3XX_REGULAR_PERFCOUNTER_GROUP(VPC, vpc), + A3XX_REGULAR_PERFCOUNTER_GROUP(TSE, tse), + A3XX_REGULAR_PERFCOUNTER_GROUP(RAS, ras), + A3XX_REGULAR_PERFCOUNTER_GROUP(UCHE, uche), + A3XX_REGULAR_PERFCOUNTER_GROUP(TP, tp), + A3XX_REGULAR_PERFCOUNTER_GROUP(SP, sp), + A3XX_REGULAR_PERFCOUNTER_GROUP(RB, rb), + A3XX_PERFCOUNTER_GROUP_FLAGS(PWR, pwr, + ADRENO_PERFCOUNTER_GROUP_FIXED, + a3xx_counter_pwr_enable, a3xx_counter_pwr_read, NULL), + A3XX_PERFCOUNTER_GROUP(VBIF, vbif2, + a3xx_counter_vbif_enable, a3xx_counter_vbif_read, NULL), + A3XX_PERFCOUNTER_GROUP_FLAGS(VBIF_PWR, vbif2_pwr, + ADRENO_PERFCOUNTER_GROUP_FIXED, + a3xx_counter_vbif_pwr_enable, a3xx_counter_vbif_pwr_read, + NULL), + +}; + +const struct adreno_perfcounters adreno_a3xx_perfcounters = { + a3xx_perfcounter_groups, + ARRAY_SIZE(a3xx_perfcounter_groups), +}; diff --git a/adreno_a3xx_ringbuffer.c b/adreno_a3xx_ringbuffer.c new file mode 100644 index 0000000000..9222af6b7c --- /dev/null +++ b/adreno_a3xx_ringbuffer.c @@ -0,0 +1,454 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. + */ + +#include "adreno.h" +#include "adreno_a3xx.h" +#include "adreno_pm4types.h" +#include "adreno_ringbuffer.h" +#include "adreno_trace.h" +#include "kgsl_trace.h" + +static int a3xx_wait_reg(unsigned int *cmds, unsigned int addr, + unsigned int val, unsigned int mask, + unsigned int interval) +{ + cmds[0] = cp_type3_packet(CP_WAIT_REG_EQ, 4); + cmds[1] = addr; + cmds[2] = val; + cmds[3] = mask; + cmds[4] = interval; + + return 5; +} + +static int a3xx_vbif_lock(unsigned int *cmds) +{ + int count; + + /* + * glue commands together until next + * WAIT_FOR_ME + */ + count = a3xx_wait_reg(cmds, A3XX_CP_WFI_PEND_CTR, + 1, 0xFFFFFFFF, 0xF); + + /* MMU-500 VBIF stall */ + cmds[count++] = cp_type3_packet(CP_REG_RMW, 3); + cmds[count++] = A3XX_VBIF_DDR_OUTPUT_RECOVERABLE_HALT_CTRL0; + /* AND to unmask the HALT bit */ + cmds[count++] = ~(VBIF_RECOVERABLE_HALT_CTRL); + /* OR to set the HALT bit */ + cmds[count++] = 0x1; + + /* Wait for acknowledgment */ + count += a3xx_wait_reg(&cmds[count], + A3XX_VBIF_DDR_OUTPUT_RECOVERABLE_HALT_CTRL1, + 1, 0xFFFFFFFF, 0xF); + + return count; +} + +static int a3xx_vbif_unlock(unsigned int *cmds) +{ + /* MMU-500 VBIF unstall */ + cmds[0] = cp_type3_packet(CP_REG_RMW, 3); + cmds[1] = A3XX_VBIF_DDR_OUTPUT_RECOVERABLE_HALT_CTRL0; + /* AND to unmask the HALT bit */ + cmds[2] = ~(VBIF_RECOVERABLE_HALT_CTRL); + /* OR to reset the HALT bit */ + cmds[3] = 0; + + /* release all commands since _vbif_lock() with wait_for_me */ + cmds[4] = cp_type3_packet(CP_WAIT_FOR_ME, 1); + cmds[5] = 0; + + return 6; +} + +#define A3XX_GPU_OFFSET 0xa000 + +static int a3xx_cp_smmu_reg(unsigned int *cmds, + u32 reg, + unsigned int num) +{ + cmds[0] = cp_type3_packet(CP_REG_WR_NO_CTXT, num + 1); + cmds[1] = (A3XX_GPU_OFFSET + reg) >> 2; + + return 2; +} + +/* This function is only needed for A3xx targets */ +static int a3xx_tlbiall(unsigned int *cmds) +{ + unsigned int tlbstatus = (A3XX_GPU_OFFSET + + KGSL_IOMMU_CTX_TLBSTATUS) >> 2; + int count; + + count = a3xx_cp_smmu_reg(cmds, KGSL_IOMMU_CTX_TLBIALL, 1); + cmds[count++] = 1; + + count += 
a3xx_cp_smmu_reg(&cmds[count], KGSL_IOMMU_CTX_TLBSYNC, 1); + cmds[count++] = 0; + + count += a3xx_wait_reg(&cmds[count], tlbstatus, 0, + KGSL_IOMMU_CTX_TLBSTATUS_SACTIVE, 0xF); + + return count; +} + +/* offset at which a nop command is placed in setstate */ +#define KGSL_IOMMU_SETSTATE_NOP_OFFSET 1024 + +static int a3xx_rb_pagetable_switch(struct adreno_device *adreno_dev, + struct kgsl_pagetable *pagetable, u32 *cmds) +{ + u64 ttbr0 = kgsl_mmu_pagetable_get_ttbr0(pagetable); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_iommu *iommu = KGSL_IOMMU(device); + int count = 0; + + /* + * Adding an indirect buffer ensures that the prefetch stalls until + * the commands in indirect buffer have completed. We need to stall + * prefetch with a nop indirect buffer when updating pagetables + * because it provides stabler synchronization. + */ + cmds[count++] = cp_type3_packet(CP_WAIT_FOR_ME, 1); + cmds[count++] = 0; + + cmds[count++] = cp_type3_packet(CP_INDIRECT_BUFFER_PFE, 2); + cmds[count++] = lower_32_bits(iommu->setstate->gpuaddr); + cmds[count++] = 2; + + cmds[count++] = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + cmds[count++] = 0; + + cmds[count++] = cp_type3_packet(CP_WAIT_FOR_ME, 1); + cmds[count++] = 0; + + count += a3xx_vbif_lock(&cmds[count]); + + count += a3xx_cp_smmu_reg(&cmds[count], KGSL_IOMMU_CTX_TTBR0, 2); + cmds[count++] = lower_32_bits(ttbr0); + cmds[count++] = upper_32_bits(ttbr0); + + count += a3xx_vbif_unlock(&cmds[count]); + + count += a3xx_tlbiall(&cmds[count]); + + /* wait for me to finish the TLBI */ + cmds[count++] = cp_type3_packet(CP_WAIT_FOR_ME, 1); + cmds[count++] = 0; + cmds[count++] = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + cmds[count++] = 0; + + /* Invalidate the state */ + cmds[count++] = cp_type3_packet(CP_INVALIDATE_STATE, 1); + cmds[count++] = 0x7ffff; + + return count; +} + +#define RB_SOPTIMESTAMP(device, rb) \ + MEMSTORE_RB_GPU_ADDR(device, rb, soptimestamp) +#define CTXT_SOPTIMESTAMP(device, drawctxt) \ + MEMSTORE_ID_GPU_ADDR(device, (drawctxt)->base.id, soptimestamp) + +#define RB_EOPTIMESTAMP(device, rb) \ + MEMSTORE_RB_GPU_ADDR(device, rb, eoptimestamp) +#define CTXT_EOPTIMESTAMP(device, drawctxt) \ + MEMSTORE_ID_GPU_ADDR(device, (drawctxt)->base.id, eoptimestamp) + +int a3xx_ringbuffer_init(struct adreno_device *adreno_dev) +{ + adreno_dev->num_ringbuffers = 1; + + adreno_dev->cur_rb = &(adreno_dev->ringbuffers[0]); + + return adreno_ringbuffer_setup(adreno_dev, + &adreno_dev->ringbuffers[0], 0); +} + +#define A3XX_SUBMIT_MAX 55 + +static int a3xx_ringbuffer_addcmds(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, struct adreno_context *drawctxt, + u32 flags, u32 *in, u32 dwords, u32 timestamp, + struct adreno_submit_time *time) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 size = A3XX_SUBMIT_MAX + dwords; + u32 *cmds, index = 0; + u64 profile_gpuaddr; + u32 profile_dwords; + + if (adreno_drawctxt_detached(drawctxt)) + return -ENOENT; + + if (adreno_gpu_fault(adreno_dev) != 0) + return -EPROTO; + + rb->timestamp++; + + if (drawctxt) + drawctxt->internal_timestamp = rb->timestamp; + + cmds = adreno_ringbuffer_allocspace(rb, size); + if (IS_ERR(cmds)) + return PTR_ERR(cmds); + + /* Identify the start of a command */ + cmds[index++] = cp_type3_packet(CP_NOP, 1); + cmds[index++] = drawctxt ? 
CMD_IDENTIFIER : CMD_INTERNAL_IDENTIFIER; + + if (IS_PWRON_FIXUP(flags)) { + cmds[index++] = cp_type3_packet(CP_SET_PROTECTED_MODE, 1); + cmds[index++] = 0; + + cmds[index++] = cp_type3_packet(CP_NOP, 1); + cmds[index++] = PWRON_FIXUP_IDENTIFIER; + + cmds[index++] = cp_type3_packet(CP_INDIRECT_BUFFER_PFE, 2); + cmds[index++] = lower_32_bits(adreno_dev->pwron_fixup->gpuaddr); + cmds[index++] = adreno_dev->pwron_fixup_dwords; + + cmds[index++] = cp_type3_packet(CP_SET_PROTECTED_MODE, 1); + cmds[index++] = 0; + } + + profile_gpuaddr = adreno_profile_preib_processing(adreno_dev, + drawctxt, &profile_dwords); + + if (profile_gpuaddr) { + cmds[index++] = cp_type3_packet(CP_INDIRECT_BUFFER_PFE, 2); + cmds[index++] = lower_32_bits(profile_gpuaddr); + cmds[index++] = profile_dwords; + } + + if (drawctxt) { + cmds[index++] = cp_type3_packet(CP_MEM_WRITE, 2); + cmds[index++] = lower_32_bits(CTXT_SOPTIMESTAMP(device, + drawctxt)); + cmds[index++] = timestamp; + } + + cmds[index++] = cp_type3_packet(CP_MEM_WRITE, 2); + cmds[index++] = lower_32_bits(RB_SOPTIMESTAMP(device, rb)); + cmds[index++] = rb->timestamp; + + if (IS_NOTPROTECTED(flags)) { + cmds[index++] = cp_type3_packet(CP_SET_PROTECTED_MODE, 1); + cmds[index++] = 0; + } + + memcpy(&cmds[index], in, dwords << 2); + index += dwords; + + if (IS_NOTPROTECTED(flags)) { + cmds[index++] = cp_type3_packet(CP_SET_PROTECTED_MODE, 1); + cmds[index++] = 1; + } + + /* + * Flush HLSQ lazy updates to make sure there are no resourses pending + * for indirect loads after the timestamp + */ + + cmds[index++] = cp_type3_packet(CP_EVENT_WRITE, 1); + cmds[index++] = 0x07; /* HLSQ FLUSH */ + cmds[index++] = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + cmds[index++] = 0; + + profile_gpuaddr = adreno_profile_postib_processing(adreno_dev, + drawctxt, &profile_dwords); + + if (profile_gpuaddr) { + cmds[index++] = cp_type3_packet(CP_INDIRECT_BUFFER_PFE, 2); + cmds[index++] = lower_32_bits(profile_gpuaddr); + cmds[index++] = profile_dwords; + } + + /* + * If this is an internal command, just write the ringbuffer timestamp, + * otherwise, write both + */ + if (!drawctxt) { + cmds[index++] = cp_type3_packet(CP_EVENT_WRITE, 3); + cmds[index++] = CACHE_FLUSH_TS | (1 << 31); + cmds[index++] = lower_32_bits(RB_EOPTIMESTAMP(device, rb)); + cmds[index++] = rb->timestamp; + } else { + cmds[index++] = cp_type3_packet(CP_EVENT_WRITE, 3); + cmds[index++] = CACHE_FLUSH_TS | (1 << 31); + cmds[index++] = lower_32_bits(CTXT_EOPTIMESTAMP(device, + drawctxt)); + cmds[index++] = timestamp; + + cmds[index++] = cp_type3_packet(CP_EVENT_WRITE, 3); + cmds[index++] = CACHE_FLUSH_TS; + cmds[index++] = lower_32_bits(RB_EOPTIMESTAMP(device, rb)); + cmds[index++] = rb->timestamp; + } + + /* Trigger a context rollover */ + cmds[index++] = cp_type3_packet(CP_SET_CONSTANT, 2); + cmds[index++] = (4 << 16) | (A3XX_HLSQ_CL_KERNEL_GROUP_X_REG - 0x2000); + cmds[index++] = 0; + + if (IS_WFI(flags)) { + cmds[index++] = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + cmds[index++] = 0; + } + + /* Adjust the thing for the number of bytes we actually wrote */ + rb->_wptr -= (size - index); + + kgsl_pwrscale_busy(device); + kgsl_regwrite(device, A3XX_CP_RB_WPTR, rb->_wptr); + rb->wptr = rb->_wptr; + + return 0; +} + +static int a3xx_rb_context_switch(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, + struct adreno_context *drawctxt) +{ + struct kgsl_pagetable *pagetable = + adreno_drawctxt_get_pagetable(drawctxt); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int count = 0; + u32 cmds[64]; + + if 
(adreno_drawctxt_get_pagetable(rb->drawctxt_active) != pagetable) + count += a3xx_rb_pagetable_switch(adreno_dev, pagetable, cmds); + + cmds[count++] = cp_type3_packet(CP_NOP, 1); + cmds[count++] = CONTEXT_TO_MEM_IDENTIFIER; + + cmds[count++] = cp_type3_packet(CP_MEM_WRITE, 2); + cmds[count++] = lower_32_bits(MEMSTORE_RB_GPU_ADDR(device, rb, + current_context)); + cmds[count++] = drawctxt->base.id; + + cmds[count++] = cp_type3_packet(CP_MEM_WRITE, 2); + cmds[count++] = lower_32_bits(MEMSTORE_ID_GPU_ADDR(device, + KGSL_MEMSTORE_GLOBAL, current_context)); + cmds[count++] = drawctxt->base.id; + + cmds[count++] = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2); + cmds[count++] = 0; + cmds[count++] = 0x90000000; + + return a3xx_ringbuffer_addcmds(adreno_dev, rb, NULL, F_NOTPROTECTED, + cmds, count, 0, NULL); +} + +static int a3xx_drawctxt_switch(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, + struct adreno_context *drawctxt) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (rb->drawctxt_active == drawctxt) + return 0; + + if (kgsl_context_detached(&drawctxt->base)) + return -ENOENT; + + if (!_kgsl_context_get(&drawctxt->base)) + return -ENOENT; + + trace_adreno_drawctxt_switch(rb, drawctxt); + + a3xx_rb_context_switch(adreno_dev, rb, drawctxt); + + /* Release the current drawctxt as soon as the new one is switched */ + adreno_put_drawctxt_on_timestamp(device, rb->drawctxt_active, + rb, rb->timestamp); + + rb->drawctxt_active = drawctxt; + return 0; +} + +#define A3XX_COMMAND_DWORDS 4 + +int a3xx_ringbuffer_submitcmd(struct adreno_device *adreno_dev, + struct kgsl_drawobj_cmd *cmdobj, u32 flags, + struct adreno_submit_time *time) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj); + struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context); + struct adreno_ringbuffer *rb = drawctxt->rb; + int ret = 0, numibs = 0, index = 0; + u32 *cmds; + + /* Count the number of IBs (if we are not skipping) */ + if (!IS_SKIP(flags)) { + struct list_head *tmp; + + list_for_each(tmp, &cmdobj->cmdlist) + numibs++; + } + + cmds = kmalloc((A3XX_COMMAND_DWORDS + (numibs * 4)) << 2, GFP_KERNEL); + if (!cmds) { + ret = -ENOMEM; + goto done; + } + + cmds[index++] = cp_type3_packet(CP_NOP, 1); + cmds[index++] = START_IB_IDENTIFIER; + + if (numibs) { + struct kgsl_memobj_node *ib; + + list_for_each_entry(ib, &cmdobj->cmdlist, node) { + if (ib->priv & MEMOBJ_SKIP || + (ib->flags & KGSL_CMDLIST_CTXTSWITCH_PREAMBLE + && !IS_PREAMBLE(flags))) + cmds[index++] = cp_type3_packet(CP_NOP, 3); + + cmds[index++] = + cp_type3_packet(CP_INDIRECT_BUFFER_PFE, 2); + cmds[index++] = lower_32_bits(ib->gpuaddr); + cmds[index++] = ib->size >> 2; + } + } + + cmds[index++] = cp_type3_packet(CP_NOP, 1); + cmds[index++] = END_IB_IDENTIFIER; + + ret = a3xx_drawctxt_switch(adreno_dev, rb, drawctxt); + + /* + * In the unlikely event of an error in the drawctxt switch, + * treat it like a hang + */ + if (ret) { + /* + * It is "normal" to get a -ENOSPC or a -ENOENT. 
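(Here -ENOSPC typically means the ringbuffer had no free space for the switch commands and -ENOENT that the context was already detached.)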
Don't log it, + * the upper layers know how to handle it + */ + if (ret != -ENOSPC && ret != -ENOENT) + dev_err(device->dev, + "Unable to switch draw context: %d\n", + ret); + goto done; + } + + adreno_drawobj_set_constraint(device, drawobj); + + ret = a3xx_ringbuffer_addcmds(adreno_dev, drawctxt->rb, drawctxt, + flags, cmds, index, drawobj->timestamp, NULL); + +done: + trace_kgsl_issueibcmds(device, drawctxt->base.id, numibs, + drawobj->timestamp, drawobj->flags, ret, drawctxt->type); + + kfree(cmds); + return ret; +} diff --git a/adreno_a3xx_snapshot.c b/adreno_a3xx_snapshot.c new file mode 100644 index 0000000000..b43eb75a31 --- /dev/null +++ b/adreno_a3xx_snapshot.c @@ -0,0 +1,448 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2012-2017,2019-2020, The Linux Foundation. All rights reserved. + */ + +#include + +#include "adreno.h" +#include "adreno_a3xx.h" +#include "adreno_snapshot.h" +#include "kgsl_device.h" + +/* + * Set of registers to dump for A3XX on snapshot. + * Registers in pairs - first value is the start offset, second + * is the stop offset (inclusive) + */ + +static const unsigned int a3xx_registers[] = { + 0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027, + 0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c, + 0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5, + 0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1, + 0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd, + 0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f6, 0x01f8, 0x01f9, + 0x01fc, 0x01ff, + 0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f, + 0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f, + 0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e, + 0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f, + 0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7, + 0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5, + 0x0e41, 0x0e45, 0x0e64, 0x0e65, + 0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7, + 0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09, + 0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069, + 0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075, + 0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109, + 0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115, + 0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0, + 0x2240, 0x227e, + 0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8, + 0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7, + 0x22ff, 0x22ff, 0x2340, 0x2343, + 0x2440, 0x2440, 0x2444, 0x2444, 0x2448, 0x244d, + 0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470, 0x2472, 0x2472, + 0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3, 0x24e4, 0x24ef, + 0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e, 0x2510, 0x2511, + 0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea, 0x25ec, 0x25ed, + 0x25f0, 0x25f0, + 0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0, 0x26c4, 0x26ce, + 0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9, 0x26ec, 0x26ec, + 0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743, + 0x300C, 0x300E, 0x301C, 0x301D, + 0x302A, 0x302A, 0x302C, 0x302D, 0x3030, 0x3031, 0x3034, 0x3036, + 0x303C, 0x303C, 0x305E, 0x305F, +}; + +/* Removed the following HLSQ register ranges from being read during + * fault tolerance since reading the registers may cause the device to hang: + */ +static const unsigned int a3xx_hlsq_registers[] = { + 0x0e00, 0x0e05, 0x0e0c, 0x0e0c, 0x0e22, 0x0e23, + 0x2200, 
0x2212, 0x2214, 0x2217, 0x221a, 0x221a, + 0x2600, 0x2612, 0x2614, 0x2617, 0x261a, 0x261a, +}; + +/* Shader memory size in words */ +#define SHADER_MEMORY_SIZE 0x4000 + +/** + * _rbbm_debug_bus_read - Helper function to read data from the RBBM + * debug bus. + * @device - GPU device to read/write registers + * @block_id - Debug bus block to read from + * @index - Index in the debug bus block to read + * @ret - Value of the register read + */ +static void _rbbm_debug_bus_read(struct kgsl_device *device, + unsigned int block_id, unsigned int index, unsigned int *val) +{ + unsigned int block = (block_id << 8) | 1 << 16; + + kgsl_regwrite(device, A3XX_RBBM_DEBUG_BUS_CTL, block | index); + kgsl_regread(device, A3XX_RBBM_DEBUG_BUS_DATA_STATUS, val); +} + +/** + * a3xx_snapshot_shader_memory - Helper function to dump the GPU shader + * memory to the snapshot buffer. + * @device: GPU device whose shader memory is to be dumped + * @buf: Pointer to binary snapshot data blob being made + * @remain: Number of remaining bytes in the snapshot blob + * @priv: Unused parameter + * + */ +static size_t a3xx_snapshot_shader_memory(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; + void *data = buf + sizeof(*header); + unsigned int shader_read_len = SHADER_MEMORY_SIZE; + + if (remain < DEBUG_SECTION_SZ(shader_read_len)) { + SNAPSHOT_ERR_NOMEM(device, "SHADER MEMORY"); + return 0; + } + + header->type = SNAPSHOT_DEBUG_SHADER_MEMORY; + header->size = shader_read_len; + + /* Map shader memory to kernel, for dumping */ + if (IS_ERR_OR_NULL(device->shader_mem_virt)) { + struct resource *res; + + res = platform_get_resource_byname(device->pdev, + IORESOURCE_MEM, "kgsl_3d0_shader_memory"); + + if (res) + device->shader_mem_virt = + devm_ioremap_resource(&device->pdev->dev, res); + } + + if (IS_ERR_OR_NULL(device->shader_mem_virt)) { + dev_err(device->dev, "Unable to map the shader memory\n"); + return 0; + } + + memcpy_fromio(data, device->shader_mem_virt, shader_read_len << 2); + + return DEBUG_SECTION_SZ(shader_read_len); +} + +static size_t a3xx_snapshot_debugbus_block(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_debugbus *header + = (struct kgsl_snapshot_debugbus *)buf; + struct adreno_debugbus_block *block = priv; + int i; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + size_t size; + + size = (0x40 * sizeof(unsigned int)) + sizeof(*header); + + if (remain < size) { + SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS"); + return 0; + } + + header->id = block->block_id; + header->count = 0x40; + + for (i = 0; i < 0x40; i++) + _rbbm_debug_bus_read(device, block->block_id, i, &data[i]); + + return size; +} + +static struct adreno_debugbus_block debugbus_blocks[] = { + { RBBM_BLOCK_ID_CP, 0x52, }, + { RBBM_BLOCK_ID_RBBM, 0x40, }, + { RBBM_BLOCK_ID_VBIF, 0x40, }, + { RBBM_BLOCK_ID_HLSQ, 0x40, }, + { RBBM_BLOCK_ID_UCHE, 0x40, }, + { RBBM_BLOCK_ID_PC, 0x40, }, + { RBBM_BLOCK_ID_VFD, 0x40, }, + { RBBM_BLOCK_ID_VPC, 0x40, }, + { RBBM_BLOCK_ID_TSE, 0x40, }, + { RBBM_BLOCK_ID_RAS, 0x40, }, + { RBBM_BLOCK_ID_VSC, 0x40, }, + { RBBM_BLOCK_ID_SP_0, 0x40, }, + { RBBM_BLOCK_ID_SP_1, 0x40, }, + { RBBM_BLOCK_ID_SP_2, 0x40, }, + { RBBM_BLOCK_ID_SP_3, 0x40, }, + { RBBM_BLOCK_ID_TPL1_0, 0x40, }, + { RBBM_BLOCK_ID_TPL1_1, 0x40, }, + { RBBM_BLOCK_ID_TPL1_2, 0x40, }, + { RBBM_BLOCK_ID_TPL1_3, 0x40, }, + { RBBM_BLOCK_ID_RB_0, 0x40, }, + { RBBM_BLOCK_ID_RB_1, 0x40, }, + { RBBM_BLOCK_ID_RB_2, 
0x40, }, + { RBBM_BLOCK_ID_RB_3, 0x40, }, + { RBBM_BLOCK_ID_MARB_0, 0x40, }, + { RBBM_BLOCK_ID_MARB_1, 0x40, }, + { RBBM_BLOCK_ID_MARB_2, 0x40, }, + { RBBM_BLOCK_ID_MARB_3, 0x40, }, +}; + +static void a3xx_snapshot_debugbus(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(debugbus_blocks); i++) { + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_DEBUGBUS, snapshot, + a3xx_snapshot_debugbus_block, + (void *) &debugbus_blocks[i]); + } +} + +static void _snapshot_hlsq_regs(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + unsigned int next_pif = 0; + + /* + * Trying to read HLSQ registers when the HLSQ block is busy + * will cause the device to hang. The RBBM_DEBUG_BUS has information + * that will tell us if the HLSQ block is busy or not. Read values + * from the debug bus to ensure the HLSQ block is not busy (this + * is hardware dependent). If the HLSQ block is busy do not + * dump the registers, otherwise dump the HLSQ registers. + */ + + /* + * tpif status bits: RBBM_BLOCK_ID_HLSQ index 4 [4:0] + * spif status bits: RBBM_BLOCK_ID_HLSQ index 7 [5:0] + * + * if ((tpif == 0, 1, 28) && (spif == 0, 1, 10)) + * then dump HLSQ registers + */ + + /* check tpif */ + _rbbm_debug_bus_read(device, RBBM_BLOCK_ID_HLSQ, 4, &next_pif); + next_pif &= 0x1f; + if (next_pif != 0 && next_pif != 1 && next_pif != 28) + return; + + /* check spif */ + _rbbm_debug_bus_read(device, RBBM_BLOCK_ID_HLSQ, 7, &next_pif); + next_pif &= 0x3f; + if (next_pif != 0 && next_pif != 1 && next_pif != 10) + return; + + SNAPSHOT_REGISTERS(device, snapshot, a3xx_hlsq_registers); +} + +#define VPC_MEM_SIZE 512 + +static size_t a3xx_snapshot_vpc_memory(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + size_t size = 4 * VPC_MEM_SIZE; + int bank, addr, i = 0; + + if (remain < DEBUG_SECTION_SZ(size)) { + SNAPSHOT_ERR_NOMEM(device, "VPC MEMORY"); + return 0; + } + + header->type = SNAPSHOT_DEBUG_VPC_MEMORY; + header->size = size; + + for (bank = 0; bank < 4; bank++) { + for (addr = 0; addr < VPC_MEM_SIZE; addr++) { + unsigned int val = bank | (addr << 4); + + kgsl_regwrite(device, A3XX_VPC_VPC_DEBUG_RAM_SEL, val); + kgsl_regread(device, A3XX_VPC_VPC_DEBUG_RAM_READ, + &data[i++]); + } + } + + return DEBUG_SECTION_SZ(size); +} + +static size_t a3xx_snapshot_cp_pm4_ram(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_PM4); + size_t size = fw->size - 1; + + if (remain < DEBUG_SECTION_SZ(size)) { + SNAPSHOT_ERR_NOMEM(device, "CP PM4 RAM DEBUG"); + return 0; + } + + header->type = SNAPSHOT_DEBUG_CP_PM4_RAM; + header->size = size; + + /* + * Read the firmware from the GPU rather than use our cache in order to + * try to catch mis-programming or corruption in the hardware. 
We do + * use the cached version of the size, however, instead of trying to + * maintain always changing hardcoded constants + */ + kgsl_regmap_read_indexed(&device->regmap, A3XX_CP_ME_RAM_RADDR, + A3XX_CP_ME_RAM_DATA, data, size); + + return DEBUG_SECTION_SZ(size); +} + +static size_t a3xx_snapshot_cp_pfp_ram(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_PFP); + int size = fw->size - 1; + + if (remain < DEBUG_SECTION_SZ(size)) { + SNAPSHOT_ERR_NOMEM(device, "CP PFP RAM DEBUG"); + return 0; + } + + header->type = SNAPSHOT_DEBUG_CP_PFP_RAM; + header->size = size; + + /* + * Read the firmware from the GPU rather than use our cache in order to + * try to catch mis-programming or corruption in the hardware. We do + * use the cached version of the size, however, instead of trying to + * maintain always changing hardcoded constants + */ + kgsl_regmap_read_indexed(&device->regmap, A3XX_CP_PFP_UCODE_ADDR, + A3XX_CP_PFP_UCODE_DATA, data, size); + + return DEBUG_SECTION_SZ(size); +} + +static size_t a3xx_snapshot_cp_roq(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *) buf; + u32 *data = (u32 *) (buf + sizeof(*header)); + + if (remain < DEBUG_SECTION_SZ(128)) { + SNAPSHOT_ERR_NOMEM(device, "CP ROQ DEBUG"); + return 0; + } + + header->type = SNAPSHOT_DEBUG_CP_ROQ; + header->size = 128; + + kgsl_regmap_read_indexed(&device->regmap, A3XX_CP_ROQ_ADDR, + A3XX_CP_ROQ_DATA, data, 128); + + return DEBUG_SECTION_SZ(128); +} + +static size_t a3xx_snapshot_cp_meq(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *) buf; + u32 *data = (u32 *) (buf + sizeof(*header)); + + if (remain < DEBUG_SECTION_SZ(16)) { + SNAPSHOT_ERR_NOMEM(device, "CP MEQ DEBUG"); + return 0; + } + + header->type = SNAPSHOT_DEBUG_CP_MEQ; + header->size = 16; + + kgsl_regmap_read_indexed(&device->regmap, A3XX_CP_MEQ_ADDR, + A3XX_CP_MEQ_DATA, data, 16); + + return DEBUG_SECTION_SZ(16); +} + +/* + * a3xx_snapshot() - A3XX GPU snapshot function + * @adreno_dev: Device being snapshotted + * @snapshot: Snapshot meta data + * @remain: Amount of space left in snapshot memory + * + * This is where all of the A3XX specific bits and pieces are grabbed + * into the snapshot memory + */ +void a3xx_snapshot(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned int reg; + + /* Disable Clock gating temporarily for the debug bus to work */ + kgsl_regwrite(device, A3XX_RBBM_CLOCK_CTL, 0x0); + + /* Save some CP information that the generic snapshot uses */ + kgsl_regread(device, A3XX_CP_IB1_BASE, ®); + snapshot->ib1base = (u64) reg; + + kgsl_regread(device, A3XX_CP_IB2_BASE, ®); + snapshot->ib2base = (u64) reg; + + kgsl_regread(device, A3XX_CP_IB1_BUFSZ, &snapshot->ib1size); + kgsl_regread(device, A3XX_CP_IB2_BUFSZ, &snapshot->ib2size); + + SNAPSHOT_REGISTERS(device, snapshot, a3xx_registers); + + _snapshot_hlsq_regs(device, snapshot); + + kgsl_snapshot_indexed_registers(device, snapshot, + A3XX_CP_STATE_DEBUG_INDEX, A3XX_CP_STATE_DEBUG_DATA, 0, 0x14); + + /* CP_ME indexed registers */ + 
kgsl_snapshot_indexed_registers(device, snapshot, + A3XX_CP_ME_CNTL, A3XX_CP_ME_STATUS, 64, 44); + + /* VPC memory */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, a3xx_snapshot_vpc_memory, NULL); + + /* CP MEQ */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, snapshot, + a3xx_snapshot_cp_meq, NULL); + + /* Shader working/shadow memory */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, a3xx_snapshot_shader_memory, NULL); + + + /* CP PFP and PM4 */ + + /* + * Reading the microcode while the CP is running will + * basically move the CP instruction pointer to + * whatever address we read. Big badaboom ensues. Stop the CP + * (if it isn't already stopped) to ensure that we are safe. + * We do this here and not earlier to avoid corrupting the RBBM + * status and CP registers - by the time we get here we don't + * care about the contents of the CP anymore. + */ + + kgsl_regread(device, A3XX_CP_ME_CNTL, ®); + reg |= (1 << 27) | (1 << 28); + kgsl_regwrite(device, A3XX_CP_ME_CNTL, reg); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, a3xx_snapshot_cp_pfp_ram, NULL); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, a3xx_snapshot_cp_pm4_ram, NULL); + + /* CP ROQ */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, a3xx_snapshot_cp_roq, NULL); + + a3xx_snapshot_debugbus(device, snapshot); +} diff --git a/adreno_a5xx.c b/adreno_a5xx.c new file mode 100644 index 0000000000..0b48af11fb --- /dev/null +++ b/adreno_a5xx.c @@ -0,0 +1,2726 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2014-2021, The Linux Foundation. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "adreno.h" +#include "adreno_a5xx.h" +#include "adreno_a5xx_packets.h" +#include "adreno_pm4types.h" +#include "adreno_trace.h" +#include "kgsl_trace.h" + +static int critical_packet_constructed; +static unsigned int crit_pkts_dwords; + +static void a5xx_irq_storm_worker(struct work_struct *work); +static int _read_fw2_block_header(struct kgsl_device *device, + uint32_t *header, uint32_t remain, + uint32_t id, uint32_t major, uint32_t minor); +static void a5xx_gpmu_reset(struct work_struct *work); +static int a5xx_gpmu_init(struct adreno_device *adreno_dev); + +/** + * Number of times to check if the regulator enabled before + * giving up and returning failure. + */ +#define PWR_RETRY 100 + +/** + * Number of times to check if the GPMU firmware is initialized before + * giving up and returning failure. 
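+ * Each retry adds a udelay(1) in the polling loop in a5xx_gpmu_start().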
+ */ +#define GPMU_FW_INIT_RETRY 5000 + +#define A530_QFPROM_RAW_PTE_ROW0_MSB 0x134 +#define A530_QFPROM_RAW_PTE_ROW2_MSB 0x144 + +#define A5XX_INT_MASK \ + ((1 << A5XX_INT_RBBM_AHB_ERROR) | \ + (1 << A5XX_INT_RBBM_TRANSFER_TIMEOUT) | \ + (1 << A5XX_INT_RBBM_ME_MS_TIMEOUT) | \ + (1 << A5XX_INT_RBBM_PFP_MS_TIMEOUT) | \ + (1 << A5XX_INT_RBBM_ETS_MS_TIMEOUT) | \ + (1 << A5XX_INT_RBBM_ATB_ASYNC_OVERFLOW) | \ + (1 << A5XX_INT_RBBM_GPC_ERROR) | \ + (1 << A5XX_INT_CP_HW_ERROR) | \ + (1 << A5XX_INT_CP_CACHE_FLUSH_TS) | \ + (1 << A5XX_INT_RBBM_ATB_BUS_OVERFLOW) | \ + (1 << A5XX_INT_MISC_HANG_DETECT) | \ + (1 << A5XX_INT_UCHE_OOB_ACCESS) | \ + (1 << A5XX_INT_UCHE_TRAP_INTR) | \ + (1 << A5XX_INT_CP_SW) | \ + (1 << A5XX_INT_GPMU_FIRMWARE) | \ + (1 << A5XX_INT_GPMU_VOLTAGE_DROOP)) + +static int a5xx_probe(struct platform_device *pdev, + u32 chipid, const struct adreno_gpu_core *gpucore) +{ + struct adreno_device *adreno_dev; + struct kgsl_device *device; + int ret; + + adreno_dev = (struct adreno_device *) + of_device_get_match_data(&pdev->dev); + + memset(adreno_dev, 0, sizeof(*adreno_dev)); + + adreno_dev->gpucore = gpucore; + adreno_dev->chipid = chipid; + + adreno_reg_offset_init(gpucore->gpudev->reg_offsets); + + adreno_dev->sptp_pc_enabled = + ADRENO_FEATURE(adreno_dev, ADRENO_SPTP_PC); + + if (adreno_is_a540(adreno_dev)) + adreno_dev->throttling_enabled = true; + + adreno_dev->hwcg_enabled = true; + adreno_dev->lm_enabled = + ADRENO_FEATURE(adreno_dev, ADRENO_LM); + + /* Setup defaults that might get changed by the fuse bits */ + adreno_dev->lm_leakage = 0x4e001a; + + device = KGSL_DEVICE(adreno_dev); + + timer_setup(&device->idle_timer, kgsl_timer, 0); + + INIT_WORK(&device->idle_check_ws, kgsl_idle_check); + + ret = adreno_device_probe(pdev, adreno_dev); + if (ret) + return ret; + + return adreno_dispatcher_init(adreno_dev); +} + +static void _do_fixup(const struct adreno_critical_fixup *fixups, int count, + uint64_t *gpuaddrs, unsigned int *buffer) +{ + int i; + + for (i = 0; i < count; i++) { + buffer[fixups[i].lo_offset] = + lower_32_bits(gpuaddrs[fixups[i].buffer]) | + fixups[i].mem_offset; + + buffer[fixups[i].hi_offset] = + upper_32_bits(gpuaddrs[fixups[i].buffer]); + } +} + +static int a5xx_critical_packet_construct(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned int *cmds; + uint64_t gpuaddrs[4]; + + adreno_dev->critpkts = kgsl_allocate_global(device, + PAGE_SIZE * 4, 0, 0, 0, "crit_pkts"); + if (IS_ERR(adreno_dev->critpkts)) + return PTR_ERR(adreno_dev->critpkts); + + adreno_dev->critpkts_secure = kgsl_allocate_global(device, + PAGE_SIZE, 0, KGSL_MEMFLAGS_SECURE, 0, "crit_pkts_secure"); + if (IS_ERR(adreno_dev->critpkts_secure)) + return PTR_ERR(adreno_dev->critpkts_secure); + + cmds = adreno_dev->critpkts->hostptr; + + gpuaddrs[0] = adreno_dev->critpkts_secure->gpuaddr; + gpuaddrs[1] = adreno_dev->critpkts->gpuaddr + PAGE_SIZE; + gpuaddrs[2] = adreno_dev->critpkts->gpuaddr + (PAGE_SIZE * 2); + gpuaddrs[3] = adreno_dev->critpkts->gpuaddr + (PAGE_SIZE * 3); + + crit_pkts_dwords = ARRAY_SIZE(_a5xx_critical_pkts); + + memcpy(cmds, _a5xx_critical_pkts, crit_pkts_dwords << 2); + + _do_fixup(critical_pkt_fixups, ARRAY_SIZE(critical_pkt_fixups), + gpuaddrs, cmds); + + cmds = adreno_dev->critpkts->hostptr + PAGE_SIZE; + memcpy(cmds, _a5xx_critical_pkts_mem01, + ARRAY_SIZE(_a5xx_critical_pkts_mem01) << 2); + + cmds = adreno_dev->critpkts->hostptr + (PAGE_SIZE * 2); + memcpy(cmds, _a5xx_critical_pkts_mem02, + 
ARRAY_SIZE(_a5xx_critical_pkts_mem02) << 2); + + cmds = adreno_dev->critpkts->hostptr + (PAGE_SIZE * 3); + memcpy(cmds, _a5xx_critical_pkts_mem03, + ARRAY_SIZE(_a5xx_critical_pkts_mem03) << 2); + + _do_fixup(critical_pkt_mem03_fixups, + ARRAY_SIZE(critical_pkt_mem03_fixups), gpuaddrs, cmds); + + critical_packet_constructed = 1; + + return 0; +} + +static int a5xx_microcode_read(struct adreno_device *adreno_dev); + +static int a5xx_init(struct adreno_device *adreno_dev) +{ + const struct adreno_a5xx_core *a5xx_core = to_a5xx_core(adreno_dev); + int ret; + + ret = a5xx_ringbuffer_init(adreno_dev); + if (ret) + return ret; + + ret = a5xx_microcode_read(adreno_dev); + if (ret) + return ret; + + if (a5xx_has_gpmu(adreno_dev)) + INIT_WORK(&adreno_dev->gpmu_work, a5xx_gpmu_reset); + + adreno_dev->highest_bank_bit = a5xx_core->highest_bank_bit; + + INIT_WORK(&adreno_dev->irq_storm_work, a5xx_irq_storm_worker); + + if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_CRITICAL_PACKETS)) + a5xx_critical_packet_construct(adreno_dev); + + adreno_create_profile_buffer(adreno_dev); + a5xx_crashdump_init(adreno_dev); + + return 0; +} + +static const struct { + u32 reg; + u32 base; + u32 count; +} a5xx_protected_blocks[] = { + /* RBBM */ + { A5XX_CP_PROTECT_REG_0, 0x004, 2 }, + { A5XX_CP_PROTECT_REG_0 + 1, 0x008, 3 }, + { A5XX_CP_PROTECT_REG_0 + 2, 0x010, 4 }, + { A5XX_CP_PROTECT_REG_0 + 3, 0x020, 5 }, + { A5XX_CP_PROTECT_REG_0 + 4, 0x040, 6 }, + { A5XX_CP_PROTECT_REG_0 + 5, 0x080, 6 }, + /* Content protection */ + { A5XX_CP_PROTECT_REG_0 + 6, A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, 4 }, + { A5XX_CP_PROTECT_REG_0 + 7, A5XX_RBBM_SECVID_TRUST_CNTL, 1 }, + /* CP */ + { A5XX_CP_PROTECT_REG_0 + 8, 0x800, 6 }, + { A5XX_CP_PROTECT_REG_0 + 9, 0x840, 3 }, + { A5XX_CP_PROTECT_REG_0 + 10, 0x880, 5 }, + { A5XX_CP_PROTECT_REG_0 + 11, 0xaa0, 0 }, + /* RB */ + { A5XX_CP_PROTECT_REG_0 + 12, 0xcc0, 0 }, + { A5XX_CP_PROTECT_REG_0 + 13, 0xcf0, 1 }, + /* VPC */ + { A5XX_CP_PROTECT_REG_0 + 14, 0xe68, 3 }, + { A5XX_CP_PROTECT_REG_0 + 15, 0xe70, 4 }, + /* UCHE */ + { A5XX_CP_PROTECT_REG_0 + 16, 0xe80, 4 }, + /* A5XX_CP_PROTECT_REG_17 will be used for SMMU */ + /* A5XX_CP_PROTECT_REG_18 - A5XX_CP_PROTECT_REG_31 are available */ +}; + +static void _setprotectreg(struct kgsl_device *device, u32 offset, + u32 base, u32 count) +{ + kgsl_regwrite(device, offset, 0x60000000 | (count << 24) | (base << 2)); +} + +static void a5xx_protect_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 reg; + int i; + + /* enable access protection to privileged registers */ + kgsl_regwrite(device, A5XX_CP_PROTECT_CNTL, 0x00000007); + + for (i = 0; i < ARRAY_SIZE(a5xx_protected_blocks); i++) { + reg = a5xx_protected_blocks[i].reg; + + _setprotectreg(device, reg, a5xx_protected_blocks[i].base, + a5xx_protected_blocks[i].count); + } + + /* + * For a530 and a540 the SMMU region is 0x20000 bytes long and 0x10000 + * bytes on all other targets. The base offset for both is 0x40000. 
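+ * The count passed to _setprotectreg() is log2 of the protected region
+ * size, which is why ilog2() is used below.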
+ * Write it to the next available slot + */ + if (adreno_is_a530(adreno_dev) || adreno_is_a540(adreno_dev)) + _setprotectreg(device, reg + 1, 0x40000, ilog2(0x20000)); + else + _setprotectreg(device, reg + 1, 0x40000, ilog2(0x10000)); +} + +/* + * _poll_gdsc_status() - Poll the GDSC status register + * @adreno_dev: The adreno device pointer + * @status_reg: Offset of the status register + * @status_value: The expected bit value + * + * Poll the status register till the power-on bit is equal to the + * expected value or the max retries are exceeded. + */ +static int _poll_gdsc_status(struct adreno_device *adreno_dev, + unsigned int status_reg, + unsigned int status_value) +{ + unsigned int reg, retry = PWR_RETRY; + + /* Bit 20 is the power on bit of SPTP and RAC GDSC status register */ + do { + udelay(1); + kgsl_regread(KGSL_DEVICE(adreno_dev), status_reg, ®); + } while (((reg & BIT(20)) != (status_value << 20)) && retry--); + if ((reg & BIT(20)) != (status_value << 20)) + return -ETIMEDOUT; + return 0; +} + +static void a5xx_restore_isense_regs(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned int reg, i, ramp = GPMU_ISENSE_SAVE; + static unsigned int isense_regs[6] = {0xFFFF}, isense_reg_addr[] = { + A5XX_GPU_CS_DECIMAL_ALIGN, + A5XX_GPU_CS_SENSOR_PARAM_CORE_1, + A5XX_GPU_CS_SENSOR_PARAM_CORE_2, + A5XX_GPU_CS_SW_OV_FUSE_EN, + A5XX_GPU_CS_ENDPOINT_CALIBRATION_DONE, + A5XX_GPMU_TEMP_SENSOR_CONFIG}; + + if (!adreno_is_a540(adreno_dev)) + return; + + /* read signature */ + kgsl_regread(device, ramp++, ®); + + if (reg == 0xBABEFACE) { + /* store memory locations in buffer */ + for (i = 0; i < ARRAY_SIZE(isense_regs); i++) + kgsl_regread(device, ramp + i, isense_regs + i); + + /* clear signature */ + kgsl_regwrite(device, GPMU_ISENSE_SAVE, 0x0); + } + + /* if we never stored memory locations - do nothing */ + if (isense_regs[0] == 0xFFFF) + return; + + /* restore registers from memory */ + for (i = 0; i < ARRAY_SIZE(isense_reg_addr); i++) + kgsl_regwrite(device, isense_reg_addr[i], isense_regs[i]); + +} + +/* + * a5xx_regulator_enable() - Enable any necessary HW regulators + * @adreno_dev: The adreno device pointer + * + * Some HW blocks may need their regulators explicitly enabled + * on a restart. Clocks must be on during this call. + */ +static int a5xx_regulator_enable(struct adreno_device *adreno_dev) +{ + unsigned int ret; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (test_bit(ADRENO_DEVICE_GPU_REGULATOR_ENABLED, + &adreno_dev->priv)) + return 0; + + if (!(adreno_is_a530(adreno_dev) || adreno_is_a540(adreno_dev))) { + /* Halt the sp_input_clk at HM level */ + kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL, 0x00000055); + a5xx_hwcg_set(adreno_dev, true); + /* Turn on sp_input_clk at HM level */ + kgsl_regrmw(device, A5XX_RBBM_CLOCK_CNTL, 0xFF, 0); + + set_bit(ADRENO_DEVICE_GPU_REGULATOR_ENABLED, + &adreno_dev->priv); + return 0; + } + + /* + * Turn on smaller power domain first to reduce voltage droop. + * Set the default register values; set SW_COLLAPSE to 0. 
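+ * (Compare the disable path, which writes 0x778001: bit 0 is the
+ * SW_COLLAPSE bit.)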
+ */ + kgsl_regwrite(device, A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000); + /* Insert a delay between RAC and SPTP GDSC to reduce voltage droop */ + udelay(3); + ret = _poll_gdsc_status(adreno_dev, A5XX_GPMU_RBCCU_PWR_CLK_STATUS, 1); + if (ret) { + dev_err(device->dev, "RBCCU GDSC enable failed\n"); + return ret; + } + + kgsl_regwrite(device, A5XX_GPMU_SP_POWER_CNTL, 0x778000); + ret = _poll_gdsc_status(adreno_dev, A5XX_GPMU_SP_PWR_CLK_STATUS, 1); + if (ret) { + dev_err(device->dev, "SPTP GDSC enable failed\n"); + return ret; + } + + /* Disable SP clock */ + kgsl_regrmw(device, A5XX_GPMU_GPMU_SP_CLOCK_CONTROL, + CNTL_IP_CLK_ENABLE, 0); + /* Enable hardware clockgating */ + a5xx_hwcg_set(adreno_dev, true); + /* Enable SP clock */ + kgsl_regrmw(device, A5XX_GPMU_GPMU_SP_CLOCK_CONTROL, + CNTL_IP_CLK_ENABLE, 1); + + a5xx_restore_isense_regs(adreno_dev); + + set_bit(ADRENO_DEVICE_GPU_REGULATOR_ENABLED, &adreno_dev->priv); + return 0; +} + +/* + * a5xx_regulator_disable() - Disable any necessary HW regulators + * @adreno_dev: The adreno device pointer + * + * Some HW blocks may need their regulators explicitly disabled + * on a power down to prevent current spikes. Clocks must be on + * during this call. + */ +static void a5xx_regulator_disable(struct adreno_device *adreno_dev) +{ + unsigned int reg; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (adreno_is_a512(adreno_dev) || adreno_is_a508(adreno_dev)) + return; + + if (!test_and_clear_bit(ADRENO_DEVICE_GPU_REGULATOR_ENABLED, + &adreno_dev->priv)) + return; + + /* If feature is not supported or not enabled */ + if (!adreno_dev->sptp_pc_enabled) { + /* Set the default register values; set SW_COLLAPSE to 1 */ + kgsl_regwrite(device, A5XX_GPMU_SP_POWER_CNTL, 0x778001); + /* + * Insert a delay between SPTP and RAC GDSC to reduce voltage + * droop. + */ + udelay(3); + if (_poll_gdsc_status(adreno_dev, + A5XX_GPMU_SP_PWR_CLK_STATUS, 0)) + dev_warn(device->dev, "SPTP GDSC disable failed\n"); + + kgsl_regwrite(device, A5XX_GPMU_RBCCU_POWER_CNTL, 0x778001); + if (_poll_gdsc_status(adreno_dev, + A5XX_GPMU_RBCCU_PWR_CLK_STATUS, 0)) + dev_warn(device->dev, "RBCCU GDSC disable failed\n"); + } else if (test_bit(ADRENO_DEVICE_GPMU_INITIALIZED, + &adreno_dev->priv)) { + /* GPMU firmware is supposed to turn off SPTP & RAC GDSCs. */ + kgsl_regread(device, A5XX_GPMU_SP_PWR_CLK_STATUS, ®); + if (reg & BIT(20)) + dev_warn(device->dev, "SPTP GDSC is not disabled\n"); + kgsl_regread(device, A5XX_GPMU_RBCCU_PWR_CLK_STATUS, ®); + if (reg & BIT(20)) + dev_warn(device->dev, "RBCCU GDSC is not disabled\n"); + /* + * GPMU firmware is supposed to set GMEM to non-retention. + * Bit 14 is the memory core force on bit. 
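+ * If that bit is still set here, the GPMU did not put GMEM into
+ * non-retention, hence the warning below.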
+ */ + kgsl_regread(device, A5XX_GPMU_RBCCU_CLOCK_CNTL, ®); + if (reg & BIT(14)) + dev_warn(device->dev, "GMEM is forced on\n"); + } + + if (adreno_is_a530(adreno_dev)) { + /* Reset VBIF before PC to avoid popping bogus FIFO entries */ + kgsl_regwrite(device, A5XX_RBBM_BLOCK_SW_RESET_CMD, + 0x003C0000); + kgsl_regwrite(device, A5XX_RBBM_BLOCK_SW_RESET_CMD, 0); + } +} + +/* + * a5xx_enable_pc() - Enable the GPMU based power collapse of the SPTP and RAC + * blocks + * @adreno_dev: The adreno device pointer + */ +static void a5xx_enable_pc(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (!adreno_dev->sptp_pc_enabled) + return; + + kgsl_regwrite(device, A5XX_GPMU_PWR_COL_INTER_FRAME_CTRL, 0x0000007F); + kgsl_regwrite(device, A5XX_GPMU_PWR_COL_BINNING_CTRL, 0); + kgsl_regwrite(device, A5XX_GPMU_PWR_COL_INTER_FRAME_HYST, 0x000A0080); + kgsl_regwrite(device, A5XX_GPMU_PWR_COL_STAGGER_DELAY, 0x00600040); + + trace_adreno_sp_tp((unsigned long) __builtin_return_address(0)); +}; + +/* + * The maximum payload of a type4 packet is the max size minus one for the + * opcode + */ +#define TYPE4_MAX_PAYLOAD (PM4_TYPE4_PKT_SIZE_MAX - 1) + +static int _gpmu_create_load_cmds(struct adreno_device *adreno_dev, + uint32_t *ucode, uint32_t size) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + uint32_t *start, *cmds; + uint32_t offset = 0; + uint32_t cmds_size = size; + + /* Add a dword for each PM4 packet */ + cmds_size += (size / TYPE4_MAX_PAYLOAD) + 1; + + /* Add 4 dwords for the protected mode */ + cmds_size += 4; + + if (adreno_dev->gpmu_cmds != NULL) + return 0; + + adreno_dev->gpmu_cmds = devm_kmalloc(&device->pdev->dev, + cmds_size << 2, GFP_KERNEL); + if (adreno_dev->gpmu_cmds == NULL) + return -ENOMEM; + + cmds = adreno_dev->gpmu_cmds; + start = cmds; + + /* Turn CP protection OFF */ + cmds += cp_protected_mode(adreno_dev, cmds, 0); + + /* + * Prebuild the cmd stream to send to the GPU to load + * the GPMU firmware + */ + while (size > 0) { + int tmp_size = size; + + if (size >= TYPE4_MAX_PAYLOAD) + tmp_size = TYPE4_MAX_PAYLOAD; + + *cmds++ = cp_type4_packet( + A5XX_GPMU_INST_RAM_BASE + offset, + tmp_size); + + memcpy(cmds, &ucode[offset], tmp_size << 2); + + cmds += tmp_size; + offset += tmp_size; + size -= tmp_size; + } + + /* Turn CP protection ON */ + cmds += cp_protected_mode(adreno_dev, cmds, 1); + + adreno_dev->gpmu_cmds_size = (size_t) (cmds - start); + + return 0; +} + + +/* + * _load_gpmu_firmware() - Load the ucode into the GPMU RAM + * @adreno_dev: Pointer to adreno device + */ +static int _load_gpmu_firmware(struct adreno_device *adreno_dev) +{ + uint32_t *data; + const struct firmware *fw = NULL; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + const struct adreno_a5xx_core *a5xx_core = to_a5xx_core(adreno_dev); + uint32_t *cmds, cmd_size; + int ret = -EINVAL; + u32 gmu_major = 1; + + if (!a5xx_has_gpmu(adreno_dev)) + return 0; + + /* a530 used GMU major 1 and A540 used GMU major 3 */ + if (adreno_is_a540(adreno_dev)) + gmu_major = 3; + + /* gpmu fw already saved and verified so do nothing new */ + if (adreno_dev->gpmu_cmds_size != 0) + return 0; + + if (a5xx_core->gpmufw_name == NULL) + return 0; + + ret = request_firmware(&fw, a5xx_core->gpmufw_name, &device->pdev->dev); + if (ret || fw == NULL) { + dev_err(&device->pdev->dev, + "request_firmware (%s) failed: %d\n", + a5xx_core->gpmufw_name, ret); + return ret; + } + + data = (uint32_t *)fw->data; + + if (data[0] >= (fw->size / sizeof(uint32_t)) || data[0] < 2) + 
goto err; + + if (data[1] != GPMU_FIRMWARE_ID) + goto err; + ret = _read_fw2_block_header(device, &data[2], + data[0] - 2, GPMU_FIRMWARE_ID, gmu_major, 0); + if (ret) + goto err; + + /* Integer overflow check for cmd_size */ + if (data[2] > (data[0] - 2)) + goto err; + + cmds = data + data[2] + 3; + cmd_size = data[0] - data[2] - 2; + + if (cmd_size > GPMU_INST_RAM_SIZE) { + dev_err(device->dev, + "GPMU firmware block size is larger than RAM size\n"); + goto err; + } + + /* Everything is cool, so create some commands */ + ret = _gpmu_create_load_cmds(adreno_dev, cmds, cmd_size); +err: + if (fw) + release_firmware(fw); + + return ret; +} + +static void a5xx_spin_idle_debug(struct adreno_device *adreno_dev, + const char *str) +{ + struct kgsl_device *device = &adreno_dev->dev; + unsigned int rptr, wptr; + unsigned int status, status3, intstatus; + unsigned int hwfault; + + dev_err(device->dev, str); + + kgsl_regread(device, A5XX_CP_RB_RPTR, &rptr); + kgsl_regread(device, A5XX_CP_RB_WPTR, &wptr); + + kgsl_regread(device, A5XX_RBBM_STATUS, &status); + kgsl_regread(device, A5XX_RBBM_STATUS3, &status3); + kgsl_regread(device, A5XX_RBBM_INT_0_STATUS, &intstatus); + kgsl_regread(device, A5XX_CP_HW_FAULT, &hwfault); + + + dev_err(device->dev, + "rb=%d pos=%X/%X rbbm_status=%8.8X/%8.8X int_0_status=%8.8X\n", + adreno_dev->cur_rb->id, rptr, wptr, status, status3, intstatus); + + dev_err(device->dev, " hwfault=%8.8X\n", hwfault); + + kgsl_device_snapshot(device, NULL, false); +} + +static int _gpmu_send_init_cmds(struct adreno_device *adreno_dev) +{ + struct adreno_ringbuffer *rb = adreno_dev->cur_rb; + uint32_t *cmds; + uint32_t size = adreno_dev->gpmu_cmds_size; + int ret; + + if (size == 0 || adreno_dev->gpmu_cmds == NULL) + return -EINVAL; + + cmds = adreno_ringbuffer_allocspace(rb, size); + if (IS_ERR(cmds)) + return PTR_ERR(cmds); + if (cmds == NULL) + return -ENOSPC; + + /* Copy to the RB the predefined fw sequence cmds */ + memcpy(cmds, adreno_dev->gpmu_cmds, size << 2); + + ret = a5xx_ringbuffer_submit(rb, NULL, true); + if (!ret) { + ret = adreno_spin_idle(adreno_dev, 2000); + if (ret) + a5xx_spin_idle_debug(adreno_dev, + "gpmu initialization failed to idle\n"); + } + return ret; +} + +/* + * a5xx_gpmu_start() - Initialize and start the GPMU + * @adreno_dev: Pointer to adreno device + * + * Load the GPMU microcode, set up any features such as hardware clock gating + * or IFPC, and take the GPMU out of reset. + */ +static int a5xx_gpmu_start(struct adreno_device *adreno_dev) +{ + int ret; + unsigned int reg, retry = GPMU_FW_INIT_RETRY; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (!a5xx_has_gpmu(adreno_dev)) + return 0; + + ret = _gpmu_send_init_cmds(adreno_dev); + if (ret) + return ret; + + if (adreno_is_a530(adreno_dev)) { + /* GPMU clock gating setup */ + kgsl_regwrite(device, A5XX_GPMU_WFI_CONFIG, 0x00004014); + } + /* Kick off GPMU firmware */ + kgsl_regwrite(device, A5XX_GPMU_CM3_SYSRESET, 0); + /* + * The hardware team's estimation of GPMU firmware initialization + * latency is about 3000 cycles, that's about 5 to 24 usec. 
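+ * The poll below is far more generous: up to GPMU_FW_INIT_RETRY
+ * iterations of udelay(1), i.e. roughly 5 ms, before timing out.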
+ */ + do { + udelay(1); + kgsl_regread(device, A5XX_GPMU_GENERAL_0, ®); + } while ((reg != 0xBABEFACE) && retry--); + + if (reg != 0xBABEFACE) { + dev_err(device->dev, + "GPMU firmware initialization timed out\n"); + return -ETIMEDOUT; + } + + if (!adreno_is_a530(adreno_dev)) { + kgsl_regread(device, A5XX_GPMU_GENERAL_1, ®); + + if (reg) { + dev_err(device->dev, + "GPMU firmware initialization failed: %d\n", + reg); + return -EIO; + } + } + set_bit(ADRENO_DEVICE_GPMU_INITIALIZED, &adreno_dev->priv); + /* + * We are in AWARE state and IRQ line from GPU to host is + * disabled. + * Read pending GPMU interrupts and clear GPMU_RBBM_INTR_INFO. + */ + kgsl_regread(device, A5XX_GPMU_RBBM_INTR_INFO, ®); + /* + * Clear RBBM interrupt mask if any of GPMU interrupts + * are pending. + */ + if (reg) + kgsl_regwrite(device, + A5XX_RBBM_INT_CLEAR_CMD, + 1 << A5XX_INT_GPMU_FIRMWARE); + return ret; +} + +void a5xx_hwcg_set(struct adreno_device *adreno_dev, bool on) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + const struct adreno_a5xx_core *a5xx_core = to_a5xx_core(adreno_dev); + int i; + + if (!adreno_dev->hwcg_enabled) + return; + + for (i = 0; i < a5xx_core->hwcg_count; i++) + kgsl_regwrite(device, a5xx_core->hwcg[i].offset, + on ? a5xx_core->hwcg[i].val : 0); + + /* enable top level HWCG */ + kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL, on ? 0xAAA8AA00 : 0); + kgsl_regwrite(device, A5XX_RBBM_ISDB_CNT, on ? 0x00000182 : 0x00000180); +} + +static int _read_fw2_block_header(struct kgsl_device *device, + uint32_t *header, uint32_t remain, + uint32_t id, uint32_t major, uint32_t minor) +{ + uint32_t header_size; + int i = 1; + + if (header == NULL) + return -ENOMEM; + + header_size = header[0]; + /* Headers have limited size and always occur as pairs of words */ + if (header_size > MAX_HEADER_SIZE || header_size >= remain || + header_size % 2 || header_size == 0) + return -EINVAL; + /* Sequences must have an identifying id first thing in their header */ + if (id == GPMU_SEQUENCE_ID) { + if (header[i] != HEADER_SEQUENCE || + (header[i + 1] >= MAX_SEQUENCE_ID)) + return -EINVAL; + i += 2; + } + for (; i < header_size; i += 2) { + switch (header[i]) { + /* Major Version */ + case HEADER_MAJOR: + if ((major > header[i + 1]) && + header[i + 1]) { + dev_err(device->dev, + "GPMU major version mis-match %d, %d\n", + major, header[i + 1]); + return -EINVAL; + } + break; + case HEADER_MINOR: + if (minor > header[i + 1]) + dev_err(device->dev, + "GPMU minor version mis-match %d %d\n", + minor, header[i + 1]); + break; + case HEADER_DATE: + case HEADER_TIME: + break; + default: + dev_err(device->dev, "GPMU unknown header ID %d\n", + header[i]); + } + } + return 0; +} + +/* + * Read in the register sequence file and save pointers to the + * necessary sequences. + * + * GPU sequence file format (one dword per field unless noted): + * Block 1 length (length dword field not inclusive) + * Block 1 type = Sequence = 3 + * Block Header length (length dword field not inclusive) + * BH field ID = Sequence field ID + * BH field data = Sequence ID + * BH field ID + * BH field data + * ... + * Opcode 0 ID + * Opcode 0 data M words + * Opcode 1 ID + * Opcode 1 data N words + * ... + * Opcode X ID + * Opcode X data O words + * Block 2 length... 
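+ *
+ * For reference, the opcode encodings consumed by _execute_reg_sequence()
+ * below are (dword counts include the opcode ID):
+ *   Opcode 1: 32-bit write to a 64-bit register address
+ *             (4 dwords: 1, addr_lo, addr_hi, value)
+ *   Opcode 2: 64-bit write to a 64-bit register address
+ *             (5 dwords: 2, addr_lo, addr_hi, val_lo, val_hi)
+ *   Opcode 3: delay (2 dwords: 3, delay in usec)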
+ */ +static void _load_regfile(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + const struct adreno_a5xx_core *a5xx_core = to_a5xx_core(adreno_dev); + const struct firmware *fw; + uint64_t block_size = 0, block_total = 0; + uint32_t fw_size, *block; + int ret = -EINVAL; + u32 lm_major = 1; + + if (!a5xx_core->regfw_name) + return; + + ret = request_firmware(&fw, a5xx_core->regfw_name, &device->pdev->dev); + if (ret) { + dev_err(&device->pdev->dev, "request firmware failed %d, %s\n", + ret, a5xx_core->regfw_name); + return; + } + + /* a530v2 lm_major was 3. a530v3 lm_major was 1 */ + if (adreno_is_a530v2(adreno_dev)) + lm_major = 3; + + fw_size = fw->size / sizeof(uint32_t); + /* Min valid file of size 6, see file description */ + if (fw_size < 6) + goto err; + block = (uint32_t *)fw->data; + /* All offset numbers calculated from file description */ + while (block_total < fw_size) { + block_size = block[0]; + if (((block_total + block_size) >= fw_size) + || block_size < 5) + goto err; + if (block[1] != GPMU_SEQUENCE_ID) + goto err; + + /* For now ignore blocks other than the LM sequence */ + if (block[4] == LM_SEQUENCE_ID) { + ret = _read_fw2_block_header(device, &block[2], + block_size - 2, GPMU_SEQUENCE_ID, + lm_major, 0); + if (ret) + goto err; + + if (block[2] > (block_size - 2)) + goto err; + adreno_dev->lm_sequence = block + block[2] + 3; + adreno_dev->lm_size = block_size - block[2] - 2; + } + block_total += (block_size + 1); + block += (block_size + 1); + } + if (adreno_dev->lm_sequence) + return; + +err: + release_firmware(fw); + dev_err(device->dev, + "Register file failed to load sz=%d bsz=%llu header=%d\n", + fw_size, block_size, ret); +} + +static int _execute_reg_sequence(struct adreno_device *adreno_dev, + uint32_t *opcode, uint32_t length) +{ + uint32_t *cur = opcode; + uint64_t reg, val; + + /* todo double check the reg writes */ + while ((cur - opcode) < length) { + if (cur[0] == 1 && (length - (cur - opcode) >= 4)) { + /* Write a 32 bit value to a 64 bit reg */ + reg = cur[2]; + reg = (reg << 32) | cur[1]; + kgsl_regwrite(KGSL_DEVICE(adreno_dev), reg, cur[3]); + cur += 4; + } else if (cur[0] == 2 && (length - (cur - opcode) >= 5)) { + /* Write a 64 bit value to a 64 bit reg */ + reg = cur[2]; + reg = (reg << 32) | cur[1]; + val = cur[4]; + val = (val << 32) | cur[3]; + kgsl_regwrite(KGSL_DEVICE(adreno_dev), reg, val); + cur += 5; + } else if (cur[0] == 3 && (length - (cur - opcode) >= 2)) { + /* Delay for X usec */ + udelay(cur[1]); + cur += 2; + } else + return -EINVAL; + } + return 0; +} + +static uint32_t _write_voltage_table(struct adreno_device *adreno_dev, + unsigned int addr) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + const struct adreno_a5xx_core *a5xx_core = to_a5xx_core(adreno_dev); + int i; + struct dev_pm_opp *opp; + unsigned int mvolt = 0; + + kgsl_regwrite(device, addr++, a5xx_core->max_power); + kgsl_regwrite(device, addr++, pwr->num_pwrlevels); + + /* Write voltage in mV and frequency in MHz */ + for (i = 0; i < pwr->num_pwrlevels; i++) { + opp = dev_pm_opp_find_freq_exact(&device->pdev->dev, + pwr->pwrlevels[i].gpu_freq, true); + /* _opp_get returns uV, convert to mV */ + if (!IS_ERR(opp)) { + mvolt = dev_pm_opp_get_voltage(opp) / 1000; + dev_pm_opp_put(opp); + } + kgsl_regwrite(device, addr++, mvolt); + kgsl_regwrite(device, addr++, + pwr->pwrlevels[i].gpu_freq / 1000000); + } + return (pwr->num_pwrlevels * 2 + 2); +} + +static uint32_t 
lm_limit(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (adreno_dev->lm_limit) + return adreno_dev->lm_limit; + + if (of_property_read_u32(device->pdev->dev.of_node, "qcom,lm-limit", + &adreno_dev->lm_limit)) + adreno_dev->lm_limit = LM_DEFAULT_LIMIT; + + return adreno_dev->lm_limit; +} +/* + * a5xx_lm_init() - Initialize LM/DPM on the GPMU + * @adreno_dev: The adreno device pointer + */ +static void a530_lm_init(struct adreno_device *adreno_dev) +{ + uint32_t length; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + const struct adreno_a5xx_core *a5xx_core = to_a5xx_core(adreno_dev); + + if (!adreno_dev->lm_enabled) + return; + + /* If something was wrong with the sequence file, return */ + if (adreno_dev->lm_sequence == NULL) + return; + + /* Write LM registers including DPM ucode, coefficients, and config */ + if (_execute_reg_sequence(adreno_dev, adreno_dev->lm_sequence, + adreno_dev->lm_size)) { + /* If the sequence is invalid, it's not getting better */ + adreno_dev->lm_sequence = NULL; + dev_warn(device->dev, + "Invalid LM sequence\n"); + return; + } + + kgsl_regwrite(device, A5XX_GPMU_TEMP_SENSOR_ID, a5xx_core->gpmu_tsens); + kgsl_regwrite(device, A5XX_GPMU_DELTA_TEMP_THRESHOLD, 0x1); + kgsl_regwrite(device, A5XX_GPMU_TEMP_SENSOR_CONFIG, 0x1); + + kgsl_regwrite(device, A5XX_GPMU_GPMU_VOLTAGE, + (0x80000000 | device->pwrctrl.active_pwrlevel)); + /* use the leakage to set this value at runtime */ + kgsl_regwrite(device, A5XX_GPMU_BASE_LEAKAGE, + adreno_dev->lm_leakage); + + /* Enable the power threshold and set it to 6000m */ + kgsl_regwrite(device, A5XX_GPMU_GPMU_PWR_THRESHOLD, + 0x80000000 | lm_limit(adreno_dev)); + + kgsl_regwrite(device, A5XX_GPMU_BEC_ENABLE, 0x10001FFF); + kgsl_regwrite(device, A5XX_GDPM_CONFIG1, 0x00201FF1); + + /* Send an initial message to the GPMU with the LM voltage table */ + kgsl_regwrite(device, AGC_MSG_STATE, 1); + kgsl_regwrite(device, AGC_MSG_COMMAND, AGC_POWER_CONFIG_PRODUCTION_ID); + length = _write_voltage_table(adreno_dev, AGC_MSG_PAYLOAD); + kgsl_regwrite(device, AGC_MSG_PAYLOAD_SIZE, length * sizeof(uint32_t)); + kgsl_regwrite(device, AGC_INIT_MSG_MAGIC, AGC_INIT_MSG_VALUE); +} + +/* + * a5xx_lm_enable() - Enable the LM/DPM feature on the GPMU + * @adreno_dev: The adreno device pointer + */ +static void a530_lm_enable(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (!adreno_dev->lm_enabled) + return; + + /* If no sequence properly initialized, return */ + if (adreno_dev->lm_sequence == NULL) + return; + + kgsl_regwrite(device, A5XX_GDPM_INT_MASK, 0x00000000); + kgsl_regwrite(device, A5XX_GDPM_INT_EN, 0x0000000A); + kgsl_regwrite(device, A5XX_GPMU_GPMU_VOLTAGE_INTR_EN_MASK, 0x00000001); + kgsl_regwrite(device, A5XX_GPMU_TEMP_THRESHOLD_INTR_EN_MASK, + 0x00050000); + kgsl_regwrite(device, A5XX_GPMU_THROTTLE_UNMASK_FORCE_CTRL, + 0x00030000); + + if (adreno_is_a530(adreno_dev)) + /* Program throttle control, do not enable idle DCS on v3+ */ + kgsl_regwrite(device, A5XX_GPMU_CLOCK_THROTTLE_CTRL, + adreno_is_a530v2(adreno_dev) ? 
0x00060011 : 0x00000011); +} + +static void a540_lm_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + uint32_t agc_lm_config = AGC_BCL_DISABLED | + ((ADRENO_CHIPID_PATCH(adreno_dev->chipid) & 0x3) + << AGC_GPU_VERSION_SHIFT); + unsigned int r; + + if (!adreno_dev->throttling_enabled) + agc_lm_config |= AGC_THROTTLE_DISABLE; + + if (adreno_dev->lm_enabled) { + agc_lm_config |= + AGC_LM_CONFIG_ENABLE_GPMU_ADAPTIVE | + AGC_LM_CONFIG_ISENSE_ENABLE; + + kgsl_regread(device, A5XX_GPMU_TEMP_SENSOR_CONFIG, &r); + + if ((r & GPMU_ISENSE_STATUS) == GPMU_ISENSE_END_POINT_CAL_ERR) { + dev_err(device->dev, + "GPMU: ISENSE end point calibration failure\n"); + agc_lm_config |= AGC_LM_CONFIG_ENABLE_ERROR; + } + } + + kgsl_regwrite(device, AGC_MSG_STATE, 0x80000001); + kgsl_regwrite(device, AGC_MSG_COMMAND, AGC_POWER_CONFIG_PRODUCTION_ID); + (void) _write_voltage_table(adreno_dev, AGC_MSG_PAYLOAD); + kgsl_regwrite(device, AGC_MSG_PAYLOAD + AGC_LM_CONFIG, agc_lm_config); + kgsl_regwrite(device, AGC_MSG_PAYLOAD + AGC_LEVEL_CONFIG, + (unsigned int) ~(GENMASK(LM_DCVS_LIMIT, 0) | + GENMASK(16+LM_DCVS_LIMIT, 16))); + + kgsl_regwrite(device, AGC_MSG_PAYLOAD_SIZE, + (AGC_LEVEL_CONFIG + 1) * sizeof(uint32_t)); + kgsl_regwrite(device, AGC_INIT_MSG_MAGIC, AGC_INIT_MSG_VALUE); + + kgsl_regwrite(device, A5XX_GPMU_GPMU_VOLTAGE, + (0x80000000 | device->pwrctrl.active_pwrlevel)); + + kgsl_regwrite(device, A5XX_GPMU_GPMU_PWR_THRESHOLD, + PWR_THRESHOLD_VALID | lm_limit(adreno_dev)); + + kgsl_regwrite(device, A5XX_GPMU_GPMU_VOLTAGE_INTR_EN_MASK, + VOLTAGE_INTR_EN); +} + + +static void a5xx_lm_enable(struct adreno_device *adreno_dev) +{ + if (adreno_is_a530(adreno_dev)) + a530_lm_enable(adreno_dev); +} + +static void a5xx_lm_init(struct adreno_device *adreno_dev) +{ + if (adreno_is_a530(adreno_dev)) + a530_lm_init(adreno_dev); + else if (adreno_is_a540(adreno_dev)) + a540_lm_init(adreno_dev); +} + +static int gpmu_set_level(struct adreno_device *adreno_dev, unsigned int val) +{ + unsigned int reg; + int retry = 100; + + kgsl_regwrite(KGSL_DEVICE(adreno_dev), A5XX_GPMU_GPMU_VOLTAGE, val); + + do { + kgsl_regread(KGSL_DEVICE(adreno_dev), A5XX_GPMU_GPMU_VOLTAGE, + ®); + } while ((reg & 0x80000000) && retry--); + + return (reg & 0x80000000) ? 
-ETIMEDOUT : 0; +} + +/* + * a5xx_pwrlevel_change_settings() - Program the hardware during power level + * transitions + * @adreno_dev: The adreno device pointer + * @prelevel: The previous power level + * @postlevel: The new power level + * @post: True if called after the clock change has taken effect + */ +static void a5xx_pwrlevel_change_settings(struct adreno_device *adreno_dev, + unsigned int prelevel, unsigned int postlevel, + bool post) +{ + /* + * On pre A540 HW only call through if LMx is supported and enabled, and + * always call through for a540 + */ + if (!adreno_is_a540(adreno_dev) && !adreno_dev->lm_enabled) + return; + + if (!post) { + if (gpmu_set_level(adreno_dev, (0x80000010 | postlevel))) + dev_err(KGSL_DEVICE(adreno_dev)->dev, + "GPMU pre powerlevel did not stabilize\n"); + } else { + if (gpmu_set_level(adreno_dev, (0x80000000 | postlevel))) + dev_err(KGSL_DEVICE(adreno_dev)->dev, + "GPMU post powerlevel did not stabilize\n"); + } +} + +#if IS_ENABLED(CONFIG_COMMON_CLK_QCOM) +static void a5xx_clk_set_options(struct adreno_device *adreno_dev, + const char *name, struct clk *clk, bool on) +{ + if (!clk) + return; + + if (!adreno_is_a540(adreno_dev) && !adreno_is_a512(adreno_dev) && + !adreno_is_a508(adreno_dev)) + return; + + /* Handle clock settings for GFX PSCBCs */ + if (on) { + if (!strcmp(name, "mem_iface_clk")) { + qcom_clk_set_flags(clk, CLKFLAG_NORETAIN_PERIPH); + qcom_clk_set_flags(clk, CLKFLAG_NORETAIN_MEM); + } else if (!strcmp(name, "core_clk")) { + qcom_clk_set_flags(clk, CLKFLAG_RETAIN_PERIPH); + qcom_clk_set_flags(clk, CLKFLAG_RETAIN_MEM); + } + } else { + if (!strcmp(name, "core_clk")) { + qcom_clk_set_flags(clk, CLKFLAG_NORETAIN_PERIPH); + qcom_clk_set_flags(clk, CLKFLAG_NORETAIN_MEM); + } + } +} +#endif + +/* FW driven idle 10% throttle */ +#define IDLE_10PCT 0 +/* number of cycles when clock is throttled by 50% (CRC) */ +#define CRC_50PCT 1 +/* number of cycles when clock is throttled by more than 50% (CRC) */ +#define CRC_MORE50PCT 2 +/* number of cycles when clock is throttle by less than 50% (CRC) */ +#define CRC_LESS50PCT 3 + +static int64_t a5xx_read_throttling_counters(struct adreno_device *adreno_dev) +{ + int i; + int64_t adj; + uint32_t th[ADRENO_GPMU_THROTTLE_COUNTERS]; + struct adreno_busy_data *busy = &adreno_dev->busy_data; + + if (!adreno_dev->throttling_enabled) + return 0; + + for (i = 0; i < ADRENO_GPMU_THROTTLE_COUNTERS; i++) { + if (!adreno_dev->gpmu_throttle_counters[i]) + return 0; + + th[i] = counter_delta(KGSL_DEVICE(adreno_dev), + adreno_dev->gpmu_throttle_counters[i], + &busy->throttle_cycles[i]); + } + adj = th[CRC_MORE50PCT] - th[IDLE_10PCT]; + adj = th[CRC_50PCT] + th[CRC_LESS50PCT] / 3 + (adj < 0 ? 0 : adj) * 3; + + trace_kgsl_clock_throttling( + th[IDLE_10PCT], th[CRC_50PCT], + th[CRC_MORE50PCT], th[CRC_LESS50PCT], + adj); + return adj; +} + +/* + * a5xx_gpmu_reset() - Re-enable GPMU based power features and restart GPMU + * @work: Pointer to the work struct for gpmu reset + * + * Load the GPMU microcode, set up any features such as hardware clock gating + * or IFPC, and take the GPMU out of reset. 
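+ *
+ * The reset runs from a workqueue: it bails out early if the GPMU is already
+ * marked initialized or if the device is not in NAP, AWARE or ACTIVE, then
+ * takes the device mutex and bumps a NAP device to AWARE before touching any
+ * GPMU registers.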
+ */ +static void a5xx_gpmu_reset(struct work_struct *work) +{ + struct adreno_device *adreno_dev = container_of(work, + struct adreno_device, gpmu_work); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (test_bit(ADRENO_DEVICE_GPMU_INITIALIZED, &adreno_dev->priv)) + return; + + /* + * If GPMU has already experienced a restart or is in the process of it + * after the watchdog timeout, then there is no need to reset GPMU + * again. + */ + if (device->state != KGSL_STATE_NAP && + device->state != KGSL_STATE_AWARE && + device->state != KGSL_STATE_ACTIVE) + return; + + mutex_lock(&device->mutex); + + if (device->state == KGSL_STATE_NAP) + kgsl_pwrctrl_change_state(device, KGSL_STATE_AWARE); + + if (a5xx_regulator_enable(adreno_dev)) + goto out; + + /* Soft reset of the GPMU block */ + kgsl_regwrite(device, A5XX_RBBM_BLOCK_SW_RESET_CMD, BIT(16)); + + /* GPU comes up in secured mode, make it unsecured by default */ + if (!ADRENO_FEATURE(adreno_dev, ADRENO_CONTENT_PROTECTION)) + kgsl_regwrite(device, A5XX_RBBM_SECVID_TRUST_CNTL, 0x0); + + + a5xx_gpmu_init(adreno_dev); + +out: + mutex_unlock(&device->mutex); +} + +static void _setup_throttling_counters(struct adreno_device *adreno_dev) +{ + int i, ret = 0; + + if (!adreno_is_a540(adreno_dev)) + return; + + for (i = 0; i < ADRENO_GPMU_THROTTLE_COUNTERS; i++) { + /* reset throttled cycles ivalue */ + adreno_dev->busy_data.throttle_cycles[i] = 0; + + /* Throttle countables start at off set 43 */ + ret |= adreno_perfcounter_kernel_get(adreno_dev, + KGSL_PERFCOUNTER_GROUP_GPMU_PWR, 43 + i, + &adreno_dev->gpmu_throttle_counters[i], NULL); + } + + WARN_ONCE(ret, "Unable to get one or more clock throttling registers\n"); +} + +/* + * a5xx_start() - Device start + * @adreno_dev: Pointer to adreno device + * + * a5xx device start + */ +static int a5xx_start(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + const struct adreno_a5xx_core *a5xx_core = to_a5xx_core(adreno_dev); + unsigned int bit; + int ret; + + ret = kgsl_mmu_start(device); + if (ret) + return ret; + + adreno_get_bus_counters(adreno_dev); + adreno_perfcounter_restore(adreno_dev); + + adreno_dev->irq_mask = A5XX_INT_MASK; + + if (adreno_is_a530(adreno_dev) && + ADRENO_FEATURE(adreno_dev, ADRENO_LM)) + adreno_perfcounter_kernel_get(adreno_dev, + KGSL_PERFCOUNTER_GROUP_GPMU_PWR, 27, + &adreno_dev->lm_threshold_count, NULL); + + /* Enable 64 bit addressing */ + kgsl_regwrite(device, A5XX_CP_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A5XX_VSC_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A5XX_GRAS_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A5XX_RB_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A5XX_PC_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A5XX_HLSQ_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A5XX_VFD_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A5XX_VPC_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A5XX_UCHE_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A5XX_SP_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A5XX_TPL1_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1); + + _setup_throttling_counters(adreno_dev); + + /* Set up VBIF registers from the GPU core definition */ + kgsl_regmap_multi_write(&device->regmap, a5xx_core->vbif, + a5xx_core->vbif_count); + + /* Make all blocks contribute to the GPU BUSY perf counter */ + kgsl_regwrite(device, A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF); + + /* Program RBBM counter 0 to report GPU busy for frequency scaling */ + kgsl_regwrite(device, 
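+	/* read back as A5XX_RBBM_PERFCTR_RBBM_0_LO/_HI in a5xx_power_stats() */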
A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6); + + /* + * Enable the RBBM error reporting bits. This lets us get + * useful information on failure + */ + kgsl_regwrite(device, A5XX_RBBM_AHB_CNTL0, 0x00000001); + + if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_FAULT_DETECT_MASK)) { + /* + * We have 4 RB units, and only RB0 activity signals are + * working correctly. Mask out RB1-3 activity signals + * from the HW hang detection logic as per + * recommendation of hardware team. + */ + kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11, + 0xF0000000); + kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12, + 0xFFFFFFFF); + kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13, + 0xFFFFFFFF); + kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14, + 0xFFFFFFFF); + kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15, + 0xFFFFFFFF); + kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16, + 0xFFFFFFFF); + kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17, + 0xFFFFFFFF); + kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18, + 0xFFFFFFFF); + } + + /* + * Set hang detection threshold to 4 million cycles + * (0x3FFFF*16) + */ + kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_INT_CNTL, + (1 << 30) | 0x3FFFF); + + /* Turn on performance counters */ + kgsl_regwrite(device, A5XX_RBBM_PERFCTR_CNTL, 0x01); + + /* + * This is to increase performance by restricting VFD's cache access, + * so that LRZ and other data get evicted less. + */ + kgsl_regwrite(device, A5XX_UCHE_CACHE_WAYS, 0x02); + + /* + * Set UCHE_WRITE_THRU_BASE to the UCHE_TRAP_BASE effectively + * disabling L2 bypass + */ + kgsl_regwrite(device, A5XX_UCHE_TRAP_BASE_LO, 0xffff0000); + kgsl_regwrite(device, A5XX_UCHE_TRAP_BASE_HI, 0x0001ffff); + kgsl_regwrite(device, A5XX_UCHE_WRITE_THRU_BASE_LO, 0xffff0000); + kgsl_regwrite(device, A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001ffff); + + /* Program the GMEM VA range for the UCHE path */ + kgsl_regwrite(device, A5XX_UCHE_GMEM_RANGE_MIN_LO, + adreno_dev->gpucore->gmem_base); + kgsl_regwrite(device, A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x0); + kgsl_regwrite(device, A5XX_UCHE_GMEM_RANGE_MAX_LO, + adreno_dev->gpucore->gmem_base + + adreno_dev->gpucore->gmem_size - 1); + kgsl_regwrite(device, A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x0); + + /* + * Below CP registers are 0x0 by default, program init + * values based on a5xx flavor. 
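+	 * Smaller parts (a505/a506/a508/a510) use the smaller MEQ and ROQ
+	 * thresholds; a540/a512 and the remaining a5xx parts use the larger
+	 * ones and differ only in the MERCIU size.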
+ */ + if (adreno_is_a505_or_a506(adreno_dev) || adreno_is_a508(adreno_dev)) { + kgsl_regwrite(device, A5XX_CP_MEQ_THRESHOLDS, 0x20); + kgsl_regwrite(device, A5XX_CP_MERCIU_SIZE, 0x400); + kgsl_regwrite(device, A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030); + kgsl_regwrite(device, A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A); + } else if (adreno_is_a510(adreno_dev)) { + kgsl_regwrite(device, A5XX_CP_MEQ_THRESHOLDS, 0x20); + kgsl_regwrite(device, A5XX_CP_MERCIU_SIZE, 0x20); + kgsl_regwrite(device, A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030); + kgsl_regwrite(device, A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A); + } else if (adreno_is_a540(adreno_dev) || adreno_is_a512(adreno_dev)) { + kgsl_regwrite(device, A5XX_CP_MEQ_THRESHOLDS, 0x40); + kgsl_regwrite(device, A5XX_CP_MERCIU_SIZE, 0x400); + kgsl_regwrite(device, A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060); + kgsl_regwrite(device, A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16); + } else { + kgsl_regwrite(device, A5XX_CP_MEQ_THRESHOLDS, 0x40); + kgsl_regwrite(device, A5XX_CP_MERCIU_SIZE, 0x40); + kgsl_regwrite(device, A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060); + kgsl_regwrite(device, A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16); + } + + /* + * vtxFifo and primFifo thresholds default values + * are different. + */ + if (adreno_is_a505_or_a506(adreno_dev) || adreno_is_a508(adreno_dev)) + kgsl_regwrite(device, A5XX_PC_DBG_ECO_CNTL, + (0x100 << 11 | 0x100 << 22)); + else if (adreno_is_a510(adreno_dev) || adreno_is_a512(adreno_dev)) + kgsl_regwrite(device, A5XX_PC_DBG_ECO_CNTL, + (0x200 << 11 | 0x200 << 22)); + else + kgsl_regwrite(device, A5XX_PC_DBG_ECO_CNTL, + (0x400 << 11 | 0x300 << 22)); + + if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_TWO_PASS_USE_WFI)) { + /* + * Set TWOPASSUSEWFI in A5XX_PC_DBG_ECO_CNTL for + * microcodes after v77 + */ + if ((adreno_compare_pfp_version(adreno_dev, 0x5FF077) >= 0)) + kgsl_regrmw(device, A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8)); + } + + if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_DISABLE_RB_DP2CLOCKGATING)) { + /* + * Disable RB sampler datapath DP2 clock gating + * optimization for 1-SP GPU's, by default it is enabled. 
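+	 * The ADRENO_QUIRK_DISABLE_RB_DP2CLOCKGATING flag comes from the
+	 * matching adreno-gpulist.h entry for the affected 1-SP parts.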
+ */ + kgsl_regrmw(device, A5XX_RB_DBG_ECO_CNT, 0, (1 << 9)); + } + /* + * Disable UCHE global filter as SP can invalidate/flush + * independently + */ + kgsl_regwrite(device, A5XX_UCHE_MODE_CNTL, BIT(29)); + /* Set the USE_RETENTION_FLOPS chicken bit */ + kgsl_regwrite(device, A5XX_CP_CHICKEN_DBG, 0x02000000); + + /* Enable ISDB mode if requested */ + if (test_bit(ADRENO_DEVICE_ISDB_ENABLED, &adreno_dev->priv)) { + if (!adreno_active_count_get(adreno_dev)) { + /* + * Disable ME/PFP split timeouts when the debugger is + * enabled because the CP doesn't know when a shader is + * in active debug + */ + kgsl_regwrite(device, A5XX_RBBM_AHB_CNTL1, 0x06FFFFFF); + + /* Force the SP0/SP1 clocks on to enable ISDB */ + kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL_SP0, 0x0); + kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL_SP1, 0x0); + kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL_SP2, 0x0); + kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL_SP3, 0x0); + kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL2_SP0, 0x0); + kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL2_SP1, 0x0); + kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL2_SP2, 0x0); + kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL2_SP3, 0x0); + + /* disable HWCG */ + kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL, 0x0); + kgsl_regwrite(device, A5XX_RBBM_ISDB_CNT, 0x0); + } else + dev_err(device->dev, + "Active count failed while turning on ISDB\n"); + } else { + /* if not in ISDB mode enable ME/PFP split notification */ + kgsl_regwrite(device, A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF); + } + + kgsl_regwrite(device, A5XX_RBBM_AHB_CNTL2, 0x0000003F); + bit = adreno_dev->highest_bank_bit ? + (adreno_dev->highest_bank_bit - 13) & 0x03 : 0; + /* + * Program the highest DDR bank bit that was passed in + * from the DT in a handful of registers. Some of these + * registers will also be written by the UMD, but we + * want to program them in case we happen to use the + * UCHE before the UMD does + */ + + kgsl_regwrite(device, A5XX_TPL1_MODE_CNTL, bit << 7); + kgsl_regwrite(device, A5XX_RB_MODE_CNTL, bit << 1); + if (adreno_is_a540(adreno_dev) || adreno_is_a512(adreno_dev)) + kgsl_regwrite(device, A5XX_UCHE_DBG_ECO_CNTL_2, bit); + + /* Disable All flat shading optimization */ + kgsl_regrmw(device, A5XX_VPC_DBG_ECO_CNTL, 0, 0x1 << 10); + + /* + * VPC corner case with local memory load kill leads to corrupt + * internal state. Normal Disable does not work for all a5x chips. + * So do the following setting to disable it. 
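+	 * The workaround below sets bit 23 in A5XX_VPC_DBG_ECO_CNTL and clears
+	 * bit 18 in A5XX_HLSQ_DBG_ECO_CNTL, gated on the
+	 * ADRENO_QUIRK_DISABLE_LMLOADKILL flag so unaffected parts keep the
+	 * default behaviour.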
+ */ + if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_DISABLE_LMLOADKILL)) { + kgsl_regrmw(device, A5XX_VPC_DBG_ECO_CNTL, 0, 0x1 << 23); + kgsl_regrmw(device, A5XX_HLSQ_DBG_ECO_CNTL, 0x1 << 18, 0); + } + + if (device->mmu.secured) { + kgsl_regwrite(device, A5XX_RBBM_SECVID_TSB_CNTL, 0x0); + kgsl_regwrite(device, A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, + lower_32_bits(KGSL_IOMMU_SECURE_BASE(&device->mmu))); + kgsl_regwrite(device, A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, + upper_32_bits(KGSL_IOMMU_SECURE_BASE(&device->mmu))); + kgsl_regwrite(device, A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, + KGSL_IOMMU_SECURE_SIZE(&device->mmu)); + } + + a5xx_preemption_start(adreno_dev); + a5xx_protect_init(adreno_dev); + + return 0; +} + +/* + * Follow the ME_INIT sequence with a preemption yield to allow the GPU to move + * to a different ringbuffer, if desired + */ +static int _preemption_init( + struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, unsigned int *cmds, + struct kgsl_context *context) +{ + unsigned int *cmds_orig = cmds; + uint64_t gpuaddr = rb->preemption_desc->gpuaddr; + + /* Turn CP protection OFF */ + cmds += cp_protected_mode(adreno_dev, cmds, 0); + /* + * CP during context switch will save context switch info to + * a5xx_cp_preemption_record pointed by CONTEXT_SWITCH_SAVE_ADDR + */ + *cmds++ = cp_type4_packet(A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 1); + *cmds++ = lower_32_bits(gpuaddr); + *cmds++ = cp_type4_packet(A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_HI, 1); + *cmds++ = upper_32_bits(gpuaddr); + + /* Turn CP protection ON */ + cmds += cp_protected_mode(adreno_dev, cmds, 1); + + *cmds++ = cp_type7_packet(CP_PREEMPT_ENABLE_GLOBAL, 1); + *cmds++ = 0; + + *cmds++ = cp_type7_packet(CP_PREEMPT_ENABLE_LOCAL, 1); + *cmds++ = 1; + + /* Enable yield in RB only */ + *cmds++ = cp_type7_packet(CP_YIELD_ENABLE, 1); + *cmds++ = 1; + + *cmds++ = cp_type7_packet(CP_CONTEXT_SWITCH_YIELD, 4); + cmds += cp_gpuaddr(adreno_dev, cmds, 0x0); + *cmds++ = 0; + /* generate interrupt on preemption completion */ + *cmds++ = 1; + + return cmds - cmds_orig; +} + +static int a5xx_post_start(struct adreno_device *adreno_dev) +{ + int ret; + unsigned int *cmds, *start; + struct adreno_ringbuffer *rb = adreno_dev->cur_rb; + + if (!adreno_is_a530(adreno_dev) && + !adreno_is_preemption_enabled(adreno_dev)) + return 0; + + cmds = adreno_ringbuffer_allocspace(rb, 42); + if (IS_ERR(cmds)) { + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + dev_err(device->dev, + "error allocating preemtion init cmds\n"); + return PTR_ERR(cmds); + } + start = cmds; + + /* + * Send a pipeline stat event whenever the GPU gets powered up + * to cause misbehaving perf counters to start ticking + */ + if (adreno_is_a530(adreno_dev)) { + *cmds++ = cp_packet(adreno_dev, CP_EVENT_WRITE, 1); + *cmds++ = 0xF; + } + + if (adreno_is_preemption_enabled(adreno_dev)) { + cmds += _preemption_init(adreno_dev, rb, cmds, NULL); + rb->_wptr = rb->_wptr - (42 - (cmds - start)); + ret = a5xx_ringbuffer_submit(rb, NULL, false); + } else { + rb->_wptr = rb->_wptr - (42 - (cmds - start)); + ret = a5xx_ringbuffer_submit(rb, NULL, true); + } + + if (!ret) { + ret = adreno_spin_idle(adreno_dev, 2000); + if (ret) + a5xx_spin_idle_debug(adreno_dev, + "hw initialization failed to idle\n"); + } + + return ret; +} + +static int a5xx_gpmu_init(struct adreno_device *adreno_dev) +{ + int ret; + + /* Set up LM before initializing the GPMU */ + a5xx_lm_init(adreno_dev); + + /* Enable SPTP based power collapse before enabling GPMU */ + a5xx_enable_pc(adreno_dev); + + ret = 
a5xx_gpmu_start(adreno_dev); + if (ret) + return ret; + + /* Enable limits management */ + a5xx_lm_enable(adreno_dev); + return 0; +} + +static int a5xx_zap_shader_resume(struct kgsl_device *device) +{ + int ret = qcom_scm_set_remote_state(0, 13); + + if (ret) + dev_err(device->dev, + "SCM zap resume call failed: %d\n", ret); + + return ret; +} + +/* + * a5xx_microcode_load() - Load microcode + * @adreno_dev: Pointer to adreno device + */ +static int a5xx_microcode_load(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_firmware *pm4_fw = ADRENO_FW(adreno_dev, ADRENO_FW_PM4); + struct adreno_firmware *pfp_fw = ADRENO_FW(adreno_dev, ADRENO_FW_PFP); + const struct adreno_a5xx_core *a5xx_core = to_a5xx_core(adreno_dev); + uint64_t gpuaddr; + + gpuaddr = pm4_fw->memdesc->gpuaddr; + kgsl_regwrite(device, A5XX_CP_PM4_INSTR_BASE_LO, + lower_32_bits(gpuaddr)); + kgsl_regwrite(device, A5XX_CP_PM4_INSTR_BASE_HI, + upper_32_bits(gpuaddr)); + + gpuaddr = pfp_fw->memdesc->gpuaddr; + kgsl_regwrite(device, A5XX_CP_PFP_INSTR_BASE_LO, + lower_32_bits(gpuaddr)); + kgsl_regwrite(device, A5XX_CP_PFP_INSTR_BASE_HI, + upper_32_bits(gpuaddr)); + + /* + * Do not invoke to load zap shader if MMU does + * not support secure mode. + */ + if (!device->mmu.secured) + return 0; + + if (adreno_dev->zap_loaded && !(ADRENO_FEATURE(adreno_dev, + ADRENO_CPZ_RETENTION))) + return a5xx_zap_shader_resume(device); + + return adreno_zap_shader_load(adreno_dev, a5xx_core->zap_name); +} + +static int _me_init_ucode_workarounds(struct adreno_device *adreno_dev) +{ + switch (ADRENO_GPUREV(adreno_dev)) { + case ADRENO_REV_A510: + return 0x00000001; /* Ucode workaround for token end syncs */ + case ADRENO_REV_A505: + case ADRENO_REV_A506: + case ADRENO_REV_A530: + /* + * Ucode workarounds for token end syncs, + * WFI after every direct-render 3D mode draw and + * WFI after every 2D Mode 3 draw. 
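+		 * 0xB sets bits 0, 1 and 3 of the workaround mask; the value
+		 * is returned through the CP_INIT_UCODE_WORKAROUND_MASK
+		 * ordinal written in a5xx_send_me_init().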
+ */ + return 0x0000000B; + default: + return 0x00000000; /* No ucode workarounds enabled */ + } +} + +/* + * CP_INIT_MAX_CONTEXT bit tells if the multiple hardware contexts can + * be used at once of if they should be serialized + */ +#define CP_INIT_MAX_CONTEXT BIT(0) + +/* Enables register protection mode */ +#define CP_INIT_ERROR_DETECTION_CONTROL BIT(1) + +/* Header dump information */ +#define CP_INIT_HEADER_DUMP BIT(2) /* Reserved */ + +/* Default Reset states enabled for PFP and ME */ +#define CP_INIT_DEFAULT_RESET_STATE BIT(3) + +/* Drawcall filter range */ +#define CP_INIT_DRAWCALL_FILTER_RANGE BIT(4) + +/* Ucode workaround masks */ +#define CP_INIT_UCODE_WORKAROUND_MASK BIT(5) + +#define CP_INIT_MASK (CP_INIT_MAX_CONTEXT | \ + CP_INIT_ERROR_DETECTION_CONTROL | \ + CP_INIT_HEADER_DUMP | \ + CP_INIT_DEFAULT_RESET_STATE | \ + CP_INIT_UCODE_WORKAROUND_MASK) + +static int a5xx_critical_packet_submit(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb) +{ + unsigned int *cmds; + int ret; + + if (!critical_packet_constructed) + return 0; + + cmds = adreno_ringbuffer_allocspace(rb, 4); + if (IS_ERR(cmds)) + return PTR_ERR(cmds); + + *cmds++ = cp_mem_packet(adreno_dev, CP_INDIRECT_BUFFER_PFE, 2, 1); + cmds += cp_gpuaddr(adreno_dev, cmds, adreno_dev->critpkts->gpuaddr); + *cmds++ = crit_pkts_dwords; + + ret = a5xx_ringbuffer_submit(rb, NULL, true); + if (!ret) { + ret = adreno_spin_idle(adreno_dev, 20); + if (ret) + a5xx_spin_idle_debug(adreno_dev, + "Critical packet submission failed to idle\n"); + } + + return ret; +} + +/* + * a5xx_send_me_init() - Initialize ringbuffer + * @adreno_dev: Pointer to adreno device + * @rb: Pointer to the ringbuffer of device + * + * Submit commands for ME initialization, + */ +static int a5xx_send_me_init(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb) +{ + unsigned int *cmds; + int i = 0, ret; + + cmds = adreno_ringbuffer_allocspace(rb, 9); + if (IS_ERR(cmds)) + return PTR_ERR(cmds); + + cmds[i++] = cp_type7_packet(CP_ME_INIT, 8); + + /* Enabled ordinal mask */ + cmds[i++] = CP_INIT_MASK; + + if (CP_INIT_MASK & CP_INIT_MAX_CONTEXT) + cmds[i++] = 0x00000003; + + if (CP_INIT_MASK & CP_INIT_ERROR_DETECTION_CONTROL) + cmds[i++] = 0x20000000; + + if (CP_INIT_MASK & CP_INIT_HEADER_DUMP) { + /* Header dump address */ + cmds[i++] = 0x00000000; + /* Header dump enable and dump size */ + cmds[i++] = 0x00000000; + } + + if (CP_INIT_MASK & CP_INIT_DRAWCALL_FILTER_RANGE) { + /* Start range */ + cmds[i++] = 0x00000000; + /* End range (inclusive) */ + cmds[i++] = 0x00000000; + } + + if (CP_INIT_MASK & CP_INIT_UCODE_WORKAROUND_MASK) + cmds[i++] = _me_init_ucode_workarounds(adreno_dev); + + ret = a5xx_ringbuffer_submit(rb, NULL, true); + if (!ret) { + ret = adreno_spin_idle(adreno_dev, 2000); + if (ret) + a5xx_spin_idle_debug(adreno_dev, + "CP initialization failed to idle\n"); + } + + return ret; +} + +/* + * a5xx_rb_start() - Start the ringbuffer + * @adreno_dev: Pointer to adreno device + */ +static int a5xx_rb_start(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_ringbuffer *rb; + uint64_t addr; + unsigned int *cmds; + int ret, i; + + /* Clear all the ringbuffers */ + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + memset(rb->buffer_desc->hostptr, 0xaa, KGSL_RB_SIZE); + kgsl_sharedmem_writel(device->scratch, + SCRATCH_RPTR_OFFSET(rb->id), 0); + + rb->wptr = 0; + rb->_wptr = 0; + rb->wptr_preempt_end = ~0; + } + + /* Set up the current ringbuffer */ + rb = 
ADRENO_CURRENT_RINGBUFFER(adreno_dev); + addr = SCRATCH_RPTR_GPU_ADDR(device, rb->id); + + kgsl_regwrite(device, A5XX_CP_RB_RPTR_ADDR_LO, lower_32_bits(addr)); + kgsl_regwrite(device, A5XX_CP_RB_RPTR_ADDR_HI, upper_32_bits(addr)); + + /* + * The size of the ringbuffer in the hardware is the log2 + * representation of the size in quadwords (sizedwords / 2). + * Also disable the host RPTR shadow register as it might be unreliable + * in certain circumstances. + */ + + kgsl_regwrite(device, A5XX_CP_RB_CNTL, + A5XX_CP_RB_CNTL_DEFAULT); + + kgsl_regwrite(device, A5XX_CP_RB_BASE, + lower_32_bits(rb->buffer_desc->gpuaddr)); + kgsl_regwrite(device, A5XX_CP_RB_BASE_HI, + upper_32_bits(rb->buffer_desc->gpuaddr)); + + ret = a5xx_microcode_load(adreno_dev); + if (ret) + return ret; + + /* clear ME_HALT to start micro engine */ + + kgsl_regwrite(device, A5XX_CP_ME_CNTL, 0); + + ret = a5xx_send_me_init(adreno_dev, rb); + if (ret) + return ret; + + /* Run the critical packets if we need to */ + if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_CRITICAL_PACKETS)) { + ret = a5xx_critical_packet_submit(adreno_dev, rb); + if (ret) + return ret; + } + + /* + * Try to execute the zap shader if it exists, otherwise just try + * directly writing to the control register + */ + if (!adreno_dev->zap_loaded) + kgsl_regwrite(device, A5XX_RBBM_SECVID_TRUST_CNTL, 0); + else { + cmds = adreno_ringbuffer_allocspace(rb, 2); + if (IS_ERR(cmds)) + return PTR_ERR(cmds); + + *cmds++ = cp_packet(adreno_dev, CP_SET_SECURE_MODE, 1); + *cmds++ = 0; + + ret = a5xx_ringbuffer_submit(rb, NULL, true); + if (!ret) { + ret = adreno_spin_idle(adreno_dev, 2000); + if (ret) { + a5xx_spin_idle_debug(adreno_dev, + "Switch to unsecure failed to idle\n"); + return ret; + } + } + } + + ret = a5xx_gpmu_init(adreno_dev); + if (ret) + return ret; + + a5xx_post_start(adreno_dev); + + return 0; +} + +/* + * a5xx_microcode_read() - Read microcode + * @adreno_dev: Pointer to adreno device + */ +static int a5xx_microcode_read(struct adreno_device *adreno_dev) +{ + int ret; + struct adreno_firmware *pm4_fw = ADRENO_FW(adreno_dev, ADRENO_FW_PM4); + struct adreno_firmware *pfp_fw = ADRENO_FW(adreno_dev, ADRENO_FW_PFP); + const struct adreno_a5xx_core *a5xx_core = to_a5xx_core(adreno_dev); + + ret = adreno_get_firmware(adreno_dev, a5xx_core->pm4fw_name, pm4_fw); + if (ret) + return ret; + + ret = adreno_get_firmware(adreno_dev, a5xx_core->pfpfw_name, pfp_fw); + if (ret) + return ret; + + ret = _load_gpmu_firmware(adreno_dev); + if (ret) + return ret; + + _load_regfile(adreno_dev); + + return ret; +} + +/* Register offset defines for A5XX, in order of enum adreno_regs */ +static unsigned int a5xx_register_offsets[ADRENO_REG_REGISTER_MAX] = { + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE, A5XX_CP_RB_BASE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE_HI, A5XX_CP_RB_BASE_HI), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR_ADDR_LO, + A5XX_CP_RB_RPTR_ADDR_LO), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR_ADDR_HI, + A5XX_CP_RB_RPTR_ADDR_HI), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR, A5XX_CP_RB_RPTR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_WPTR, A5XX_CP_RB_WPTR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_CNTL, A5XX_CP_ME_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_CNTL, A5XX_CP_RB_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE, A5XX_CP_IB1_BASE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE_HI, A5XX_CP_IB1_BASE_HI), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BUFSZ, A5XX_CP_IB1_BUFSZ), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE, A5XX_CP_IB2_BASE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE_HI, 
A5XX_CP_IB2_BASE_HI), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BUFSZ, A5XX_CP_IB2_BUFSZ), + ADRENO_REG_DEFINE(ADRENO_REG_CP_PROTECT_REG_0, A5XX_CP_PROTECT_REG_0), + ADRENO_REG_DEFINE(ADRENO_REG_CP_PREEMPT, A5XX_CP_CONTEXT_SWITCH_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_CP_PREEMPT_DEBUG, ADRENO_REG_SKIP), + ADRENO_REG_DEFINE(ADRENO_REG_CP_PREEMPT_DISABLE, ADRENO_REG_SKIP), + ADRENO_REG_DEFINE(ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_LO, + A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO), + ADRENO_REG_DEFINE(ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_HI, + A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_STATUS, A5XX_RBBM_STATUS), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_STATUS3, A5XX_RBBM_STATUS3), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_INT_0_MASK, A5XX_RBBM_INT_0_MASK), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_CLOCK_CTL, A5XX_RBBM_CLOCK_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SW_RESET_CMD, A5XX_RBBM_SW_RESET_CMD), + ADRENO_REG_DEFINE(ADRENO_REG_GPMU_POWER_COUNTER_ENABLE, + A5XX_GPMU_POWER_COUNTER_ENABLE), +}; + +static void a5xx_cp_hw_err_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned int status1, status2; + + kgsl_regread(device, A5XX_CP_INTERRUPT_STATUS, &status1); + + if (status1 & BIT(A5XX_CP_OPCODE_ERROR)) { + unsigned int val; + + kgsl_regwrite(device, A5XX_CP_PFP_STAT_ADDR, 0); + + /* + * A5XX_CP_PFP_STAT_DATA is indexed, so read it twice to get the + * value we want + */ + kgsl_regread(device, A5XX_CP_PFP_STAT_DATA, &val); + kgsl_regread(device, A5XX_CP_PFP_STAT_DATA, &val); + + dev_crit_ratelimited(device->dev, + "ringbuffer opcode error | possible opcode=0x%8.8X\n", + val); + } + if (status1 & BIT(A5XX_CP_RESERVED_BIT_ERROR)) + dev_crit_ratelimited(device->dev, + "ringbuffer reserved bit error interrupt\n"); + if (status1 & BIT(A5XX_CP_HW_FAULT_ERROR)) { + kgsl_regread(device, A5XX_CP_HW_FAULT, &status2); + dev_crit_ratelimited(device->dev, + "CP | Ringbuffer HW fault | status=%x\n", + status2); + } + if (status1 & BIT(A5XX_CP_DMA_ERROR)) + dev_crit_ratelimited(device->dev, "CP | DMA error\n"); + if (status1 & BIT(A5XX_CP_REGISTER_PROTECTION_ERROR)) { + kgsl_regread(device, A5XX_CP_PROTECT_STATUS, &status2); + dev_crit_ratelimited(device->dev, + "CP | Protected mode error| %s | addr=%x | status=%x\n", + status2 & (1 << 24) ? "WRITE" : "READ", + (status2 & 0xFFFFF) >> 2, status2); + } + if (status1 & BIT(A5XX_CP_AHB_ERROR)) { + kgsl_regread(device, A5XX_CP_AHB_FAULT, &status2); + dev_crit_ratelimited(device->dev, + "ringbuffer AHB error interrupt | status=%x\n", + status2); + } +} + +static void a5xx_err_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned int reg; + + switch (bit) { + case A5XX_INT_RBBM_AHB_ERROR: { + kgsl_regread(device, A5XX_RBBM_AHB_ERROR_STATUS, ®); + + /* + * Return the word address of the erroring register so that it + * matches the register specification + */ + dev_crit_ratelimited(device->dev, + "RBBM | AHB bus error | %s | addr=%x | ports=%x:%x\n", + reg & (1 << 28) ? 
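+			/*
+			 * Field decode as printed below: bit 28 selects write
+			 * vs. read, bits 19..2 hold the word address, and the
+			 * two "ports" values come from bits 21..20 and 27..24.
+			 */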
"WRITE" : "READ", + (reg & 0xFFFFF) >> 2, + (reg >> 20) & 0x3, + (reg >> 24) & 0xF); + + /* Clear the error */ + kgsl_regwrite(device, A5XX_RBBM_AHB_CMD, (1 << 4)); + break; + } + case A5XX_INT_RBBM_TRANSFER_TIMEOUT: + dev_crit_ratelimited(device->dev, + "RBBM: AHB transfer timeout\n"); + break; + case A5XX_INT_RBBM_ME_MS_TIMEOUT: + kgsl_regread(device, A5XX_RBBM_AHB_ME_SPLIT_STATUS, ®); + dev_crit_ratelimited(device->dev, + "RBBM | ME master split timeout | status=%x\n", + reg); + break; + case A5XX_INT_RBBM_PFP_MS_TIMEOUT: + kgsl_regread(device, A5XX_RBBM_AHB_PFP_SPLIT_STATUS, ®); + dev_crit_ratelimited(device->dev, + "RBBM | PFP master split timeout | status=%x\n", + reg); + break; + case A5XX_INT_RBBM_ETS_MS_TIMEOUT: + dev_crit_ratelimited(device->dev, + "RBBM: ME master split timeout\n"); + break; + case A5XX_INT_RBBM_ATB_ASYNC_OVERFLOW: + dev_crit_ratelimited(device->dev, + "RBBM: ATB ASYNC overflow\n"); + break; + case A5XX_INT_RBBM_ATB_BUS_OVERFLOW: + dev_crit_ratelimited(device->dev, + "RBBM: ATB bus overflow\n"); + break; + case A5XX_INT_UCHE_OOB_ACCESS: + dev_crit_ratelimited(device->dev, + "UCHE: Out of bounds access\n"); + break; + case A5XX_INT_UCHE_TRAP_INTR: + dev_crit_ratelimited(device->dev, "UCHE: Trap interrupt\n"); + break; + case A5XX_INT_GPMU_VOLTAGE_DROOP: + dev_crit_ratelimited(device->dev, "GPMU: Voltage droop\n"); + break; + default: + dev_crit_ratelimited(device->dev, "Unknown interrupt %d\n", + bit); + } +} + +static void a5xx_irq_storm_worker(struct work_struct *work) +{ + struct adreno_device *adreno_dev = container_of(work, + struct adreno_device, irq_storm_work); + struct kgsl_device *device = &adreno_dev->dev; + unsigned int status; + + mutex_lock(&device->mutex); + + /* Wait for the storm to clear up */ + do { + kgsl_regwrite(device, A5XX_RBBM_INT_CLEAR_CMD, + BIT(A5XX_INT_CP_CACHE_FLUSH_TS)); + kgsl_regread(device, A5XX_RBBM_INT_0_STATUS, &status); + } while (status & BIT(A5XX_INT_CP_CACHE_FLUSH_TS)); + + /* Re-enable the interrupt bit in the mask */ + adreno_dev->irq_mask |= BIT(A5XX_INT_CP_CACHE_FLUSH_TS); + kgsl_regwrite(device, A5XX_RBBM_INT_0_MASK, adreno_dev->irq_mask); + clear_bit(ADRENO_DEVICE_CACHE_FLUSH_TS_SUSPENDED, &adreno_dev->priv); + + dev_warn(device->dev, "Re-enabled A5XX_INT_CP_CACHE_FLUSH_TS\n"); + mutex_unlock(&device->mutex); + + /* Reschedule just to make sure everything retires */ + adreno_dispatcher_schedule(device); +} + +static void a5xx_cp_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = &adreno_dev->dev; + unsigned int cur; + static unsigned int count; + static unsigned int prev; + + if (test_bit(ADRENO_DEVICE_CACHE_FLUSH_TS_SUSPENDED, &adreno_dev->priv)) + return; + + kgsl_sharedmem_readl(device->memstore, &cur, + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, + ref_wait_ts)); + + /* + * prev holds a previously read value + * from memory. It should be changed by the GPU with every + * interrupt. If the value we know about and the value we just + * read are the same, then we are likely in a storm. + * If this happens twice, disable the interrupt in the mask + * so the dispatcher can take care of the issue. It is then + * up to the dispatcher to re-enable the mask once all work + * is done and the storm has ended. 
+ */ + if (prev == cur) { + count++; + if (count == 2) { + /* disable interrupt from the mask */ + set_bit(ADRENO_DEVICE_CACHE_FLUSH_TS_SUSPENDED, + &adreno_dev->priv); + + adreno_dev->irq_mask &= + ~BIT(A5XX_INT_CP_CACHE_FLUSH_TS); + + kgsl_regwrite(device, A5XX_RBBM_INT_0_MASK, + adreno_dev->irq_mask); + + kgsl_schedule_work(&adreno_dev->irq_storm_work); + + return; + } + } else { + count = 0; + prev = cur; + } + + a5xx_preemption_trigger(adreno_dev); + adreno_dispatcher_schedule(device); +} + +static const char *gpmu_int_msg[32] = { + [FW_INTR_INFO] = "FW_INTR_INFO", + [LLM_ACK_ERR_INTR] = "LLM_ACK_ERR_INTR", + [ISENS_TRIM_ERR_INTR] = "ISENS_TRIM_ERR_INTR", + [ISENS_ERR_INTR] = "ISENS_ERR_INTR", + [ISENS_IDLE_ERR_INTR] = "ISENS_IDLE_ERR_INTR", + [ISENS_PWR_ON_ERR_INTR] = "ISENS_PWR_ON_ERR_INTR", + [6 ... 30] = "", + [WDOG_EXPITED] = "WDOG_EXPITED"}; + +static void a5xx_gpmu_int_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned int reg, i; + + kgsl_regread(device, A5XX_GPMU_RBBM_INTR_INFO, ®); + + if (reg & (~VALID_GPMU_IRQ)) { + dev_crit_ratelimited(device->dev, + "GPMU: Unknown IRQ mask 0x%08lx in 0x%08x\n", + reg & (~VALID_GPMU_IRQ), reg); + } + + for (i = 0; i < 32; i++) + switch (reg & BIT(i)) { + case BIT(WDOG_EXPITED): + if (test_and_clear_bit(ADRENO_DEVICE_GPMU_INITIALIZED, + &adreno_dev->priv)) { + /* Stop GPMU */ + kgsl_regwrite(device, + A5XX_GPMU_CM3_SYSRESET, 1); + kgsl_schedule_work(&adreno_dev->gpmu_work); + } + /* fallthrough */ + case BIT(FW_INTR_INFO): + case BIT(LLM_ACK_ERR_INTR): + case BIT(ISENS_TRIM_ERR_INTR): + case BIT(ISENS_ERR_INTR): + case BIT(ISENS_IDLE_ERR_INTR): + case BIT(ISENS_PWR_ON_ERR_INTR): + dev_crit_ratelimited(device->dev, + "GPMU: interrupt %s(%08lx)\n", + gpmu_int_msg[i], + BIT(i)); + break; + } +} + +/* + * a5x_gpc_err_int_callback() - Isr for GPC error interrupts + * @adreno_dev: Pointer to device + * @bit: Interrupt bit + */ +static void a5x_gpc_err_int_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + /* + * GPC error is typically the result of mistake SW programming. + * Force GPU fault for this interrupt so that we can debug it + * with help of register dump. 
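+	 * adreno_irqctrl(adreno_dev, 0) below masks further GPU interrupts
+	 * before the soft fault hands recovery over to the dispatcher.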
+ */ + + dev_crit(device->dev, "RBBM: GPC error\n"); + adreno_irqctrl(adreno_dev, 0); + + /* Trigger a fault in the dispatcher - this will effect a restart */ + adreno_dispatcher_fault(adreno_dev, ADRENO_SOFT_FAULT); + adreno_dispatcher_schedule(device); +} + +u64 a5xx_read_alwayson(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 lo = 0, hi = 0; + + kgsl_regread(device, A5XX_RBBM_ALWAYSON_COUNTER_LO, &lo); + + /* The upper 32 bits are only reliable on A540 targets */ + if (adreno_is_a540(adreno_dev)) + kgsl_regread(device, A5XX_RBBM_ALWAYSON_COUNTER_HI, &hi); + + return (((u64) hi) << 32) | lo; +} + + +static const struct adreno_irq_funcs a5xx_irq_funcs[32] = { + ADRENO_IRQ_CALLBACK(NULL), /* 0 - RBBM_GPU_IDLE */ + ADRENO_IRQ_CALLBACK(a5xx_err_callback), /* 1 - RBBM_AHB_ERROR */ + ADRENO_IRQ_CALLBACK(a5xx_err_callback), /* 2 - RBBM_TRANSFER_TIMEOUT */ + /* 3 - RBBM_ME_MASTER_SPLIT_TIMEOUT */ + ADRENO_IRQ_CALLBACK(a5xx_err_callback), + /* 4 - RBBM_PFP_MASTER_SPLIT_TIMEOUT */ + ADRENO_IRQ_CALLBACK(a5xx_err_callback), + /* 5 - RBBM_ETS_MASTER_SPLIT_TIMEOUT */ + ADRENO_IRQ_CALLBACK(a5xx_err_callback), + /* 6 - RBBM_ATB_ASYNC_OVERFLOW */ + ADRENO_IRQ_CALLBACK(a5xx_err_callback), + ADRENO_IRQ_CALLBACK(a5x_gpc_err_int_callback), /* 7 - GPC_ERR */ + ADRENO_IRQ_CALLBACK(a5xx_preempt_callback),/* 8 - CP_SW */ + ADRENO_IRQ_CALLBACK(a5xx_cp_hw_err_callback), /* 9 - CP_HW_ERROR */ + /* 10 - CP_CCU_FLUSH_DEPTH_TS */ + ADRENO_IRQ_CALLBACK(NULL), + /* 11 - CP_CCU_FLUSH_COLOR_TS */ + ADRENO_IRQ_CALLBACK(NULL), + /* 12 - CP_CCU_RESOLVE_TS */ + ADRENO_IRQ_CALLBACK(NULL), + ADRENO_IRQ_CALLBACK(NULL), /* 13 - CP_IB2_INT */ + ADRENO_IRQ_CALLBACK(NULL), /* 14 - CP_IB1_INT */ + ADRENO_IRQ_CALLBACK(NULL), /* 15 - CP_RB_INT */ + /* 16 - CCP_UNUSED_1 */ + ADRENO_IRQ_CALLBACK(NULL), + ADRENO_IRQ_CALLBACK(NULL), /* 17 - CP_RB_DONE_TS */ + ADRENO_IRQ_CALLBACK(NULL), /* 18 - CP_WT_DONE_TS */ + ADRENO_IRQ_CALLBACK(NULL), /* 19 - UNKNOWN_1 */ + ADRENO_IRQ_CALLBACK(a5xx_cp_callback), /* 20 - CP_CACHE_FLUSH_TS */ + /* 21 - UNUSED_2 */ + ADRENO_IRQ_CALLBACK(NULL), + ADRENO_IRQ_CALLBACK(a5xx_err_callback), /* 22 - RBBM_ATB_BUS_OVERFLOW */ + /* 23 - MISC_HANG_DETECT */ + ADRENO_IRQ_CALLBACK(adreno_hang_int_callback), + ADRENO_IRQ_CALLBACK(a5xx_err_callback), /* 24 - UCHE_OOB_ACCESS */ + ADRENO_IRQ_CALLBACK(a5xx_err_callback), /* 25 - UCHE_TRAP_INTR */ + ADRENO_IRQ_CALLBACK(NULL), /* 26 - DEBBUS_INTR_0 */ + ADRENO_IRQ_CALLBACK(NULL), /* 27 - DEBBUS_INTR_1 */ + ADRENO_IRQ_CALLBACK(a5xx_err_callback), /* 28 - GPMU_VOLTAGE_DROOP */ + ADRENO_IRQ_CALLBACK(a5xx_gpmu_int_callback), /* 29 - GPMU_FIRMWARE */ + ADRENO_IRQ_CALLBACK(NULL), /* 30 - ISDB_CPU_IRQ */ + ADRENO_IRQ_CALLBACK(NULL), /* 31 - ISDB_UNDER_DEBUG */ +}; + +static irqreturn_t a5xx_irq_handler(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + irqreturn_t ret; + u32 status; + + kgsl_regread(device, A5XX_RBBM_INT_0_STATUS, &status); + + /* + * Clear all the interrupt bits except A5XX_INT_RBBM_AHB_ERROR. 
+ * The interrupt will stay asserted until it is cleared by the handler + * so don't touch it yet to avoid a storm + */ + kgsl_regwrite(device, A5XX_RBBM_INT_CLEAR_CMD, + status & ~A5XX_INT_RBBM_AHB_ERROR); + + /* Call the helper function for callbacks */ + ret = adreno_irq_callbacks(adreno_dev, a5xx_irq_funcs, status); + + trace_kgsl_a5xx_irq_status(adreno_dev, status); + + /* Now chear AHB_ERROR if it was set */ + if (status & A5XX_INT_RBBM_AHB_ERROR) + kgsl_regwrite(device, A5XX_RBBM_INT_CLEAR_CMD, + A5XX_INT_RBBM_AHB_ERROR); + + return ret; +} + +static bool a5xx_hw_isidle(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 status; + + /* + * Due to CRC idle throttling the GPU idle hysteresis on a540 can take + * up to 5uS to expire + */ + if (adreno_is_a540(adreno_dev)) + udelay(5); + + kgsl_regread(device, A5XX_RBBM_STATUS, &status); + + if (status & 0xfffffffe) + return false; + + kgsl_regread(device, A5XX_RBBM_INT_0_STATUS, &status); + + /* Return busy if a interrupt is pending */ + return !((status & adreno_dev->irq_mask) || + atomic_read(&adreno_dev->pending_irq_refcnt)); +} + +static int a5xx_clear_pending_transactions(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 mask = A5XX_VBIF_XIN_HALT_CTRL0_MASK; + int ret; + + kgsl_regwrite(device, A5XX_VBIF_XIN_HALT_CTRL0, mask); + ret = adreno_wait_for_halt_ack(device, A5XX_VBIF_XIN_HALT_CTRL1, mask); + kgsl_regwrite(device, A5XX_VBIF_XIN_HALT_CTRL0, 0); + + return ret; +} + +static bool a5xx_is_hw_collapsible(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned int reg; + + if (!adreno_isidle(adreno_dev)) + return false; + + /* If feature is not supported or enabled, no worry */ + if (!adreno_dev->sptp_pc_enabled) + return true; + kgsl_regread(device, A5XX_GPMU_SP_PWR_CLK_STATUS, ®); + if (reg & BIT(20)) + return false; + kgsl_regread(device, A5XX_GPMU_RBCCU_PWR_CLK_STATUS, ®); + return !(reg & BIT(20)); +} + +static void a5xx_remove(struct adreno_device *adreno_dev) +{ + if (ADRENO_FEATURE(adreno_dev, ADRENO_PREEMPTION)) + del_timer(&adreno_dev->preempt.timer); +} + +static void a5xx_power_stats(struct adreno_device *adreno_dev, + struct kgsl_power_stats *stats) +{ + static u32 rbbm0_hi; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + s64 freq = kgsl_pwrctrl_active_freq(&device->pwrctrl) / 1000000; + struct adreno_busy_data *busy = &adreno_dev->busy_data; + s64 gpu_busy = 0; + u32 lo, hi; + s64 adj; + + /* Sometimes this counter can go backwards, so try to detect that */ + kgsl_regread(device, A5XX_RBBM_PERFCTR_RBBM_0_LO, &lo); + kgsl_regread(device, A5XX_RBBM_PERFCTR_RBBM_0_HI, &hi); + + if (busy->gpu_busy) { + if (lo < busy->gpu_busy) { + if (hi == rbbm0_hi) { + dev_warn_once(device->dev, + "abmormal value from RBBM_0 perfcounter: %x %x\n", + lo, busy->gpu_busy); + gpu_busy = 0; + } else { + gpu_busy = (UINT_MAX - busy->gpu_busy) + lo; + rbbm0_hi = hi; + } + } else + gpu_busy = lo - busy->gpu_busy; + } else { + gpu_busy = 0; + rbbm0_hi = 0; + } + + busy->gpu_busy = lo; + + adj = a5xx_read_throttling_counters(adreno_dev); + if (-adj <= gpu_busy) + gpu_busy += adj; + else + gpu_busy = 0; + + stats->busy_time = gpu_busy / freq; + + if (adreno_is_a530(adreno_dev) && adreno_dev->lm_threshold_count) + kgsl_regread(device, adreno_dev->lm_threshold_count, + &adreno_dev->lm_threshold_cross); + else if (adreno_is_a540(adreno_dev)) + adreno_dev->lm_threshold_cross = adj; + + if 
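+	/*
+	 * The VBIF RAM cycle and starvation deltas below are only collected
+	 * when bus DCVS control is enabled.
+	 */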
(!device->pwrctrl.bus_control) + return; + + stats->ram_time = counter_delta(device, adreno_dev->ram_cycles_lo, + &busy->bif_ram_cycles); + + stats->ram_wait = counter_delta(device, adreno_dev->starved_ram_lo, + &busy->bif_starved_ram); +} + +static int a5xx_setproperty(struct kgsl_device_private *dev_priv, + u32 type, void __user *value, u32 sizebytes) +{ + struct kgsl_device *device = dev_priv->device; + u32 enable; + + if (type != KGSL_PROP_PWRCTRL) + return -ENODEV; + + if (sizebytes != sizeof(enable)) + return -EINVAL; + + if (copy_from_user(&enable, value, sizeof(enable))) + return -EFAULT; + + mutex_lock(&device->mutex); + + if (enable) { + device->pwrctrl.ctrl_flags = 0; + kgsl_pwrscale_enable(device); + } else { + kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE); + device->pwrctrl.ctrl_flags = KGSL_PWR_ON; + kgsl_pwrscale_disable(device, true); + } + + mutex_unlock(&device->mutex); + + return 0; +} + +#ifdef CONFIG_QCOM_KGSL_CORESIGHT +static struct adreno_coresight_register a5xx_coresight_registers[] = { + { A5XX_RBBM_CFG_DBGBUS_SEL_A }, + { A5XX_RBBM_CFG_DBGBUS_SEL_B }, + { A5XX_RBBM_CFG_DBGBUS_SEL_C }, + { A5XX_RBBM_CFG_DBGBUS_SEL_D }, + { A5XX_RBBM_CFG_DBGBUS_CNTLT }, + { A5XX_RBBM_CFG_DBGBUS_CNTLM }, + { A5XX_RBBM_CFG_DBGBUS_OPL }, + { A5XX_RBBM_CFG_DBGBUS_OPE }, + { A5XX_RBBM_CFG_DBGBUS_IVTL_0 }, + { A5XX_RBBM_CFG_DBGBUS_IVTL_1 }, + { A5XX_RBBM_CFG_DBGBUS_IVTL_2 }, + { A5XX_RBBM_CFG_DBGBUS_IVTL_3 }, + { A5XX_RBBM_CFG_DBGBUS_MASKL_0 }, + { A5XX_RBBM_CFG_DBGBUS_MASKL_1 }, + { A5XX_RBBM_CFG_DBGBUS_MASKL_2 }, + { A5XX_RBBM_CFG_DBGBUS_MASKL_3 }, + { A5XX_RBBM_CFG_DBGBUS_BYTEL_0 }, + { A5XX_RBBM_CFG_DBGBUS_BYTEL_1 }, + { A5XX_RBBM_CFG_DBGBUS_IVTE_0 }, + { A5XX_RBBM_CFG_DBGBUS_IVTE_1 }, + { A5XX_RBBM_CFG_DBGBUS_IVTE_2 }, + { A5XX_RBBM_CFG_DBGBUS_IVTE_3 }, + { A5XX_RBBM_CFG_DBGBUS_MASKE_0 }, + { A5XX_RBBM_CFG_DBGBUS_MASKE_1 }, + { A5XX_RBBM_CFG_DBGBUS_MASKE_2 }, + { A5XX_RBBM_CFG_DBGBUS_MASKE_3 }, + { A5XX_RBBM_CFG_DBGBUS_NIBBLEE }, + { A5XX_RBBM_CFG_DBGBUS_PTRC0 }, + { A5XX_RBBM_CFG_DBGBUS_PTRC1 }, + { A5XX_RBBM_CFG_DBGBUS_LOADREG }, + { A5XX_RBBM_CFG_DBGBUS_IDX }, + { A5XX_RBBM_CFG_DBGBUS_CLRC }, + { A5XX_RBBM_CFG_DBGBUS_LOADIVT }, + { A5XX_RBBM_CFG_DBGBUS_EVENT_LOGIC }, + { A5XX_RBBM_CFG_DBGBUS_OVER }, + { A5XX_RBBM_CFG_DBGBUS_COUNT0 }, + { A5XX_RBBM_CFG_DBGBUS_COUNT1 }, + { A5XX_RBBM_CFG_DBGBUS_COUNT2 }, + { A5XX_RBBM_CFG_DBGBUS_COUNT3 }, + { A5XX_RBBM_CFG_DBGBUS_COUNT4 }, + { A5XX_RBBM_CFG_DBGBUS_COUNT5 }, + { A5XX_RBBM_CFG_DBGBUS_TRACE_ADDR }, + { A5XX_RBBM_CFG_DBGBUS_TRACE_BUF0 }, + { A5XX_RBBM_CFG_DBGBUS_TRACE_BUF1 }, + { A5XX_RBBM_CFG_DBGBUS_TRACE_BUF2 }, + { A5XX_RBBM_CFG_DBGBUS_TRACE_BUF3 }, + { A5XX_RBBM_CFG_DBGBUS_TRACE_BUF4 }, + { A5XX_RBBM_CFG_DBGBUS_MISR0 }, + { A5XX_RBBM_CFG_DBGBUS_MISR1 }, + { A5XX_RBBM_AHB_DBG_CNTL }, + { A5XX_RBBM_READ_AHB_THROUGH_DBG }, + { A5XX_RBBM_DBG_LO_HI_GPIO }, + { A5XX_RBBM_EXT_TRACE_BUS_CNTL }, + { A5XX_RBBM_EXT_VBIF_DBG_CNTL }, +}; + +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_a, &a5xx_coresight_registers[0]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_b, &a5xx_coresight_registers[1]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_c, &a5xx_coresight_registers[2]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_d, &a5xx_coresight_registers[3]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_cntlt, &a5xx_coresight_registers[4]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_cntlm, &a5xx_coresight_registers[5]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_opl, &a5xx_coresight_registers[6]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ope, 
&a5xx_coresight_registers[7]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_0, &a5xx_coresight_registers[8]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_1, &a5xx_coresight_registers[9]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_2, &a5xx_coresight_registers[10]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_3, &a5xx_coresight_registers[11]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_0, &a5xx_coresight_registers[12]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_1, &a5xx_coresight_registers[13]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_2, &a5xx_coresight_registers[14]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_3, &a5xx_coresight_registers[15]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_bytel_0, &a5xx_coresight_registers[16]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_bytel_1, &a5xx_coresight_registers[17]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_0, &a5xx_coresight_registers[18]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_1, &a5xx_coresight_registers[19]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_2, &a5xx_coresight_registers[20]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_3, &a5xx_coresight_registers[21]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_0, &a5xx_coresight_registers[22]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_1, &a5xx_coresight_registers[23]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_2, &a5xx_coresight_registers[24]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_3, &a5xx_coresight_registers[25]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_nibblee, &a5xx_coresight_registers[26]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ptrc0, &a5xx_coresight_registers[27]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ptrc1, &a5xx_coresight_registers[28]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_loadreg, &a5xx_coresight_registers[29]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_idx, &a5xx_coresight_registers[30]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_clrc, &a5xx_coresight_registers[31]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_loadivt, &a5xx_coresight_registers[32]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_event_logic, + &a5xx_coresight_registers[33]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_over, &a5xx_coresight_registers[34]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_count0, &a5xx_coresight_registers[35]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_count1, &a5xx_coresight_registers[36]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_count2, &a5xx_coresight_registers[37]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_count3, &a5xx_coresight_registers[38]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_count4, &a5xx_coresight_registers[39]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_count5, &a5xx_coresight_registers[40]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_addr, + &a5xx_coresight_registers[41]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf0, + &a5xx_coresight_registers[42]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf1, + &a5xx_coresight_registers[43]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf2, + &a5xx_coresight_registers[44]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf3, + &a5xx_coresight_registers[45]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf4, + &a5xx_coresight_registers[46]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_misr0, &a5xx_coresight_registers[47]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_misr1, &a5xx_coresight_registers[48]); +static ADRENO_CORESIGHT_ATTR(ahb_dbg_cntl, &a5xx_coresight_registers[49]); +static ADRENO_CORESIGHT_ATTR(read_ahb_through_dbg, + 
&a5xx_coresight_registers[50]); +static ADRENO_CORESIGHT_ATTR(dbg_lo_hi_gpio, &a5xx_coresight_registers[51]); +static ADRENO_CORESIGHT_ATTR(ext_trace_bus_cntl, &a5xx_coresight_registers[52]); +static ADRENO_CORESIGHT_ATTR(ext_vbif_dbg_cntl, &a5xx_coresight_registers[53]); + +static struct attribute *a5xx_coresight_attrs[] = { + &coresight_attr_cfg_dbgbus_sel_a.attr.attr, + &coresight_attr_cfg_dbgbus_sel_b.attr.attr, + &coresight_attr_cfg_dbgbus_sel_c.attr.attr, + &coresight_attr_cfg_dbgbus_sel_d.attr.attr, + &coresight_attr_cfg_dbgbus_cntlt.attr.attr, + &coresight_attr_cfg_dbgbus_cntlm.attr.attr, + &coresight_attr_cfg_dbgbus_opl.attr.attr, + &coresight_attr_cfg_dbgbus_ope.attr.attr, + &coresight_attr_cfg_dbgbus_ivtl_0.attr.attr, + &coresight_attr_cfg_dbgbus_ivtl_1.attr.attr, + &coresight_attr_cfg_dbgbus_ivtl_2.attr.attr, + &coresight_attr_cfg_dbgbus_ivtl_3.attr.attr, + &coresight_attr_cfg_dbgbus_maskl_0.attr.attr, + &coresight_attr_cfg_dbgbus_maskl_1.attr.attr, + &coresight_attr_cfg_dbgbus_maskl_2.attr.attr, + &coresight_attr_cfg_dbgbus_maskl_3.attr.attr, + &coresight_attr_cfg_dbgbus_bytel_0.attr.attr, + &coresight_attr_cfg_dbgbus_bytel_1.attr.attr, + &coresight_attr_cfg_dbgbus_ivte_0.attr.attr, + &coresight_attr_cfg_dbgbus_ivte_1.attr.attr, + &coresight_attr_cfg_dbgbus_ivte_2.attr.attr, + &coresight_attr_cfg_dbgbus_ivte_3.attr.attr, + &coresight_attr_cfg_dbgbus_maske_0.attr.attr, + &coresight_attr_cfg_dbgbus_maske_1.attr.attr, + &coresight_attr_cfg_dbgbus_maske_2.attr.attr, + &coresight_attr_cfg_dbgbus_maske_3.attr.attr, + &coresight_attr_cfg_dbgbus_nibblee.attr.attr, + &coresight_attr_cfg_dbgbus_ptrc0.attr.attr, + &coresight_attr_cfg_dbgbus_ptrc1.attr.attr, + &coresight_attr_cfg_dbgbus_loadreg.attr.attr, + &coresight_attr_cfg_dbgbus_idx.attr.attr, + &coresight_attr_cfg_dbgbus_clrc.attr.attr, + &coresight_attr_cfg_dbgbus_loadivt.attr.attr, + &coresight_attr_cfg_dbgbus_event_logic.attr.attr, + &coresight_attr_cfg_dbgbus_over.attr.attr, + &coresight_attr_cfg_dbgbus_count0.attr.attr, + &coresight_attr_cfg_dbgbus_count1.attr.attr, + &coresight_attr_cfg_dbgbus_count2.attr.attr, + &coresight_attr_cfg_dbgbus_count3.attr.attr, + &coresight_attr_cfg_dbgbus_count4.attr.attr, + &coresight_attr_cfg_dbgbus_count5.attr.attr, + &coresight_attr_cfg_dbgbus_trace_addr.attr.attr, + &coresight_attr_cfg_dbgbus_trace_buf0.attr.attr, + &coresight_attr_cfg_dbgbus_trace_buf1.attr.attr, + &coresight_attr_cfg_dbgbus_trace_buf2.attr.attr, + &coresight_attr_cfg_dbgbus_trace_buf3.attr.attr, + &coresight_attr_cfg_dbgbus_trace_buf4.attr.attr, + &coresight_attr_cfg_dbgbus_misr0.attr.attr, + &coresight_attr_cfg_dbgbus_misr1.attr.attr, + &coresight_attr_ahb_dbg_cntl.attr.attr, + &coresight_attr_read_ahb_through_dbg.attr.attr, + &coresight_attr_dbg_lo_hi_gpio.attr.attr, + &coresight_attr_ext_trace_bus_cntl.attr.attr, + &coresight_attr_ext_vbif_dbg_cntl.attr.attr, + NULL, +}; + +static const struct attribute_group a5xx_coresight_group = { + .attrs = a5xx_coresight_attrs, +}; + +static const struct attribute_group *a5xx_coresight_groups[] = { + &a5xx_coresight_group, + NULL, +}; + +static struct adreno_coresight a5xx_coresight = { + .registers = a5xx_coresight_registers, + .count = ARRAY_SIZE(a5xx_coresight_registers), + .groups = a5xx_coresight_groups, +}; +#endif + +const struct adreno_gpudev adreno_a5xx_gpudev = { + .reg_offsets = a5xx_register_offsets, +#ifdef CONFIG_QCOM_KGSL_CORESIGHT + .coresight = {&a5xx_coresight}, +#endif + .probe = a5xx_probe, + .start = a5xx_start, + .snapshot = a5xx_snapshot, + .init = a5xx_init, 
+ .irq_handler = a5xx_irq_handler, + .rb_start = a5xx_rb_start, + .regulator_enable = a5xx_regulator_enable, + .regulator_disable = a5xx_regulator_disable, + .pwrlevel_change_settings = a5xx_pwrlevel_change_settings, + .preemption_schedule = a5xx_preemption_schedule, +#if IS_ENABLED(CONFIG_COMMON_CLK_QCOM) + .clk_set_options = a5xx_clk_set_options, +#endif + .read_alwayson = a5xx_read_alwayson, + .hw_isidle = a5xx_hw_isidle, + .power_ops = &adreno_power_operations, + .clear_pending_transactions = a5xx_clear_pending_transactions, + .remove = a5xx_remove, + .ringbuffer_submitcmd = a5xx_ringbuffer_submitcmd, + .is_hw_collapsible = a5xx_is_hw_collapsible, + .power_stats = a5xx_power_stats, + .setproperty = a5xx_setproperty, +}; diff --git a/adreno_a5xx.h b/adreno_a5xx.h new file mode 100644 index 0000000000..7a03e5f86d --- /dev/null +++ b/adreno_a5xx.h @@ -0,0 +1,307 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2015-2017,2019-2020 The Linux Foundation. All rights reserved. + */ + +#ifndef _ADRENO_A5XX_H_ +#define _ADRENO_A5XX_H_ + +#include "a5xx_reg.h" + +/** + * struct adreno_a5xx_core - a5xx specific GPU core definitions + */ +struct adreno_a5xx_core { + /** @base: Container for the generic &struct adreno_gpu_core */ + struct adreno_gpu_core base; + /** @gpmu_tsens: ID for the temperature sensor used by the GPMU */ + unsigned int gpmu_tsens; + /** @max_power: Max possible power draw of a core */ + unsigned int max_power; + /** pm4fw_name: Name of the PM4 microcode file */ + const char *pm4fw_name; + /** pfpfw_name: Name of the PFP microcode file */ + const char *pfpfw_name; + /** gpmufw_name: Name of the GPMU microcode file */ + const char *gpmufw_name; + /** @regfw_name: Filename for the LM registers if applicable */ + const char *regfw_name; + /** @zap_name: Name of the CPZ zap file */ + const char *zap_name; + /** @hwcg: List of registers and values to write for HWCG */ + const struct kgsl_regmap_list *hwcg; + /** @hwcg_count: Number of registers in @hwcg */ + u32 hwcg_count; + /** @vbif: List of registers and values to write for VBIF */ + const struct kgsl_regmap_list *vbif; + /** @vbif_count: Number of registers in @vbif */ + u32 vbif_count; + /** @highest_bank_bit: The bit of the highest DDR bank */ + u32 highest_bank_bit; +}; + +#define A5XX_CP_CTXRECORD_MAGIC_REF 0x27C4BAFCUL +/* Size of each CP preemption record */ +#define A5XX_CP_CTXRECORD_SIZE_IN_BYTES 0x10000 +/* Size of the preemption counter block (in bytes) */ +#define A5XX_CP_CTXRECORD_PREEMPTION_COUNTER_SIZE (16 * 4) + +/** + * struct a5xx_cp_preemption_record - CP context record for + * preemption. + * @magic: (00) Value at this offset must be equal to + * A5XX_CP_CTXRECORD_MAGIC_REF. + * @info: (04) Type of record. Written non-zero (usually) by CP. + * we must set to zero for all ringbuffers. + * @data: (08) DATA field in SET_RENDER_MODE or checkpoint packets. + * Written by CP when switching out. Not used on switch-in. + * we must initialize to zero. + * @cntl: (12) RB_CNTL, saved and restored by CP. + * @rptr: (16) RB_RPTR, saved and restored by CP. + * @wptr: (20) RB_WPTR, saved and restored by CP. + * @rptr_addr: (24) RB_RPTR_ADDR_LO|HI saved and restored. + * rbase: (32) RB_BASE_LO|HI saved and restored. 
+ * counter: (40) Pointer to preemption counter + */ +struct a5xx_cp_preemption_record { + uint32_t magic; + uint32_t info; + uint32_t data; + uint32_t cntl; + uint32_t rptr; + uint32_t wptr; + uint64_t rptr_addr; + uint64_t rbase; + uint64_t counter; +}; + +#define A5XX_CP_SMMU_INFO_MAGIC_REF 0x3618CDA3UL + +/** + * struct a5xx_cp_smmu_info - CP preemption SMMU info. + * @magic: (00) The value at this offset must be equal to + * A5XX_CP_SMMU_INFO_MAGIC_REF. + * @_pad4: (04) Reserved/padding + * @ttbr0: (08) Base address of the page table for the + * incoming context. + * @context_idr: (16) Context Identification Register value. + */ +struct a5xx_cp_smmu_info { + uint32_t magic; + uint32_t _pad4; + uint64_t ttbr0; + uint32_t asid; + uint32_t context_idr; +}; + +void a5xx_snapshot(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot); +unsigned int a5xx_num_registers(void); + +void a5xx_crashdump_init(struct adreno_device *adreno_dev); + +void a5xx_hwcg_set(struct adreno_device *adreno_dev, bool on); + +#define A5XX_CP_RB_CNTL_DEFAULT ((1 << 27) | ((ilog2(4) << 8) & 0x1F00) | \ + (ilog2(KGSL_RB_DWORDS >> 1) & 0x3F)) +/* GPMU interrupt multiplexor */ +#define FW_INTR_INFO (0) +#define LLM_ACK_ERR_INTR (1) +#define ISENS_TRIM_ERR_INTR (2) +#define ISENS_ERR_INTR (3) +#define ISENS_IDLE_ERR_INTR (4) +#define ISENS_PWR_ON_ERR_INTR (5) +#define WDOG_EXPITED (31) + +#define VALID_GPMU_IRQ (\ + BIT(FW_INTR_INFO) | \ + BIT(LLM_ACK_ERR_INTR) | \ + BIT(ISENS_TRIM_ERR_INTR) | \ + BIT(ISENS_ERR_INTR) | \ + BIT(ISENS_IDLE_ERR_INTR) | \ + BIT(ISENS_PWR_ON_ERR_INTR) | \ + BIT(WDOG_EXPITED)) + +/* A5XX_GPMU_GPMU_LLM_GLM_SLEEP_CTRL */ +#define STATE_OF_CHILD GENMASK(5, 4) +#define STATE_OF_CHILD_01 BIT(4) +#define STATE_OF_CHILD_11 (BIT(4) | BIT(5)) +#define IDLE_FULL_LM_SLEEP BIT(0) + +/* A5XX_GPMU_GPMU_LLM_GLM_SLEEP_STATUS */ +#define WAKEUP_ACK BIT(1) +#define IDLE_FULL_ACK BIT(0) + +/* A5XX_GPMU_GPMU_ISENSE_CTRL */ +#define ISENSE_CGC_EN_DISABLE BIT(0) + +/* A5XX_GPMU_TEMP_SENSOR_CONFIG */ +#define GPMU_BCL_ENABLED BIT(4) +#define GPMU_LLM_ENABLED BIT(9) +#define GPMU_ISENSE_STATUS GENMASK(3, 0) +#define GPMU_ISENSE_END_POINT_CAL_ERR BIT(0) + +#define AMP_CALIBRATION_RETRY_CNT 3 +#define AMP_CALIBRATION_TIMEOUT 6 + +/* A5XX_GPMU_GPMU_VOLTAGE_INTR_EN_MASK */ +#define VOLTAGE_INTR_EN BIT(0) + +/* A5XX_GPMU_GPMU_PWR_THRESHOLD */ +#define PWR_THRESHOLD_VALID 0x80000000 + +/* A5XX_GPMU_GPMU_SP_CLOCK_CONTROL */ +#define CNTL_IP_CLK_ENABLE BIT(0) +/* AGC */ +#define AGC_INIT_BASE A5XX_GPMU_DATA_RAM_BASE +#define AGC_INIT_MSG_MAGIC (AGC_INIT_BASE + 5) +#define AGC_MSG_BASE (AGC_INIT_BASE + 7) + +#define AGC_MSG_STATE (AGC_MSG_BASE + 0) +#define AGC_MSG_COMMAND (AGC_MSG_BASE + 1) +#define AGC_MSG_PAYLOAD_SIZE (AGC_MSG_BASE + 3) +#define AGC_MSG_PAYLOAD (AGC_MSG_BASE + 5) + +#define AGC_INIT_MSG_VALUE 0xBABEFACE +#define AGC_POWER_CONFIG_PRODUCTION_ID 1 + +#define AGC_LM_CONFIG (136/4) +#define AGC_LM_CONFIG_ENABLE_GPMU_ADAPTIVE (1) + +#define AGC_LM_CONFIG_ENABLE_ERROR (3 << 4) +#define AGC_LM_CONFIG_ISENSE_ENABLE (1 << 4) + +#define AGC_THROTTLE_SEL_DCS (1 << 8) +#define AGC_THROTTLE_DISABLE (2 << 8) + + +#define AGC_LLM_ENABLED (1 << 16) +#define AGC_GPU_VERSION_MASK GENMASK(18, 17) +#define AGC_GPU_VERSION_SHIFT 17 +#define AGC_BCL_DISABLED (1 << 24) + + +#define AGC_LEVEL_CONFIG (140/4) + +#define LM_DCVS_LIMIT 1 +/* FW file tages */ +#define GPMU_FIRMWARE_ID 2 +#define GPMU_SEQUENCE_ID 3 +#define GPMU_INST_RAM_SIZE 0xFFF + +#define HEADER_MAJOR 1 +#define HEADER_MINOR 2 +#define HEADER_DATE 3 
+#define HEADER_TIME 4 +#define HEADER_SEQUENCE 5 + +#define MAX_HEADER_SIZE 10 + +#define LM_SEQUENCE_ID 1 +#define MAX_SEQUENCE_ID 3 + +#define GPMU_ISENSE_SAVE (A5XX_GPMU_DATA_RAM_BASE + 200/4) +/* LM defaults */ +#define LM_DEFAULT_LIMIT 6000 +#define A530_DEFAULT_LEAKAGE 0x004E001A + +/** + * to_a5xx_core - return the a5xx specific GPU core struct + * @adreno_dev: An Adreno GPU device handle + * + * Returns: + * A pointer to the a5xx specific GPU core struct + */ +static inline const struct adreno_a5xx_core * +to_a5xx_core(struct adreno_device *adreno_dev) +{ + const struct adreno_gpu_core *core = adreno_dev->gpucore; + + return container_of(core, struct adreno_a5xx_core, base); +} + +/* Preemption functions */ +void a5xx_preemption_trigger(struct adreno_device *adreno_dev); +void a5xx_preemption_schedule(struct adreno_device *adreno_dev); +void a5xx_preemption_start(struct adreno_device *adreno_dev); +int a5xx_preemption_init(struct adreno_device *adreno_dev); + +/** + * a5xx_preemption_post_ibsubmit - Insert commands following a submission + * @adreno_dev: Adreno GPU handle + * @cmds: Pointer to the ringbuffer to insert opcodes + * + * Return: The number of opcodes written to @cmds + */ +u32 a5xx_preemption_post_ibsubmit(struct adreno_device *adreno_dev, u32 *cmds); + +/** + * a5xx_preemption_post_ibsubmit - Insert opcodes before a submission + * @adreno_dev: Adreno GPU handle + * @rb: The ringbuffer being written + * @drawctxt: The draw context being written + * @cmds: Pointer to the ringbuffer to insert opcodes + * + * Return: The number of opcodes written to @cmds + */ +u32 a5xx_preemption_pre_ibsubmit(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, struct adreno_context *drawctxt, + u32 *cmds); + +void a5xx_preempt_callback(struct adreno_device *adreno_dev, int bit); + +u64 a5xx_read_alwayson(struct adreno_device *adreno_dev); + +extern const struct adreno_perfcounters adreno_a5xx_perfcounters; + +/** + * a5xx_ringbuffer_init - Initialize the ringbuffers + * @adreno_dev: An Adreno GPU handle + * + * Initialize the ringbuffer(s) for a5xx. + * Return: 0 on success or negative on failure + */ +int a5xx_ringbuffer_init(struct adreno_device *adreno_dev); + +/** +* a5xx_ringbuffer_addcmds - Submit a command to the ringbuffer +* @adreno_dev: An Adreno GPU handle +* @rb: Pointer to the ringbuffer to submit on +* @drawctxt: Pointer to the draw context for the submission, or NULL for +* internal submissions +* @flags: Flags for the submission +* @in: Commands to write to the ringbuffer +* @dwords: Size of @in (in dwords) +* @timestamp: Timestamp for the submission +* @time: Optional pointer to a submit time structure +* +* Submit a command to the ringbuffer. 
+* Return: 0 on success or negative on failure +*/ +int a5xx_ringbuffer_addcmds(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, struct adreno_context *drawctxt, + u32 flags, u32 *in, u32 dwords, u32 timestamp, + struct adreno_submit_time *time); + +/** + * a5xx_ringbuffer_submitcmd - Submit a user command to the ringbuffer + * @adreno_dev: An Adreno GPU handle + * @cmdobj: Pointer to a user command object + * @flags: Internal submit flags + * @time: Optional pointer to a adreno_submit_time container + * + * Return: 0 on success or negative on failure + */ +int a5xx_ringbuffer_submitcmd(struct adreno_device *adreno_dev, + struct kgsl_drawobj_cmd *cmdobj, u32 flags, + struct adreno_submit_time *time); + +int a5xx_ringbuffer_submit(struct adreno_ringbuffer *rb, + struct adreno_submit_time *time, bool sync); + +static inline bool a5xx_has_gpmu(struct adreno_device *adreno_dev) +{ + return (adreno_is_a530(adreno_dev) || adreno_is_a540(adreno_dev)); +} + +#endif diff --git a/adreno_a5xx_packets.h b/adreno_a5xx_packets.h new file mode 100644 index 0000000000..55276e46bc --- /dev/null +++ b/adreno_a5xx_packets.h @@ -0,0 +1,1406 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2016,2019, The Linux Foundation. All rights reserved. + */ + +struct adreno_critical_fixup { + unsigned int lo_offset; + unsigned int hi_offset; + int buffer; + uint64_t mem_offset; +}; + +static unsigned int _a5xx_critical_pkts[] = { + 0x400E0601, /* [0x0000] == TYPE4 == */ + 0x00000002, /* [0x0001] A5X_HLSQ_MODE_CNTL (0x0E06)*/ + 0x40E78A01, /* [0x0002] == TYPE4 == */ + 0x000FFFFF, /* [0x0003] A5X_HLSQ_UPDATE_CNTL_CTX_0 (0xE78A)*/ + 0x48E78401, /* [0x0004] == TYPE4 == */ + 0x00000005, /* [0x0005] A5X_HLSQ_CNTL_0_CTX_0 (0xE784)*/ + 0x40E78501, /* [0x0006] == TYPE4 == */ + 0x00000009, /* [0x0007] A5X_HLSQ_CNTL_1_CTX_0 (0xE785)*/ + 0x48E78B85, /* [0x0008] == TYPE4 == */ + 0x00000001, /* [0x0009] A5X_HLSQ_VS_CONFIG_CTX_0 (0xE78B)*/ + 0x00002085, /* [0x000A] A5X_HLSQ_PS_CONFIG_CTX_0 (0xE78C)*/ + 0x00002084, /* [0x000B] A5X_HLSQ_HS_CONFIG_CTX_0 (0xE78D)*/ + 0x00002084, /* [0x000C] A5X_HLSQ_DS_CONFIG_CTX_0 (0xE78E)*/ + 0x00002084, /* [0x000D] A5X_HLSQ_GS_CONFIG_CTX_0 (0xE78F)*/ + 0x40E58485, /* [0x000E] == TYPE4 == */ + 0x00000001, /* [0x000F] A5X_SP_VS_CONFIG_CTX_0 (0xE584)*/ + 0x00002085, /* [0x0010] A5X_SP_PS_CONFIG_CTX_0 (0xE585)*/ + 0x00002084, /* [0x0011] A5X_SP_HS_CONFIG_CTX_0 (0xE586)*/ + 0x00002084, /* [0x0012] A5X_SP_DS_CONFIG_CTX_0 (0xE587)*/ + 0x00002084, /* [0x0013] A5X_SP_GS_CONFIG_CTX_0 (0xE588)*/ + 0x40E79101, /* [0x0014] == TYPE4 == */ + 0x00000004, /* [0x0015] A5X_HLSQ_VS_CNTL_CTX_0 (0xE791)*/ + 0x40E79201, /* [0x0016] == TYPE4 == */ + 0x00000002, /* [0x0017] A5X_HLSQ_PS_CNTL_CTX_0 (0xE792)*/ + 0x48E58001, /* [0x0018] == TYPE4 == */ + 0x00000010, /* [0x0019] A5X_SP_SP_CNTL_CTX_0 (0xE580)*/ + 0x70B00043, /* [0x001A] == TYPE7: LOAD_STATE (30) == */ + 0x00A00000, /* [0x001B] */ + 0x00000000, /* [0x001C] */ + 0x00000000, /* [0x001D] */ + 0x20020003, /* [0x001E] */ + 0x56D81803, /* [0x001F] */ + 0x00000003, /* [0x0020] */ + 0x20150000, /* [0x0021] */ + 0x00000000, /* [0x0022] */ + 0x00000200, /* [0x0023] */ + 0x00000000, /* [0x0024] */ + 0x201100F4, /* [0x0025] */ + 0x00000000, /* [0x0026] */ + 0x00000500, /* [0x0027] */ + 0x00000C21, /* [0x0028] */ + 0x20154004, /* [0x0029] */ + 0x00000C20, /* [0x002A] */ + 0x20154003, /* [0x002B] */ + 0x00000C23, /* [0x002C] */ + 0x20154008, /* [0x002D] */ + 0x00000C22, /* [0x002E] */ + 0x20156007, /* [0x002F] */ + 0x00000000, /* [0x0030] */ + 
0x20554005, /* [0x0031] */ + 0x3F800000, /* [0x0032] */ + 0x20554006, /* [0x0033] */ + 0x00000000, /* [0x0034] */ + 0x03000000, /* [0x0035] */ + 0x20050000, /* [0x0036] */ + 0x46F00009, /* [0x0037] */ + 0x201F0000, /* [0x0038] */ + 0x4398000A, /* [0x0039] */ + 0x201F0009, /* [0x003A] */ + 0x43980809, /* [0x003B] */ + 0x20180009, /* [0x003C] */ + 0x46100809, /* [0x003D] */ + 0x00091014, /* [0x003E] */ + 0x62050009, /* [0x003F] */ + 0x00000000, /* [0x0040] */ + 0x00000500, /* [0x0041] */ + 0x04800006, /* [0x0042] */ + 0xC2C61300, /* [0x0043] */ + 0x0280000E, /* [0x0044] */ + 0xC2C61310, /* [0x0045] */ + 0x00000000, /* [0x0046] */ + 0x04800000, /* [0x0047] */ + 0x00000000, /* [0x0048] */ + 0x05000000, /* [0x0049] */ + 0x00000000, /* [0x004A] */ + 0x00000000, /* [0x004B] */ + 0x00000000, /* [0x004C] */ + 0x00000000, /* [0x004D] */ + 0x00000000, /* [0x004E] */ + 0x00000000, /* [0x004F] */ + 0x00000000, /* [0x0050] */ + 0x00000000, /* [0x0051] */ + 0x00000000, /* [0x0052] */ + 0x00000000, /* [0x0053] */ + 0x00000000, /* [0x0054] */ + 0x00000000, /* [0x0055] */ + 0x00000000, /* [0x0056] */ + 0x00000000, /* [0x0057] */ + 0x00000000, /* [0x0058] */ + 0x00000000, /* [0x0059] */ + 0x00000000, /* [0x005A] */ + 0x00000000, /* [0x005B] */ + 0x00000000, /* [0x005C] */ + 0x00000000, /* [0x005D] */ + 0x70B00023, /* [0x005E] == TYPE7: LOAD_STATE (30) == */ + 0x00700000, /* [0x005F] */ + 0x00000000, /* [0x0060] */ + 0x00000000, /* [0x0061] */ + 0x00000000, /* [0x0062] */ + 0x03000000, /* [0x0063] */ + 0x00000000, /* [0x0064] */ + 0x00000000, /* [0x0065] */ + 0x00000000, /* [0x0066] */ + 0x00000000, /* [0x0067] */ + 0x00000000, /* [0x0068] */ + 0x00000000, /* [0x0069] */ + 0x00000000, /* [0x006A] */ + 0x00000000, /* [0x006B] */ + 0x00000000, /* [0x006C] */ + 0x00000000, /* [0x006D] */ + 0x00000000, /* [0x006E] */ + 0x00000000, /* [0x006F] */ + 0x00000000, /* [0x0070] */ + 0x00000000, /* [0x0071] */ + 0x00000000, /* [0x0072] */ + 0x00000000, /* [0x0073] */ + 0x00000000, /* [0x0074] */ + 0x00000000, /* [0x0075] */ + 0x00000000, /* [0x0076] */ + 0x00000000, /* [0x0077] */ + 0x00000000, /* [0x0078] */ + 0x00000000, /* [0x0079] */ + 0x00000000, /* [0x007A] */ + 0x00000000, /* [0x007B] */ + 0x00000000, /* [0x007C] */ + 0x00000000, /* [0x007D] */ + 0x00000000, /* [0x007E] */ + 0x00000000, /* [0x007F] */ + 0x00000000, /* [0x0080] */ + 0x00000000, /* [0x0081] */ + 0x70B08003, /* [0x0082] == TYPE7: LOAD_STATE (30) == */ + 0x00620000, /* [0x0083] */ + 0x00000000, /* [0x0084] */ + 0x00000000, /* [0x0085] */ + 0x70B08003, /* [0x0086] == TYPE7: LOAD_STATE (30) == */ + 0x01220008, /* [0x0087] */ + 0x00000000, /* [0x0088] */ + 0x00000000, /* [0x0089] */ + 0x70B0000B, /* [0x008A] == TYPE7: LOAD_STATE (30) == */ + 0x01180000, /* [0x008B] */ + 0x00000001, /* [0x008C] */ + 0x00000000, /* [0x008D] */ + 0x00000000, /* [0x008E] */ + 0x00000000, /* [0x008F] */ + 0x00000000, /* [0x0090] */ + 0x00000000, /* [0x0091] */ + 0x00000000, /* [0x0092] */ + 0x00000000, /* [0x0093] */ + 0x00000000, /* [0x0094] */ + 0x01400000, /* [0x0095] */ + 0x70460001, /* [0x0096] == TYPE7: EVENT_WRITE (46) == */ + 0x00000019, /* [0x0097] */ + 0x70460004, /* [0x0098] == TYPE7: EVENT_WRITE (46) == */ + 0x0000001D, /* [0x0099] */ + 0x00000000, /* [0x009A] */ + 0x00000000, /* [0x009B] */ + 0x00000001, /* [0x009C] */ + 0x70460004, /* [0x009D] == TYPE7: EVENT_WRITE (46) == */ + 0x0000001C, /* [0x009E] */ + 0x00000000, /* [0x009F] */ + 0x00000000, /* [0x00A0] */ + 0x00000001, /* [0x00A1] */ + 0x480E9185, /* [0x00A2] == TYPE4 == */ + 0x00000000, /* [0x00A3] 
A5X_UCHE_CACHE_INVALIDATE_MIN_LO (0x0E91)*/ + 0x00000000, /* [0x00A4] A5X_UCHE_CACHE_INVALIDATE_MIN_HI (0x0E92)*/ + 0x00000000, /* [0x00A5] A5X_UCHE_CACHE_INVALIDATE_MAX_LO (0x0E93)*/ + 0x00000000, /* [0x00A6] A5X_UCHE_CACHE_INVALIDATE_MAX_HI (0x0E94)*/ + 0x00000012, /* [0x00A7] A5X_UCHE_CACHE_INVALIDATE (0x0E95)*/ + 0x70268000, /* [0x00A8] == TYPE7: WAIT_FOR_IDLE (26) == */ + 0x40E78A01, /* [0x00A9] == TYPE4 == */ + 0x000FFFFF, /* [0x00AA] A5X_HLSQ_UPDATE_CNTL_CTX_0 (0xE78A)*/ + 0x70D08003, /* [0x00AB] == TYPE7: PERFCOUNTER_ACTION (50) == */ + 0x00000000, /* [0x00AC] */ + 0x00000000, /* [0x00AD] */ + 0x00000000, /* [0x00AE] */ + 0x70D08003, /* [0x00AF] == TYPE7: PERFCOUNTER_ACTION (50) == */ + 0x00000010, /* [0x00B0] */ + 0x00000000, /* [0x00B1] */ + 0x00000000, /* [0x00B2] */ + 0x70268000, /* [0x00B3] == TYPE7: WAIT_FOR_IDLE (26) == */ + 0x48E38C01, /* [0x00B4] == TYPE4 == */ + 0xFFFFFFFF, /* [0x00B5] A5X_PC_RESTART_INDEX_CTX_0 (0xE38C)*/ + 0x40E38801, /* [0x00B6] == TYPE4 == */ + 0x00000012, /* [0x00B7] A5X_PC_RASTER_CNTL_CTX_0 (0xE388)*/ + 0x48E09102, /* [0x00B8] == TYPE4 == */ + 0xFFC00010, /* [0x00B9] A5X_GRAS_SU_POINT_MINMAX_CTX_0 (0xE091)*/ + 0x00000008, /* [0x00BA] A5X_GRAS_SU_POINT_SIZE_CTX_0 (0xE092)*/ + 0x40E09901, /* [0x00BB] == TYPE4 == */ + 0x00000000, /* [0x00BC] A5X_GRAS_SU_CONSERVATIVE_RAS_CNTL_CTX_0 + * (0xE099) + */ + 0x48E0A401, /* [0x00BD] == TYPE4 == */ + 0x00000000, /* [0x00BE] A5X_GRAS_SC_SCREEN_SCISSOR_CNTL_CTX_0 (0xE0A4)*/ + 0x48E58A01, /* [0x00BF] == TYPE4 == */ + 0x00000000, /* [0x00C0] A5X_SP_VS_CONFIG_MAX_CONST_CTX_0 (0xE58A)*/ + 0x40E58B01, /* [0x00C1] == TYPE4 == */ + 0x00000000, /* [0x00C2] A5X_SP_PS_CONFIG_MAX_CONST_CTX_0 (0xE58B)*/ + 0x480CC601, /* [0x00C3] == TYPE4 == */ + 0x00000044, /* [0x00C4] A5X_RB_MODE_CNTL (0x0CC6)*/ + 0x400CC401, /* [0x00C5] == TYPE4 == */ + 0x00100000, /* [0x00C6] A5X_RB_DBG_ECO_CNTL (0x0CC4)*/ + 0x400E4201, /* [0x00C7] == TYPE4 == */ + 0x00000000, /* [0x00C8] A5X_VFD_MODE_CNTL (0x0E42)*/ + 0x480D0201, /* [0x00C9] == TYPE4 == */ + 0x0000001F, /* [0x00CA] A5X_PC_MODE_CNTL (0x0D02)*/ + 0x480EC201, /* [0x00CB] == TYPE4 == */ + 0x0000001E, /* [0x00CC] A5X_SP_MODE_CNTL (0x0EC2)*/ + 0x400EC001, /* [0x00CD] == TYPE4 == */ + 0x40000800, /* [0x00CE] A5X_SP_DBG_ECO_CNTL (0x0EC0)*/ + 0x400F0201, /* [0x00CF] == TYPE4 == */ + 0x00000544, /* [0x00D0] A5X_TPL1_MODE_CNTL (0x0F02)*/ + 0x400E0002, /* [0x00D1] == TYPE4 == */ + 0x00000080, /* [0x00D2] A5X_HLSQ_TIMEOUT_THRESHOLD_0 (0x0E00)*/ + 0x00000000, /* [0x00D3] A5X_HLSQ_TIMEOUT_THRESHOLD_1 (0x0E01)*/ + 0x400E6001, /* [0x00D4] == TYPE4 == */ + 0x00000400, /* [0x00D5] A5X_VPC_DBG_ECO_CNTL (0x0E60)*/ + 0x400E0601, /* [0x00D6] == TYPE4 == */ + 0x00000001, /* [0x00D7] A5X_HLSQ_MODE_CNTL (0x0E06)*/ + 0x480E6201, /* [0x00D8] == TYPE4 == */ + 0x00000000, /* [0x00D9] A5X_VPC_MODE_CNTL (0x0E62)*/ + 0x70EC8005, /* [0x00DA] == TYPE7: SET_RENDER_MODE (6C) == */ + 0x00000002, /* [0x00DB] */ + 0x00000000, /* [0x00DC] */ + 0x00000000, /* [0x00DD] */ + 0x00000008, /* [0x00DE] */ + 0x00000001, /* [0x00DF] */ + 0x40E14001, /* [0x00E0] == TYPE4 == */ + 0x00000204, /* [0x00E1] A5X_RB_CNTL_CTX_0 (0xE140)*/ + 0x709D0001, /* [0x00E2] == TYPE7: SKIP_IB2_ENABLE_GLOBAL (1D) == */ + 0x00000000, /* [0x00E3] */ + 0x48E0EA02, /* [0x00E4] == TYPE4 == */ + 0x00000000, /* [0x00E5] A5X_GRAS_SC_WINDOW_SCISSOR_TL_CTX_0 (0xE0EA)*/ + 0x001F0073, /* [0x00E6] A5X_GRAS_SC_WINDOW_SCISSOR_BR_CTX_0 (0xE0EB)*/ + 0x48E21102, /* [0x00E7] == TYPE4 == */ + 0x00000000, /* [0x00E8] A5X_RB_RESOLVE_CNTL_1_CTX_0 (0xE211)*/ + 0x00000000, /* 
[0x00E9] A5X_RB_RESOLVE_CNTL_2_CTX_0 (0xE212)*/ + 0x480BC283, /* [0x00EA] == TYPE4 == */ + 0x00000204, /* [0x00EB] UNKNOWN (0x0BC2)*/ + 0x00000000, /* [0x00EC] UNKNOWN (0x0BC3)*/ + 0x00000000, /* [0x00ED] UNKNOWN (0x0BC4)*/ + 0x400BC502, /* [0x00EE] == TYPE4 == */ + 0x00000000, /* [0x00EF] UNKNOWN (0x0BC5)*/ + 0x00000000, /* [0x00F0] UNKNOWN (0x0BC6)*/ + 0x480BD001, /* [0x00F1] == TYPE4 == */ + 0x01100000, /* [0x00F2] UNKNOWN (0x0BD0)*/ + 0x480BE002, /* [0x00F3] == TYPE4 == */ + 0x00000000, /* [0x00F4] UNKNOWN (0x0BE0)*/ + 0x00000000, /* [0x00F5] UNKNOWN (0x0BE1)*/ + 0x480C0001, /* [0x00F6] == TYPE4 == */ + 0x00000020, /* [0x00F7] A5X_VSC_PIPE_DATA_LENGTH_0 (0x0C00)*/ + 0x48E3B001, /* [0x00F8] == TYPE4 == */ + 0x00000003, /* [0x00F9] A5X_PC_POWER_CNTL_CTX_0 (0xE3B0)*/ + 0x48E4F001, /* [0x00FA] == TYPE4 == */ + 0x00000003, /* [0x00FB] A5X_VFD_POWER_CNTL_CTX_0 (0xE4F0)*/ + 0x480E6201, /* [0x00FC] == TYPE4 == */ + 0x00000001, /* [0x00FD] A5X_VPC_MODE_CNTL (0x0E62)*/ + 0x70460001, /* [0x00FE] == TYPE7: EVENT_WRITE (46) == */ + 0x0000002C, /* [0x00FF] */ + 0x40E1D001, /* [0x0100] == TYPE4 == */ + 0x00000000, /* [0x0101] A5X_RB_WINDOW_OFFSET_CTX_0 (0xE1D0)*/ + 0x70BF8003, /* [0x0102] == TYPE7: INDIRECT_BUFFER_PFE (3F) == */ + 0x00000000, /* [0x0103] */ + 0x00000000, /* [0x0104] */ + 0x000000A0, /* [0x0105] */ + 0x70460001, /* [0x0106] == TYPE7: EVENT_WRITE (46) == */ + 0x0000002D, /* [0x0107] */ + 0x70460004, /* [0x0108] == TYPE7: EVENT_WRITE (46) == */ + 0x00000004, /* [0x0109] */ + 0x00000000, /* [0x010A] */ + 0x00000000, /* [0x010B] */ + 0x00000000, /* [0x010C] */ + 0x70268000, /* [0x010D] == TYPE7: WAIT_FOR_IDLE (26) == */ + 0x480E6201, /* [0x010E] == TYPE4 == */ + 0x00000000, /* [0x010F] A5X_VPC_MODE_CNTL (0x0E62)*/ + 0x48E3B001, /* [0x0110] == TYPE4 == */ + 0x00000003, /* [0x0111] A5X_PC_POWER_CNTL_CTX_0 (0xE3B0)*/ + 0x48E4F001, /* [0x0112] == TYPE4 == */ + 0x00000003, /* [0x0113] A5X_VFD_POWER_CNTL_CTX_0 (0xE4F0)*/ + 0x70268000, /* [0x0114] == TYPE7: WAIT_FOR_IDLE (26) == */ + 0x400CC701, /* [0x0115] == TYPE4 == */ + 0x7C13C080, /* [0x0116] A5X_RB_CCU_CNTL (0x0CC7)*/ + 0x70EC8005, /* [0x0117] == TYPE7: SET_RENDER_MODE (6C) == */ + 0x00000001, /* [0x0118] */ + 0x00000000, /* [0x0119] */ + 0x00000000, /* [0x011A] */ + 0x00000010, /* [0x011B] */ + 0x00000001, /* [0x011C] */ + 0x70EA0001, /* [0x011D] == TYPE7: PREEMPT_ENABLE_LOCAL (6A) == */ + 0x00000000, /* [0x011E] */ + 0x48E0EA02, /* [0x011F] == TYPE4 == */ + 0x00000000, /* [0x0120] A5X_GRAS_SC_WINDOW_SCISSOR_TL_CTX_0 (0xE0EA)*/ + 0x001F0073, /* [0x0121] A5X_GRAS_SC_WINDOW_SCISSOR_BR_CTX_0 (0xE0EB)*/ + 0x48E21102, /* [0x0122] == TYPE4 == */ + 0x00000000, /* [0x0123] A5X_RB_RESOLVE_CNTL_1_CTX_0 (0xE211)*/ + 0x00030007, /* [0x0124] A5X_RB_RESOLVE_CNTL_2_CTX_0 (0xE212)*/ + 0x70138000, /* [0x0125] == TYPE7: WAIT_FOR_ME (13) == */ + 0x70640001, /* [0x0126] == TYPE7: SET_VISIBILITY_OVERRIDE (64) == */ + 0x00000000, /* [0x0127] */ + 0x702F8005, /* [0x0128] == TYPE7: SET_BIN_DATA (2F) == */ + 0x00010000, /* [0x0129] */ + 0x00000000, /* [0x012A] */ + 0x00000000, /* [0x012B] */ + 0x00000000, /* [0x012C] */ + 0x00000000, /* [0x012D] */ + 0x40E1D001, /* [0x012E] == TYPE4 == */ + 0x00000000, /* [0x012F] A5X_RB_WINDOW_OFFSET_CTX_0 (0xE1D0)*/ + 0x40E2A201, /* [0x0130] == TYPE4 == */ + 0x00000001, /* [0x0131] A5X_VPC_SO_OVERRIDE_CTX_0 (0xE2A2)*/ + 0x70640001, /* [0x0132] == TYPE7: SET_VISIBILITY_OVERRIDE (64) == */ + 0x00000000, /* [0x0133] */ + 0x48E1B285, /* [0x0134] == TYPE4 == */ + 0x00000001, /* [0x0135] A5X_RB_DEPTH_BUFFER_INFO_CTX_0 (0xE1B2)*/ + 
0x00004000, /* [0x0136] A5X_RB_DEPTH_BUFFER_BASE_LO_CTX_0 (0xE1B3)*/ + 0x00000000, /* [0x0137] A5X_RB_DEPTH_BUFFER_BASE_HI_CTX_0 (0xE1B4)*/ + 0x00000004, /* [0x0138] A5X_RB_DEPTH_BUFFER_PITCH_CTX_0 (0xE1B5)*/ + 0x000000C0, /* [0x0139] A5X_RB_DEPTH_BUFFER_ARRAY_PITCH_CTX_0 (0xE1B6)*/ + 0x48E09801, /* [0x013A] == TYPE4 == */ + 0x00000001, /* [0x013B] A5X_GRAS_SU_DEPTH_BUFFER_INFO_CTX_0 (0xE098)*/ + 0x40E24083, /* [0x013C] == TYPE4 == */ + 0x00000000, /* [0x013D] A5X_RB_DEPTH_FLAG_BUFFER_BASE_LO_CTX_0 + * (0xE240) + */ + 0x00000000, /* [0x013E] A5X_RB_DEPTH_FLAG_BUFFER_BASE_HI_CTX_0 + * (0xE241) + */ + 0x00000000, /* [0x013F] A5X_RB_DEPTH_FLAG_BUFFER_PITCH_CTX_0 (0xE242)*/ + 0x40E15285, /* [0x0140] == TYPE4 == */ + 0x00001230, /* [0x0141] A5X_RB_MRT_BUFFER_INFO_0_CTX_0 (0xE152)*/ + 0x00000008, /* [0x0142] A5X_RB_MRT_BUFFER_PITCH_0_CTX_0 (0xE153)*/ + 0x00000100, /* [0x0143] A5X_RB_MRT_BUFFER_ARRAY_PITCH_0_CTX_0 (0xE154)*/ + 0x00000000, /* [0x0144] A5X_RB_MRT_BUFFER_BASE_LO_0_CTX_0 (0xE155)*/ + 0x00000000, /* [0x0145] A5X_RB_MRT_BUFFER_BASE_HI_0_CTX_0 (0xE156)*/ + 0x40E40801, /* [0x0146] == TYPE4 == */ + 0x00000000, /* [0x0147] A5X_VFD_INDEX_OFFSET_CTX_0 (0xE408)*/ + 0x48E40901, /* [0x0148] == TYPE4 == */ + 0x00000000, /* [0x0149] A5X_VFD_INSTANCE_START_OFFSET_CTX_0 (0xE409)*/ + 0x70BF8003, /* [0x014A] == TYPE7: INDIRECT_BUFFER_PFE (3F) == */ + 0x00000000, /* [0x014B] */ + 0x00000000, /* [0x014C] */ + 0x00000112, /* [0x014D] */ + 0x70230001, /* [0x014E] == TYPE7: SKIP_IB2_ENABLE_LOCAL (23) == */ + 0x00000000, /* [0x014F] */ + 0x70BF8003, /* [0x0150] == TYPE7: INDIRECT_BUFFER_PFE (3F) == */ + 0x00000000, /* [0x0151] */ + 0x00000000, /* [0x0152] */ + 0x0000001B, /* [0x0153] */ + 0x70EC8005, /* [0x0154] == TYPE7: SET_RENDER_MODE (6C) == */ + 0x00000001, /* [0x0155] */ + 0x00000000, /* [0x0156] */ + 0x00000000, /* [0x0157] */ + 0x00000000, /* [0x0158] */ + 0x00000001, /* [0x0159] */ + 0x70438003, /* [0x015A] == TYPE7: SET_DRAW_STATE (43) == */ + 0x00080059, /* [0x015B] */ + 0x00000000, /* [0x015C] */ + 0x00000000, /* [0x015D] */ + 0x70388003, /* [0x015E] == TYPE7: DRAW_INDX_OFFSET (38) == */ + 0x00000888, /* [0x015F] */ + 0x00000000, /* [0x0160] */ + 0x00000002, /* [0x0161] */ + 0x70A88003, /* [0x0162] == TYPE7: DRAW_INDIRECT (28) == */ + 0x00200884, /* [0x0163] */ + 0x00000000, /* [0x0164] */ + 0x00000000, /* [0x0165] */ + 0x70298006, /* [0x0166] == TYPE7: DRAW_INDX_INDIRECT (29) == */ + 0x00200404, /* [0x0167] */ + 0x00000000, /* [0x0168] */ + 0x00000000, /* [0x0169] */ + 0x00000006, /* [0x016A] */ + 0x00000000, /* [0x016B] */ + 0x00000000, /* [0x016C] */ + 0x40E2A783, /* [0x016D] == TYPE4 == */ + 0x00000000, /* [0x016E] A5X_VPC_SO_BUFFER_BASE_LO_0_CTX_0 (0xE2A7)*/ + 0x00000000, /* [0x016F] A5X_VPC_SO_BUFFER_BASE_HI_0_CTX_0 (0xE2A8)*/ + 0x00000004, /* [0x0170] A5X_VPC_SO_BUFFER_SIZE_0_CTX_0 (0xE2A9)*/ + 0x48E2AC02, /* [0x0171] == TYPE4 == */ + 0x00000000, /* [0x0172] A5X_VPC_SO_FLUSH_BASE_LO_0_CTX_0 (0xE2AC)*/ + 0x00000000, /* [0x0173] A5X_VPC_SO_FLUSH_BASE_HI_0_CTX_0 (0xE2AD)*/ + 0x70460001, /* [0x0174] == TYPE7: EVENT_WRITE (46) == */ + 0x00000011, /* [0x0175] */ + 0x48E10001, /* [0x0176] == TYPE4 == */ + 0x00000009, /* [0x0177] A5X_GRAS_LRZ_CNTL_CTX_0 (0xE100)*/ + 0x70460001, /* [0x0178] == TYPE7: EVENT_WRITE (46) == */ + 0x00000026, /* [0x0179] */ + 0x48E10001, /* [0x017A] == TYPE4 == */ + 0x00000008, /* [0x017B] A5X_GRAS_LRZ_CNTL_CTX_0 (0xE100)*/ + 0x40E10185, /* [0x017C] == TYPE4 == */ + 0x00000000, /* [0x017D] A5X_GRAS_LRZ_BUFFER_BASE_LO_CTX_0 (0xE101)*/ + 0x00000000, /* [0x017E] 
A5X_GRAS_LRZ_BUFFER_BASE_HI_CTX_0 (0xE102)*/ + 0x00000001, /* [0x017F] A5X_GRAS_LRZ_BUFFER_PITCH_CTX_0 (0xE103)*/ + 0x00000000, /* [0x0180] A5X_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO_CTX_0 + * (0xE104) + */ + 0x00000000, /* [0x0181] A5X_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI_CTX_0 + * (0xE105) + */ + 0x70460001, /* [0x0182] == TYPE7: EVENT_WRITE (46) == */ + 0x00000025, /* [0x0183] */ + 0x70460001, /* [0x0184] == TYPE7: EVENT_WRITE (46) == */ + 0x00000019, /* [0x0185] */ + 0x70460001, /* [0x0186] == TYPE7: EVENT_WRITE (46) == */ + 0x00000018, /* [0x0187] */ + 0x70EA0001, /* [0x0188] == TYPE7: PREEMPT_ENABLE_LOCAL (6A) == */ + 0x00000000, /* [0x0189] */ + 0x70EC0001, /* [0x018A] == TYPE7: SET_RENDER_MODE (6C) == */ + 0x00000006, /* [0x018B] */ + 0x70438003, /* [0x018C] == TYPE7: SET_DRAW_STATE (43) == */ + 0x00080059, /* [0x018D] */ + 0x00000000, /* [0x018E] */ + 0x00000000, /* [0x018F] */ + 0x70DC0002, /* [0x0190] == TYPE7: CONTEXT_REG_BUNCH (5C) == */ + 0x0000E2A1, /* [0x0191] */ + 0x00008001, /* [0x0192] */ + 0x709D0001, /* [0x0193] == TYPE7: SKIP_IB2_ENABLE_GLOBAL (1D) == */ + 0x00000000, /* [0x0194] */ + 0x70138000, /* [0x0195] == TYPE7: WAIT_FOR_ME (13) == */ + 0x70640001, /* [0x0196] == TYPE7: SET_VISIBILITY_OVERRIDE (64) == */ + 0x00000001, /* [0x0197] */ + 0x70380007, /* [0x0198] == TYPE7: DRAW_INDX_OFFSET (38) == */ + 0x00200506, /* [0x0199] */ + 0x00000000, /* [0x019A] */ + 0x00000004, /* [0x019B] */ + 0x00000000, /* [0x019C] */ + 0x00000000, /* [0x019D] */ + 0x00000000, /* [0x019E] */ + 0x00000004, /* [0x019F] */ + 0x703D8005, /* [0x01A0] == TYPE7: MEM_WRITE (3D) == */ + 0x00000000, /* [0x01A1] */ + 0x00000000, /* [0x01A2] */ + 0x00000001, /* [0x01A3] */ + 0x00000001, /* [0x01A4] */ + 0x00000001, /* [0x01A5] */ + 0x70928000, /* [0x01A6] == TYPE7: WAIT_MEM_WRITES (12) == */ + 0x70BF8003, /* [0x01A7] == TYPE7: INDIRECT_BUFFER_PFE (3F) == */ + 0x00000000, /* [0x01A8] */ + 0x00000000, /* [0x01A9] */ + 0x00000028, /* [0x01AA] */ + 0x70C48006, /* [0x01AB] == TYPE7: COND_EXEC (44) == */ + 0x00000000, /* [0x01AC] */ + 0x00000000, /* [0x01AD] */ + 0x00000000, /* [0x01AE] */ + 0x00000000, /* [0x01AF] */ + 0x00000001, /* [0x01B0] */ + 0x00000002, /* [0x01B1] */ + 0x70100001, /* [0x01B2] == TYPE7: NOP (10) == */ + 0x00000000, /* [0x01B3] */ + 0x70C28003, /* [0x01B4] == TYPE7: MEM_TO_REG (42) == */ + 0xC000E2AB, /* [0x01B5] */ + 0x00000000, /* [0x01B6] */ + 0x00000000, /* [0x01B7] */ + 0x70230001, /* [0x01B8] == TYPE7: SKIP_IB2_ENABLE_LOCAL (23) == */ + 0x00000000, /* [0x01B9] */ + 0x70E90001, /* [0x01BA] == TYPE7: PREEMPT_ENABLE_GLOBAL (69) == */ + 0x00000000, /* [0x01BB] */ + 0x70BC8006, /* [0x01BC] == TYPE7: WAIT_REG_MEM (3C) == */ + 0x00000010, /* [0x01BD] */ + 0x00000000, /* [0x01BE] */ + 0x00000000, /* [0x01BF] */ + 0x00000001, /* [0x01C0] */ + 0xFFFFFFFF, /* [0x01C1] */ + 0x00000001, /* [0x01C2] */ + 0x70738009, /* [0x01C3] == TYPE7: MEM_TO_MEM (73) == */ + 0x20000004, /* [0x01C4] */ + 0x00000000, /* [0x01C5] */ + 0x00000000, /* [0x01C6] */ + 0x00000000, /* [0x01C7] */ + 0x00000000, /* [0x01C8] */ + 0x00000000, /* [0x01C9] */ + 0x00000000, /* [0x01CA] */ + 0x00000000, /* [0x01CB] */ + 0x00000000, /* [0x01CC] */ + 0x70738009, /* [0x01CD] == TYPE7: MEM_TO_MEM (73) == */ + 0xE0000004, /* [0x01CE] */ + 0x00000000, /* [0x01CF] */ + 0x00000000, /* [0x01D0] */ + 0x00000000, /* [0x01D1] */ + 0x00000000, /* [0x01D2] */ + 0x00000000, /* [0x01D3] */ + 0x00000000, /* [0x01D4] */ + 0x00000000, /* [0x01D5] */ + 0x00000000, /* [0x01D6] */ + 0x70B50001, /* [0x01D7] == TYPE7: SET_SUBDRAW_SIZE (35) == */ + 
0x00000001, /* [0x01D8] */ + 0x40E78A01, /* [0x01D9] == TYPE4 == */ + 0x000FFFFF, /* [0x01DA] A5X_HLSQ_UPDATE_CNTL_CTX_0 (0xE78A)*/ + 0x70268000, /* [0x01DB] == TYPE7: WAIT_FOR_IDLE (26) == */ + 0x400E0601, /* [0x01DC] == TYPE4 == */ + 0x00000001, /* [0x01DD] A5X_HLSQ_MODE_CNTL (0x0E06)*/ + 0x706E0004, /* [0x01DE] == TYPE7: COMPUTE_CHECKPOINT (6E) == */ + 0x00000000, /* [0x01DF] */ + 0x00000000, /* [0x01E0] */ + 0x00000018, /* [0x01E1] */ + 0x00000001, /* [0x01E2] */ + 0x40E14001, /* [0x01E3] == TYPE4 == */ + 0x00020000, /* [0x01E4] A5X_RB_CNTL_CTX_0 (0xE140)*/ + 0x40E78A01, /* [0x01E5] == TYPE4 == */ + 0x01F00000, /* [0x01E6] A5X_HLSQ_UPDATE_CNTL_CTX_0 (0xE78A)*/ + 0x70268000, /* [0x01E7] == TYPE7: WAIT_FOR_IDLE (26) == */ + 0x48E38C01, /* [0x01E8] == TYPE4 == */ + 0xFFFFFFFF, /* [0x01E9] A5X_PC_RESTART_INDEX_CTX_0 (0xE38C)*/ + 0x480D0201, /* [0x01EA] == TYPE4 == */ + 0x0000001F, /* [0x01EB] A5X_PC_MODE_CNTL (0x0D02)*/ + 0x480EC201, /* [0x01EC] == TYPE4 == */ + 0x0000001E, /* [0x01ED] A5X_SP_MODE_CNTL (0x0EC2)*/ + 0x48E58001, /* [0x01EE] == TYPE4 == */ + 0x00000000, /* [0x01EF] A5X_SP_SP_CNTL_CTX_0 (0xE580)*/ + 0x40E2A201, /* [0x01F0] == TYPE4 == */ + 0x00000001, /* [0x01F1] A5X_VPC_SO_OVERRIDE_CTX_0 (0xE2A2)*/ + 0x70640001, /* [0x01F2] == TYPE7: SET_VISIBILITY_OVERRIDE (64) == */ + 0x00000001, /* [0x01F3] */ + 0x48E78401, /* [0x01F4] == TYPE4 == */ + 0x00000881, /* [0x01F5] A5X_HLSQ_CNTL_0_CTX_0 (0xE784)*/ + 0x40E5F001, /* [0x01F6] == TYPE4 == */ + 0x00000C06, /* [0x01F7] A5X_SP_CS_CNTL_0_CTX_0 (0xE5F0)*/ + 0x48E79001, /* [0x01F8] == TYPE4 == */ + 0x00000001, /* [0x01F9] A5X_HLSQ_CS_CONFIG_CTX_0 (0xE790)*/ + 0x48E79601, /* [0x01FA] == TYPE4 == */ + 0x00000005, /* [0x01FB] A5X_HLSQ_CS_CNTL_CTX_0 (0xE796)*/ + 0x48E58901, /* [0x01FC] == TYPE4 == */ + 0x00000001, /* [0x01FD] A5X_SP_CS_CONFIG_CTX_0 (0xE589)*/ + 0x40E7DC01, /* [0x01FE] == TYPE4 == */ + 0x00000030, /* [0x01FF] A5X_HLSQ_CONTEXT_SWITCH_CS_SW_3_CTX_0 (0xE7DC)*/ + 0x48E7DD01, /* [0x0200] == TYPE4 == */ + 0x00000002, /* [0x0201] A5X_HLSQ_CONTEXT_SWITCH_CS_SW_4_CTX_0 (0xE7DD)*/ + 0x40E7B001, /* [0x0202] == TYPE4 == */ + 0x00000003, /* [0x0203] A5X_HLSQ_CS_NDRANGE_0_CTX_0 (0xE7B0)*/ + 0x48E7B702, /* [0x0204] == TYPE4 == */ + 0x00FCC0CF, /* [0x0205] A5X_HLSQ_CS_CNTL_0_CTX_0 (0xE7B7)*/ + 0x00000000, /* [0x0206] A5X_HLSQ_CS_CNTL_1_CTX_0 (0xE7B8)*/ + 0x40E7B983, /* [0x0207] == TYPE4 == */ + 0x00000001, /* [0x0208] A5X_HLSQ_CS_KERNEL_GROUP_X_CTX_0 (0xE7B9)*/ + 0x00000001, /* [0x0209] A5X_HLSQ_CS_KERNEL_GROUP_Y_CTX_0 (0xE7BA)*/ + 0x00000001, /* [0x020A] A5X_HLSQ_CS_KERNEL_GROUP_Z_CTX_0 (0xE7BB)*/ + 0x70B08003, /* [0x020B] == TYPE7: LOAD_STATE (30) == */ + 0x00B60000, /* [0x020C] */ + 0x00000000, /* [0x020D] */ + 0x00000000, /* [0x020E] */ + 0x70B08003, /* [0x020F] == TYPE7: LOAD_STATE (30) == */ + 0x01360008, /* [0x0210] */ + 0x00000000, /* [0x0211] */ + 0x00000000, /* [0x0212] */ + 0x70B0000B, /* [0x0213] == TYPE7: LOAD_STATE (30) == */ + 0x00BC0000, /* [0x0214] */ + 0x00000000, /* [0x0215] */ + 0x00000000, /* [0x0216] */ + 0x00000000, /* [0x0217] */ + 0x00000000, /* [0x0218] */ + 0x00000000, /* [0x0219] */ + 0x00000000, /* [0x021A] */ + 0x00000000, /* [0x021B] */ + 0x00000000, /* [0x021C] */ + 0x00000000, /* [0x021D] */ + 0x00000000, /* [0x021E] */ + 0x70B00007, /* [0x021F] == TYPE7: LOAD_STATE (30) == */ + 0x00BC0000, /* [0x0220] */ + 0x00000001, /* [0x0221] */ + 0x00000000, /* [0x0222] */ + 0x00040000, /* [0x0223] */ + 0x00000000, /* [0x0224] */ + 0x00040000, /* [0x0225] */ + 0x00000000, /* [0x0226] */ + 0x70B00007, /* [0x0227] == 
TYPE7: LOAD_STATE (30) == */ + 0x00BC0000, /* [0x0228] */ + 0x00000002, /* [0x0229] */ + 0x00000000, /* [0x022A] */ + 0x00000000, /* [0x022B] */ + 0x00000000, /* [0x022C] */ + 0x00000000, /* [0x022D] */ + 0x00000000, /* [0x022E] */ + 0x48E7B186, /* [0x022F] == TYPE4 == */ + 0x00000001, /* [0x0230] A5X_HLSQ_CS_NDRANGE_1_CTX_0 (0xE7B1)*/ + 0x00000000, /* [0x0231] A5X_HLSQ_CS_NDRANGE_2_CTX_0 (0xE7B2)*/ + 0x00000001, /* [0x0232] A5X_HLSQ_CS_NDRANGE_3_CTX_0 (0xE7B3)*/ + 0x00000000, /* [0x0233] A5X_HLSQ_CS_NDRANGE_4_CTX_0 (0xE7B4)*/ + 0x00000001, /* [0x0234] A5X_HLSQ_CS_NDRANGE_5_CTX_0 (0xE7B5)*/ + 0x00000000, /* [0x0235] A5X_HLSQ_CS_NDRANGE_6_CTX_0 (0xE7B6)*/ + 0x70B30004, /* [0x0236] == TYPE7: EXEC_CS (33) == */ + 0x00000000, /* [0x0237] */ + 0x00000001, /* [0x0238] */ + 0x00000001, /* [0x0239] */ + 0x00000001, /* [0x023A] */ + 0x480E6201, /* [0x023B] == TYPE4 == */ + 0x00000001, /* [0x023C] A5X_VPC_MODE_CNTL (0x0E62)*/ +}; + +/* + * These are fixups for the addresses _a5xx_critical_pkts[]. The first two + * numbers are the dword offsets into the buffer above. The third enum is a + * clue as to which buffer is being patched in and the final number is an offset + * in said buffer. + */ +static const struct adreno_critical_fixup critical_pkt_fixups[] = { + { 132, 133, 2, 0x0000 }, + { 136, 137, 2, 0x0001 }, + { 154, 155, 2, 0x0100 }, + { 159, 160, 2, 0x0104 }, + { 173, 174, 2, 0x0200 }, + { 177, 178, 2, 0x0300 }, + { 236, 237, 0, 0x0000 }, + { 244, 245, 0, 0x0040 }, + { 259, 260, 3, 0x0000 }, + { 266, 267, 2, 0x0108 }, + { 298, 299, 0, 0x0040 }, + { 300, 301, 2, 0x0080 }, + { 331, 332, 3, 0x02A0 }, + { 337, 338, 3, 0x0700 }, + { 348, 349, 3, 0x0920 }, + { 356, 357, 1, 0x008C }, + { 360, 361, 1, 0x0080 }, + { 363, 364, 1, 0x008C }, + { 366, 367, 0, 0x0100 }, + { 370, 371, 0, 0x0120 }, + { 381, 382, 1, 0x0480 }, + { 384, 385, 1, 0x0400 }, + { 398, 399, 3, 0x0920 }, + { 413, 414, 1, 0x0080 }, + { 417, 418, 1, 0x0300 }, + { 424, 425, 3, 0x0880 }, + { 428, 429, 1, 0x0300 }, + { 430, 431, 1, 0x0300 }, + { 438, 439, 1, 0x0300 }, + { 446, 447, 1, 0x0300 }, + { 453, 454, 1, 0x0320 }, + { 455, 456, 1, 0x0300 }, + { 457, 458, 1, 0x0304 }, + { 459, 460, 1, 0x0308 }, + { 463, 464, 1, 0x0320 }, + { 465, 466, 1, 0x0300 }, + { 467, 468, 1, 0x0304 }, + { 469, 470, 1, 0x0308 }, + { 525, 526, 1, 0x0160 }, + { 529, 530, 1, 0x0101 }, + { 535, 536, 1, 0x0140 }, + { 539, 540, 0, 0x0800 }, + { 555, 556, 1, 0x0140 }, + { 557, 558, 0, 0x0800 }, +}; + +static unsigned int _a5xx_critical_pkts_mem01[] = { + 0xBECCCCCD, 0x00000000, 0x3ECCCCCD, 0x00000000, 0x3ECCCCCD, 0x3ECCCCCD, + 0xBECCCCCD, 0x00000000, 0xBECCCCCD, 0x3ECCCCCD, 0x3ECCCCCD, 0x00000000, + 0xBECCCCCD, 0x00000000, 0x3ECCCCCD, 0x00000000, 0x3ECCCCCD, 0xBECCCCCD, + 0xBECCCCCD, 0x00000000, 0xBECCCCCD, 0xBECCCCCD, 0x3ECCCCCD, 0x00000000, + 0x3ECCCCCD, 0x00000000, 0x3ECCCCCD, 0x00000000, 0x3ECCCCCD, 0x00000000, + 0x00000000, 0x00000000, 0x00040003, 0x00090005, 0x000B000A, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000003, 0x00000001, + 0x00000006, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000003, 0x00000001, 0x00000003, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x3EF5C28F, 0x3ED1EB85, 0x3E6147AE, 0x3F800000, + 
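/*
 * Editorial sketch (not part of the patch): how a fixup table such as
 * critical_pkt_fixups above might be applied.  Each entry names the lo/hi
 * dword pair inside the packet buffer that must receive a 64-bit GPU
 * address, the index of the target buffer, and the byte offset into that
 * buffer.  The iova[] array and the function name are assumptions for the
 * example only; the driver's actual patching routine lives elsewhere.
 */
static void example_apply_fixups(u32 *cmds,
		const struct adreno_critical_fixup *fixups, int count,
		const u64 *iova)
{
	int i;

	for (i = 0; i < count; i++) {
		/* Resolve the target buffer base plus the per-entry offset */
		u64 gpuaddr = iova[fixups[i].buffer] + fixups[i].mem_offset;

		/* Patch the address into the lo/hi dwords of the packet stream */
		cmds[fixups[i].lo_offset] = lower_32_bits(gpuaddr);
		cmds[fixups[i].hi_offset] = upper_32_bits(gpuaddr);
	}
}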
0x3E947AE1, 0x3E6147AE, 0x3D4CCCCD, 0x3F800000, 0x00000000, 0x20554002, + 0x3F800000, 0x20444003, 0x000000CF, 0x20044904, 0x00000000, 0x00000200, + 0x00050001, 0x42300001, 0x00000002, 0x20154005, 0x00000020, 0x20244006, + 0x00000000, 0x00000000, 0x10200001, 0x46500007, 0x20030004, 0x46D00004, + 0x00000000, 0x20554008, 0x00070001, 0x61830806, 0x00061020, 0x61808001, + 0x00040000, 0x42380800, 0x00010000, 0x42380800, 0x20040000, 0x46D80800, + 0x00000000, 0x20154007, 0x20020000, 0x46F80000, 0x00000007, 0x20154001, + 0x00000000, 0x00000200, 0x60030001, 0x43900004, 0x60030001, 0x43900001, + 0x00000000, 0x00000400, 0x00013600, 0xC6E20004, 0x40040003, 0x50180104, + 0x40060003, 0x40180803, 0x00000003, 0x20044006, 0x00000000, 0x00000500, + 0x00003609, 0xC7260201, 0x00000000, 0x03000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, +}; + +static unsigned int _a5xx_critical_pkts_mem02[] = { + 0x00000000, 0x03000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x0000000C, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x8ACFE7F3, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + +static unsigned int _a5xx_critical_pkts_mem03[] = { + 0x70438003, /* [0x0000] == TYPE7: SET_DRAW_STATE (43) == */ + 0x0008003A, /* [0x0001] */ + 0x00000000, /* [0x0002] */ + 0x00000000, /* [0x0003] */ + 0x70B08003, /* [0x0004] == TYPE7: LOAD_STATE (30) == */ + 0x00620000, /* [0x0005] */ + 0x00000000, /* [0x0006] */ + 0x00000000, /* [0x0007] */ + 0x40E29801, /* [0x0008] == TYPE4 == */ + 0x0000FFFF, /* [0x0009] A5X_VPC_GS_SIV_CNTL_CTX_0 (0xE298)*/ + 0x48E2A001, /* [0x000A] == TYPE4 == */ + 0x000000FF, /* [0x000B] A5X_VPC_PS_PRIMITIVEID_CNTL_CTX_0 (0xE2A0)*/ + 0x40E40185, /* [0x000C] == TYPE4 == */ + 0x00FCFCFC, /* [0x000D] A5X_VFD_CNTL_1_CTX_0 (0xE401)*/ + 0x0000FCFC, /* [0x000E] A5X_VFD_CNTL_2_CTX_0 (0xE402)*/ + 0x0000FCFC, /* [0x000F] A5X_VFD_CNTL_3_CTX_0 (0xE403)*/ + 0x000000FC, /* [0x0010] A5X_VFD_CNTL_4_CTX_0 (0xE404)*/ + 0x00000000, /* [0x0011] A5X_VFD_CNTL_5_CTX_0 (0xE405)*/ + 0x48E38F01, /* [0x0012] == TYPE4 == */ + 0x00000000, /* [0x0013] A5X_PC_HS_PARAM_CTX_0 (0xE38F)*/ + 0x48E58001, /* [0x0014] == TYPE4 == */ + 0x00000010, /* [0x0015] A5X_SP_SP_CNTL_CTX_0 (0xE580)*/ + 0x40E00001, /* [0x0016] == TYPE4 == */ + 0x00000080, /* [0x0017] A5X_GRAS_CL_CNTL_CTX_0 (0xE000)*/ + 0x40E09583, /* [0x0018] == TYPE4 == */ + 0x00000000, /* [0x0019] A5X_GRAS_SU_POLY_OFFSET_SCALE_CTX_0 (0xE095)*/ + 0x00000000, /* [0x001A] A5X_GRAS_SU_POLY_OFFSET_OFFSET_CTX_0 (0xE096)*/ + 0x00000000, /* [0x001B] A5X_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP_CTX_0 + * (0xE097) + */ + 0x40E09001, /* [0x001C] == TYPE4 == */ + 0x00000010, /* [0x001D] A5X_GRAS_SU_CNTL_CTX_0 (0xE090)*/ + 0x40E0AA02, /* [0x001E] == TYPE4 == */ + 0x00000000, /* [0x001F] A5X_GRAS_SC_SCREEN_SCISSOR_TL_0_CTX_0 (0xE0AA)*/ + 0x001F0073, /* [0x0020] A5X_GRAS_SC_SCREEN_SCISSOR_BR_0_CTX_0 (0xE0AB)*/ + 0x48E01086, /* [0x0021] == TYPE4 == */ + 0x42680000, /* [0x0022] A5X_GRAS_CL_VIEWPORT_XOFFSET_0_CTX_0 (0xE010)*/ + 0x42680000, /* [0x0023] A5X_GRAS_CL_VIEWPORT_XSCALE_0_CTX_0 (0xE011)*/ + 0x41800000, /* [0x0024] A5X_GRAS_CL_VIEWPORT_YOFFSET_0_CTX_0 (0xE012)*/ + 0xC1800000, /* [0x0025] 
A5X_GRAS_CL_VIEWPORT_YSCALE_0_CTX_0 (0xE013)*/ + 0x3EFFFEE0, /* [0x0026] A5X_GRAS_CL_VIEWPORT_ZOFFSET_0_CTX_0 (0xE014)*/ + 0x3EFFFEE0, /* [0x0027] A5X_GRAS_CL_VIEWPORT_ZSCALE_0_CTX_0 (0xE015)*/ + 0x40E0CA02, /* [0x0028] == TYPE4 == */ + 0x00000000, /* [0x0029] A5X_GRAS_SC_VIEWPORT_SCISSOR_TL_0_CTX_0 + * (0xE0CA) + */ + 0x001F0073, /* [0x002A] A5X_GRAS_SC_VIEWPORT_SCISSOR_BR_0_CTX_0 + * (0xE0CB) + */ + 0x40E00601, /* [0x002B] == TYPE4 == */ + 0x0007FDFF, /* [0x002C] A5X_GRAS_CL_GUARDBAND_CLIP_ADJ_CTX_0 (0xE006)*/ + 0x40E70401, /* [0x002D] == TYPE4 == */ + 0x00000000, /* [0x002E] A5X_TPL1_TP_RAS_MSAA_CNTL_CTX_0 (0xE704)*/ + 0x48E70501, /* [0x002F] == TYPE4 == */ + 0x00000004, /* [0x0030] A5X_TPL1_TP_DEST_MSAA_CNTL_CTX_0 (0xE705)*/ + 0x48E14201, /* [0x0031] == TYPE4 == */ + 0x00000000, /* [0x0032] A5X_RB_RAS_MSAA_CNTL_CTX_0 (0xE142)*/ + 0x40E14301, /* [0x0033] == TYPE4 == */ + 0x00000004, /* [0x0034] A5X_RB_DEST_MSAA_CNTL_CTX_0 (0xE143)*/ + 0x40E78683, /* [0x0035] == TYPE4 == */ + 0xFCFCFCFC, /* [0x0036] A5X_HLSQ_CNTL_2_CTX_0 (0xE786)*/ + 0xFCFCFCFC, /* [0x0037] A5X_HLSQ_CNTL_3_CTX_0 (0xE787)*/ + 0xFCFCFCFC, /* [0x0038] A5X_HLSQ_CNTL_4_CTX_0 (0xE788)*/ + 0x48E0A201, /* [0x0039] == TYPE4 == */ + 0x00000000, /* [0x003A] A5X_GRAS_SC_RAS_MSAA_CNTL_CTX_0 (0xE0A2)*/ + 0x40E0A301, /* [0x003B] == TYPE4 == */ + 0x00000004, /* [0x003C] A5X_GRAS_SC_DEST_MSAA_CNTL_CTX_0 (0xE0A3)*/ + 0x48E14101, /* [0x003D] == TYPE4 == */ + 0x0000C089, /* [0x003E] A5X_RB_RENDER_CNTL_CTX_0 (0xE141)*/ + 0x40E0A001, /* [0x003F] == TYPE4 == */ + 0x00000009, /* [0x0040] A5X_GRAS_SC_CNTL_CTX_0 (0xE0A0)*/ + 0x40E28001, /* [0x0041] == TYPE4 == */ + 0x00010004, /* [0x0042] A5X_VPC_CNTL_0_CTX_0 (0xE280)*/ + 0x40E38401, /* [0x0043] == TYPE4 == */ + 0x00000404, /* [0x0044] A5X_PC_PRIMITIVE_CNTL_CTX_0 (0xE384)*/ + 0x40E78501, /* [0x0045] == TYPE4 == */ + 0x0000003F, /* [0x0046] A5X_HLSQ_CNTL_1_CTX_0 (0xE785)*/ + 0x48E5D301, /* [0x0047] == TYPE4 == */ + 0x00000030, /* [0x0048] A5X_SP_PS_MRT_0_CTX_0 (0xE5D3)*/ + 0x48E5CB01, /* [0x0049] == TYPE4 == */ + 0x00000100, /* [0x004A] A5X_SP_PS_OUTPUT_0_CTX_0 (0xE5CB)*/ + 0x40E5CA01, /* [0x004B] == TYPE4 == */ + 0x001F9F81, /* [0x004C] A5X_SP_PS_OUTPUT_CNTL_CTX_0 (0xE5CA)*/ + 0x40E14601, /* [0x004D] == TYPE4 == */ + 0x00000001, /* [0x004E] A5X_RB_PS_OUTPUT_CNTL_CTX_0 (0xE146)*/ + 0x40E38E01, /* [0x004F] == TYPE4 == */ + 0x00000000, /* [0x0050] A5X_PC_GS_PARAM_CTX_0 (0xE38E)*/ + 0x40E28A01, /* [0x0051] == TYPE4 == */ + 0x00000000, /* [0x0052] A5X_VPC_VARYING_REPLACE_MODE_0_CTX_0 (0xE28A)*/ + 0x48E1A901, /* [0x0053] == TYPE4 == */ + 0xFFFF0100, /* [0x0054] A5X_RB_BLEND_CNTL_CTX_0 (0xE1A9)*/ + 0x40E5C901, /* [0x0055] == TYPE4 == */ + 0x00000100, /* [0x0056] A5X_SP_BLEND_CNTL_CTX_0 (0xE5C9)*/ + 0x40E76401, /* [0x0057] == TYPE4 == */ + 0x00000000, /* [0x0058] A5X_TPL1_TP_PS_ROTATION_CNTL_CTX_0 (0xE764)*/ + 0x48E09401, /* [0x0059] == TYPE4 == */ + 0x00000000, /* [0x005A] A5X_GRAS_SU_DEPTH_PLANE_CNTL_CTX_0 (0xE094)*/ + 0x40E1B001, /* [0x005B] == TYPE4 == */ + 0x00000000, /* [0x005C] A5X_RB_DEPTH_PLANE_CNTL_CTX_0 (0xE1B0)*/ + 0x48E1B101, /* [0x005D] == TYPE4 == */ + 0x00000000, /* [0x005E] A5X_RB_DEPTH_CNTL_CTX_0 (0xE1B1)*/ + 0x48E40001, /* [0x005F] == TYPE4 == */ + 0x00000001, /* [0x0060] A5X_VFD_CNTL_0_CTX_0 (0xE400)*/ + 0x48E40A04, /* [0x0061] == TYPE4 == */ + 0x00000000, /* [0x0062] A5X_VFD_VERTEX_BUFFER_BASE_LO_0_CTX_0 (0xE40A)*/ + 0x00000000, /* [0x0063] A5X_VFD_VERTEX_BUFFER_BASE_HI_0_CTX_0 (0xE40B)*/ + 0x00000078, /* [0x0064] A5X_VFD_VERTEX_BUFFER_SIZE_0_CTX_0 (0xE40C)*/ + 0x00000008, /* 
[0x0065] A5X_VFD_VERTEX_BUFFER_STRIDE_0_CTX_0 (0xE40D)*/ + 0x40E48A02, /* [0x0066] == TYPE4 == */ + 0xC6700000, /* [0x0067] A5X_VFD_FETCH_INSTR_0_CTX_0 (0xE48A)*/ + 0x00000001, /* [0x0068] A5X_VFD_FETCH_INSTR_STEP_RATE_0_CTX_0 (0xE48B)*/ + 0x48E4CA01, /* [0x0069] == TYPE4 == */ + 0x0000000F, /* [0x006A] A5X_VFD_DEST_CNTL_0_CTX_0 (0xE4CA)*/ + 0x48E10001, /* [0x006B] == TYPE4 == */ + 0x00000008, /* [0x006C] A5X_GRAS_LRZ_CNTL_CTX_0 (0xE100)*/ + 0x48E0A101, /* [0x006D] == TYPE4 == */ + 0x00000004, /* [0x006E] A5X_GRAS_SC_BIN_CNTL_CTX_0 (0xE0A1)*/ + 0x40E10185, /* [0x006F] == TYPE4 == */ + 0x00000000, /* [0x0070] A5X_GRAS_LRZ_BUFFER_BASE_LO_CTX_0 (0xE101)*/ + 0x00000000, /* [0x0071] A5X_GRAS_LRZ_BUFFER_BASE_HI_CTX_0 (0xE102)*/ + 0x00000001, /* [0x0072] A5X_GRAS_LRZ_BUFFER_PITCH_CTX_0 (0xE103)*/ + 0x00000000, /* [0x0073] A5X_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO_CTX_0 + * (0xE104) + */ + 0x00000000, /* [0x0074] A5X_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI_CTX_0 + * (0xE105) + */ + 0x70388003, /* [0x0075] == TYPE7: DRAW_INDX_OFFSET (38) == */ + 0x00200884, /* [0x0076] */ + 0x00000001, /* [0x0077] */ + 0x00000003, /* [0x0078] */ + 0x70380007, /* [0x0079] == TYPE7: DRAW_INDX_OFFSET (38) == */ + 0x00200404, /* [0x007A] */ + 0x00000001, /* [0x007B] */ + 0x00000003, /* [0x007C] */ + 0x00000000, /* [0x007D] */ + 0x00000000, /* [0x007E] */ + 0x00000000, /* [0x007F] */ + 0x00000006, /* [0x0080] */ + 0x70460004, /* [0x0081] == TYPE7: EVENT_WRITE (46) == */ + 0x00000004, /* [0x0082] */ + 0x00000000, /* [0x0083] */ + 0x00000000, /* [0x0084] */ + 0x00000001, /* [0x0085] */ + 0x70268000, /* [0x0086] == TYPE7: WAIT_FOR_IDLE (26) == */ + 0x70A88003, /* [0x0087] == TYPE7: DRAW_INDIRECT (28) == */ + 0x00200884, /* [0x0088] */ + 0x00000000, /* [0x0089] */ + 0x00000000, /* [0x008A] */ + 0x70460004, /* [0x008B] == TYPE7: EVENT_WRITE (46) == */ + 0x00000004, /* [0x008C] */ + 0x00000000, /* [0x008D] */ + 0x00000000, /* [0x008E] */ + 0x00000001, /* [0x008F] */ + 0x70268000, /* [0x0090] == TYPE7: WAIT_FOR_IDLE (26) == */ + 0x70298006, /* [0x0091] == TYPE7: DRAW_INDX_INDIRECT (29) == */ + 0x00200404, /* [0x0092] */ + 0x00000000, /* [0x0093] */ + 0x00000000, /* [0x0094] */ + 0x00000006, /* [0x0095] */ + 0x00000000, /* [0x0096] */ + 0x00000000, /* [0x0097] */ + 0x40E40801, /* [0x0098] == TYPE4 == */ + 0x0000000D, /* [0x0099] A5X_VFD_INDEX_OFFSET_CTX_0 (0xE408)*/ + 0x48E40901, /* [0x009A] == TYPE4 == */ + 0x00000000, /* [0x009B] A5X_VFD_INSTANCE_START_OFFSET_CTX_0 (0xE409)*/ + 0x70388003, /* [0x009C] == TYPE7: DRAW_INDX_OFFSET (38) == */ + 0x00200884, /* [0x009D] */ + 0x00000001, /* [0x009E] */ + 0x00000003, /* [0x009F] */ + 0x00000000, /* [0x00A0] */ + 0x00000000, /* [0x00A1] */ + 0x00000000, /* [0x00A2] */ + 0x00000000, /* [0x00A3] */ + 0x00000000, /* [0x00A4] */ + 0x00000000, /* [0x00A5] */ + 0x00000000, /* [0x00A6] */ + 0x00000000, /* [0x00A7] */ + 0x48E78401, /* [0x00A8] */ + 0x00000881, /* [0x00A9] */ + 0x40E5C001, /* [0x00AA] */ + 0x0004001E, /* [0x00AB] */ + 0x70438003, /* [0x00AC] */ + 0x0000003A, /* [0x00AD] */ + 0x00000000, /* [0x00AE] */ + 0x00000000, /* [0x00AF] */ + 0x70B00023, /* [0x00B0] */ + 0x00600000, /* [0x00B1] */ + 0x00000000, /* [0x00B2] */ + 0x00000000, /* [0x00B3] */ + 0x00000000, /* [0x00B4] */ + 0x03000000, /* [0x00B5] */ + 0x00000000, /* [0x00B6] */ + 0x00000000, /* [0x00B7] */ + 0x00000000, /* [0x00B8] */ + 0x00000000, /* [0x00B9] */ + 0x00000000, /* [0x00BA] */ + 0x00000000, /* [0x00BB] */ + 0x00000000, /* [0x00BC] */ + 0x00000000, /* [0x00BD] */ + 0x00000000, /* [0x00BE] */ + 0x00000000, /* [0x00BF] */ 
+ 0x00000000, /* [0x00C0] */ + 0x00000000, /* [0x00C1] */ + 0x00000000, /* [0x00C2] */ + 0x00000000, /* [0x00C3] */ + 0x00000000, /* [0x00C4] */ + 0x00000000, /* [0x00C5] */ + 0x00000000, /* [0x00C6] */ + 0x00000000, /* [0x00C7] */ + 0x00000000, /* [0x00C8] */ + 0x00000000, /* [0x00C9] */ + 0x00000000, /* [0x00CA] */ + 0x00000000, /* [0x00CB] */ + 0x00000000, /* [0x00CC] */ + 0x00000000, /* [0x00CD] */ + 0x00000000, /* [0x00CE] */ + 0x00000000, /* [0x00CF] */ + 0x00000000, /* [0x00D0] */ + 0x00000000, /* [0x00D1] */ + 0x00000000, /* [0x00D2] */ + 0x00000000, /* [0x00D3] */ + 0x40E09301, /* [0x00D4] */ + 0x00000000, /* [0x00D5] */ + 0x40E38D01, /* [0x00D6] */ + 0x00000000, /* [0x00D7] */ + 0x40E29801, /* [0x00D8] */ + 0x0000FFFF, /* [0x00D9] */ + 0x48E28201, /* [0x00DA] */ + 0xEAEAEAEA, /* [0x00DB] */ + 0x40E29404, /* [0x00DC] */ + 0xFFFFFFFF, /* [0x00DD] */ + 0xFFFFFFFF, /* [0x00DE] */ + 0xFFFFFFFF, /* [0x00DF] */ + 0xFFFFFFFF, /* [0x00E0] */ + 0x40E5DB01, /* [0x00E1] */ + 0x00000000, /* [0x00E2] */ + 0x48E14701, /* [0x00E3] */ + 0x0000000F, /* [0x00E4] */ + 0x70B00023, /* [0x00E5] */ + 0x00700000, /* [0x00E6] */ + 0x00000000, /* [0x00E7] */ + 0x00000000, /* [0x00E8] */ + 0x00003C00, /* [0x00E9] */ + 0x20400000, /* [0x00EA] */ + 0x00000000, /* [0x00EB] */ + 0x20400001, /* [0x00EC] */ + 0x00000000, /* [0x00ED] */ + 0x20400002, /* [0x00EE] */ + 0x00003C00, /* [0x00EF] */ + 0x20400003, /* [0x00F0] */ + 0x00000000, /* [0x00F1] */ + 0x03000000, /* [0x00F2] */ + 0x00000000, /* [0x00F3] */ + 0x00000000, /* [0x00F4] */ + 0x00000000, /* [0x00F5] */ + 0x00000000, /* [0x00F6] */ + 0x00000000, /* [0x00F7] */ + 0x00000000, /* [0x00F8] */ + 0x00000000, /* [0x00F9] */ + 0x00000000, /* [0x00FA] */ + 0x00000000, /* [0x00FB] */ + 0x00000000, /* [0x00FC] */ + 0x00000000, /* [0x00FD] */ + 0x00000000, /* [0x00FE] */ + 0x00000000, /* [0x00FF] */ + 0x00000000, /* [0x0100] */ + 0x00000000, /* [0x0101] */ + 0x00000000, /* [0x0102] */ + 0x00000000, /* [0x0103] */ + 0x00000000, /* [0x0104] */ + 0x00000000, /* [0x0105] */ + 0x00000000, /* [0x0106] */ + 0x00000000, /* [0x0107] */ + 0x00000000, /* [0x0108] */ + 0x48E2A001, /* [0x0109] */ + 0x000000FF, /* [0x010A] */ + 0x40E40185, /* [0x010B] */ + 0x00FCFCFC, /* [0x010C] */ + 0x0000FCFC, /* [0x010D] */ + 0x0000FCFC, /* [0x010E] */ + 0x000000FC, /* [0x010F] */ + 0x00000000, /* [0x0110] */ + 0x48E38F01, /* [0x0111] */ + 0x00000000, /* [0x0112] */ + 0x48E58001, /* [0x0113] */ + 0x00000010, /* [0x0114] */ + 0x40E1A801, /* [0x0115] */ + 0x00000E00, /* [0x0116] */ + 0x48E15001, /* [0x0117] */ + 0x000007E0, /* [0x0118] */ + 0x40E15101, /* [0x0119] */ + 0x00000000, /* [0x011A] */ + 0x40E00001, /* [0x011B] */ + 0x00000080, /* [0x011C] */ + 0x40E09583, /* [0x011D] */ + 0x00000000, /* [0x011E] */ + 0x00000000, /* [0x011F] */ + 0x00000000, /* [0x0120] */ + 0x40E09001, /* [0x0121] */ + 0x00000010, /* [0x0122] */ + 0x40E0AA02, /* [0x0123] */ + 0x00000000, /* [0x0124] */ + 0x001F0073, /* [0x0125] */ + 0x48E01086, /* [0x0126] */ + 0x42680000, /* [0x0127] */ + 0x42680000, /* [0x0128] */ + 0x41800000, /* [0x0129] */ + 0xC1800000, /* [0x012A] */ + 0x3EFFFEE0, /* [0x012B] */ + 0x3EFFFEE0, /* [0x012C] */ + 0x40E0CA02, /* [0x012D] */ + 0x00000000, /* [0x012E] */ + 0x001F0073, /* [0x012F] */ + 0x40E00601, /* [0x0130] */ + 0x0007FDFF, /* [0x0131] */ + 0x40E70401, /* [0x0132] */ + 0x00000000, /* [0x0133] */ + 0x48E70501, /* [0x0134] */ + 0x00000004, /* [0x0135] */ + 0x48E14201, /* [0x0136] */ + 0x00000000, /* [0x0137] */ + 0x40E14301, /* [0x0138] */ + 0x00000004, /* [0x0139] */ + 0x40E78683, /* 
[0x013A] */ + 0xFCFCFCFC, /* [0x013B] */ + 0xFCFCFCFC, /* [0x013C] */ + 0xFCFCFCFC, /* [0x013D] */ + 0x48E0A201, /* [0x013E] */ + 0x00000000, /* [0x013F] */ + 0x40E0A301, /* [0x0140] */ + 0x00000004, /* [0x0141] */ + 0x48E1B285, /* [0x0142] */ + 0x00000001, /* [0x0143] */ + 0x00004000, /* [0x0144] */ + 0x00000000, /* [0x0145] */ + 0x00000004, /* [0x0146] */ + 0x000000C0, /* [0x0147] */ + 0x48E09801, /* [0x0148] */ + 0x00000001, /* [0x0149] */ + 0x48E00401, /* [0x014A] */ + 0x00000000, /* [0x014B] */ + 0x480CDD02, /* [0x014C] */ + 0x00200074, /* [0x014D] */ + 0x00000000, /* [0x014E] */ + 0x40E15285, /* [0x014F] */ + 0x00000A30, /* [0x0150] */ + 0x00000008, /* [0x0151] */ + 0x00000100, /* [0x0152] */ + 0x00000000, /* [0x0153] */ + 0x00000000, /* [0x0154] */ + 0x48E14101, /* [0x0155] */ + 0x0000C008, /* [0x0156] */ + 0x40E0A001, /* [0x0157] */ + 0x00000008, /* [0x0158] */ + 0x40E28001, /* [0x0159] */ + 0x00010004, /* [0x015A] */ + 0x40E38401, /* [0x015B] */ + 0x00000404, /* [0x015C] */ + 0x40E78501, /* [0x015D] */ + 0x0000003F, /* [0x015E] */ + 0x48E5D301, /* [0x015F] */ + 0x00000030, /* [0x0160] */ + 0x48E5CB01, /* [0x0161] */ + 0x00000100, /* [0x0162] */ + 0x40E5CA01, /* [0x0163] */ + 0x001F9F81, /* [0x0164] */ + 0x40E14601, /* [0x0165] */ + 0x00000001, /* [0x0166] */ + 0x40E38E01, /* [0x0167] */ + 0x00000000, /* [0x0168] */ + 0x40E28A01, /* [0x0169] */ + 0x00000000, /* [0x016A] */ + 0x48E1A901, /* [0x016B] */ + 0xFFFF0100, /* [0x016C] */ + 0x40E5C901, /* [0x016D] */ + 0x00000100, /* [0x016E] */ + 0x40E76401, /* [0x016F] */ + 0x00000000, /* [0x0170] */ + 0x48E09401, /* [0x0171] */ + 0x00000000, /* [0x0172] */ + 0x40E1B001, /* [0x0173] */ + 0x00000000, /* [0x0174] */ + 0x48E1B101, /* [0x0175] */ + 0x00000006, /* [0x0176] */ + 0x48E40001, /* [0x0177] */ + 0x00000001, /* [0x0178] */ + 0x48E40A04, /* [0x0179] */ + 0x00000000, /* [0x017A] */ + 0x00000000, /* [0x017B] */ + 0x00000078, /* [0x017C] */ + 0x00000008, /* [0x017D] */ + 0x40E48A02, /* [0x017E] */ + 0xC6700000, /* [0x017F] */ + 0x00000001, /* [0x0180] */ + 0x48E4CA01, /* [0x0181] */ + 0x0000000F, /* [0x0182] */ + 0x48E10001, /* [0x0183] */ + 0x00000008, /* [0x0184] */ + 0x48E0A101, /* [0x0185] */ + 0x00000000, /* [0x0186] */ + 0x40E10185, /* [0x0187] */ + 0x00000000, /* [0x0188] */ + 0x00000000, /* [0x0189] */ + 0x00000001, /* [0x018A] */ + 0x00000000, /* [0x018B] */ + 0x00000000, /* [0x018C] */ + 0x70230001, /* [0x018D] */ + 0x00000000, /* [0x018E] */ + 0x70388003, /* [0x018F] */ + 0x00200984, /* [0x0190] */ + 0x00000001, /* [0x0191] */ + 0x00000003, /* [0x0192] */ + 0x70380007, /* [0x0193] */ + 0x00200504, /* [0x0194] */ + 0x00000001, /* [0x0195] */ + 0x00000003, /* [0x0196] */ + 0x00000000, /* [0x0197] */ + 0x00000000, /* [0x0198] */ + 0x00000000, /* [0x0199] */ + 0x00000006, /* [0x019A] */ + 0x70460004, /* [0x019B] */ + 0x00000004, /* [0x019C] */ + 0x00000000, /* [0x019D] */ + 0x00000000, /* [0x019E] */ + 0x00000000, /* [0x019F] */ + 0x70268000, /* [0x01A0] */ + 0x70A88003, /* [0x01A1] */ + 0x00200984, /* [0x01A2] */ + 0x00000000, /* [0x01A3] */ + 0x00000000, /* [0x01A4] */ + 0x70460004, /* [0x01A5] */ + 0x00000004, /* [0x01A6] */ + 0x00000000, /* [0x01A7] */ + 0x00000000, /* [0x01A8] */ + 0x00000001, /* [0x01A9] */ + 0x70268000, /* [0x01AA] */ + 0x70298006, /* [0x01AB] */ + 0x00200504, /* [0x01AC] */ + 0x00000000, /* [0x01AD] */ + 0x00000000, /* [0x01AE] */ + 0x00000006, /* [0x01AF] */ + 0x00000000, /* [0x01B0] */ + 0x00000000, /* [0x01B1] */ + 0x40E40801, /* [0x01B2] */ + 0x0000000D, /* [0x01B3] */ + 0x48E40901, /* [0x01B4] */ + 
0x00000000, /* [0x01B5] */ + 0x70388003, /* [0x01B6] */ + 0x00200984, /* [0x01B7] */ + 0x00000001, /* [0x01B8] */ + 0x00000003, /* [0x01B9] */ + 0x00000000, /* [0x01BA] */ + 0x00000000, /* [0x01BB] */ + 0x00000000, /* [0x01BC] */ + 0x00000000, /* [0x01BD] */ + 0x00000000, /* [0x01BE] */ + 0x00000000, /* [0x01BF] */ + 0x70EA0001, /* [0x01C0] */ + 0x00000000, /* [0x01C1] */ + 0x40E78A01, /* [0x01C2] */ + 0x000FFFFF, /* [0x01C3] */ + 0x40E09001, /* [0x01C4] */ + 0x00000000, /* [0x01C5] */ + 0x40E00501, /* [0x01C6] */ + 0x00000000, /* [0x01C7] */ + 0x40E00001, /* [0x01C8] */ + 0x00000181, /* [0x01C9] */ + 0x48E10001, /* [0x01CA] */ + 0x00000000, /* [0x01CB] */ + 0x40E21385, /* [0x01CC] */ + 0x00000004, /* [0x01CD] */ + 0x00000000, /* [0x01CE] */ + 0x00000000, /* [0x01CF] */ + 0x00000001, /* [0x01D0] */ + 0x00000001, /* [0x01D1] */ + 0x40E21C01, /* [0x01D2] */ + 0x00000000, /* [0x01D3] */ + 0x40E21001, /* [0x01D4] */ + 0x00000000, /* [0x01D5] */ + 0x70460004, /* [0x01D6] */ + 0x0000001E, /* [0x01D7] */ + 0x00000000, /* [0x01D8] */ + 0x00000000, /* [0x01D9] */ + 0x00000001, /* [0x01DA] */ + 0x00000000, /* [0x01DB] */ + 0x00000000, /* [0x01DC] */ + 0x00000000, /* [0x01DD] */ + 0x00000000, /* [0x01DE] */ + 0x00000000, /* [0x01DF] */ + 0x40E78A01, /* [0x01E0] */ + 0x020FFFFF, /* [0x01E1] */ + 0x48E78B85, /* [0x01E2] */ + 0x00000001, /* [0x01E3] */ + 0x00003F05, /* [0x01E4] */ + 0x00003F04, /* [0x01E5] */ + 0x00003F04, /* [0x01E6] */ + 0x00003F04, /* [0x01E7] */ + 0x48E79001, /* [0x01E8] */ + 0x00000000, /* [0x01E9] */ + 0x40E79101, /* [0x01EA] */ + 0x00000002, /* [0x01EB] */ + 0x40E79201, /* [0x01EC] */ + 0x00000002, /* [0x01ED] */ + 0x40E58485, /* [0x01EE] */ + 0x00000001, /* [0x01EF] */ + 0x00003F05, /* [0x01F0] */ + 0x00003F04, /* [0x01F1] */ + 0x00003F04, /* [0x01F2] */ + 0x00003F04, /* [0x01F3] */ + 0x48E58901, /* [0x01F4] */ + 0x00000000, /* [0x01F5] */ + 0x48E7C302, /* [0x01F6] */ + 0x00000002, /* [0x01F7] */ + 0x00000001, /* [0x01F8] */ + 0x48E7D702, /* [0x01F9] */ + 0x00000002, /* [0x01FA] */ + 0x00000001, /* [0x01FB] */ + 0x40E7C802, /* [0x01FC] */ + 0x00000000, /* [0x01FD] */ + 0x00000000, /* [0x01FE] */ + 0x40E7CD02, /* [0x01FF] */ + 0x00000000, /* [0x0200] */ + 0x00000000, /* [0x0201] */ + 0x48E7D202, /* [0x0202] */ + 0x00000000, /* [0x0203] */ + 0x00000000, /* [0x0204] */ + 0x40E7DC02, /* [0x0205] */ + 0x00000000, /* [0x0206] */ + 0x00000000, /* [0x0207] */ + 0x48E38901, /* [0x0208] */ + 0x00000000, /* [0x0209] */ + 0x48E29A01, /* [0x020A] */ + 0x00FFFF00, /* [0x020B] */ + 0x48E00101, /* [0x020C] */ + 0x00000000, /* [0x020D] */ + 0x40E29D01, /* [0x020E] */ + 0x0000FF00, /* [0x020F] */ + 0x40E59001, /* [0x0210] */ + 0x00000406, /* [0x0211] */ + 0x48E59201, /* [0x0212] */ + 0x00000001, /* [0x0213] */ + 0x40E59301, /* [0x0214] */ + 0x00000F00, /* [0x0215] */ + 0x40E5A301, /* [0x0216] */ + 0x00000000, /* [0x0217] */ + 0x48E38501, /* [0x0218] */ + 0x00000000, /* [0x0219] */ + 0x00000000, /* [0x021A] */ + 0x00000000, /* [0x021B] */ + 0x00000000, /* [0x021C] */ + 0x00000000, /* [0x021D] */ + 0x00000000, /* [0x021E] */ + 0x00000000, /* [0x021F] */ + 0x48210001, /* [0x0220] */ + 0x86000000, /* [0x0221] */ + 0x40218001, /* [0x0222] */ + 0x86000000, /* [0x0223] */ + 0x40211089, /* [0x0224] */ + 0x00001331, /* [0x0225] */ + 0x00000000, /* [0x0226] */ + 0x00000000, /* [0x0227] */ + 0x00020001, /* [0x0228] */ + 0x00000000, /* [0x0229] */ + 0x00000000, /* [0x022A] */ + 0x00000000, /* [0x022B] */ + 0x00000000, /* [0x022C] */ + 0x00000000, /* [0x022D] */ + 0x48218201, /* [0x022E] */ + 0x00001331, /* 
[0x022F] */ + 0x40214383, /* [0x0230] */ + 0x00000000, /* [0x0231] */ + 0x00000000, /* [0x0232] */ + 0x00000001, /* [0x0233] */ + 0x40210789, /* [0x0234] */ + 0x00000021, /* [0x0235] */ + 0x00000000, /* [0x0236] */ + 0x00000000, /* [0x0237] */ + 0x00020001, /* [0x0238] */ + 0x00000000, /* [0x0239] */ + 0x00000000, /* [0x023A] */ + 0x00000000, /* [0x023B] */ + 0x00000000, /* [0x023C] */ + 0x00000000, /* [0x023D] */ + 0x48218101, /* [0x023E] */ + 0x00000021, /* [0x023F] */ + 0x48218401, /* [0x0240] */ + 0x00000001, /* [0x0241] */ + 0x702C8005, /* [0x0242] */ + 0x00000002, /* [0x0243] */ + 0x00000000, /* [0x0244] */ + 0x00010001, /* [0x0245] */ + 0x00000000, /* [0x0246] */ + 0x00010001, /* [0x0247] */ + 0x70B00023, /* [0x0248] */ + 0x00600000, /* [0x0249] */ + 0x00000000, /* [0x024A] */ + 0x00000000, /* [0x024B] */ + 0x00000000, /* [0x024C] */ + 0x03000000, /* [0x024D] */ + 0x00000000, /* [0x024E] */ + 0x00000000, /* [0x024F] */ + 0x00000000, /* [0x0250] */ + 0x00000000, /* [0x0251] */ + 0x00000000, /* [0x0252] */ + 0x00000000, /* [0x0253] */ + 0x00000000, /* [0x0254] */ + 0x00000000, /* [0x0255] */ + 0x00000000, /* [0x0256] */ + 0x00000000, /* [0x0257] */ + 0x00000000, /* [0x0258] */ + 0x00000000, /* [0x0259] */ + 0x00000000, /* [0x025A] */ + 0x00000000, /* [0x025B] */ + 0x00000000, /* [0x025C] */ + 0x00000000, /* [0x025D] */ + 0x00000000, /* [0x025E] */ + 0x00000000, /* [0x025F] */ + 0x00000000, /* [0x0260] */ + 0x00000000, /* [0x0261] */ + 0x00000000, /* [0x0262] */ + 0x00000000, /* [0x0263] */ + 0x00000000, /* [0x0264] */ + 0x00000000, /* [0x0265] */ + 0x00000000, /* [0x0266] */ + 0x00000000, /* [0x0267] */ + 0x00000000, /* [0x0268] */ + 0x00000000, /* [0x0269] */ + 0x00000000, /* [0x026A] */ + 0x00000000, /* [0x026B] */ + 0x40E09301, /* [0x026C] */ + 0x00000000, /* [0x026D] */ + 0x40E38D01, /* [0x026E] */ + 0x00000000, /* [0x026F] */ + 0x40E29801, /* [0x0270] */ + 0x0000FFFF, /* [0x0271] */ + 0x48E28201, /* [0x0272] */ + 0xEAEAEAEA, /* [0x0273] */ + 0x40E29404, /* [0x0274] */ + 0xFFFFFFFF, /* [0x0275] */ + 0xFFFFFFFF, /* [0x0276] */ + 0xFFFFFFFF, /* [0x0277] */ + 0xFFFFFFFF, /* [0x0278] */ + 0x40E5DB01, /* [0x0279] */ + 0x00000000, /* [0x027A] */ + 0x48E14701, /* [0x027B] */ + 0x0000000F, /* [0x027C] */ + 0x70B00023, /* [0x027D] */ + 0x00700000, /* [0x027E] */ + 0x00000000, /* [0x027F] */ + 0x00000000, /* [0x0280] */ + 0x00003C00, /* [0x0281] */ + 0x20400000, /* [0x0282] */ + 0x00000000, /* [0x0283] */ + 0x20400001, /* [0x0284] */ + 0x00000000, /* [0x0285] */ + 0x20400002, /* [0x0286] */ + 0x00003C00, /* [0x0287] */ + 0x20400003, /* [0x0288] */ + 0x00000000, /* [0x0289] */ + 0x03000000, /* [0x028A] */ + 0x00000000, /* [0x028B] */ + 0x00000000, /* [0x028C] */ + 0x00000000, /* [0x028D] */ + 0x00000000, /* [0x028E] */ + 0x00000000, /* [0x028F] */ + 0x00000000, /* [0x0290] */ + 0x00000000, /* [0x0291] */ + 0x00000000, /* [0x0292] */ + 0x00000000, /* [0x0293] */ + 0x00000000, /* [0x0294] */ + 0x00000000, /* [0x0295] */ + 0x00000000, /* [0x0296] */ + 0x00000000, /* [0x0297] */ + 0x00000000, /* [0x0298] */ + 0x00000000, /* [0x0299] */ + 0x00000000, /* [0x029A] */ + 0x00000000, /* [0x029B] */ + 0x00000000, /* [0x029C] */ + 0x00000000, /* [0x029D] */ + 0x00000000, /* [0x029E] */ + 0x00000000, /* [0x029F] */ + 0x00000000, /* [0x02A0] */ +}; + +/* Fixups for the IBs in _a5xx_critical_pkts_mem03 */ +static const struct adreno_critical_fixup critical_pkt_mem03_fixups[] = { + { 2, 3, 3, 0x0780 }, + { 6, 7, 2, 0x0000 }, + { 98, 99, 1, 0x0000 }, + { 112, 113, 1, 0x0480 }, + { 115, 116, 1, 0x0400 }, + { 
126, 127, 1, 0x0080 }, + { 131, 132, 2, 0x0108 }, + { 137, 138, 1, 0x00A0 }, + { 141, 142, 2, 0x0108 }, + { 147, 148, 1, 0x0080 }, + { 150, 151, 1, 0x00C0 }, + { 174, 175, 3, 0x0780 }, + { 378, 379, 1, 0x0000 }, + { 392, 393, 1, 0x0480 }, + { 395, 396, 1, 0x0400 }, + { 408, 409, 1, 0x0080 }, + { 413, 414, 2, 0x0108 }, + { 419, 420, 1, 0x00A0 }, + { 423, 424, 2, 0x0108 }, + { 429, 430, 1, 0x0080 }, + { 432, 433, 1, 0x00C0 }, + { 462, 463, 0, 0x0700 }, + { 472, 473, 2, 0x0110 }, + { 550, 551, 1, 0x0500 }, + { 561, 562, 1, 0x0600 }, + { 566, 567, 1, 0x0700 }, +}; diff --git a/adreno_a5xx_perfcounter.c b/adreno_a5xx_perfcounter.c new file mode 100644 index 0000000000..8886ee24ba --- /dev/null +++ b/adreno_a5xx_perfcounter.c @@ -0,0 +1,695 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. + */ + +#include "adreno.h" +#include "adreno_a5xx.h" +#include "adreno_perfcounter.h" +#include "adreno_pm4types.h" +#include "kgsl_device.h" + +#define VBIF2_PERF_CNT_SEL_MASK 0x7F +/* offset of clear register from select register */ +#define VBIF2_PERF_CLR_REG_SEL_OFF 8 +/* offset of enable register from select register */ +#define VBIF2_PERF_EN_REG_SEL_OFF 16 +/* offset of clear register from the enable register */ +#define VBIF2_PERF_PWR_CLR_REG_EN_OFF 8 + +static void a5xx_counter_load(struct adreno_device *adreno_dev, + struct adreno_perfcount_register *reg) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int index = reg->load_bit / 32; + u32 enable = BIT(reg->load_bit & 31); + + kgsl_regwrite(device, A5XX_RBBM_PERFCTR_LOAD_VALUE_LO, + lower_32_bits(reg->value)); + + kgsl_regwrite(device, A5XX_RBBM_PERFCTR_LOAD_VALUE_HI, + upper_32_bits(reg->value)); + + kgsl_regwrite(device, A5XX_RBBM_PERFCTR_LOAD_CMD0 + index, enable); +} + +static u64 a5xx_counter_read_norestore(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + u32 hi, lo; + + kgsl_regread(device, reg->offset, &lo); + kgsl_regread(device, reg->offset_hi, &hi); + + return ((((u64) hi) << 32) | lo) + reg->value; +} + +static int a5xx_counter_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter, unsigned int countable) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + + kgsl_regwrite(device, reg->select, countable); + reg->value = 0; + + return 0; +} + +static int a5xx_counter_inline_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter, unsigned int countable) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + struct adreno_ringbuffer *rb = &adreno_dev->ringbuffers[0]; + u32 cmds[3]; + int ret; + + if (!(device->state == KGSL_STATE_ACTIVE)) + return a5xx_counter_enable(adreno_dev, group, counter, + countable); + + cmds[0] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0); + cmds[1] = cp_type4_packet(reg->select, 1); + cmds[2] = countable; + + /* submit to highest priority RB always */ + ret = a5xx_ringbuffer_addcmds(adreno_dev, rb, NULL, + F_NOTPROTECTED, cmds, 3, 0, NULL); + + if (ret) + return ret; + + /* + * schedule dispatcher to make sure rb[0] is run, because + * if the current RB is not rb[0] and gpu is idle then + * rb[0] will not get 
scheduled to run + */ + if (adreno_dev->cur_rb != rb) + adreno_dispatcher_schedule(device); + + /* wait for the above commands submitted to complete */ + ret = adreno_ringbuffer_waittimestamp(rb, rb->timestamp, + ADRENO_IDLE_TIMEOUT); + + if (ret) { + /* + * If we were woken up because of cancelling rb events + * either due to soft reset or adreno_stop, ignore the + * error and return 0 here. The perfcounter is already + * set up in software and it will be programmed in + * hardware when we wake up or come up after soft reset, + * by adreno_perfcounter_restore. + */ + if (ret == -EAGAIN) + ret = 0; + else + dev_err(device->dev, + "Perfcounter %s/%u/%u start via commands failed %d\n", + group->name, counter, countable, ret); + } + + if (!ret) + reg->value = 0; + + return ret; +} + +static int a5xx_counter_rbbm_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter, unsigned int countable) +{ + if (adreno_is_a540(adreno_dev) && countable == A5XX_RBBM_ALWAYS_COUNT) + return -EINVAL; + + return a5xx_counter_inline_enable(adreno_dev, group, counter, + countable); +} + +static u64 a5xx_counter_read(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + u32 hi, lo; + + kgsl_regread(device, reg->offset, &lo); + kgsl_regread(device, reg->offset_hi, &hi); + + return (((u64) hi) << 32) | lo; +} + +static int a5xx_counter_vbif_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter, unsigned int countable) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + + if (countable > VBIF2_PERF_CNT_SEL_MASK) + return -EINVAL; + + /* + * Write 1, followed by 0 to CLR register for + * clearing the counter + */ + kgsl_regwrite(device, + reg->select - VBIF2_PERF_CLR_REG_SEL_OFF, 1); + kgsl_regwrite(device, + reg->select - VBIF2_PERF_CLR_REG_SEL_OFF, 0); + kgsl_regwrite(device, + reg->select, countable & VBIF2_PERF_CNT_SEL_MASK); + /* enable reg is 8 DWORDS before select reg */ + kgsl_regwrite(device, + reg->select - VBIF2_PERF_EN_REG_SEL_OFF, 1); + + kgsl_regwrite(device, reg->select, countable); + + reg->value = 0; + return 0; +} + +static int a5xx_counter_vbif_pwr_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter, unsigned int countable) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + + /* + * Write 1, followed by 0 to CLR register for + * clearing the counter + */ + kgsl_regwrite(device, reg->select + + VBIF2_PERF_PWR_CLR_REG_EN_OFF, 1); + kgsl_regwrite(device, reg->select + + VBIF2_PERF_PWR_CLR_REG_EN_OFF, 0); + kgsl_regwrite(device, reg->select, 1); + + reg->value = 0; + + return 0; +} + +static int a5xx_counter_alwayson_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter, unsigned int countable) +{ + return 0; +} + +static u64 a5xx_counter_alwayson_read(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter) +{ + struct adreno_perfcount_register *reg = &group->regs[counter]; + + return a5xx_read_alwayson(adreno_dev) + reg->value; +} + +static int a5xx_counter_pwr_enable(struct adreno_device *adreno_dev, + const struct 
adreno_perfcount_group *group, + unsigned int counter, unsigned int countable) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + + kgsl_regwrite(device, reg->select, countable); + kgsl_regwrite(device, A5XX_GPMU_POWER_COUNTER_ENABLE, 1); + + reg->value = 0; + return 0; +} + +static int a5xx_counter_pwr_gpmu_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter, unsigned int countable) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + unsigned int shift = (counter << 3) % (sizeof(unsigned int) * 8); + + if (adreno_is_a530(adreno_dev)) { + if (countable > 43) + return -EINVAL; + } else if (adreno_is_a540(adreno_dev)) { + if (countable > 47) + return -EINVAL; + } + + kgsl_regrmw(device, reg->select, 0xff << shift, countable << shift); + kgsl_regwrite(device, A5XX_GPMU_POWER_COUNTER_ENABLE, 1); + + reg->value = 0; + return 0; +} + +static int a5xx_counter_pwr_alwayson_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter, unsigned int countable) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + + kgsl_regwrite(device, A5XX_GPMU_ALWAYS_ON_COUNTER_RESET, 1); + + reg->value = 0; + return 0; +} + +static struct adreno_perfcount_register a5xx_perfcounters_cp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_0_LO, + A5XX_RBBM_PERFCTR_CP_0_HI, 0, A5XX_CP_PERFCTR_CP_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_1_LO, + A5XX_RBBM_PERFCTR_CP_1_HI, 1, A5XX_CP_PERFCTR_CP_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_2_LO, + A5XX_RBBM_PERFCTR_CP_2_HI, 2, A5XX_CP_PERFCTR_CP_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_3_LO, + A5XX_RBBM_PERFCTR_CP_3_HI, 3, A5XX_CP_PERFCTR_CP_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_4_LO, + A5XX_RBBM_PERFCTR_CP_4_HI, 4, A5XX_CP_PERFCTR_CP_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_5_LO, + A5XX_RBBM_PERFCTR_CP_5_HI, 5, A5XX_CP_PERFCTR_CP_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_6_LO, + A5XX_RBBM_PERFCTR_CP_6_HI, 6, A5XX_CP_PERFCTR_CP_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_7_LO, + A5XX_RBBM_PERFCTR_CP_7_HI, 7, A5XX_CP_PERFCTR_CP_SEL_7 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_rbbm[] = { + /* + * A5XX_RBBM_PERFCTR_RBBM_0 is used for frequency scaling and omitted + * from the poool of available counters + */ + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RBBM_1_LO, + A5XX_RBBM_PERFCTR_RBBM_1_HI, 9, A5XX_RBBM_PERFCTR_RBBM_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RBBM_2_LO, + A5XX_RBBM_PERFCTR_RBBM_2_HI, 10, A5XX_RBBM_PERFCTR_RBBM_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RBBM_3_LO, + A5XX_RBBM_PERFCTR_RBBM_3_HI, 11, A5XX_RBBM_PERFCTR_RBBM_SEL_3 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_pc[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_0_LO, + A5XX_RBBM_PERFCTR_PC_0_HI, 12, A5XX_PC_PERFCTR_PC_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_1_LO, + A5XX_RBBM_PERFCTR_PC_1_HI, 13, A5XX_PC_PERFCTR_PC_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_2_LO, + A5XX_RBBM_PERFCTR_PC_2_HI, 14, A5XX_PC_PERFCTR_PC_SEL_2 }, + { 
KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_3_LO, + A5XX_RBBM_PERFCTR_PC_3_HI, 15, A5XX_PC_PERFCTR_PC_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_4_LO, + A5XX_RBBM_PERFCTR_PC_4_HI, 16, A5XX_PC_PERFCTR_PC_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_5_LO, + A5XX_RBBM_PERFCTR_PC_5_HI, 17, A5XX_PC_PERFCTR_PC_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_6_LO, + A5XX_RBBM_PERFCTR_PC_6_HI, 18, A5XX_PC_PERFCTR_PC_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_7_LO, + A5XX_RBBM_PERFCTR_PC_7_HI, 19, A5XX_PC_PERFCTR_PC_SEL_7 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_vfd[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_0_LO, + A5XX_RBBM_PERFCTR_VFD_0_HI, 20, A5XX_VFD_PERFCTR_VFD_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_1_LO, + A5XX_RBBM_PERFCTR_VFD_1_HI, 21, A5XX_VFD_PERFCTR_VFD_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_2_LO, + A5XX_RBBM_PERFCTR_VFD_2_HI, 22, A5XX_VFD_PERFCTR_VFD_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_3_LO, + A5XX_RBBM_PERFCTR_VFD_3_HI, 23, A5XX_VFD_PERFCTR_VFD_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_4_LO, + A5XX_RBBM_PERFCTR_VFD_4_HI, 24, A5XX_VFD_PERFCTR_VFD_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_5_LO, + A5XX_RBBM_PERFCTR_VFD_5_HI, 25, A5XX_VFD_PERFCTR_VFD_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_6_LO, + A5XX_RBBM_PERFCTR_VFD_6_HI, 26, A5XX_VFD_PERFCTR_VFD_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_7_LO, + A5XX_RBBM_PERFCTR_VFD_7_HI, 27, A5XX_VFD_PERFCTR_VFD_SEL_7 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_hlsq[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_0_LO, + A5XX_RBBM_PERFCTR_HLSQ_0_HI, 28, A5XX_HLSQ_PERFCTR_HLSQ_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_1_LO, + A5XX_RBBM_PERFCTR_HLSQ_1_HI, 29, A5XX_HLSQ_PERFCTR_HLSQ_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_2_LO, + A5XX_RBBM_PERFCTR_HLSQ_2_HI, 30, A5XX_HLSQ_PERFCTR_HLSQ_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_3_LO, + A5XX_RBBM_PERFCTR_HLSQ_3_HI, 31, A5XX_HLSQ_PERFCTR_HLSQ_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_4_LO, + A5XX_RBBM_PERFCTR_HLSQ_4_HI, 32, A5XX_HLSQ_PERFCTR_HLSQ_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_5_LO, + A5XX_RBBM_PERFCTR_HLSQ_5_HI, 33, A5XX_HLSQ_PERFCTR_HLSQ_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_6_LO, + A5XX_RBBM_PERFCTR_HLSQ_6_HI, 34, A5XX_HLSQ_PERFCTR_HLSQ_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_7_LO, + A5XX_RBBM_PERFCTR_HLSQ_7_HI, 35, A5XX_HLSQ_PERFCTR_HLSQ_SEL_7 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_vpc[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VPC_0_LO, + A5XX_RBBM_PERFCTR_VPC_0_HI, 36, A5XX_VPC_PERFCTR_VPC_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VPC_1_LO, + A5XX_RBBM_PERFCTR_VPC_1_HI, 37, A5XX_VPC_PERFCTR_VPC_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VPC_2_LO, + A5XX_RBBM_PERFCTR_VPC_2_HI, 38, A5XX_VPC_PERFCTR_VPC_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VPC_3_LO, + A5XX_RBBM_PERFCTR_VPC_3_HI, 39, A5XX_VPC_PERFCTR_VPC_SEL_3 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_ccu[] = { + { 
KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CCU_0_LO, + A5XX_RBBM_PERFCTR_CCU_0_HI, 40, A5XX_RB_PERFCTR_CCU_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CCU_1_LO, + A5XX_RBBM_PERFCTR_CCU_1_HI, 41, A5XX_RB_PERFCTR_CCU_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CCU_2_LO, + A5XX_RBBM_PERFCTR_CCU_2_HI, 42, A5XX_RB_PERFCTR_CCU_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CCU_3_LO, + A5XX_RBBM_PERFCTR_CCU_3_HI, 43, A5XX_RB_PERFCTR_CCU_SEL_3 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_tse[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TSE_0_LO, + A5XX_RBBM_PERFCTR_TSE_0_HI, 44, A5XX_GRAS_PERFCTR_TSE_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TSE_1_LO, + A5XX_RBBM_PERFCTR_TSE_1_HI, 45, A5XX_GRAS_PERFCTR_TSE_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TSE_2_LO, + A5XX_RBBM_PERFCTR_TSE_2_HI, 46, A5XX_GRAS_PERFCTR_TSE_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TSE_3_LO, + A5XX_RBBM_PERFCTR_TSE_3_HI, 47, A5XX_GRAS_PERFCTR_TSE_SEL_3 }, +}; + + +static struct adreno_perfcount_register a5xx_perfcounters_ras[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RAS_0_LO, + A5XX_RBBM_PERFCTR_RAS_0_HI, 48, A5XX_GRAS_PERFCTR_RAS_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RAS_1_LO, + A5XX_RBBM_PERFCTR_RAS_1_HI, 49, A5XX_GRAS_PERFCTR_RAS_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RAS_2_LO, + A5XX_RBBM_PERFCTR_RAS_2_HI, 50, A5XX_GRAS_PERFCTR_RAS_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RAS_3_LO, + A5XX_RBBM_PERFCTR_RAS_3_HI, 51, A5XX_GRAS_PERFCTR_RAS_SEL_3 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_uche[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_0_LO, + A5XX_RBBM_PERFCTR_UCHE_0_HI, 52, A5XX_UCHE_PERFCTR_UCHE_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_1_LO, + A5XX_RBBM_PERFCTR_UCHE_1_HI, 53, A5XX_UCHE_PERFCTR_UCHE_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_2_LO, + A5XX_RBBM_PERFCTR_UCHE_2_HI, 54, A5XX_UCHE_PERFCTR_UCHE_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_3_LO, + A5XX_RBBM_PERFCTR_UCHE_3_HI, 55, A5XX_UCHE_PERFCTR_UCHE_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_4_LO, + A5XX_RBBM_PERFCTR_UCHE_4_HI, 56, A5XX_UCHE_PERFCTR_UCHE_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_5_LO, + A5XX_RBBM_PERFCTR_UCHE_5_HI, 57, A5XX_UCHE_PERFCTR_UCHE_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_6_LO, + A5XX_RBBM_PERFCTR_UCHE_6_HI, 58, A5XX_UCHE_PERFCTR_UCHE_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_7_LO, + A5XX_RBBM_PERFCTR_UCHE_7_HI, 59, A5XX_UCHE_PERFCTR_UCHE_SEL_7 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_tp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_0_LO, + A5XX_RBBM_PERFCTR_TP_0_HI, 60, A5XX_TPL1_PERFCTR_TP_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_1_LO, + A5XX_RBBM_PERFCTR_TP_1_HI, 61, A5XX_TPL1_PERFCTR_TP_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_2_LO, + A5XX_RBBM_PERFCTR_TP_2_HI, 62, A5XX_TPL1_PERFCTR_TP_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_3_LO, + A5XX_RBBM_PERFCTR_TP_3_HI, 63, A5XX_TPL1_PERFCTR_TP_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_4_LO, + A5XX_RBBM_PERFCTR_TP_4_HI, 64, A5XX_TPL1_PERFCTR_TP_SEL_4 }, + { 
KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_5_LO, + A5XX_RBBM_PERFCTR_TP_5_HI, 65, A5XX_TPL1_PERFCTR_TP_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_6_LO, + A5XX_RBBM_PERFCTR_TP_6_HI, 66, A5XX_TPL1_PERFCTR_TP_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_7_LO, + A5XX_RBBM_PERFCTR_TP_7_HI, 67, A5XX_TPL1_PERFCTR_TP_SEL_7 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_sp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_0_LO, + A5XX_RBBM_PERFCTR_SP_0_HI, 68, A5XX_SP_PERFCTR_SP_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_1_LO, + A5XX_RBBM_PERFCTR_SP_1_HI, 69, A5XX_SP_PERFCTR_SP_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_2_LO, + A5XX_RBBM_PERFCTR_SP_2_HI, 70, A5XX_SP_PERFCTR_SP_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_3_LO, + A5XX_RBBM_PERFCTR_SP_3_HI, 71, A5XX_SP_PERFCTR_SP_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_4_LO, + A5XX_RBBM_PERFCTR_SP_4_HI, 72, A5XX_SP_PERFCTR_SP_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_5_LO, + A5XX_RBBM_PERFCTR_SP_5_HI, 73, A5XX_SP_PERFCTR_SP_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_6_LO, + A5XX_RBBM_PERFCTR_SP_6_HI, 74, A5XX_SP_PERFCTR_SP_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_7_LO, + A5XX_RBBM_PERFCTR_SP_7_HI, 75, A5XX_SP_PERFCTR_SP_SEL_7 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_8_LO, + A5XX_RBBM_PERFCTR_SP_8_HI, 76, A5XX_SP_PERFCTR_SP_SEL_8 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_9_LO, + A5XX_RBBM_PERFCTR_SP_9_HI, 77, A5XX_SP_PERFCTR_SP_SEL_9 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_10_LO, + A5XX_RBBM_PERFCTR_SP_10_HI, 78, A5XX_SP_PERFCTR_SP_SEL_10 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_11_LO, + A5XX_RBBM_PERFCTR_SP_11_HI, 79, A5XX_SP_PERFCTR_SP_SEL_11 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_rb[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_0_LO, + A5XX_RBBM_PERFCTR_RB_0_HI, 80, A5XX_RB_PERFCTR_RB_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_1_LO, + A5XX_RBBM_PERFCTR_RB_1_HI, 81, A5XX_RB_PERFCTR_RB_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_2_LO, + A5XX_RBBM_PERFCTR_RB_2_HI, 82, A5XX_RB_PERFCTR_RB_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_3_LO, + A5XX_RBBM_PERFCTR_RB_3_HI, 83, A5XX_RB_PERFCTR_RB_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_4_LO, + A5XX_RBBM_PERFCTR_RB_4_HI, 84, A5XX_RB_PERFCTR_RB_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_5_LO, + A5XX_RBBM_PERFCTR_RB_5_HI, 85, A5XX_RB_PERFCTR_RB_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_6_LO, + A5XX_RBBM_PERFCTR_RB_6_HI, 86, A5XX_RB_PERFCTR_RB_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_7_LO, + A5XX_RBBM_PERFCTR_RB_7_HI, 87, A5XX_RB_PERFCTR_RB_SEL_7 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_vsc[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VSC_0_LO, + A5XX_RBBM_PERFCTR_VSC_0_HI, 88, A5XX_VSC_PERFCTR_VSC_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VSC_1_LO, + A5XX_RBBM_PERFCTR_VSC_1_HI, 89, A5XX_VSC_PERFCTR_VSC_SEL_1 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_lrz[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_LRZ_0_LO, + A5XX_RBBM_PERFCTR_LRZ_0_HI, 90, 
A5XX_GRAS_PERFCTR_LRZ_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_LRZ_1_LO, + A5XX_RBBM_PERFCTR_LRZ_1_HI, 91, A5XX_GRAS_PERFCTR_LRZ_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_LRZ_2_LO, + A5XX_RBBM_PERFCTR_LRZ_2_HI, 92, A5XX_GRAS_PERFCTR_LRZ_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_LRZ_3_LO, + A5XX_RBBM_PERFCTR_LRZ_3_HI, 93, A5XX_GRAS_PERFCTR_LRZ_SEL_3 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_cmp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CMP_0_LO, + A5XX_RBBM_PERFCTR_CMP_0_HI, 94, A5XX_RB_PERFCTR_CMP_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CMP_1_LO, + A5XX_RBBM_PERFCTR_CMP_1_HI, 95, A5XX_RB_PERFCTR_CMP_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CMP_2_LO, + A5XX_RBBM_PERFCTR_CMP_2_HI, 96, A5XX_RB_PERFCTR_CMP_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CMP_3_LO, + A5XX_RBBM_PERFCTR_CMP_3_HI, 97, A5XX_RB_PERFCTR_CMP_SEL_3 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_vbif[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_CNT_LOW0, + A5XX_VBIF_PERF_CNT_HIGH0, -1, A5XX_VBIF_PERF_CNT_SEL0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_CNT_LOW1, + A5XX_VBIF_PERF_CNT_HIGH1, -1, A5XX_VBIF_PERF_CNT_SEL1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_CNT_LOW2, + A5XX_VBIF_PERF_CNT_HIGH2, -1, A5XX_VBIF_PERF_CNT_SEL2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_CNT_LOW3, + A5XX_VBIF_PERF_CNT_HIGH3, -1, A5XX_VBIF_PERF_CNT_SEL3 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_vbif_pwr[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_PWR_CNT_LOW0, + A5XX_VBIF_PERF_PWR_CNT_HIGH0, -1, A5XX_VBIF_PERF_PWR_CNT_EN0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_PWR_CNT_LOW1, + A5XX_VBIF_PERF_PWR_CNT_HIGH1, -1, A5XX_VBIF_PERF_PWR_CNT_EN1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_PWR_CNT_LOW2, + A5XX_VBIF_PERF_PWR_CNT_HIGH2, -1, A5XX_VBIF_PERF_PWR_CNT_EN2 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_alwayson[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_ALWAYSON_COUNTER_LO, + A5XX_RBBM_ALWAYSON_COUNTER_HI, -1 }, +}; + +static struct adreno_perfcount_register a5xx_pwrcounters_sp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_SP_POWER_COUNTER_0_LO, + A5XX_SP_POWER_COUNTER_0_HI, -1, A5XX_SP_POWERCTR_SP_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_SP_POWER_COUNTER_1_LO, + A5XX_SP_POWER_COUNTER_1_HI, -1, A5XX_SP_POWERCTR_SP_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_SP_POWER_COUNTER_2_LO, + A5XX_SP_POWER_COUNTER_2_HI, -1, A5XX_SP_POWERCTR_SP_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_SP_POWER_COUNTER_3_LO, + A5XX_SP_POWER_COUNTER_3_HI, -1, A5XX_SP_POWERCTR_SP_SEL_3 }, +}; + +static struct adreno_perfcount_register a5xx_pwrcounters_tp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_TP_POWER_COUNTER_0_LO, + A5XX_TP_POWER_COUNTER_0_HI, -1, A5XX_TPL1_POWERCTR_TP_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_TP_POWER_COUNTER_1_LO, + A5XX_TP_POWER_COUNTER_1_HI, -1, A5XX_TPL1_POWERCTR_TP_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_TP_POWER_COUNTER_2_LO, + A5XX_TP_POWER_COUNTER_2_HI, -1, A5XX_TPL1_POWERCTR_TP_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_TP_POWER_COUNTER_3_LO, + A5XX_TP_POWER_COUNTER_3_HI, -1, A5XX_TPL1_POWERCTR_TP_SEL_3 }, +}; + +static struct adreno_perfcount_register a5xx_pwrcounters_rb[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RB_POWER_COUNTER_0_LO, + 
A5XX_RB_POWER_COUNTER_0_HI, -1, A5XX_RB_POWERCTR_RB_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RB_POWER_COUNTER_1_LO, + A5XX_RB_POWER_COUNTER_1_HI, -1, A5XX_RB_POWERCTR_RB_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RB_POWER_COUNTER_2_LO, + A5XX_RB_POWER_COUNTER_2_HI, -1, A5XX_RB_POWERCTR_RB_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RB_POWER_COUNTER_3_LO, + A5XX_RB_POWER_COUNTER_3_HI, -1, A5XX_RB_POWERCTR_RB_SEL_3 }, +}; + +static struct adreno_perfcount_register a5xx_pwrcounters_ccu[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_CCU_POWER_COUNTER_0_LO, + A5XX_CCU_POWER_COUNTER_0_HI, -1, A5XX_RB_POWERCTR_CCU_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_CCU_POWER_COUNTER_1_LO, + A5XX_CCU_POWER_COUNTER_1_HI, -1, A5XX_RB_POWERCTR_CCU_SEL_1 }, +}; + +static struct adreno_perfcount_register a5xx_pwrcounters_uche[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_UCHE_POWER_COUNTER_0_LO, + A5XX_UCHE_POWER_COUNTER_0_HI, -1, + A5XX_UCHE_POWERCTR_UCHE_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_UCHE_POWER_COUNTER_1_LO, + A5XX_UCHE_POWER_COUNTER_1_HI, -1, + A5XX_UCHE_POWERCTR_UCHE_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_UCHE_POWER_COUNTER_2_LO, + A5XX_UCHE_POWER_COUNTER_2_HI, -1, + A5XX_UCHE_POWERCTR_UCHE_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_UCHE_POWER_COUNTER_3_LO, + A5XX_UCHE_POWER_COUNTER_3_HI, -1, + A5XX_UCHE_POWERCTR_UCHE_SEL_3 }, +}; + +static struct adreno_perfcount_register a5xx_pwrcounters_cp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_CP_POWER_COUNTER_0_LO, + A5XX_CP_POWER_COUNTER_0_HI, -1, A5XX_CP_POWERCTR_CP_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_CP_POWER_COUNTER_1_LO, + A5XX_CP_POWER_COUNTER_1_HI, -1, A5XX_CP_POWERCTR_CP_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_CP_POWER_COUNTER_2_LO, + A5XX_CP_POWER_COUNTER_2_HI, -1, A5XX_CP_POWERCTR_CP_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_CP_POWER_COUNTER_3_LO, + A5XX_CP_POWER_COUNTER_3_HI, -1, A5XX_CP_POWERCTR_CP_SEL_3 }, +}; + +static struct adreno_perfcount_register a5xx_pwrcounters_gpmu[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_POWER_COUNTER_0_LO, + A5XX_GPMU_POWER_COUNTER_0_HI, -1, + A5XX_GPMU_POWER_COUNTER_SELECT_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_POWER_COUNTER_1_LO, + A5XX_GPMU_POWER_COUNTER_1_HI, -1, + A5XX_GPMU_POWER_COUNTER_SELECT_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_POWER_COUNTER_2_LO, + A5XX_GPMU_POWER_COUNTER_2_HI, -1, + A5XX_GPMU_POWER_COUNTER_SELECT_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_POWER_COUNTER_3_LO, + A5XX_GPMU_POWER_COUNTER_3_HI, -1, + A5XX_GPMU_POWER_COUNTER_SELECT_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_POWER_COUNTER_4_LO, + A5XX_GPMU_POWER_COUNTER_4_HI, -1, + A5XX_GPMU_POWER_COUNTER_SELECT_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_POWER_COUNTER_5_LO, + A5XX_GPMU_POWER_COUNTER_5_HI, -1, + A5XX_GPMU_POWER_COUNTER_SELECT_1 }, +}; + +static struct adreno_perfcount_register a5xx_pwrcounters_alwayson[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_ALWAYS_ON_COUNTER_LO, + A5XX_GPMU_ALWAYS_ON_COUNTER_HI, -1 }, +}; + +#define A5XX_PERFCOUNTER_GROUP(offset, name, enable, read, load) \ + ADRENO_PERFCOUNTER_GROUP(a5xx, offset, name, enable, read, load) + +#define A5XX_PERFCOUNTER_GROUP_FLAGS(offset, name, flags, enable, read, load) \ + ADRENO_PERFCOUNTER_GROUP_FLAGS(a5xx, offset, name, flags, enable, \ + read, load) + +#define A5XX_POWER_COUNTER_GROUP(offset, name, enable, read) \ + [KGSL_PERFCOUNTER_GROUP_##offset##_PWR] = { a5xx_pwrcounters_##name, \ + 
ARRAY_SIZE(a5xx_pwrcounters_##name), __stringify(name##_pwr), 0, \ + enable, read, NULL } + +#define A5XX_REGULAR_PERFCOUNTER_GROUP(offset, name) \ + A5XX_PERFCOUNTER_GROUP(offset, name, a5xx_counter_inline_enable, \ + a5xx_counter_read, a5xx_counter_load) + +static struct adreno_perfcount_group a5xx_perfcounter_groups + [KGSL_PERFCOUNTER_GROUP_MAX] = { + A5XX_REGULAR_PERFCOUNTER_GROUP(CP, cp), + A5XX_PERFCOUNTER_GROUP(RBBM, rbbm, + a5xx_counter_rbbm_enable, a5xx_counter_read, a5xx_counter_load), + A5XX_REGULAR_PERFCOUNTER_GROUP(PC, pc), + A5XX_REGULAR_PERFCOUNTER_GROUP(VFD, vfd), + A5XX_REGULAR_PERFCOUNTER_GROUP(HLSQ, hlsq), + A5XX_REGULAR_PERFCOUNTER_GROUP(VPC, vpc), + A5XX_REGULAR_PERFCOUNTER_GROUP(CCU, ccu), + A5XX_REGULAR_PERFCOUNTER_GROUP(CMP, cmp), + A5XX_REGULAR_PERFCOUNTER_GROUP(TSE, tse), + A5XX_REGULAR_PERFCOUNTER_GROUP(RAS, ras), + A5XX_REGULAR_PERFCOUNTER_GROUP(LRZ, lrz), + A5XX_REGULAR_PERFCOUNTER_GROUP(UCHE, uche), + A5XX_REGULAR_PERFCOUNTER_GROUP(TP, tp), + A5XX_REGULAR_PERFCOUNTER_GROUP(SP, sp), + A5XX_REGULAR_PERFCOUNTER_GROUP(RB, rb), + A5XX_REGULAR_PERFCOUNTER_GROUP(VSC, vsc), + A5XX_PERFCOUNTER_GROUP(VBIF, vbif, + a5xx_counter_vbif_enable, a5xx_counter_read_norestore, NULL), + A5XX_PERFCOUNTER_GROUP_FLAGS(VBIF_PWR, vbif_pwr, + ADRENO_PERFCOUNTER_GROUP_FIXED, + a5xx_counter_vbif_pwr_enable, + a5xx_counter_read_norestore, NULL), + A5XX_PERFCOUNTER_GROUP_FLAGS(ALWAYSON, alwayson, + ADRENO_PERFCOUNTER_GROUP_FIXED, + a5xx_counter_alwayson_enable, a5xx_counter_alwayson_read, NULL), + A5XX_POWER_COUNTER_GROUP(SP, sp, + a5xx_counter_pwr_enable, a5xx_counter_read_norestore), + A5XX_POWER_COUNTER_GROUP(TP, tp, + a5xx_counter_pwr_enable, a5xx_counter_read_norestore), + A5XX_POWER_COUNTER_GROUP(RB, rb, + a5xx_counter_pwr_enable, a5xx_counter_read_norestore), + A5XX_POWER_COUNTER_GROUP(CCU, ccu, + a5xx_counter_pwr_enable, a5xx_counter_read_norestore), + A5XX_POWER_COUNTER_GROUP(UCHE, uche, + a5xx_counter_pwr_enable, a5xx_counter_read_norestore), + A5XX_POWER_COUNTER_GROUP(CP, cp, + a5xx_counter_pwr_enable, a5xx_counter_read_norestore), + A5XX_POWER_COUNTER_GROUP(GPMU, gpmu, + a5xx_counter_pwr_gpmu_enable, a5xx_counter_read_norestore), + A5XX_POWER_COUNTER_GROUP(ALWAYSON, alwayson, + a5xx_counter_pwr_alwayson_enable, a5xx_counter_read_norestore), +}; + +const struct adreno_perfcounters adreno_a5xx_perfcounters = { + a5xx_perfcounter_groups, + ARRAY_SIZE(a5xx_perfcounter_groups), +}; diff --git a/adreno_a5xx_preempt.c b/adreno_a5xx_preempt.c new file mode 100644 index 0000000000..bf3126ef74 --- /dev/null +++ b/adreno_a5xx_preempt.c @@ -0,0 +1,552 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2014-2017,2021 The Linux Foundation. All rights reserved. + */ + +#include "adreno.h" +#include "adreno_a5xx.h" +#include "adreno_pm4types.h" +#include "adreno_trace.h" + +#define PREEMPT_RECORD(_field) \ + offsetof(struct a5xx_cp_preemption_record, _field) + +#define PREEMPT_SMMU_RECORD(_field) \ + offsetof(struct a5xx_cp_smmu_info, _field) + +static void _update_wptr(struct adreno_device *adreno_dev, bool reset_timer) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_ringbuffer *rb = adreno_dev->cur_rb; + unsigned int wptr; + unsigned long flags; + + spin_lock_irqsave(&rb->preempt_lock, flags); + + kgsl_regread(device, A5XX_CP_RB_WPTR, &wptr); + + if (wptr != rb->wptr) { + kgsl_regwrite(device, A5XX_CP_RB_WPTR, rb->wptr); + /* + * In case something got submitted while preemption was on + * going, reset the timer. 
+ */ + reset_timer = true; + } + + if (reset_timer) + rb->dispatch_q.expires = jiffies + + msecs_to_jiffies(adreno_drawobj_timeout); + + spin_unlock_irqrestore(&rb->preempt_lock, flags); +} + +static void _a5xx_preemption_done(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned int status; + + /* + * In the very unlikely case that the power is off, do nothing - the + * state will be reset on power up and everybody will be happy + */ + + if (!kgsl_state_is_awake(device)) + return; + + kgsl_regread(device, A5XX_CP_CONTEXT_SWITCH_CNTL, &status); + + if (status != 0) { + dev_err(device->dev, + "Preemption not complete: status=%X cur=%d R/W=%X/%X next=%d R/W=%X/%X\n", + status, adreno_dev->cur_rb->id, + adreno_get_rptr(adreno_dev->cur_rb), + adreno_dev->cur_rb->wptr, + adreno_dev->next_rb->id, + adreno_get_rptr(adreno_dev->next_rb), + adreno_dev->next_rb->wptr); + + /* Set a fault and restart */ + adreno_dispatcher_fault(adreno_dev, ADRENO_PREEMPT_FAULT); + + return; + } + + del_timer_sync(&adreno_dev->preempt.timer); + + trace_adreno_preempt_done(adreno_dev->cur_rb, adreno_dev->next_rb, 0); + + /* Clean up all the bits */ + adreno_dev->prev_rb = adreno_dev->cur_rb; + adreno_dev->cur_rb = adreno_dev->next_rb; + adreno_dev->next_rb = NULL; + + /* Update the wptr for the new command queue */ + _update_wptr(adreno_dev, true); + + /* Update the dispatcher timer for the new command queue */ + mod_timer(&adreno_dev->dispatcher.timer, + adreno_dev->cur_rb->dispatch_q.expires); + + /* Clear the preempt state */ + adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE); +} + +static void _a5xx_preemption_fault(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned int status; + + /* + * If the power is on check the preemption status one more time - if it + * was successful then just transition to the complete state + */ + if (kgsl_state_is_awake(device)) { + kgsl_regread(device, A5XX_CP_CONTEXT_SWITCH_CNTL, &status); + + if (status == 0) { + adreno_set_preempt_state(adreno_dev, + ADRENO_PREEMPT_COMPLETE); + + adreno_dispatcher_schedule(device); + return; + } + } + + dev_err(device->dev, + "Preemption timed out: cur=%d R/W=%X/%X, next=%d R/W=%X/%X\n", + adreno_dev->cur_rb->id, + adreno_get_rptr(adreno_dev->cur_rb), + adreno_dev->cur_rb->wptr, + adreno_dev->next_rb->id, + adreno_get_rptr(adreno_dev->next_rb), + adreno_dev->next_rb->wptr); + + adreno_dispatcher_fault(adreno_dev, ADRENO_PREEMPT_FAULT); +} + +static void _a5xx_preemption_worker(struct work_struct *work) +{ + struct adreno_preemption *preempt = container_of(work, + struct adreno_preemption, work); + struct adreno_device *adreno_dev = container_of(preempt, + struct adreno_device, preempt); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + /* Need to take the mutex to make sure that the power stays on */ + mutex_lock(&device->mutex); + + if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_FAULTED)) + _a5xx_preemption_fault(adreno_dev); + + mutex_unlock(&device->mutex); +} + +/* Find the highest priority active ringbuffer */ +static struct adreno_ringbuffer *a5xx_next_ringbuffer( + struct adreno_device *adreno_dev) +{ + struct adreno_ringbuffer *rb; + unsigned long flags; + unsigned int i; + + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + bool empty; + + spin_lock_irqsave(&rb->preempt_lock, flags); + empty = adreno_rb_empty(rb); + spin_unlock_irqrestore(&rb->preempt_lock, flags); + + if (!empty) + return rb; + } + + return NULL; +} + 
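+/*
+ * A rough outline of the preemption handshake implemented by the
+ * functions below (state names are the ADRENO_PREEMPT_* states used
+ * throughout this file):
+ *
+ * 1) a5xx_preemption_trigger() picks the next ringbuffer, copies its
+ *    ttbr0/contextidr into the SMMU info block (per-process pagetables
+ *    only) and its wptr into its preemption record, programs
+ *    A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO/HI, arms preempt.timer and
+ *    finally writes 1 to A5XX_CP_CONTEXT_SWITCH_CNTL
+ *    (NONE -> START -> TRIGGERED).
+ *
+ * 2) The CP clears CONTEXT_SWITCH_CNTL and raises an interrupt;
+ *    a5xx_preempt_callback() confirms the clear, swaps cur_rb/next_rb,
+ *    restores the wptr and drops back to NONE, then re-triggers in case
+ *    more work arrived while the switch was in flight
+ *    (TRIGGERED -> PENDING -> NONE).
+ *
+ * 3) If the interrupt never shows up, preempt.timer expires and, once
+ *    the state is marked FAULTED, _a5xx_preemption_worker() either
+ *    completes a late switch or reports ADRENO_PREEMPT_FAULT to the
+ *    dispatcher.
+ */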
+void a5xx_preemption_trigger(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_iommu *iommu = KGSL_IOMMU(device); + struct adreno_ringbuffer *next; + uint64_t ttbr0; + unsigned int contextidr; + unsigned long flags; + + /* Put ourselves into a possible trigger state */ + if (!adreno_move_preempt_state(adreno_dev, + ADRENO_PREEMPT_NONE, ADRENO_PREEMPT_START)) + return; + + /* Get the next ringbuffer to preempt in */ + next = a5xx_next_ringbuffer(adreno_dev); + + /* + * Nothing to do if every ringbuffer is empty or if the current + * ringbuffer is the only active one + */ + if (next == NULL || next == adreno_dev->cur_rb) { + /* + * Update any critical things that might have been skipped while + * we were looking for a new ringbuffer + */ + + if (next != NULL) { + _update_wptr(adreno_dev, false); + + mod_timer(&adreno_dev->dispatcher.timer, + adreno_dev->cur_rb->dispatch_q.expires); + } + + adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE); + return; + } + + /* Turn off the dispatcher timer */ + del_timer(&adreno_dev->dispatcher.timer); + + /* + * This is the most critical section - we need to take care not to race + * until we have programmed the CP for the switch + */ + + spin_lock_irqsave(&next->preempt_lock, flags); + + /* + * Get the pagetable from the pagetable info. + * The pagetable_desc is allocated and mapped at probe time, and + * preemption_desc at init time, so no need to check if + * sharedmem accesses to these memdescs succeed. + */ + kgsl_sharedmem_readq(next->pagetable_desc, &ttbr0, + PT_INFO_OFFSET(ttbr0)); + kgsl_sharedmem_readl(next->pagetable_desc, &contextidr, + PT_INFO_OFFSET(contextidr)); + + kgsl_sharedmem_writel(next->preemption_desc, + PREEMPT_RECORD(wptr), next->wptr); + + spin_unlock_irqrestore(&next->preempt_lock, flags); + + /* And write it to the smmu info */ + if (kgsl_mmu_is_perprocess(&device->mmu)) { + kgsl_sharedmem_writeq(iommu->smmu_info, + PREEMPT_SMMU_RECORD(ttbr0), ttbr0); + kgsl_sharedmem_writel(iommu->smmu_info, + PREEMPT_SMMU_RECORD(context_idr), contextidr); + } + + kgsl_regwrite(device, A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO, + lower_32_bits(next->preemption_desc->gpuaddr)); + kgsl_regwrite(device, A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_HI, + upper_32_bits(next->preemption_desc->gpuaddr)); + + adreno_dev->next_rb = next; + + /* Start the timer to detect a stuck preemption */ + mod_timer(&adreno_dev->preempt.timer, + jiffies + msecs_to_jiffies(ADRENO_PREEMPT_TIMEOUT)); + + trace_adreno_preempt_trigger(adreno_dev->cur_rb, adreno_dev->next_rb, + 1); + + adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_TRIGGERED); + + /* Trigger the preemption */ + kgsl_regwrite(device, A5XX_CP_CONTEXT_SWITCH_CNTL, 1); +} + +void a5xx_preempt_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned int status; + + if (!adreno_move_preempt_state(adreno_dev, + ADRENO_PREEMPT_TRIGGERED, ADRENO_PREEMPT_PENDING)) + return; + + kgsl_regread(device, A5XX_CP_CONTEXT_SWITCH_CNTL, &status); + + if (status != 0) { + dev_err(KGSL_DEVICE(adreno_dev)->dev, + "preempt interrupt with non-zero status: %X\n", + status); + + /* + * Under the assumption that this is a race between the + * interrupt and the register, schedule the worker to clean up. 
+ * If the status still hasn't resolved itself by the time we get + * there then we have to assume something bad happened + */ + adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_COMPLETE); + adreno_dispatcher_schedule(device); + return; + } + + del_timer(&adreno_dev->preempt.timer); + + trace_adreno_preempt_done(adreno_dev->cur_rb, adreno_dev->next_rb, 0); + + adreno_dev->prev_rb = adreno_dev->cur_rb; + adreno_dev->cur_rb = adreno_dev->next_rb; + adreno_dev->next_rb = NULL; + + /* Update the wptr if it changed while preemption was ongoing */ + _update_wptr(adreno_dev, true); + + /* Update the dispatcher timer for the new command queue */ + mod_timer(&adreno_dev->dispatcher.timer, + adreno_dev->cur_rb->dispatch_q.expires); + + adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE); + + a5xx_preemption_trigger(adreno_dev); +} + +void a5xx_preemption_schedule(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (!adreno_is_preemption_enabled(adreno_dev)) + return; + + mutex_lock(&device->mutex); + + if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_COMPLETE)) + _a5xx_preemption_done(adreno_dev); + + a5xx_preemption_trigger(adreno_dev); + + mutex_unlock(&device->mutex); +} + +u32 a5xx_preemption_pre_ibsubmit(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, + struct adreno_context *drawctxt, u32 *cmds) +{ + unsigned int *cmds_orig = cmds; + uint64_t gpuaddr = rb->preemption_desc->gpuaddr; + unsigned int preempt_style = 0; + + if (!adreno_is_preemption_enabled(adreno_dev)) + return 0; + + if (drawctxt) { + /* + * Preemption from secure to unsecure needs Zap shader to be + * run to clear all secure content. CP does not know during + * preemption if it is switching between secure and unsecure + * contexts so restrict Secure contexts to be preempted at + * ringbuffer level. + */ + if (drawctxt->base.flags & KGSL_CONTEXT_SECURE) + preempt_style = KGSL_CONTEXT_PREEMPT_STYLE_RINGBUFFER; + else + preempt_style = FIELD_GET(KGSL_CONTEXT_PREEMPT_STYLE_MASK, + drawctxt->base.flags); + } + + /* + * CP_PREEMPT_ENABLE_GLOBAL(global preemption) can only be set by KMD + * in ringbuffer. + * 1) set global preemption to 0x0 to disable global preemption. + * Only RB level preemption is allowed in this mode + * 2) Set global preemption to defer(0x2) for finegrain preemption. + * when global preemption is set to defer(0x2), + * CP_PREEMPT_ENABLE_LOCAL(local preemption) determines the + * preemption point. Local preemption + * can be enabled by both UMD(within IB) and KMD. + */ + *cmds++ = cp_type7_packet(CP_PREEMPT_ENABLE_GLOBAL, 1); + *cmds++ = ((preempt_style == KGSL_CONTEXT_PREEMPT_STYLE_FINEGRAIN) + ? 
2 : 0); + + /* Turn CP protection OFF */ + cmds += cp_protected_mode(adreno_dev, cmds, 0); + + /* + * CP during context switch will save context switch info to + * a5xx_cp_preemption_record pointed by CONTEXT_SWITCH_SAVE_ADDR + */ + *cmds++ = cp_type4_packet(A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 1); + *cmds++ = lower_32_bits(gpuaddr); + *cmds++ = cp_type4_packet(A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_HI, 1); + *cmds++ = upper_32_bits(gpuaddr); + + /* Turn CP protection ON */ + cmds += cp_protected_mode(adreno_dev, cmds, 1); + + /* + * Enable local preemption for finegrain preemption in case of + * a misbehaving IB + */ + if (preempt_style == KGSL_CONTEXT_PREEMPT_STYLE_FINEGRAIN) { + *cmds++ = cp_type7_packet(CP_PREEMPT_ENABLE_LOCAL, 1); + *cmds++ = 1; + } else { + *cmds++ = cp_type7_packet(CP_PREEMPT_ENABLE_LOCAL, 1); + *cmds++ = 0; + } + + /* Enable CP_CONTEXT_SWITCH_YIELD packets in the IB2s */ + *cmds++ = cp_type7_packet(CP_YIELD_ENABLE, 1); + *cmds++ = 2; + + return (unsigned int) (cmds - cmds_orig); +} + +unsigned int a5xx_preemption_post_ibsubmit(struct adreno_device *adreno_dev, + unsigned int *cmds) +{ + int dwords = 0; + + if (!adreno_is_preemption_enabled(adreno_dev)) + return 0; + + cmds[dwords++] = cp_type7_packet(CP_CONTEXT_SWITCH_YIELD, 4); + /* Write NULL to the address to skip the data write */ + dwords += cp_gpuaddr(adreno_dev, &cmds[dwords], 0x0); + cmds[dwords++] = 1; + /* generate interrupt on preemption completion */ + cmds[dwords++] = 1; + + return dwords; +} + +void a5xx_preemption_start(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_iommu *iommu = KGSL_IOMMU(device); + struct adreno_ringbuffer *rb; + unsigned int i; + + if (!adreno_is_preemption_enabled(adreno_dev)) + return; + + /* Force the state to be clear */ + adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE); + + /* Only set up smmu info when per-process pagetables are enabled */ + + if (kgsl_mmu_is_perprocess(&device->mmu)) { + /* smmu_info is allocated and mapped in a5xx_preemption_iommu_init */ + kgsl_sharedmem_writel(iommu->smmu_info, + PREEMPT_SMMU_RECORD(magic), A5XX_CP_SMMU_INFO_MAGIC_REF); + kgsl_sharedmem_writeq(iommu->smmu_info, + PREEMPT_SMMU_RECORD(ttbr0), MMU_DEFAULT_TTBR0(device)); + + /* The CP doesn't use the asid record, so poison it */ + kgsl_sharedmem_writel(iommu->smmu_info, + PREEMPT_SMMU_RECORD(asid), 0xDECAFBAD); + kgsl_sharedmem_writel(iommu->smmu_info, + PREEMPT_SMMU_RECORD(context_idr), 0); + + kgsl_regwrite(device, A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO, + lower_32_bits(iommu->smmu_info->gpuaddr)); + + kgsl_regwrite(device, A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI, + upper_32_bits(iommu->smmu_info->gpuaddr)); + } + + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + /* + * preemption_desc is allocated and mapped at init time, + * so no need to check sharedmem_writel return value + */ + kgsl_sharedmem_writel(rb->preemption_desc, + PREEMPT_RECORD(rptr), 0); + kgsl_sharedmem_writel(rb->preemption_desc, + PREEMPT_RECORD(wptr), 0); + + adreno_ringbuffer_set_pagetable(rb, + device->mmu.defaultpagetable); + } + +} + +static int a5xx_preemption_ringbuffer_init(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, uint64_t counteraddr) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (IS_ERR_OR_NULL(rb->preemption_desc)) + rb->preemption_desc = kgsl_allocate_global(device, + A5XX_CP_CTXRECORD_SIZE_IN_BYTES, SZ_16K, 0, + KGSL_MEMDESC_PRIVILEGED, "preemption_desc"); + + if (IS_ERR(rb->preemption_desc)) + return 
PTR_ERR(rb->preemption_desc); + + kgsl_sharedmem_writel(rb->preemption_desc, + PREEMPT_RECORD(magic), A5XX_CP_CTXRECORD_MAGIC_REF); + kgsl_sharedmem_writel(rb->preemption_desc, + PREEMPT_RECORD(info), 0); + kgsl_sharedmem_writel(rb->preemption_desc, + PREEMPT_RECORD(data), 0); + kgsl_sharedmem_writel(rb->preemption_desc, + PREEMPT_RECORD(cntl), A5XX_CP_RB_CNTL_DEFAULT); + kgsl_sharedmem_writel(rb->preemption_desc, + PREEMPT_RECORD(rptr), 0); + kgsl_sharedmem_writel(rb->preemption_desc, + PREEMPT_RECORD(wptr), 0); + kgsl_sharedmem_writeq(rb->preemption_desc, + PREEMPT_RECORD(rptr_addr), SCRATCH_RPTR_GPU_ADDR(device, + rb->id)); + kgsl_sharedmem_writeq(rb->preemption_desc, + PREEMPT_RECORD(rbase), rb->buffer_desc->gpuaddr); + kgsl_sharedmem_writeq(rb->preemption_desc, + PREEMPT_RECORD(counter), counteraddr); + + return 0; +} + +int a5xx_preemption_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_iommu *iommu = KGSL_IOMMU(device); + struct adreno_preemption *preempt = &adreno_dev->preempt; + struct adreno_ringbuffer *rb; + int ret; + unsigned int i; + uint64_t addr; + + /* We are dependent on IOMMU to make preemption go on the CP side */ + if (kgsl_mmu_get_mmutype(device) != KGSL_MMU_TYPE_IOMMU) + return -ENODEV; + + INIT_WORK(&preempt->work, _a5xx_preemption_worker); + + /* Allocate mem for storing preemption counters */ + if (IS_ERR_OR_NULL(preempt->scratch)) + preempt->scratch = kgsl_allocate_global(device, + adreno_dev->num_ringbuffers * + A5XX_CP_CTXRECORD_PREEMPTION_COUNTER_SIZE, 0, 0, 0, + "preemption_counters"); + + ret = PTR_ERR_OR_ZERO(preempt->scratch); + if (ret) + return ret; + + addr = preempt->scratch->gpuaddr; + + /* Allocate mem for storing preemption switch record */ + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + ret = a5xx_preemption_ringbuffer_init(adreno_dev, rb, addr); + if (ret) + return ret; + + addr += A5XX_CP_CTXRECORD_PREEMPTION_COUNTER_SIZE; + } + + /* Allocate mem for storing preemption smmu record */ + if (kgsl_mmu_is_perprocess(&device->mmu) && IS_ERR_OR_NULL(iommu->smmu_info)) + iommu->smmu_info = kgsl_allocate_global(device, PAGE_SIZE, 0, + KGSL_MEMFLAGS_GPUREADONLY, KGSL_MEMDESC_PRIVILEGED, + "smmu_info"); + + if (IS_ERR(iommu->smmu_info)) + return PTR_ERR(iommu->smmu_info); + + set_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv); + return 0; +} diff --git a/adreno_a5xx_ringbuffer.c b/adreno_a5xx_ringbuffer.c new file mode 100644 index 0000000000..fb973e9412 --- /dev/null +++ b/adreno_a5xx_ringbuffer.c @@ -0,0 +1,521 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. + */ + +#include "adreno.h" +#include "adreno_a5xx.h" +#include "adreno_pm4types.h" +#include "adreno_ringbuffer.h" +#include "adreno_trace.h" +#include "kgsl_trace.h" + +static int a5xx_rb_pagetable_switch(struct kgsl_device *device, + struct adreno_context *drawctxt, + struct adreno_ringbuffer *rb, + struct kgsl_pagetable *pagetable, u32 *cmds) +{ + u64 ttbr0 = kgsl_mmu_pagetable_get_ttbr0(pagetable); + u32 id = drawctxt ? 
drawctxt->base.id : 0; + + if (pagetable == device->mmu.defaultpagetable) + return 0; + + cmds[0] = cp_type7_packet(CP_SMMU_TABLE_UPDATE, 3); + cmds[1] = lower_32_bits(ttbr0); + cmds[2] = upper_32_bits(ttbr0); + cmds[3] = id; + + cmds[4] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0); + cmds[5] = cp_type7_packet(CP_WAIT_FOR_ME, 0); + cmds[6] = cp_type4_packet(A5XX_CP_CNTL, 1); + cmds[7] = 1; + + cmds[8] = cp_type7_packet(CP_MEM_WRITE, 5); + cmds[9] = lower_32_bits(rb->pagetable_desc->gpuaddr + + PT_INFO_OFFSET(ttbr0)); + cmds[10] = upper_32_bits(rb->pagetable_desc->gpuaddr + + PT_INFO_OFFSET(ttbr0)); + cmds[11] = lower_32_bits(ttbr0); + cmds[12] = upper_32_bits(ttbr0); + cmds[13] = id; + + cmds[14] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0); + cmds[15] = cp_type7_packet(CP_WAIT_FOR_ME, 0); + cmds[16] = cp_type4_packet(A5XX_CP_CNTL, 1); + cmds[17] = 0; + + return 18; +} + +#define RB_SOPTIMESTAMP(device, rb) \ + MEMSTORE_RB_GPU_ADDR(device, rb, soptimestamp) +#define CTXT_SOPTIMESTAMP(device, drawctxt) \ + MEMSTORE_ID_GPU_ADDR(device, (drawctxt)->base.id, soptimestamp) + +#define RB_EOPTIMESTAMP(device, rb) \ + MEMSTORE_RB_GPU_ADDR(device, rb, eoptimestamp) +#define CTXT_EOPTIMESTAMP(device, drawctxt) \ + MEMSTORE_ID_GPU_ADDR(device, (drawctxt)->base.id, eoptimestamp) + +int a5xx_ringbuffer_submit(struct adreno_ringbuffer *rb, + struct adreno_submit_time *time, bool sync) +{ + struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned long flags; + + adreno_get_submit_time(adreno_dev, rb, time); + adreno_profile_submit_time(time); + + if (sync) { + u32 *cmds = adreno_ringbuffer_allocspace(rb, 3); + + if (IS_ERR(cmds)) + return PTR_ERR(cmds); + + cmds[0] = cp_type7_packet(CP_WHERE_AM_I, 2); + cmds[1] = lower_32_bits(SCRATCH_RPTR_GPU_ADDR(device, rb->id)); + cmds[2] = upper_32_bits(SCRATCH_RPTR_GPU_ADDR(device, rb->id)); + } + + spin_lock_irqsave(&rb->preempt_lock, flags); + if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE)) { + if (adreno_dev->cur_rb == rb) { + kgsl_pwrscale_busy(device); + kgsl_regwrite(device, A5XX_CP_RB_WPTR, rb->_wptr); + } + } + + rb->wptr = rb->_wptr; + spin_unlock_irqrestore(&rb->preempt_lock, flags); + + return 0; +} + +int a5xx_ringbuffer_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int i; + + if (IS_ERR_OR_NULL(device->scratch)) + device->scratch = kgsl_allocate_global(device, PAGE_SIZE, + 0, 0, KGSL_MEMDESC_RANDOM | KGSL_MEMDESC_PRIVILEGED, + "scratch"); + + if (IS_ERR(device->scratch)) + return PTR_ERR(device->scratch); + + adreno_dev->cur_rb = &(adreno_dev->ringbuffers[0]); + + if (!ADRENO_FEATURE(adreno_dev, ADRENO_PREEMPTION)) { + adreno_dev->num_ringbuffers = 1; + return adreno_ringbuffer_setup(adreno_dev, + &adreno_dev->ringbuffers[0], 0); + } + + adreno_dev->num_ringbuffers = ARRAY_SIZE(adreno_dev->ringbuffers); + + for (i = 0; i < adreno_dev->num_ringbuffers; i++) { + int ret; + + ret = adreno_ringbuffer_setup(adreno_dev, + &adreno_dev->ringbuffers[i], i); + if (ret) + return ret; + } + + timer_setup(&adreno_dev->preempt.timer, adreno_preemption_timer, 0); + a5xx_preemption_init(adreno_dev); + return 0; +} + +#define A5XX_SUBMIT_MAX 64 + +int a5xx_ringbuffer_addcmds(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, struct adreno_context *drawctxt, + u32 flags, u32 *in, u32 dwords, u32 timestamp, + struct adreno_submit_time *time) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + static u32 sequence; + u32 size = 
A5XX_SUBMIT_MAX + dwords; + u32 *cmds, index = 0; + u64 profile_gpuaddr; + u32 profile_dwords; + + if (adreno_drawctxt_detached(drawctxt)) + return -ENOENT; + + if (adreno_gpu_fault(adreno_dev) != 0) + return -EPROTO; + + rb->timestamp++; + + if (drawctxt) + drawctxt->internal_timestamp = rb->timestamp; + + cmds = adreno_ringbuffer_allocspace(rb, size); + if (IS_ERR(cmds)) + return PTR_ERR(cmds); + + /* Identify the start of a command */ + cmds[index++] = cp_type7_packet(CP_NOP, 1); + cmds[index++] = drawctxt ? CMD_IDENTIFIER : CMD_INTERNAL_IDENTIFIER; + + /* 14 dwords */ + index += a5xx_preemption_pre_ibsubmit(adreno_dev, rb, drawctxt, + &cmds[index]); + + profile_gpuaddr = adreno_profile_preib_processing(adreno_dev, + drawctxt, &profile_dwords); + + if (profile_gpuaddr) { + cmds[index++] = cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3); + cmds[index++] = lower_32_bits(profile_gpuaddr); + cmds[index++] = upper_32_bits(profile_gpuaddr); + cmds[index++] = profile_dwords; + } + + if (drawctxt) { + cmds[index++] = cp_type7_packet(CP_MEM_WRITE, 3); + cmds[index++] = lower_32_bits(CTXT_SOPTIMESTAMP(device, + drawctxt)); + cmds[index++] = upper_32_bits(CTXT_SOPTIMESTAMP(device, + drawctxt)); + cmds[index++] = timestamp; + } + + cmds[index++] = cp_type7_packet(CP_MEM_WRITE, 3); + cmds[index++] = lower_32_bits(RB_SOPTIMESTAMP(device, rb)); + cmds[index++] = upper_32_bits(RB_SOPTIMESTAMP(device, rb)); + cmds[index++] = rb->timestamp; + + if (IS_SECURE(flags)) { + cmds[index++] = cp_type7_packet(CP_SET_SECURE_MODE, 1); + cmds[index++] = 1; + } + + if (IS_NOTPROTECTED(flags)) { + cmds[index++] = cp_type7_packet(CP_SET_PROTECTED_MODE, 1); + cmds[index++] = 0; + } + + memcpy(&cmds[index], in, dwords << 2); + index += dwords; + + if (IS_NOTPROTECTED(flags)) { + cmds[index++] = cp_type7_packet(CP_SET_PROTECTED_MODE, 1); + cmds[index++] = 1; + } + + /* 4 dwords */ + profile_gpuaddr = adreno_profile_postib_processing(adreno_dev, + drawctxt, &profile_dwords); + + if (profile_gpuaddr) { + cmds[index++] = cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3); + cmds[index++] = lower_32_bits(profile_gpuaddr); + cmds[index++] = upper_32_bits(profile_gpuaddr); + cmds[index++] = profile_dwords; + } + + if (!adreno_is_a510(adreno_dev) && + test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, + &device->mmu.pfpolicy)) + cmds[index++] = cp_type7_packet(CP_WAIT_MEM_WRITES, 0); + + /* + * Do a unique memory write from the GPU to assist in early detection of + * interrupt storms + */ + + cmds[index++] = cp_type7_packet(CP_MEM_WRITE, 3); + cmds[index++] = lower_32_bits(MEMSTORE_ID_GPU_ADDR(device, + KGSL_MEMSTORE_GLOBAL, ref_wait_ts)); + cmds[index++] = upper_32_bits(MEMSTORE_ID_GPU_ADDR(device, + KGSL_MEMSTORE_GLOBAL, ref_wait_ts)); + cmds[index++] = ++sequence; + + /* + * If this is an internal command, just write the ringbuffer timestamp, + * otherwise, write both + */ + if (!drawctxt) { + cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4); + cmds[index++] = CACHE_FLUSH_TS | (1 << 31); + cmds[index++] = lower_32_bits(RB_EOPTIMESTAMP(device, rb)); + cmds[index++] = upper_32_bits(RB_EOPTIMESTAMP(device, rb)); + cmds[index++] = rb->timestamp; + } else { + cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4); + cmds[index++] = CACHE_FLUSH_TS | (1 << 31); + cmds[index++] = lower_32_bits(CTXT_EOPTIMESTAMP(device, + drawctxt)); + cmds[index++] = upper_32_bits(CTXT_EOPTIMESTAMP(device, + drawctxt)); + cmds[index++] = timestamp; + + cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4); + cmds[index++] = CACHE_FLUSH_TS; + cmds[index++] = 
lower_32_bits(RB_EOPTIMESTAMP(device, rb)); + cmds[index++] = upper_32_bits(RB_EOPTIMESTAMP(device, rb)); + cmds[index++] = rb->timestamp; + } + + if (IS_WFI(flags)) + cmds[index++] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0); + + if (IS_SECURE(flags)) { + cmds[index++] = cp_type7_packet(CP_SET_SECURE_MODE, 1); + cmds[index++] = 0; + } + + /* 5 dwords */ + index += a5xx_preemption_post_ibsubmit(adreno_dev, &cmds[index]); + + /* Adjust the thing for the number of bytes we actually wrote */ + rb->_wptr -= (size - index); + + a5xx_ringbuffer_submit(rb, time, + !adreno_is_preemption_enabled(adreno_dev)); + + return 0; +} + +static u32 a5xx_get_alwayson_counter(struct adreno_device *adreno_dev, + u32 *cmds, u64 gpuaddr) +{ + cmds[0] = cp_type7_packet(CP_REG_TO_MEM, 3); + cmds[1] = A5XX_RBBM_ALWAYSON_COUNTER_LO; + + /* On some targets the upper 32 bits are not reliable */ + if (ADRENO_GPUREV(adreno_dev) > ADRENO_REV_A530) + cmds[1] |= (1 << 30) | (2 << 18); + + cmds[2] = lower_32_bits(gpuaddr); + cmds[3] = upper_32_bits(gpuaddr); + + return 4; +} + +/* This is the maximum possible size for 64 bit targets */ +#define PROFILE_IB_DWORDS 4 +#define PROFILE_IB_SLOTS (PAGE_SIZE / (PROFILE_IB_DWORDS << 2)) + +static u64 a5xx_get_user_profiling_ib(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, struct kgsl_drawobj_cmd *cmdobj, + u32 target_offset, u32 *cmds) +{ + u32 offset = rb->profile_index * (PROFILE_IB_DWORDS << 2); + u32 *ib = rb->profile_desc->hostptr + offset; + u64 gpuaddr = rb->profile_desc->gpuaddr + offset; + u32 dwords = a5xx_get_alwayson_counter(adreno_dev, ib, + cmdobj->profiling_buffer_gpuaddr + target_offset); + + cmds[0] = cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3); + cmds[1] = lower_32_bits(gpuaddr); + cmds[2] = upper_32_bits(gpuaddr); + cmds[3] = dwords; + + rb->profile_index = (rb->profile_index + 1) % PROFILE_IB_SLOTS; + + return 4; +} + +static int a5xx_rb_context_switch(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, + struct adreno_context *drawctxt) +{ + struct kgsl_pagetable *pagetable = + adreno_drawctxt_get_pagetable(drawctxt); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int count = 0; + u32 cmds[32]; + + if (adreno_drawctxt_get_pagetable(rb->drawctxt_active) != pagetable) + count += a5xx_rb_pagetable_switch(device, drawctxt, + rb, pagetable, cmds); + + cmds[count++] = cp_type7_packet(CP_NOP, 1); + cmds[count++] = CONTEXT_TO_MEM_IDENTIFIER; + + cmds[count++] = cp_type7_packet(CP_MEM_WRITE, 3); + cmds[count++] = lower_32_bits(MEMSTORE_RB_GPU_ADDR(device, rb, + current_context)); + cmds[count++] = upper_32_bits(MEMSTORE_RB_GPU_ADDR(device, rb, + current_context)); + cmds[count++] = drawctxt->base.id; + + cmds[count++] = cp_type7_packet(CP_MEM_WRITE, 3); + cmds[count++] = lower_32_bits(MEMSTORE_ID_GPU_ADDR(device, + KGSL_MEMSTORE_GLOBAL, current_context)); + cmds[count++] = upper_32_bits(MEMSTORE_ID_GPU_ADDR(device, + KGSL_MEMSTORE_GLOBAL, current_context)); + cmds[count++] = drawctxt->base.id; + + cmds[count++] = cp_type4_packet(A5XX_UCHE_INVALIDATE0, 1); + cmds[count++] = 0x12; + + return a5xx_ringbuffer_addcmds(adreno_dev, rb, NULL, F_NOTPROTECTED, + cmds, count, 0, NULL); +} + +static int a5xx_drawctxt_switch(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, + struct adreno_context *drawctxt) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (rb->drawctxt_active == drawctxt) + return 0; + + if (kgsl_context_detached(&drawctxt->base)) + return -ENOENT; + + if (!_kgsl_context_get(&drawctxt->base)) + 
return -ENOENT; + + trace_adreno_drawctxt_switch(rb, drawctxt); + + a5xx_rb_context_switch(adreno_dev, rb, drawctxt); + + /* Release the current drawctxt as soon as the new one is switched */ + adreno_put_drawctxt_on_timestamp(device, rb->drawctxt_active, + rb, rb->timestamp); + + rb->drawctxt_active = drawctxt; + return 0; +} + + +#define A5XX_USER_PROFILE_IB(dev, rb, cmdobj, cmds, field) \ + a5xx_get_user_profiling_ib((dev), (rb), (cmdobj), \ + offsetof(struct kgsl_drawobj_profiling_buffer, field), \ + (cmds)) + +#define A5XX_KERNEL_PROFILE(dev, cmdobj, cmds, field) \ + a5xx_get_alwayson_counter((dev), (cmds), \ + (dev)->profile_buffer->gpuaddr + \ + ADRENO_DRAWOBJ_PROFILE_OFFSET((cmdobj)->profile_index, \ + field)) + +#define A5XX_COMMAND_DWORDS 32 + +int a5xx_ringbuffer_submitcmd(struct adreno_device *adreno_dev, + struct kgsl_drawobj_cmd *cmdobj, u32 flags, + struct adreno_submit_time *time) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj); + struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context); + struct adreno_ringbuffer *rb = drawctxt->rb; + int ret = 0, numibs = 0, index = 0; + u32 *cmds; + + /* Count the number of IBs (if we are not skipping) */ + if (!IS_SKIP(flags)) { + struct list_head *tmp; + + list_for_each(tmp, &cmdobj->cmdlist) + numibs++; + } + + cmds = kmalloc((A5XX_COMMAND_DWORDS + (numibs * 5)) << 2, GFP_KERNEL); + if (!cmds) { + ret = -ENOMEM; + goto done; + } + + cmds[index++] = cp_type7_packet(CP_NOP, 1); + cmds[index++] = START_IB_IDENTIFIER; + + /* Kernel profiling: 4 dwords */ + if (IS_KERNEL_PROFILE(flags)) + index += A5XX_KERNEL_PROFILE(adreno_dev, cmdobj, &cmds[index], + started); + + /* User profiling: 4 dwords */ + if (IS_USER_PROFILE(flags)) + index += A5XX_USER_PROFILE_IB(adreno_dev, rb, cmdobj, + &cmds[index], gpu_ticks_submitted); + + if (numibs) { + struct kgsl_memobj_node *ib; + + list_for_each_entry(ib, &cmdobj->cmdlist, node) { + if (ib->priv & MEMOBJ_SKIP || + (ib->flags & KGSL_CMDLIST_CTXTSWITCH_PREAMBLE + && !IS_PREAMBLE(flags))) + cmds[index++] = cp_type7_packet(CP_NOP, 4); + + cmds[index++] = + cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3); + cmds[index++] = lower_32_bits(ib->gpuaddr); + cmds[index++] = upper_32_bits(ib->gpuaddr); + + /* Double check that IB_PRIV is never set */ + cmds[index++] = (ib->size >> 2) & 0xfffff; + } + } + + /* + * SRM -- set render mode (ex binning, direct render etc) + * SRM is set by UMD usually at start of IB to tell CP the type of + * preemption. + * KMD needs to set SRM to NULL to indicate CP that rendering is + * done by IB. + */ + cmds[index++] = cp_type7_packet(CP_SET_RENDER_MODE, 5); + cmds[index++] = 0; + cmds[index++] = 0; + cmds[index++] = 0; + cmds[index++] = 0; + cmds[index++] = 0; + + cmds[index++] = cp_type7_packet(CP_YIELD_ENABLE, 1); + cmds[index++] = 1; + + /* 4 dwords */ + if (IS_KERNEL_PROFILE(flags)) + index += A5XX_KERNEL_PROFILE(adreno_dev, cmdobj, &cmds[index], + retired); + + /* 4 dwords */ + if (IS_USER_PROFILE(flags)) + index += A5XX_USER_PROFILE_IB(adreno_dev, rb, cmdobj, + &cmds[index], gpu_ticks_retired); + + cmds[index++] = cp_type7_packet(CP_NOP, 1); + cmds[index++] = END_IB_IDENTIFIER; + + ret = a5xx_drawctxt_switch(adreno_dev, rb, drawctxt); + + /* + * In the unlikely event of an error in the drawctxt switch, + * treat it like a hang + */ + if (ret) { + /* + * It is "normal" to get a -ENOSPC or a -ENOENT. 
Don't log it, + * the upper layers know how to handle it + */ + if (ret != -ENOSPC && ret != -ENOENT) + dev_err(device->dev, + "Unable to switch draw context: %d\n", + ret); + goto done; + } + + adreno_drawobj_set_constraint(device, drawobj); + + ret = a5xx_ringbuffer_addcmds(adreno_dev, drawctxt->rb, drawctxt, + flags, cmds, index, drawobj->timestamp, time); + +done: + trace_kgsl_issueibcmds(device, drawctxt->base.id, numibs, + drawobj->timestamp, drawobj->flags, ret, drawctxt->type); + + kfree(cmds); + return ret; +} diff --git a/adreno_a5xx_snapshot.c b/adreno_a5xx_snapshot.c new file mode 100644 index 0000000000..a871e5e980 --- /dev/null +++ b/adreno_a5xx_snapshot.c @@ -0,0 +1,1220 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2015-2020, The Linux Foundation. All rights reserved. + */ + +#include "adreno.h" +#include "adreno_a5xx.h" +#include "adreno_snapshot.h" + +enum a5xx_rbbm_debbus_id { + A5XX_RBBM_DBGBUS_CP = 0x1, + A5XX_RBBM_DBGBUS_RBBM = 0x2, + A5XX_RBBM_DBGBUS_VBIF = 0x3, + A5XX_RBBM_DBGBUS_HLSQ = 0x4, + A5XX_RBBM_DBGBUS_UCHE = 0x5, + A5XX_RBBM_DBGBUS_DPM = 0x6, + A5XX_RBBM_DBGBUS_TESS = 0x7, + A5XX_RBBM_DBGBUS_PC = 0x8, + A5XX_RBBM_DBGBUS_VFDP = 0x9, + A5XX_RBBM_DBGBUS_VPC = 0xa, + A5XX_RBBM_DBGBUS_TSE = 0xb, + A5XX_RBBM_DBGBUS_RAS = 0xc, + A5XX_RBBM_DBGBUS_VSC = 0xd, + A5XX_RBBM_DBGBUS_COM = 0xe, + A5XX_RBBM_DBGBUS_DCOM = 0xf, + A5XX_RBBM_DBGBUS_LRZ = 0x10, + A5XX_RBBM_DBGBUS_A2D_DSP = 0x11, + A5XX_RBBM_DBGBUS_CCUFCHE = 0x12, + A5XX_RBBM_DBGBUS_GPMU = 0x13, + A5XX_RBBM_DBGBUS_RBP = 0x14, + A5XX_RBBM_DBGBUS_HM = 0x15, + A5XX_RBBM_DBGBUS_RBBM_CFG = 0x16, + A5XX_RBBM_DBGBUS_VBIF_CX = 0x17, + A5XX_RBBM_DBGBUS_GPC = 0x1d, + A5XX_RBBM_DBGBUS_LARC = 0x1e, + A5XX_RBBM_DBGBUS_HLSQ_SPTP = 0x1f, + A5XX_RBBM_DBGBUS_RB_0 = 0x20, + A5XX_RBBM_DBGBUS_RB_1 = 0x21, + A5XX_RBBM_DBGBUS_RB_2 = 0x22, + A5XX_RBBM_DBGBUS_RB_3 = 0x23, + A5XX_RBBM_DBGBUS_CCU_0 = 0x28, + A5XX_RBBM_DBGBUS_CCU_1 = 0x29, + A5XX_RBBM_DBGBUS_CCU_2 = 0x2a, + A5XX_RBBM_DBGBUS_CCU_3 = 0x2b, + A5XX_RBBM_DBGBUS_A2D_RAS_0 = 0x30, + A5XX_RBBM_DBGBUS_A2D_RAS_1 = 0x31, + A5XX_RBBM_DBGBUS_A2D_RAS_2 = 0x32, + A5XX_RBBM_DBGBUS_A2D_RAS_3 = 0x33, + A5XX_RBBM_DBGBUS_VFD_0 = 0x38, + A5XX_RBBM_DBGBUS_VFD_1 = 0x39, + A5XX_RBBM_DBGBUS_VFD_2 = 0x3a, + A5XX_RBBM_DBGBUS_VFD_3 = 0x3b, + A5XX_RBBM_DBGBUS_SP_0 = 0x40, + A5XX_RBBM_DBGBUS_SP_1 = 0x41, + A5XX_RBBM_DBGBUS_SP_2 = 0x42, + A5XX_RBBM_DBGBUS_SP_3 = 0x43, + A5XX_RBBM_DBGBUS_TPL1_0 = 0x48, + A5XX_RBBM_DBGBUS_TPL1_1 = 0x49, + A5XX_RBBM_DBGBUS_TPL1_2 = 0x4a, + A5XX_RBBM_DBGBUS_TPL1_3 = 0x4b +}; + +static const struct adreno_debugbus_block a5xx_debugbus_blocks[] = { + { A5XX_RBBM_DBGBUS_CP, 0x100, }, + { A5XX_RBBM_DBGBUS_RBBM, 0x100, }, + { A5XX_RBBM_DBGBUS_VBIF, 0x100, }, + { A5XX_RBBM_DBGBUS_HLSQ, 0x100, }, + { A5XX_RBBM_DBGBUS_UCHE, 0x100, }, + { A5XX_RBBM_DBGBUS_DPM, 0x100, }, + { A5XX_RBBM_DBGBUS_TESS, 0x100, }, + { A5XX_RBBM_DBGBUS_PC, 0x100, }, + { A5XX_RBBM_DBGBUS_VFDP, 0x100, }, + { A5XX_RBBM_DBGBUS_VPC, 0x100, }, + { A5XX_RBBM_DBGBUS_TSE, 0x100, }, + { A5XX_RBBM_DBGBUS_RAS, 0x100, }, + { A5XX_RBBM_DBGBUS_VSC, 0x100, }, + { A5XX_RBBM_DBGBUS_COM, 0x100, }, + { A5XX_RBBM_DBGBUS_DCOM, 0x100, }, + { A5XX_RBBM_DBGBUS_LRZ, 0x100, }, + { A5XX_RBBM_DBGBUS_A2D_DSP, 0x100, }, + { A5XX_RBBM_DBGBUS_CCUFCHE, 0x100, }, + { A5XX_RBBM_DBGBUS_GPMU, 0x100, }, + { A5XX_RBBM_DBGBUS_RBP, 0x100, }, + { A5XX_RBBM_DBGBUS_HM, 0x100, }, + { A5XX_RBBM_DBGBUS_RBBM_CFG, 0x100, }, + { A5XX_RBBM_DBGBUS_VBIF_CX, 0x100, }, + { A5XX_RBBM_DBGBUS_GPC, 0x100, }, + { A5XX_RBBM_DBGBUS_LARC, 0x100, }, + 
{ A5XX_RBBM_DBGBUS_HLSQ_SPTP, 0x100, }, + { A5XX_RBBM_DBGBUS_RB_0, 0x100, }, + { A5XX_RBBM_DBGBUS_RB_1, 0x100, }, + { A5XX_RBBM_DBGBUS_RB_2, 0x100, }, + { A5XX_RBBM_DBGBUS_RB_3, 0x100, }, + { A5XX_RBBM_DBGBUS_CCU_0, 0x100, }, + { A5XX_RBBM_DBGBUS_CCU_1, 0x100, }, + { A5XX_RBBM_DBGBUS_CCU_2, 0x100, }, + { A5XX_RBBM_DBGBUS_CCU_3, 0x100, }, + { A5XX_RBBM_DBGBUS_A2D_RAS_0, 0x100, }, + { A5XX_RBBM_DBGBUS_A2D_RAS_1, 0x100, }, + { A5XX_RBBM_DBGBUS_A2D_RAS_2, 0x100, }, + { A5XX_RBBM_DBGBUS_A2D_RAS_3, 0x100, }, + { A5XX_RBBM_DBGBUS_VFD_0, 0x100, }, + { A5XX_RBBM_DBGBUS_VFD_1, 0x100, }, + { A5XX_RBBM_DBGBUS_VFD_2, 0x100, }, + { A5XX_RBBM_DBGBUS_VFD_3, 0x100, }, + { A5XX_RBBM_DBGBUS_SP_0, 0x100, }, + { A5XX_RBBM_DBGBUS_SP_1, 0x100, }, + { A5XX_RBBM_DBGBUS_SP_2, 0x100, }, + { A5XX_RBBM_DBGBUS_SP_3, 0x100, }, + { A5XX_RBBM_DBGBUS_TPL1_0, 0x100, }, + { A5XX_RBBM_DBGBUS_TPL1_1, 0x100, }, + { A5XX_RBBM_DBGBUS_TPL1_2, 0x100, }, + { A5XX_RBBM_DBGBUS_TPL1_3, 0x100, }, +}; + +#define A5XX_NUM_AXI_ARB_BLOCKS 2 +#define A5XX_NUM_XIN_BLOCKS 4 + +/* Width of A5XX_CP_DRAW_STATE_ADDR is 8 bits */ +#define A5XX_CP_DRAW_STATE_ADDR_WIDTH 8 + +/* a5xx_snapshot_cp_pm4() - Dump PM4 data in snapshot */ +static size_t a5xx_snapshot_cp_pm4(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_PM4); + size_t size = fw->size; + + if (remain < DEBUG_SECTION_SZ(size)) { + SNAPSHOT_ERR_NOMEM(device, "CP PM4 RAM DEBUG"); + return 0; + } + + header->type = SNAPSHOT_DEBUG_CP_PM4_RAM; + header->size = size; + + memcpy(data, fw->memdesc->hostptr, size * sizeof(uint32_t)); + + return DEBUG_SECTION_SZ(size); +} + +/* a5xx_snapshot_cp_pfp() - Dump the PFP data on snapshot */ +static size_t a5xx_snapshot_cp_pfp(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_PFP); + int size = fw->size; + + if (remain < DEBUG_SECTION_SZ(size)) { + SNAPSHOT_ERR_NOMEM(device, "CP PFP RAM DEBUG"); + return 0; + } + + header->type = SNAPSHOT_DEBUG_CP_PFP_RAM; + header->size = size; + + memcpy(data, fw->memdesc->hostptr, size * sizeof(uint32_t)); + + return DEBUG_SECTION_SZ(size); +} + +/* a5xx_rbbm_debug_bus_read() - Read data from trace bus */ +static void a5xx_rbbm_debug_bus_read(struct kgsl_device *device, + unsigned int block_id, unsigned int index, unsigned int *val) +{ + unsigned int reg; + + reg = (block_id << A5XX_RBBM_CFG_DBGBUS_SEL_PING_BLK_SEL_SHIFT) | + (index << A5XX_RBBM_CFG_DBGBUS_SEL_PING_INDEX_SHIFT); + kgsl_regwrite(device, A5XX_RBBM_CFG_DBGBUS_SEL_A, reg); + kgsl_regwrite(device, A5XX_RBBM_CFG_DBGBUS_SEL_B, reg); + kgsl_regwrite(device, A5XX_RBBM_CFG_DBGBUS_SEL_C, reg); + kgsl_regwrite(device, A5XX_RBBM_CFG_DBGBUS_SEL_D, reg); + + kgsl_regread(device, A5XX_RBBM_CFG_DBGBUS_TRACE_BUF2, val); + val++; + kgsl_regread(device, A5XX_RBBM_CFG_DBGBUS_TRACE_BUF1, val); + +} + +/* a5xx_snapshot_vbif_debugbus() - Dump the VBIF debug data */ +static size_t a5xx_snapshot_vbif_debugbus(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_debugbus *header = + (struct 
kgsl_snapshot_debugbus *)buf; + struct adreno_debugbus_block *block = priv; + int i, j; + /* + * Total number of VBIF data words considering 3 sections: + * 2 arbiter blocks of 16 words + * 4 AXI XIN blocks of 18 dwords each + * 4 core clock side XIN blocks of 12 dwords each + */ + unsigned int dwords = (16 * A5XX_NUM_AXI_ARB_BLOCKS) + + (18 * A5XX_NUM_XIN_BLOCKS) + (12 * A5XX_NUM_XIN_BLOCKS); + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + size_t size; + unsigned int reg_clk; + + size = (dwords * sizeof(unsigned int)) + sizeof(*header); + + if (remain < size) { + SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS"); + return 0; + } + header->id = block->block_id; + header->count = dwords; + + kgsl_regread(device, A5XX_VBIF_CLKON, ®_clk); + kgsl_regwrite(device, A5XX_VBIF_CLKON, reg_clk | + (A5XX_VBIF_CLKON_FORCE_ON_TESTBUS_MASK << + A5XX_VBIF_CLKON_FORCE_ON_TESTBUS_SHIFT)); + kgsl_regwrite(device, A5XX_VBIF_TEST_BUS1_CTRL0, 0); + kgsl_regwrite(device, A5XX_VBIF_TEST_BUS_OUT_CTRL, + (A5XX_VBIF_TEST_BUS_OUT_CTRL_EN_MASK << + A5XX_VBIF_TEST_BUS_OUT_CTRL_EN_SHIFT)); + for (i = 0; i < A5XX_NUM_AXI_ARB_BLOCKS; i++) { + kgsl_regwrite(device, A5XX_VBIF_TEST_BUS2_CTRL0, + (1 << (i + 16))); + for (j = 0; j < 16; j++) { + kgsl_regwrite(device, A5XX_VBIF_TEST_BUS2_CTRL1, + ((j & A5XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_MASK) + << A5XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_SHIFT)); + kgsl_regread(device, A5XX_VBIF_TEST_BUS_OUT, + data); + data++; + } + } + + /* XIN blocks AXI side */ + for (i = 0; i < A5XX_NUM_XIN_BLOCKS; i++) { + kgsl_regwrite(device, A5XX_VBIF_TEST_BUS2_CTRL0, 1 << i); + for (j = 0; j < 18; j++) { + kgsl_regwrite(device, A5XX_VBIF_TEST_BUS2_CTRL1, + ((j & A5XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_MASK) + << A5XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_SHIFT)); + kgsl_regread(device, A5XX_VBIF_TEST_BUS_OUT, + data); + data++; + } + } + + /* XIN blocks core clock side */ + for (i = 0; i < A5XX_NUM_XIN_BLOCKS; i++) { + kgsl_regwrite(device, A5XX_VBIF_TEST_BUS1_CTRL0, 1 << i); + for (j = 0; j < 12; j++) { + kgsl_regwrite(device, A5XX_VBIF_TEST_BUS1_CTRL1, + ((j & A5XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_MASK) + << A5XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_SHIFT)); + kgsl_regread(device, A5XX_VBIF_TEST_BUS_OUT, + data); + data++; + } + } + /* restore the clock of VBIF */ + kgsl_regwrite(device, A5XX_VBIF_CLKON, reg_clk); + return size; +} + +/* a5xx_snapshot_debugbus_block() - Capture debug data for a gpu block */ +static size_t a5xx_snapshot_debugbus_block(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_debugbus *header = + (struct kgsl_snapshot_debugbus *)buf; + struct adreno_debugbus_block *block = priv; + int i; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + unsigned int dwords; + size_t size; + + dwords = block->dwords; + + /* For a5xx each debug bus data unit is 2 DWRODS */ + size = (dwords * sizeof(unsigned int) * 2) + sizeof(*header); + + if (remain < size) { + SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS"); + return 0; + } + + header->id = block->block_id; + header->count = dwords * 2; + + for (i = 0; i < dwords; i++) + a5xx_rbbm_debug_bus_read(device, block->block_id, i, + &data[i*2]); + + return size; +} + +/* a5xx_snapshot_debugbus() - Capture debug bus data */ +static void a5xx_snapshot_debugbus(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + int i; + + kgsl_regwrite(device, A5XX_RBBM_CFG_DBGBUS_CNTLM, + 0xf << A5XX_RBBM_CFG_DEBBUS_CTLTM_ENABLE_SHIFT); + + for (i = 0; i < ARRAY_SIZE(a5xx_debugbus_blocks); i++) { + if 
(a5xx_debugbus_blocks[i].block_id == A5XX_RBBM_DBGBUS_VBIF) + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_DEBUGBUS, + snapshot, a5xx_snapshot_vbif_debugbus, + (void *) &a5xx_debugbus_blocks[i]); + else + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_DEBUGBUS, + snapshot, a5xx_snapshot_debugbus_block, + (void *) &a5xx_debugbus_blocks[i]); + } +} + +static const unsigned int a5xx_vbif_registers[] = { + 0x3000, 0x3007, 0x300C, 0x3014, 0x3018, 0x302C, 0x3030, 0x3030, + 0x3034, 0x3036, 0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040, + 0x3042, 0x3042, 0x3049, 0x3049, 0x3058, 0x3058, 0x305A, 0x3061, + 0x3064, 0x3068, 0x306C, 0x306D, 0x3080, 0x3088, 0x308C, 0x308C, + 0x3090, 0x3094, 0x3098, 0x3098, 0x309C, 0x309C, 0x30C0, 0x30C0, + 0x30C8, 0x30C8, 0x30D0, 0x30D0, 0x30D8, 0x30D8, 0x30E0, 0x30E0, + 0x3100, 0x3100, 0x3108, 0x3108, 0x3110, 0x3110, 0x3118, 0x3118, + 0x3120, 0x3120, 0x3124, 0x3125, 0x3129, 0x3129, 0x3131, 0x3131, + 0x340C, 0x340C, 0x3410, 0x3410, 0x3800, 0x3801, +}; + +/* + * Set of registers to dump for A5XX on snapshot. + * Registers in pairs - first value is the start offset, second + * is the stop offset (inclusive) + */ + +static const unsigned int a5xx_registers[] = { + /* RBBM */ + 0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B, + 0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095, + 0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3, + 0x04E0, 0x04F4, 0X04F8, 0x0529, 0x0531, 0x0533, 0x0540, 0x0555, + 0xF400, 0xF400, 0xF800, 0xF807, + /* CP */ + 0x0800, 0x0803, 0x0806, 0x081A, 0x081F, 0x0841, 0x0860, 0x0860, + 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0X0B1C, 0X0B1E, 0x0B28, + 0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, + /* VSC */ + 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53, 0x0C60, 0x0C61, + /* GRAS */ + 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98, 0x0CA0, 0x0CA0, + 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585, + /* RB */ + 0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8, + 0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E, + 0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545, + /* PC */ + 0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0, + 0x24C0, 0x24C0, + /* VFD */ + 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57, + /* VPC */ + 0x0E60, 0x0E7C, + /* UCHE */ + 0x0E80, 0x0E8F, 0x0E90, 0x0E96, 0xEA0, 0xEA8, 0xEB0, 0xEB2, + + /* RB CTX 0 */ + 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9, 0xE1B0, 0xE1B6, + 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201, 0xE210, 0xE21C, + 0xE240, 0xE268, + /* GRAS CTX 0 */ + 0xE000, 0xE006, 0xE010, 0xE09A, 0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, + 0xE100, 0xE105, + /* PC CTX 0 */ + 0xE380, 0xE38F, 0xE3B0, 0xE3B0, + /* VFD CTX 0 */ + 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0, + /* VPC CTX 0 */ + 0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, + + /* RB CTX 1 */ + 0xE940, 0xE947, 0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, + 0xE9C0, 0xE9C7, 0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, + 0xEA40, 0xEA68, + /* GRAS CTX 1 */ + 0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB, + 0xE900, 0xE905, + /* PC CTX 1 */ + 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, + /* VFD CTX 1 */ + 0xEC00, 0xEC05, 0xEC08, 0xECE9, 0xECF0, 0xECF0, + /* VPC CTX 1 */ + 0xEA80, 0xEA80, 0xEA82, 0xEAA3, 0xEAA5, 0xEAC2, +}; + +/* + * GPMU registers to dump for A5XX on snapshot. 
+ * Registers in pairs - first value is the start offset, second + * is the stop offset (inclusive) + */ + +static const unsigned int a5xx_gpmu_registers[] = { + /* GPMU */ + 0xA800, 0xA8FF, 0xAC60, 0xAC60, +}; + +/* + * Set of registers to dump for A5XX before actually triggering crash dumper. + * Registers in pairs - first value is the start offset, second + * is the stop offset (inclusive) + */ +static const unsigned int a5xx_pre_crashdumper_registers[] = { + /* RBBM: RBBM_STATUS - RBBM_STATUS3 */ + 0x04F5, 0x04F7, 0x0530, 0x0530, + /* CP: CP_STATUS_1 */ + 0x0B1D, 0x0B1D, +}; + + +struct a5xx_hlsq_sp_tp_regs { + unsigned int statetype; + unsigned int ahbaddr; + unsigned int size; + uint64_t offset; +}; + +static struct a5xx_hlsq_sp_tp_regs a5xx_hlsq_sp_tp_registers[] = { + /* HSLQ non context. 0xe32 - 0xe3f are holes so don't include them */ + { 0x35, 0xE00, 0x32 }, + /* HLSQ CTX 0 2D */ + { 0x31, 0x2080, 0x1 }, + /* HLSQ CTX 1 2D */ + { 0x33, 0x2480, 0x1 }, + /* HLSQ CTX 0 3D. 0xe7e2 - 0xe7ff are holes so don't include them */ + { 0x32, 0xE780, 0x62 }, + /* HLSQ CTX 1 3D. 0xefe2 - 0xefff are holes so don't include them */ + { 0x34, 0xEF80, 0x62 }, + + /* SP non context */ + { 0x3f, 0x0EC0, 0x40 }, + /* SP CTX 0 2D */ + { 0x3d, 0x2040, 0x1 }, + /* SP CTX 1 2D */ + { 0x3b, 0x2440, 0x1 }, + /* SP CTX 0 3D */ + { 0x3e, 0xE580, 0x180 }, + /* SP CTX 1 3D */ + { 0x3c, 0xED80, 0x180 }, + + /* TP non context. 0x0f1c - 0x0f3f are holes so don't include them */ + { 0x3a, 0x0F00, 0x1c }, + /* TP CTX 0 2D. 0x200a - 0x200f are holes so don't include them */ + { 0x38, 0x2000, 0xa }, + /* TP CTX 1 2D. 0x240a - 0x240f are holes so don't include them */ + { 0x36, 0x2400, 0xa }, + /* TP CTX 0 3D */ + { 0x39, 0xE700, 0x80 }, + /* TP CTX 1 3D */ + { 0x37, 0xEF00, 0x80 }, +}; + + +#define A5XX_NUM_SHADER_BANKS 4 +#define A5XX_SHADER_STATETYPE_SHIFT 8 + +enum a5xx_shader_obj { + A5XX_TP_W_MEMOBJ = 1, + A5XX_TP_W_SAMPLER = 2, + A5XX_TP_W_MIPMAP_BASE = 3, + A5XX_TP_W_MEMOBJ_TAG = 4, + A5XX_TP_W_SAMPLER_TAG = 5, + A5XX_TP_S_3D_MEMOBJ = 6, + A5XX_TP_S_3D_SAMPLER = 0x7, + A5XX_TP_S_3D_MEMOBJ_TAG = 0x8, + A5XX_TP_S_3D_SAMPLER_TAG = 0x9, + A5XX_TP_S_CS_MEMOBJ = 0xA, + A5XX_TP_S_CS_SAMPLER = 0xB, + A5XX_TP_S_CS_MEMOBJ_TAG = 0xC, + A5XX_TP_S_CS_SAMPLER_TAG = 0xD, + A5XX_SP_W_INSTR = 0xE, + A5XX_SP_W_CONST = 0xF, + A5XX_SP_W_UAV_SIZE = 0x10, + A5XX_SP_W_CB_SIZE = 0x11, + A5XX_SP_W_UAV_BASE = 0x12, + A5XX_SP_W_CB_BASE = 0x13, + A5XX_SP_W_INST_TAG = 0x14, + A5XX_SP_W_STATE = 0x15, + A5XX_SP_S_3D_INSTR = 0x16, + A5XX_SP_S_3D_CONST = 0x17, + A5XX_SP_S_3D_CB_BASE = 0x18, + A5XX_SP_S_3D_CB_SIZE = 0x19, + A5XX_SP_S_3D_UAV_BASE = 0x1A, + A5XX_SP_S_3D_UAV_SIZE = 0x1B, + A5XX_SP_S_CS_INSTR = 0x1C, + A5XX_SP_S_CS_CONST = 0x1D, + A5XX_SP_S_CS_CB_BASE = 0x1E, + A5XX_SP_S_CS_CB_SIZE = 0x1F, + A5XX_SP_S_CS_UAV_BASE = 0x20, + A5XX_SP_S_CS_UAV_SIZE = 0x21, + A5XX_SP_S_3D_INSTR_DIRTY = 0x22, + A5XX_SP_S_3D_CONST_DIRTY = 0x23, + A5XX_SP_S_3D_CB_BASE_DIRTY = 0x24, + A5XX_SP_S_3D_CB_SIZE_DIRTY = 0x25, + A5XX_SP_S_3D_UAV_BASE_DIRTY = 0x26, + A5XX_SP_S_3D_UAV_SIZE_DIRTY = 0x27, + A5XX_SP_S_CS_INSTR_DIRTY = 0x28, + A5XX_SP_S_CS_CONST_DIRTY = 0x29, + A5XX_SP_S_CS_CB_BASE_DIRTY = 0x2A, + A5XX_SP_S_CS_CB_SIZE_DIRTY = 0x2B, + A5XX_SP_S_CS_UAV_BASE_DIRTY = 0x2C, + A5XX_SP_S_CS_UAV_SIZE_DIRTY = 0x2D, + A5XX_HLSQ_ICB = 0x2E, + A5XX_HLSQ_ICB_DIRTY = 0x2F, + A5XX_HLSQ_ICB_CB_BASE_DIRTY = 0x30, + A5XX_SP_POWER_RESTORE_RAM = 0x40, + A5XX_SP_POWER_RESTORE_RAM_TAG = 0x41, + A5XX_TP_POWER_RESTORE_RAM = 0x42, + A5XX_TP_POWER_RESTORE_RAM_TAG = 0x43, + +}; + 
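For reference, the capture script consumed by the CP crash dumper (built by a5xx_crashdump_init() later in this file) is a flat list of qword pairs: the first qword is the GPU address the dumper writes the data to, and the second packs the source register offset into bits [63:44] with the read count in the low bits; the shader and HLSQ/SP/TP entries prepend two more qwords that program the debug aperture via A5XX_HLSQ_DBG_READ_SEL before the read. A minimal standalone sketch of the basic register-pair encoding — using hypothetical names (out, data_gpuaddr), not anything defined in this driver — looks like this:

#include <stdint.h>

/*
 * Sketch of one capture-script entry as emitted by a5xx_crashdump_init():
 * qword 0 is the destination GPU address for the dumped data, qword 1 is
 * (source register offset << 44) | number of registers to read.
 */
uint64_t *script_add_reg_read(uint64_t *out, uint64_t data_gpuaddr,
		uint32_t start_reg, uint32_t count)
{
	*out++ = data_gpuaddr;
	*out++ = ((uint64_t)start_reg << 44) | count;
	return out;
}
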
+struct a5xx_shader_block { + unsigned int statetype; + unsigned int sz; + uint64_t offset; +}; + +struct a5xx_shader_block_info { + struct a5xx_shader_block *block; + unsigned int bank; + uint64_t offset; +}; + +static struct a5xx_shader_block a5xx_shader_blocks[] = { + {A5XX_TP_W_MEMOBJ, 0x200}, + {A5XX_TP_W_MIPMAP_BASE, 0x3C0}, + {A5XX_TP_W_SAMPLER_TAG, 0x40}, + {A5XX_TP_S_3D_SAMPLER, 0x80}, + {A5XX_TP_S_3D_SAMPLER_TAG, 0x20}, + {A5XX_TP_S_CS_SAMPLER, 0x40}, + {A5XX_TP_S_CS_SAMPLER_TAG, 0x10}, + {A5XX_SP_W_CONST, 0x800}, + {A5XX_SP_W_CB_SIZE, 0x30}, + {A5XX_SP_W_CB_BASE, 0xF0}, + {A5XX_SP_W_STATE, 0x1}, + {A5XX_SP_S_3D_CONST, 0x800}, + {A5XX_SP_S_3D_CB_SIZE, 0x28}, + {A5XX_SP_S_3D_UAV_SIZE, 0x80}, + {A5XX_SP_S_CS_CONST, 0x400}, + {A5XX_SP_S_CS_CB_SIZE, 0x8}, + {A5XX_SP_S_CS_UAV_SIZE, 0x80}, + {A5XX_SP_S_3D_CONST_DIRTY, 0x12}, + {A5XX_SP_S_3D_CB_SIZE_DIRTY, 0x1}, + {A5XX_SP_S_3D_UAV_SIZE_DIRTY, 0x2}, + {A5XX_SP_S_CS_CONST_DIRTY, 0xA}, + {A5XX_SP_S_CS_CB_SIZE_DIRTY, 0x1}, + {A5XX_SP_S_CS_UAV_SIZE_DIRTY, 0x2}, + {A5XX_HLSQ_ICB_DIRTY, 0xB}, + {A5XX_SP_POWER_RESTORE_RAM_TAG, 0xA}, + {A5XX_TP_POWER_RESTORE_RAM_TAG, 0xA}, + {A5XX_TP_W_SAMPLER, 0x80}, + {A5XX_TP_W_MEMOBJ_TAG, 0x40}, + {A5XX_TP_S_3D_MEMOBJ, 0x200}, + {A5XX_TP_S_3D_MEMOBJ_TAG, 0x20}, + {A5XX_TP_S_CS_MEMOBJ, 0x100}, + {A5XX_TP_S_CS_MEMOBJ_TAG, 0x10}, + {A5XX_SP_W_INSTR, 0x800}, + {A5XX_SP_W_UAV_SIZE, 0x80}, + {A5XX_SP_W_UAV_BASE, 0x80}, + {A5XX_SP_W_INST_TAG, 0x40}, + {A5XX_SP_S_3D_INSTR, 0x800}, + {A5XX_SP_S_3D_CB_BASE, 0xC8}, + {A5XX_SP_S_3D_UAV_BASE, 0x80}, + {A5XX_SP_S_CS_INSTR, 0x400}, + {A5XX_SP_S_CS_CB_BASE, 0x28}, + {A5XX_SP_S_CS_UAV_BASE, 0x80}, + {A5XX_SP_S_3D_INSTR_DIRTY, 0x1}, + {A5XX_SP_S_3D_CB_BASE_DIRTY, 0x5}, + {A5XX_SP_S_3D_UAV_BASE_DIRTY, 0x2}, + {A5XX_SP_S_CS_INSTR_DIRTY, 0x1}, + {A5XX_SP_S_CS_CB_BASE_DIRTY, 0x1}, + {A5XX_SP_S_CS_UAV_BASE_DIRTY, 0x2}, + {A5XX_HLSQ_ICB, 0x200}, + {A5XX_HLSQ_ICB_CB_BASE_DIRTY, 0x4}, + {A5XX_SP_POWER_RESTORE_RAM, 0x140}, + {A5XX_TP_POWER_RESTORE_RAM, 0x40}, +}; + +static struct kgsl_memdesc *capturescript; +static struct kgsl_memdesc *registers; +static bool crash_dump_valid; + +static size_t a5xx_snapshot_shader_memory(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_shader *header = + (struct kgsl_snapshot_shader *) buf; + struct a5xx_shader_block_info *info = + (struct a5xx_shader_block_info *) priv; + struct a5xx_shader_block *block = info->block; + unsigned int *data = (unsigned int *) (buf + sizeof(*header)); + + if (remain < SHADER_SECTION_SZ(block->sz)) { + SNAPSHOT_ERR_NOMEM(device, "SHADER MEMORY"); + return 0; + } + + header->type = block->statetype; + header->index = info->bank; + header->size = block->sz; + + memcpy(data, registers->hostptr + info->offset, + block->sz * sizeof(unsigned int)); + + return SHADER_SECTION_SZ(block->sz); +} + +static void a5xx_snapshot_shader(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + unsigned int i, j; + struct a5xx_shader_block_info info; + + /* Shader blocks can only be read by the crash dumper */ + if (!crash_dump_valid) + return; + + for (i = 0; i < ARRAY_SIZE(a5xx_shader_blocks); i++) { + for (j = 0; j < A5XX_NUM_SHADER_BANKS; j++) { + info.block = &a5xx_shader_blocks[i]; + info.bank = j; + info.offset = a5xx_shader_blocks[i].offset + + (j * a5xx_shader_blocks[i].sz); + + /* Shader working/shadow memory */ + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_SHADER, + snapshot, a5xx_snapshot_shader_memory, &info); + } + } +} + +/* Dump registers which get 
affected by crash dumper trigger */ +static size_t a5xx_snapshot_pre_crashdump_regs(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_registers pre_cdregs = { + .regs = a5xx_pre_crashdumper_registers, + .count = ARRAY_SIZE(a5xx_pre_crashdumper_registers)/2, + }; + + return kgsl_snapshot_dump_registers(device, buf, remain, &pre_cdregs); +} + +struct registers { + const unsigned int *regs; + size_t size; +}; + +static size_t a5xx_legacy_snapshot_registers(struct kgsl_device *device, + u8 *buf, size_t remain, const unsigned int *regs, size_t size) +{ + struct kgsl_snapshot_registers snapshot_regs = { + .regs = regs, + .count = size / 2, + }; + + return kgsl_snapshot_dump_registers(device, buf, remain, + &snapshot_regs); +} + +#define REG_PAIR_COUNT(_a, _i) \ + (((_a)[(2 * (_i)) + 1] - (_a)[2 * (_i)]) + 1) + +static size_t a5xx_snapshot_registers(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct kgsl_snapshot_regs *header = (struct kgsl_snapshot_regs *)buf; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + unsigned int *src = (unsigned int *) registers->hostptr; + struct registers *regs = (struct registers *)priv; + unsigned int j, k; + unsigned int count = 0; + + if (!crash_dump_valid) + return a5xx_legacy_snapshot_registers(device, buf, remain, + regs->regs, regs->size); + + if (remain < sizeof(*header)) { + SNAPSHOT_ERR_NOMEM(device, "REGISTERS"); + return 0; + } + + remain -= sizeof(*header); + + for (j = 0; j < regs->size / 2; j++) { + unsigned int start = regs->regs[2 * j]; + unsigned int end = regs->regs[(2 * j) + 1]; + + if (remain < ((end - start) + 1) * 8) { + SNAPSHOT_ERR_NOMEM(device, "REGISTERS"); + goto out; + } + + remain -= ((end - start) + 1) * 8; + + for (k = start; k <= end; k++, count++) { + *data++ = k; + *data++ = *src++; + } + } + +out: + header->count = count; + + /* Return the size of the section */ + return (count * 8) + sizeof(*header); +} + +/* Snapshot a preemption record buffer */ +static size_t snapshot_preemption_record(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct kgsl_memdesc *memdesc = priv; + + struct kgsl_snapshot_gpu_object_v2 *header = + (struct kgsl_snapshot_gpu_object_v2 *)buf; + + u8 *ptr = buf + sizeof(*header); + + if (remain < (SZ_64K + sizeof(*header))) { + SNAPSHOT_ERR_NOMEM(device, "PREEMPTION RECORD"); + return 0; + } + + header->size = SZ_64K >> 2; + header->gpuaddr = memdesc->gpuaddr; + header->ptbase = + kgsl_mmu_pagetable_get_ttbr0(device->mmu.defaultpagetable); + header->type = SNAPSHOT_GPU_OBJECT_GLOBAL; + + memcpy(ptr, memdesc->hostptr, SZ_64K); + + return SZ_64K + sizeof(*header); +} + + +static void _a5xx_do_crashdump(struct kgsl_device *device) +{ + unsigned long wait_time; + unsigned int reg = 0; + unsigned int val; + + crash_dump_valid = false; + + if (!device->snapshot_crashdumper) + return; + + if (IS_ERR_OR_NULL(capturescript) || IS_ERR_OR_NULL(registers)) + return; + + /* IF the SMMU is stalled we cannot do a crash dump */ + kgsl_regread(device, A5XX_RBBM_STATUS3, &val); + if (val & BIT(24)) + return; + + /* Turn on APRIV so we can access the buffers */ + kgsl_regwrite(device, A5XX_CP_CNTL, 1); + + kgsl_regwrite(device, A5XX_CP_CRASH_SCRIPT_BASE_LO, + lower_32_bits(capturescript->gpuaddr)); + kgsl_regwrite(device, A5XX_CP_CRASH_SCRIPT_BASE_HI, + upper_32_bits(capturescript->gpuaddr)); + kgsl_regwrite(device, A5XX_CP_CRASH_DUMP_CNTL, 1); + + wait_time = jiffies + msecs_to_jiffies(CP_CRASH_DUMPER_TIMEOUT); + while 
(!time_after(jiffies, wait_time)) { + kgsl_regread(device, A5XX_CP_CRASH_DUMP_CNTL, ®); + if (reg & 0x4) + break; + cpu_relax(); + } + + kgsl_regwrite(device, A5XX_CP_CNTL, 0); + + if (!(reg & 0x4)) { + dev_err(device->dev, "Crash dump timed out: 0x%X\n", reg); + return; + } + + crash_dump_valid = true; +} + +static int get_hlsq_registers(struct kgsl_device *device, + const struct a5xx_hlsq_sp_tp_regs *regs, unsigned int *data) +{ + unsigned int i; + unsigned int *src = registers->hostptr + regs->offset; + + for (i = 0; i < regs->size; i++) { + *data++ = regs->ahbaddr + i; + *data++ = *(src + i); + } + + return (2 * regs->size); +} + +static size_t a5xx_snapshot_dump_hlsq_sp_tp_regs(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_regs *header = (struct kgsl_snapshot_regs *)buf; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + int count = 0, i; + + /* Figure out how many registers we are going to dump */ + for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_sp_tp_registers); i++) + count += a5xx_hlsq_sp_tp_registers[i].size; + + if (remain < (count * 8) + sizeof(*header)) { + SNAPSHOT_ERR_NOMEM(device, "REGISTERS"); + return 0; + } + + for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_sp_tp_registers); i++) + data += get_hlsq_registers(device, + &a5xx_hlsq_sp_tp_registers[i], data); + + header->count = count; + + /* Return the size of the section */ + return (count * 8) + sizeof(*header); +} + +static size_t a5xx_snapshot_cp_merciu(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + int i, size; + + if (adreno_is_a505_or_a506(adreno_dev) || adreno_is_a508(adreno_dev) || + adreno_is_a540(adreno_dev) || adreno_is_a512(adreno_dev)) + size = 1024; + else if (adreno_is_a510(adreno_dev)) + size = 32; + else + size = 64; + + /* The MERCIU data is two dwords per entry */ + size = size << 1; + + if (remain < DEBUG_SECTION_SZ(size)) { + SNAPSHOT_ERR_NOMEM(device, "CP MERCIU DEBUG"); + return 0; + } + + header->type = SNAPSHOT_DEBUG_CP_MERCIU; + header->size = size; + + kgsl_regwrite(device, A5XX_CP_MERCIU_DBG_ADDR, 0); + + for (i = 0; i < size; i++) { + kgsl_regread(device, A5XX_CP_MERCIU_DBG_DATA_1, + &data[(i * 2)]); + kgsl_regread(device, A5XX_CP_MERCIU_DBG_DATA_2, + &data[(i * 2) + 1]); + } + + return DEBUG_SECTION_SZ(size); +} + +static size_t a5xx_snapshot_cp_roq(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *) buf; + u32 size, *data = (u32 *) (buf + sizeof(*header)); + + if (adreno_is_a505_or_a506(adreno_dev) || adreno_is_a508(adreno_dev) || + adreno_is_a510(adreno_dev)) + size = 256; + else + size = 512; + + if (remain < DEBUG_SECTION_SZ(size)) { + SNAPSHOT_ERR_NOMEM(device, "CP ROQ DEBUG"); + return 0; + } + + header->type = SNAPSHOT_DEBUG_CP_ROQ; + header->size = size; + + kgsl_regmap_read_indexed(&device->regmap, A5XX_CP_ROQ_DBG_ADDR, + A5XX_CP_ROQ_DBG_DATA, data, size); + + return DEBUG_SECTION_SZ(size); +} + +static size_t a5xx_snapshot_cp_meq(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *) buf; + u32 size, *data = (u32 *) (buf + sizeof(*header)); 
+ + if (adreno_is_a505_or_a506(adreno_dev) || adreno_is_a508(adreno_dev) || + adreno_is_a510(adreno_dev)) + size = 32; + else + size = 64; + + if (remain < DEBUG_SECTION_SZ(size)) { + SNAPSHOT_ERR_NOMEM(device, "CP MEQ DEBUG"); + return 0; + } + + header->type = SNAPSHOT_DEBUG_CP_MEQ; + header->size = size; + + kgsl_regmap_read_indexed(&device->regmap, A5XX_CP_MEQ_DBG_ADDR, + A5XX_CP_MEQ_DBG_DATA, data, size); + + return DEBUG_SECTION_SZ(size); +} + +/* + * a5xx_snapshot() - A5XX GPU snapshot function + * @adreno_dev: Device being snapshotted + * @snapshot: Pointer to the snapshot instance + * + * This is where all of the A5XX specific bits and pieces are grabbed + * into the snapshot memory + */ +void a5xx_snapshot(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned int i; + u32 hi, lo; + struct adreno_ringbuffer *rb; + struct registers regs; + + /* Disable Clock gating temporarily for the debug bus to work */ + a5xx_hwcg_set(adreno_dev, false); + + /* Save some CP information that the generic snapshot uses */ + kgsl_regread(device, A5XX_CP_IB1_BASE, &lo); + kgsl_regread(device, A5XX_CP_IB1_BASE_HI, &hi); + + snapshot->ib1base = (((u64) hi) << 32) | lo; + + kgsl_regread(device, A5XX_CP_IB2_BASE, &lo); + kgsl_regread(device, A5XX_CP_IB2_BASE_HI, &hi); + + snapshot->ib2base = (((u64) hi) << 32) | lo; + + kgsl_regread(device, A5XX_CP_IB1_BUFSZ, &snapshot->ib1size); + kgsl_regread(device, A5XX_CP_IB2_BUFSZ, &snapshot->ib2size); + + /* Dump the registers which get affected by crash dumper trigger */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS, + snapshot, a5xx_snapshot_pre_crashdump_regs, NULL); + + /* Dump vbif registers as well which get affected by crash dumper */ + SNAPSHOT_REGISTERS(device, snapshot, a5xx_vbif_registers); + + /* Try to run the crash dumper */ + _a5xx_do_crashdump(device); + + regs.regs = a5xx_registers; + regs.size = ARRAY_SIZE(a5xx_registers); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS, snapshot, + a5xx_snapshot_registers, ®s); + + if (a5xx_has_gpmu(adreno_dev)) { + regs.regs = a5xx_gpmu_registers; + regs.size = ARRAY_SIZE(a5xx_gpmu_registers); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS, + snapshot, a5xx_snapshot_registers, ®s); + } + + + /* Dump SP TP HLSQ registers */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS, snapshot, + a5xx_snapshot_dump_hlsq_sp_tp_regs, NULL); + + /* CP_PFP indexed registers */ + kgsl_snapshot_indexed_registers(device, snapshot, + A5XX_CP_PFP_STAT_ADDR, A5XX_CP_PFP_STAT_DATA, 0, 36); + + /* CP_ME indexed registers */ + kgsl_snapshot_indexed_registers(device, snapshot, + A5XX_CP_ME_STAT_ADDR, A5XX_CP_ME_STAT_DATA, 0, 29); + + /* CP_DRAW_STATE */ + kgsl_snapshot_indexed_registers(device, snapshot, + A5XX_CP_DRAW_STATE_ADDR, A5XX_CP_DRAW_STATE_DATA, + 0, 1 << A5XX_CP_DRAW_STATE_ADDR_WIDTH); + + /* ME_UCODE Cache */ + kgsl_snapshot_indexed_registers(device, snapshot, + A5XX_CP_ME_UCODE_DBG_ADDR, A5XX_CP_ME_UCODE_DBG_DATA, + 0, 0x53F); + + /* PFP_UCODE Cache */ + kgsl_snapshot_indexed_registers(device, snapshot, + A5XX_CP_PFP_UCODE_DBG_ADDR, A5XX_CP_PFP_UCODE_DBG_DATA, + 0, 0x53F); + + /* CP MEQ */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, a5xx_snapshot_cp_meq, NULL); + + /* CP ROQ */ + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, a5xx_snapshot_cp_roq, NULL); + + kgsl_snapshot_add_section(device, 
KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, a5xx_snapshot_cp_merciu, NULL); + + /* CP PFP and PM4 */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, a5xx_snapshot_cp_pfp, NULL); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, a5xx_snapshot_cp_pm4, NULL); + + /* Shader memory */ + a5xx_snapshot_shader(device, snapshot); + + /* Debug bus */ + a5xx_snapshot_debugbus(device, snapshot); + + /* Preemption record */ + if (adreno_is_preemption_enabled(adreno_dev)) { + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2, + snapshot, snapshot_preemption_record, + rb->preemption_desc); + } + } + +} + +static int _a5xx_crashdump_init_shader(struct a5xx_shader_block *block, + uint64_t *ptr, uint64_t *offset) +{ + int qwords = 0; + unsigned int j; + + /* Capture each bank in the block */ + for (j = 0; j < A5XX_NUM_SHADER_BANKS; j++) { + /* Program the aperture */ + ptr[qwords++] = + (block->statetype << A5XX_SHADER_STATETYPE_SHIFT) | j; + ptr[qwords++] = (((uint64_t) A5XX_HLSQ_DBG_READ_SEL << 44)) | + (1 << 21) | 1; + + /* Read all the data in one chunk */ + ptr[qwords++] = registers->gpuaddr + *offset; + ptr[qwords++] = + (((uint64_t) A5XX_HLSQ_DBG_AHB_READ_APERTURE << 44)) | + block->sz; + + /* Remember the offset of the first bank for easy access */ + if (j == 0) + block->offset = *offset; + + *offset += block->sz * sizeof(unsigned int); + } + + return qwords; +} + +static int _a5xx_crashdump_init_hlsq(struct a5xx_hlsq_sp_tp_regs *regs, + uint64_t *ptr, uint64_t *offset) +{ + int qwords = 0; + + /* Program the aperture */ + ptr[qwords++] = + (regs->statetype << A5XX_SHADER_STATETYPE_SHIFT); + ptr[qwords++] = (((uint64_t) A5XX_HLSQ_DBG_READ_SEL << 44)) | + (1 << 21) | 1; + + /* Read all the data in one chunk */ + ptr[qwords++] = registers->gpuaddr + *offset; + ptr[qwords++] = + (((uint64_t) A5XX_HLSQ_DBG_AHB_READ_APERTURE << 44)) | + regs->size; + + /* Remember the offset of the first bank for easy access */ + regs->offset = *offset; + + *offset += regs->size * sizeof(unsigned int); + + return qwords; +} + +void a5xx_crashdump_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned int script_size = 0; + unsigned int data_size = 0; + unsigned int i, j; + uint64_t *ptr; + uint64_t offset = 0; + + if (!IS_ERR_OR_NULL(capturescript) && !IS_ERR_OR_NULL(registers)) + return; + + /* + * We need to allocate two buffers: + * 1 - the buffer to hold the draw script + * 2 - the buffer to hold the data + */ + + /* + * To save the registers, we need 16 bytes per register pair for the + * script and a dword for each register int the data + */ + + /* Each pair needs 16 bytes (2 qwords) */ + script_size += (ARRAY_SIZE(a5xx_registers) / 2) * 16; + + /* Each register needs a dword in the data */ + for (j = 0; j < ARRAY_SIZE(a5xx_registers) / 2; j++) + data_size += REG_PAIR_COUNT(a5xx_registers, j) * + sizeof(unsigned int); + + if (a5xx_has_gpmu(adreno_dev)) { + /* Each pair needs 16 bytes (2 qwords) */ + script_size += (ARRAY_SIZE(a5xx_gpmu_registers) / 2) * 16; + + /* Each register needs a dword in the data */ + for (j = 0; j < ARRAY_SIZE(a5xx_gpmu_registers) / 2; j++) + data_size += REG_PAIR_COUNT(a5xx_gpmu_registers, j) * + sizeof(unsigned int); + } + + /* + * To save the shader blocks for each block in each type we need 32 + * bytes for the script (16 bytes to program the aperture and 16 to + * read the data) and then a block specific number of 
bytes to hold + * the data + */ + for (i = 0; i < ARRAY_SIZE(a5xx_shader_blocks); i++) { + script_size += 32 * A5XX_NUM_SHADER_BANKS; + data_size += a5xx_shader_blocks[i].sz * sizeof(unsigned int) * + A5XX_NUM_SHADER_BANKS; + } + for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_sp_tp_registers); i++) { + script_size += 32; + data_size += + a5xx_hlsq_sp_tp_registers[i].size * sizeof(unsigned int); + } + + /* Now allocate the script and data buffers */ + + /* The script buffers needs 2 extra qwords on the end */ + if (!IS_ERR_OR_NULL(capturescript)) + capturescript = kgsl_allocate_global(device, + script_size + 16, 0, KGSL_MEMFLAGS_GPUREADONLY, + KGSL_MEMDESC_PRIVILEGED, "capturescript"); + + if (IS_ERR(capturescript)) + return; + + if (!IS_ERR_OR_NULL(registers)) + registers = kgsl_allocate_global(device, data_size, 0, 0, + KGSL_MEMDESC_PRIVILEGED, "capturescript_regs"); + + if (IS_ERR(registers)) + return; + + /* Build the crash script */ + + ptr = (uint64_t *) capturescript->hostptr; + + /* For the registers, program a read command for each pair */ + + for (j = 0; j < ARRAY_SIZE(a5xx_registers) / 2; j++) { + unsigned int r = REG_PAIR_COUNT(a5xx_registers, j); + *ptr++ = registers->gpuaddr + offset; + *ptr++ = (((uint64_t) a5xx_registers[2 * j]) << 44) + | r; + offset += r * sizeof(unsigned int); + } + + if (a5xx_has_gpmu(adreno_dev)) { + for (j = 0; j < ARRAY_SIZE(a5xx_gpmu_registers) / 2; j++) { + unsigned int r = REG_PAIR_COUNT(a5xx_gpmu_registers, j); + *ptr++ = registers->gpuaddr + offset; + *ptr++ = (((uint64_t) a5xx_gpmu_registers[2 * j]) << 44) + | r; + offset += r * sizeof(unsigned int); + } + } + + /* Program each shader block */ + for (i = 0; i < ARRAY_SIZE(a5xx_shader_blocks); i++) { + ptr += _a5xx_crashdump_init_shader(&a5xx_shader_blocks[i], ptr, + &offset); + } + /* Program the hlsq sp tp register sets */ + for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_sp_tp_registers); i++) + ptr += _a5xx_crashdump_init_hlsq(&a5xx_hlsq_sp_tp_registers[i], + ptr, &offset); + + *ptr++ = 0; + *ptr++ = 0; +} diff --git a/adreno_a6xx.c b/adreno_a6xx.c new file mode 100644 index 0000000000..44e69b9d94 --- /dev/null +++ b/adreno_a6xx.c @@ -0,0 +1,2774 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2017-2021, The Linux Foundation. All rights reserved. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "adreno.h" +#include "adreno_a6xx.h" +#include "adreno_a6xx_hwsched.h" +#include "adreno_pm4types.h" +#include "adreno_trace.h" +#include "kgsl_trace.h" +#include "kgsl_util.h" + +/* IFPC & Preemption static powerup restore list */ +static u32 a6xx_pwrup_reglist[] = { + A6XX_VSC_ADDR_MODE_CNTL, + A6XX_GRAS_ADDR_MODE_CNTL, + A6XX_RB_ADDR_MODE_CNTL, + A6XX_PC_ADDR_MODE_CNTL, + A6XX_HLSQ_ADDR_MODE_CNTL, + A6XX_VFD_ADDR_MODE_CNTL, + A6XX_VPC_ADDR_MODE_CNTL, + A6XX_UCHE_ADDR_MODE_CNTL, + A6XX_SP_ADDR_MODE_CNTL, + A6XX_TPL1_ADDR_MODE_CNTL, + A6XX_UCHE_WRITE_RANGE_MAX_LO, + A6XX_UCHE_WRITE_RANGE_MAX_HI, + A6XX_UCHE_TRAP_BASE_LO, + A6XX_UCHE_TRAP_BASE_HI, + A6XX_UCHE_WRITE_THRU_BASE_LO, + A6XX_UCHE_WRITE_THRU_BASE_HI, + A6XX_UCHE_GMEM_RANGE_MIN_LO, + A6XX_UCHE_GMEM_RANGE_MIN_HI, + A6XX_UCHE_GMEM_RANGE_MAX_LO, + A6XX_UCHE_GMEM_RANGE_MAX_HI, + A6XX_UCHE_FILTER_CNTL, + A6XX_UCHE_CACHE_WAYS, + A6XX_UCHE_MODE_CNTL, + A6XX_RB_NC_MODE_CNTL, + A6XX_TPL1_NC_MODE_CNTL, + A6XX_SP_NC_MODE_CNTL, + A6XX_PC_DBG_ECO_CNTL, + A6XX_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE, +}; + +/* IFPC only static powerup restore list */ +static u32 a6xx_ifpc_pwrup_reglist[] = { + A6XX_CP_CHICKEN_DBG, + A6XX_CP_DBG_ECO_CNTL, + A6XX_CP_PROTECT_CNTL, + A6XX_CP_PROTECT_REG, + A6XX_CP_PROTECT_REG+1, + A6XX_CP_PROTECT_REG+2, + A6XX_CP_PROTECT_REG+3, + A6XX_CP_PROTECT_REG+4, + A6XX_CP_PROTECT_REG+5, + A6XX_CP_PROTECT_REG+6, + A6XX_CP_PROTECT_REG+7, + A6XX_CP_PROTECT_REG+8, + A6XX_CP_PROTECT_REG+9, + A6XX_CP_PROTECT_REG+10, + A6XX_CP_PROTECT_REG+11, + A6XX_CP_PROTECT_REG+12, + A6XX_CP_PROTECT_REG+13, + A6XX_CP_PROTECT_REG+14, + A6XX_CP_PROTECT_REG+15, + A6XX_CP_PROTECT_REG+16, + A6XX_CP_PROTECT_REG+17, + A6XX_CP_PROTECT_REG+18, + A6XX_CP_PROTECT_REG+19, + A6XX_CP_PROTECT_REG+20, + A6XX_CP_PROTECT_REG+21, + A6XX_CP_PROTECT_REG+22, + A6XX_CP_PROTECT_REG+23, + A6XX_CP_PROTECT_REG+24, + A6XX_CP_PROTECT_REG+25, + A6XX_CP_PROTECT_REG+26, + A6XX_CP_PROTECT_REG+27, + A6XX_CP_PROTECT_REG+28, + A6XX_CP_PROTECT_REG+29, + A6XX_CP_PROTECT_REG+30, + A6XX_CP_PROTECT_REG+31, + A6XX_CP_AHB_CNTL, +}; + +/* Applicable to a620, a635, a650 and a660 */ +static u32 a650_pwrup_reglist[] = { + A6XX_CP_PROTECT_REG + 47, /* Programmed for infinite span */ + A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_0, + A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_1, + A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_2, + A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_3, + A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_4, + A6XX_UCHE_CMDQ_CONFIG, +}; + +static u32 a615_pwrup_reglist[] = { + A6XX_UCHE_GBIF_GX_CONFIG, +}; + +int a6xx_fenced_write(struct adreno_device *adreno_dev, u32 offset, + u32 value, u32 mask) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned int status, i; + + kgsl_regwrite(device, offset, value); + + if (!gmu_core_isenabled(device)) + return 0; + + for (i = 0; i < GMU_CORE_LONG_WAKEUP_RETRY_LIMIT; i++) { + /* + * Make sure the previous register write is posted before + * checking the fence status + */ + mb(); + + kgsl_regread(device, A6XX_GMU_AHB_FENCE_STATUS, &status); + + /* + * If !writedropped0/1, then the write to fenced register + * was successful + */ + if (!(status & mask)) + break; + + /* Wait a small amount of time before trying again */ + udelay(GMU_CORE_WAKEUP_DELAY_US); + + /* Try to write the fenced register again */ + kgsl_regwrite(device, offset, value); + } + + if (i < GMU_CORE_SHORT_WAKEUP_RETRY_LIMIT) + return 0; + + if (i == GMU_CORE_LONG_WAKEUP_RETRY_LIMIT) { + dev_err(adreno_dev->dev.dev, + 
"Timed out waiting %d usecs to write fenced register 0x%x\n", + i * GMU_CORE_WAKEUP_DELAY_US, offset); + return -ETIMEDOUT; + } + + dev_err(adreno_dev->dev.dev, + "Waited %d usecs to write fenced register 0x%x\n", + i * GMU_CORE_WAKEUP_DELAY_US, offset); + + return 0; +} + +int a6xx_init(struct adreno_device *adreno_dev) +{ + const struct adreno_a6xx_core *a6xx_core = to_a6xx_core(adreno_dev); + + adreno_dev->highest_bank_bit = a6xx_core->highest_bank_bit; + + adreno_dev->cooperative_reset = ADRENO_FEATURE(adreno_dev, + ADRENO_COOP_RESET); + + /* If the memory type is DDR 4, override the existing configuration */ + if (of_fdt_get_ddrtype() == 0x7) { + if (adreno_is_a660_shima(adreno_dev) || + adreno_is_a635(adreno_dev)) + adreno_dev->highest_bank_bit = 14; + else if ((adreno_is_a650(adreno_dev) || + adreno_is_a660(adreno_dev))) + adreno_dev->highest_bank_bit = 15; + } + + a6xx_crashdump_init(adreno_dev); + + return adreno_allocate_global(KGSL_DEVICE(adreno_dev), + &adreno_dev->pwrup_reglist, + PAGE_SIZE, 0, 0, KGSL_MEMDESC_PRIVILEGED, + "powerup_register_list"); +} + +static int a6xx_nogmu_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + + ret = a6xx_ringbuffer_init(adreno_dev); + if (ret) + return ret; + + ret = a6xx_microcode_read(adreno_dev); + if (ret) + return ret; + + /* Try to map the GMU wrapper region if applicable */ + ret = kgsl_regmap_add_region(&device->regmap, device->pdev, + "gmu_wrapper", NULL, NULL); + if (ret && ret != -ENODEV) + dev_err(device->dev, "Couldn't map the GMU wrapper registers\n"); + + adreno_create_profile_buffer(adreno_dev); + + return a6xx_init(adreno_dev); +} + +static void a6xx_protect_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + const struct adreno_a6xx_core *a6xx_core = to_a6xx_core(adreno_dev); + const struct adreno_protected_regs *regs = a6xx_core->protected_regs; + int i; + + /* + * Enable access protection to privileged registers, fault on an access + * protect violation and select the last span to protect from the start + * address all the way to the end of the register address space + */ + kgsl_regwrite(device, A6XX_CP_PROTECT_CNTL, + (1 << 0) | (1 << 1) | (1 << 3)); + + /* Program each register defined by the core definition */ + for (i = 0; regs[i].reg; i++) { + u32 count; + + /* + * This is the offset of the end register as counted from the + * start, i.e. 
# of registers in the range - 1 + */ + count = regs[i].end - regs[i].start; + + kgsl_regwrite(device, regs[i].reg, + (regs[i].start & 0x3ffff) | ((count & 0x1fff) << 18) | + (regs[i].noaccess << 31)); + } +} + +static inline unsigned int +__get_rbbm_clock_cntl_on(struct adreno_device *adreno_dev) +{ + if (adreno_is_a630(adreno_dev)) + return 0x8AA8AA02; + else if (adreno_is_a612(adreno_dev) || adreno_is_a610(adreno_dev)) + return 0xAAA8AA82; + else + return 0x8AA8AA82; +} + +static inline unsigned int +__get_gmu_ao_cgc_mode_cntl(struct adreno_device *adreno_dev) +{ + if (adreno_is_a612(adreno_dev)) + return 0x00000022; + else if (adreno_is_a615_family(adreno_dev)) + return 0x00000222; + else if (adreno_is_a660(adreno_dev)) + return 0x00020000; + else + return 0x00020202; +} + +static inline unsigned int +__get_gmu_ao_cgc_delay_cntl(struct adreno_device *adreno_dev) +{ + if (adreno_is_a612(adreno_dev)) + return 0x00000011; + else if (adreno_is_a615_family(adreno_dev)) + return 0x00000111; + else + return 0x00010111; +} + +static inline unsigned int +__get_gmu_ao_cgc_hyst_cntl(struct adreno_device *adreno_dev) +{ + if (adreno_is_a612(adreno_dev)) + return 0x00000055; + else if (adreno_is_a615_family(adreno_dev)) + return 0x00000555; + else + return 0x00005555; +} + +static unsigned int __get_gmu_wfi_config(struct adreno_device *adreno_dev) +{ + if (adreno_is_a620(adreno_dev) || adreno_is_a640(adreno_dev) || + adreno_is_a650(adreno_dev)) + return 0x00000002; + + return 0x00000000; +} + +void a6xx_cx_regulator_disable_wait(struct regulator *reg, + struct kgsl_device *device, u32 timeout) +{ + if (!adreno_regulator_disable_poll(device, reg, A6XX_GPU_CC_CX_GDSCR, timeout)) { + dev_err(device->dev, "GPU CX wait timeout. Dumping CX votes:\n"); + /* Dump the cx regulator consumer list */ + qcom_clk_dump(NULL, reg, false); + } +} + +static void set_holi_sptprac_clock(struct kgsl_device *device, bool enable) +{ + u32 val = 0; + + kgsl_regread(device, A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, &val); + val &= ~1; + kgsl_regwrite(device, A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, + val | (enable ? 1 : 0)); +} + +static void a6xx_hwcg_set(struct adreno_device *adreno_dev, bool on) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + const struct adreno_a6xx_core *a6xx_core = to_a6xx_core(adreno_dev); + unsigned int value; + int i; + + if (!adreno_dev->hwcg_enabled) + on = false; + + if (gmu_core_isenabled(device)) { + gmu_core_regwrite(device, A6XX_GPU_GMU_AO_GMU_CGC_MODE_CNTL, + on ? __get_gmu_ao_cgc_mode_cntl(adreno_dev) : 0); + gmu_core_regwrite(device, A6XX_GPU_GMU_AO_GMU_CGC_DELAY_CNTL, + on ? __get_gmu_ao_cgc_delay_cntl(adreno_dev) : 0); + gmu_core_regwrite(device, A6XX_GPU_GMU_AO_GMU_CGC_HYST_CNTL, + on ? __get_gmu_ao_cgc_hyst_cntl(adreno_dev) : 0); + gmu_core_regwrite(device, A6XX_GMU_CX_GMU_WFI_CONFIG, + on ? __get_gmu_wfi_config(adreno_dev) : 0); + } + + kgsl_regread(device, A6XX_RBBM_CLOCK_CNTL, &value); + + if (value == __get_rbbm_clock_cntl_on(adreno_dev) && on) + return; + + if (value == 0 && !on) + return; + + /* + * Disable SP clock before programming HWCG registers. + * A612 and A610 GPU is not having the GX power domain. + * Hence skip GMU_GX registers for A12 and A610. 
+ */ + + if (gmu_core_isenabled(device) && !adreno_is_a612(adreno_dev) && + !adreno_is_a610(adreno_dev)) + gmu_core_regrmw(device, + A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 1, 0); + else if (adreno_is_a619_holi(adreno_dev)) + set_holi_sptprac_clock(device, false); + + for (i = 0; i < a6xx_core->hwcg_count; i++) + kgsl_regwrite(device, a6xx_core->hwcg[i].offset, + on ? a6xx_core->hwcg[i].val : 0); + + /* + * Enable SP clock after programming HWCG registers. + * A612 and A610 GPU is not having the GX power domain. + * Hence skip GMU_GX registers for A612. + */ + if (gmu_core_isenabled(device) && !adreno_is_a612(adreno_dev) && + !adreno_is_a610(adreno_dev)) + gmu_core_regrmw(device, + A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 0, 1); + else if (adreno_is_a619_holi(adreno_dev)) + set_holi_sptprac_clock(device, true); + + /* enable top level HWCG */ + kgsl_regwrite(device, A6XX_RBBM_CLOCK_CNTL, + on ? __get_rbbm_clock_cntl_on(adreno_dev) : 0); +} + +struct a6xx_reglist_list { + u32 *regs; + u32 count; +}; + +#define REGLIST(_a) \ + (struct a6xx_reglist_list) { .regs = _a, .count = ARRAY_SIZE(_a), } + +static void a6xx_patch_pwrup_reglist(struct adreno_device *adreno_dev) +{ + struct a6xx_reglist_list reglist[3]; + void *ptr = adreno_dev->pwrup_reglist->hostptr; + struct cpu_gpu_lock *lock = ptr; + int items = 0, i, j; + u32 *dest = ptr + sizeof(*lock); + + /* Static IFPC-only registers */ + reglist[items++] = REGLIST(a6xx_ifpc_pwrup_reglist); + + /* Static IFPC + preemption registers */ + reglist[items++] = REGLIST(a6xx_pwrup_reglist); + + /* Add target specific registers */ + if (adreno_is_a615_family(adreno_dev)) + reglist[items++] = REGLIST(a615_pwrup_reglist); + else if (adreno_is_a650_family(adreno_dev)) + reglist[items++] = REGLIST(a650_pwrup_reglist); + + /* + * For each entry in each of the lists, write the offset and the current + * register value into the GPU buffer + */ + for (i = 0; i < items; i++) { + u32 *r = reglist[i].regs; + + for (j = 0; j < reglist[i].count; j++) { + *dest++ = r[j]; + kgsl_regread(KGSL_DEVICE(adreno_dev), r[j], dest++); + } + + lock->list_length += reglist[i].count * 2; + } + + if (adreno_is_a630(adreno_dev)) { + *dest++ = A6XX_RBBM_VBIF_CLIENT_QOS_CNTL; + kgsl_regread(KGSL_DEVICE(adreno_dev), + A6XX_RBBM_VBIF_CLIENT_QOS_CNTL, dest++); + } else { + *dest++ = A6XX_RBBM_GBIF_CLIENT_QOS_CNTL; + kgsl_regread(KGSL_DEVICE(adreno_dev), + A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, dest++); + } + + lock->list_length += 2; + + *dest++ = A6XX_RBBM_PERFCTR_CNTL; + *dest++ = 1; + lock->list_length += 2; + + /* + * The overall register list is composed of + * 1. Static IFPC-only registers + * 2. Static IFPC + preemption registers + * 3. Dynamic IFPC + preemption registers (ex: perfcounter selects) + * + * The CP views the second and third entries as one dynamic list + * starting from list_offset. list_length should be the total dwords in + * all the lists and list_offset should be specified as the size in + * dwords of the first entry in the list. 
+ */ + lock->list_offset = reglist[0].count * 2; +} + + +static void a6xx_llc_configure_gpu_scid(struct adreno_device *adreno_dev); +static void a6xx_llc_configure_gpuhtw_scid(struct adreno_device *adreno_dev); +static void a6xx_llc_enable_overrides(struct adreno_device *adreno_dev); + +static void a6xx_set_secvid(struct kgsl_device *device) +{ + static bool set; + + if (set || !device->mmu.secured) + return; + + kgsl_regwrite(device, A6XX_RBBM_SECVID_TSB_CNTL, 0x0); + kgsl_regwrite(device, A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, + lower_32_bits(KGSL_IOMMU_SECURE_BASE(&device->mmu))); + kgsl_regwrite(device, A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, + upper_32_bits(KGSL_IOMMU_SECURE_BASE(&device->mmu))); + kgsl_regwrite(device, A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, + KGSL_IOMMU_SECURE_SIZE(&device->mmu)); + + if (ADRENO_QUIRK(ADRENO_DEVICE(device), ADRENO_QUIRK_SECVID_SET_ONCE)) + set = true; +} + +static void a6xx_deassert_gbif_halt(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + kgsl_regwrite(device, A6XX_GBIF_HALT, 0x0); + + if (adreno_is_a619_holi(adreno_dev)) + kgsl_regwrite(device, A6XX_RBBM_GPR0_CNTL, 0x0); + else + kgsl_regwrite(device, A6XX_RBBM_GBIF_HALT, 0x0); +} + +/* + * Some targets support marking certain transactions as always privileged which + * allows us to mark more memory as privileged without having to explicitly set + * the APRIV bit. For those targets, choose the following transactions to be + * privileged by default: + * CDWRITE [6:6] - Crashdumper writes + * CDREAD [5:5] - Crashdumper reads + * RBRPWB [3:3] - RPTR shadow writes + * RBPRIVLEVEL [2:2] - Memory accesses from PM4 packets in the ringbuffer + * RBFETCH [1:1] - Ringbuffer reads + */ +#define A6XX_APRIV_DEFAULT \ + ((1 << 6) | (1 << 5) | (1 << 3) | (1 << 2) | (1 << 1)) + +void a6xx_start(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + const struct adreno_a6xx_core *a6xx_core = to_a6xx_core(adreno_dev); + unsigned int mal, mode, hbb_hi = 0, hbb_lo = 0; + unsigned int uavflagprd_inv; + unsigned int amsbc = 0; + unsigned int rgb565_predicator = 0; + static bool patch_reglist; + + /* Enable 64 bit addressing */ + kgsl_regwrite(device, A6XX_CP_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A6XX_VSC_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A6XX_GRAS_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A6XX_RB_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A6XX_PC_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A6XX_HLSQ_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A6XX_VFD_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A6XX_VPC_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A6XX_UCHE_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A6XX_SP_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A6XX_TPL1_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A6XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1); + + /* Set up VBIF registers from the GPU core definition */ + kgsl_regmap_multi_write(&device->regmap, a6xx_core->vbif, + a6xx_core->vbif_count); + + if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_LIMIT_UCHE_GBIF_RW)) + kgsl_regwrite(device, A6XX_UCHE_GBIF_GX_CONFIG, 0x10200F9); + + /* Make all blocks contribute to the GPU BUSY perf counter */ + kgsl_regwrite(device, A6XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF); + + /* + * Set UCHE_WRITE_THRU_BASE to the UCHE_TRAP_BASE effectively + * disabling L2 bypass + */ + kgsl_regwrite(device, A6XX_UCHE_WRITE_RANGE_MAX_LO, 0xffffffc0); + kgsl_regwrite(device, A6XX_UCHE_WRITE_RANGE_MAX_HI, 0x0001ffff); + 
kgsl_regwrite(device, A6XX_UCHE_TRAP_BASE_LO, 0xfffff000); + kgsl_regwrite(device, A6XX_UCHE_TRAP_BASE_HI, 0x0001ffff); + kgsl_regwrite(device, A6XX_UCHE_WRITE_THRU_BASE_LO, 0xfffff000); + kgsl_regwrite(device, A6XX_UCHE_WRITE_THRU_BASE_HI, 0x0001ffff); + + /* + * Some A6xx targets no longer use a programmed GMEM base address + * so only write the registers if a non zero address is given + * in the GPU list + */ + if (adreno_dev->gpucore->gmem_base) { + kgsl_regwrite(device, A6XX_UCHE_GMEM_RANGE_MIN_LO, + adreno_dev->gpucore->gmem_base); + kgsl_regwrite(device, A6XX_UCHE_GMEM_RANGE_MIN_HI, 0x0); + kgsl_regwrite(device, A6XX_UCHE_GMEM_RANGE_MAX_LO, + adreno_dev->gpucore->gmem_base + + adreno_dev->gpucore->gmem_size - 1); + kgsl_regwrite(device, A6XX_UCHE_GMEM_RANGE_MAX_HI, 0x0); + } + + kgsl_regwrite(device, A6XX_UCHE_FILTER_CNTL, 0x804); + kgsl_regwrite(device, A6XX_UCHE_CACHE_WAYS, 0x4); + + /* ROQ sizes are twice as big on a640/a680 than on a630 */ + if (ADRENO_GPUREV(adreno_dev) >= ADRENO_REV_A640) { + kgsl_regwrite(device, A6XX_CP_ROQ_THRESHOLDS_2, 0x02000140); + kgsl_regwrite(device, A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362C); + } else if (adreno_is_a612(adreno_dev) || adreno_is_a610(adreno_dev)) { + kgsl_regwrite(device, A6XX_CP_ROQ_THRESHOLDS_2, 0x00800060); + kgsl_regwrite(device, A6XX_CP_ROQ_THRESHOLDS_1, 0x40201b16); + } else { + kgsl_regwrite(device, A6XX_CP_ROQ_THRESHOLDS_2, 0x010000C0); + kgsl_regwrite(device, A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362C); + } + + if (adreno_is_a660(adreno_dev)) + kgsl_regwrite(device, A6XX_CP_LPAC_PROG_FIFO_SIZE, 0x00000020); + + if (adreno_is_a612(adreno_dev) || adreno_is_a610(adreno_dev)) { + /* For A612 and A610 Mem pool size is reduced to 48 */ + kgsl_regwrite(device, A6XX_CP_MEM_POOL_SIZE, 48); + kgsl_regwrite(device, A6XX_CP_MEM_POOL_DBG_ADDR, 47); + } else { + kgsl_regwrite(device, A6XX_CP_MEM_POOL_SIZE, 128); + } + + /* Setting the primFifo thresholds values */ + kgsl_regwrite(device, A6XX_PC_DBG_ECO_CNTL, + a6xx_core->prim_fifo_threshold); + + /* Set the AHB default slave response to "ERROR" */ + kgsl_regwrite(device, A6XX_CP_AHB_CNTL, 0x1); + + /* Turn on performance counters */ + kgsl_regwrite(device, A6XX_RBBM_PERFCTR_CNTL, 0x1); + + /* Turn on the IFPC counter (countable 4 on XOCLK4) */ + if (gmu_core_isenabled(device)) + gmu_core_regrmw(device, A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_1, + 0xff, 0x4); + + /* Turn on GX_MEM retention */ + if (gmu_core_isenabled(device) && adreno_is_a612(adreno_dev)) { + kgsl_regwrite(device, A6XX_RBBM_BLOCK_GX_RETENTION_CNTL, 0x7FB); + /* For CP IPC interrupt */ + kgsl_regwrite(device, A6XX_RBBM_INT_2_MASK, 0x00000010); + } + + if (of_property_read_u32(device->pdev->dev.of_node, + "qcom,min-access-length", &mal)) + mal = 32; + + if (of_property_read_u32(device->pdev->dev.of_node, + "qcom,ubwc-mode", &mode)) + mode = 0; + + switch (mode) { + case KGSL_UBWC_1_0: + mode = 1; + break; + case KGSL_UBWC_2_0: + mode = 0; + break; + case KGSL_UBWC_3_0: + mode = 0; + amsbc = 1; /* Only valid for A640 and A680 */ + break; + case KGSL_UBWC_4_0: + mode = 0; + rgb565_predicator = 1; + amsbc = 1; + break; + default: + break; + } + + if (!WARN_ON(!adreno_dev->highest_bank_bit)) { + hbb_lo = (adreno_dev->highest_bank_bit - 13) & 3; + hbb_hi = ((adreno_dev->highest_bank_bit - 13) >> 2) & 1; + } + + mal = (mal == 64) ? 1 : 0; + + uavflagprd_inv = (adreno_is_a650_family(adreno_dev)) ? 
2 : 0; + + kgsl_regwrite(device, A6XX_RB_NC_MODE_CNTL, (rgb565_predicator << 11)| + (hbb_hi << 10) | (amsbc << 4) | (mal << 3) | + (hbb_lo << 1) | mode); + + kgsl_regwrite(device, A6XX_TPL1_NC_MODE_CNTL, (hbb_hi << 4) | + (mal << 3) | (hbb_lo << 1) | mode); + + kgsl_regwrite(device, A6XX_SP_NC_MODE_CNTL, (hbb_hi << 10) | + (mal << 3) | (uavflagprd_inv << 4) | + (hbb_lo << 1) | mode); + + kgsl_regwrite(device, A6XX_UCHE_MODE_CNTL, (mal << 23) | + (hbb_lo << 21)); + + kgsl_regwrite(device, A6XX_RBBM_INTERFACE_HANG_INT_CNTL, + (1 << 30) | a6xx_core->hang_detect_cycles); + + kgsl_regwrite(device, A6XX_UCHE_CLIENT_PF, 1); + + /* Set weights for bicubic filtering */ + if (adreno_is_a650_family(adreno_dev)) { + kgsl_regwrite(device, A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_0, 0); + kgsl_regwrite(device, A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_1, + 0x3FE05FF4); + kgsl_regwrite(device, A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_2, + 0x3FA0EBEE); + kgsl_regwrite(device, A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_3, + 0x3F5193ED); + kgsl_regwrite(device, A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_4, + 0x3F0243F0); + } + + /* Set TWOPASSUSEWFI in A6XX_PC_DBG_ECO_CNTL if requested */ + if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_TWO_PASS_USE_WFI)) + kgsl_regrmw(device, A6XX_PC_DBG_ECO_CNTL, 0, (1 << 8)); + + /* Set the bit vccCacheSkipDis=1 to get rid of TSEskip logic */ + if (a6xx_core->disable_tseskip) + kgsl_regrmw(device, A6XX_PC_DBG_ECO_CNTL, 0, (1 << 9)); + + /* Enable the GMEM save/restore feature for preemption */ + if (adreno_is_preemption_enabled(adreno_dev)) + kgsl_regwrite(device, A6XX_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE, + 0x1); + + /* + * Enable GMU power counter 0 to count GPU busy. This is applicable to + * all a6xx targets + */ + kgsl_regwrite(device, A6XX_GPU_GMU_AO_GPU_CX_BUSY_MASK, 0xff000000); + kgsl_regrmw(device, A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_0, 0xff, 0x20); + kgsl_regwrite(device, A6XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 0x1); + + a6xx_protect_init(adreno_dev); + /* + * We start LM here because we want all the following to be up + * 1. GX HS + * 2. SPTPRAC + * 3. HFI + * At this point, we are guaranteed all. + */ + + /* Configure LLCC */ + a6xx_llc_configure_gpu_scid(adreno_dev); + a6xx_llc_configure_gpuhtw_scid(adreno_dev); + + a6xx_llc_enable_overrides(adreno_dev); + + if (adreno_is_a660(adreno_dev)) { + kgsl_regwrite(device, A6XX_CP_CHICKEN_DBG, 0x1); + kgsl_regwrite(device, A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x0); + + /* Set dualQ + disable afull for A660 GPU but not for A635 */ + if (!adreno_is_a635(adreno_dev)) + kgsl_regwrite(device, A6XX_UCHE_CMDQ_CONFIG, 0x66906); + } + + if (ADRENO_FEATURE(adreno_dev, ADRENO_APRIV)) + kgsl_regwrite(device, A6XX_CP_APRIV_CNTL, A6XX_APRIV_DEFAULT); + + a6xx_set_secvid(device); + + /* + * Enable hardware clock gating here to prevent any register access + * issue due to internal clock gating. + */ + a6xx_hwcg_set(adreno_dev, true); + + /* + * All registers must be written before this point so that we don't + * miss any register programming when we patch the power up register + * list. + */ + if (!patch_reglist && (adreno_dev->pwrup_reglist->gpuaddr != 0)) { + a6xx_patch_pwrup_reglist(adreno_dev); + patch_reglist = true; + } + + /* + * During adreno_stop, GBIF halt is asserted to ensure + * no further transaction can go through GPU before GPU + * headswitch is turned off. + * + * This halt is deasserted once headswitch goes off but + * incase headswitch doesn't goes off clear GBIF halt + * here to ensure GPU wake-up doesn't fail because of + * halted GPU transactions. 
+ */ + a6xx_deassert_gbif_halt(adreno_dev); + +} + +/* Offsets into the MX/CX mapped register regions */ +#define RDPM_MX_OFFSET 0xf00 +#define RDPM_CX_OFFSET 0xf18 + +void a6xx_rdpm_mx_freq_update(struct a6xx_gmu_device *gmu, + u32 freq) +{ + if (gmu->rdpm_mx_virt) { + writel_relaxed(freq/1000, + (gmu->rdpm_mx_virt + RDPM_MX_OFFSET)); + + /* + * Ensure previous writes post before this one, + * i.e. act like normal writel() + */ + wmb(); + } +} + +void a6xx_rdpm_cx_freq_update(struct a6xx_gmu_device *gmu, + u32 freq) +{ + if (gmu->rdpm_cx_virt) { + writel_relaxed(freq/1000, + (gmu->rdpm_cx_virt + RDPM_CX_OFFSET)); + + /* + * Ensure previous writes post before this one, + * i.e. act like normal writel() + */ + wmb(); + } +} + +/* This is the starting point for non-GMU/RGMU targets */ +static int a6xx_nogmu_start(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + + /* + * During adreno_stop() GBIF halt is asserted to ensure that + * no further transactions go through the GPU before the + * GPU headswitch is turned off. + * + * The halt is supposed to be deasserted when the headswitch goes off, + * but clear it again during start to be sure. + */ + kgsl_regwrite(device, A6XX_GBIF_HALT, 0x0); + kgsl_regwrite(device, A6XX_RBBM_GBIF_HALT, 0x0); + + ret = kgsl_mmu_start(device); + if (ret) + return ret; + + adreno_get_bus_counters(adreno_dev); + adreno_perfcounter_restore(adreno_dev); + + a6xx_start(adreno_dev); + return 0; +} + +/* + * The CP_INIT_MAX_CONTEXT bit tells whether multiple hardware contexts can + * be used at once or if they should be serialized + */ +#define CP_INIT_MAX_CONTEXT BIT(0) + +/* Enables register protection mode */ +#define CP_INIT_ERROR_DETECTION_CONTROL BIT(1) + +/* Header dump information */ +#define CP_INIT_HEADER_DUMP BIT(2) /* Reserved */ + +/* Default Reset states enabled for PFP and ME */ +#define CP_INIT_DEFAULT_RESET_STATE BIT(3) + +/* Drawcall filter range */ +#define CP_INIT_DRAWCALL_FILTER_RANGE BIT(4) + +/* Ucode workaround masks */ +#define CP_INIT_UCODE_WORKAROUND_MASK BIT(5) + +/* + * Operation mode mask + * + * This ordinal provides the option to disable the + * save/restore of performance counters across preemption.
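+ * + * a6xx_cp_init_cmds() below programs this ordinal to 0x00000002.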
+ */ +#define CP_INIT_OPERATION_MODE_MASK BIT(6) + +/* Register initialization list */ +#define CP_INIT_REGISTER_INIT_LIST BIT(7) + +/* Register initialization list with spinlock */ +#define CP_INIT_REGISTER_INIT_LIST_WITH_SPINLOCK BIT(8) + +#define CP_INIT_MASK (CP_INIT_MAX_CONTEXT | \ + CP_INIT_ERROR_DETECTION_CONTROL | \ + CP_INIT_HEADER_DUMP | \ + CP_INIT_DEFAULT_RESET_STATE | \ + CP_INIT_UCODE_WORKAROUND_MASK | \ + CP_INIT_OPERATION_MODE_MASK | \ + CP_INIT_REGISTER_INIT_LIST_WITH_SPINLOCK) + +void a6xx_cp_init_cmds(struct adreno_device *adreno_dev, u32 *cmds) +{ + int i = 0; + + cmds[i++] = cp_type7_packet(CP_ME_INIT, A6XX_CP_INIT_DWORDS - 1); + + /* Enabled ordinal mask */ + cmds[i++] = CP_INIT_MASK; + + if (CP_INIT_MASK & CP_INIT_MAX_CONTEXT) + cmds[i++] = 0x00000003; + + if (CP_INIT_MASK & CP_INIT_ERROR_DETECTION_CONTROL) + cmds[i++] = 0x20000000; + + if (CP_INIT_MASK & CP_INIT_HEADER_DUMP) { + /* Header dump address */ + cmds[i++] = 0x00000000; + /* Header dump enable and dump size */ + cmds[i++] = 0x00000000; + } + + if (CP_INIT_MASK & CP_INIT_UCODE_WORKAROUND_MASK) + cmds[i++] = 0x00000000; + + if (CP_INIT_MASK & CP_INIT_OPERATION_MODE_MASK) + cmds[i++] = 0x00000002; + + if (CP_INIT_MASK & CP_INIT_REGISTER_INIT_LIST_WITH_SPINLOCK) { + uint64_t gpuaddr = adreno_dev->pwrup_reglist->gpuaddr; + + cmds[i++] = lower_32_bits(gpuaddr); + cmds[i++] = upper_32_bits(gpuaddr); + cmds[i++] = 0; + } +} + +void a6xx_spin_idle_debug(struct adreno_device *adreno_dev, + const char *str) +{ + struct kgsl_device *device = &adreno_dev->dev; + unsigned int rptr, wptr; + unsigned int status, status3, intstatus; + unsigned int hwfault; + + dev_err(device->dev, str); + + kgsl_regread(device, A6XX_CP_RB_RPTR, &rptr); + kgsl_regread(device, A6XX_CP_RB_WPTR, &wptr); + + kgsl_regread(device, A6XX_RBBM_STATUS, &status); + kgsl_regread(device, A6XX_RBBM_STATUS3, &status3); + kgsl_regread(device, A6XX_RBBM_INT_0_STATUS, &intstatus); + kgsl_regread(device, A6XX_CP_HW_FAULT, &hwfault); + + + dev_err(device->dev, + "rb=%d pos=%X/%X rbbm_status=%8.8X/%8.8X int_0_status=%8.8X\n", + adreno_dev->cur_rb ? 
adreno_dev->cur_rb->id : -1, rptr, wptr, + status, status3, intstatus); + + dev_err(device->dev, " hwfault=%8.8X\n", hwfault); + + kgsl_device_snapshot(device, NULL, false); + +} + +/* + * a6xx_send_cp_init() - Initialize ringbuffer + * @adreno_dev: Pointer to adreno device + * @rb: Pointer to the ringbuffer of device + * + * Submit commands for ME initialization, + */ +static int a6xx_send_cp_init(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned int *cmds; + int ret; + + cmds = adreno_ringbuffer_allocspace(rb, A6XX_CP_INIT_DWORDS); + if (IS_ERR(cmds)) + return PTR_ERR(cmds); + + a6xx_cp_init_cmds(adreno_dev, cmds); + + ret = a6xx_ringbuffer_submit(rb, NULL, true); + if (!ret) { + ret = adreno_spin_idle(adreno_dev, 2000); + if (ret) { + a6xx_spin_idle_debug(adreno_dev, + "CP initialization failed to idle\n"); + + kgsl_sharedmem_writel(device->scratch, + SCRATCH_RPTR_OFFSET(rb->id), 0); + rb->wptr = 0; + rb->_wptr = 0; + } + } + + return ret; +} + +/* + * Follow the ME_INIT sequence with a preemption yield to allow the GPU to move + * to a different ringbuffer, if desired + */ +static int _preemption_init(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, unsigned int *cmds, + struct kgsl_context *context) +{ + unsigned int *cmds_orig = cmds; + + /* Turn CP protection OFF on legacy targets */ + if (!ADRENO_FEATURE(adreno_dev, ADRENO_APRIV)) + cmds += cp_protected_mode(adreno_dev, cmds, 0); + + *cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 6); + *cmds++ = 1; + cmds += cp_gpuaddr(adreno_dev, cmds, + rb->preemption_desc->gpuaddr); + + *cmds++ = 2; + cmds += cp_gpuaddr(adreno_dev, cmds, + rb->secure_preemption_desc->gpuaddr); + + /* Turn CP protection back ON */ + if (!ADRENO_FEATURE(adreno_dev, ADRENO_APRIV)) + cmds += cp_protected_mode(adreno_dev, cmds, 1); + + *cmds++ = cp_type7_packet(CP_CONTEXT_SWITCH_YIELD, 4); + cmds += cp_gpuaddr(adreno_dev, cmds, 0x0); + *cmds++ = 0; + /* generate interrupt on preemption completion */ + *cmds++ = 0; + + return cmds - cmds_orig; +} + +static int a6xx_post_start(struct adreno_device *adreno_dev) +{ + int ret; + unsigned int *cmds, *start; + struct adreno_ringbuffer *rb = adreno_dev->cur_rb; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (!adreno_is_preemption_enabled(adreno_dev)) + return 0; + + cmds = adreno_ringbuffer_allocspace(rb, 42); + if (IS_ERR(cmds)) { + dev_err(device->dev, + "error allocating preemption init cmds\n"); + return PTR_ERR(cmds); + } + start = cmds; + + cmds += _preemption_init(adreno_dev, rb, cmds, NULL); + + rb->_wptr = rb->_wptr - (42 - (cmds - start)); + + ret = a6xx_ringbuffer_submit(rb, NULL, false); + if (!ret) { + ret = adreno_spin_idle(adreno_dev, 2000); + if (ret) + a6xx_spin_idle_debug(adreno_dev, + "hw preemption initialization failed to idle\n"); + } + + return ret; +} + +int a6xx_rb_start(struct adreno_device *adreno_dev) +{ + const struct adreno_a6xx_core *a6xx_core = to_a6xx_core(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 cp_rb_cntl = A6XX_CP_RB_CNTL_DEFAULT | + (ADRENO_FEATURE(adreno_dev, ADRENO_APRIV) ? 
0 : (1 << 27)); + struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_SQE); + struct adreno_ringbuffer *rb; + uint64_t addr; + int ret, i; + unsigned int *cmds; + + /* Clear all the ringbuffers */ + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + memset(rb->buffer_desc->hostptr, 0xaa, KGSL_RB_SIZE); + kgsl_sharedmem_writel(device->scratch, + SCRATCH_RPTR_OFFSET(rb->id), 0); + + rb->wptr = 0; + rb->_wptr = 0; + rb->wptr_preempt_end = ~0; + } + + a6xx_preemption_start(adreno_dev); + + /* Set up the current ringbuffer */ + rb = ADRENO_CURRENT_RINGBUFFER(adreno_dev); + addr = SCRATCH_RPTR_GPU_ADDR(device, rb->id); + + kgsl_regwrite(device, A6XX_CP_RB_RPTR_ADDR_LO, lower_32_bits(addr)); + kgsl_regwrite(device, A6XX_CP_RB_RPTR_ADDR_HI, upper_32_bits(addr)); + + /* + * The size of the ringbuffer in the hardware is the log2 + * representation of the size in quadwords (sizedwords / 2). + */ + kgsl_regwrite(device, A6XX_CP_RB_CNTL, cp_rb_cntl); + + kgsl_regwrite(device, A6XX_CP_RB_BASE, + lower_32_bits(rb->buffer_desc->gpuaddr)); + + kgsl_regwrite(device, A6XX_CP_RB_BASE_HI, + upper_32_bits(rb->buffer_desc->gpuaddr)); + + /* Program the ucode base for CP */ + kgsl_regwrite(device, A6XX_CP_SQE_INSTR_BASE_LO, + lower_32_bits(fw->memdesc->gpuaddr)); + kgsl_regwrite(device, A6XX_CP_SQE_INSTR_BASE_HI, + upper_32_bits(fw->memdesc->gpuaddr)); + + /* Clear the SQE_HALT to start the CP engine */ + kgsl_regwrite(device, A6XX_CP_SQE_CNTL, 1); + + ret = a6xx_send_cp_init(adreno_dev, rb); + if (ret) + return ret; + + ret = adreno_zap_shader_load(adreno_dev, a6xx_core->zap_name); + if (ret) + return ret; + + /* + * Take the GPU out of secure mode. Try the zap shader if it is loaded, + * otherwise just try to write directly to the secure control register + */ + if (!adreno_dev->zap_loaded) + kgsl_regwrite(device, A6XX_RBBM_SECVID_TRUST_CNTL, 0); + else { + cmds = adreno_ringbuffer_allocspace(rb, 2); + if (IS_ERR(cmds)) + return PTR_ERR(cmds); + + *cmds++ = cp_packet(adreno_dev, CP_SET_SECURE_MODE, 1); + *cmds++ = 0; + + ret = a6xx_ringbuffer_submit(rb, NULL, true); + if (!ret) { + ret = adreno_spin_idle(adreno_dev, 2000); + if (ret) { + a6xx_spin_idle_debug(adreno_dev, + "Switch to unsecure failed to idle\n"); + return ret; + } + } + } + + return a6xx_post_start(adreno_dev); +} + +/* + * a6xx_sptprac_enable() - Power on SPTPRAC + * @adreno_dev: Pointer to Adreno device + */ +static int a6xx_sptprac_enable(struct adreno_device *adreno_dev) +{ + return a6xx_gmu_sptprac_enable(adreno_dev); +} + +/* + * a6xx_sptprac_disable() - Power off SPTPRAC + * @adreno_dev: Pointer to Adreno device + */ +static void a6xx_sptprac_disable(struct adreno_device *adreno_dev) +{ + a6xx_gmu_sptprac_disable(adreno_dev); +} + +/* + * a6xx_gpu_keepalive() - GMU reg write to request GPU stays on + * @adreno_dev: Pointer to the adreno device that has the GMU + * @state: State to set: true is ON, false is OFF + */ +static void a6xx_gpu_keepalive(struct adreno_device *adreno_dev, + bool state) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (!gmu_core_isenabled(device)) + return; + + gmu_core_regwrite(device, A6XX_GMU_GMU_PWR_COL_KEEPALIVE, state); +} + +static bool a6xx_irq_pending(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 status; + + kgsl_regread(device, A6XX_RBBM_INT_0_STATUS, &status); + + /* Return busy if a interrupt is pending */ + return ((status & adreno_dev->irq_mask) || + atomic_read(&adreno_dev->pending_irq_refcnt)); +} + +static bool 
a619_holi_hw_isidle(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned int reg; + + kgsl_regread(device, A6XX_RBBM_STATUS, ®); + if (reg & 0xfffffffe) + return false; + + return a6xx_irq_pending(adreno_dev) ? false : true; +} + +bool a6xx_hw_isidle(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned int reg; + + /* Non GMU devices monitor the RBBM status */ + if (!gmu_core_isenabled(device)) { + kgsl_regread(device, A6XX_RBBM_STATUS, ®); + if (reg & 0xfffffffe) + return false; + + return a6xx_irq_pending(adreno_dev) ? false : true; + } + + gmu_core_regread(device, A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS, ®); + + /* Bit 23 is GPUBUSYIGNAHB */ + return (reg & BIT(23)) ? false : true; +} + +int a6xx_microcode_read(struct adreno_device *adreno_dev) +{ + struct adreno_firmware *sqe_fw = ADRENO_FW(adreno_dev, ADRENO_FW_SQE); + const struct adreno_a6xx_core *a6xx_core = to_a6xx_core(adreno_dev); + + return adreno_get_firmware(adreno_dev, a6xx_core->sqefw_name, sqe_fw); +} + +static int64_t a6xx_read_throttling_counters(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int64_t adj = -1; + u32 a, b, c; + struct adreno_busy_data *busy = &adreno_dev->busy_data; + + if (!(adreno_dev->lm_enabled || adreno_dev->bcl_enabled)) + return 0; + + a = counter_delta(device, A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_1_L, + &busy->throttle_cycles[0]); + + b = counter_delta(device, A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_2_L, + &busy->throttle_cycles[1]); + + c = counter_delta(device, A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_3_L, + &busy->throttle_cycles[2]); + + /* + * Currently there are no a6xx targets with both LM and BCL enabled. + * So if BCL is enabled, we can log bcl counters and return. + */ + if (adreno_dev->bcl_enabled) { + trace_kgsl_bcl_clock_throttling(a, b, c); + return 0; + } + + /* + * The adjustment is the number of cycles lost to throttling, which + * is calculated as a weighted average of the cycles throttled + * at different levels. The adjustment is negative because in A6XX, + * the busy count includes the throttled cycles. Therefore, we want + * to remove them to prevent appearing to be busier than + * we actually are. + */ + if (adreno_is_a620(adreno_dev) || adreno_is_a650(adreno_dev)) + /* + * With the newer generations, CRC throttle from SIDs of 0x14 + * and above cannot be observed in power counters. Since 90% + * throttle uses SID 0x16 the adjustment calculation needs + * correction. The throttling is in increments of 4.2%, and the + * 91.7% counter does a weighted count by the value of sid used + * which are taken into consideration for the final formula. 
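+ * + * As a purely illustrative example of the integer math below: with + * a = 1000, b = 200 and c = 5000 throttled cycles, the inner term is + * div_s64(5000 - 1000 - (200 * 12), 22) = 72, so the adjustment becomes + * -((1000 * 42) + (200 * 500) + (72 * 917)) / 1000 = -208 cycles.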
+ */ + adj *= div_s64((a * 42) + (b * 500) + + (div_s64((int64_t)c - a - b * 12, 22) * 917), 1000); + else + adj *= ((a * 5) + (b * 50) + (c * 90)) / 100; + + trace_kgsl_clock_throttling(0, b, c, a, adj); + + return adj; +} +#define GPU_CPR_FSM_CTL_OFFSET 0x4 +static void a6xx_gx_cpr_toggle(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_a6xx_core *a6xx_core = to_a6xx_core(adreno_dev); + static void __iomem *gx_cpr_virt; + struct resource *res; + u32 val = 0; + + if (!a6xx_core->gx_cpr_toggle) + return; + + if (!gx_cpr_virt) { + res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM, + "gx_cpr"); + if (res == NULL) + return; + + gx_cpr_virt = devm_ioremap_resource(&device->pdev->dev, res); + if (!gx_cpr_virt) { + dev_err(device->dev, "Failed to map GX CPR\n"); + return; + } + } + + /* + * Toggle(disable -> enable) closed loop functionality to recover + * CPR measurements stall happened under certain conditions. + */ + + val = readl_relaxed(gx_cpr_virt + GPU_CPR_FSM_CTL_OFFSET); + /* Make sure memory is updated before access */ + rmb(); + + writel_relaxed(val & 0xfffffff0, gx_cpr_virt + GPU_CPR_FSM_CTL_OFFSET); + /* make sure register write committed */ + wmb(); + + /* Wait for small time before we enable GX CPR */ + udelay(5); + + writel_relaxed(val | 0x00000001, gx_cpr_virt + GPU_CPR_FSM_CTL_OFFSET); + /* make sure register write committed */ + wmb(); +} + +/* This is only defined for non-GMU and non-RGMU targets */ +static int a6xx_clear_pending_transactions(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + + if (adreno_is_a619_holi(adreno_dev)) { + kgsl_regwrite(device, A6XX_RBBM_GPR0_CNTL, 0x1e0); + ret = adreno_wait_for_halt_ack(device, + A6XX_RBBM_VBIF_GX_RESET_STATUS, 0xf0); + } else { + kgsl_regwrite(device, A6XX_RBBM_GBIF_HALT, + A6XX_GBIF_GX_HALT_MASK); + ret = adreno_wait_for_halt_ack(device, A6XX_RBBM_GBIF_HALT_ACK, + A6XX_GBIF_GX_HALT_MASK); + } + + if (ret) + return ret; + + return a6xx_halt_gbif(adreno_dev); +} + +/** + * a6xx_reset() - Helper function to reset the GPU + * @adreno_dev: Pointer to the adreno device structure for the GPU + * + * Try to reset the GPU to recover from a fault for targets without + * a GMU. + */ +static int a6xx_reset(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + unsigned long flags = device->pwrctrl.ctrl_flags; + + ret = a6xx_clear_pending_transactions(adreno_dev); + if (ret) + return ret; + + /* Clear ctrl_flags to ensure clocks and regulators are turned off */ + device->pwrctrl.ctrl_flags = 0; + + kgsl_pwrctrl_change_state(device, KGSL_STATE_INIT); + + /* since device is officially off now clear start bit */ + clear_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv); + + a6xx_reset_preempt_records(adreno_dev); + + ret = adreno_start(device, 0); + if (ret) + return ret; + + kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE); + + device->pwrctrl.ctrl_flags = flags; + + /* Toggle GX CPR on demand */ + a6xx_gx_cpr_toggle(device); + + /* + * If active_cnt is zero, there is no need to keep the GPU active. So, + * we should transition to SLUMBER. 
+ */ + if (!atomic_read(&device->active_cnt)) + kgsl_pwrctrl_change_state(device, KGSL_STATE_SLUMBER); + + return 0; +} + +static void a6xx_cp_hw_err_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned int status1, status2; + + kgsl_regread(device, A6XX_CP_INTERRUPT_STATUS, &status1); + + if (status1 & BIT(A6XX_CP_OPCODE_ERROR)) { + unsigned int opcode; + + kgsl_regwrite(device, A6XX_CP_SQE_STAT_ADDR, 1); + kgsl_regread(device, A6XX_CP_SQE_STAT_DATA, &opcode); + dev_crit_ratelimited(device->dev, + "CP opcode error interrupt | opcode=0x%8.8x\n", opcode); + } + if (status1 & BIT(A6XX_CP_UCODE_ERROR)) + dev_crit_ratelimited(device->dev, "CP ucode error interrupt\n"); + if (status1 & BIT(A6XX_CP_HW_FAULT_ERROR)) { + kgsl_regread(device, A6XX_CP_HW_FAULT, &status2); + dev_crit_ratelimited(device->dev, + "CP | Ringbuffer HW fault | status=%x\n", status2); + } + if (status1 & BIT(A6XX_CP_REGISTER_PROTECTION_ERROR)) { + kgsl_regread(device, A6XX_CP_PROTECT_STATUS, &status2); + dev_crit_ratelimited(device->dev, + "CP | Protected mode error | %s | addr=%x | status=%x\n", + status2 & (1 << 20) ? "READ" : "WRITE", + status2 & 0x3FFFF, status2); + } + if (status1 & BIT(A6XX_CP_AHB_ERROR)) + dev_crit_ratelimited(device->dev, + "CP AHB error interrupt\n"); + if (status1 & BIT(A6XX_CP_VSD_PARITY_ERROR)) + dev_crit_ratelimited(device->dev, + "CP VSD decoder parity error\n"); + if (status1 & BIT(A6XX_CP_ILLEGAL_INSTR_ERROR)) + dev_crit_ratelimited(device->dev, + "CP Illegal instruction error\n"); + +} + +static void a6xx_err_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + switch (bit) { + case A6XX_INT_CP_AHB_ERROR: + dev_crit_ratelimited(device->dev, "CP: AHB bus error\n"); + break; + case A6XX_INT_ATB_ASYNCFIFO_OVERFLOW: + dev_crit_ratelimited(device->dev, + "RBBM: ATB ASYNC overflow\n"); + break; + case A6XX_INT_RBBM_ATB_BUS_OVERFLOW: + dev_crit_ratelimited(device->dev, + "RBBM: ATB bus overflow\n"); + break; + case A6XX_INT_UCHE_OOB_ACCESS: + dev_crit_ratelimited(device->dev, + "UCHE: Out of bounds access\n"); + break; + case A6XX_INT_UCHE_TRAP_INTR: + dev_crit_ratelimited(device->dev, "UCHE: Trap interrupt\n"); + break; + case A6XX_INT_TSB_WRITE_ERROR: + dev_crit_ratelimited(device->dev, "TSB: Write error interrupt\n"); + break; + default: + dev_crit_ratelimited(device->dev, "Unknown interrupt %d\n", + bit); + } +} + +/* + * a6xx_llc_configure_gpu_scid() - Program the sub-cache ID for all GPU blocks + * @adreno_dev: The adreno device pointer + */ +static void a6xx_llc_configure_gpu_scid(struct adreno_device *adreno_dev) +{ + uint32_t gpu_scid; + uint32_t gpu_cntl1_val = 0; + int i; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_mmu *mmu = &device->mmu; + + if (IS_ERR_OR_NULL(adreno_dev->gpu_llc_slice) || + !adreno_dev->gpu_llc_slice_enable) + return; + + if (llcc_slice_activate(adreno_dev->gpu_llc_slice)) + return; + + gpu_scid = llcc_get_slice_id(adreno_dev->gpu_llc_slice); + for (i = 0; i < A6XX_LLC_NUM_GPU_SCIDS; i++) + gpu_cntl1_val = (gpu_cntl1_val << A6XX_GPU_LLC_SCID_NUM_BITS) + | gpu_scid; + + if (mmu->subtype == KGSL_IOMMU_SMMU_V500) + kgsl_regrmw(device, A6XX_GBIF_SCACHE_CNTL1, + A6XX_GPU_LLC_SCID_MASK, gpu_cntl1_val); + else + adreno_cx_misc_regrmw(adreno_dev, + A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_1, + A6XX_GPU_LLC_SCID_MASK, gpu_cntl1_val); + + /* + * On A660, the SCID programming for UCHE traffic is done in + * 
A6XX_GBIF_SCACHE_CNTL0[14:10] + * GFO ENABLE BIT(8) : LLC uses a 64 byte cache line size enabling + * GFO allows it allocate partial cache lines + */ + if (adreno_is_a660(adreno_dev)) + kgsl_regrmw(device, A6XX_GBIF_SCACHE_CNTL0, (0x1f << 10) | + BIT(8), (gpu_scid << 10) | BIT(8)); +} + +/* + * a6xx_llc_configure_gpuhtw_scid() - Program the SCID for GPU pagetables + * @adreno_dev: The adreno device pointer + */ +static void a6xx_llc_configure_gpuhtw_scid(struct adreno_device *adreno_dev) +{ + uint32_t gpuhtw_scid; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_mmu *mmu = &device->mmu; + + if (IS_ERR_OR_NULL(adreno_dev->gpuhtw_llc_slice) || + !adreno_dev->gpuhtw_llc_slice_enable) + return; + + if (llcc_slice_activate(adreno_dev->gpuhtw_llc_slice)) + return; + + /* + * On SMMU-v500, the GPUHTW SCID is configured via a NoC override in + * the XBL image. + */ + if (mmu->subtype == KGSL_IOMMU_SMMU_V500) + return; + + gpuhtw_scid = llcc_get_slice_id(adreno_dev->gpuhtw_llc_slice); + + adreno_cx_misc_regrmw(adreno_dev, + A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_1, + A6XX_GPUHTW_LLC_SCID_MASK, + gpuhtw_scid << A6XX_GPUHTW_LLC_SCID_SHIFT); +} + +/* + * a6xx_llc_enable_overrides() - Override the page attributes + * @adreno_dev: The adreno device pointer + */ +static void a6xx_llc_enable_overrides(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_mmu *mmu = &device->mmu; + + /* + * Attributes override through GBIF is not supported with MMU-500. + * Attributes are used as configured through SMMU pagetable entries. + */ + if (mmu->subtype == KGSL_IOMMU_SMMU_V500) + return; + + /* + * 0x3: readnoallocoverrideen=0 + * read-no-alloc=0 - Allocate lines on read miss + * writenoallocoverrideen=1 + * write-no-alloc=1 - Do not allocates lines on write miss + */ + adreno_cx_misc_regwrite(adreno_dev, + A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_0, 0x3); +} + +static const char *uche_client[7][3] = { + {"SP | VSC | VPC | HLSQ | PC | LRZ", "TP", "VFD"}, + {"VSC | VPC | HLSQ | PC | LRZ", "TP | VFD", "SP"}, + {"SP | VPC | HLSQ | PC | LRZ", "TP | VFD", "VSC"}, + {"SP | VSC | HLSQ | PC | LRZ", "TP | VFD", "VPC"}, + {"SP | VSC | VPC | PC | LRZ", "TP | VFD", "HLSQ"}, + {"SP | VSC | VPC | HLSQ | LRZ", "TP | VFD", "PC"}, + {"SP | VSC | VPC | HLSQ | PC", "TP | VFD", "LRZ"}, +}; + +static const char *const uche_client_a660[] = { "VFD", "SP", "VSC", "VPC", + "HLSQ", "PC", "LRZ", "TP" }; + +#define SCOOBYDOO 0x5c00bd00 + +static const char *a6xx_fault_block_uche(struct kgsl_device *device, + unsigned int mid) +{ + unsigned int uche_client_id = 0; + static char str[40]; + + /* + * Smmu driver takes a vote on CX gdsc before calling the kgsl + * pagefault handler. If there is contention for device mutex in this + * path and the dispatcher fault handler is holding this lock, trying + * to turn off CX gdsc will fail during the reset. So to avoid blocking + * here, try to lock device mutex and return if it fails. 
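+ * + * Note that if the GPU is in IFPC, the A6XX_UCHE_CLIENT_PF read below + * returns the dummy value SCOOBYDOO (0x5c00bd00) and the client is + * reported as unknown.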
+ */ + if (!mutex_trylock(&device->mutex)) + return "UCHE: unknown"; + + if (!kgsl_state_is_awake(device)) { + mutex_unlock(&device->mutex); + return "UCHE: unknown"; + } + + kgsl_regread(device, A6XX_UCHE_CLIENT_PF, &uche_client_id); + mutex_unlock(&device->mutex); + + /* Ignore the value if the GPU is in IFPC */ + if (uche_client_id == SCOOBYDOO) + return "UCHE: unknown"; + + if (adreno_is_a660(ADRENO_DEVICE(device))) { + + /* Mask is 7 bits for A660 */ + uche_client_id &= 0x7F; + if (uche_client_id >= ARRAY_SIZE(uche_client_a660) || + (mid == 2)) + return "UCHE: Unknown"; + + if (mid == 1) + snprintf(str, sizeof(str), "UCHE: Not %s", + uche_client_a660[uche_client_id]); + else if (mid == 3) + snprintf(str, sizeof(str), "UCHE: %s", + uche_client_a660[uche_client_id]); + } else { + uche_client_id &= A6XX_UCHE_CLIENT_PF_CLIENT_ID_MASK; + if (uche_client_id >= ARRAY_SIZE(uche_client)) + return "UCHE: Unknown"; + + snprintf(str, sizeof(str), "UCHE: %s", + uche_client[uche_client_id][mid - 1]); + } + + return str; +} + +static const char *a6xx_iommu_fault_block(struct kgsl_device *device, + unsigned int fsynr1) +{ + unsigned int mid = fsynr1 & 0xff; + + switch (mid) { + case 0: + return "CP"; + case 1: + case 2: + case 3: + return a6xx_fault_block_uche(device, mid); + case 4: + return "CCU"; + case 6: + return "CDP Prefetch"; + case 7: + return "GPMU"; + } + + return "Unknown"; +} + +static void a6xx_cp_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (adreno_is_preemption_enabled(adreno_dev)) + a6xx_preemption_trigger(adreno_dev, true); + + adreno_dispatcher_schedule(device); +} + +/* + * a6xx_gpc_err_int_callback() - ISR for GPC error interrupts + * @adreno_dev: Pointer to device + * @bit: Interrupt bit + */ +static void a6xx_gpc_err_int_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + /* + * A GPC error is typically the result of a SW programming mistake. + * Force a GPU fault for this interrupt so that we can debug it + * with the help of a register dump.
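+ * + * The handler below disables the GPU interrupt via adreno_irqctrl() and + * then raises ADRENO_SOFT_FAULT on the dispatcher to force the restart.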
+ */ + + dev_crit(device->dev, "RBBM: GPC error\n"); + adreno_irqctrl(adreno_dev, 0); + + /* Trigger a fault in the dispatcher - this will effect a restart */ + adreno_dispatcher_fault(adreno_dev, ADRENO_SOFT_FAULT); +} + +static const struct adreno_irq_funcs a6xx_irq_funcs[32] = { + ADRENO_IRQ_CALLBACK(NULL), /* 0 - RBBM_GPU_IDLE */ + ADRENO_IRQ_CALLBACK(a6xx_err_callback), /* 1 - RBBM_AHB_ERROR */ + ADRENO_IRQ_CALLBACK(NULL), /* 2 - UNUSED */ + ADRENO_IRQ_CALLBACK(NULL), /* 3 - UNUSED */ + ADRENO_IRQ_CALLBACK(NULL), /* 4 - UNUSED */ + ADRENO_IRQ_CALLBACK(NULL), /* 5 - UNUSED */ + /* 6 - RBBM_ATB_ASYNC_OVERFLOW */ + ADRENO_IRQ_CALLBACK(a6xx_err_callback), + ADRENO_IRQ_CALLBACK(a6xx_gpc_err_int_callback), /* 7 - GPC_ERR */ + ADRENO_IRQ_CALLBACK(a6xx_preemption_callback),/* 8 - CP_SW */ + ADRENO_IRQ_CALLBACK(a6xx_cp_hw_err_callback), /* 9 - CP_HW_ERROR */ + ADRENO_IRQ_CALLBACK(NULL), /* 10 - CP_CCU_FLUSH_DEPTH_TS */ + ADRENO_IRQ_CALLBACK(NULL), /* 11 - CP_CCU_FLUSH_COLOR_TS */ + ADRENO_IRQ_CALLBACK(NULL), /* 12 - CP_CCU_RESOLVE_TS */ + ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 13 - CP_IB2_INT */ + ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 14 - CP_IB1_INT */ + ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 15 - CP_RB_INT */ + ADRENO_IRQ_CALLBACK(NULL), /* 16 - UNUSED */ + ADRENO_IRQ_CALLBACK(NULL), /* 17 - CP_RB_DONE_TS */ + ADRENO_IRQ_CALLBACK(NULL), /* 18 - CP_WT_DONE_TS */ + ADRENO_IRQ_CALLBACK(NULL), /* 19 - UNUSED */ + ADRENO_IRQ_CALLBACK(a6xx_cp_callback), /* 20 - CP_CACHE_FLUSH_TS */ + ADRENO_IRQ_CALLBACK(NULL), /* 21 - UNUSED */ + ADRENO_IRQ_CALLBACK(a6xx_err_callback), /* 22 - RBBM_ATB_BUS_OVERFLOW */ + /* 23 - MISC_HANG_DETECT */ + ADRENO_IRQ_CALLBACK(adreno_hang_int_callback), + ADRENO_IRQ_CALLBACK(a6xx_err_callback), /* 24 - UCHE_OOB_ACCESS */ + ADRENO_IRQ_CALLBACK(a6xx_err_callback), /* 25 - UCHE_TRAP_INTR */ + ADRENO_IRQ_CALLBACK(NULL), /* 26 - DEBBUS_INTR_0 */ + ADRENO_IRQ_CALLBACK(NULL), /* 27 - DEBBUS_INTR_1 */ + ADRENO_IRQ_CALLBACK(a6xx_err_callback), /* 28 - TSBWRITEERROR */ + ADRENO_IRQ_CALLBACK(NULL), /* 29 - UNUSED */ + ADRENO_IRQ_CALLBACK(NULL), /* 30 - ISDB_CPU_IRQ */ + ADRENO_IRQ_CALLBACK(NULL), /* 31 - ISDB_UNDER_DEBUG */ +}; + +/* + * If the AHB fence is not in ALLOW mode when we receive an RBBM + * interrupt, something went wrong. This means that we cannot proceed + * since the IRQ status and clear registers are not accessible. + * This is usually harmless because the GMU will abort power collapse + * and change the fence back to ALLOW. Poll so that this can happen. 
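+ * + * The poll below re-reads A6XX_GMU_AO_AHB_FENCE_CTRL with a 1 us delay + * between attempts and gives up with -ETIMEDOUT after 100 retries.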
+ */ +static int a6xx_irq_poll_fence(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 status, fence, fence_retries = 0; + u64 a, b, c; + + if (!gmu_core_isenabled(device)) + return 0; + + a = a6xx_read_alwayson(adreno_dev); + + kgsl_regread(device, A6XX_GMU_AO_AHB_FENCE_CTRL, &fence); + + while (fence != 0) { + b = a6xx_read_alwayson(adreno_dev); + + /* Wait for small time before trying again */ + udelay(1); + kgsl_regread(device, A6XX_GMU_AO_AHB_FENCE_CTRL, &fence); + + if (fence_retries == 100 && fence != 0) { + c = a6xx_read_alwayson(adreno_dev); + + kgsl_regread(device, A6XX_GMU_RBBM_INT_UNMASKED_STATUS, + &status); + + dev_crit_ratelimited(device->dev, + "status=0x%x Unmasked status=0x%x Mask=0x%x timestamps: %llx %llx %llx\n", + status & adreno_dev->irq_mask, status, + adreno_dev->irq_mask, a, b, c); + return -ETIMEDOUT; + } + + fence_retries++; + } + + return 0; +} + +static irqreturn_t a6xx_irq_handler(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + irqreturn_t ret = IRQ_NONE; + u32 status; + + /* + * On A6xx, the GPU can power down once the INT_0_STATUS is read + * below. But there still might be some register reads required + * so force the GMU/GPU into KEEPALIVE mode until done with the ISR. + */ + a6xx_gpu_keepalive(adreno_dev, true); + + if (a6xx_irq_poll_fence(adreno_dev)) { + adreno_dispatcher_fault(adreno_dev, ADRENO_GMU_FAULT); + goto done; + } + + kgsl_regread(device, A6XX_RBBM_INT_0_STATUS, &status); + + kgsl_regwrite(device, A6XX_RBBM_INT_CLEAR_CMD, status); + + ret = adreno_irq_callbacks(adreno_dev, a6xx_irq_funcs, status); + + trace_kgsl_a5xx_irq_status(adreno_dev, status); + +done: + /* If hard fault, then let snapshot turn off the keepalive */ + if (!(adreno_gpu_fault(adreno_dev) & ADRENO_HARD_FAULT)) + a6xx_gpu_keepalive(adreno_dev, false); + + return ret; +} + +#ifdef CONFIG_QCOM_KGSL_CORESIGHT +static struct adreno_coresight_register a6xx_coresight_regs[] = { + { A6XX_DBGC_CFG_DBGBUS_SEL_A }, + { A6XX_DBGC_CFG_DBGBUS_SEL_B }, + { A6XX_DBGC_CFG_DBGBUS_SEL_C }, + { A6XX_DBGC_CFG_DBGBUS_SEL_D }, + { A6XX_DBGC_CFG_DBGBUS_CNTLT }, + { A6XX_DBGC_CFG_DBGBUS_CNTLM }, + { A6XX_DBGC_CFG_DBGBUS_OPL }, + { A6XX_DBGC_CFG_DBGBUS_OPE }, + { A6XX_DBGC_CFG_DBGBUS_IVTL_0 }, + { A6XX_DBGC_CFG_DBGBUS_IVTL_1 }, + { A6XX_DBGC_CFG_DBGBUS_IVTL_2 }, + { A6XX_DBGC_CFG_DBGBUS_IVTL_3 }, + { A6XX_DBGC_CFG_DBGBUS_MASKL_0 }, + { A6XX_DBGC_CFG_DBGBUS_MASKL_1 }, + { A6XX_DBGC_CFG_DBGBUS_MASKL_2 }, + { A6XX_DBGC_CFG_DBGBUS_MASKL_3 }, + { A6XX_DBGC_CFG_DBGBUS_BYTEL_0 }, + { A6XX_DBGC_CFG_DBGBUS_BYTEL_1 }, + { A6XX_DBGC_CFG_DBGBUS_IVTE_0 }, + { A6XX_DBGC_CFG_DBGBUS_IVTE_1 }, + { A6XX_DBGC_CFG_DBGBUS_IVTE_2 }, + { A6XX_DBGC_CFG_DBGBUS_IVTE_3 }, + { A6XX_DBGC_CFG_DBGBUS_MASKE_0 }, + { A6XX_DBGC_CFG_DBGBUS_MASKE_1 }, + { A6XX_DBGC_CFG_DBGBUS_MASKE_2 }, + { A6XX_DBGC_CFG_DBGBUS_MASKE_3 }, + { A6XX_DBGC_CFG_DBGBUS_NIBBLEE }, + { A6XX_DBGC_CFG_DBGBUS_PTRC0 }, + { A6XX_DBGC_CFG_DBGBUS_PTRC1 }, + { A6XX_DBGC_CFG_DBGBUS_LOADREG }, + { A6XX_DBGC_CFG_DBGBUS_IDX }, + { A6XX_DBGC_CFG_DBGBUS_CLRC }, + { A6XX_DBGC_CFG_DBGBUS_LOADIVT }, + { A6XX_DBGC_VBIF_DBG_CNTL }, + { A6XX_DBGC_DBG_LO_HI_GPIO }, + { A6XX_DBGC_EXT_TRACE_BUS_CNTL }, + { A6XX_DBGC_READ_AHB_THROUGH_DBG }, + { A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1 }, + { A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2 }, + { A6XX_DBGC_EVT_CFG }, + { A6XX_DBGC_EVT_INTF_SEL_0 }, + { A6XX_DBGC_EVT_INTF_SEL_1 }, + { A6XX_DBGC_PERF_ATB_CFG }, + { A6XX_DBGC_PERF_ATB_COUNTER_SEL_0 }, + { 
A6XX_DBGC_PERF_ATB_COUNTER_SEL_1 }, + { A6XX_DBGC_PERF_ATB_COUNTER_SEL_2 }, + { A6XX_DBGC_PERF_ATB_COUNTER_SEL_3 }, + { A6XX_DBGC_PERF_ATB_TRIG_INTF_SEL_0 }, + { A6XX_DBGC_PERF_ATB_TRIG_INTF_SEL_1 }, + { A6XX_DBGC_PERF_ATB_DRAIN_CMD }, + { A6XX_DBGC_ECO_CNTL }, + { A6XX_DBGC_AHB_DBG_CNTL }, +}; + +static struct adreno_coresight_register a6xx_coresight_regs_cx[] = { + { A6XX_CX_DBGC_CFG_DBGBUS_SEL_A }, + { A6XX_CX_DBGC_CFG_DBGBUS_SEL_B }, + { A6XX_CX_DBGC_CFG_DBGBUS_SEL_C }, + { A6XX_CX_DBGC_CFG_DBGBUS_SEL_D }, + { A6XX_CX_DBGC_CFG_DBGBUS_CNTLT }, + { A6XX_CX_DBGC_CFG_DBGBUS_CNTLM }, + { A6XX_CX_DBGC_CFG_DBGBUS_OPL }, + { A6XX_CX_DBGC_CFG_DBGBUS_OPE }, + { A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0 }, + { A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1 }, + { A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2 }, + { A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3 }, + { A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0 }, + { A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1 }, + { A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2 }, + { A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3 }, + { A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0 }, + { A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1 }, + { A6XX_CX_DBGC_CFG_DBGBUS_IVTE_0 }, + { A6XX_CX_DBGC_CFG_DBGBUS_IVTE_1 }, + { A6XX_CX_DBGC_CFG_DBGBUS_IVTE_2 }, + { A6XX_CX_DBGC_CFG_DBGBUS_IVTE_3 }, + { A6XX_CX_DBGC_CFG_DBGBUS_MASKE_0 }, + { A6XX_CX_DBGC_CFG_DBGBUS_MASKE_1 }, + { A6XX_CX_DBGC_CFG_DBGBUS_MASKE_2 }, + { A6XX_CX_DBGC_CFG_DBGBUS_MASKE_3 }, + { A6XX_CX_DBGC_CFG_DBGBUS_NIBBLEE }, + { A6XX_CX_DBGC_CFG_DBGBUS_PTRC0 }, + { A6XX_CX_DBGC_CFG_DBGBUS_PTRC1 }, + { A6XX_CX_DBGC_CFG_DBGBUS_LOADREG }, + { A6XX_CX_DBGC_CFG_DBGBUS_IDX }, + { A6XX_CX_DBGC_CFG_DBGBUS_CLRC }, + { A6XX_CX_DBGC_CFG_DBGBUS_LOADIVT }, + { A6XX_CX_DBGC_VBIF_DBG_CNTL }, + { A6XX_CX_DBGC_DBG_LO_HI_GPIO }, + { A6XX_CX_DBGC_EXT_TRACE_BUS_CNTL }, + { A6XX_CX_DBGC_READ_AHB_THROUGH_DBG }, + { A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1 }, + { A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2 }, + { A6XX_CX_DBGC_EVT_CFG }, + { A6XX_CX_DBGC_EVT_INTF_SEL_0 }, + { A6XX_CX_DBGC_EVT_INTF_SEL_1 }, + { A6XX_CX_DBGC_PERF_ATB_CFG }, + { A6XX_CX_DBGC_PERF_ATB_COUNTER_SEL_0 }, + { A6XX_CX_DBGC_PERF_ATB_COUNTER_SEL_1 }, + { A6XX_CX_DBGC_PERF_ATB_COUNTER_SEL_2 }, + { A6XX_CX_DBGC_PERF_ATB_COUNTER_SEL_3 }, + { A6XX_CX_DBGC_PERF_ATB_TRIG_INTF_SEL_0 }, + { A6XX_CX_DBGC_PERF_ATB_TRIG_INTF_SEL_1 }, + { A6XX_CX_DBGC_PERF_ATB_DRAIN_CMD }, + { A6XX_CX_DBGC_ECO_CNTL }, + { A6XX_CX_DBGC_AHB_DBG_CNTL }, +}; + +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_a, &a6xx_coresight_regs[0]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_b, &a6xx_coresight_regs[1]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_c, &a6xx_coresight_regs[2]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_d, &a6xx_coresight_regs[3]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_cntlt, &a6xx_coresight_regs[4]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_cntlm, &a6xx_coresight_regs[5]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_opl, &a6xx_coresight_regs[6]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ope, &a6xx_coresight_regs[7]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_0, &a6xx_coresight_regs[8]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_1, &a6xx_coresight_regs[9]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_2, &a6xx_coresight_regs[10]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_3, &a6xx_coresight_regs[11]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_0, &a6xx_coresight_regs[12]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_1, &a6xx_coresight_regs[13]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_2, &a6xx_coresight_regs[14]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_3, &a6xx_coresight_regs[15]); +static 
ADRENO_CORESIGHT_ATTR(cfg_dbgbus_bytel_0, &a6xx_coresight_regs[16]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_bytel_1, &a6xx_coresight_regs[17]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_0, &a6xx_coresight_regs[18]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_1, &a6xx_coresight_regs[19]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_2, &a6xx_coresight_regs[20]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_3, &a6xx_coresight_regs[21]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_0, &a6xx_coresight_regs[22]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_1, &a6xx_coresight_regs[23]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_2, &a6xx_coresight_regs[24]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_3, &a6xx_coresight_regs[25]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_nibblee, &a6xx_coresight_regs[26]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ptrc0, &a6xx_coresight_regs[27]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ptrc1, &a6xx_coresight_regs[28]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_loadreg, &a6xx_coresight_regs[29]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_idx, &a6xx_coresight_regs[30]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_clrc, &a6xx_coresight_regs[31]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_loadivt, &a6xx_coresight_regs[32]); +static ADRENO_CORESIGHT_ATTR(vbif_dbg_cntl, &a6xx_coresight_regs[33]); +static ADRENO_CORESIGHT_ATTR(dbg_lo_hi_gpio, &a6xx_coresight_regs[34]); +static ADRENO_CORESIGHT_ATTR(ext_trace_bus_cntl, &a6xx_coresight_regs[35]); +static ADRENO_CORESIGHT_ATTR(read_ahb_through_dbg, &a6xx_coresight_regs[36]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf1, &a6xx_coresight_regs[37]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf2, &a6xx_coresight_regs[38]); +static ADRENO_CORESIGHT_ATTR(evt_cfg, &a6xx_coresight_regs[39]); +static ADRENO_CORESIGHT_ATTR(evt_intf_sel_0, &a6xx_coresight_regs[40]); +static ADRENO_CORESIGHT_ATTR(evt_intf_sel_1, &a6xx_coresight_regs[41]); +static ADRENO_CORESIGHT_ATTR(perf_atb_cfg, &a6xx_coresight_regs[42]); +static ADRENO_CORESIGHT_ATTR(perf_atb_counter_sel_0, &a6xx_coresight_regs[43]); +static ADRENO_CORESIGHT_ATTR(perf_atb_counter_sel_1, &a6xx_coresight_regs[44]); +static ADRENO_CORESIGHT_ATTR(perf_atb_counter_sel_2, &a6xx_coresight_regs[45]); +static ADRENO_CORESIGHT_ATTR(perf_atb_counter_sel_3, &a6xx_coresight_regs[46]); +static ADRENO_CORESIGHT_ATTR(perf_atb_trig_intf_sel_0, + &a6xx_coresight_regs[47]); +static ADRENO_CORESIGHT_ATTR(perf_atb_trig_intf_sel_1, + &a6xx_coresight_regs[48]); +static ADRENO_CORESIGHT_ATTR(perf_atb_drain_cmd, &a6xx_coresight_regs[49]); +static ADRENO_CORESIGHT_ATTR(eco_cntl, &a6xx_coresight_regs[50]); +static ADRENO_CORESIGHT_ATTR(ahb_dbg_cntl, &a6xx_coresight_regs[51]); + +/*CX debug registers*/ +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_sel_a, + &a6xx_coresight_regs_cx[0]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_sel_b, + &a6xx_coresight_regs_cx[1]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_sel_c, + &a6xx_coresight_regs_cx[2]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_sel_d, + &a6xx_coresight_regs_cx[3]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_cntlt, + &a6xx_coresight_regs_cx[4]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_cntlm, + &a6xx_coresight_regs_cx[5]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_opl, + &a6xx_coresight_regs_cx[6]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ope, + &a6xx_coresight_regs_cx[7]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivtl_0, + &a6xx_coresight_regs_cx[8]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivtl_1, + 
&a6xx_coresight_regs_cx[9]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivtl_2, + &a6xx_coresight_regs_cx[10]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivtl_3, + &a6xx_coresight_regs_cx[11]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maskl_0, + &a6xx_coresight_regs_cx[12]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maskl_1, + &a6xx_coresight_regs_cx[13]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maskl_2, + &a6xx_coresight_regs_cx[14]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maskl_3, + &a6xx_coresight_regs_cx[15]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_bytel_0, + &a6xx_coresight_regs_cx[16]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_bytel_1, + &a6xx_coresight_regs_cx[17]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivte_0, + &a6xx_coresight_regs_cx[18]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivte_1, + &a6xx_coresight_regs_cx[19]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivte_2, + &a6xx_coresight_regs_cx[20]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivte_3, + &a6xx_coresight_regs_cx[21]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maske_0, + &a6xx_coresight_regs_cx[22]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maske_1, + &a6xx_coresight_regs_cx[23]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maske_2, + &a6xx_coresight_regs_cx[24]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maske_3, + &a6xx_coresight_regs_cx[25]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_nibblee, + &a6xx_coresight_regs_cx[26]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ptrc0, + &a6xx_coresight_regs_cx[27]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ptrc1, + &a6xx_coresight_regs_cx[28]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_loadreg, + &a6xx_coresight_regs_cx[29]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_idx, + &a6xx_coresight_regs_cx[30]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_clrc, + &a6xx_coresight_regs_cx[31]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_loadivt, + &a6xx_coresight_regs_cx[32]); +static ADRENO_CORESIGHT_ATTR(cx_vbif_dbg_cntl, + &a6xx_coresight_regs_cx[33]); +static ADRENO_CORESIGHT_ATTR(cx_dbg_lo_hi_gpio, + &a6xx_coresight_regs_cx[34]); +static ADRENO_CORESIGHT_ATTR(cx_ext_trace_bus_cntl, + &a6xx_coresight_regs_cx[35]); +static ADRENO_CORESIGHT_ATTR(cx_read_ahb_through_dbg, + &a6xx_coresight_regs_cx[36]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_trace_buf1, + &a6xx_coresight_regs_cx[37]); +static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_trace_buf2, + &a6xx_coresight_regs_cx[38]); +static ADRENO_CORESIGHT_ATTR(cx_evt_cfg, + &a6xx_coresight_regs_cx[39]); +static ADRENO_CORESIGHT_ATTR(cx_evt_intf_sel_0, + &a6xx_coresight_regs_cx[40]); +static ADRENO_CORESIGHT_ATTR(cx_evt_intf_sel_1, + &a6xx_coresight_regs_cx[41]); +static ADRENO_CORESIGHT_ATTR(cx_perf_atb_cfg, + &a6xx_coresight_regs_cx[42]); +static ADRENO_CORESIGHT_ATTR(cx_perf_atb_counter_sel_0, + &a6xx_coresight_regs_cx[43]); +static ADRENO_CORESIGHT_ATTR(cx_perf_atb_counter_sel_1, + &a6xx_coresight_regs_cx[44]); +static ADRENO_CORESIGHT_ATTR(cx_perf_atb_counter_sel_2, + &a6xx_coresight_regs_cx[45]); +static ADRENO_CORESIGHT_ATTR(cx_perf_atb_counter_sel_3, + &a6xx_coresight_regs_cx[46]); +static ADRENO_CORESIGHT_ATTR(cx_perf_atb_trig_intf_sel_0, + &a6xx_coresight_regs_cx[47]); +static ADRENO_CORESIGHT_ATTR(cx_perf_atb_trig_intf_sel_1, + &a6xx_coresight_regs_cx[48]); +static ADRENO_CORESIGHT_ATTR(cx_perf_atb_drain_cmd, + &a6xx_coresight_regs_cx[49]); +static ADRENO_CORESIGHT_ATTR(cx_eco_cntl, + &a6xx_coresight_regs_cx[50]); +static ADRENO_CORESIGHT_ATTR(cx_ahb_dbg_cntl, + 
&a6xx_coresight_regs_cx[51]); + +static struct attribute *a6xx_coresight_attrs[] = { + &coresight_attr_cfg_dbgbus_sel_a.attr.attr, + &coresight_attr_cfg_dbgbus_sel_b.attr.attr, + &coresight_attr_cfg_dbgbus_sel_c.attr.attr, + &coresight_attr_cfg_dbgbus_sel_d.attr.attr, + &coresight_attr_cfg_dbgbus_cntlt.attr.attr, + &coresight_attr_cfg_dbgbus_cntlm.attr.attr, + &coresight_attr_cfg_dbgbus_opl.attr.attr, + &coresight_attr_cfg_dbgbus_ope.attr.attr, + &coresight_attr_cfg_dbgbus_ivtl_0.attr.attr, + &coresight_attr_cfg_dbgbus_ivtl_1.attr.attr, + &coresight_attr_cfg_dbgbus_ivtl_2.attr.attr, + &coresight_attr_cfg_dbgbus_ivtl_3.attr.attr, + &coresight_attr_cfg_dbgbus_maskl_0.attr.attr, + &coresight_attr_cfg_dbgbus_maskl_1.attr.attr, + &coresight_attr_cfg_dbgbus_maskl_2.attr.attr, + &coresight_attr_cfg_dbgbus_maskl_3.attr.attr, + &coresight_attr_cfg_dbgbus_bytel_0.attr.attr, + &coresight_attr_cfg_dbgbus_bytel_1.attr.attr, + &coresight_attr_cfg_dbgbus_ivte_0.attr.attr, + &coresight_attr_cfg_dbgbus_ivte_1.attr.attr, + &coresight_attr_cfg_dbgbus_ivte_2.attr.attr, + &coresight_attr_cfg_dbgbus_ivte_3.attr.attr, + &coresight_attr_cfg_dbgbus_maske_0.attr.attr, + &coresight_attr_cfg_dbgbus_maske_1.attr.attr, + &coresight_attr_cfg_dbgbus_maske_2.attr.attr, + &coresight_attr_cfg_dbgbus_maske_3.attr.attr, + &coresight_attr_cfg_dbgbus_nibblee.attr.attr, + &coresight_attr_cfg_dbgbus_ptrc0.attr.attr, + &coresight_attr_cfg_dbgbus_ptrc1.attr.attr, + &coresight_attr_cfg_dbgbus_loadreg.attr.attr, + &coresight_attr_cfg_dbgbus_idx.attr.attr, + &coresight_attr_cfg_dbgbus_clrc.attr.attr, + &coresight_attr_cfg_dbgbus_loadivt.attr.attr, + &coresight_attr_vbif_dbg_cntl.attr.attr, + &coresight_attr_dbg_lo_hi_gpio.attr.attr, + &coresight_attr_ext_trace_bus_cntl.attr.attr, + &coresight_attr_read_ahb_through_dbg.attr.attr, + &coresight_attr_cfg_dbgbus_trace_buf1.attr.attr, + &coresight_attr_cfg_dbgbus_trace_buf2.attr.attr, + &coresight_attr_evt_cfg.attr.attr, + &coresight_attr_evt_intf_sel_0.attr.attr, + &coresight_attr_evt_intf_sel_1.attr.attr, + &coresight_attr_perf_atb_cfg.attr.attr, + &coresight_attr_perf_atb_counter_sel_0.attr.attr, + &coresight_attr_perf_atb_counter_sel_1.attr.attr, + &coresight_attr_perf_atb_counter_sel_2.attr.attr, + &coresight_attr_perf_atb_counter_sel_3.attr.attr, + &coresight_attr_perf_atb_trig_intf_sel_0.attr.attr, + &coresight_attr_perf_atb_trig_intf_sel_1.attr.attr, + &coresight_attr_perf_atb_drain_cmd.attr.attr, + &coresight_attr_eco_cntl.attr.attr, + &coresight_attr_ahb_dbg_cntl.attr.attr, + NULL, +}; + +/*cx*/ +static struct attribute *a6xx_coresight_attrs_cx[] = { + &coresight_attr_cx_cfg_dbgbus_sel_a.attr.attr, + &coresight_attr_cx_cfg_dbgbus_sel_b.attr.attr, + &coresight_attr_cx_cfg_dbgbus_sel_c.attr.attr, + &coresight_attr_cx_cfg_dbgbus_sel_d.attr.attr, + &coresight_attr_cx_cfg_dbgbus_cntlt.attr.attr, + &coresight_attr_cx_cfg_dbgbus_cntlm.attr.attr, + &coresight_attr_cx_cfg_dbgbus_opl.attr.attr, + &coresight_attr_cx_cfg_dbgbus_ope.attr.attr, + &coresight_attr_cx_cfg_dbgbus_ivtl_0.attr.attr, + &coresight_attr_cx_cfg_dbgbus_ivtl_1.attr.attr, + &coresight_attr_cx_cfg_dbgbus_ivtl_2.attr.attr, + &coresight_attr_cx_cfg_dbgbus_ivtl_3.attr.attr, + &coresight_attr_cx_cfg_dbgbus_maskl_0.attr.attr, + &coresight_attr_cx_cfg_dbgbus_maskl_1.attr.attr, + &coresight_attr_cx_cfg_dbgbus_maskl_2.attr.attr, + &coresight_attr_cx_cfg_dbgbus_maskl_3.attr.attr, + &coresight_attr_cx_cfg_dbgbus_bytel_0.attr.attr, + &coresight_attr_cx_cfg_dbgbus_bytel_1.attr.attr, + &coresight_attr_cx_cfg_dbgbus_ivte_0.attr.attr, + 
&coresight_attr_cx_cfg_dbgbus_ivte_1.attr.attr, + &coresight_attr_cx_cfg_dbgbus_ivte_2.attr.attr, + &coresight_attr_cx_cfg_dbgbus_ivte_3.attr.attr, + &coresight_attr_cx_cfg_dbgbus_maske_0.attr.attr, + &coresight_attr_cx_cfg_dbgbus_maske_1.attr.attr, + &coresight_attr_cx_cfg_dbgbus_maske_2.attr.attr, + &coresight_attr_cx_cfg_dbgbus_maske_3.attr.attr, + &coresight_attr_cx_cfg_dbgbus_nibblee.attr.attr, + &coresight_attr_cx_cfg_dbgbus_ptrc0.attr.attr, + &coresight_attr_cx_cfg_dbgbus_ptrc1.attr.attr, + &coresight_attr_cx_cfg_dbgbus_loadreg.attr.attr, + &coresight_attr_cx_cfg_dbgbus_idx.attr.attr, + &coresight_attr_cx_cfg_dbgbus_clrc.attr.attr, + &coresight_attr_cx_cfg_dbgbus_loadivt.attr.attr, + &coresight_attr_cx_vbif_dbg_cntl.attr.attr, + &coresight_attr_cx_dbg_lo_hi_gpio.attr.attr, + &coresight_attr_cx_ext_trace_bus_cntl.attr.attr, + &coresight_attr_cx_read_ahb_through_dbg.attr.attr, + &coresight_attr_cx_cfg_dbgbus_trace_buf1.attr.attr, + &coresight_attr_cx_cfg_dbgbus_trace_buf2.attr.attr, + &coresight_attr_cx_evt_cfg.attr.attr, + &coresight_attr_cx_evt_intf_sel_0.attr.attr, + &coresight_attr_cx_evt_intf_sel_1.attr.attr, + &coresight_attr_cx_perf_atb_cfg.attr.attr, + &coresight_attr_cx_perf_atb_counter_sel_0.attr.attr, + &coresight_attr_cx_perf_atb_counter_sel_1.attr.attr, + &coresight_attr_cx_perf_atb_counter_sel_2.attr.attr, + &coresight_attr_cx_perf_atb_counter_sel_3.attr.attr, + &coresight_attr_cx_perf_atb_trig_intf_sel_0.attr.attr, + &coresight_attr_cx_perf_atb_trig_intf_sel_1.attr.attr, + &coresight_attr_cx_perf_atb_drain_cmd.attr.attr, + &coresight_attr_cx_eco_cntl.attr.attr, + &coresight_attr_cx_ahb_dbg_cntl.attr.attr, + NULL, +}; + +static const struct attribute_group a6xx_coresight_group = { + .attrs = a6xx_coresight_attrs, +}; + +static const struct attribute_group *a6xx_coresight_groups[] = { + &a6xx_coresight_group, + NULL, +}; + +static const struct attribute_group a6xx_coresight_group_cx = { + .attrs = a6xx_coresight_attrs_cx, +}; + +static const struct attribute_group *a6xx_coresight_groups_cx[] = { + &a6xx_coresight_group_cx, + NULL, +}; + +static struct adreno_coresight a6xx_coresight = { + .registers = a6xx_coresight_regs, + .count = ARRAY_SIZE(a6xx_coresight_regs), + .groups = a6xx_coresight_groups, +}; + +static struct adreno_coresight a6xx_coresight_cx = { + .registers = a6xx_coresight_regs_cx, + .count = ARRAY_SIZE(a6xx_coresight_regs_cx), + .groups = a6xx_coresight_groups_cx, +}; +#endif + +int a6xx_probe_common(struct platform_device *pdev, + struct adreno_device *adreno_dev, u32 chipid, + const struct adreno_gpu_core *gpucore) +{ + const struct adreno_gpudev *gpudev = gpucore->gpudev; + + adreno_dev->gpucore = gpucore; + adreno_dev->chipid = chipid; + + adreno_reg_offset_init(gpudev->reg_offsets); + + adreno_dev->hwcg_enabled = true; + + adreno_dev->preempt.preempt_level = 1; + adreno_dev->preempt.skipsaverestore = true; + adreno_dev->preempt.usesgmem = true; + + return adreno_device_probe(pdev, adreno_dev); +} + +static int a6xx_probe(struct platform_device *pdev, + u32 chipid, const struct adreno_gpu_core *gpucore) +{ + struct adreno_device *adreno_dev; + struct kgsl_device *device; + int ret; + + adreno_dev = (struct adreno_device *) + of_device_get_match_data(&pdev->dev); + + memset(adreno_dev, 0, sizeof(*adreno_dev)); + + ret = a6xx_probe_common(pdev, adreno_dev, chipid, gpucore); + if (ret) + return ret; + + ret = adreno_dispatcher_init(adreno_dev); + if (ret) + return ret; + + device = KGSL_DEVICE(adreno_dev); + + timer_setup(&device->idle_timer, kgsl_timer, 
0); + + INIT_WORK(&device->idle_check_ws, kgsl_idle_check); + + adreno_dev->irq_mask = A6XX_INT_MASK; + + return 0; +} + +/* Register offset defines for A6XX, in order of enum adreno_regs */ +static unsigned int a6xx_register_offsets[ADRENO_REG_REGISTER_MAX] = { + + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE, A6XX_CP_RB_BASE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE_HI, A6XX_CP_RB_BASE_HI), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR_ADDR_LO, + A6XX_CP_RB_RPTR_ADDR_LO), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR_ADDR_HI, + A6XX_CP_RB_RPTR_ADDR_HI), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR, A6XX_CP_RB_RPTR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_WPTR, A6XX_CP_RB_WPTR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_CNTL, A6XX_CP_RB_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_CNTL, A6XX_CP_SQE_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE, A6XX_CP_IB1_BASE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE_HI, A6XX_CP_IB1_BASE_HI), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BUFSZ, A6XX_CP_IB1_REM_SIZE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE, A6XX_CP_IB2_BASE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE_HI, A6XX_CP_IB2_BASE_HI), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BUFSZ, A6XX_CP_IB2_REM_SIZE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_PREEMPT, A6XX_CP_CONTEXT_SWITCH_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_LO, + A6XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO), + ADRENO_REG_DEFINE(ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_HI, + A6XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI), + ADRENO_REG_DEFINE( + ADRENO_REG_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_LO, + A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_LO), + ADRENO_REG_DEFINE( + ADRENO_REG_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_HI, + A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_HI), + ADRENO_REG_DEFINE( + ADRENO_REG_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_LO, + A6XX_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_LO), + ADRENO_REG_DEFINE( + ADRENO_REG_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_HI, + A6XX_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_HI), + ADRENO_REG_DEFINE(ADRENO_REG_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_LO, + A6XX_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_LO), + ADRENO_REG_DEFINE(ADRENO_REG_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_HI, + A6XX_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_HI), + ADRENO_REG_DEFINE(ADRENO_REG_CP_PREEMPT_LEVEL_STATUS, + A6XX_CP_CONTEXT_SWITCH_LEVEL_STATUS), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_STATUS, A6XX_RBBM_STATUS), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_STATUS3, A6XX_RBBM_STATUS3), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_INT_0_MASK, A6XX_RBBM_INT_0_MASK), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_CLOCK_CTL, A6XX_RBBM_CLOCK_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SW_RESET_CMD, A6XX_RBBM_SW_RESET_CMD), + ADRENO_REG_DEFINE(ADRENO_REG_GMU_AO_HOST_INTERRUPT_MASK, + A6XX_GMU_AO_HOST_INTERRUPT_MASK), + ADRENO_REG_DEFINE(ADRENO_REG_GMU_AHB_FENCE_STATUS, + A6XX_GMU_AHB_FENCE_STATUS), + ADRENO_REG_DEFINE(ADRENO_REG_GMU_GMU2HOST_INTR_MASK, + A6XX_GMU_GMU2HOST_INTR_MASK), +}; + +int a6xx_perfcounter_update(struct adreno_device *adreno_dev, + struct adreno_perfcount_register *reg, bool update_reg) +{ + void *ptr = adreno_dev->pwrup_reglist->hostptr; + struct cpu_gpu_lock *lock = ptr; + u32 *data = ptr + sizeof(*lock); + int i, offset = 0; + + if (kgsl_hwlock(lock)) { + kgsl_hwunlock(lock); + return -EBUSY; + } + + /* + * If the perfcounter select register is already present in reglist + * update it, otherwise append the pair to + * the end of the list. 
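+ * + * The list is stored as (select register, countable) pairs, and the + * GEN7_RBBM_PERFCTR_CNTL entry must remain the last pair in the list.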
+ */ + for (i = 0; i < lock->list_length >> 1; i++) { + if (data[offset] == reg->select) { + data[offset + 1] = reg->countable; + goto update; + } + + if (data[offset] == GEN7_RBBM_PERFCTR_CNTL) + break; + + offset += 2; + } + + /* + * For all targets GEN7_RBBM_PERFCTR_CNTL needs to be the last entry, + * so overwrite the existing GEN7_RBBM_PERFCNTL_CTRL and add it back to + * the end. + */ + data[offset] = reg->select; + data[offset + 1] = reg->countable; + data[offset + 2] = GEN7_RBBM_PERFCTR_CNTL; + data[offset + 3] = 1; + + lock->list_length += 2; + +update: + if (update_reg) + kgsl_regwrite(KGSL_DEVICE(adreno_dev), reg->select, + reg->countable); + + kgsl_hwunlock(lock); + return 0; +} + +u64 gen7_read_alwayson(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 lo = 0, hi = 0, tmp = 0; + + /* Always use the GMU AO counter when doing a AHB read */ + gmu_core_regread(device, GEN7_GMU_ALWAYS_ON_COUNTER_H, &hi); + gmu_core_regread(device, GEN7_GMU_ALWAYS_ON_COUNTER_L, &lo); + + /* Check for overflow */ + gmu_core_regread(device, GEN7_GMU_ALWAYS_ON_COUNTER_H, &tmp); + + if (hi != tmp) { + gmu_core_regread(device, GEN7_GMU_ALWAYS_ON_COUNTER_L, + &lo); + hi = tmp; + } + + return (((u64) hi) << 32) | lo; +} + +static void gen7_remove(struct adreno_device *adreno_dev) +{ + if (ADRENO_FEATURE(adreno_dev, ADRENO_PREEMPTION)) + del_timer(&adreno_dev->preempt.timer); +} + +static void gen7_read_bus_stats(struct kgsl_device *device, + struct kgsl_power_stats *stats, + struct adreno_busy_data *busy) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + u64 ram_cycles, starved_ram; + + ram_cycles = counter_delta(device, adreno_dev->ram_cycles_lo, + &busy->bif_ram_cycles); + + starved_ram = counter_delta(device, adreno_dev->starved_ram_lo, + &busy->bif_starved_ram); + + ram_cycles += counter_delta(device, + adreno_dev->ram_cycles_lo_ch1_read, + &busy->bif_ram_cycles_read_ch1); + + ram_cycles += counter_delta(device, + adreno_dev->ram_cycles_lo_ch0_write, + &busy->bif_ram_cycles_write_ch0); + + ram_cycles += counter_delta(device, + adreno_dev->ram_cycles_lo_ch1_write, + &busy->bif_ram_cycles_write_ch1); + + starved_ram += counter_delta(device, + adreno_dev->starved_ram_lo_ch1, + &busy->bif_starved_ram_ch1); + + stats->ram_time = ram_cycles; + stats->ram_wait = starved_ram; +} + +static void gen7_power_stats(struct adreno_device *adreno_dev, + struct kgsl_power_stats *stats) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_busy_data *busy = &adreno_dev->busy_data; + u64 gpu_busy; + + /* Set the GPU busy counter for frequency scaling */ + gpu_busy = counter_delta(device, GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L, + &busy->gpu_busy); + + stats->busy_time = gpu_busy * 10; + do_div(stats->busy_time, 192); + + if (ADRENO_FEATURE(adreno_dev, ADRENO_IFPC)) { + u32 ifpc = counter_delta(device, + GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_4_L, + &busy->num_ifpc); + + adreno_dev->ifpc_count += ifpc; + if (ifpc > 0) + trace_adreno_ifpc_count(adreno_dev->ifpc_count); + } + + if (device->pwrctrl.bus_control) + gen7_read_bus_stats(device, stats, busy); + + if (adreno_dev->bcl_enabled) { + u32 a, b, c; + + a = counter_delta(device, GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_1_L, + &busy->throttle_cycles[0]); + + b = counter_delta(device, GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_2_L, + &busy->throttle_cycles[1]); + + c = counter_delta(device, GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_3_L, + &busy->throttle_cycles[2]); + + trace_kgsl_bcl_clock_throttling(a, b, 
c); + } +} + +static int gen7_setproperty(struct kgsl_device_private *dev_priv, + u32 type, void __user *value, u32 sizebytes) +{ + struct kgsl_device *device = dev_priv->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + u32 enable; + + if (type != KGSL_PROP_PWRCTRL) + return -ENODEV; + + if (sizebytes != sizeof(enable)) + return -EINVAL; + + if (copy_from_user(&enable, value, sizeof(enable))) + return -EFAULT; + + mutex_lock(&device->mutex); + + if (enable) { + clear_bit(GMU_DISABLE_SLUMBER, &device->gmu_core.flags); + + kgsl_pwrscale_enable(device); + } else { + set_bit(GMU_DISABLE_SLUMBER, &device->gmu_core.flags); + + if (!adreno_active_count_get(adreno_dev)) + adreno_active_count_put(adreno_dev); + + kgsl_pwrscale_disable(device, true); + } + + mutex_unlock(&device->mutex); + + return 0; +} + +const struct gen7_gpudev adreno_gen7_hwsched_gpudev = { + .base = { + .reg_offsets = gen7_register_offsets, + .probe = gen7_hwsched_probe, + .snapshot = gen7_hwsched_snapshot, + .irq_handler = gen7_irq_handler, + .iommu_fault_block = gen7_iommu_fault_block, + .preemption_context_init = gen7_preemption_context_init, + .context_detach = gen7_hwsched_context_detach, + .read_alwayson = gen7_read_alwayson, + .reset = gen7_hwsched_reset, + .power_ops = &gen7_hwsched_power_ops, + .power_stats = gen7_power_stats, + .setproperty = gen7_setproperty, + .hw_isidle = gen7_hw_isidle, + .add_to_va_minidump = gen7_hwsched_add_to_minidump, + }, + .hfi_probe = gen7_hwsched_hfi_probe, + .hfi_remove = gen7_hwsched_hfi_remove, + .handle_watchdog = gen7_hwsched_handle_watchdog, +}; + +const struct gen7_gpudev adreno_gen7_gmu_gpudev = { + .base = { + .reg_offsets = gen7_register_offsets, + .probe = gen7_gmu_device_probe, + .snapshot = gen7_gmu_snapshot, + .irq_handler = gen7_irq_handler, + .rb_start = gen7_rb_start, + .gpu_keepalive = gen7_gpu_keepalive, + .hw_isidle = gen7_hw_isidle, + .iommu_fault_block = gen7_iommu_fault_block, + .reset = gen7_gmu_reset, + .preemption_schedule = gen7_preemption_schedule, + .preemption_context_init = gen7_preemption_context_init, + .read_alwayson = gen7_read_alwayson, + .power_ops = &gen7_gmu_power_ops, + .remove = gen7_remove, + .ringbuffer_submitcmd = gen7_ringbuffer_submitcmd, + .power_stats = gen7_power_stats, + .setproperty = gen7_setproperty, + .add_to_va_minidump = gen7_gmu_add_to_minidump, + }, + .hfi_probe = gen7_gmu_hfi_probe, + .handle_watchdog = gen7_gmu_handle_watchdog, +}; diff --git a/adreno_gen7.h b/adreno_gen7.h new file mode 100644 index 0000000000..7e4c910231 --- /dev/null +++ b/adreno_gen7.h @@ -0,0 +1,436 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. 
+ */ + +#ifndef _ADRENO_GEN7_H_ +#define _ADRENO_GEN7_H_ + +#include + +#include "gen7_reg.h" +#include "adreno_gen7_gmu.h" + +extern const struct adreno_power_ops gen7_gmu_power_ops; +extern const struct adreno_power_ops gen7_hwsched_power_ops; +extern const struct adreno_perfcounters adreno_gen7_perfcounters; + +struct gen7_gpudev { + struct adreno_gpudev base; + int (*hfi_probe)(struct adreno_device *adreno_dev); + void (*hfi_remove)(struct adreno_device *adreno_dev); + void (*handle_watchdog)(struct adreno_device *adreno_dev); +}; + +extern const struct gen7_gpudev adreno_gen7_gmu_gpudev; +extern const struct gen7_gpudev adreno_gen7_hwsched_gpudev; + +/** + * struct gen7_device - Container for the gen7_device + */ +struct gen7_device { + /** @gmu: Container for the gen7 GMU device */ + struct gen7_gmu_device gmu; + /** @adreno_dev: Container for the generic adreno device */ + struct adreno_device adreno_dev; +}; + +/** + * struct gen7_protected_regs - container for a protect register span + */ +struct gen7_protected_regs { + /** @reg: Physical protected mode register to write to */ + u32 reg; + /** @start: Dword offset of the starting register in the range */ + u32 start; + /** + * @end: Dword offset of the ending register in the range + * (inclusive) + */ + u32 end; + /** + * @noaccess: 1 if the register should not be accessible from + * userspace, 0 if it can be read (but not written) + */ + u32 noaccess; +}; + +/** + * struct adreno_gen7_core - gen7 specific GPU core definitions + */ +struct adreno_gen7_core { + /** @base: Container for the generic GPU definitions */ + struct adreno_gpu_core base; + /** @sqefw_name: Name of the SQE microcode file */ + const char *sqefw_name; + /** @gmufw_name: Name of the GMU firmware file */ + const char *gmufw_name; + /** @gmufw_name: Name of the backup GMU firmware file */ + const char *gmufw_bak_name; + /** @zap_name: Name of the CPZ zap file */ + const char *zap_name; + /** @hwcg: List of registers and values to write for HWCG */ + const struct kgsl_regmap_list *hwcg; + /** @hwcg_count: Number of registers in @hwcg */ + u32 hwcg_count; + /** @gbif: List of registers and values to write for GBIF */ + const struct kgsl_regmap_list *gbif; + /** @gbif_count: Number of registers in @gbif */ + u32 gbif_count; + /** @hang_detect_cycles: Hang detect counter timeout value */ + u32 hang_detect_cycles; + /** @protected_regs: Array of protected registers for the target */ + const struct gen7_protected_regs *protected_regs; + /** @ctxt_record_size: Size of the preemption record in bytes */ + u64 ctxt_record_size; + /** @highest_bank_bit: Highest bank bit value */ + u32 highest_bank_bit; +}; + +/** + * struct gen7_cp_preemption_record - CP context record for + * preemption. + * @magic: (00) Value at this offset must be equal to + * GEN7_CP_CTXRECORD_MAGIC_REF. + * @info: (04) Type of record. Written non-zero (usually) by CP. + * we must set to zero for all ringbuffers. + * @errno: (08) Error code. Initialize this to GEN7_CP_CTXRECORD_ERROR_NONE. + * CP will update to another value if a preemption error occurs. + * @data: (12) DATA field in YIELD and SET_MARKER packets. + * Written by CP when switching out. Not used on switch-in. Initialized to 0. + * @cntl: (16) RB_CNTL, saved and restored by CP. We must initialize this. + * @rptr: (20) RB_RPTR, saved and restored by CP. We must initialize this. + * @wptr: (24) RB_WPTR, saved and restored by CP. We must initialize this. + * @_pad28: (28) Reserved/padding. 
+ * @rptr_addr: (32) RB_RPTR_ADDR_LO|HI saved and restored. We must initialize. + * rbase: (40) RB_BASE_LO|HI saved and restored. + * counter: (48) Pointer to preemption counter. + * @bv_rptr_addr: (56) BV_RB_RPTR_ADDR_LO|HI save and restored. We must initialize. + */ +struct gen7_cp_preemption_record { + u32 magic; + u32 info; + u32 errno; + u32 data; + u32 cntl; + u32 rptr; + u32 wptr; + u32 _pad28; + u64 rptr_addr; + u64 rbase; + u64 counter; + u64 bv_rptr_addr; +}; + +/** + * struct gen7_cp_smmu_info - CP preemption SMMU info. + * @magic: (00) The value at this offset must be equal to + * GEN7_CP_SMMU_INFO_MAGIC_REF + * @_pad4: (04) Reserved/padding + * @ttbr0: (08) Base address of the page table for the * incoming context + * @asid: (16) Address Space IDentifier (ASID) of the incoming context + * @context_idr: (20) Context Identification Register value + * @context_bank: (24) Which Context Bank in SMMU to update + */ +struct gen7_cp_smmu_info { + u32 magic; + u32 _pad4; + u64 ttbr0; + u32 asid; + u32 context_idr; + u32 context_bank; +}; + +#define GEN7_CP_SMMU_INFO_MAGIC_REF 0x241350d5UL + +#define GEN7_CP_CTXRECORD_MAGIC_REF 0xae399d6eUL +/* Size of each CP preemption record */ +#define GEN7_CP_CTXRECORD_SIZE_IN_BYTES (2860 * 1024) +/* Size of the user context record block (in bytes) */ +#define GEN7_CP_CTXRECORD_USER_RESTORE_SIZE (192 * 1024) +/* Size of the performance counter save/restore block (in bytes) */ +#define GEN7_CP_PERFCOUNTER_SAVE_RESTORE_SIZE (4 * 1024) + +#define GEN7_CP_RB_CNTL_DEFAULT \ + (FIELD_PREP(GENMASK(7, 0), ilog2(KGSL_RB_DWORDS >> 1)) | \ + FIELD_PREP(GENMASK(12, 8), ilog2(4))) + +/* Size of the CP_INIT pm4 stream in dwords */ +#define GEN7_CP_INIT_DWORDS 10 + +#define GEN7_INT_MASK \ + ((1 << GEN7_INT_AHBERROR) | \ + (1 << GEN7_INT_ATBASYNCFIFOOVERFLOW) | \ + (1 << GEN7_INT_GPCERROR) | \ + (1 << GEN7_INT_SWINTERRUPT) | \ + (1 << GEN7_INT_HWERROR) | \ + (1 << GEN7_INT_PM4CPINTERRUPT) | \ + (1 << GEN7_INT_RB_DONE_TS) | \ + (1 << GEN7_INT_CACHE_CLEAN_TS) | \ + (1 << GEN7_INT_ATBBUSOVERFLOW) | \ + (1 << GEN7_INT_HANGDETECTINTERRUPT) | \ + (1 << GEN7_INT_OUTOFBOUNDACCESS) | \ + (1 << GEN7_INT_UCHETRAPINTERRUPT) | \ + (1 << GEN7_INT_TSBWRITEERROR)) + +#define GEN7_HWSCHED_INT_MASK \ + ((1 << GEN7_INT_AHBERROR) | \ + (1 << GEN7_INT_ATBASYNCFIFOOVERFLOW) | \ + (1 << GEN7_INT_ATBBUSOVERFLOW) | \ + (1 << GEN7_INT_OUTOFBOUNDACCESS) | \ + (1 << GEN7_INT_UCHETRAPINTERRUPT)) + +/** + * to_gen7_core - return the gen7 specific GPU core struct + * @adreno_dev: An Adreno GPU device handle + * + * Returns: + * A pointer to the gen7 specific GPU core struct + */ +static inline const struct adreno_gen7_core * +to_gen7_core(struct adreno_device *adreno_dev) +{ + const struct adreno_gpu_core *core = adreno_dev->gpucore; + + return container_of(core, struct adreno_gen7_core, base); +} + +/** + * gen7_is_smmu_stalled() - Check whether smmu is stalled or not + * @device: Pointer to KGSL device + * + * Return - True if smmu is stalled or false otherwise + */ +static inline bool gen7_is_smmu_stalled(struct kgsl_device *device) +{ + u32 val; + + kgsl_regread(device, GEN7_RBBM_STATUS3, &val); + + return val & BIT(24); +} + +/** + * gen7_cx_regulator_disable_wait - Disable a cx regulator and wait for it + * @reg: A &struct regulator handle + * @device: kgsl device struct + * @timeout: Time to wait (in milliseconds) + * + * Disable the regulator and wait @timeout milliseconds for it to enter the + * disabled state. 
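struct gen7_device above embeds both the GMU device and the generic struct adreno_device, and helpers such as to_gen7_core() (and to_gen7_gmu() later in adreno_gen7_gmu.c) recover the outer structure from a pointer to the embedded member via container_of(). The userspace sketch below shows that pattern in isolation; the struct names and the simplified container_of macro are stand-ins for illustration, not the kernel definitions.

#include <stdio.h>
#include <stddef.h>

/*
 * Sketch of the embedded-struct pattern used by gen7_device: a simplified
 * container_of subtracts the member offset from the member pointer to get
 * back to the enclosing object.
 */
#define container_of_sketch(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct adreno_dev_sketch { int chipid; };
struct gmu_dev_sketch    { int idle_level; };

/* Outer container, mirroring how gen7_device wraps both sub-devices. */
struct gen7_dev_sketch {
	struct gmu_dev_sketch gmu;
	struct adreno_dev_sketch adreno_dev;
};

static struct gmu_dev_sketch *to_gmu_sketch(struct adreno_dev_sketch *adreno)
{
	struct gen7_dev_sketch *outer =
		container_of_sketch(adreno, struct gen7_dev_sketch, adreno_dev);

	return &outer->gmu;
}

int main(void)
{
	struct gen7_dev_sketch dev = {
		.gmu = { .idle_level = 3 },
		.adreno_dev = { .chipid = 0x07030001 },
	};

	/* Given only the embedded adreno pointer, recover the GMU side. */
	printf("idle_level=%d\n", to_gmu_sketch(&dev.adreno_dev)->idle_level);
	return 0;
}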
+ * + */ +void gen7_cx_regulator_disable_wait(struct regulator *reg, + struct kgsl_device *device, u32 timeout); + +/* Preemption functions */ +void gen7_preemption_trigger(struct adreno_device *adreno_dev, bool atomic); +void gen7_preemption_schedule(struct adreno_device *adreno_dev); +void gen7_preemption_start(struct adreno_device *adreno_dev); +int gen7_preemption_init(struct adreno_device *adreno_dev); + +u32 gen7_preemption_post_ibsubmit(struct adreno_device *adreno_dev, + unsigned int *cmds); +u32 gen7_preemption_pre_ibsubmit(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, struct adreno_context *drawctxt, + u32 *cmds); + +unsigned int gen7_set_marker(unsigned int *cmds, + enum adreno_cp_marker_type type); + +void gen7_preemption_callback(struct adreno_device *adreno_dev, int bit); + +int gen7_preemption_context_init(struct kgsl_context *context); + +void gen7_preemption_context_destroy(struct kgsl_context *context); + +void gen7_snapshot(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot); +void gen7_crashdump_init(struct adreno_device *adreno_dev); + +/** + * gen7_read_alwayson - Read the current always on clock value + * @adreno_dev: An Adreno GPU handle + * + * Return: The current value of the GMU always on counter + */ +u64 gen7_read_alwayson(struct adreno_device *adreno_dev); + +/** + * gen7_start - Program gen7 registers + * @adreno_dev: An Adreno GPU handle + * + * This function does all gen7 register programming every + * time we boot the gpu + * + * Return: 0 on success or negative on failure + */ +int gen7_start(struct adreno_device *adreno_dev); + +/** + * gen7_init - Initialize gen7 resources + * @adreno_dev: An Adreno GPU handle + * + * This function does gen7 specific one time initialization + * and is invoked when the very first client opens a + * kgsl instance + * + * Return: Zero on success and negative error on failure + */ +int gen7_init(struct adreno_device *adreno_dev); + +/** + * gen7_rb_start - Gen7 specific ringbuffer setup + * @adreno_dev: An Adreno GPU handle + * + * This function does gen7 specific ringbuffer setup and + * attempts to submit CP INIT and bring GPU out of secure mode + * + * Return: Zero on success and negative error on failure + */ +int gen7_rb_start(struct adreno_device *adreno_dev); + +/** + * gen7_microcode_read - Get the cp microcode from the filesystem + * @adreno_dev: An Adreno GPU handle + * + * This function gets the firmware from filesystem and sets up + * the micorocode global buffer + * + * Return: Zero on success and negative error on failure + */ +int gen7_microcode_read(struct adreno_device *adreno_dev); + +/** + * gen7_probe_common - Probe common gen7 resources + * @pdev: Pointer to the platform device + * @adreno_dev: Pointer to the adreno device + * @chipid: Chipid of the target + * @gpucore: Pointer to the gpucore strucure + * + * This function sets up the gen7 resources common across all + * gen7 targets + */ +int gen7_probe_common(struct platform_device *pdev, + struct adreno_device *adreno_dev, u32 chipid, + const struct adreno_gpu_core *gpucore); + +/** + * gen7_hw_isidle - Check whether gen7 gpu is idle or not + * @adreno_dev: An Adreno GPU handle + * + * Return: True if gpu is idle, otherwise false + */ +bool gen7_hw_isidle(struct adreno_device *adreno_dev); + +/** + * gen7_spin_idle_debug - Debug logging used when gpu fails to idle + * @adreno_dev: An Adreno GPU handle + * + * This function logs interesting registers and triggers a snapshot + */ +void gen7_spin_idle_debug(struct 
adreno_device *adreno_dev, + const char *str); + +/** + * gen7_perfcounter_update - Update the IFPC perfcounter list + * @adreno_dev: An Adreno GPU handle + * @reg: Perfcounter reg struct to add/remove to the list + * @update_reg: true if the perfcounter needs to be programmed by the CPU + * + * Return: 0 on success or -EBUSY if the lock couldn't be taken + */ +int gen7_perfcounter_update(struct adreno_device *adreno_dev, + struct adreno_perfcount_register *reg, bool update_reg); + +/* + * gen7_ringbuffer_init - Initialize the ringbuffers + * @adreno_dev: An Adreno GPU handle + * + * Initialize the ringbuffer(s) for a5xx. + * Return: 0 on success or negative on failure + */ +int gen7_ringbuffer_init(struct adreno_device *adreno_dev); + +/** + * gen7_ringbuffer_submitcmd - Submit a user command to the ringbuffer + * @adreno_dev: An Adreno GPU handle + * @cmdobj: Pointer to a user command object + * @flags: Internal submit flags + * @time: Optional pointer to a adreno_submit_time container + * + * Return: 0 on success or negative on failure + */ +int gen7_ringbuffer_submitcmd(struct adreno_device *adreno_dev, + struct kgsl_drawobj_cmd *cmdobj, u32 flags, + struct adreno_submit_time *time); + +/** + * gen7_ringbuffer_submit - Submit a command to the ringbuffer + * @rb: Ringbuffer pointer + * @time: Optional pointer to a adreno_submit_time container + * + * Return: 0 on success or negative on failure + */ +int gen7_ringbuffer_submit(struct adreno_ringbuffer *rb, + struct adreno_submit_time *time); + +/** + * gen7_fenced_write - Write to a fenced register + * @adreno_dev: An Adreno GPU handle + * @offset: Register offset + * @value: Value to write + * @mask: Expected FENCE_STATUS for successful write + * + * Return: 0 on success or negative on failure + */ +int gen7_fenced_write(struct adreno_device *adreno_dev, u32 offset, + u32 value, u32 mask); + +/** + * gen77ringbuffer_addcmds - Wrap and submit commands to the ringbuffer + * @adreno_dev: An Adreno GPU handle + * @rb: Ringbuffer pointer + * @drawctxt: Draw context submitting the commands + * @flags: Submission flags + * @in: Input buffer to write to ringbuffer + * @dwords: Dword length of @in + * @timestamp: Draw context timestamp for the submission + * @time: Optional pointer to a adreno_submit_time container + * + * Return: 0 on success or negative on failure + */ +int gen7_ringbuffer_addcmds(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, struct adreno_context *drawctxt, + u32 flags, u32 *in, u32 dwords, u32 timestamp, + struct adreno_submit_time *time); + +/** + * gen7_cp_init_cmds - Create the CP_INIT commands + * @adreno_dev: An Adreno GPU handle + * @cmd: Buffer to write the CP_INIT commands into + */ +void gen7_cp_init_cmds(struct adreno_device *adreno_dev, u32 *cmds); + +/** + * gen7_gmu_hfi_probe - Probe Gen7 HFI specific data + * @adreno_dev: An Adreno GPU handle + * + * Return: 0 on success or negative on failure + */ +int gen7_gmu_hfi_probe(struct adreno_device *adreno_dev); + +static inline const struct gen7_gpudev * +to_gen7_gpudev(const struct adreno_gpudev *gpudev) +{ + return container_of(gpudev, struct gen7_gpudev, base); +} + +/** + * gen7_reset_preempt_records - Reset the preemption buffers + * @adreno_dev: Handle to the adreno device + * + * Reset the preemption records at the time of hard reset + */ +void gen7_reset_preempt_records(struct adreno_device *adreno_dev); +#endif diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c new file mode 100644 index 0000000000..8fc1726748 --- /dev/null +++ 
b/adreno_gen7_gmu.c @@ -0,0 +1,2708 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "adreno.h" +#include "adreno_gen7.h" +#include "adreno_trace.h" +#include "kgsl_bus.h" +#include "kgsl_device.h" +#include "kgsl_trace.h" +#include "kgsl_util.h" + +static struct gmu_vma_entry gen7_gmu_vma[] = { + [GMU_ITCM] = { + .start = 0x00000000, + .size = SZ_16K, + }, + [GMU_CACHE] = { + .start = SZ_16K, + .size = (SZ_16M - SZ_16K), + .next_va = SZ_16K, + }, + [GMU_DTCM] = { + .start = SZ_256M + SZ_16K, + .size = SZ_16K, + }, + [GMU_DCACHE] = { + .start = 0x0, + .size = 0x0, + }, + [GMU_NONCACHED_KERNEL] = { + .start = 0x60000000, + .size = SZ_512M, + .next_va = 0x60000000, + }, +}; + +static ssize_t log_stream_enable_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct gen7_gmu_device *gmu = container_of(kobj, struct gen7_gmu_device, log_kobj); + bool val; + int ret; + + ret = kstrtobool(buf, &val); + if (ret) + return ret; + + gmu->log_stream_enable = val; + return count; +} + +static ssize_t log_stream_enable_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + struct gen7_gmu_device *gmu = container_of(kobj, struct gen7_gmu_device, log_kobj); + + return scnprintf(buf, PAGE_SIZE, "%d\n", gmu->log_stream_enable); +} + +static ssize_t log_group_mask_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct gen7_gmu_device *gmu = container_of(kobj, struct gen7_gmu_device, log_kobj); + u32 val; + int ret; + + ret = kstrtou32(buf, 0, &val); + if (ret) + return ret; + + gmu->log_group_mask = val; + return count; +} + +static ssize_t log_group_mask_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + struct gen7_gmu_device *gmu = container_of(kobj, struct gen7_gmu_device, log_kobj); + + return scnprintf(buf, PAGE_SIZE, "%x\n", gmu->log_group_mask); +} + +static struct kobj_attribute log_stream_enable_attr = + __ATTR(log_stream_enable, 0644, log_stream_enable_show, log_stream_enable_store); + +static struct kobj_attribute log_group_mask_attr = + __ATTR(log_group_mask, 0644, log_group_mask_show, log_group_mask_store); + +static struct attribute *log_attrs[] = { + &log_stream_enable_attr.attr, + &log_group_mask_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(log); + +static struct kobj_type log_kobj_type = { + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = log_groups, +}; + +static int gen7_timed_poll_check_rscc(struct gen7_gmu_device *gmu, + unsigned int offset, unsigned int expected_ret, + unsigned int timeout, unsigned int mask) +{ + u32 value; + + return readl_poll_timeout(gmu->rscc_virt + (offset << 2), value, + (value & mask) == expected_ret, 100, timeout * 1000); +} + +struct gen7_gmu_device *to_gen7_gmu(struct adreno_device *adreno_dev) +{ + struct gen7_device *gen7_dev = container_of(adreno_dev, + struct gen7_device, adreno_dev); + + return &gen7_dev->gmu; +} + +struct adreno_device *gen7_gmu_to_adreno(struct gen7_gmu_device *gmu) +{ + struct gen7_device *gen7_dev = + container_of(gmu, struct gen7_device, gmu); + + return &gen7_dev->adreno_dev; +} + +#define RSC_CMD_OFFSET 2 + +static void _regwrite(void __iomem *regbase, + unsigned int offsetwords, unsigned int value) +{ + void __iomem *reg; + + reg = regbase + 
(offsetwords << 2); + __raw_writel(value, reg); +} + +void gen7_load_rsc_ucode(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + void __iomem *rscc = gmu->rscc_virt; + + /* Disable SDE clock gating */ + _regwrite(rscc, GEN7_GPU_RSCC_RSC_STATUS0_DRV0, BIT(24)); + + /* Setup RSC PDC handshake for sleep and wakeup */ + _regwrite(rscc, GEN7_RSCC_PDC_SLAVE_ID_DRV0, 1); + _regwrite(rscc, GEN7_RSCC_HIDDEN_TCS_CMD0_DATA, 0); + _regwrite(rscc, GEN7_RSCC_HIDDEN_TCS_CMD0_ADDR, 0); + _regwrite(rscc, GEN7_RSCC_HIDDEN_TCS_CMD0_DATA + RSC_CMD_OFFSET, 0); + _regwrite(rscc, GEN7_RSCC_HIDDEN_TCS_CMD0_ADDR + RSC_CMD_OFFSET, 0); + _regwrite(rscc, GEN7_RSCC_HIDDEN_TCS_CMD0_DATA + RSC_CMD_OFFSET * 2, + 0x80000000); + _regwrite(rscc, GEN7_RSCC_HIDDEN_TCS_CMD0_ADDR + RSC_CMD_OFFSET * 2, 0); + _regwrite(rscc, GEN7_RSCC_OVERRIDE_START_ADDR, 0); + _regwrite(rscc, GEN7_RSCC_PDC_SEQ_START_ADDR, 0x4520); + _regwrite(rscc, GEN7_RSCC_PDC_MATCH_VALUE_LO, 0x4510); + _regwrite(rscc, GEN7_RSCC_PDC_MATCH_VALUE_HI, 0x4514); + + /* Load RSC sequencer uCode for sleep and wakeup */ + _regwrite(rscc, GEN7_RSCC_SEQ_MEM_0_DRV0, 0xeaaae5a0); + _regwrite(rscc, GEN7_RSCC_SEQ_MEM_0_DRV0 + 1, 0xe1a1ebab); + _regwrite(rscc, GEN7_RSCC_SEQ_MEM_0_DRV0 + 2, 0xa2e0a581); + _regwrite(rscc, GEN7_RSCC_SEQ_MEM_0_DRV0 + 3, 0xecac82e2); + _regwrite(rscc, GEN7_RSCC_SEQ_MEM_0_DRV0 + 4, 0x0020edad); +} + +int gen7_load_pdc_ucode(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct resource *res_cfg; + void __iomem *cfg = NULL; + + res_cfg = platform_get_resource_byname(gmu->pdev, IORESOURCE_MEM, + "gmu_pdc"); + if (res_cfg) + cfg = ioremap(res_cfg->start, resource_size(res_cfg)); + + if (!cfg) { + dev_err(&gmu->pdev->dev, "Failed to map PDC CFG\n"); + return -ENODEV; + } + + /* Setup GPU PDC */ + _regwrite(cfg, GEN7_PDC_GPU_SEQ_START_ADDR, 0); + _regwrite(cfg, GEN7_PDC_GPU_ENABLE_PDC, 0x80000001); + + iounmap(cfg); + + return 0; +} + +/* Configure and enable GMU low power mode */ +static void gen7_gmu_power_config(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + /* Disable GMU WB/RB buffer and caches at boot */ + gmu_core_regwrite(device, GEN7_GMU_SYS_BUS_CONFIG, 0x1); + gmu_core_regwrite(device, GEN7_GMU_ICACHE_CONFIG, 0x1); + gmu_core_regwrite(device, GEN7_GMU_DCACHE_CONFIG, 0x1); +} + +static void gmu_ao_sync_event(struct adreno_device *adreno_dev) +{ + unsigned long flags; + u64 ticks; + + /* + * Get the GMU always on ticks and log it in a trace message. This + * will be used to map GMU ticks to ftrace time. Do this in atomic + * context to ensure nothing happens between reading the always + * on ticks and doing the trace. 
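The _regwrite() helper above, like gen7_timed_poll_check_rscc(), takes a register offset in 32-bit words and shifts it left by two to form the byte offset into the mapped region. The standalone sketch below shows the same addressing math against an in-memory array standing in for an ioremapped register window; it is an illustration only, not the driver's accessor.

#include <stdio.h>
#include <stdint.h>

/* A small array standing in for an ioremap()'d register window. */
static uint32_t fake_regs[16];

/*
 * Same addressing as _regwrite(): 'offsetwords' is a register index in
 * 32-bit words, so the byte offset is offsetwords << 2.
 */
static void regwrite_sketch(uint8_t *regbase, unsigned int offsetwords,
			    uint32_t value)
{
	uint8_t *reg = regbase + (offsetwords << 2);

	*(volatile uint32_t *)reg = value;	/* stands in for __raw_writel() */
}

int main(void)
{
	regwrite_sketch((uint8_t *)fake_regs, 5, 0x4520);

	/* Word index 5 is byte offset 20, i.e. fake_regs[5]. */
	printf("fake_regs[5]=0x%x (byte offset %u)\n", fake_regs[5], 5u << 2);
	return 0;
}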
+ */ + + local_irq_save(flags); + + ticks = gen7_read_alwayson(adreno_dev); + + trace_gmu_ao_sync(ticks); + + local_irq_restore(flags); +} + +int gen7_gmu_device_start(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + + gmu_ao_sync_event(adreno_dev); + + /* Bring GMU out of reset */ + gmu_core_regwrite(device, GEN7_GMU_CM3_SYSRESET, 0); + + /* Make sure the write is posted before moving ahead */ + wmb(); + + if (gmu_core_timed_poll_check(device, GEN7_GMU_CM3_FW_INIT_RESULT, + BIT(8), 100, GENMASK(8, 0))) { + dev_err(&gmu->pdev->dev, "GMU failed to come out of reset\n"); + gmu_core_fault_snapshot(device); + return -ETIMEDOUT; + } + + return 0; +} + +/* + * gen7_gmu_hfi_start() - Write registers and start HFI. + * @device: Pointer to KGSL device + */ +int gen7_gmu_hfi_start(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + gmu_core_regwrite(device, GEN7_GMU_HFI_CTRL_INIT, 1); + + if (gmu_core_timed_poll_check(device, GEN7_GMU_HFI_CTRL_STATUS, + BIT(0), 100, BIT(0))) { + dev_err(&gmu->pdev->dev, "GMU HFI init failed\n"); + gmu_core_fault_snapshot(device); + return -ETIMEDOUT; + } + + return 0; +} + +int gen7_rscc_wakeup_sequence(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct device *dev = &gmu->pdev->dev; + + /* Skip wakeup sequence if we didn't do the sleep sequence */ + if (!test_bit(GMU_PRIV_RSCC_SLEEP_DONE, &gmu->flags)) + return 0; + + /* RSC wake sequence */ + gmu_core_regwrite(device, GEN7_GMU_RSCC_CONTROL_REQ, BIT(1)); + + /* Write request before polling */ + wmb(); + + if (gmu_core_timed_poll_check(device, GEN7_GMU_RSCC_CONTROL_ACK, + BIT(1), 100, BIT(1))) { + dev_err(dev, "Failed to do GPU RSC power on\n"); + return -ETIMEDOUT; + } + + if (gen7_timed_poll_check_rscc(gmu, GEN7_RSCC_SEQ_BUSY_DRV0, + 0x0, 100, UINT_MAX)) { + dev_err(dev, "GPU RSC sequence stuck in waking up GPU\n"); + return -ETIMEDOUT; + } + + gmu_core_regwrite(device, GEN7_GMU_RSCC_CONTROL_REQ, 0); + + clear_bit(GMU_PRIV_RSCC_SLEEP_DONE, &gmu->flags); + + return 0; +} + +int gen7_rscc_sleep_sequence(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int ret; + + if (!test_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags)) + return 0; + + if (test_bit(GMU_PRIV_RSCC_SLEEP_DONE, &gmu->flags)) + return 0; + + gmu_core_regwrite(device, GEN7_GMU_CM3_SYSRESET, 1); + /* Make sure M3 is in reset before going on */ + wmb(); + + gmu_core_regread(device, GEN7_GMU_GENERAL_9, &gmu->log_wptr_retention); + + gmu_core_regwrite(device, GEN7_GMU_RSCC_CONTROL_REQ, BIT(0)); + /* Make sure the request completes before continuing */ + wmb(); + + ret = gen7_timed_poll_check_rscc(gmu, GEN7_GPU_RSCC_RSC_STATUS0_DRV0, + BIT(16), 100, BIT(16)); + if (ret) { + dev_err(&gmu->pdev->dev, "GPU RSC power off fail\n"); + return -ETIMEDOUT; + } + + gmu_core_regwrite(device, GEN7_GMU_RSCC_CONTROL_REQ, 0); + + if (adreno_dev->lm_enabled) + gmu_core_regwrite(device, GEN7_GMU_AO_SPARE_CNTL, 0); + + set_bit(GMU_PRIV_RSCC_SLEEP_DONE, &gmu->flags); + + return 0; +} + +static struct kgsl_memdesc *find_gmu_memdesc(struct gen7_gmu_device *gmu, + u32 addr, u32 size) +{ + int i; + + for (i = 0; i < gmu->global_entries; i++) { + struct kgsl_memdesc *md = 
&gmu->gmu_globals[i]; + + if ((addr >= md->gmuaddr) && + (((addr + size) <= (md->gmuaddr + md->size)))) + return md; + } + + return NULL; +} + +static int find_vma_block(struct gen7_gmu_device *gmu, u32 addr, u32 size) +{ + int i; + + for (i = 0; i < GMU_MEM_TYPE_MAX; i++) { + struct gmu_vma_entry *vma = &gmu->vma[i]; + + if ((addr >= vma->start) && + ((addr + size) <= (vma->start + vma->size))) + return i; + } + + return -ENOENT; +} + +static void load_tcm(struct adreno_device *adreno_dev, const u8 *src, + u32 tcm_start, u32 base, const struct gmu_block_header *blk) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 tcm_offset = tcm_start + ((blk->addr - base)/sizeof(u32)); + + kgsl_regmap_bulk_write(&device->regmap, tcm_offset, src, + blk->size >> 2); +} + +int gen7_gmu_load_fw(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + const u8 *fw = (const u8 *)gmu->fw_image->data; + + while (fw < gmu->fw_image->data + gmu->fw_image->size) { + const struct gmu_block_header *blk = + (const struct gmu_block_header *)fw; + int id; + + fw += sizeof(*blk); + + /* Don't deal with zero size blocks */ + if (blk->size == 0) + continue; + + id = find_vma_block(gmu, blk->addr, blk->size); + + if (id < 0) { + dev_err(&gmu->pdev->dev, + "Unknown block in GMU FW addr:0x%x size:0x%x\n", + blk->addr, blk->size); + return -EINVAL; + } + + if (id == GMU_ITCM) { + load_tcm(adreno_dev, fw, + GEN7_GMU_CM3_ITCM_START, + gmu->vma[GMU_ITCM].start, blk); + } else if (id == GMU_DTCM) { + load_tcm(adreno_dev, fw, + GEN7_GMU_CM3_DTCM_START, + gmu->vma[GMU_DTCM].start, blk); + } else { + struct kgsl_memdesc *md = + find_gmu_memdesc(gmu, blk->addr, blk->size); + + if (!md) { + dev_err(&gmu->pdev->dev, + "No backing memory for GMU FW block addr:0x%x size:0x%x\n", + blk->addr, blk->size); + return -EINVAL; + } + + memcpy(md->hostptr + (blk->addr - md->gmuaddr), fw, + blk->size); + } + + fw += blk->size; + } + + /* Proceed only after the FW is written */ + wmb(); + return 0; +} + +static const char *oob_to_str(enum oob_request req) +{ + switch (req) { + case oob_gpu: + return "oob_gpu"; + case oob_perfcntr: + return "oob_perfcntr"; + case oob_boot_slumber: + return "oob_boot_slumber"; + case oob_dcvs: + return "oob_dcvs"; + default: + return "unknown"; + } +} + +static void trigger_reset_recovery(struct adreno_device *adreno_dev, + enum oob_request req) +{ + /* + * Trigger recovery for perfcounter oob only since only + * perfcounter oob can happen alongside an actively rendering gpu. 
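gen7_gmu_load_fw() above walks the firmware image as a sequence of block headers, each followed by blk->size bytes of payload, and dispatches every block according to the VMA it falls into. The sketch below parses the same kind of header-plus-payload stream from a local buffer; the two-field header and the payload contents are simplified assumptions for the example, not the real gmu_block_header layout.

#include <stdio.h>
#include <stdint.h>
#include <string.h>

/*
 * Simplified stand-in for a firmware block header; the real
 * gmu_block_header has more fields, only addr/size matter here.
 */
struct blk_hdr_sketch {
	uint32_t addr;	/* GMU VA the payload should land at */
	uint32_t size;	/* payload bytes that follow the header */
};

int main(void)
{
	/* Build a tiny fake image: two blocks back to back. */
	uint8_t image[64];
	size_t len = 0;

	struct blk_hdr_sketch b1 = { .addr = 0x00000000, .size = 4 };
	uint8_t p1[4] = { 1, 2, 3, 4 };
	struct blk_hdr_sketch b2 = { .addr = 0x60000000, .size = 8 };
	uint8_t p2[8] = { 9, 9, 9, 9, 9, 9, 9, 9 };

	memcpy(image + len, &b1, sizeof(b1)); len += sizeof(b1);
	memcpy(image + len, p1, sizeof(p1));  len += sizeof(p1);
	memcpy(image + len, &b2, sizeof(b2)); len += sizeof(b2);
	memcpy(image + len, p2, sizeof(p2));  len += sizeof(p2);

	/* Walk it the way gen7_gmu_load_fw() walks gmu->fw_image. */
	const uint8_t *fw = image;
	while (fw < image + len) {
		struct blk_hdr_sketch hdr;

		memcpy(&hdr, fw, sizeof(hdr));	/* header first ... */
		fw += sizeof(hdr);

		if (hdr.size == 0)		/* zero-size blocks carry no payload */
			continue;

		printf("block addr=0x%08x size=%u first byte=%u\n",
		       hdr.addr, hdr.size, fw[0]);

		fw += hdr.size;			/* ... then skip its payload */
	}
	return 0;
}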
+ */ + if (req != oob_perfcntr) + return; + + if (adreno_dev->dispatch_ops && adreno_dev->dispatch_ops->fault) + adreno_dev->dispatch_ops->fault(adreno_dev, + ADRENO_GMU_FAULT_SKIP_SNAPSHOT); +} + +int gen7_gmu_oob_set(struct kgsl_device *device, + enum oob_request req) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int ret = 0; + int set, check; + + if (req >= oob_boot_slumber) { + dev_err(&gmu->pdev->dev, + "Unsupported OOB request %s\n", + oob_to_str(req)); + return -EINVAL; + } + + set = BIT(30 - req * 2); + check = BIT(31 - req); + + gmu_core_regwrite(device, GEN7_GMU_HOST2GMU_INTR_SET, set); + + if (gmu_core_timed_poll_check(device, GEN7_GMU_GMU2HOST_INTR_INFO, check, + 100, check)) { + gmu_core_fault_snapshot(device); + ret = -ETIMEDOUT; + WARN(1, "OOB request %s timed out\n", oob_to_str(req)); + trigger_reset_recovery(adreno_dev, req); + } + + gmu_core_regwrite(device, GEN7_GMU_GMU2HOST_INTR_CLR, check); + + trace_kgsl_gmu_oob_set(set); + return ret; +} + +void gen7_gmu_oob_clear(struct kgsl_device *device, + enum oob_request req) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int clear = BIT(31 - req * 2); + + if (req >= oob_boot_slumber) { + dev_err(&gmu->pdev->dev, "Unsupported OOB clear %s\n", + oob_to_str(req)); + return; + } + + gmu_core_regwrite(device, GEN7_GMU_HOST2GMU_INTR_SET, clear); + trace_kgsl_gmu_oob_clear(clear); +} + +void gen7_gmu_irq_enable(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct gen7_hfi *hfi = &gmu->hfi; + + /* Clear pending IRQs and Unmask needed IRQs */ + gmu_core_regwrite(device, GEN7_GMU_GMU2HOST_INTR_CLR, UINT_MAX); + gmu_core_regwrite(device, GEN7_GMU_AO_HOST_INTERRUPT_CLR, UINT_MAX); + + gmu_core_regwrite(device, GEN7_GMU_GMU2HOST_INTR_MASK, + (unsigned int)~HFI_IRQ_MASK); + gmu_core_regwrite(device, GEN7_GMU_AO_HOST_INTERRUPT_MASK, + (unsigned int)~GMU_AO_INT_MASK); + + /* Enable all IRQs on host */ + enable_irq(hfi->irq); + enable_irq(gmu->irq); +} + +void gen7_gmu_irq_disable(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct gen7_hfi *hfi = &gmu->hfi; + + /* Disable all IRQs on host */ + disable_irq(gmu->irq); + disable_irq(hfi->irq); + + /* Mask all IRQs and clear pending IRQs */ + gmu_core_regwrite(device, GEN7_GMU_GMU2HOST_INTR_MASK, UINT_MAX); + gmu_core_regwrite(device, GEN7_GMU_AO_HOST_INTERRUPT_MASK, UINT_MAX); + + gmu_core_regwrite(device, GEN7_GMU_GMU2HOST_INTR_CLR, UINT_MAX); + gmu_core_regwrite(device, GEN7_GMU_AO_HOST_INTERRUPT_CLR, UINT_MAX); +} + +static int gen7_gmu_hfi_start_msg(struct adreno_device *adreno_dev) +{ + struct hfi_start_cmd req; + int ret; + + ret = CMD_MSG_HDR(req, H2F_MSG_START); + if (ret) + return ret; + + return gen7_hfi_send_generic_req(adreno_dev, &req); +} + +static int gen7_complete_rpmh_votes(struct gen7_gmu_device *gmu) +{ + int ret = 0; + + ret |= gen7_timed_poll_check_rscc(gmu, GEN7_RSCC_TCS0_DRV0_STATUS, + BIT(0), 1, BIT(0)); + ret |= gen7_timed_poll_check_rscc(gmu, GEN7_RSCC_TCS1_DRV0_STATUS, + BIT(0), 1, BIT(0)); + ret |= gen7_timed_poll_check_rscc(gmu, GEN7_RSCC_TCS2_DRV0_STATUS, + BIT(0), 1, BIT(0)); + ret |= gen7_timed_poll_check_rscc(gmu, GEN7_RSCC_TCS3_DRV0_STATUS, + BIT(0), 1, BIT(0)); + + return ret; +} + +#define 
GX_GDSC_POWER_OFF BIT(0) +#define GX_CLK_OFF BIT(1) +#define is_on(val) (!(val & (GX_GDSC_POWER_OFF | GX_CLK_OFF))) + +bool gen7_gmu_gx_is_on(struct kgsl_device *device) +{ + unsigned int val; + + gmu_core_regread(device, GEN7_GMU_GFX_PWR_CLK_STATUS, &val); + return is_on(val); +} + +static const char *idle_level_name(int level) +{ + if (level == GPU_HW_ACTIVE) + return "GPU_HW_ACTIVE"; + else if (level == GPU_HW_IFPC) + return "GPU_HW_IFPC"; + + return "(Unknown)"; +} + +int gen7_gmu_wait_for_lowest_idle(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + unsigned int reg, reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8; + unsigned long t; + u64 ts1, ts2; + + ts1 = gen7_read_alwayson(adreno_dev); + + t = jiffies + msecs_to_jiffies(100); + do { + gmu_core_regread(device, + GEN7_GPU_GMU_CX_GMU_RPMH_POWER_STATE, ®); + gmu_core_regread(device, GEN7_GMU_GFX_PWR_CLK_STATUS, ®1); + + /* + * Check that we are at lowest level. If lowest level is IFPC + * double check that GFX clock is off. + */ + if (gmu->idle_level == reg) + if (!(gmu->idle_level == GPU_HW_IFPC && is_on(reg1))) + return 0; + + /* Wait 100us to reduce unnecessary AHB bus traffic */ + usleep_range(10, 100); + } while (!time_after(jiffies, t)); + + /* Check one last time */ + gmu_core_regread(device, GEN7_GPU_GMU_CX_GMU_RPMH_POWER_STATE, ®); + gmu_core_regread(device, GEN7_GMU_GFX_PWR_CLK_STATUS, ®1); + + /* + * Check that we are at lowest level. If lowest level is IFPC + * double check that GFX clock is off. + */ + if (gmu->idle_level == reg) + if (!(gmu->idle_level == GPU_HW_IFPC && is_on(reg1))) + return 0; + + ts2 = gen7_read_alwayson(adreno_dev); + + /* Collect abort data to help with debugging */ + gmu_core_regread(device, GEN7_GPU_GMU_AO_GPU_CX_BUSY_STATUS, ®2); + gmu_core_regread(device, GEN7_GMU_RBBM_INT_UNMASKED_STATUS, ®3); + gmu_core_regread(device, GEN7_GMU_GMU_PWR_COL_KEEPALIVE, ®4); + gmu_core_regread(device, GEN7_GMU_AO_SPARE_CNTL, ®5); + + dev_err(&gmu->pdev->dev, + "----------------------[ GMU error ]----------------------\n"); + dev_err(&gmu->pdev->dev, + "Timeout waiting for lowest idle level %s\n", + idle_level_name(gmu->idle_level)); + dev_err(&gmu->pdev->dev, "Start: %llx (absolute ticks)\n", ts1); + dev_err(&gmu->pdev->dev, "Poll: %llx (ticks relative to start)\n", + ts2-ts1); + dev_err(&gmu->pdev->dev, + "RPMH_POWER_STATE=%x GFX_PWR_CLK_STATUS=%x\n", reg, reg1); + dev_err(&gmu->pdev->dev, "CX_BUSY_STATUS=%x\n", reg2); + dev_err(&gmu->pdev->dev, + "RBBM_INT_UNMASKED_STATUS=%x PWR_COL_KEEPALIVE=%x\n", + reg3, reg4); + dev_err(&gmu->pdev->dev, "GEN7_GMU_AO_SPARE_CNTL=%x\n", reg5); + + /* Access GX registers only when GX is ON */ + if (is_on(reg1)) { + kgsl_regread(device, GEN7_CP_STATUS_1, ®6); + kgsl_regread(device, GEN7_CP_CP2GMU_STATUS, ®7); + kgsl_regread(device, GEN7_CP_CONTEXT_SWITCH_CNTL, ®8); + + dev_err(&gmu->pdev->dev, "GEN7_CP_STATUS_1=%x\n", reg6); + dev_err(&gmu->pdev->dev, + "CP2GMU_STATUS=%x CONTEXT_SWITCH_CNTL=%x\n", + reg7, reg8); + } + + WARN_ON(1); + gmu_core_fault_snapshot(device); + return -ETIMEDOUT; +} + +/* Bitmask for GPU idle status check */ +#define CXGXCPUBUSYIGNAHB BIT(30) +int gen7_gmu_wait_for_idle(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + u32 status2; + u64 ts1; + + ts1 = gen7_read_alwayson(adreno_dev); + if (gmu_core_timed_poll_check(device, 
GEN7_GPU_GMU_AO_GPU_CX_BUSY_STATUS, + 0, 100, CXGXCPUBUSYIGNAHB)) { + gmu_core_regread(device, + GEN7_GPU_GMU_AO_GPU_CX_BUSY_STATUS2, &status2); + dev_err(&gmu->pdev->dev, + "GMU not idling: status2=0x%x %llx %llx\n", + status2, ts1, + gen7_read_alwayson(ADRENO_DEVICE(device))); + gmu_core_fault_snapshot(device); + return -ETIMEDOUT; + } + + return 0; +} + +void gen7_gmu_version_info(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + + /* GMU version info is at a fixed offset in the DTCM */ + gmu_core_regread(device, GEN7_GMU_CM3_DTCM_START + 0xff8, + &gmu->ver.core); + gmu_core_regread(device, GEN7_GMU_CM3_DTCM_START + 0xff9, + &gmu->ver.core_dev); + gmu_core_regread(device, GEN7_GMU_CM3_DTCM_START + 0xffa, + &gmu->ver.pwr); + gmu_core_regread(device, GEN7_GMU_CM3_DTCM_START + 0xffb, + &gmu->ver.pwr_dev); + gmu_core_regread(device, GEN7_GMU_CM3_DTCM_START + 0xffc, + &gmu->ver.hfi); +} + +int gen7_gmu_itcm_shadow(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + u32 i, *dest; + + if (gmu->itcm_shadow) + return 0; + + gmu->itcm_shadow = vzalloc(gmu->vma[GMU_ITCM].size); + if (!gmu->itcm_shadow) + return -ENOMEM; + + dest = (u32 *)gmu->itcm_shadow; + + for (i = 0; i < (gmu->vma[GMU_ITCM].size >> 2); i++) + gmu_core_regread(KGSL_DEVICE(adreno_dev), + GEN7_GMU_CM3_ITCM_START + i, dest++); + + return 0; +} + +void gen7_gmu_register_config(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 val; + + /* Clear any previously set cm3 fault */ + atomic_set(&gmu->cm3_fault, 0); + + /* Vote veto for FAL10 */ + gmu_core_regwrite(device, GEN7_GPU_GMU_CX_GMU_CX_FALNEXT_INTF, 0x1); + gmu_core_regwrite(device, GEN7_GPU_GMU_CX_GMU_CX_FAL_INTF, 0x1); + + /* Turn on TCM retention */ + adreno_cx_misc_regwrite(adreno_dev, GEN7_GPU_CX_MISC_TCM_RET_CNTL, 1); + + /* Clear init result to make sure we are getting fresh value */ + gmu_core_regwrite(device, GEN7_GMU_CM3_FW_INIT_RESULT, 0); + gmu_core_regwrite(device, GEN7_GMU_CM3_BOOT_CONFIG, 0x2); + + gmu_core_regwrite(device, GEN7_GMU_HFI_QTBL_ADDR, + gmu->hfi.hfi_mem->gmuaddr); + gmu_core_regwrite(device, GEN7_GMU_HFI_QTBL_INFO, 1); + + gmu_core_regwrite(device, GEN7_GMU_AHB_FENCE_RANGE_0, BIT(31) | + FIELD_PREP(GENMASK(30, 18), 0x32) | + FIELD_PREP(GENMASK(17, 0), 0x8a0)); + + /* + * Make sure that CM3 state is at reset value. Snapshot is changing + * NMI bit and if we boot up GMU with NMI bit set GMU will boot + * straight in to NMI handler without executing __main code + */ + gmu_core_regwrite(device, GEN7_GMU_CM3_CFG, 0x4052); + + /** + * We may have asserted gbif halt as part of reset sequence which may + * not get cleared if the gdsc was not reset. So clear it before + * attempting GMU boot. 
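gen7_gmu_register_config() above builds the GEN7_GMU_AHB_FENCE_RANGE_0 value from BIT(31) plus two FIELD_PREP()/GENMASK() packed fields. The sketch below re-implements just enough of those helpers to make the packing arithmetic visible; the simplified macros are local to the example, and the printed constant is only a check of the packing shown in the driver code, not a documented register value.

#include <stdio.h>
#include <stdint.h>

/*
 * Minimal userspace stand-ins for the kernel helpers used above:
 * GENMASK(h, l) sets bits h..l, FIELD_PREP(mask, val) shifts val into
 * the position of the mask's lowest set bit.
 */
#define BIT_SK(n)         (1u << (n))
#define GENMASK_SK(h, l)  (((~0u) << (l)) & (~0u >> (31 - (h))))
#define FIELD_PREP_SK(mask, val) \
	(((uint32_t)(val) << __builtin_ctz(mask)) & (mask))

int main(void)
{
	/* Same packing as the GEN7_GMU_AHB_FENCE_RANGE_0 write above. */
	uint32_t val = BIT_SK(31) |
		       FIELD_PREP_SK(GENMASK_SK(30, 18), 0x32) |
		       FIELD_PREP_SK(GENMASK_SK(17, 0), 0x8a0);

	printf("fence range value = 0x%08x\n", val);	/* prints 0x80c808a0 */
	return 0;
}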
+ */ + kgsl_regwrite(device, GEN7_GBIF_HALT, 0x0); + + /* Set the log wptr index */ + gmu_core_regwrite(device, GEN7_GMU_GENERAL_9, + gmu->log_wptr_retention); + + /* Pass chipid to GMU FW, must happen before starting GMU */ + gmu_core_regwrite(device, GEN7_GMU_GENERAL_10, + ADRENO_GMU_CHIPID(adreno_dev->chipid)); + + /* Log size is encoded in (number of 4K units - 1) */ + val = (gmu->gmu_log->gmuaddr & GENMASK(31, 12)) | + ((GMU_LOG_SIZE/SZ_4K - 1) & GENMASK(7, 0)); + gmu_core_regwrite(device, GEN7_GMU_GENERAL_8, val); + + /* Configure power control and bring the GMU out of reset */ + gen7_gmu_power_config(adreno_dev); + + /* + * Enable BCL throttling - + * XOCLK1: countable: 0x13 (25% throttle) + * XOCLK2: countable: 0x17 (58% throttle) + * XOCLK3: countable: 0x19 (75% throttle) + * POWER_CONTROL_SELECT_0 controls counters 0 - 3, each selector + * is 8 bits wide. + */ + if (adreno_dev->bcl_enabled) + gmu_core_regrmw(device, GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_0, + 0xffffff00, FIELD_PREP(GENMASK(31, 24), 0x19) | + FIELD_PREP(GENMASK(23, 16), 0x17) | + FIELD_PREP(GENMASK(15, 8), 0x13)); +} + +struct kgsl_memdesc *gen7_reserve_gmu_kernel_block(struct gen7_gmu_device *gmu, + u32 addr, u32 size, u32 vma_id) +{ + int ret; + struct kgsl_memdesc *md; + struct gmu_vma_entry *vma = &gmu->vma[vma_id]; + struct kgsl_device *device = KGSL_DEVICE(gen7_gmu_to_adreno(gmu)); + + if (gmu->global_entries == ARRAY_SIZE(gmu->gmu_globals)) + return ERR_PTR(-ENOMEM); + + md = &gmu->gmu_globals[gmu->global_entries]; + + ret = kgsl_allocate_kernel(device, md, size, 0, KGSL_MEMDESC_SYSMEM); + if (ret) { + memset(md, 0x0, sizeof(*md)); + return ERR_PTR(-ENOMEM); + } + + if (!addr) + addr = vma->next_va; + + ret = gmu_core_map_memdesc(gmu->domain, md, addr, + IOMMU_READ | IOMMU_WRITE | IOMMU_PRIV); + if (ret) { + dev_err(&gmu->pdev->dev, + "Unable to map GMU kernel block: addr:0x%08x size:0x%x :%d\n", + addr, md->size, ret); + kgsl_sharedmem_free(md); + memset(md, 0, sizeof(*md)); + return ERR_PTR(-ENOMEM); + } + + md->gmuaddr = addr; + + vma->next_va = md->gmuaddr + md->size; + + gmu->global_entries++; + + return md; +} + +static int gen7_gmu_process_prealloc(struct gen7_gmu_device *gmu, + struct gmu_block_header *blk) +{ + struct kgsl_memdesc *md; + + int id = find_vma_block(gmu, blk->addr, blk->value); + + if (id < 0) { + dev_err(&gmu->pdev->dev, + "Invalid prealloc block addr: 0x%x value:%d\n", + blk->addr, blk->value); + return id; + } + + /* Nothing to do for TCM blocks or user uncached */ + if (id == GMU_ITCM || id == GMU_DTCM || id == GMU_NONCACHED_USER) + return 0; + + /* Check if the block is already allocated */ + md = find_gmu_memdesc(gmu, blk->addr, blk->value); + if (md != NULL) + return 0; + + md = gen7_reserve_gmu_kernel_block(gmu, blk->addr, blk->value, id); + + return PTR_ERR_OR_ZERO(md); +} + +int gen7_gmu_parse_fw(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + const struct adreno_gen7_core *gen7_core = to_gen7_core(adreno_dev); + struct gmu_block_header *blk; + int ret, offset = 0; + const char *gmufw_name = gen7_core->gmufw_name; + + /* GMU fw already saved and verified so do nothing new */ + if (gmu->fw_image) + return 0; + + if (gen7_core->gmufw_name == NULL) + return -EINVAL; + + ret = request_firmware(&gmu->fw_image, gmufw_name, &gmu->pdev->dev); + if (ret) { + if (gen7_core->gmufw_bak_name) { + gmufw_name = gen7_core->gmufw_bak_name; + ret = request_firmware(&gmu->fw_image, gmufw_name, + &gmu->pdev->dev); + } + if (ret) { + 
dev_err(&gmu->pdev->dev, + "request_firmware (%s) failed: %d\n", + gmufw_name, ret); + + return ret; + } + } + + /* + * Zero payload fw blocks contain meta data and are + * guaranteed to precede fw load data. Parse the + * meta data blocks. + */ + while (offset < gmu->fw_image->size) { + blk = (struct gmu_block_header *)&gmu->fw_image->data[offset]; + + if (offset + sizeof(*blk) > gmu->fw_image->size) { + dev_err(&gmu->pdev->dev, "Invalid FW Block\n"); + return -EINVAL; + } + + /* Done with zero length blocks so return */ + if (blk->size) + break; + + offset += sizeof(*blk); + + if (blk->type == GMU_BLK_TYPE_PREALLOC_REQ || + blk->type == GMU_BLK_TYPE_PREALLOC_PERSIST_REQ) { + ret = gen7_gmu_process_prealloc(gmu, blk); + + if (ret) + return ret; + } + } + + return 0; +} + +int gen7_gmu_memory_init(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + + /* GMU master log */ + if (IS_ERR_OR_NULL(gmu->gmu_log)) + gmu->gmu_log = gen7_reserve_gmu_kernel_block(gmu, 0, + GMU_LOG_SIZE, GMU_NONCACHED_KERNEL); + + return PTR_ERR_OR_ZERO(gmu->gmu_log); +} + +static int gen7_gmu_init(struct adreno_device *adreno_dev) +{ + int ret; + + ret = gen7_gmu_parse_fw(adreno_dev); + if (ret) + return ret; + + ret = gen7_gmu_memory_init(adreno_dev); + if (ret) + return ret; + + return gen7_hfi_init(adreno_dev); +} + +static void _do_gbif_halt(struct kgsl_device *device, u32 reg, u32 ack_reg, + u32 mask, const char *client) +{ + u32 ack; + unsigned long t; + + kgsl_regwrite(device, reg, mask); + + t = jiffies + msecs_to_jiffies(100); + do { + kgsl_regread(device, ack_reg, &ack); + if ((ack & mask) == mask) + return; + + /* + * If we are attempting recovery in case of stall-on-fault + * then the halt sequence will not complete as long as SMMU + * is stalled. + */ + kgsl_mmu_pagefault_resume(&device->mmu, false); + + usleep_range(10, 100); + } while (!time_after(jiffies, t)); + + /* Check one last time */ + kgsl_mmu_pagefault_resume(&device->mmu, false); + + kgsl_regread(device, ack_reg, &ack); + if ((ack & mask) == mask) + return; + + dev_err(device->dev, "%s GBIF halt timed out\n", client); +} + +static void gen7_gmu_pwrctrl_suspend(struct adreno_device *adreno_dev) +{ + int ret = 0; + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + /* Disconnect GPU from BUS is not needed if CX GDSC goes off later */ + + /* Check no outstanding RPMh voting */ + gen7_complete_rpmh_votes(gmu); + + /* Clear the WRITEDROPPED fields and set fence to allow mode */ + gmu_core_regwrite(device, GEN7_GMU_AHB_FENCE_STATUS_CLR, 0x7); + gmu_core_regwrite(device, GEN7_GMU_AO_AHB_FENCE_CTRL, 0); + + /* Make sure above writes are committed before we proceed to recovery */ + wmb(); + + gmu_core_regwrite(device, GEN7_GMU_CM3_SYSRESET, 1); + + /* Halt GX traffic */ + if (gen7_gmu_gx_is_on(device)) + _do_gbif_halt(device, GEN7_RBBM_GBIF_HALT, + GEN7_RBBM_GBIF_HALT_ACK, + GEN7_GBIF_GX_HALT_MASK, + "GX"); + + /* Halt CX traffic */ + _do_gbif_halt(device, GEN7_GBIF_HALT, GEN7_GBIF_HALT_ACK, + GEN7_GBIF_ARB_HALT_MASK, "CX"); + + if (gen7_gmu_gx_is_on(device)) + kgsl_regwrite(device, GEN7_RBBM_SW_RESET_CMD, 0x1); + + /* Allow the software reset to complete */ + udelay(100); + + /* + * This is based on the assumption that GMU is the only one controlling + * the GX HS. This code path is the only client voting for GX through + * the regulator interface. 
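gen7_reserve_gmu_kernel_block() above hands out GMU virtual addresses with a simple bump allocator: when the caller does not request a fixed address it takes vma->next_va, and after a successful mapping it advances next_va past the new block. A minimal sketch of that bookkeeping follows; the structure fields and the sizes used are illustrative stand-ins, not the driver's gmu_vma_entry.

#include <stdio.h>
#include <stdint.h>

/* Illustrative stand-in for one gmu_vma_entry-style region. */
struct vma_sketch {
	uint32_t start;    /* base GMU VA of the region */
	uint32_t size;     /* region size in bytes */
	uint32_t next_va;  /* next free VA (bump pointer) */
};

/*
 * Bump-allocate 'size' bytes of GMU VA: use the caller's address if one
 * was given, otherwise take next_va, then advance next_va past the block.
 */
static uint32_t reserve_sketch(struct vma_sketch *vma, uint32_t addr, uint32_t size)
{
	if (!addr)
		addr = vma->next_va;

	if (addr + size > vma->start + vma->size)
		return 0;	/* out of space in this region */

	vma->next_va = addr + size;
	return addr;
}

int main(void)
{
	/* Roughly shaped like the GMU_NONCACHED_KERNEL entry above. */
	struct vma_sketch vma = {
		.start = 0x60000000, .size = 512u << 20, .next_va = 0x60000000,
	};

	printf("log   at 0x%08x\n", reserve_sketch(&vma, 0, 16 * 1024));
	printf("hfi   at 0x%08x\n", reserve_sketch(&vma, 0, 64 * 1024));
	printf("next_va now 0x%08x\n", vma.next_va);
	return 0;
}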
+ */ + if (gmu->gx_gdsc) { + if (gen7_gmu_gx_is_on(device)) { + /* Switch gx gdsc control from GMU to CPU + * force non-zero reference count in clk driver + * so next disable call will turn + * off the GDSC + */ + ret = regulator_enable(gmu->gx_gdsc); + if (ret) + dev_err(&gmu->pdev->dev, + "suspend fail: gx enable %d\n", ret); + + ret = regulator_disable(gmu->gx_gdsc); + if (ret) + dev_err(&gmu->pdev->dev, + "suspend fail: gx disable %d\n", ret); + + if (gen7_gmu_gx_is_on(device)) + dev_err(&gmu->pdev->dev, + "gx is stuck on\n"); + } + } +} + +/* + * gen7_gmu_notify_slumber() - initiate request to GMU to prepare to slumber + * @device: Pointer to KGSL device + */ +static int gen7_gmu_notify_slumber(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int bus_level = pwr->pwrlevels[pwr->default_pwrlevel].bus_freq; + int perf_idx = gmu->hfi.dcvs_table.gpu_level_num - + pwr->default_pwrlevel - 1; + struct hfi_prep_slumber_cmd req = { + .freq = perf_idx, + .bw = bus_level, + }; + int ret; + + /* Disable the power counter so that the GMU is not busy */ + gmu_core_regwrite(device, GEN7_GMU_CX_GMU_POWER_COUNTER_ENABLE, 0); + + ret = CMD_MSG_HDR(req, H2F_MSG_PREPARE_SLUMBER); + if (ret) + return ret; + + ret = gen7_hfi_send_generic_req(adreno_dev, &req); + + /* Make sure the fence is in ALLOW mode */ + gmu_core_regwrite(device, GEN7_GMU_AO_AHB_FENCE_CTRL, 0); + return ret; +} + +void gen7_gmu_suspend(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + gen7_gmu_irq_disable(adreno_dev); + + gen7_gmu_pwrctrl_suspend(adreno_dev); + + clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); + + gen7_cx_regulator_disable_wait(gmu->cx_gdsc, device, 5000); + + dev_err(&gmu->pdev->dev, "Suspended GMU\n"); + + device->state = KGSL_STATE_NONE; +} + +static int gen7_gmu_dcvs_set(struct adreno_device *adreno_dev, + int gpu_pwrlevel, int bus_level) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct hfi_dcvstable_cmd *table = &gmu->hfi.dcvs_table; + struct hfi_gx_bw_perf_vote_cmd req = { + .ack_type = DCVS_ACK_BLOCK, + .freq = INVALID_DCVS_IDX, + .bw = INVALID_DCVS_IDX, + }; + int ret = 0; + + if (!test_bit(GMU_PRIV_HFI_STARTED, &gmu->flags)) + return 0; + + /* Do not set to XO and lower GPU clock vote from GMU */ + if ((gpu_pwrlevel != INVALID_DCVS_IDX) && + (gpu_pwrlevel >= table->gpu_level_num - 1)) + return -EINVAL; + + if (gpu_pwrlevel < table->gpu_level_num - 1) + req.freq = table->gpu_level_num - gpu_pwrlevel - 1; + + if (bus_level < pwr->ddr_table_count && bus_level > 0) + req.bw = bus_level; + + /* GMU will vote for slumber levels through the sleep sequence */ + if ((req.freq == INVALID_DCVS_IDX) && + (req.bw == INVALID_DCVS_IDX)) { + return 0; + } + + ret = CMD_MSG_HDR(req, H2F_MSG_GX_BW_PERF_VOTE); + if (ret) + return ret; + + ret = gen7_hfi_send_generic_req(adreno_dev, &req); + if (ret) { + dev_err_ratelimited(&gmu->pdev->dev, + "Failed to set GPU perf idx %d, bw idx %d\n", + req.freq, req.bw); + + /* + * If this was a dcvs request along side an active gpu, request + * dispatcher based reset and recovery. 
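gen7_gmu_dcvs_set() above converts a KGSL power level into a GMU DCVS frequency index with table->gpu_level_num - gpu_pwrlevel - 1, which is consistent with KGSL counting levels down from 0 = fastest while the GMU table is indexed up from its lowest (XO) entry. The worked example below only replays that index arithmetic and the matching rejection check; the five-entry table is made up for illustration.

#include <stdio.h>

int main(void)
{
	/*
	 * Illustrative numbers only: a GMU DCVS table with 5 entries, where
	 * (by the arithmetic in gen7_gmu_dcvs_set()) index 0 is the lowest
	 * level and the highest index is the fastest one.
	 */
	int gpu_level_num = 5;

	/* KGSL power levels count down from 0 = fastest. */
	for (int gpu_pwrlevel = 0; gpu_pwrlevel < gpu_level_num; gpu_pwrlevel++) {
		if (gpu_pwrlevel >= gpu_level_num - 1) {
			/* Same rejection as the driver: never vote the bottom level from here. */
			printf("pwrlevel %d -> rejected (would be XO or lower)\n",
			       gpu_pwrlevel);
			continue;
		}
		printf("pwrlevel %d -> GMU freq index %d\n",
		       gpu_pwrlevel, gpu_level_num - gpu_pwrlevel - 1);
	}
	return 0;
}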
+ */ + if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) + adreno_dispatcher_fault(adreno_dev, ADRENO_GMU_FAULT | + ADRENO_GMU_FAULT_SKIP_SNAPSHOT); + } + + return ret; +} + +static int gen7_gmu_clock_set(struct adreno_device *adreno_dev, u32 pwrlevel) +{ + return gen7_gmu_dcvs_set(adreno_dev, pwrlevel, INVALID_DCVS_IDX); +} + +static int gen7_gmu_ifpc_store(struct kgsl_device *device, + unsigned int val) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + unsigned int requested_idle_level; + + if (!ADRENO_FEATURE(adreno_dev, ADRENO_IFPC)) + return -EINVAL; + + if (val) + requested_idle_level = GPU_HW_IFPC; + else + requested_idle_level = GPU_HW_ACTIVE; + + if (gmu->idle_level == requested_idle_level) + return 0; + + /* Power down the GPU before changing the idle level */ + return adreno_power_cycle_u32(adreno_dev, &gmu->idle_level, + requested_idle_level); +} + +static unsigned int gen7_gmu_ifpc_show(struct kgsl_device *device) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(ADRENO_DEVICE(device)); + + return gmu->idle_level == GPU_HW_IFPC; +} + +/* Send an NMI to the GMU */ +void gen7_gmu_send_nmi(struct adreno_device *adreno_dev, bool force) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + u32 result; + + /* + * Do not send NMI if the SMMU is stalled because GMU will not be able + * to save cm3 state to DDR. + */ + if (gen7_gmu_gx_is_on(device) && gen7_is_smmu_stalled(device)) { + dev_err(&gmu->pdev->dev, + "Skipping NMI because SMMU is stalled\n"); + return; + } + + if (force) + goto nmi; + + /* + * We should not send NMI if there was a CM3 fault reported because we + * don't want to overwrite the critical CM3 state captured by gmu before + * it sent the CM3 fault interrupt. Also don't send NMI if GMU reset is + * already active. We could have hit a GMU assert and NMI might have + * already been triggered. + */ + + /* make sure we're reading the latest cm3_fault */ + smp_rmb(); + + if (atomic_read(&gmu->cm3_fault)) + return; + + gmu_core_regread(device, GEN7_GMU_CM3_FW_INIT_RESULT, &result); + + if (result & 0xE00) + return; + +nmi: + /* Mask so there's no interrupt caused by NMI */ + gmu_core_regwrite(device, GEN7_GMU_GMU2HOST_INTR_MASK, UINT_MAX); + + /* Make sure the interrupt is masked before causing it */ + wmb(); + + /* This will cause the GMU to save it's internal state to ddr */ + gmu_core_regrmw(device, GEN7_GMU_CM3_CFG, BIT(9), BIT(9)); + + /* Make sure the NMI is invoked before we proceed*/ + wmb(); + + /* Wait for the NMI to be handled */ + udelay(200); +} + +static void gen7_gmu_cooperative_reset(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + unsigned int result; + + gmu_core_regwrite(device, GEN7_GMU_CX_GMU_WDOG_CTRL, 0); + gmu_core_regwrite(device, GEN7_GMU_HOST2GMU_INTR_SET, BIT(17)); + + /* + * After triggering graceful death wait for snapshot ready + * indication from GMU. + */ + if (!gmu_core_timed_poll_check(device, GEN7_GMU_CM3_FW_INIT_RESULT, + 0x800, 2, 0x800)) + return; + + gmu_core_regread(device, GEN7_GMU_CM3_FW_INIT_RESULT, &result); + dev_err(&gmu->pdev->dev, + "GMU cooperative reset timed out 0x%x\n", result); + /* + * If we dont get a snapshot ready from GMU, trigger NMI + * and if we still timeout then we just continue with reset. 
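gen7_gmu_send_nmi() above raises the NMI with a read-modify-write of GEN7_GMU_CM3_CFG through gmu_core_regrmw(), and gen7_gmu_register_config() earlier uses the same helper to rewrite only the upper selector bytes of the BCL power-counter select register. The sketch below shows the clear-mask-then-set-bits pattern those calls rely on; treating the third argument as bits to clear and the fourth as bits to set is an assumption that merely matches how the calls are written here, and the sketch operates on a plain variable rather than a real register.

#include <stdio.h>
#include <stdint.h>

/*
 * Read-modify-write sketch (assumed semantics, not the driver helper):
 * clear the bits in 'mask', then OR in 'bits'.
 */
static void regrmw_sketch(uint32_t *reg, uint32_t mask, uint32_t bits)
{
	uint32_t val = *reg;	/* read ... */

	val &= ~mask;		/* ... clear the field ... */
	val |= bits;		/* ... install the new bits ... */
	*reg = val;		/* ... write back */
}

int main(void)
{
	uint32_t cm3_cfg = 0x4052;	/* reset value written in register_config */

	/* Shape of gmu_core_regrmw(..., CM3_CFG, BIT(9), BIT(9)): set bit 9. */
	regrmw_sketch(&cm3_cfg, 1u << 9, 1u << 9);
	printf("CM3_CFG-style value after setting bit 9: 0x%x\n", cm3_cfg);

	/* Rewrite only the upper three selector bytes, keep byte 0 intact. */
	uint32_t sel = 0x000000aa;
	regrmw_sketch(&sel, 0xffffff00, (0x19u << 24) | (0x17u << 16) | (0x13u << 8));
	printf("selector-style value: 0x%08x\n", sel);	/* prints 0x191713aa */
	return 0;
}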
+ */ + gen7_gmu_send_nmi(adreno_dev, true); + + gmu_core_regread(device, GEN7_GMU_CM3_FW_INIT_RESULT, &result); + if ((result & 0x800) != 0x800) + dev_err(&gmu->pdev->dev, + "GMU cooperative reset NMI timed out 0x%x\n", result); +} + +static int gen7_gmu_wait_for_active_transition(struct kgsl_device *device) +{ + unsigned int reg; + struct gen7_gmu_device *gmu = to_gen7_gmu(ADRENO_DEVICE(device)); + + if (gmu_core_timed_poll_check(device, GEN7_GPU_GMU_CX_GMU_RPMH_POWER_STATE, + GPU_HW_ACTIVE, 100, GENMASK(3, 0))) { + gmu_core_regread(device, GEN7_GPU_GMU_CX_GMU_RPMH_POWER_STATE, ®); + dev_err(&gmu->pdev->dev, + "GMU failed to move to ACTIVE state, Current state: 0x%x\n", + reg); + + return -ETIMEDOUT; + } + + return 0; +} + +static bool gen7_gmu_scales_bandwidth(struct kgsl_device *device) +{ + return true; +} + +void gen7_gmu_handle_watchdog(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 mask; + + /* Temporarily mask the watchdog interrupt to prevent a storm */ + gmu_core_regread(device, GEN7_GMU_AO_HOST_INTERRUPT_MASK, &mask); + gmu_core_regwrite(device, GEN7_GMU_AO_HOST_INTERRUPT_MASK, + (mask | GMU_INT_WDOG_BITE)); + + gen7_gmu_send_nmi(adreno_dev, false); + + dev_err_ratelimited(&gmu->pdev->dev, + "GMU watchdog expired interrupt received\n"); +} + +static irqreturn_t gen7_gmu_irq_handler(int irq, void *data) +{ + struct kgsl_device *device = data; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + const struct gen7_gpudev *gen7_gpudev = + to_gen7_gpudev(ADRENO_GPU_DEVICE(adreno_dev)); + unsigned int status = 0; + + gmu_core_regread(device, GEN7_GMU_AO_HOST_INTERRUPT_STATUS, &status); + gmu_core_regwrite(device, GEN7_GMU_AO_HOST_INTERRUPT_CLR, status); + + if (status & GMU_INT_HOST_AHB_BUS_ERR) + dev_err_ratelimited(&gmu->pdev->dev, + "AHB bus error interrupt received\n"); + + if (status & GMU_INT_WDOG_BITE) + gen7_gpudev->handle_watchdog(adreno_dev); + + if (status & GMU_INT_FENCE_ERR) { + unsigned int fence_status; + + gmu_core_regread(device, GEN7_GMU_AHB_FENCE_STATUS, + &fence_status); + dev_err_ratelimited(&gmu->pdev->dev, + "FENCE error interrupt received %x\n", fence_status); + } + + if (status & ~GMU_AO_INT_MASK) + dev_err_ratelimited(&gmu->pdev->dev, + "Unhandled GMU interrupts 0x%lx\n", + status & ~GMU_AO_INT_MASK); + + return IRQ_HANDLED; +} + +void gen7_gmu_snapshot(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + /* Send nmi only if it was a gmu fault */ + if (device->gmu_fault) + gen7_gmu_send_nmi(adreno_dev, false); + + gen7_gmu_device_snapshot(device, snapshot); + + gen7_snapshot(adreno_dev, snapshot); + + gmu_core_regwrite(device, GEN7_GMU_GMU2HOST_INTR_CLR, UINT_MAX); + gmu_core_regwrite(device, GEN7_GMU_GMU2HOST_INTR_MASK, HFI_IRQ_MASK); +} + +void gen7_gmu_aop_send_acd_state(struct gen7_gmu_device *gmu, bool flag) +{ + struct qmp_pkt msg; + char msg_buf[36]; + u32 size; + int ret; + + if (IS_ERR_OR_NULL(gmu->mailbox.channel)) + return; + + size = scnprintf(msg_buf, sizeof(msg_buf), + "{class: gpu, res: acd, val: %d}", flag); + + /* mailbox controller expects 4-byte aligned buffer */ + msg.size = ALIGN((size + 1), SZ_4); + msg.data = msg_buf; + + ret = mbox_send_message(gmu->mailbox.channel, &msg); + + if (ret < 0) + dev_err(&gmu->pdev->dev, + "AOP mbox send message failed: %d\n", ret); +} + +int 
gen7_gmu_enable_gdsc(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int ret; + + ret = regulator_enable(gmu->cx_gdsc); + if (ret) + dev_err(&gmu->pdev->dev, + "Failed to enable GMU CX gdsc, error %d\n", ret); + + return ret; +} + +int gen7_gmu_enable_clks(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + + ret = kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "gmu_clk", + GMU_FREQ_MIN); + if (ret) { + dev_err(&gmu->pdev->dev, "Unable to set the GMU clock\n"); + return ret; + } + + ret = kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "hub_clk", + 150000000); + if (ret && ret != -ENODEV) { + dev_err(&gmu->pdev->dev, "Unable to set the HUB clock\n"); + return ret; + } + + ret = clk_bulk_prepare_enable(gmu->num_clks, gmu->clks); + if (ret) { + dev_err(&gmu->pdev->dev, "Cannot enable GMU clocks\n"); + return ret; + } + + device->state = KGSL_STATE_AWARE; + + return 0; +} + +static int gen7_gmu_first_boot(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int level, ret; + + trace_kgsl_pwr_request_state(device, KGSL_STATE_AWARE); + + gen7_gmu_aop_send_acd_state(gmu, adreno_dev->acd_enabled); + + ret = gen7_gmu_enable_gdsc(adreno_dev); + if (ret) + return ret; + + ret = gen7_gmu_enable_clks(adreno_dev); + if (ret) + goto gdsc_off; + + ret = gen7_gmu_load_fw(adreno_dev); + if (ret) + goto clks_gdsc_off; + + gen7_gmu_version_info(adreno_dev); + + ret = gen7_gmu_itcm_shadow(adreno_dev); + if (ret) + goto clks_gdsc_off; + + gen7_gmu_register_config(adreno_dev); + + gen7_gmu_irq_enable(adreno_dev); + + /* Vote for minimal DDR BW for GMU to init */ + level = pwr->pwrlevels[pwr->default_pwrlevel].bus_min; + icc_set_bw(pwr->icc_path, 0, kBps_to_icc(pwr->ddr_table[level])); + + ret = gen7_gmu_device_start(adreno_dev); + if (ret) + goto err; + + if (!test_bit(GMU_PRIV_PDC_RSC_LOADED, &gmu->flags)) { + ret = gen7_load_pdc_ucode(adreno_dev); + if (ret) + goto err; + + gen7_load_rsc_ucode(adreno_dev); + set_bit(GMU_PRIV_PDC_RSC_LOADED, &gmu->flags); + } + + ret = gen7_gmu_hfi_start(adreno_dev); + if (ret) + goto err; + + ret = gen7_hfi_start(adreno_dev); + if (ret) + goto err; + + icc_set_bw(pwr->icc_path, 0, 0); + + device->gmu_fault = false; + + if (ADRENO_FEATURE(adreno_dev, ADRENO_BCL)) + adreno_dev->bcl_enabled = true; + + trace_kgsl_pwr_set_state(device, KGSL_STATE_AWARE); + + return 0; + +err: + if (device->gmu_fault) { + gen7_gmu_suspend(adreno_dev); + return ret; + } + + gen7_gmu_irq_disable(adreno_dev); + +clks_gdsc_off: + clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); + +gdsc_off: + /* Poll to make sure that the CX is off */ + gen7_cx_regulator_disable_wait(gmu->cx_gdsc, device, 5000); + + return ret; +} + +static int gen7_gmu_boot(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int ret = 0; + + trace_kgsl_pwr_request_state(device, KGSL_STATE_AWARE); + + ret = gen7_gmu_enable_gdsc(adreno_dev); + if (ret) + return ret; + + ret = gen7_gmu_enable_clks(adreno_dev); + if (ret) + goto gdsc_off; + + ret = gen7_rscc_wakeup_sequence(adreno_dev); + if (ret) + goto clks_gdsc_off; + + ret = gen7_gmu_load_fw(adreno_dev); + if (ret) + goto clks_gdsc_off; + + gen7_gmu_register_config(adreno_dev); + + 
gen7_gmu_irq_enable(adreno_dev); + + ret = gen7_gmu_device_start(adreno_dev); + if (ret) + goto err; + + ret = gen7_gmu_hfi_start(adreno_dev); + if (ret) + goto err; + + ret = gen7_hfi_start(adreno_dev); + if (ret) + goto err; + + device->gmu_fault = false; + + trace_kgsl_pwr_set_state(device, KGSL_STATE_AWARE); + + return 0; + +err: + if (device->gmu_fault) { + gen7_gmu_suspend(adreno_dev); + return ret; + } + + gen7_gmu_irq_disable(adreno_dev); + +clks_gdsc_off: + clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); + +gdsc_off: + /* Poll to make sure that the CX is off */ + gen7_cx_regulator_disable_wait(gmu->cx_gdsc, device, 5000); + + return ret; +} + +static void set_acd(struct adreno_device *adreno_dev, void *priv) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + + adreno_dev->acd_enabled = *((bool *)priv); + gen7_gmu_aop_send_acd_state(gmu, adreno_dev->acd_enabled); +} + +static int gen7_gmu_acd_set(struct kgsl_device *device, bool val) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + + if (IS_ERR_OR_NULL(gmu->mailbox.channel)) + return -EINVAL; + + /* Don't do any unneeded work if ACD is already in the correct state */ + if (adreno_dev->acd_enabled == val) + return 0; + + /* Power cycle the GPU for changes to take effect */ + return adreno_power_cycle(adreno_dev, set_acd, &val); +} + +static const struct gmu_dev_ops gen7_gmudev = { + .oob_set = gen7_gmu_oob_set, + .oob_clear = gen7_gmu_oob_clear, + .gx_is_on = gen7_gmu_gx_is_on, + .ifpc_store = gen7_gmu_ifpc_store, + .ifpc_show = gen7_gmu_ifpc_show, + .cooperative_reset = gen7_gmu_cooperative_reset, + .wait_for_active_transition = gen7_gmu_wait_for_active_transition, + .scales_bandwidth = gen7_gmu_scales_bandwidth, + .acd_set = gen7_gmu_acd_set, +}; + +static int gen7_gmu_bus_set(struct adreno_device *adreno_dev, int buslevel, + u32 ab) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int ret = 0; + + if (buslevel != pwr->cur_buslevel) { + ret = gen7_gmu_dcvs_set(adreno_dev, INVALID_DCVS_IDX, buslevel); + if (ret) + return ret; + + pwr->cur_buslevel = buslevel; + + trace_kgsl_buslevel(device, pwr->active_pwrlevel, buslevel); + } + + if (ab != pwr->cur_ab) { + icc_set_bw(pwr->icc_path, MBps_to_icc(ab), 0); + pwr->cur_ab = ab; + } + + return ret; +} + +static void gen7_free_gmu_globals(struct gen7_gmu_device *gmu) +{ + int i; + + for (i = 0; i < gmu->global_entries; i++) { + struct kgsl_memdesc *md = &gmu->gmu_globals[i]; + + if (!md->gmuaddr) + continue; + + iommu_unmap(gmu->domain, + md->gmuaddr, md->size); + + dma_free_attrs(&gmu->pdev->dev, (size_t) md->size, + (void *)md->hostptr, md->physaddr, 0); + + memset(md, 0, sizeof(*md)); + } + + if (gmu->domain) { + iommu_detach_device(gmu->domain, &gmu->pdev->dev); + iommu_domain_free(gmu->domain); + gmu->domain = NULL; + } + + gmu->global_entries = 0; +} + +static int gen7_gmu_aop_mailbox_init(struct adreno_device *adreno_dev, + struct gen7_gmu_device *gmu) +{ + struct kgsl_mailbox *mailbox = &gmu->mailbox; + + mailbox->client.dev = &gmu->pdev->dev; + mailbox->client.tx_block = true; + mailbox->client.tx_tout = 1000; + mailbox->client.knows_txdone = false; + + mailbox->channel = mbox_request_channel(&mailbox->client, 0); + if (IS_ERR(mailbox->channel)) + return PTR_ERR(mailbox->channel); + + adreno_dev->acd_enabled = true; + return 0; +} + +static void gen7_gmu_acd_probe(struct kgsl_device *device, + struct gen7_gmu_device *gmu, struct 
device_node *node) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct kgsl_pwrlevel *pwrlevel = + &pwr->pwrlevels[pwr->num_pwrlevels - 1]; + struct hfi_acd_table_cmd *cmd = &gmu->hfi.acd_table; + int ret, i, cmd_idx = 0; + + if (!ADRENO_FEATURE(adreno_dev, ADRENO_ACD)) + return; + + cmd->hdr = CREATE_MSG_HDR(H2F_MSG_ACD_TBL, sizeof(*cmd), HFI_MSG_CMD); + + cmd->version = 1; + cmd->stride = 1; + cmd->enable_by_level = 0; + + /* + * Iterate through each gpu power level and generate a mask for GMU + * firmware for ACD enabled levels and store the corresponding control + * register configurations to the acd_table structure. + */ + for (i = 0; i < pwr->num_pwrlevels; i++) { + if (pwrlevel->acd_level) { + cmd->enable_by_level |= (1 << (i + 1)); + cmd->data[cmd_idx++] = pwrlevel->acd_level; + } + pwrlevel--; + } + + if (!cmd->enable_by_level) + return; + + cmd->num_levels = cmd_idx; + + ret = gen7_gmu_aop_mailbox_init(adreno_dev, gmu); + if (ret) + dev_err(&gmu->pdev->dev, + "AOP mailbox init failed: %d\n", ret); +} + +static int gen7_gmu_reg_probe(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int ret; + + ret = kgsl_regmap_add_region(&device->regmap, gmu->pdev, "gmu", NULL, NULL); + + if (ret) + dev_err(&gmu->pdev->dev, "Unable to map the GMU registers\n"); + /* + * gmu_ao_blk_dec1 and gmu_ao_blk_dec2 are contiguous and contained within the gmu region + * mapped above. gmu_ao_blk_dec0 is not within the gmu region and is mapped separately. + */ + kgsl_regmap_add_region(&device->regmap, gmu->pdev, "gmu_ao_blk_dec0", NULL, NULL); + + return ret; +} + +static int gen7_gmu_regulators_probe(struct gen7_gmu_device *gmu, + struct platform_device *pdev) +{ + gmu->cx_gdsc = devm_regulator_get(&pdev->dev, "vddcx"); + if (IS_ERR(gmu->cx_gdsc)) { + if (PTR_ERR(gmu->cx_gdsc) != -EPROBE_DEFER) + dev_err(&pdev->dev, "Couldn't get the vddcx gdsc\n"); + return PTR_ERR(gmu->cx_gdsc); + } + + gmu->gx_gdsc = devm_regulator_get(&pdev->dev, "vdd"); + if (IS_ERR(gmu->gx_gdsc)) { + if (PTR_ERR(gmu->gx_gdsc) != -EPROBE_DEFER) + dev_err(&pdev->dev, "Couldn't get the vdd gdsc\n"); + return PTR_ERR(gmu->gx_gdsc); + } + + return 0; +} + +void gen7_gmu_remove(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + + if (!IS_ERR_OR_NULL(gmu->mailbox.channel)) + mbox_free_channel(gmu->mailbox.channel); + + adreno_dev->acd_enabled = false; + + if (gmu->fw_image) + release_firmware(gmu->fw_image); + + gen7_free_gmu_globals(gmu); + + vfree(gmu->itcm_shadow); + kobject_put(&gmu->log_kobj); +} + +static int gen7_gmu_iommu_fault_handler(struct iommu_domain *domain, + struct device *dev, unsigned long addr, int flags, void *token) +{ + char *fault_type = "unknown"; + + if (flags & IOMMU_FAULT_TRANSLATION) + fault_type = "translation"; + else if (flags & IOMMU_FAULT_PERMISSION) + fault_type = "permission"; + else if (flags & IOMMU_FAULT_EXTERNAL) + fault_type = "external"; + else if (flags & IOMMU_FAULT_TRANSACTION_STALLED) + fault_type = "transaction stalled"; + + dev_err(dev, "GMU fault addr = %lX, context=kernel (%s %s fault)\n", + addr, + (flags & IOMMU_FAULT_WRITE) ? 
"write" : "read", + fault_type); + + return 0; +} + +static int gen7_gmu_iommu_init(struct gen7_gmu_device *gmu) +{ + int ret; + int no_stall = 1; + + gmu->domain = iommu_domain_alloc(&platform_bus_type); + if (gmu->domain == NULL) { + dev_err(&gmu->pdev->dev, "Unable to allocate GMU IOMMU domain\n"); + return -ENODEV; + } + + /* + * Disable stall on fault for the GMU context bank. + * This sets SCTLR.CFCFG = 0. + * Also note that, the smmu driver sets SCTLR.HUPCF = 0 by default. + */ + iommu_domain_set_attr(gmu->domain, + DOMAIN_ATTR_FAULT_MODEL_NO_STALL, &no_stall); + + ret = iommu_attach_device(gmu->domain, &gmu->pdev->dev); + if (!ret) { + iommu_set_fault_handler(gmu->domain, + gen7_gmu_iommu_fault_handler, gmu); + return 0; + } + + dev_err(&gmu->pdev->dev, + "Unable to attach GMU IOMMU domain: %d\n", ret); + iommu_domain_free(gmu->domain); + gmu->domain = NULL; + + return ret; +} + +int gen7_gmu_probe(struct kgsl_device *device, + struct platform_device *pdev) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct device *dev = &pdev->dev; + struct resource *res; + int ret, i; + + gmu->pdev = pdev; + + dma_set_coherent_mask(&gmu->pdev->dev, DMA_BIT_MASK(64)); + gmu->pdev->dev.dma_mask = &gmu->pdev->dev.coherent_dma_mask; + set_dma_ops(&gmu->pdev->dev, NULL); + + res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM, + "rscc"); + if (res) { + gmu->rscc_virt = devm_ioremap(&device->pdev->dev, res->start, + resource_size(res)); + if (!gmu->rscc_virt) { + dev_err(&gmu->pdev->dev, "rscc ioremap failed\n"); + return -ENOMEM; + } + } + + /* Set up GMU regulators */ + ret = gen7_gmu_regulators_probe(gmu, pdev); + if (ret) + return ret; + + ret = devm_clk_bulk_get_all(&pdev->dev, &gmu->clks); + if (ret < 0) + return ret; + + /* + * Voting for apb_pclk will enable power and clocks required for + * QDSS path to function. However, if QCOM_KGSL_QDSS_STM is not enabled, + * QDSS is essentially unusable. Hence, if QDSS cannot be used, + * don't vote for this clock. 
+ */ + if (!IS_ENABLED(CONFIG_QCOM_KGSL_QDSS_STM)) { + for (i = 0; i < ret; i++) { + if (!strcmp(gmu->clks[i].id, "apb_pclk")) { + gmu->clks[i].clk = NULL; + break; + } + } + } + + gmu->num_clks = ret; + + /* Set up GMU IOMMU and shared memory with GMU */ + ret = gen7_gmu_iommu_init(gmu); + if (ret) + goto error; + + gmu->vma = gen7_gmu_vma; + + /* Map and reserve GMU CSRs registers */ + ret = gen7_gmu_reg_probe(adreno_dev); + if (ret) + goto error; + + /* Populates RPMh configurations */ + ret = gen7_build_rpmh_tables(adreno_dev); + if (ret) + goto error; + + /* Set up GMU idle state */ + if (ADRENO_FEATURE(adreno_dev, ADRENO_IFPC)) + gmu->idle_level = GPU_HW_IFPC; + else + gmu->idle_level = GPU_HW_ACTIVE; + + gen7_gmu_acd_probe(device, gmu, pdev->dev.of_node); + + set_bit(GMU_ENABLED, &device->gmu_core.flags); + + device->gmu_core.dev_ops = &gen7_gmudev; + + /* Set default GMU attributes */ + gmu->log_stream_enable = false; + gmu->log_group_mask = 0x3; + + /* GMU sysfs nodes setup */ + kobject_init_and_add(&gmu->log_kobj, &log_kobj_type, &dev->kobj, "log"); + + of_property_read_u32(gmu->pdev->dev.of_node, "qcom,gmu-perf-ddr-bw", + &gmu->perf_ddr_bw); + + gmu->irq = kgsl_request_irq(gmu->pdev, "gmu", + gen7_gmu_irq_handler, device); + + if (gmu->irq >= 0) + return 0; + + ret = gmu->irq; + +error: + gen7_gmu_remove(device); + return ret; +} + +static void gen7_gmu_active_count_put(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (WARN_ON(!mutex_is_locked(&device->mutex))) + return; + + if (WARN(atomic_read(&device->active_cnt) == 0, + "Unbalanced get/put calls to KGSL active count\n")) + return; + + if (atomic_dec_and_test(&device->active_cnt)) { + kgsl_pwrscale_update_stats(device); + kgsl_pwrscale_update(device); + kgsl_start_idle_timer(device); + } + + trace_kgsl_active_count(device, + (unsigned long) __builtin_return_address(0)); + + wake_up(&device->active_cnt_wq); +} + +int gen7_halt_gbif(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + + /* Halt new client requests */ + kgsl_regwrite(device, GEN7_GBIF_HALT, GEN7_GBIF_CLIENT_HALT_MASK); + ret = adreno_wait_for_halt_ack(device, + GEN7_GBIF_HALT_ACK, GEN7_GBIF_CLIENT_HALT_MASK); + + /* Halt all AXI requests */ + kgsl_regwrite(device, GEN7_GBIF_HALT, GEN7_GBIF_ARB_HALT_MASK); + ret = adreno_wait_for_halt_ack(device, + GEN7_GBIF_HALT_ACK, GEN7_GBIF_ARB_HALT_MASK); + + /* De-assert the halts */ + kgsl_regwrite(device, GEN7_GBIF_HALT, 0x0); + + return ret; +} + +static int gen7_gmu_power_off(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret = 0; + + if (device->gmu_fault) + goto error; + + /* Wait for the lowest idle level we requested */ + ret = gen7_gmu_wait_for_lowest_idle(adreno_dev); + if (ret) + goto error; + + ret = gen7_gmu_notify_slumber(adreno_dev); + if (ret) + goto error; + + ret = gen7_gmu_wait_for_idle(adreno_dev); + if (ret) + goto error; + + ret = gen7_rscc_sleep_sequence(adreno_dev); + if (ret) + goto error; + + /* Now that we are done with GMU and GPU, Clear the GBIF */ + ret = gen7_halt_gbif(adreno_dev); + if (ret) + goto error; + + gen7_gmu_irq_disable(adreno_dev); + + gen7_hfi_stop(adreno_dev); + + clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); + + /* Poll to make sure that the CX is off */ + gen7_cx_regulator_disable_wait(gmu->cx_gdsc, device, 5000); + + device->state = KGSL_STATE_NONE; + + return 
0; + +error: + gen7_hfi_stop(adreno_dev); + gen7_gmu_suspend(adreno_dev); + + return ret; +} + +void gen7_enable_gpu_irq(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + kgsl_pwrctrl_irq(device, true); + + adreno_irqctrl(adreno_dev, 1); +} + +void gen7_disable_gpu_irq(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + kgsl_pwrctrl_irq(device, false); + + if (gen7_gmu_gx_is_on(device)) + adreno_irqctrl(adreno_dev, 0); +} + +static int gen7_gpu_boot(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + + /* Clear any GPU faults that might have been left over */ + adreno_clear_gpu_fault(adreno_dev); + + adreno_set_active_ctxs_null(adreno_dev); + + adreno_ringbuffer_set_global(adreno_dev, 0); + + ret = kgsl_mmu_start(device); + if (ret) + goto err; + + ret = gen7_gmu_oob_set(device, oob_gpu); + if (ret) + goto oob_clear; + + ret = gen7_gmu_hfi_start_msg(adreno_dev); + if (ret) + goto oob_clear; + + /* Clear the busy_data stats - we're starting over from scratch */ + memset(&adreno_dev->busy_data, 0, sizeof(adreno_dev->busy_data)); + + gen7_start(adreno_dev); + + /* Re-initialize the coresight registers if applicable */ + adreno_coresight_start(adreno_dev); + + adreno_perfcounter_start(adreno_dev); + + /* Clear FSR here in case it is set from a previous pagefault */ + kgsl_mmu_clear_fsr(&device->mmu); + + gen7_enable_gpu_irq(adreno_dev); + + ret = gen7_rb_start(adreno_dev); + if (ret) { + gen7_disable_gpu_irq(adreno_dev); + goto oob_clear; + } + + /* Start the dispatcher */ + adreno_dispatcher_start(device); + + device->reset_counter++; + + gen7_gmu_oob_clear(device, oob_gpu); + + return 0; + +oob_clear: + gen7_gmu_oob_clear(device, oob_gpu); + +err: + gen7_gmu_power_off(adreno_dev); + + return ret; +} + +static void gmu_idle_timer(struct timer_list *t) +{ + struct kgsl_device *device = container_of(t, struct kgsl_device, + idle_timer); + + kgsl_schedule_work(&device->idle_check_ws); +} + +static int gen7_boot(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + + WARN_ON(test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)); + + trace_kgsl_pwr_request_state(device, KGSL_STATE_ACTIVE); + + ret = gen7_gmu_boot(adreno_dev); + if (ret) + return ret; + + ret = gen7_gpu_boot(adreno_dev); + if (ret) + return ret; + + kgsl_start_idle_timer(device); + kgsl_pwrscale_wake(device); + + set_bit(GMU_PRIV_GPU_STARTED, &gmu->flags); + + device->pwrctrl.last_stat_updated = ktime_get(); + device->state = KGSL_STATE_ACTIVE; + + trace_kgsl_pwr_set_state(device, KGSL_STATE_ACTIVE); + + return ret; +} + +static int gen7_first_boot(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int ret; + + if (test_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags)) + return gen7_boot(adreno_dev); + + ret = gen7_ringbuffer_init(adreno_dev); + if (ret) + return ret; + + ret = gen7_microcode_read(adreno_dev); + if (ret) + return ret; + + ret = gen7_init(adreno_dev); + if (ret) + return ret; + + ret = gen7_gmu_init(adreno_dev); + if (ret) + return ret; + + trace_kgsl_pwr_request_state(device, KGSL_STATE_ACTIVE); + + ret = gen7_gmu_first_boot(adreno_dev); + if (ret) + return ret; + + ret = gen7_gpu_boot(adreno_dev); + if (ret) + return ret; + + adreno_get_bus_counters(adreno_dev); + + 
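+	/*
+	 * Everything below runs exactly once: later calls bail out early into
+	 * gen7_boot() once GMU_PRIV_FIRST_BOOT_DONE is set.
+	 */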
adreno_dev->cooperative_reset = ADRENO_FEATURE(adreno_dev, + ADRENO_COOP_RESET); + + adreno_create_profile_buffer(adreno_dev); + + set_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags); + set_bit(GMU_PRIV_GPU_STARTED, &gmu->flags); + + /* + * There is a possible deadlock scenario during kgsl firmware reading + * (request_firmware) and devfreq update calls. During first boot, kgsl + * device mutex is held and then request_firmware is called for reading + * firmware. request_firmware internally takes dev_pm_qos_mtx lock. + * Whereas in case of devfreq update calls triggered by thermal/bcl or + * devfreq sysfs, it first takes the same dev_pm_qos_mtx lock and then + * tries to take kgsl device mutex as part of get_dev_status/target + * calls. This results in deadlock when both thread are unable to acquire + * the mutex held by other thread. Enable devfreq updates now as we are + * done reading all firmware files. + */ + device->pwrscale.devfreq_enabled = true; + + device->pwrctrl.last_stat_updated = ktime_get(); + device->state = KGSL_STATE_ACTIVE; + + trace_kgsl_pwr_set_state(device, KGSL_STATE_ACTIVE); + + return 0; +} + +static int gen7_power_off(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int ret; + + WARN_ON(!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)); + + trace_kgsl_pwr_request_state(device, KGSL_STATE_SLUMBER); + + adreno_suspend_context(device); + + ret = gen7_gmu_oob_set(device, oob_gpu); + if (!ret) { + kgsl_pwrscale_update_stats(device); + + /* Save active coresight registers if applicable */ + adreno_coresight_stop(adreno_dev); + + adreno_irqctrl(adreno_dev, 0); + } + + gen7_gmu_oob_clear(device, oob_gpu); + + kgsl_pwrctrl_irq(device, false); + + gen7_gmu_power_off(adreno_dev); + + adreno_set_active_ctxs_null(adreno_dev); + + adreno_dispatcher_stop(adreno_dev); + + adreno_ringbuffer_stop(adreno_dev); + + if (!IS_ERR_OR_NULL(adreno_dev->gpu_llc_slice)) + llcc_slice_deactivate(adreno_dev->gpu_llc_slice); + + if (!IS_ERR_OR_NULL(adreno_dev->gpuhtw_llc_slice)) + llcc_slice_deactivate(adreno_dev->gpuhtw_llc_slice); + + clear_bit(GMU_PRIV_GPU_STARTED, &gmu->flags); + + del_timer_sync(&device->idle_timer); + + kgsl_pwrscale_sleep(device); + + kgsl_pwrctrl_clear_l3_vote(device); + + /* + * Reset the context records so that CP can start + * at the correct read pointer for BV thread after + * coming out of slumber. 
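+	 * BV here refers to the CP's concurrent-binning thread on gen7
+	 * targets, which tracks its own read pointer in these records.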
+ */ + gen7_reset_preempt_records(adreno_dev); + + trace_kgsl_pwr_set_state(device, KGSL_STATE_SLUMBER); + + return ret; +} + +static void gmu_idle_check(struct work_struct *work) +{ + struct kgsl_device *device = container_of(work, + struct kgsl_device, idle_check_ws); + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + + mutex_lock(&device->mutex); + + if (test_bit(GMU_DISABLE_SLUMBER, &device->gmu_core.flags)) + goto done; + + if (!atomic_read(&device->active_cnt)) { + if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) + gen7_power_off(adreno_dev); + } else { + kgsl_pwrscale_update(device); + kgsl_start_idle_timer(device); + } + +done: + mutex_unlock(&device->mutex); +} + +static int gen7_gmu_first_open(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + + /* + * Do the one time settings that need to happen when we + * attempt to boot the gpu the very first time + */ + ret = gen7_first_boot(adreno_dev); + if (ret) + return ret; + + /* + * A client that does a first_open but never closes the device + * may prevent us from going back to SLUMBER. So trigger the idle + * check by incrementing the active count and immediately releasing it. + */ + atomic_inc(&device->active_cnt); + gen7_gmu_active_count_put(adreno_dev); + + return 0; +} + +static int gen7_gmu_last_close(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + + if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) + return gen7_power_off(adreno_dev); + + return 0; +} + +static int gen7_gmu_active_count_get(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int ret = 0; + + if (WARN_ON(!mutex_is_locked(&device->mutex))) + return -EINVAL; + + if (test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags)) + return -EINVAL; + + if ((atomic_read(&device->active_cnt) == 0) && + !test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) + ret = gen7_boot(adreno_dev); + + if (ret == 0) + atomic_inc(&device->active_cnt); + + trace_kgsl_active_count(device, + (unsigned long) __builtin_return_address(0)); + + return ret; +} + +static int gen7_gmu_pm_suspend(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int ret; + + if (test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags)) + return 0; + + trace_kgsl_pwr_request_state(device, KGSL_STATE_SUSPEND); + + /* Halt any new submissions */ + reinit_completion(&device->halt_gate); + + /* wait for active count so device can be put in slumber */ + ret = kgsl_active_count_wait(device, 0, HZ); + if (ret) { + dev_err(device->dev, + "Timed out waiting for the active count\n"); + goto err; + } + + ret = adreno_idle(device); + if (ret) + goto err; + + if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) + gen7_power_off(adreno_dev); + + set_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags); + + adreno_get_gpu_halt(adreno_dev); + + trace_kgsl_pwr_set_state(device, KGSL_STATE_SUSPEND); + + return 0; +err: + adreno_dispatcher_start(device); + return ret; +} + +static void gen7_gmu_pm_resume(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + + if (WARN(!test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags), + "resume invoked without a suspend\n")) + return; + + adreno_put_gpu_halt(adreno_dev); + + 
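+	/*
+	 * Restart the dispatcher only; the GPU itself is booted again on
+	 * demand when gen7_gmu_active_count_get() takes the next reference.
+	 */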
adreno_dispatcher_start(device); + + clear_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags); +} + +static void gen7_gmu_touch_wakeup(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int ret; + + /* + * Do not wake up a suspended device or until the first boot sequence + * has been completed. + */ + if (test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags) || + !test_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags)) + return; + + if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) + goto done; + + trace_kgsl_pwr_request_state(device, KGSL_STATE_ACTIVE); + + ret = gen7_gmu_boot(adreno_dev); + if (ret) + return; + + ret = gen7_gpu_boot(adreno_dev); + if (ret) + return; + + kgsl_pwrscale_wake(device); + + set_bit(GMU_PRIV_GPU_STARTED, &gmu->flags); + + device->pwrctrl.last_stat_updated = ktime_get(); + device->state = KGSL_STATE_ACTIVE; + + trace_kgsl_pwr_set_state(device, KGSL_STATE_ACTIVE); + +done: + /* + * When waking up from a touch event we want to stay active long enough + * for the user to send a draw command. The default idle timer timeout + * is shorter than we want so go ahead and push the idle timer out + * further for this special case + */ + mod_timer(&device->idle_timer, jiffies + + msecs_to_jiffies(adreno_wake_timeout)); +} + +const struct adreno_power_ops gen7_gmu_power_ops = { + .first_open = gen7_gmu_first_open, + .last_close = gen7_gmu_last_close, + .active_count_get = gen7_gmu_active_count_get, + .active_count_put = gen7_gmu_active_count_put, + .pm_suspend = gen7_gmu_pm_suspend, + .pm_resume = gen7_gmu_pm_resume, + .touch_wakeup = gen7_gmu_touch_wakeup, + .gpu_clock_set = gen7_gmu_clock_set, + .gpu_bus_set = gen7_gmu_bus_set, +}; + +int gen7_gmu_device_probe(struct platform_device *pdev, + u32 chipid, const struct adreno_gpu_core *gpucore) +{ + struct adreno_device *adreno_dev; + struct kgsl_device *device; + struct gen7_device *gen7_dev; + int ret; + + gen7_dev = devm_kzalloc(&pdev->dev, sizeof(*gen7_dev), + GFP_KERNEL); + if (!gen7_dev) + return -ENOMEM; + + adreno_dev = &gen7_dev->adreno_dev; + + ret = gen7_probe_common(pdev, adreno_dev, chipid, gpucore); + if (ret) + return ret; + + ret = adreno_dispatcher_init(adreno_dev); + if (ret) + return ret; + + device = KGSL_DEVICE(adreno_dev); + + INIT_WORK(&device->idle_check_ws, gmu_idle_check); + + timer_setup(&device->idle_timer, gmu_idle_timer, 0); + + adreno_dev->irq_mask = GEN7_INT_MASK; + + return 0; +} + +int gen7_gmu_reset(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + + gen7_hfi_stop(adreno_dev); + + gen7_disable_gpu_irq(adreno_dev); + + /* Hard reset the gmu and gpu */ + gen7_gmu_suspend(adreno_dev); + + gen7_reset_preempt_records(adreno_dev); + + clear_bit(GMU_PRIV_GPU_STARTED, &gmu->flags); + + /* Attempt to reboot the gmu and gpu */ + return gen7_boot(adreno_dev); +} + +int gen7_gmu_hfi_probe(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct gen7_hfi *hfi = &gmu->hfi; + + hfi->irq = kgsl_request_irq(gmu->pdev, "hfi", + gen7_hfi_irq_handler, KGSL_DEVICE(adreno_dev)); + + return hfi->irq < 0 ? 
hfi->irq : 0; +} + +int gen7_gmu_add_to_minidump(struct adreno_device *adreno_dev) +{ + struct gen7_device *gen7_dev = container_of(adreno_dev, + struct gen7_device, adreno_dev); + int ret; + + ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev, KGSL_GEN7_DEVICE, + (void *)(gen7_dev), sizeof(struct gen7_device)); + if (ret) + return ret; + + ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev, KGSL_GMU_LOG_ENTRY, + gen7_dev->gmu.gmu_log->hostptr, gen7_dev->gmu.gmu_log->size); + if (ret) + return ret; + + ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev, KGSL_HFIMEM_ENTRY, + gen7_dev->gmu.hfi.hfi_mem->hostptr, gen7_dev->gmu.hfi.hfi_mem->size); + + return ret; +} + +static int gen7_gmu_bind(struct device *dev, struct device *master, void *data) +{ + struct kgsl_device *device = dev_get_drvdata(master); + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + const struct gen7_gpudev *gen7_gpudev = to_gen7_gpudev(gpudev); + int ret; + + ret = gen7_gmu_probe(device, to_platform_device(dev)); + if (ret) + return ret; + + if (gen7_gpudev->hfi_probe) { + ret = gen7_gpudev->hfi_probe(adreno_dev); + + if (ret) { + gen7_gmu_remove(device); + return ret; + } + } + + return 0; +} + +static void gen7_gmu_unbind(struct device *dev, struct device *master, + void *data) +{ + struct kgsl_device *device = dev_get_drvdata(master); + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + const struct gen7_gpudev *gen7_gpudev = to_gen7_gpudev(gpudev); + + if (gen7_gpudev->hfi_remove) + gen7_gpudev->hfi_remove(adreno_dev); + + gen7_gmu_remove(device); +} + +static const struct component_ops gen7_gmu_component_ops = { + .bind = gen7_gmu_bind, + .unbind = gen7_gmu_unbind, +}; + +static int gen7_gmu_probe_dev(struct platform_device *pdev) +{ + return component_add(&pdev->dev, &gen7_gmu_component_ops); +} + +static int gen7_gmu_remove_dev(struct platform_device *pdev) +{ + component_del(&pdev->dev, &gen7_gmu_component_ops); + return 0; +} + +static const struct of_device_id gen7_gmu_match_table[] = { + { .compatible = "qcom,gen7-gmu" }, + { }, +}; + +struct platform_driver gen7_gmu_driver = { + .probe = gen7_gmu_probe_dev, + .remove = gen7_gmu_remove_dev, + .driver = { + .name = "adreno-gen7-gmu", + .of_match_table = gen7_gmu_match_table, + }, +}; diff --git a/adreno_gen7_gmu.h b/adreno_gen7_gmu.h new file mode 100644 index 0000000000..0702793251 --- /dev/null +++ b/adreno_gen7_gmu.h @@ -0,0 +1,422 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. 
+ */ +#ifndef __ADRENO_GEN7_GMU_H +#define __ADRENO_GEN7_GMU_H + +#include + +#include "adreno_gen7_hfi.h" +#include "kgsl_gmu_core.h" + +/** + * struct gen7_gmu_device - GMU device structure + * @ver: GMU Version information + * @irq: GMU interrupt number + * @fw_image: GMU FW image + * @hfi_mem: pointer to HFI shared memory + * @dump_mem: pointer to GMU debug dump memory + * @gmu_log: gmu event log memory + * @hfi: HFI controller + * @num_gpupwrlevels: number GPU frequencies in GPU freq table + * @num_bwlevel: number of GPU BW levels + * @num_cnocbwlevel: number CNOC BW levels + * @rpmh_votes: RPMh TCS command set for GPU, GMU voltage and bw scaling + * @cx_gdsc: CX headswitch that controls power of GMU and + * subsystem peripherals + * @gx_gdsc: GX headswitch that controls power of GPU subsystem + * @clks: GPU subsystem clocks required for GMU functionality + * @wakeup_pwrlevel: GPU wake up power/DCVS level in case different + * than default power level + * @idle_level: Minimal GPU idle power level + * @fault_count: GMU fault count + * @mailbox: Messages to AOP for ACD enable/disable go through this + * @log_wptr_retention: Store the log wptr offset on slumber + */ +struct gen7_gmu_device { + struct { + u32 core; + u32 core_dev; + u32 pwr; + u32 pwr_dev; + u32 hfi; + } ver; + struct platform_device *pdev; + int irq; + const struct firmware *fw_image; + struct kgsl_memdesc *dump_mem; + struct kgsl_memdesc *gmu_log; + struct gen7_hfi hfi; + /** @pwrlevels: Array of GMU power levels */ + struct regulator *cx_gdsc; + struct regulator *gx_gdsc; + struct clk_bulk_data *clks; + /** @num_clks: Number of entries in the @clks array */ + int num_clks; + unsigned int idle_level; + struct kgsl_mailbox mailbox; + /** @gmu_globals: Array to store gmu global buffers */ + struct kgsl_memdesc gmu_globals[GMU_KERNEL_ENTRIES]; + /** @global_entries: To keep track of number of gmu buffers */ + u32 global_entries; + struct gmu_vma_entry *vma; + unsigned int log_wptr_retention; + /** @cm3_fault: whether gmu received a cm3 fault interrupt */ + atomic_t cm3_fault; + /** + * @itcm_shadow: Copy of the itcm block in firmware binary used for + * snapshot + */ + void *itcm_shadow; + /** @flags: Internal gmu flags */ + unsigned long flags; + /** @rscc_virt: Pointer where RSCC block is mapped */ + void __iomem *rscc_virt; + /** @domain: IOMMU domain for the kernel context */ + struct iommu_domain *domain; + /** @log_stream_enable: GMU log streaming enable. Disabled by default */ + bool log_stream_enable; + /** @log_group_mask: Allows overriding default GMU log group mask */ + u32 log_group_mask; + struct kobject log_kobj; + /* + * @perf_ddr_bw: The lowest ddr bandwidth that puts CX at a corner at + * which GMU can run at 500 Mhz. 
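+	 * Read from the "qcom,gmu-perf-ddr-bw" DT property at probe time.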
+ */ + u32 perf_ddr_bw; +}; + +/* Helper function to get to gen7 gmu device from adreno device */ +struct gen7_gmu_device *to_gen7_gmu(struct adreno_device *adreno_dev); + +/* Helper function to get to adreno device from gen7 gmu device */ +struct adreno_device *gen7_gmu_to_adreno(struct gen7_gmu_device *gmu); + +/** + * gen7_reserve_gmu_kernel_block() - Allocate a gmu buffer + * @gmu: Pointer to the gen7 gmu device + * @addr: Desired gmu virtual address + * @size: Size of the buffer in bytes + * @vma_id: Target gmu vma where this buffer should be mapped + * + * This function allocates a buffer and maps it in + * the desired gmu vma + * + * Return: Pointer to the memory descriptor or error pointer on failure + */ +struct kgsl_memdesc *gen7_reserve_gmu_kernel_block(struct gen7_gmu_device *gmu, + u32 addr, u32 size, u32 vma_id); + +/** + * gen7_build_rpmh_tables - Build the rpmh tables + * @adreno_dev: Pointer to the adreno device + * + * This function creates the gpu dcvs and bw tables + * + * Return: 0 on success and negative error on failure + */ +int gen7_build_rpmh_tables(struct adreno_device *adreno_dev); + +/** + * gen7_gmu_gx_is_on - Check if GX is on + * @device: Pointer to KGSL device + * + * This function reads pwr status registers to check if GX + * is on or off + */ +bool gen7_gmu_gx_is_on(struct kgsl_device *device); + +/** + * gen7_gmu_device_snapshot - GEN7 GMU snapshot function + * @device: Device being snapshotted + * @snapshot: Pointer to the snapshot instance + * + * This is where all of the GEN7 GMU specific bits and pieces are grabbed + * into the snapshot memory + */ +void gen7_gmu_device_snapshot(struct kgsl_device *device, + struct kgsl_snapshot *snapshot); + +/** + * gen7_gmu_device_probe - GEN7 GMU snapshot function + * @pdev: Pointer to the platform device + * @chipid: Chipid of the target + * @gpucore: Pointer to the gpucore + * + * The target specific probe function for gmu based gen7 targets. + */ +int gen7_gmu_device_probe(struct platform_device *pdev, + u32 chipid, const struct adreno_gpu_core *gpucore); + +/** + * gen7_gmu_reset - Reset and restart the gmu + * @adreno_dev: Pointer to the adreno device + * + * Return: 0 on success or negative error on failure + */ +int gen7_gmu_reset(struct adreno_device *adreno_dev); + +/** + * gen7_enable_gpu_irq - Enable gpu interrupt + * @adreno_dev: Pointer to the adreno device + */ +void gen7_enable_gpu_irq(struct adreno_device *adreno_dev); + +/** + * gen7_disable_gpu_irq - Disable gpu interrupt + * @adreno_dev: Pointer to the adreno device + */ +void gen7_disable_gpu_irq(struct adreno_device *adreno_dev); + +/** + * gen7_gmu_snapshot- Take snapshot for gmu targets + * @adreno_dev: Pointer to the adreno device + * @snapshot: Pointer to the snapshot structure + * + * Send an NMI to gmu if we hit a gmu fault. 
Then take the gmu
+ * snapshot and carry on with the rest of the gen7 snapshot
+ */
+void gen7_gmu_snapshot(struct adreno_device *adreno_dev,
+	struct kgsl_snapshot *snapshot);
+
+/**
+ * gen7_gmu_probe - Probe gen7 gmu resources
+ * @device: Pointer to the kgsl device
+ * @pdev: Pointer to the gmu platform device
+ *
+ * Probe the gmu and hfi resources
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_gmu_probe(struct kgsl_device *device,
+	struct platform_device *pdev);
+
+/**
+ * gen7_gmu_parse_fw - Parse the gmu fw binary
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_gmu_parse_fw(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_gmu_memory_init - Allocate gmu memory
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Allocates the gmu log buffer and others if needed.
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_gmu_memory_init(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_gmu_aop_send_acd_state - Enable or disable acd feature in aop
+ * @gmu: Pointer to the gen7 gmu device
+ * @flag: Boolean to enable or disable acd in aop
+ *
+ * This function enables or disables the gpu acd feature using the aop mailbox
+ */
+void gen7_gmu_aop_send_acd_state(struct gen7_gmu_device *gmu, bool flag);
+
+/**
+ * gen7_gmu_enable_gdsc - Enable gmu gdsc
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_gmu_enable_gdsc(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_gmu_load_fw - Load gmu firmware
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Loads the gmu firmware binary into TCMs and memory
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_gmu_load_fw(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_gmu_device_start - Bring gmu out of reset
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_gmu_device_start(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_gmu_hfi_start - Indicate hfi start to gmu
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_gmu_hfi_start(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_gmu_itcm_shadow - Create itcm shadow copy for snapshot
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_gmu_itcm_shadow(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_gmu_register_config - gmu register configuration
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Program gmu registers based on features
+ */
+void gen7_gmu_register_config(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_gmu_version_info - Get gmu firmware version
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Read and store the gmu firmware version information
+ */
+void gen7_gmu_version_info(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_gmu_irq_enable - Enable gmu interrupts
+ * @adreno_dev: Pointer to the adreno device
+ */
+void gen7_gmu_irq_enable(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_gmu_irq_disable - Disable gmu interrupts
+ * @adreno_dev: Pointer to the adreno device
+ */
+void gen7_gmu_irq_disable(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_gmu_suspend - Hard reset the gpu and gmu
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * In case we hit a gmu fault, hard reset the gpu and gmu
+ * to recover from the fault
+ */
+void gen7_gmu_suspend(struct adreno_device
*adreno_dev); + +/** + * gen7_gmu_oob_set - send gmu oob request + * @device: Pointer to the kgsl device + * @req: Type of oob request as defined in enum oob_request + * + * Request gmu to keep gpu powered up till the oob is cleared + * + * Return: 0 on success or negative error on failure + */ +int gen7_gmu_oob_set(struct kgsl_device *device, enum oob_request oob); + +/** + * gen7_gmu_oob_clear - clear an asserted oob request + * @device: Pointer to the kgsl device + * @req: Type of oob request as defined in enum oob_request + * + * Clear a previously requested oob so that gmu can power + * collapse the gpu + */ +void gen7_gmu_oob_clear(struct kgsl_device *device, enum oob_request oob); + +/** + * gen7_gmu_wait_for_lowest_idle - wait for gmu to complete ifpc + * @adreno_dev: Pointer to the adreno device + * + * If ifpc is enabled, wait for gmu to put gpu into ifpc. + * + * Return: 0 on success or negative error on failure + */ +int gen7_gmu_wait_for_lowest_idle(struct adreno_device *adreno_dev); + +/** + * gen7_gmu_wait_for_idle - Wait for gmu to become idle + * @adreno_dev: Pointer to the adreno device + * + * Return: 0 on success or negative error on failure + */ +int gen7_gmu_wait_for_idle(struct adreno_device *adreno_dev); + +/** + * gen7_rscc_sleep_sequence - Trigger rscc sleep sequence + * @adreno_dev: Pointer to the adreno device + * + * Return: 0 on success or negative error on failure + */ +int gen7_rscc_sleep_sequence(struct adreno_device *adreno_dev); + +/** + * gen7_rscc_wakeup_sequence - Trigger rscc wakeup sequence + * @adreno_dev: Pointer to the adreno device + * + * Return: 0 on success or negative error on failure + */ +int gen7_rscc_wakeup_sequence(struct adreno_device *adreno_dev); + +/** + * gen7_halt_gbif - Halt CX and GX requests in GBIF + * @adreno_dev: Pointer to the adreno device + * + * Clear any pending GX or CX transactions in GBIF and + * deassert GBIF halt + * + * Return: 0 on success or negative error on failure + */ +int gen7_halt_gbif(struct adreno_device *adreno_dev); + +/** + * gen7_load_pdc_ucode - Load and enable pdc sequence + * @adreno_dev: Pointer to the adreno device + * + * Return: 0 on success or negative error on failure + */ +int gen7_load_pdc_ucode(struct adreno_device *adreno_dev); + +/** + * gen7_load_rsc_ucode - Load rscc sequence + * @adreno_dev: Pointer to the adreno device + */ +void gen7_load_rsc_ucode(struct adreno_device *adreno_dev); + +/** + * gen7_gmu_remove - Clean up gmu probed resources + * @device: Pointer to the kgsl device + */ +void gen7_gmu_remove(struct kgsl_device *device); + +/** + * gen7_gmu_enable_clks - Enable gmu clocks + * @adreno_dev: Pointer to the adreno device + * + * Return: 0 on success or negative error on failure + */ +int gen7_gmu_enable_clks(struct adreno_device *adreno_dev); + +/** + * gen7_gmu_enable_gdsc - Enable gmu gdsc + * @adreno_dev: Pointer to the adreno device + * + * Return: 0 on success or negative error on failure + */ +int gen7_gmu_enable_gdsc(struct adreno_device *adreno_dev); + +/** + * gen7_gmu_handle_watchdog - Handle watchdog interrupt + * @adreno_dev: Pointer to the adreno device + */ +void gen7_gmu_handle_watchdog(struct adreno_device *adreno_dev); + +/** + * gen7_gmu_send_nmi - Send NMI to GMU + * @adreno_dev: Pointer to the adreno device + * @force: Boolean to forcefully send NMI irrespective of GMU state + */ +void gen7_gmu_send_nmi(struct adreno_device *adreno_dev, bool force); + +/** + * gen7_gmu_add_to_minidump - Register gen7_device with va minidump + * @adreno_dev: Pointer to 
the adreno device + */ +int gen7_gmu_add_to_minidump(struct adreno_device *adreno_dev); + +#endif diff --git a/adreno_gen7_gmu_snapshot.c b/adreno_gen7_gmu_snapshot.c new file mode 100644 index 0000000000..39c3c184a9 --- /dev/null +++ b/adreno_gen7_gmu_snapshot.c @@ -0,0 +1,326 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + */ + +#include "gen7_reg.h" +#include "adreno.h" +#include "adreno_gen7.h" +#include "adreno_gen7_gmu.h" +#include "adreno_snapshot.h" +#include "kgsl_device.h" + +static const u32 gen7_gmu_registers[] = { + 0x10001, 0x10001, 0x10003, 0x10003, 0x10401, 0x10401, 0x10403, 0x10403, + 0x10801, 0x10801, 0x10803, 0x10803, 0x10c01, 0x10c01, 0x10c03, 0x10c03, + 0x11001, 0x11001, 0x11003, 0x11003, 0x11401, 0x11401, 0x11403, 0x11403, + 0x11801, 0x11801, 0x11803, 0x11803, 0x11c01, 0x11c01, 0x11c03, 0x11c03, + 0x1f400, 0x1f40d, 0x1f40f, 0x1f411, 0x1f500, 0x1f500, 0x1f507, 0x1f507, + 0x1f509, 0x1f50b, 0x1f800, 0x1f804, 0x1f807, 0x1f808, 0x1f80b, 0x1f80c, + 0x1f80f, 0x1f80f, 0x1f811, 0x1f811, 0x1f813, 0x1f817, 0x1f819, 0x1f81c, + 0x1f824, 0x1f82a, 0x1f82d, 0x1f830, 0x1f840, 0x1f853, 0x1f860, 0x1f860, + 0x1f870, 0x1f879, 0x1f87f, 0x1f87f, 0x1f888, 0x1f889, 0x1f8a0, 0x1f8a2, + 0x1f8a4, 0x1f8af, 0x1f8c0, 0x1f8c1, 0x1f8c3, 0x1f8c4, 0x1f8d0, 0x1f8d0, + 0x1f8ec, 0x1f8ec, 0x1f8f0, 0x1f8f1, 0x1f910, 0x1f914, 0x1f920, 0x1f921, + 0x1f924, 0x1f925, 0x1f928, 0x1f929, 0x1f92c, 0x1f92d, 0x1f940, 0x1f940, + 0x1f942, 0x1f944, 0x1f948, 0x1f94a, 0x1f94f, 0x1f951, 0x1f958, 0x1f95a, + 0x1f95d, 0x1f95d, 0x1f962, 0x1f962, 0x1f964, 0x1f96b, 0x1f970, 0x1f979, + 0x1f980, 0x1f981, 0x1f984, 0x1f986, 0x1f992, 0x1f993, 0x1f996, 0x1f99e, + 0x1f9c0, 0x1f9c0, 0x1f9c5, 0x1f9d4, 0x1f9f0, 0x1f9f1, 0x1f9f8, 0x1f9fa, + 0x1fa00, 0x1fa03, 0x20000, 0x20005, 0x20008, 0x2000c, 0x20010, 0x20012, + 0x20018, 0x20018, 0x20020, 0x20023, 0x20030, 0x20031, 0x23801, 0x23801, + 0x23803, 0x23803, 0x23805, 0x23805, 0x23807, 0x23807, 0x23809, 0x23809, + 0x2380b, 0x2380b, 0x2380d, 0x2380d, 0x2380f, 0x2380f, 0x23811, 0x23811, + 0x23813, 0x23813, 0x23815, 0x23815, 0x23817, 0x23817, 0x23819, 0x23819, + 0x2381b, 0x2381b, 0x2381d, 0x2381d, 0x2381f, 0x23820, 0x23822, 0x23822, + 0x23824, 0x23824, 0x23826, 0x23826, 0x23828, 0x23828, 0x2382a, 0x2382a, + 0x2382c, 0x2382c, 0x2382e, 0x2382e, 0x23830, 0x23830, 0x23832, 0x23832, + 0x23834, 0x23834, 0x23836, 0x23836, 0x23838, 0x23838, 0x2383a, 0x2383a, + 0x2383c, 0x2383c, 0x2383e, 0x2383e, 0x23840, 0x23847, 0x23b00, 0x23b01, + 0x23b03, 0x23b03, 0x23b05, 0x23b0e, 0x23b10, 0x23b13, 0x23b15, 0x23b16, + 0x23b20, 0x23b20, 0x23b28, 0x23b28, 0x23b30, 0x23b30, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_gmu_registers), 8)); + +static const u32 gen7_gmu_gx_registers[] = { + 0x1a400, 0x1a41f, 0x1a440, 0x1a45f, 0x1a480, 0x1a49f, 0x1a4c0, 0x1a4df, + 0x1a500, 0x1a51f, 0x1a540, 0x1a55f, 0x1a580, 0x1a59f, 0x1a5c0, 0x1a5df, + 0x1a780, 0x1a781, 0x1a783, 0x1a785, 0x1a787, 0x1a789, 0x1a78b, 0x1a78d, + 0x1a78f, 0x1a791, 0x1a793, 0x1a795, 0x1a797, 0x1a799, 0x1a79b, 0x1a79b, + 0x1a7c0, 0x1a7c1, 0x1a7c4, 0x1a7c5, 0x1a7c8, 0x1a7c9, 0x1a7cc, 0x1a7cd, + 0x1a7d0, 0x1a7d1, 0x1a7d4, 0x1a7d5, 0x1a7d8, 0x1a7d9, 0x1a7fc, 0x1a7fd, + 0x1a800, 0x1a802, 0x1a804, 0x1a804, 0x1a816, 0x1a816, 0x1a81e, 0x1a81e, + 0x1a826, 0x1a826, 0x1a82e, 0x1a82e, 0x1a836, 0x1a836, 0x1a83e, 0x1a83e, + 0x1a846, 0x1a846, 0x1a860, 0x1a862, 0x1a864, 0x1a867, 0x1a870, 0x1a870, + 0x1a883, 0x1a884, 0x1a8c0, 0x1a8c2, 0x1a8c4, 0x1a8c7, 0x1a8d0, 0x1a8d3, + 0x1a900, 0x1a92b, 
0x1a940, 0x1a940, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_gmu_gx_registers), 8)); + +static const u32 gen7_rscc_registers[] = { + 0x14000, 0x14036, 0x14040, 0x14042, 0x14080, 0x14084, 0x14089, 0x1408c, + 0x14091, 0x14094, 0x14099, 0x1409c, 0x140a1, 0x140a4, 0x140a9, 0x140ac, + 0x14100, 0x14102, 0x14114, 0x14119, 0x14124, 0x1412e, 0x14140, 0x14143, + 0x14180, 0x14197, 0x14340, 0x14342, 0x14344, 0x14347, 0x1434c, 0x14373, + 0x143ec, 0x143ef, 0x143f4, 0x1441b, 0x14494, 0x14497, 0x1449c, 0x144c3, + 0x1453c, 0x1453f, 0x14544, 0x1456b, 0x145e4, 0x145e7, 0x145ec, 0x14613, + 0x1468c, 0x1468f, 0x14694, 0x146bb, 0x14734, 0x14737, 0x1473c, 0x14763, + 0x147dc, 0x147df, 0x147e4, 0x1480b, 0x14884, 0x14887, 0x1488c, 0x148b3, + 0x1492c, 0x1492f, 0x14934, 0x1495b, 0x14f51, 0x14f54, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_rscc_registers), 8)); + +struct gmu_mem_type_desc { + struct kgsl_memdesc *memdesc; + u32 type; +}; + +static size_t gen7_snapshot_gmu_mem(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_gmu_mem *mem_hdr = + (struct kgsl_snapshot_gmu_mem *)buf; + unsigned int *data = (unsigned int *) + (buf + sizeof(*mem_hdr)); + struct gmu_mem_type_desc *desc = priv; + + if (remain < desc->memdesc->size + sizeof(*mem_hdr)) { + dev_err(device->dev, + "snapshot: Not enough memory for the gmu section %d\n", + desc->type); + return 0; + } + + mem_hdr->type = desc->type; + mem_hdr->hostaddr = (u64)(uintptr_t)desc->memdesc->hostptr; + mem_hdr->gmuaddr = desc->memdesc->gmuaddr; + mem_hdr->gpuaddr = 0; + + memcpy(data, desc->memdesc->hostptr, desc->memdesc->size); + + return desc->memdesc->size + sizeof(*mem_hdr); +} + +static size_t gen7_gmu_snapshot_dtcm(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_gmu_mem *mem_hdr = + (struct kgsl_snapshot_gmu_mem *)buf; + struct gen7_gmu_device *gmu = (struct gen7_gmu_device *)priv; + u32 *data = (u32 *)(buf + sizeof(*mem_hdr)); + u32 i; + + if (remain < gmu->vma[GMU_DTCM].size + sizeof(*mem_hdr)) { + SNAPSHOT_ERR_NOMEM(device, "GMU DTCM Memory"); + return 0; + } + + mem_hdr->type = SNAPSHOT_GMU_MEM_BIN_BLOCK; + mem_hdr->hostaddr = 0; + mem_hdr->gmuaddr = gmu->vma[GMU_DTCM].start; + mem_hdr->gpuaddr = 0; + + for (i = 0; i < (gmu->vma[GMU_DTCM].size >> 2); i++) + gmu_core_regread(device, GEN7_GMU_CM3_DTCM_START + i, data++); + + return gmu->vma[GMU_DTCM].size + sizeof(*mem_hdr); +} + +static size_t gen7_gmu_snapshot_itcm(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_gmu_mem *mem_hdr = + (struct kgsl_snapshot_gmu_mem *)buf; + void *dest = buf + sizeof(*mem_hdr); + struct gen7_gmu_device *gmu = (struct gen7_gmu_device *)priv; + + if (!gmu->itcm_shadow) { + dev_err(&gmu->pdev->dev, "No memory allocated for ITCM shadow capture\n"); + return 0; + } + + if (remain < gmu->vma[GMU_ITCM].size + sizeof(*mem_hdr)) { + SNAPSHOT_ERR_NOMEM(device, "GMU ITCM Memory"); + return 0; + } + + mem_hdr->type = SNAPSHOT_GMU_MEM_BIN_BLOCK; + mem_hdr->hostaddr = 0; + mem_hdr->gmuaddr = gmu->vma[GMU_ITCM].start; + mem_hdr->gpuaddr = 0; + + memcpy(dest, gmu->itcm_shadow, gmu->vma[GMU_ITCM].size); + + return gmu->vma[GMU_ITCM].size + sizeof(*mem_hdr); +} + +static void gen7_gmu_snapshot_memories(struct kgsl_device *device, + struct gen7_gmu_device *gmu, struct kgsl_snapshot *snapshot) +{ + struct gmu_mem_type_desc desc; + struct kgsl_memdesc *md; + int i; + + for (i = 0; i < ARRAY_SIZE(gmu->gmu_globals); i++) { + + md = 
&gmu->gmu_globals[i]; + if (!md->size) + continue; + + desc.memdesc = md; + if (md == gmu->hfi.hfi_mem) + desc.type = SNAPSHOT_GMU_MEM_HFI; + else if (md == gmu->gmu_log) + desc.type = SNAPSHOT_GMU_MEM_LOG; + else if (md == gmu->dump_mem) + desc.type = SNAPSHOT_GMU_MEM_DEBUG; + else + desc.type = SNAPSHOT_GMU_MEM_BIN_BLOCK; + + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_GMU_MEMORY, + snapshot, gen7_snapshot_gmu_mem, &desc); + } +} + +struct kgsl_snapshot_gmu_version { + u32 type; + u32 value; +}; + +static size_t gen7_snapshot_gmu_version(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; + u32 *data = (u32 *) (buf + sizeof(*header)); + struct kgsl_snapshot_gmu_version *ver = priv; + + if (remain < DEBUG_SECTION_SZ(1)) { + SNAPSHOT_ERR_NOMEM(device, "GMU Version"); + return 0; + } + + header->type = ver->type; + header->size = 1; + + *data = ver->value; + + return DEBUG_SECTION_SZ(1); +} + +static void gen7_gmu_snapshot_versions(struct kgsl_device *device, + struct gen7_gmu_device *gmu, + struct kgsl_snapshot *snapshot) +{ + int i; + + struct kgsl_snapshot_gmu_version gmu_vers[] = { + { .type = SNAPSHOT_DEBUG_GMU_CORE_VERSION, + .value = gmu->ver.core, }, + { .type = SNAPSHOT_DEBUG_GMU_CORE_DEV_VERSION, + .value = gmu->ver.core_dev, }, + { .type = SNAPSHOT_DEBUG_GMU_PWR_VERSION, + .value = gmu->ver.pwr, }, + { .type = SNAPSHOT_DEBUG_GMU_PWR_DEV_VERSION, + .value = gmu->ver.pwr_dev, }, + { .type = SNAPSHOT_DEBUG_GMU_HFI_VERSION, + .value = gmu->ver.hfi, }, + }; + + for (i = 0; i < ARRAY_SIZE(gmu_vers); i++) + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, gen7_snapshot_gmu_version, + &gmu_vers[i]); +} + +#define RSCC_OFFSET_DWORDS 0x14000 + +static size_t gen7_snapshot_rscc_registers(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + const u32 *regs = priv; + unsigned int *data = (unsigned int *)buf; + int count = 0, k; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + + /* Figure out how many registers we are going to dump */ + count = adreno_snapshot_regs_count(regs); + + if (remain < (count * 4)) { + SNAPSHOT_ERR_NOMEM(device, "RSCC REGISTERS"); + return 0; + } + + for (regs = priv; regs[0] != UINT_MAX; regs += 2) { + unsigned int cnt = REG_COUNT(regs); + + if (cnt == 1) { + *data++ = BIT(31) | regs[0]; + *data++ = __raw_readl(gmu->rscc_virt + + ((regs[0] - RSCC_OFFSET_DWORDS) << 2)); + continue; + } + *data++ = regs[0]; + *data++ = cnt; + for (k = regs[0]; k <= regs[1]; k++) + *data++ = __raw_readl(gmu->rscc_virt + + ((k - RSCC_OFFSET_DWORDS) << 2)); + } + + /* Return the size of the section */ + return (count * 4); +} + +/* + * gen7_gmu_device_snapshot() - GEN7 GMU snapshot function + * @device: Device being snapshotted + * @snapshot: Pointer to the snapshot instance + * + * This is where all of the GEN7 GMU specific bits and pieces are grabbed + * into the snapshot memory + */ +void gen7_gmu_device_snapshot(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GMU_MEMORY, + snapshot, gen7_gmu_snapshot_itcm, gmu); + + gen7_gmu_snapshot_versions(device, gmu, snapshot); + + gen7_gmu_snapshot_memories(device, gmu, snapshot); + + kgsl_snapshot_add_section(device, 
KGSL_SNAPSHOT_SECTION_REGS_V2, snapshot, + adreno_snapshot_registers_v2, (void *) gen7_gmu_registers); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2, snapshot, + gen7_snapshot_rscc_registers, (void *) gen7_rscc_registers); + + if (!gen7_gmu_gx_is_on(device)) + goto dtcm; + + /* Set fence to ALLOW mode so registers can be read */ + kgsl_regwrite(device, GEN7_GMU_AO_AHB_FENCE_CTRL, 0); + /* Make sure the previous write posted before reading */ + wmb(); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2, snapshot, + adreno_snapshot_registers_v2, (void *) gen7_gmu_gx_registers); + + /* A stalled SMMU can lead to NoC timeouts when host accesses DTCM */ + if (gen7_is_smmu_stalled(device)) { + dev_err(&gmu->pdev->dev, + "Not dumping dtcm because SMMU is stalled\n"); + return; + } + +dtcm: + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GMU_MEMORY, + snapshot, gen7_gmu_snapshot_dtcm, gmu); +} diff --git a/adreno_gen7_hfi.c b/adreno_gen7_hfi.c new file mode 100644 index 0000000000..953b7da45d --- /dev/null +++ b/adreno_gen7_hfi.c @@ -0,0 +1,636 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + */ + +#include +#include + +#include "adreno.h" +#include "adreno_gen7.h" +#include "adreno_gen7_hfi.h" +#include "kgsl_device.h" +#include "kgsl_trace.h" + +/* Below section is for all structures related to HFI queues */ +#define HFI_QUEUE_MAX HFI_QUEUE_DEFAULT_CNT + +/* Total header sizes + queue sizes + 16 for alignment */ +#define HFIMEM_SIZE (sizeof(struct hfi_queue_table) + 16 + \ + (HFI_QUEUE_SIZE * HFI_QUEUE_MAX)) + +#define HOST_QUEUE_START_ADDR(hfi_mem, i) \ + ((hfi_mem)->hostptr + HFI_QUEUE_OFFSET(i)) + +struct gen7_hfi *to_gen7_hfi(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + + return &gmu->hfi; +} + +/* Size in below functions are in unit of dwords */ +int gen7_hfi_queue_read(struct gen7_gmu_device *gmu, u32 queue_idx, + unsigned int *output, unsigned int max_size) +{ + struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem; + struct hfi_queue_table *tbl = mem_addr->hostptr; + struct hfi_queue_header *hdr = &tbl->qhdr[queue_idx]; + u32 *queue; + u32 msg_hdr; + u32 i, read; + u32 size; + int result = 0; + + if (hdr->status == HFI_QUEUE_STATUS_DISABLED) + return -EINVAL; + + if (hdr->read_index == hdr->write_index) + return -ENODATA; + + /* Clear the output data before populating */ + memset(output, 0, max_size); + + queue = HOST_QUEUE_START_ADDR(mem_addr, queue_idx); + msg_hdr = queue[hdr->read_index]; + size = MSG_HDR_GET_SIZE(msg_hdr); + + if (size > (max_size >> 2)) { + dev_err(&gmu->pdev->dev, + "HFI message too big: hdr:0x%x rd idx=%d\n", + msg_hdr, hdr->read_index); + result = -EMSGSIZE; + goto done; + } + + read = hdr->read_index; + + if (read < hdr->queue_size) { + for (i = 0; i < size && i < (max_size >> 2); i++) { + output[i] = queue[read]; + read = (read + 1)%hdr->queue_size; + } + result = size; + } else { + /* In case FW messed up */ + dev_err(&gmu->pdev->dev, + "Read index %d greater than queue size %d\n", + hdr->read_index, hdr->queue_size); + result = -ENODATA; + } + + read = ALIGN(read, SZ_4) % hdr->queue_size; + + hfi_update_read_idx(hdr, read); + + /* For acks, trace the packet for which this ack was sent */ + if (MSG_HDR_GET_TYPE(msg_hdr) == HFI_MSG_ACK) + trace_kgsl_hfi_receive(MSG_HDR_GET_ID(output[1]), + MSG_HDR_GET_SIZE(output[1]), + MSG_HDR_GET_SEQNUM(output[1])); + else + 
trace_kgsl_hfi_receive(MSG_HDR_GET_ID(msg_hdr), + MSG_HDR_GET_SIZE(msg_hdr), MSG_HDR_GET_SEQNUM(msg_hdr)); + +done: + return result; +} + +/* Size in below functions are in unit of dwords */ +int gen7_hfi_queue_write(struct adreno_device *adreno_dev, u32 queue_idx, + u32 *msg) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct hfi_queue_table *tbl = gmu->hfi.hfi_mem->hostptr; + struct hfi_queue_header *hdr = &tbl->qhdr[queue_idx]; + u32 *queue; + u32 i, write, empty_space; + u32 size = MSG_HDR_GET_SIZE(*msg); + u32 align_size = ALIGN(size, SZ_4); + u32 id = MSG_HDR_GET_ID(*msg); + + if (hdr->status == HFI_QUEUE_STATUS_DISABLED) + return -EINVAL; + + queue = HOST_QUEUE_START_ADDR(gmu->hfi.hfi_mem, queue_idx); + + trace_kgsl_hfi_send(id, size, MSG_HDR_GET_SEQNUM(*msg)); + + empty_space = (hdr->write_index >= hdr->read_index) ? + (hdr->queue_size - (hdr->write_index - hdr->read_index)) + : (hdr->read_index - hdr->write_index); + + if (empty_space <= align_size) + return -ENOSPC; + + write = hdr->write_index; + + for (i = 0; i < size; i++) { + queue[write] = msg[i]; + write = (write + 1) % hdr->queue_size; + } + + /* Cookify any non used data at the end of the write buffer */ + for (; i < align_size; i++) { + queue[write] = 0xfafafafa; + write = (write + 1) % hdr->queue_size; + } + + hfi_update_write_idx(hdr, write); + + return 0; +} + +int gen7_hfi_cmdq_write(struct adreno_device *adreno_dev, u32 *msg) +{ + int ret; + + ret = gen7_hfi_queue_write(adreno_dev, HFI_CMD_ID, msg); + + /* + * Memory barrier to make sure packet and write index are written before + * an interrupt is raised + */ + wmb(); + + /* Send interrupt to GMU to receive the message */ + if (!ret) + gmu_core_regwrite(KGSL_DEVICE(adreno_dev), + GEN7_GMU_HOST2GMU_INTR_SET, 0x1); + + return ret; +} + +/* Sizes of the queue and message are in unit of dwords */ +static void init_queues(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem; + int i; + struct hfi_queue_table *tbl; + struct hfi_queue_header *hdr; + struct { + unsigned int idx; + unsigned int pri; + unsigned int status; + } queue[HFI_QUEUE_MAX] = { + { HFI_CMD_ID, HFI_CMD_PRI, HFI_QUEUE_STATUS_ENABLED }, + { HFI_MSG_ID, HFI_MSG_PRI, HFI_QUEUE_STATUS_ENABLED }, + { HFI_DBG_ID, HFI_DBG_PRI, HFI_QUEUE_STATUS_ENABLED }, + }; + + /* Fill Table Header */ + tbl = mem_addr->hostptr; + tbl->qtbl_hdr.version = 0; + tbl->qtbl_hdr.size = sizeof(struct hfi_queue_table) >> 2; + tbl->qtbl_hdr.qhdr0_offset = sizeof(struct hfi_queue_table_header) >> 2; + tbl->qtbl_hdr.qhdr_size = sizeof(struct hfi_queue_header) >> 2; + tbl->qtbl_hdr.num_q = HFI_QUEUE_MAX; + tbl->qtbl_hdr.num_active_q = HFI_QUEUE_MAX; + + memset(&tbl->qhdr[0], 0, sizeof(tbl->qhdr)); + + /* Fill Individual Queue Headers */ + for (i = 0; i < HFI_QUEUE_MAX; i++) { + hdr = &tbl->qhdr[i]; + hdr->start_addr = GMU_QUEUE_START_ADDR(mem_addr->gmuaddr, i); + hdr->type = QUEUE_HDR_TYPE(queue[i].idx, queue[i].pri, 0, 0); + hdr->status = queue[i].status; + hdr->queue_size = HFI_QUEUE_SIZE >> 2; /* convert to dwords */ + } +} + +int gen7_hfi_init(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct gen7_hfi *hfi = &gmu->hfi; + + /* Allocates & maps memory for HFI */ + if (IS_ERR_OR_NULL(hfi->hfi_mem)) { + hfi->hfi_mem = gen7_reserve_gmu_kernel_block(gmu, 0, + HFIMEM_SIZE, GMU_NONCACHED_KERNEL); + if (!IS_ERR(hfi->hfi_mem)) + init_queues(adreno_dev); + } + + return 
PTR_ERR_OR_ZERO(hfi->hfi_mem); +} + +int gen7_receive_ack_cmd(struct gen7_gmu_device *gmu, void *rcvd, + struct pending_cmd *ret_cmd) +{ + struct adreno_device *adreno_dev = gen7_gmu_to_adreno(gmu); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 *ack = rcvd; + u32 hdr = ack[0]; + u32 req_hdr = ack[1]; + + if (ret_cmd == NULL) + return -EINVAL; + + if (HDR_CMP_SEQNUM(ret_cmd->sent_hdr, req_hdr)) { + memcpy(&ret_cmd->results, ack, MSG_HDR_GET_SIZE(hdr) << 2); + return 0; + } + + /* Didn't find the sender, list the waiter */ + dev_err_ratelimited(&gmu->pdev->dev, + "HFI ACK: Cannot find sender for 0x%8.8x Waiter: 0x%8.8x\n", + req_hdr, ret_cmd->sent_hdr); + + gmu_core_fault_snapshot(device); + + return -ENODEV; +} + +static int poll_gmu_reg(struct adreno_device *adreno_dev, + u32 offsetdwords, unsigned int expected_val, + unsigned int mask, unsigned int timeout_ms) +{ + unsigned int val; + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms); + bool nmi = false; + + while (time_is_after_jiffies(timeout)) { + gmu_core_regread(device, offsetdwords, &val); + if ((val & mask) == expected_val) + return 0; + + /* + * If GMU firmware fails any assertion, error message is sent + * to KMD and NMI is triggered. So check if GMU is in NMI and + * timeout early. Bits [11:9] of A6XX_GMU_CM3_FW_INIT_RESULT + * contain GMU reset status. Non zero value here indicates that + * GMU reset is active, NMI handler would eventually complete + * and GMU would wait for recovery. + */ + gmu_core_regread(device, GEN7_GMU_CM3_FW_INIT_RESULT, &val); + if (val & 0xE00) { + nmi = true; + break; + } + + usleep_range(10, 100); + } + + /* Check one last time */ + gmu_core_regread(device, offsetdwords, &val); + if ((val & mask) == expected_val) + return 0; + + dev_err(&gmu->pdev->dev, + "Reg poll %s: offset 0x%x, want 0x%x, got 0x%x\n", + nmi ? 
"abort" : "timeout", offsetdwords, expected_val, + val & mask); + + return -ETIMEDOUT; +} + +static int gen7_hfi_send_cmd_wait_inline(struct adreno_device *adreno_dev, + void *data, struct pending_cmd *ret_cmd) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int rc; + u32 *cmd = data; + struct gen7_hfi *hfi = &gmu->hfi; + unsigned int seqnum = atomic_inc_return(&hfi->seqnum); + + *cmd = MSG_HDR_SET_SEQNUM(*cmd, seqnum); + if (ret_cmd == NULL) + return gen7_hfi_cmdq_write(adreno_dev, cmd); + + ret_cmd->sent_hdr = cmd[0]; + + rc = gen7_hfi_cmdq_write(adreno_dev, cmd); + if (rc) + return rc; + + rc = poll_gmu_reg(adreno_dev, GEN7_GMU_GMU2HOST_INTR_INFO, + HFI_IRQ_MSGQ_MASK, HFI_IRQ_MSGQ_MASK, HFI_RSP_TIMEOUT); + + if (rc) { + gmu_core_fault_snapshot(device); + dev_err(&gmu->pdev->dev, + "Timed out waiting on ack for 0x%8.8x (id %d, sequence %d)\n", + cmd[0], MSG_HDR_GET_ID(*cmd), MSG_HDR_GET_SEQNUM(*cmd)); + return rc; + } + + /* Clear the interrupt */ + gmu_core_regwrite(device, GEN7_GMU_GMU2HOST_INTR_CLR, + HFI_IRQ_MSGQ_MASK); + + rc = gen7_hfi_process_queue(gmu, HFI_MSG_ID, ret_cmd); + + return rc; +} + +#define HFI_ACK_ERROR 0xffffffff + +int gen7_hfi_send_generic_req(struct adreno_device *adreno_dev, void *cmd) +{ + struct pending_cmd ret_cmd; + int rc; + + memset(&ret_cmd, 0, sizeof(ret_cmd)); + + rc = gen7_hfi_send_cmd_wait_inline(adreno_dev, cmd, &ret_cmd); + + if (!rc && ret_cmd.results[2] == HFI_ACK_ERROR) { + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + gmu_core_fault_snapshot(device); + dev_err(&gmu->pdev->dev, "HFI ACK failure: Req 0x%8.8X\n", + ret_cmd.results[1]); + return -EINVAL; + } + + return rc; +} + +int gen7_hfi_send_core_fw_start(struct adreno_device *adreno_dev) +{ + struct hfi_core_fw_start_cmd cmd = { + .handle = 0x0, + }; + int ret; + + ret = CMD_MSG_HDR(cmd, H2F_MSG_CORE_FW_START); + if (ret) + return ret; + + return gen7_hfi_send_generic_req(adreno_dev, &cmd); +} + +static const char *feature_to_string(u32 feature) +{ + if (feature == HFI_FEATURE_ACD) + return "ACD"; + else if (feature == HFI_FEATURE_LM) + return "LM"; + + return "unknown"; +} + +int gen7_hfi_send_feature_ctrl(struct adreno_device *adreno_dev, + u32 feature, u32 enable, u32 data) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct hfi_feature_ctrl_cmd cmd = { + .feature = feature, + .enable = enable, + .data = data, + }; + int ret; + + ret = CMD_MSG_HDR(cmd, H2F_MSG_FEATURE_CTRL); + if (ret) + return ret; + + ret = gen7_hfi_send_generic_req(adreno_dev, &cmd); + if (ret) + dev_err(&gmu->pdev->dev, + "Unable to %s feature %s (%d)\n", + enable ? 
"enable" : "disable", + feature_to_string(feature), + feature); + return ret; +} + +int gen7_hfi_send_set_value(struct adreno_device *adreno_dev, + u32 type, u32 subtype, u32 data) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct hfi_set_value_cmd cmd = { + .type = type, + .subtype = subtype, + .data = data, + }; + int ret; + + ret = CMD_MSG_HDR(cmd, H2F_MSG_SET_VALUE); + if (ret) + return ret; + + ret = gen7_hfi_send_generic_req(adreno_dev, &cmd); + if (ret) + dev_err(&gmu->pdev->dev, + "Unable to set HFI Value %d, %d to %d, error = %d\n", + type, subtype, data, ret); + return ret; +} + +void adreno_gen7_receive_err_req(struct gen7_gmu_device *gmu, void *rcvd) +{ + struct hfi_err_cmd *cmd = rcvd; + + dev_err(&gmu->pdev->dev, "HFI Error Received: %d %d %.16s\n", + ((cmd->error_code >> 16) & 0xffff), + (cmd->error_code & 0xffff), + (char *) cmd->data); +} + +void adreno_gen7_receive_debug_req(struct gen7_gmu_device *gmu, void *rcvd) +{ + struct hfi_debug_cmd *cmd = rcvd; + + dev_dbg(&gmu->pdev->dev, "HFI Debug Received: %d %d %d\n", + cmd->type, cmd->timestamp, cmd->data); +} + +int gen7_hfi_process_queue(struct gen7_gmu_device *gmu, + u32 queue_idx, struct pending_cmd *ret_cmd) +{ + u32 rcvd[MAX_RCVD_SIZE]; + + while (gen7_hfi_queue_read(gmu, queue_idx, rcvd, sizeof(rcvd)) > 0) { + /* ACK Handler */ + if (MSG_HDR_GET_TYPE(rcvd[0]) == HFI_MSG_ACK) { + int ret = gen7_receive_ack_cmd(gmu, rcvd, ret_cmd); + + if (ret) + return ret; + continue; + } + + /* Request Handler */ + switch (MSG_HDR_GET_ID(rcvd[0])) { + case F2H_MSG_ERR: /* No Reply */ + adreno_gen7_receive_err_req(gmu, rcvd); + break; + case F2H_MSG_DEBUG: /* No Reply */ + adreno_gen7_receive_debug_req(gmu, rcvd); + break; + default: /* No Reply */ + dev_err(&gmu->pdev->dev, + "HFI request %d not supported\n", + MSG_HDR_GET_ID(rcvd[0])); + break; + } + } + + return 0; +} + +int gen7_hfi_send_bcl_feature_ctrl(struct adreno_device *adreno_dev) +{ + int ret; + + if (!adreno_dev->bcl_enabled) + return 0; + + ret = gen7_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_BCL, 1, 0); + + return ret; +} + +int gen7_hfi_send_acd_feature_ctrl(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int ret = 0; + + if (adreno_dev->acd_enabled) { + ret = gen7_hfi_send_feature_ctrl(adreno_dev, + HFI_FEATURE_ACD, 1, 0); + + if (!ret) + ret = gen7_hfi_send_generic_req(adreno_dev, + &gmu->hfi.acd_table); + } + + return ret; +} + +int gen7_hfi_send_ifpc_feature_ctrl(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + + if (gmu->idle_level == GPU_HW_IFPC) + return gen7_hfi_send_feature_ctrl(adreno_dev, + HFI_FEATURE_IFPC, 1, 0x1680); + return 0; +} + +int gen7_hfi_start(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem; + struct hfi_queue_table *tbl = mem_addr->hostptr; + struct hfi_queue_header *hdr; + int result, i; + + /* Force read_index to the write_index no matter what */ + for (i = 0; i < HFI_QUEUE_MAX; i++) { + hdr = &tbl->qhdr[i]; + if (hdr->status == HFI_QUEUE_STATUS_DISABLED) + continue; + + if (hdr->read_index != hdr->write_index) { + dev_err(&gmu->pdev->dev, + "HFI Q[%d] Index Error: read:0x%X write:0x%X\n", + i, hdr->read_index, hdr->write_index); + hdr->read_index = hdr->write_index; + } + } + + result = gen7_hfi_send_generic_req(adreno_dev, &gmu->hfi.dcvs_table); + if (result) + goto err; + 
+ result = gen7_hfi_send_generic_req(adreno_dev, &gmu->hfi.bw_table); + if (result) + goto err; + + result = gen7_hfi_send_acd_feature_ctrl(adreno_dev); + if (result) + goto err; + + result = gen7_hfi_send_bcl_feature_ctrl(adreno_dev); + if (result) + goto err; + + result = gen7_hfi_send_ifpc_feature_ctrl(adreno_dev); + if (result) + goto err; + + result = gen7_hfi_send_core_fw_start(adreno_dev); + if (result) + goto err; + + set_bit(GMU_PRIV_HFI_STARTED, &gmu->flags); + + /* Request default DCVS level */ + result = kgsl_pwrctrl_set_default_gpu_pwrlevel(device); + if (result) + goto err; + + /* Request default BW vote */ + result = kgsl_pwrctrl_axi(device, true); + +err: + if (result) + gen7_hfi_stop(adreno_dev); + + return result; + +} + +void gen7_hfi_stop(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem; + struct hfi_queue_table *tbl = mem_addr->hostptr; + struct hfi_queue_header *hdr; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned int i; + + /* Flush HFI queues */ + for (i = 0; i < HFI_QUEUE_MAX; i++) { + hdr = &tbl->qhdr[i]; + if (hdr->status == HFI_QUEUE_STATUS_DISABLED) + continue; + + if (hdr->read_index != hdr->write_index) + dev_err(&gmu->pdev->dev, + "HFI queue[%d] is not empty before close: rd=%d,wt=%d\n", + i, hdr->read_index, hdr->write_index); + } + + kgsl_pwrctrl_axi(device, false); + + clear_bit(GMU_PRIV_HFI_STARTED, &gmu->flags); + +} + +/* HFI interrupt handler */ +irqreturn_t gen7_hfi_irq_handler(int irq, void *data) +{ + struct kgsl_device *device = data; + struct gen7_gmu_device *gmu = to_gen7_gmu(ADRENO_DEVICE(device)); + unsigned int status = 0; + + gmu_core_regread(device, GEN7_GMU_GMU2HOST_INTR_INFO, &status); + gmu_core_regwrite(device, GEN7_GMU_GMU2HOST_INTR_CLR, HFI_IRQ_MASK); + + if (status & HFI_IRQ_DBGQ_MASK) + gen7_hfi_process_queue(gmu, HFI_DBG_ID, NULL); + if (status & HFI_IRQ_CM3_FAULT_MASK) { + dev_err_ratelimited(&gmu->pdev->dev, + "GMU CM3 fault interrupt received\n"); + atomic_set(&gmu->cm3_fault, 1); + + /* make sure other CPUs see the update */ + smp_wmb(); + } + if (status & ~HFI_IRQ_MASK) + dev_err_ratelimited(&gmu->pdev->dev, + "Unhandled HFI interrupts 0x%lx\n", + status & ~HFI_IRQ_MASK); + + return IRQ_HANDLED; +} diff --git a/adreno_gen7_hfi.h b/adreno_gen7_hfi.h new file mode 100644 index 0000000000..273dc7deb5 --- /dev/null +++ b/adreno_gen7_hfi.h @@ -0,0 +1,185 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + */ +#ifndef __ADRENO_GEN7_HFI_H +#define __ADRENO_GEN7_HFI_H + +#include "adreno_hfi.h" + +/** + * struct gen7_hfi - HFI control structure + */ +struct gen7_hfi { + /** @irq: HFI interrupt line */ + int irq; + /** @seqnum: atomic counter that is incremented for each message sent. + * The value of the counter is used as sequence number for HFI message. 
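+ * The sequence number is stamped into each outgoing packet header by the
+ * send path and echoed back by the GMU in the corresponding ack, which is
+ * how an ack is matched to the waiter that sent the request.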
+ */
+ atomic_t seqnum;
+ /** @hfi_mem: Memory descriptor for the hfi memory */
+ struct kgsl_memdesc *hfi_mem;
+ /** @bw_table: HFI BW table buffer */
+ struct hfi_bwtable_cmd bw_table;
+ /** @acd_table: HFI table for ACD data */
+ struct hfi_acd_table_cmd acd_table;
+ /** @dcvs_table: HFI table for gpu dcvs levels */
+ struct hfi_dcvstable_cmd dcvs_table;
+};
+
+struct gen7_gmu_device;
+
+/* gen7_hfi_irq_handler - IRQ handler for HFI interrupts */
+irqreturn_t gen7_hfi_irq_handler(int irq, void *data);
+
+/**
+ * gen7_hfi_start - Send the various HFIs during device boot up
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_hfi_start(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_hfi_stop - Stop HFI communication with the GMU and flush its queues
+ * @adreno_dev: Pointer to the adreno device
+ */
+void gen7_hfi_stop(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_hfi_init - Initialize hfi resources
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * This function allocates and sets up hfi queues
+ * when a process creates the very first kgsl instance
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_hfi_init(struct adreno_device *adreno_dev);
+
+/* Helper function to get the gen7 hfi struct from the adreno device */
+struct gen7_hfi *to_gen7_hfi(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_hfi_queue_write - Write a command to hfi queue
+ * @adreno_dev: Pointer to the adreno device
+ * @queue_idx: destination queue id
+ * @msg: Data to be written to the queue
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_hfi_queue_write(struct adreno_device *adreno_dev, u32 queue_idx,
+ u32 *msg);
+
+/**
+ * gen7_hfi_queue_read - Read data from hfi queue
+ * @gmu: Pointer to the gen7 gmu device
+ * @queue_idx: queue id to read from
+ * @output: Pointer to read the data into
+ * @max_size: Size of the output buffer in bytes
+ *
+ * Return: Number of dwords read on success or negative error on failure
+ */
+int gen7_hfi_queue_read(struct gen7_gmu_device *gmu, u32 queue_idx,
+ u32 *output, u32 max_size);
+
+/**
+ * gen7_receive_ack_cmd - Process ack type packets
+ * @gmu: Pointer to the gen7 gmu device
+ * @rcvd: Pointer to the data read from hfi queue
+ * @ret_cmd: Container for the hfi packet for which this ack is received
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_receive_ack_cmd(struct gen7_gmu_device *gmu, void *rcvd,
+ struct pending_cmd *ret_cmd);
+
+/**
+ * gen7_hfi_send_feature_ctrl - Enable gmu feature via hfi
+ * @adreno_dev: Pointer to the adreno device
+ * @feature: feature to be enabled or disabled
+ * @enable: Set 1 to enable or 0 to disable a feature
+ * @data: payload for the send feature hfi packet
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_hfi_send_feature_ctrl(struct adreno_device *adreno_dev,
+ u32 feature, u32 enable, u32 data);
+
+/**
+ * gen7_hfi_send_set_value - Send gmu set_values via hfi
+ * @adreno_dev: Pointer to the adreno device
+ * @type: GMU set_value type
+ * @subtype: GMU set_value subtype
+ * @data: Value to set
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_hfi_send_set_value(struct adreno_device *adreno_dev,
+ u32 type, u32 subtype, u32 data);
+
+/**
+ * gen7_hfi_send_core_fw_start - Send the core fw start hfi
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int 
gen7_hfi_send_core_fw_start(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_hfi_send_acd_feature_ctrl - Send the acd table and acd feature
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_hfi_send_acd_feature_ctrl(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_hfi_send_generic_req - Send a generic hfi packet
+ * @adreno_dev: Pointer to the adreno device
+ * @cmd: Pointer to the hfi packet header and data
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_hfi_send_generic_req(struct adreno_device *adreno_dev, void *cmd);
+
+/**
+ * gen7_hfi_send_bcl_feature_ctrl - Send the bcl feature hfi packet
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_hfi_send_bcl_feature_ctrl(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_hfi_send_ifpc_feature_ctrl - Send the ifpc feature hfi packet
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_hfi_send_ifpc_feature_ctrl(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_hfi_process_queue - Check hfi queue for messages from gmu
+ * @gmu: Pointer to the gen7 gmu device
+ * @queue_idx: queue id to be processed
+ * @ret_cmd: Container for data needed for waiting for the ack
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_hfi_process_queue(struct gen7_gmu_device *gmu,
+ u32 queue_idx, struct pending_cmd *ret_cmd);
+
+/**
+ * gen7_hfi_cmdq_write - Write a command to command queue
+ * @adreno_dev: Pointer to the adreno device
+ * @msg: Data to be written to the queue
+ *
+ * Return: 0 on success or negative error on failure
+ */
+int gen7_hfi_cmdq_write(struct adreno_device *adreno_dev, u32 *msg);
+void adreno_gen7_receive_err_req(struct gen7_gmu_device *gmu, void *rcvd);
+void adreno_gen7_receive_debug_req(struct gen7_gmu_device *gmu, void *rcvd);
+#endif
diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c
new file mode 100644
index 0000000000..710c696557
--- /dev/null
+++ b/adreno_gen7_hwsched.c
@@ -0,0 +1,1161 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ */ + +#include +#include +#include +#include + +#include "adreno.h" +#include "adreno_gen7.h" +#include "adreno_gen7_hwsched.h" +#include "adreno_snapshot.h" +#include "kgsl_device.h" +#include "kgsl_trace.h" +#include "kgsl_util.h" + +static size_t adreno_hwsched_snapshot_rb(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct kgsl_snapshot_rb_v2 *header = (struct kgsl_snapshot_rb_v2 *)buf; + u32 *data = (u32 *)(buf + sizeof(*header)); + struct kgsl_memdesc *rb = (struct kgsl_memdesc *)priv; + + if (remain < rb->size + sizeof(*header)) { + SNAPSHOT_ERR_NOMEM(device, "RB"); + return 0; + } + + header->start = 0; + header->end = rb->size >> 2; + header->rptr = 0; + header->rbsize = rb->size >> 2; + header->count = rb->size >> 2; + header->timestamp_queued = 0; + header->timestamp_retired = 0; + header->gpuaddr = rb->gpuaddr; + header->id = 0; + + memcpy(data, rb->hostptr, rb->size); + + return rb->size + sizeof(*header); +} + +static void gen7_hwsched_snapshot_preemption_record(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, struct kgsl_memdesc *md, u64 offset) +{ + struct kgsl_snapshot_section_header *section_header = + (struct kgsl_snapshot_section_header *)snapshot->ptr; + u8 *dest = snapshot->ptr + sizeof(*section_header); + struct kgsl_snapshot_gpu_object_v2 *header = + (struct kgsl_snapshot_gpu_object_v2 *)dest; + const struct adreno_gen7_core *gen7_core = to_gen7_core(ADRENO_DEVICE(device)); + u64 ctxt_record_size = GEN7_CP_CTXRECORD_SIZE_IN_BYTES; + size_t section_size; + + if (gen7_core->ctxt_record_size) + ctxt_record_size = gen7_core->ctxt_record_size; + + ctxt_record_size = min_t(u64, ctxt_record_size, device->snapshot_ctxt_record_size); + + section_size = sizeof(*section_header) + sizeof(*header) + ctxt_record_size; + if (snapshot->remain < section_size) { + SNAPSHOT_ERR_NOMEM(device, "PREEMPTION RECORD"); + return; + } + + section_header->magic = SNAPSHOT_SECTION_MAGIC; + section_header->id = KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2; + section_header->size = section_size; + + header->size = ctxt_record_size >> 2; + header->gpuaddr = md->gpuaddr + offset; + header->ptbase = + kgsl_mmu_pagetable_get_ttbr0(device->mmu.defaultpagetable); + header->type = SNAPSHOT_GPU_OBJECT_GLOBAL; + + dest += sizeof(*header); + + memcpy(dest, md->hostptr + offset, ctxt_record_size); + + snapshot->ptr += section_header->size; + snapshot->remain -= section_header->size; + snapshot->size += section_header->size; +} + +static void snapshot_preemption_records(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, struct kgsl_memdesc *md) +{ + const struct adreno_gen7_core *gen7_core = + to_gen7_core(ADRENO_DEVICE(device)); + u64 ctxt_record_size = GEN7_CP_CTXRECORD_SIZE_IN_BYTES; + u64 offset; + + if (gen7_core->ctxt_record_size) + ctxt_record_size = gen7_core->ctxt_record_size; + + /* All preemption records exist as a single mem alloc entry */ + for (offset = 0; offset < md->size; offset += ctxt_record_size) + gen7_hwsched_snapshot_preemption_record(device, snapshot, md, + offset); +} + +static u32 gen7_copy_gpu_global(struct adreno_device *adreno_dev, + void *out, u64 gpuaddr, u32 size) +{ + struct gen7_hwsched_hfi *hw_hfi = to_gen7_hwsched_hfi(adreno_dev); + u64 offset; + u32 i; + + for (i = 0; i < hw_hfi->mem_alloc_entries; i++) { + struct kgsl_memdesc *md = hw_hfi->mem_alloc_table[i].md; + + if (md && (gpuaddr >= md->gpuaddr) && + ((gpuaddr + size) <= (md->gpuaddr + md->size))) { + offset = gpuaddr - md->gpuaddr; + memcpy(out, md->hostptr + offset, 
size); + return size; + } + } + + return 0; +} + +static size_t adreno_hwsched_snapshot_rb_payload(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_rb_v2 *header = (struct kgsl_snapshot_rb_v2 *)buf; + u32 *data = (u32 *)(buf + sizeof(*header)); + struct payload_section *payload = (struct payload_section *)priv; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + u32 size = gen7_hwsched_parse_payload(payload, KEY_RB_SIZEDWORDS) << 2; + u64 lo, hi, gpuaddr; + + lo = gen7_hwsched_parse_payload(payload, KEY_RB_GPUADDR_LO); + hi = gen7_hwsched_parse_payload(payload, KEY_RB_GPUADDR_HI); + gpuaddr = hi << 32 | lo; + + /* If the gpuaddress and size don't match any allocation, then abort */ + if ((remain < size + sizeof(*header)) || + !gen7_copy_gpu_global(adreno_dev, data, gpuaddr, size)) { + SNAPSHOT_ERR_NOMEM(device, "RB"); + return 0; + } + + header->start = 0; + header->end = size >> 2; + header->rptr = gen7_hwsched_parse_payload(payload, KEY_RB_RPTR); + header->wptr = gen7_hwsched_parse_payload(payload, KEY_RB_WPTR); + header->rbsize = size >> 2; + header->count = size >> 2; + header->timestamp_queued = gen7_hwsched_parse_payload(payload, + KEY_RB_QUEUED_TS); + header->timestamp_retired = gen7_hwsched_parse_payload(payload, + KEY_RB_RETIRED_TS); + header->gpuaddr = gpuaddr; + header->id = gen7_hwsched_parse_payload(payload, KEY_RB_ID); + + return size + sizeof(*header); +} + +static bool parse_payload_rb(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot) +{ + struct hfi_context_bad_cmd *cmd = adreno_dev->hwsched.ctxt_bad; + u32 i = 0, payload_bytes; + void *start; + bool ret = false; + + /* Skip if we didn't receive a context bad HFI */ + if (!cmd->hdr) + return false; + + payload_bytes = (MSG_HDR_GET_SIZE(cmd->hdr) << 2) - + offsetof(struct hfi_context_bad_cmd, payload); + + start = &cmd->payload[0]; + + while (i < payload_bytes) { + struct payload_section *payload = start + i; + + if (payload->type == PAYLOAD_RB) { + kgsl_snapshot_add_section(KGSL_DEVICE(adreno_dev), + KGSL_SNAPSHOT_SECTION_RB_V2, + snapshot, adreno_hwsched_snapshot_rb_payload, + payload); + ret = true; + } + + i += sizeof(*payload) + (payload->dwords << 2); + } + + return ret; +} + +void gen7_hwsched_snapshot(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_hwsched_hfi *hw_hfi = to_gen7_hwsched_hfi(adreno_dev); + bool skip_memkind_rb = false; + u32 i; + + gen7_gmu_snapshot(adreno_dev, snapshot); + + adreno_hwsched_parse_fault_cmdobj(adreno_dev, snapshot); + + /* + * First try to dump ringbuffers using context bad HFI payloads + * because they have all the ringbuffer parameters. 
If ringbuffer + * payloads are not present, fall back to dumping ringbuffers + * based on MEMKIND_RB + */ + if (parse_payload_rb(adreno_dev, snapshot)) + skip_memkind_rb = true; + + for (i = 0; i < hw_hfi->mem_alloc_entries; i++) { + struct hfi_mem_alloc_entry *entry = &hw_hfi->mem_alloc_table[i]; + + if (entry->desc.mem_kind == HFI_MEMKIND_RB && !skip_memkind_rb) + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_RB_V2, + snapshot, adreno_hwsched_snapshot_rb, + entry->md); + + if (entry->desc.mem_kind == HFI_MEMKIND_SCRATCH) + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2, + snapshot, adreno_snapshot_global, + entry->md); + + if (entry->desc.mem_kind == HFI_MEMKIND_PROFILE) + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2, + snapshot, adreno_snapshot_global, + entry->md); + + if (entry->desc.mem_kind == HFI_MEMKIND_CSW_SMMU_INFO) + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2, + snapshot, adreno_snapshot_global, + entry->md); + + if (entry->desc.mem_kind == HFI_MEMKIND_CSW_PRIV_NON_SECURE) + snapshot_preemption_records(device, snapshot, + entry->md); + } + + adreno_hwsched_parse_fault_cmdobj(adreno_dev, snapshot); +} + +static int gen7_hwsched_gmu_first_boot(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int level, ret = 0; + + trace_kgsl_pwr_request_state(device, KGSL_STATE_AWARE); + + gen7_gmu_aop_send_acd_state(gmu, adreno_dev->acd_enabled); + + ret = gen7_gmu_enable_gdsc(adreno_dev); + if (ret) + return ret; + + ret = gen7_gmu_enable_clks(adreno_dev); + if (ret) + goto gdsc_off; + + ret = gen7_gmu_load_fw(adreno_dev); + if (ret) + goto clks_gdsc_off; + + ret = gen7_gmu_itcm_shadow(adreno_dev); + if (ret) + goto clks_gdsc_off; + + if (!test_bit(GMU_PRIV_PDC_RSC_LOADED, &gmu->flags)) { + ret = gen7_load_pdc_ucode(adreno_dev); + if (ret) + goto clks_gdsc_off; + + gen7_load_rsc_ucode(adreno_dev); + set_bit(GMU_PRIV_PDC_RSC_LOADED, &gmu->flags); + } + + gen7_gmu_register_config(adreno_dev); + + gen7_gmu_version_info(adreno_dev); + + gen7_gmu_irq_enable(adreno_dev); + + /* Vote for minimal DDR BW for GMU to init */ + level = pwr->pwrlevels[pwr->default_pwrlevel].bus_min; + + icc_set_bw(pwr->icc_path, 0, kBps_to_icc(pwr->ddr_table[level])); + + ret = gen7_gmu_device_start(adreno_dev); + if (ret) + goto err; + + ret = gen7_hwsched_hfi_start(adreno_dev); + if (ret) + goto err; + + icc_set_bw(pwr->icc_path, 0, 0); + + device->gmu_fault = false; + + if (ADRENO_FEATURE(adreno_dev, ADRENO_BCL)) + adreno_dev->bcl_enabled = true; + + trace_kgsl_pwr_set_state(device, KGSL_STATE_AWARE); + + return 0; + +err: + if (device->gmu_fault) { + gen7_gmu_suspend(adreno_dev); + + return ret; + } + + gen7_gmu_irq_disable(adreno_dev); + +clks_gdsc_off: + clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); + +gdsc_off: + /* Poll to make sure that the CX is off */ + gen7_cx_regulator_disable_wait(gmu->cx_gdsc, device, 5000); + + return ret; +} + +static int gen7_hwsched_gmu_boot(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int ret = 0; + + trace_kgsl_pwr_request_state(device, KGSL_STATE_AWARE); + + ret = gen7_gmu_enable_gdsc(adreno_dev); + if (ret) + return ret; + + ret = gen7_gmu_enable_clks(adreno_dev); + if (ret) + goto gdsc_off; + + ret = 
gen7_rscc_wakeup_sequence(adreno_dev); + if (ret) + goto clks_gdsc_off; + + ret = gen7_gmu_load_fw(adreno_dev); + if (ret) + goto clks_gdsc_off; + + gen7_gmu_register_config(adreno_dev); + + gen7_gmu_irq_enable(adreno_dev); + + ret = gen7_gmu_device_start(adreno_dev); + if (ret) + goto err; + + ret = gen7_hwsched_hfi_start(adreno_dev); + if (ret) + goto err; + + device->gmu_fault = false; + + trace_kgsl_pwr_set_state(device, KGSL_STATE_AWARE); + + return 0; +err: + if (device->gmu_fault) { + gen7_gmu_suspend(adreno_dev); + + return ret; + } + + gen7_gmu_irq_disable(adreno_dev); + +clks_gdsc_off: + clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); + +gdsc_off: + /* Poll to make sure that the CX is off */ + gen7_cx_regulator_disable_wait(gmu->cx_gdsc, device, 5000); + + return ret; +} + +void gen7_hwsched_active_count_put(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (WARN_ON(!mutex_is_locked(&device->mutex))) + return; + + if (WARN(atomic_read(&device->active_cnt) == 0, + "Unbalanced get/put calls to KGSL active count\n")) + return; + + if (atomic_dec_and_test(&device->active_cnt)) { + kgsl_pwrscale_update_stats(device); + kgsl_pwrscale_update(device); + kgsl_start_idle_timer(device); + } + + trace_kgsl_active_count(device, + (unsigned long) __builtin_return_address(0)); + + wake_up(&device->active_cnt_wq); +} + +static int gen7_hwsched_notify_slumber(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct hfi_prep_slumber_cmd req; + int ret; + + ret = CMD_MSG_HDR(req, H2F_MSG_PREPARE_SLUMBER); + if (ret) + return ret; + + req.freq = gmu->hfi.dcvs_table.gpu_level_num - + pwr->default_pwrlevel - 1; + req.bw = pwr->pwrlevels[pwr->default_pwrlevel].bus_freq; + + /* Disable the power counter so that the GMU is not busy */ + gmu_core_regwrite(device, GEN7_GMU_CX_GMU_POWER_COUNTER_ENABLE, 0); + + return gen7_hfi_send_cmd_async(adreno_dev, &req); + +} +static int gen7_hwsched_gmu_power_off(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int ret = 0; + + if (device->gmu_fault) + goto error; + + /* Wait for the lowest idle level we requested */ + ret = gen7_gmu_wait_for_lowest_idle(adreno_dev); + if (ret) + goto error; + + ret = gen7_hwsched_notify_slumber(adreno_dev); + if (ret) + goto error; + + ret = gen7_gmu_wait_for_idle(adreno_dev); + if (ret) + goto error; + + ret = gen7_rscc_sleep_sequence(adreno_dev); + + /* Now that we are done with GMU and GPU, Clear the GBIF */ + ret = gen7_halt_gbif(adreno_dev); + + gen7_gmu_irq_disable(adreno_dev); + + gen7_hwsched_hfi_stop(adreno_dev); + + clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); + + /* Poll to make sure that the CX is off */ + gen7_cx_regulator_disable_wait(gmu->cx_gdsc, device, 5000); + + return ret; + +error: + gen7_hwsched_hfi_stop(adreno_dev); + gen7_gmu_suspend(adreno_dev); + + return ret; +} + +static int gen7_hwsched_gpu_boot(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + + /* Clear any GPU faults that might have been left over */ + adreno_clear_gpu_fault(adreno_dev); + + ret = kgsl_mmu_start(device); + if (ret) + goto err; + + ret = gen7_gmu_oob_set(device, oob_gpu); + if (ret) + goto err; + + /* Clear the busy_data stats - we're starting over from scratch */ + 
memset(&adreno_dev->busy_data, 0, sizeof(adreno_dev->busy_data)); + + gen7_start(adreno_dev); + + /* Re-initialize the coresight registers if applicable */ + adreno_coresight_start(adreno_dev); + + adreno_perfcounter_start(adreno_dev); + + /* Clear FSR here in case it is set from a previous pagefault */ + kgsl_mmu_clear_fsr(&device->mmu); + + gen7_enable_gpu_irq(adreno_dev); + + ret = gen7_hwsched_cp_init(adreno_dev); + if (ret) { + gen7_disable_gpu_irq(adreno_dev); + goto err; + } + + device->reset_counter++; +err: + gen7_gmu_oob_clear(device, oob_gpu); + + if (ret) + gen7_hwsched_gmu_power_off(adreno_dev); + + return ret; +} + +static void hwsched_idle_timer(struct timer_list *t) +{ + struct kgsl_device *device = container_of(t, struct kgsl_device, + idle_timer); + + kgsl_schedule_work(&device->idle_check_ws); +} + +static int gen7_hwsched_gmu_init(struct adreno_device *adreno_dev) +{ + int ret; + + ret = gen7_gmu_parse_fw(adreno_dev); + if (ret) + return ret; + + ret = gen7_gmu_memory_init(adreno_dev); + if (ret) + return ret; + + return gen7_hwsched_hfi_init(adreno_dev); +} + +static void gen7_hwsched_touch_wakeup(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int ret; + + /* + * Do not wake up a suspended device or until the first boot sequence + * has been completed. + */ + if (test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags) || + !test_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags)) + return; + + if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) + goto done; + + trace_kgsl_pwr_request_state(device, KGSL_STATE_ACTIVE); + + ret = gen7_hwsched_gmu_boot(adreno_dev); + if (ret) + return; + + ret = gen7_hwsched_gpu_boot(adreno_dev); + if (ret) + return; + + kgsl_pwrscale_wake(device); + + set_bit(GMU_PRIV_GPU_STARTED, &gmu->flags); + + device->pwrctrl.last_stat_updated = ktime_get(); + device->state = KGSL_STATE_ACTIVE; + + trace_kgsl_pwr_set_state(device, KGSL_STATE_ACTIVE); + +done: + /* + * When waking up from a touch event we want to stay active long enough + * for the user to send a draw command. 
The default idle timer timeout + * is shorter than we want so go ahead and push the idle timer out + * further for this special case + */ + mod_timer(&device->idle_timer, jiffies + + msecs_to_jiffies(adreno_wake_timeout)); +} + +static int gen7_hwsched_boot(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + + if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) + return 0; + + trace_kgsl_pwr_request_state(device, KGSL_STATE_ACTIVE); + + adreno_hwsched_start(adreno_dev); + + ret = gen7_hwsched_gmu_boot(adreno_dev); + if (ret) + return ret; + + ret = gen7_hwsched_gpu_boot(adreno_dev); + if (ret) + return ret; + + kgsl_start_idle_timer(device); + kgsl_pwrscale_wake(device); + + set_bit(GMU_PRIV_GPU_STARTED, &gmu->flags); + + device->pwrctrl.last_stat_updated = ktime_get(); + device->state = KGSL_STATE_ACTIVE; + + trace_kgsl_pwr_set_state(device, KGSL_STATE_ACTIVE); + + return ret; +} + +static int gen7_hwsched_first_boot(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int ret; + + if (test_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags)) + return gen7_hwsched_boot(adreno_dev); + + adreno_hwsched_start(adreno_dev); + + ret = gen7_microcode_read(adreno_dev); + if (ret) + return ret; + + ret = gen7_init(adreno_dev); + if (ret) + return ret; + + ret = gen7_hwsched_gmu_init(adreno_dev); + if (ret) + return ret; + + trace_kgsl_pwr_request_state(device, KGSL_STATE_ACTIVE); + + ret = gen7_hwsched_gmu_first_boot(adreno_dev); + if (ret) + return ret; + + ret = gen7_hwsched_gpu_boot(adreno_dev); + if (ret) + return ret; + + adreno_get_bus_counters(adreno_dev); + + adreno_dev->cooperative_reset = ADRENO_FEATURE(adreno_dev, + ADRENO_COOP_RESET); + + set_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags); + set_bit(GMU_PRIV_GPU_STARTED, &gmu->flags); + + /* + * There is a possible deadlock scenario during kgsl firmware reading + * (request_firmware) and devfreq update calls. During first boot, kgsl + * device mutex is held and then request_firmware is called for reading + * firmware. request_firmware internally takes dev_pm_qos_mtx lock. + * Whereas in case of devfreq update calls triggered by thermal/bcl or + * devfreq sysfs, it first takes the same dev_pm_qos_mtx lock and then + * tries to take kgsl device mutex as part of get_dev_status/target + * calls. This results in deadlock when both thread are unable to acquire + * the mutex held by other thread. Enable devfreq updates now as we are + * done reading all firmware files. 
+ */ + device->pwrscale.devfreq_enabled = true; + + device->pwrctrl.last_stat_updated = ktime_get(); + device->state = KGSL_STATE_ACTIVE; + + trace_kgsl_pwr_set_state(device, KGSL_STATE_ACTIVE); + + return 0; +} + +static int gen7_hwsched_power_off(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int ret; + + if (!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) + return 0; + + trace_kgsl_pwr_request_state(device, KGSL_STATE_SLUMBER); + + /* process any profiling results that are available */ + adreno_profile_process_results(ADRENO_DEVICE(device)); + + if (!gen7_hw_isidle(adreno_dev)) + dev_err(&gmu->pdev->dev, "GPU isn't idle before SLUMBER\n"); + + ret = gen7_gmu_oob_set(device, oob_gpu); + if (ret) { + gen7_gmu_oob_clear(device, oob_gpu); + goto no_gx_power; + } + + kgsl_pwrscale_update_stats(device); + + /* Save active coresight registers if applicable */ + adreno_coresight_stop(adreno_dev); + + adreno_irqctrl(adreno_dev, 0); + + gen7_gmu_oob_clear(device, oob_gpu); + +no_gx_power: + kgsl_pwrctrl_irq(device, false); + + gen7_hwsched_gmu_power_off(adreno_dev); + + adreno_hwsched_unregister_contexts(adreno_dev); + + if (!IS_ERR_OR_NULL(adreno_dev->gpu_llc_slice)) + llcc_slice_deactivate(adreno_dev->gpu_llc_slice); + + if (!IS_ERR_OR_NULL(adreno_dev->gpuhtw_llc_slice)) + llcc_slice_deactivate(adreno_dev->gpuhtw_llc_slice); + + clear_bit(GMU_PRIV_GPU_STARTED, &gmu->flags); + + device->state = KGSL_STATE_NONE; + + del_timer_sync(&device->idle_timer); + + kgsl_pwrscale_sleep(device); + + kgsl_pwrctrl_clear_l3_vote(device); + + trace_kgsl_pwr_set_state(device, KGSL_STATE_SLUMBER); + + return ret; +} + +static void hwsched_idle_check(struct work_struct *work) +{ + struct kgsl_device *device = container_of(work, + struct kgsl_device, idle_check_ws); + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + mutex_lock(&device->mutex); + + if (test_bit(GMU_DISABLE_SLUMBER, &device->gmu_core.flags)) + goto done; + + if (!atomic_read(&device->active_cnt)) { + gen7_hwsched_power_off(adreno_dev); + } else { + kgsl_pwrscale_update(device); + kgsl_start_idle_timer(device); + } + +done: + mutex_unlock(&device->mutex); +} + +static int gen7_hwsched_first_open(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + + /* + * Do the one time settings that need to happen when we + * attempt to boot the gpu the very first time + */ + ret = gen7_hwsched_first_boot(adreno_dev); + if (ret) + return ret; + + /* + * A client that does a first_open but never closes the device + * may prevent us from going back to SLUMBER. So trigger the idle + * check by incrementing the active count and immediately releasing it. 
+ */ + atomic_inc(&device->active_cnt); + gen7_hwsched_active_count_put(adreno_dev); + + return 0; +} + +int gen7_hwsched_active_count_get(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int ret = 0; + + if (WARN_ON(!mutex_is_locked(&device->mutex))) + return -EINVAL; + + if (test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags)) + return -EINVAL; + + if ((atomic_read(&device->active_cnt) == 0)) + ret = gen7_hwsched_boot(adreno_dev); + + if (ret == 0) + atomic_inc(&device->active_cnt); + + trace_kgsl_active_count(device, + (unsigned long) __builtin_return_address(0)); + + return ret; +} + +static int gen7_hwsched_dcvs_set(struct adreno_device *adreno_dev, + int gpu_pwrlevel, int bus_level) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct hfi_dcvstable_cmd *table = &gmu->hfi.dcvs_table; + struct hfi_gx_bw_perf_vote_cmd req = { + .ack_type = DCVS_ACK_BLOCK, + .freq = INVALID_DCVS_IDX, + .bw = INVALID_DCVS_IDX, + }; + int ret; + + if (!test_bit(GMU_PRIV_HFI_STARTED, &gmu->flags)) + return 0; + + /* Do not set to XO and lower GPU clock vote from GMU */ + if ((gpu_pwrlevel != INVALID_DCVS_IDX) && + (gpu_pwrlevel >= table->gpu_level_num - 1)) { + dev_err(&gmu->pdev->dev, "Invalid gpu dcvs request: %d\n", + gpu_pwrlevel); + return -EINVAL; + } + + if (gpu_pwrlevel < table->gpu_level_num - 1) + req.freq = table->gpu_level_num - gpu_pwrlevel - 1; + + if (bus_level < pwr->ddr_table_count && bus_level > 0) + req.bw = bus_level; + + /* GMU will vote for slumber levels through the sleep sequence */ + if ((req.freq == INVALID_DCVS_IDX) && (req.bw == INVALID_DCVS_IDX)) + return 0; + + ret = CMD_MSG_HDR(req, H2F_MSG_GX_BW_PERF_VOTE); + if (ret) + return ret; + + ret = gen7_hfi_send_cmd_async(adreno_dev, &req); + + if (ret) { + dev_err_ratelimited(&gmu->pdev->dev, + "Failed to set GPU perf idx %d, bw idx %d\n", + req.freq, req.bw); + + /* + * If this was a dcvs request along side an active gpu, request + * dispatcher based reset and recovery. 
+ */ + if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) + adreno_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); + } + + return ret; +} + +static int gen7_hwsched_clock_set(struct adreno_device *adreno_dev, + u32 pwrlevel) +{ + return gen7_hwsched_dcvs_set(adreno_dev, pwrlevel, INVALID_DCVS_IDX); +} + +static void scale_gmu_frequency(struct adreno_device *adreno_dev, int buslevel) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + static unsigned long prev_freq; + unsigned long freq = GMU_FREQ_MIN; + + if (!gmu->perf_ddr_bw) + return; + + /* + * Scale the GMU if DDR is at a CX corner at which GMU can run at + * 500 Mhz + */ + if (pwr->ddr_table[buslevel] >= gmu->perf_ddr_bw) + freq = GMU_FREQ_MAX; + + if (prev_freq == freq) + return; + + if (kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "gmu_clk", freq)) { + dev_err(&gmu->pdev->dev, "Unable to set the GMU clock to %ld\n", + freq); + return; + } + + trace_kgsl_gmu_pwrlevel(freq, prev_freq); + + prev_freq = freq; +} + +static int gen7_hwsched_bus_set(struct adreno_device *adreno_dev, int buslevel, + u32 ab) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int ret = 0; + + if (buslevel != pwr->cur_buslevel) { + ret = gen7_hwsched_dcvs_set(adreno_dev, INVALID_DCVS_IDX, + buslevel); + if (ret) + return ret; + + scale_gmu_frequency(adreno_dev, buslevel); + + pwr->cur_buslevel = buslevel; + + trace_kgsl_buslevel(device, pwr->active_pwrlevel, buslevel); + } + + if (ab != pwr->cur_ab) { + icc_set_bw(pwr->icc_path, MBps_to_icc(ab), 0); + pwr->cur_ab = ab; + } + + return ret; +} + +static int gen7_hwsched_pm_suspend(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int ret; + + if (test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags)) + return 0; + + trace_kgsl_pwr_request_state(device, KGSL_STATE_SUSPEND); + + /* Halt any new submissions */ + reinit_completion(&device->halt_gate); + + /** + * Wait for the dispatcher to retire everything by waiting + * for the active count to go to zero. 
+ */ + ret = kgsl_active_count_wait(device, 0, msecs_to_jiffies(100)); + if (ret) { + dev_err(device->dev, "Timed out waiting for the active count\n"); + goto err; + } + + ret = adreno_hwsched_idle(adreno_dev); + if (ret) + goto err; + + gen7_hwsched_power_off(adreno_dev); + + adreno_get_gpu_halt(adreno_dev); + + set_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags); + + trace_kgsl_pwr_set_state(device, KGSL_STATE_SUSPEND); + + return 0; + +err: + adreno_hwsched_start(adreno_dev); + + return ret; +} + +static void gen7_hwsched_pm_resume(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + + if (WARN(!test_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags), + "resume invoked without a suspend\n")) + return; + + adreno_put_gpu_halt(adreno_dev); + + adreno_hwsched_start(adreno_dev); + + clear_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags); +} + +void gen7_hwsched_handle_watchdog(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 mask; + + /* Temporarily mask the watchdog interrupt to prevent a storm */ + gmu_core_regread(device, GEN7_GMU_AO_HOST_INTERRUPT_MASK, + &mask); + gmu_core_regwrite(device, GEN7_GMU_AO_HOST_INTERRUPT_MASK, + (mask | GMU_INT_WDOG_BITE)); + + gen7_gmu_send_nmi(adreno_dev, false); + + dev_err_ratelimited(&gmu->pdev->dev, + "GMU watchdog expired interrupt received\n"); + + adreno_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); +} + +static void gen7_hwsched_drain_ctxt_unregister(struct adreno_device *adreno_dev) +{ + struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); + struct pending_cmd *cmd = NULL; + + read_lock(&hfi->msglock); + + list_for_each_entry(cmd, &hfi->msglist, node) { + if (MSG_HDR_GET_ID(cmd->sent_hdr) == H2F_MSG_UNREGISTER_CONTEXT) + complete(&cmd->complete); + } + + read_unlock(&hfi->msglock); +} + +int gen7_hwsched_reset(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int ret; + + /* + * Any pending context unregister packets will be lost + * since we hard reset the GMU. This means any threads waiting + * for context unregister hfi ack will timeout. Wake them + * to avoid false positive ack timeout messages later. + */ + gen7_hwsched_drain_ctxt_unregister(adreno_dev); + + adreno_hwsched_unregister_contexts(adreno_dev); + + if (!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) + return 0; + + gen7_hwsched_hfi_stop(adreno_dev); + + gen7_disable_gpu_irq(adreno_dev); + + gen7_gmu_suspend(adreno_dev); + + /* + * In some corner cases, it is possible that GMU put TS_RETIRE + * on the msgq after we have turned off gmu interrupts. Hence, + * drain the queue one last time before we reboot the GMU. 
+ */ + gen7_hwsched_process_msgq(adreno_dev); + + clear_bit(GMU_PRIV_GPU_STARTED, &gmu->flags); + + ret = gen7_hwsched_boot(adreno_dev); + + BUG_ON(ret); + + return ret; +} + +const struct adreno_power_ops gen7_hwsched_power_ops = { + .first_open = gen7_hwsched_first_open, + .last_close = gen7_hwsched_power_off, + .active_count_get = gen7_hwsched_active_count_get, + .active_count_put = gen7_hwsched_active_count_put, + .touch_wakeup = gen7_hwsched_touch_wakeup, + .pm_suspend = gen7_hwsched_pm_suspend, + .pm_resume = gen7_hwsched_pm_resume, + .gpu_clock_set = gen7_hwsched_clock_set, + .gpu_bus_set = gen7_hwsched_bus_set, +}; + +const struct adreno_hwsched_ops gen7_hwsched_ops = { + .submit_cmdobj = gen7_hwsched_submit_cmdobj, + .preempt_count = gen7_hwsched_preempt_count_get, +}; + +int gen7_hwsched_probe(struct platform_device *pdev, + u32 chipid, const struct adreno_gpu_core *gpucore) +{ + struct adreno_device *adreno_dev; + struct kgsl_device *device; + struct gen7_hwsched_device *gen7_hwsched_dev; + int ret; + + gen7_hwsched_dev = devm_kzalloc(&pdev->dev, sizeof(*gen7_hwsched_dev), + GFP_KERNEL); + if (!gen7_hwsched_dev) + return -ENOMEM; + + adreno_dev = &gen7_hwsched_dev->gen7_dev.adreno_dev; + + ret = gen7_probe_common(pdev, adreno_dev, chipid, gpucore); + if (ret) + return ret; + + device = KGSL_DEVICE(adreno_dev); + + INIT_WORK(&device->idle_check_ws, hwsched_idle_check); + + timer_setup(&device->idle_timer, hwsched_idle_timer, 0); + + adreno_dev->irq_mask = GEN7_HWSCHED_INT_MASK; + + if (ADRENO_FEATURE(adreno_dev, ADRENO_PREEMPTION)) + set_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv); + + return adreno_hwsched_init(adreno_dev, &gen7_hwsched_ops); +} + +int gen7_hwsched_add_to_minidump(struct adreno_device *adreno_dev) +{ + struct gen7_device *gen7_dev = container_of(adreno_dev, + struct gen7_device, adreno_dev); + struct gen7_hwsched_device *gen7_hwsched = container_of(gen7_dev, + struct gen7_hwsched_device, gen7_dev); + int ret; + + ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev, KGSL_HWSCHED_DEVICE, + (void *)(gen7_hwsched), sizeof(struct gen7_hwsched_device)); + if (ret) + return ret; + + ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev, KGSL_GMU_LOG_ENTRY, + gen7_dev->gmu.gmu_log->hostptr, gen7_dev->gmu.gmu_log->size); + if (ret) + return ret; + + ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev, KGSL_HFIMEM_ENTRY, + gen7_dev->gmu.hfi.hfi_mem->hostptr, gen7_dev->gmu.hfi.hfi_mem->size); + + return ret; +} diff --git a/adreno_gen7_hwsched.h b/adreno_gen7_hwsched.h new file mode 100644 index 0000000000..b2da557d8d --- /dev/null +++ b/adreno_gen7_hwsched.h @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + */ + +#ifndef _ADRENO_GEN7_HWSCHED_H_ +#define _ADRENO_GEN7_HWSCHED_H_ + +#include "adreno_gen7_hwsched_hfi.h" + +/** + * struct gen7_hwsched_device - Container for the gen7 hwscheduling device + */ +struct gen7_hwsched_device { + /** @gen7_dev: Container for the gen7 device */ + struct gen7_device gen7_dev; + /** @hwsched_hfi: Container for hwscheduling specific hfi resources */ + struct gen7_hwsched_hfi hwsched_hfi; +}; + +/** + * gen7_hwsched_probe - Target specific probe for hwsched + * @pdev: Pointer to the platform device + * @chipid: Chipid of the target + * @gpucore: Pointer to the gpucore + * + * The target specific probe function for hwsched enabled gmu targets. 
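+ * It allocates the gen7_hwsched_device container, runs the common gen7
+ * probe, sets up the idle-check worker and idle timer, and registers the
+ * hwsched dispatcher operations with adreno_hwsched_init().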
+ * + * Return: 0 on success or negative error on failure + */ +int gen7_hwsched_probe(struct platform_device *pdev, + u32 chipid, const struct adreno_gpu_core *gpucore); + +/** + * gen7_hwsched_reset - Restart the gmu and gpu + * @adreno_dev: Pointer to the adreno device + * + * Return: 0 on success or negative error on failure + */ +int gen7_hwsched_reset(struct adreno_device *adreno_dev); + +/** + * gen7_hwsched_snapshot - take gen7 hwsched snapshot + * @adreno_dev: Pointer to the adreno device + * @snapshot: Pointer to the snapshot instance + * + * Snapshot the faulty ib and then snapshot rest of gen7 gmu things + */ +void gen7_hwsched_snapshot(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot); + +/** + * gen7_hwsched_handle_watchdog - Handle watchdog interrupt + * @adreno_dev: Pointer to the adreno device + */ +void gen7_hwsched_handle_watchdog(struct adreno_device *adreno_dev); + +/** + * gen7_hwsched_active_count_get - Increment the active count + * @adreno_dev: Pointer to the adreno device + * + * This function increments the active count. If active count + * is 0, this function also powers up the device. + * + * Return: 0 on success or negative error on failure + */ +int gen7_hwsched_active_count_get(struct adreno_device *adreno_dev); + +/** + * gen7_hwsched_active_count_put - Put back the active count + * @adreno_dev: Pointer to the adreno device + * + * This function decrements the active count sets the idle + * timer if active count is zero. + */ +void gen7_hwsched_active_count_put(struct adreno_device *adreno_dev); + +/** + * gen7_hwsched_add_to_minidump - Register hwsched_device with va minidump + * @adreno_dev: Pointer to the adreno device + */ +int gen7_hwsched_add_to_minidump(struct adreno_device *adreno_dev); + +#endif diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c new file mode 100644 index 0000000000..0ee8a7b858 --- /dev/null +++ b/adreno_gen7_hwsched_hfi.c @@ -0,0 +1,1606 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. 
+ */ + +#include +#include + +#include "adreno.h" +#include "adreno_gen7.h" +#include "adreno_gen7_hwsched.h" +#include "adreno_hfi.h" +#include "adreno_pm4types.h" +#include "adreno_trace.h" +#include "kgsl_device.h" +#include "kgsl_eventlog.h" +#include "kgsl_pwrctrl.h" +#include "kgsl_trace.h" + +#define HFI_QUEUE_MAX (HFI_QUEUE_DEFAULT_CNT + HFI_QUEUE_DISPATCH_MAX_CNT) + +#define DEFINE_QHDR(gmuaddr, id, prio) \ + {\ + .status = 1, \ + .start_addr = GMU_QUEUE_START_ADDR(gmuaddr, id), \ + .type = QUEUE_HDR_TYPE(id, prio, 0, 0), \ + .queue_size = SZ_4K >> 2, \ + .msg_size = 0, \ + .unused0 = 0, \ + .unused1 = 0, \ + .unused2 = 0, \ + .unused3 = 0, \ + .unused4 = 0, \ + .read_index = 0, \ + .write_index = 0, \ +} + +static struct dq_info { + /** @max_dq: Maximum number of dispatch queues per RB level */ + u32 max_dq; + /** @base_dq_id: Base dqid for level */ + u32 base_dq_id; + /** @offset: Next dqid to use for roundrobin context assignment */ + u32 offset; +} gen7_hfi_dqs[KGSL_PRIORITY_MAX_RB_LEVELS] = { + { 4, 0, }, /* RB0 */ + { 4, 4, }, /* RB1 */ + { 3, 8, }, /* RB2 */ + { 3, 11, }, /* RB3 */ +}; + +struct gen7_hwsched_hfi *to_gen7_hwsched_hfi( + struct adreno_device *adreno_dev) +{ + struct gen7_device *gen7_dev = container_of(adreno_dev, + struct gen7_device, adreno_dev); + struct gen7_hwsched_device *gen7_hwsched = container_of(gen7_dev, + struct gen7_hwsched_device, gen7_dev); + + return &gen7_hwsched->hwsched_hfi; +} + +static void add_waiter(struct gen7_hwsched_hfi *hfi, u32 hdr, + struct pending_cmd *ack) +{ + memset(ack, 0x0, sizeof(*ack)); + + init_completion(&ack->complete); + write_lock_irq(&hfi->msglock); + list_add_tail(&ack->node, &hfi->msglist); + write_unlock_irq(&hfi->msglock); + + ack->sent_hdr = hdr; +} + +static void del_waiter(struct gen7_hwsched_hfi *hfi, struct pending_cmd *ack) +{ + write_lock_irq(&hfi->msglock); + list_del(&ack->node); + write_unlock_irq(&hfi->msglock); +} + +static void gen7_receive_ack_async(struct adreno_device *adreno_dev, void *rcvd) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); + struct pending_cmd *cmd = NULL; + u32 waiters[64], num_waiters = 0, i; + u32 *ack = rcvd; + u32 hdr = ack[0]; + u32 req_hdr = ack[1]; + u32 size_bytes = MSG_HDR_GET_SIZE(hdr) << 2; + + if (size_bytes > sizeof(cmd->results)) + dev_err_ratelimited(&gmu->pdev->dev, + "Ack result too big: %d Truncating to: %ld\n", + size_bytes, sizeof(cmd->results)); + + read_lock(&hfi->msglock); + + list_for_each_entry(cmd, &hfi->msglist, node) { + if (HDR_CMP_SEQNUM(cmd->sent_hdr, req_hdr)) { + memcpy(cmd->results, ack, + min_t(u32, size_bytes, + sizeof(cmd->results))); + complete(&cmd->complete); + read_unlock(&hfi->msglock); + return; + } + + if (num_waiters < ARRAY_SIZE(waiters)) + waiters[num_waiters++] = cmd->sent_hdr; + } + + read_unlock(&hfi->msglock); + + /* Didn't find the sender, list the waiter */ + dev_err_ratelimited(&gmu->pdev->dev, + "Unexpectedly got id %d seqnum %d. 
Total waiters: %d Top %d Waiters:\n", + MSG_HDR_GET_ID(req_hdr), MSG_HDR_GET_SEQNUM(req_hdr), + num_waiters, min_t(u32, num_waiters, 5)); + + for (i = 0; i < num_waiters && i < 5; i++) + dev_err_ratelimited(&gmu->pdev->dev, + " id %d seqnum %d\n", + MSG_HDR_GET_ID(waiters[i]), + MSG_HDR_GET_SEQNUM(waiters[i])); +} + +static void log_profiling_info(struct adreno_device *adreno_dev, u32 *rcvd) +{ + struct hfi_ts_retire_cmd *cmd = (struct hfi_ts_retire_cmd *)rcvd; + struct kgsl_context *context; + struct retire_info info = {0}; + + context = kgsl_context_get(KGSL_DEVICE(adreno_dev), cmd->ctxt_id); + if (context == NULL) + return; + + info.timestamp = cmd->ts; + info.rb_id = adreno_get_level(context->priority); + info.gmu_dispatch_queue = context->gmu_dispatch_queue; + info.submitted_to_rb = cmd->submitted_to_rb; + info.sop = cmd->sop; + info.eop = cmd->eop; + info.retired_on_gmu = cmd->retired_on_gmu; + + trace_adreno_cmdbatch_retired(context, &info, 0, 0, 0); + + log_kgsl_cmdbatch_retired_event(context->id, cmd->ts, + context->priority, 0, cmd->sop, cmd->eop); + + kgsl_context_put(context); +} + +u32 gen7_hwsched_parse_payload(struct payload_section *payload, u32 key) +{ + u32 i; + + /* Each key-value pair is 2 dwords */ + for (i = 0; i < payload->dwords; i += 2) { + if (payload->data[i] == key) + return payload->data[i + 1]; + } + + return 0; +} + +/* Look up a particular key's value for a given type of payload */ +static u32 gen7_hwsched_lookup_key_value(struct adreno_device *adreno_dev, + u32 type, u32 key) +{ + struct hfi_context_bad_cmd *cmd = adreno_dev->hwsched.ctxt_bad; + u32 i = 0, payload_bytes; + void *start; + + if (!cmd->hdr) + return 0; + + payload_bytes = (MSG_HDR_GET_SIZE(cmd->hdr) << 2) - + offsetof(struct hfi_context_bad_cmd, payload); + + start = &cmd->payload[0]; + + while (i < payload_bytes) { + struct payload_section *payload = start + i; + + if (payload->type == type) + return gen7_hwsched_parse_payload(payload, key); + + i += struct_size(payload, data, payload->dwords); + } + + return 0; +} + +static u32 get_payload_rb_key(struct adreno_device *adreno_dev, + u32 rb_id, u32 key) +{ + struct hfi_context_bad_cmd *cmd = adreno_dev->hwsched.ctxt_bad; + u32 i = 0, payload_bytes; + void *start; + + if (!cmd->hdr) + return 0; + + payload_bytes = (MSG_HDR_GET_SIZE(cmd->hdr) << 2) - + offsetof(struct hfi_context_bad_cmd, payload); + + start = &cmd->payload[0]; + + while (i < payload_bytes) { + struct payload_section *payload = start + i; + + if (payload->type == PAYLOAD_RB) { + u32 id = gen7_hwsched_parse_payload(payload, KEY_RB_ID); + + if (id == rb_id) + return gen7_hwsched_parse_payload(payload, key); + } + + i += struct_size(payload, data, payload->dwords); + } + + return 0; +} + +static void log_gpu_fault(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct device *dev = &gmu->pdev->dev; + struct hfi_context_bad_cmd *cmd = adreno_dev->hwsched.ctxt_bad; + + switch (cmd->error) { + case GMU_GPU_HW_HANG: + dev_crit_ratelimited(dev, "MISC: GPU hang detected\n"); + break; + case GMU_GPU_SW_HANG: + dev_crit_ratelimited(dev, "gpu timeout ctx %d ts %d\n", + cmd->ctxt_id, cmd->ts); + break; + case GMU_CP_OPCODE_ERROR: + dev_crit_ratelimited(dev, + "CP opcode error interrupt | opcode=0x%8.8x\n", + gen7_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, + KEY_CP_OPCODE_ERROR)); + break; + case GMU_CP_PROTECTED_ERROR: { + u32 status = gen7_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, + KEY_CP_PROTECTED_ERROR); + + 
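+ /*
+ * Decode of the protected-mode status used in the message below:
+ * bit 20 distinguishes a read (set) from a write (clear) access, and
+ * bits 17:0 hold the offending register offset, hence the 0x3FFFF mask.
+ */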
dev_crit_ratelimited(dev, + "CP | Protected mode error | %s | addr=0x%5.5x | status=0x%8.8x\n", + status & (1 << 20) ? "READ" : "WRITE", + status & 0x3FFFF, status); + } + break; + case GMU_CP_ILLEGAL_INST_ERROR: + dev_crit_ratelimited(dev, "CP Illegal instruction error\n"); + break; + case GMU_CP_UCODE_ERROR: + dev_crit_ratelimited(dev, "CP ucode error interrupt\n"); + break; + case GMU_CP_HW_FAULT_ERROR: + dev_crit_ratelimited(dev, + "CP | Ringbuffer HW fault | status=0x%8.8x\n", + gen7_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, + KEY_CP_HW_FAULT)); + break; + case GMU_GPU_PREEMPT_TIMEOUT: { + u32 cur, next, cur_rptr, cur_wptr, next_rptr, next_wptr; + + cur = gen7_hwsched_lookup_key_value(adreno_dev, + PAYLOAD_PREEMPT_TIMEOUT, KEY_PREEMPT_TIMEOUT_CUR_RB_ID); + next = gen7_hwsched_lookup_key_value(adreno_dev, + PAYLOAD_PREEMPT_TIMEOUT, + KEY_PREEMPT_TIMEOUT_NEXT_RB_ID); + cur_rptr = get_payload_rb_key(adreno_dev, cur, KEY_RB_RPTR); + cur_wptr = get_payload_rb_key(adreno_dev, cur, KEY_RB_WPTR); + next_rptr = get_payload_rb_key(adreno_dev, next, KEY_RB_RPTR); + next_wptr = get_payload_rb_key(adreno_dev, next, KEY_RB_WPTR); + + dev_crit_ratelimited(dev, + "Preemption Fault: cur=%d R/W=0x%x/0x%x, next=%d R/W=0x%x/0x%x\n", + cur, cur_rptr, cur_wptr, next, next_rptr, next_wptr); + } + break; + case GMU_CP_GPC_ERROR: + dev_crit_ratelimited(dev, "RBBM: GPC error\n"); + break; + case GMU_CP_BV_OPCODE_ERROR: + dev_crit_ratelimited(dev, + "CP BV opcode error | opcode=0x%8.8x\n", + gen7_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, + KEY_CP_BV_OPCODE_ERROR)); + break; + case GMU_CP_BV_PROTECTED_ERROR: { + u32 status = gen7_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, + KEY_CP_BV_PROTECTED_ERROR); + + dev_crit_ratelimited(dev, + "CP BV | Protected mode error | %s | addr=0x%5.5x | status=0x%8.8x\n", + status & (1 << 20) ? 
"READ" : "WRITE", + status & 0x3FFFF, status); + } + break; + case GMU_CP_BV_HW_FAULT_ERROR: + dev_crit_ratelimited(dev, + "CP BV | Ringbuffer HW fault | status=0x%8.8x\n", + gen7_hwsched_lookup_key_value(adreno_dev, PAYLOAD_FAULT_REGS, + KEY_CP_HW_FAULT)); + break; + case GMU_CP_BV_ILLEGAL_INST_ERROR: + dev_crit_ratelimited(dev, "CP BV Illegal instruction error\n"); + break; + case GMU_CP_BV_UCODE_ERROR: + dev_crit_ratelimited(dev, "CP BV ucode error interrupt\n"); + break; + case GMU_CP_UNKNOWN_ERROR: + fallthrough; + default: + dev_crit_ratelimited(dev, "Unknown GPU fault: %u\n", + cmd->error); + break; + } +} + +static u32 peek_next_header(struct gen7_gmu_device *gmu, uint32_t queue_idx) +{ + struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem; + struct hfi_queue_table *tbl = mem_addr->hostptr; + struct hfi_queue_header *hdr = &tbl->qhdr[queue_idx]; + u32 *queue; + + if (hdr->status == HFI_QUEUE_STATUS_DISABLED) + return 0; + + if (hdr->read_index == hdr->write_index) + return 0; + + queue = HOST_QUEUE_START_ADDR(mem_addr, queue_idx); + + return queue[hdr->read_index]; +} + +static void process_ctx_bad(struct adreno_device *adreno_dev) +{ + log_gpu_fault(adreno_dev); + + adreno_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); +} + +void gen7_hwsched_process_msgq(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + u32 rcvd[MAX_RCVD_SIZE], next_hdr; + + for (;;) { + next_hdr = peek_next_header(gmu, HFI_MSG_ID); + + if (!next_hdr) + return; + + if (MSG_HDR_GET_ID(next_hdr) == F2H_MSG_CONTEXT_BAD) { + gen7_hfi_queue_read(gmu, HFI_MSG_ID, + (u32 *)adreno_dev->hwsched.ctxt_bad, + HFI_MAX_MSG_SIZE); + process_ctx_bad(adreno_dev); + continue; + } + + gen7_hfi_queue_read(gmu, HFI_MSG_ID, rcvd, sizeof(rcvd)); + + /* + * We are assuming that there is only one outstanding ack + * because hfi sending thread waits for completion while + * holding the device mutex + */ + if (MSG_HDR_GET_TYPE(rcvd[0]) == HFI_MSG_ACK) { + gen7_receive_ack_async(adreno_dev, rcvd); + } else if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_TS_RETIRE) { + log_profiling_info(adreno_dev, rcvd); + adreno_hwsched_trigger(adreno_dev); + } + } +} + +static void process_log_block(struct adreno_device *adreno_dev, void *data) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct hfi_log_block *cmd = data; + u32 *log_event = gmu->gmu_log->hostptr; + u32 start, end; + + start = cmd->start_index; + end = cmd->stop_index; + + log_event += start * 4; + while (start != end) { + trace_gmu_event(log_event); + log_event += 4; + start++; + } +} + +static void process_dbgq_irq(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + u32 rcvd[MAX_RCVD_SIZE]; + bool recovery = false; + + while (gen7_hfi_queue_read(gmu, HFI_DBG_ID, rcvd, sizeof(rcvd)) > 0) { + + if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_ERR) { + adreno_gen7_receive_err_req(gmu, rcvd); + recovery = true; + break; + } + + if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_DEBUG) + adreno_gen7_receive_debug_req(gmu, rcvd); + + if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_LOG_BLOCK) + process_log_block(adreno_dev, rcvd); + } + + if (!recovery) + return; + + adreno_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); +} + +/* HFI interrupt handler */ +static irqreturn_t gen7_hwsched_hfi_handler(int irq, void *data) +{ + struct adreno_device *adreno_dev = data; + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); + struct kgsl_device *device = 
KGSL_DEVICE(adreno_dev); + u32 status = 0; + + gmu_core_regread(device, GEN7_GMU_GMU2HOST_INTR_INFO, &status); + gmu_core_regwrite(device, GEN7_GMU_GMU2HOST_INTR_CLR, hfi->irq_mask); + + /* + * If interrupts are not enabled on the HFI message queue, + * the inline message processing loop will process it, + * else, process it here. + */ + if (!(hfi->irq_mask & HFI_IRQ_MSGQ_MASK)) + status &= ~HFI_IRQ_MSGQ_MASK; + + if (status & (HFI_IRQ_MSGQ_MASK | HFI_IRQ_DBGQ_MASK)) { + wake_up_interruptible(&hfi->f2h_wq); + adreno_hwsched_trigger(adreno_dev); + } + if (status & HFI_IRQ_CM3_FAULT_MASK) { + atomic_set(&gmu->cm3_fault, 1); + + /* make sure other CPUs see the update */ + smp_wmb(); + + dev_err_ratelimited(&gmu->pdev->dev, + "GMU CM3 fault interrupt received\n"); + + adreno_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); + } + + /* Ignore OOB bits */ + status &= GENMASK(31 - (oob_max - 1), 0); + + if (status & ~hfi->irq_mask) + dev_err_ratelimited(&gmu->pdev->dev, + "Unhandled HFI interrupts 0x%x\n", + status & ~hfi->irq_mask); + + return IRQ_HANDLED; +} + +#define HFI_IRQ_MSGQ_MASK BIT(0) +#define HFI_RSP_TIMEOUT 100 /* msec */ + +static int wait_ack_completion(struct adreno_device *adreno_dev, + struct pending_cmd *ack) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int rc; + + rc = wait_for_completion_timeout(&ack->complete, + HFI_RSP_TIMEOUT); + if (!rc) { + dev_err(&gmu->pdev->dev, + "Ack timeout for id:%d sequence=%d\n", + MSG_HDR_GET_ID(ack->sent_hdr), + MSG_HDR_GET_SEQNUM(ack->sent_hdr)); + gmu_core_fault_snapshot(KGSL_DEVICE(adreno_dev)); + return -ETIMEDOUT; + } + + return 0; +} + +static int check_ack_failure(struct adreno_device *adreno_dev, + struct pending_cmd *ack) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + + if (ack->results[2] != 0xffffffff) + return 0; + + dev_err(&gmu->pdev->dev, + "ACK error: sender id %d seqnum %d\n", + MSG_HDR_GET_ID(ack->sent_hdr), + MSG_HDR_GET_SEQNUM(ack->sent_hdr)); + + return -EINVAL; +} + +int gen7_hfi_send_cmd_async(struct adreno_device *adreno_dev, void *data) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); + u32 *cmd = data; + u32 seqnum = atomic_inc_return(&gmu->hfi.seqnum); + int rc; + struct pending_cmd pending_ack; + + *cmd = MSG_HDR_SET_SEQNUM(*cmd, seqnum); + + add_waiter(hfi, *cmd, &pending_ack); + + rc = gen7_hfi_cmdq_write(adreno_dev, cmd); + if (rc) + goto done; + + rc = wait_ack_completion(adreno_dev, &pending_ack); + if (rc) + goto done; + + rc = check_ack_failure(adreno_dev, &pending_ack); + +done: + del_waiter(hfi, &pending_ack); + + return rc; +} + +static void init_queues(struct gen7_hfi *hfi) +{ + u32 gmuaddr = hfi->hfi_mem->gmuaddr; + struct hfi_queue_table hfi_table = { + .qtbl_hdr = { + .version = 0, + .size = sizeof(struct hfi_queue_table) >> 2, + .qhdr0_offset = + sizeof(struct hfi_queue_table_header) >> 2, + .qhdr_size = sizeof(struct hfi_queue_header) >> 2, + .num_q = HFI_QUEUE_MAX, + .num_active_q = HFI_QUEUE_MAX, + }, + .qhdr = { + DEFINE_QHDR(gmuaddr, HFI_CMD_ID, 0), + DEFINE_QHDR(gmuaddr, HFI_MSG_ID, 0), + DEFINE_QHDR(gmuaddr, HFI_DBG_ID, 0), + /* 4 DQs for RB priority 0 */ + DEFINE_QHDR(gmuaddr, 3, 0), + DEFINE_QHDR(gmuaddr, 4, 0), + DEFINE_QHDR(gmuaddr, 5, 0), + DEFINE_QHDR(gmuaddr, 6, 0), + /* 4 DQs for RB priority 1 */ + DEFINE_QHDR(gmuaddr, 7, 1), + DEFINE_QHDR(gmuaddr, 8, 1), + DEFINE_QHDR(gmuaddr, 9, 1), + DEFINE_QHDR(gmuaddr, 10, 1), + /* 3 DQs for RB priority 2 */ + DEFINE_QHDR(gmuaddr, 11, 2), + 
DEFINE_QHDR(gmuaddr, 12, 2), + DEFINE_QHDR(gmuaddr, 13, 2), + /* 3 DQs for RB priority 3 */ + DEFINE_QHDR(gmuaddr, 14, 3), + DEFINE_QHDR(gmuaddr, 15, 3), + DEFINE_QHDR(gmuaddr, 16, 3), + }, + }; + + memcpy(hfi->hfi_mem->hostptr, &hfi_table, sizeof(hfi_table)); +} + +/* Total header sizes + queue sizes + 16 for alignment */ +#define HFIMEM_SIZE (sizeof(struct hfi_queue_table) + 16 + \ + (SZ_4K * HFI_QUEUE_MAX)) + +static int hfi_f2h_main(void *arg); + +int gen7_hwsched_hfi_init(struct adreno_device *adreno_dev) +{ + struct gen7_hwsched_hfi *hw_hfi = to_gen7_hwsched_hfi(adreno_dev); + struct gen7_hfi *hfi = to_gen7_hfi(adreno_dev); + + if (IS_ERR_OR_NULL(hw_hfi->big_ib)) { + hw_hfi->big_ib = gen7_reserve_gmu_kernel_block( + to_gen7_gmu(adreno_dev), 0, + HWSCHED_MAX_IBS * sizeof(struct hfi_issue_ib), + GMU_NONCACHED_KERNEL); + if (IS_ERR(hw_hfi->big_ib)) + return PTR_ERR(hw_hfi->big_ib); + } + + if (IS_ERR_OR_NULL(hfi->hfi_mem)) { + hfi->hfi_mem = gen7_reserve_gmu_kernel_block( + to_gen7_gmu(adreno_dev), + 0, HFIMEM_SIZE, GMU_NONCACHED_KERNEL); + if (IS_ERR(hfi->hfi_mem)) + return PTR_ERR(hfi->hfi_mem); + init_queues(hfi); + } + + if (IS_ERR_OR_NULL(hw_hfi->f2h_task)) + hw_hfi->f2h_task = kthread_run(hfi_f2h_main, adreno_dev, "gmu_f2h"); + + return PTR_ERR_OR_ZERO(hw_hfi->f2h_task); +} + +static int get_attrs(u32 flags) +{ + int attrs = IOMMU_READ; + + if (flags & HFI_MEMFLAG_GMU_PRIV) + attrs |= IOMMU_PRIV; + + if (flags & HFI_MEMFLAG_GMU_WRITEABLE) + attrs |= IOMMU_WRITE; + + return attrs; +} + +static int gmu_import_buffer(struct adreno_device *adreno_dev, + struct hfi_mem_alloc_entry *entry, u32 flags) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int attrs = get_attrs(flags); + struct gmu_vma_entry *vma = &gmu->vma[GMU_NONCACHED_KERNEL]; + struct hfi_mem_alloc_desc *desc = &entry->desc; + int ret; + + if (flags & HFI_MEMFLAG_GMU_CACHEABLE) + vma = &gmu->vma[GMU_CACHE]; + + if ((vma->next_va + desc->size) > (vma->start + vma->size)) { + dev_err(&gmu->pdev->dev, + "GMU mapping too big. available: %d required: %d\n", + vma->next_va - vma->start, desc->size); + return -ENOMEM; + } + + ret = gmu_core_map_memdesc(gmu->domain, entry->md, vma->next_va, attrs); + if (ret) { + dev_err(&gmu->pdev->dev, "gmu map err: 0x%08x, %x\n", + vma->next_va, attrs); + return ret; + } + + entry->md->gmuaddr = vma->next_va; + + vma->next_va += desc->size; + return 0; +} + +static struct hfi_mem_alloc_entry *lookup_mem_alloc_table( + struct adreno_device *adreno_dev, struct hfi_mem_alloc_desc *desc) +{ + struct gen7_hwsched_hfi *hw_hfi = to_gen7_hwsched_hfi(adreno_dev); + int i; + + for (i = 0; i < hw_hfi->mem_alloc_entries; i++) { + struct hfi_mem_alloc_entry *entry = &hw_hfi->mem_alloc_table[i]; + + if ((entry->desc.mem_kind == desc->mem_kind) && + (entry->desc.gmu_mem_handle == desc->gmu_mem_handle)) + return entry; + } + + return NULL; +} + +static struct hfi_mem_alloc_entry *get_mem_alloc_entry( + struct adreno_device *adreno_dev, struct hfi_mem_alloc_desc *desc) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); + struct hfi_mem_alloc_entry *entry = + lookup_mem_alloc_table(adreno_dev, desc); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + u64 flags = 0; + u32 priv = 0; + int ret; + const char *memkind_string = desc->mem_kind < HFI_MEMKIND_MAX ? 
+ hfi_memkind_strings[desc->mem_kind] : "UNKNOWN"; + + if (entry) + return entry; + + if (hfi->mem_alloc_entries == ARRAY_SIZE(hfi->mem_alloc_table)) { + dev_err(&gmu->pdev->dev, + "Reached max mem alloc entries\n"); + return ERR_PTR(-ENOMEM); + } + + entry = &hfi->mem_alloc_table[hfi->mem_alloc_entries]; + + memcpy(&entry->desc, desc, sizeof(*desc)); + + entry->desc.host_mem_handle = desc->gmu_mem_handle; + + if (desc->flags & HFI_MEMFLAG_GFX_PRIV) + priv |= KGSL_MEMDESC_PRIVILEGED; + + if (!(desc->flags & HFI_MEMFLAG_GFX_WRITEABLE)) + flags |= KGSL_MEMFLAGS_GPUREADONLY; + + if (desc->flags & HFI_MEMFLAG_GFX_SECURE) + flags |= KGSL_MEMFLAGS_SECURE; + + if (!(desc->flags & HFI_MEMFLAG_GFX_ACC)) { + entry->md = gen7_reserve_gmu_kernel_block(gmu, 0, + desc->size, + (desc->flags & HFI_MEMFLAG_GMU_CACHEABLE) ? + GMU_CACHE : GMU_NONCACHED_KERNEL); + if (IS_ERR(entry->md)) { + int ret = PTR_ERR(entry->md); + + memset(entry, 0, sizeof(*entry)); + return ERR_PTR(ret); + } + entry->desc.size = entry->md->size; + entry->desc.gmu_addr = entry->md->gmuaddr; + + goto done; + } + + entry->md = kgsl_allocate_global(device, desc->size, 0, flags, priv, + memkind_string); + if (IS_ERR(entry->md)) { + int ret = PTR_ERR(entry->md); + + memset(entry, 0, sizeof(*entry)); + return ERR_PTR(ret); + } + + entry->desc.size = entry->md->size; + entry->desc.gpu_addr = entry->md->gpuaddr; + + if (!(desc->flags & HFI_MEMFLAG_GMU_ACC)) + goto done; + + /* + * If gmu mapping fails, then we have to live with + * leaking the gpu global buffer allocated above. + */ + ret = gmu_import_buffer(adreno_dev, entry, desc->flags); + if (ret) { + dev_err(&gmu->pdev->dev, + "gpuaddr: 0x%llx size: %lld bytes lost\n", + entry->md->gpuaddr, entry->md->size); + memset(entry, 0, sizeof(*entry)); + return ERR_PTR(ret); + } + + entry->desc.gmu_addr = entry->md->gmuaddr; +done: + hfi->mem_alloc_entries++; + + return entry; +} + +static int process_mem_alloc(struct adreno_device *adreno_dev, + struct hfi_mem_alloc_desc *mad) +{ + struct hfi_mem_alloc_entry *entry; + + entry = get_mem_alloc_entry(adreno_dev, mad); + if (IS_ERR(entry)) + return PTR_ERR(entry); + + if (entry->md) { + mad->gpu_addr = entry->md->gpuaddr; + mad->gmu_addr = entry->md->gmuaddr; + } + + /* + * GMU uses the host_mem_handle to check if this memalloc was + * successful + */ + mad->host_mem_handle = mad->gmu_mem_handle; + + return 0; +} + +static int mem_alloc_reply(struct adreno_device *adreno_dev, void *rcvd) +{ + struct hfi_mem_alloc_cmd *in = (struct hfi_mem_alloc_cmd *)rcvd; + struct hfi_mem_alloc_reply_cmd out = {0}; + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int ret; + + ret = process_mem_alloc(adreno_dev, &in->desc); + if (ret) + return ret; + + memcpy(&out.desc, &in->desc, sizeof(out.desc)); + + out.hdr = ACK_MSG_HDR(F2H_MSG_MEM_ALLOC, sizeof(out)); + out.hdr = MSG_HDR_SET_SEQNUM(out.hdr, + atomic_inc_return(&gmu->hfi.seqnum)); + + out.req_hdr = in->hdr; + + return gen7_hfi_cmdq_write(adreno_dev, (u32 *)&out); +} + +static int send_start_msg(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned int seqnum = atomic_inc_return(&gmu->hfi.seqnum); + int ret, rc = 0; + struct hfi_start_cmd cmd; + u32 rcvd[MAX_RCVD_SIZE]; + struct pending_cmd pending_ack = {0}; + + ret = CMD_MSG_HDR(cmd, H2F_MSG_START); + if (ret) + return ret; + + cmd.hdr = MSG_HDR_SET_SEQNUM(cmd.hdr, seqnum); + + pending_ack.sent_hdr = cmd.hdr; + + rc = 
gen7_hfi_cmdq_write(adreno_dev, (u32 *)&cmd); + if (rc) + return rc; + +poll: + rc = gmu_core_timed_poll_check(device, GEN7_GMU_GMU2HOST_INTR_INFO, + HFI_IRQ_MSGQ_MASK, HFI_RSP_TIMEOUT, HFI_IRQ_MSGQ_MASK); + + if (rc) { + dev_err(&gmu->pdev->dev, + "Timed out processing MSG_START seqnum: %d\n", + seqnum); + gmu_core_fault_snapshot(device); + return rc; + } + + /* Clear the interrupt */ + gmu_core_regwrite(device, GEN7_GMU_GMU2HOST_INTR_CLR, + HFI_IRQ_MSGQ_MASK); + + if (gen7_hfi_queue_read(gmu, HFI_MSG_ID, rcvd, sizeof(rcvd)) <= 0) { + dev_err(&gmu->pdev->dev, "MSG_START: no payload\n"); + gmu_core_fault_snapshot(device); + return -EINVAL; + } + + if (MSG_HDR_GET_TYPE(rcvd[0]) == HFI_MSG_ACK) { + rc = gen7_receive_ack_cmd(gmu, rcvd, &pending_ack); + if (rc) + return rc; + + return check_ack_failure(adreno_dev, &pending_ack); + } + + if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_MEM_ALLOC) { + rc = mem_alloc_reply(adreno_dev, rcvd); + if (rc) + return rc; + + goto poll; + } + + dev_err(&gmu->pdev->dev, + "MSG_START: unexpected response id:%d, type:%d\n", + MSG_HDR_GET_ID(rcvd[0]), + MSG_HDR_GET_TYPE(rcvd[0])); + + gmu_core_fault_snapshot(device); + + return rc; +} + +static void reset_hfi_queues(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct hfi_queue_table *tbl = gmu->hfi.hfi_mem->hostptr; + u32 i; + + /* Flush HFI queues */ + for (i = 0; i < HFI_QUEUE_MAX; i++) { + struct hfi_queue_header *hdr = &tbl->qhdr[i]; + + if (hdr->status == HFI_QUEUE_STATUS_DISABLED) + continue; + + if (hdr->read_index != hdr->write_index) { + dev_err(&gmu->pdev->dev, + "HFI queue[%d] is not empty before close: rd=%d,wt=%d\n", + i, hdr->read_index, hdr->write_index); + hdr->read_index = hdr->write_index; + + gmu_core_fault_snapshot(KGSL_DEVICE(adreno_dev)); + } + } +} + +void gen7_hwsched_hfi_stop(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); + + hfi->irq_mask &= ~HFI_IRQ_MSGQ_MASK; + + reset_hfi_queues(adreno_dev); + + kgsl_pwrctrl_axi(KGSL_DEVICE(adreno_dev), false); + + clear_bit(GMU_PRIV_HFI_STARTED, &gmu->flags); + +} + +static void enable_async_hfi(struct adreno_device *adreno_dev) +{ + struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); + + hfi->irq_mask |= HFI_IRQ_MSGQ_MASK; + + gmu_core_regwrite(KGSL_DEVICE(adreno_dev), GEN7_GMU_GMU2HOST_INTR_MASK, + (u32)~hfi->irq_mask); +} + +static int enable_preemption(struct adreno_device *adreno_dev) +{ + u32 data; + int ret; + + if (!adreno_is_preemption_enabled(adreno_dev)) + return 0; + + /* + * Bits [0:1] contains the preemption level + * Bit 2 is to enable/disable gmem save/restore + * Bit 3 is to enable/disable skipsaverestore + */ + data = FIELD_PREP(GENMASK(1, 0), adreno_dev->preempt.preempt_level) | + FIELD_PREP(BIT(2), adreno_dev->preempt.usesgmem) | + FIELD_PREP(BIT(3), adreno_dev->preempt.skipsaverestore); + + ret = gen7_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_PREEMPTION, 1, + data); + if (ret) + return ret; + + /* + * Bits[3:0] contain the preemption timeout enable bit per ringbuffer + * Bits[31:4] contain the timeout in ms + */ + return gen7_hfi_send_set_value(adreno_dev, HFI_VALUE_BIN_TIME, 1, + FIELD_PREP(GENMASK(31, 4), 3000) | + FIELD_PREP(GENMASK(3, 0), 0xf)); + +} + +int gen7_hwsched_hfi_start(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + + ret = 
gen7_gmu_hfi_start(adreno_dev); + if (ret) + goto err; + + ret = gen7_hfi_send_generic_req(adreno_dev, &gmu->hfi.dcvs_table); + if (ret) + goto err; + + ret = gen7_hfi_send_generic_req(adreno_dev, &gmu->hfi.bw_table); + if (ret) + goto err; + + ret = gen7_hfi_send_acd_feature_ctrl(adreno_dev); + if (ret) + goto err; + + ret = gen7_hfi_send_bcl_feature_ctrl(adreno_dev); + if (ret) + goto err; + + ret = gen7_hfi_send_ifpc_feature_ctrl(adreno_dev); + if (ret) + goto err; + + ret = gen7_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_HWSCHED, 1, 0); + if (ret) + goto err; + + ret = gen7_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_KPROF, 1, 0); + if (ret) + goto err; + + /* Enable the long ib timeout detection */ + if (adreno_long_ib_detect(adreno_dev)) { + ret = gen7_hfi_send_feature_ctrl(adreno_dev, + HFI_FEATURE_BAIL_OUT_TIMER, 1, 0); + if (ret) + goto err; + } + + if (gmu->log_stream_enable) + gen7_hfi_send_set_value(adreno_dev, + HFI_VALUE_LOG_STREAM_ENABLE, 0, 1); + + if (gmu->log_group_mask) + gen7_hfi_send_set_value(adreno_dev, + HFI_VALUE_LOG_GROUP, 0, gmu->log_group_mask); + + ret = gen7_hfi_send_core_fw_start(adreno_dev); + if (ret) + goto err; + + ret = enable_preemption(adreno_dev); + if (ret) + goto err; + + ret = send_start_msg(adreno_dev); + if (ret) + goto err; + + enable_async_hfi(adreno_dev); + + set_bit(GMU_PRIV_HFI_STARTED, &gmu->flags); + + /* Request default DCVS level */ + ret = kgsl_pwrctrl_set_default_gpu_pwrlevel(device); + if (ret) + goto err; + + /* Request default BW vote */ + ret = kgsl_pwrctrl_axi(device, true); + +err: + if (ret) + gen7_hwsched_hfi_stop(adreno_dev); + + return ret; +} + +static int submit_raw_cmds(struct adreno_device *adreno_dev, void *cmds, + const char *str) +{ + int ret; + + ret = gen7_hfi_send_cmd_async(adreno_dev, cmds); + if (ret) + return ret; + + ret = gmu_core_timed_poll_check(KGSL_DEVICE(adreno_dev), + GEN7_GPU_GMU_AO_GPU_CX_BUSY_STATUS, 0, 200, BIT(23)); + if (ret) + gen7_spin_idle_debug(adreno_dev, str); + + return ret; +} + +static int cp_init(struct adreno_device *adreno_dev) +{ + u32 cmds[GEN7_CP_INIT_DWORDS + 1]; + + cmds[0] = CREATE_MSG_HDR(H2F_MSG_ISSUE_CMD_RAW, + (GEN7_CP_INIT_DWORDS + 1) << 2, HFI_MSG_CMD); + + gen7_cp_init_cmds(adreno_dev, &cmds[1]); + + return submit_raw_cmds(adreno_dev, cmds, + "CP initialization failed to idle\n"); +} + +static int send_switch_to_unsecure(struct adreno_device *adreno_dev) +{ + u32 cmds[3]; + + cmds[0] = CREATE_MSG_HDR(H2F_MSG_ISSUE_CMD_RAW, sizeof(cmds), + HFI_MSG_CMD); + + cmds[1] = cp_type7_packet(CP_SET_SECURE_MODE, 1); + cmds[2] = 0; + + return submit_raw_cmds(adreno_dev, cmds, + "Switch to unsecure failed to idle\n"); +} + +int gen7_hwsched_cp_init(struct adreno_device *adreno_dev) +{ + const struct adreno_gen7_core *gen7_core = to_gen7_core(adreno_dev); + struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_SQE); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + + /* Program the ucode base for CP */ + kgsl_regwrite(device, GEN7_CP_SQE_INSTR_BASE_LO, + lower_32_bits(fw->memdesc->gpuaddr)); + kgsl_regwrite(device, GEN7_CP_SQE_INSTR_BASE_HI, + upper_32_bits(fw->memdesc->gpuaddr)); + + ret = cp_init(adreno_dev); + if (ret) + return ret; + + ret = adreno_zap_shader_load(adreno_dev, gen7_core->zap_name); + if (ret) + return ret; + + if (!adreno_dev->zap_loaded) + kgsl_regwrite(KGSL_DEVICE(adreno_dev), + GEN7_RBBM_SECVID_TRUST_CNTL, 0x0); + else + ret = send_switch_to_unsecure(adreno_dev); + + return ret; +} + +static bool is_queue_empty(struct adreno_device 
*adreno_dev, u32 queue_idx) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem; + struct hfi_queue_table *tbl = mem_addr->hostptr; + struct hfi_queue_header *hdr = &tbl->qhdr[queue_idx]; + + if (hdr->status == HFI_QUEUE_STATUS_DISABLED) + return true; + + if (hdr->read_index == hdr->write_index) + return true; + + return false; +} + +static int hfi_f2h_main(void *arg) +{ + struct adreno_device *adreno_dev = arg; + struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); + + while (!kthread_should_stop()) { + wait_event_interruptible(hfi->f2h_wq, !kthread_should_stop() && + !(is_queue_empty(adreno_dev, HFI_MSG_ID) && + is_queue_empty(adreno_dev, HFI_DBG_ID))); + + if (kthread_should_stop()) + break; + + gen7_hwsched_process_msgq(adreno_dev); + process_dbgq_irq(adreno_dev); + } + + return 0; +} + +int gen7_hwsched_hfi_probe(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct gen7_hwsched_hfi *hw_hfi = to_gen7_hwsched_hfi(adreno_dev); + + gmu->hfi.irq = kgsl_request_irq(gmu->pdev, "hfi", + gen7_hwsched_hfi_handler, adreno_dev); + + if (gmu->hfi.irq < 0) + return gmu->hfi.irq; + + hw_hfi->irq_mask = HFI_IRQ_MASK; + + rwlock_init(&hw_hfi->msglock); + + INIT_LIST_HEAD(&hw_hfi->msglist); + + init_waitqueue_head(&hw_hfi->f2h_wq); + + return 0; +} + +void gen7_hwsched_hfi_remove(struct adreno_device *adreno_dev) +{ + struct gen7_hwsched_hfi *hw_hfi = to_gen7_hwsched_hfi(adreno_dev); + + kthread_stop(hw_hfi->f2h_task); +} + +static void add_profile_events(struct adreno_device *adreno_dev, + struct kgsl_drawobj *drawobj, struct adreno_submit_time *time) +{ + unsigned long flags; + u64 time_in_s; + unsigned long time_in_ns; + struct kgsl_context *context = drawobj->context; + struct submission_info info = {0}; + + /* + * Here we are attempting to create a mapping between the + * GPU time domain (alwayson counter) and the CPU time domain + * (local_clock) by sampling both values as close together as + * possible. This is useful for many types of debugging and + * profiling. In order to make this mapping as accurate as + * possible, we must turn off interrupts to avoid running + * interrupt handlers between the two samples. 
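+	 * The alwayson ticks sampled here are also recorded by the caller
+	 * as the submission timestamp and passed to the user profiling
+	 * buffer, so the two time domains can be correlated later.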
+ */ + + local_irq_save(flags); + + /* Read always on registers */ + time->ticks = gen7_read_alwayson(adreno_dev); + + /* Trace the GPU time to create a mapping to ftrace time */ + trace_adreno_cmdbatch_sync(context->id, context->priority, + drawobj->timestamp, time->ticks); + + /* Get the kernel clock for time since boot */ + time->ktime = local_clock(); + + /* Get the timeofday for the wall time (for the user) */ + ktime_get_real_ts64(&time->utime); + + local_irq_restore(flags); + + /* Return kernel clock time to the client if requested */ + time_in_s = time->ktime; + time_in_ns = do_div(time_in_s, 1000000000); + + info.inflight = -1; + info.rb_id = adreno_get_level(context->priority); + info.gmu_dispatch_queue = context->gmu_dispatch_queue; + + trace_adreno_cmdbatch_submitted(drawobj, &info, time->ticks, + (unsigned long) time_in_s, time_in_ns / 1000, 0); + + log_kgsl_cmdbatch_submitted_event(context->id, drawobj->timestamp, + context->priority, drawobj->flags); +} + +static u32 get_next_dq(u32 priority) +{ + struct dq_info *info = &gen7_hfi_dqs[priority]; + u32 next = info->base_dq_id + info->offset; + + info->offset = (info->offset + 1) % info->max_dq; + + return next; +} + +static u32 get_dq_id(u32 priority) +{ + u32 level = adreno_get_level(priority); + + return get_next_dq(level); +} + +static int send_context_register(struct adreno_device *adreno_dev, + struct kgsl_context *context) +{ + struct hfi_register_ctxt_cmd cmd; + struct kgsl_pagetable *pt = context->proc_priv->pagetable; + int ret; + + ret = CMD_MSG_HDR(cmd, H2F_MSG_REGISTER_CONTEXT); + if (ret) + return ret; + + cmd.ctxt_id = context->id; + cmd.flags = HFI_CTXT_FLAG_NOTIFY | context->flags; + cmd.pt_addr = kgsl_mmu_pagetable_get_ttbr0(pt); + cmd.ctxt_idr = pid_nr(context->proc_priv->pid); + cmd.ctxt_bank = kgsl_mmu_pagetable_get_context_bank(pt); + + return gen7_hfi_send_cmd_async(adreno_dev, &cmd); +} + +static int send_context_pointers(struct adreno_device *adreno_dev, + struct kgsl_context *context) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct hfi_context_pointers_cmd cmd; + int ret; + + ret = CMD_MSG_HDR(cmd, H2F_MSG_CONTEXT_POINTERS); + if (ret) + return ret; + + cmd.ctxt_id = context->id; + cmd.sop_addr = MEMSTORE_ID_GPU_ADDR(device, context->id, soptimestamp); + cmd.eop_addr = MEMSTORE_ID_GPU_ADDR(device, context->id, eoptimestamp); + if (context->user_ctxt_record) + cmd.user_ctxt_record_addr = + context->user_ctxt_record->memdesc.gpuaddr; + else + cmd.user_ctxt_record_addr = 0; + + return gen7_hfi_send_cmd_async(adreno_dev, &cmd); +} + +static int hfi_context_register(struct adreno_device *adreno_dev, + struct kgsl_context *context) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + + if (context->gmu_registered) + return 0; + + ret = send_context_register(adreno_dev, context); + if (ret) { + dev_err(&gmu->pdev->dev, + "Unable to register context %d: %d\n", + context->id, ret); + + if (device->gmu_fault) + adreno_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); + + return ret; + } + + ret = send_context_pointers(adreno_dev, context); + if (ret) { + dev_err(&gmu->pdev->dev, + "Unable to register context %d pointers: %d\n", + context->id, ret); + + if (device->gmu_fault) + adreno_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); + + return ret; + } + + context->gmu_registered = true; + context->gmu_dispatch_queue = get_dq_id(context->priority); + + return 0; +} + +static void populate_ibs(struct adreno_device *adreno_dev, 
+ struct hfi_submit_cmd *cmd, struct kgsl_drawobj_cmd *cmdobj) +{ + struct hfi_issue_ib *issue_ib; + struct kgsl_memobj_node *ib; + + if (cmdobj->numibs > HWSCHED_MAX_DISPATCH_NUMIBS) { + struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); + + /* + * The dispatcher ensures that there is only one big IB inflight + */ + cmd->big_ib_gmu_va = hfi->big_ib->gmuaddr; + cmd->flags |= CMDBATCH_INDIRECT; + issue_ib = hfi->big_ib->hostptr; + } else { + issue_ib = (struct hfi_issue_ib *)&cmd[1]; + } + + list_for_each_entry(ib, &cmdobj->cmdlist, node) { + issue_ib->addr = ib->gpuaddr; + issue_ib->size = ib->size; + issue_ib++; + } + + cmd->numibs = cmdobj->numibs; +} + +#define HFI_DSP_IRQ_BASE 2 + +#define DISPQ_IRQ_BIT(_idx) BIT((_idx) + HFI_DSP_IRQ_BASE) + +int gen7_hwsched_submit_cmdobj(struct adreno_device *adreno_dev, + struct kgsl_drawobj_cmd *cmdobj) +{ + struct gen7_hfi *hfi = to_gen7_hfi(adreno_dev); + int ret = 0; + u32 cmd_sizebytes; + struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj); + struct hfi_submit_cmd *cmd; + struct adreno_submit_time time = {0}; + + ret = hfi_context_register(adreno_dev, drawobj->context); + if (ret) + return ret; + + /* Add a *issue_ib struct for each IB */ + if (cmdobj->numibs > HWSCHED_MAX_DISPATCH_NUMIBS || + test_bit(CMDOBJ_SKIP, &cmdobj->priv)) + cmd_sizebytes = sizeof(*cmd); + else + cmd_sizebytes = sizeof(*cmd) + + (sizeof(struct hfi_issue_ib) * cmdobj->numibs); + + if (WARN_ON(cmd_sizebytes > HFI_MAX_MSG_SIZE)) + return -EMSGSIZE; + + cmd = kmalloc(cmd_sizebytes, GFP_KERNEL); + if (cmd == NULL) + return -ENOMEM; + + cmd->ctxt_id = drawobj->context->id; + cmd->flags = HFI_CTXT_FLAG_NOTIFY; + cmd->ts = drawobj->timestamp; + + if (test_bit(CMDOBJ_SKIP, &cmdobj->priv)) + goto skipib; + + populate_ibs(adreno_dev, cmd, cmdobj); + + if ((drawobj->flags & KGSL_DRAWOBJ_PROFILING) && + cmdobj->profiling_buf_entry) { + + time.drawobj = drawobj; + + cmd->profile_gpuaddr_lo = + lower_32_bits(cmdobj->profiling_buffer_gpuaddr); + cmd->profile_gpuaddr_hi = + upper_32_bits(cmdobj->profiling_buffer_gpuaddr); + + /* Indicate to GMU to do user profiling for this submission */ + cmd->flags |= CMDBATCH_PROFILING; + } + +skipib: + adreno_drawobj_set_constraint(KGSL_DEVICE(adreno_dev), drawobj); + + cmd->hdr = CREATE_MSG_HDR(H2F_MSG_ISSUE_CMD, cmd_sizebytes, + HFI_MSG_CMD); + cmd->hdr = MSG_HDR_SET_SEQNUM(cmd->hdr, + atomic_inc_return(&hfi->seqnum)); + + ret = gen7_hfi_queue_write(adreno_dev, + HFI_DSP_ID_0 + drawobj->context->gmu_dispatch_queue, + (u32 *)cmd); + if (ret) + goto free; + + add_profile_events(adreno_dev, drawobj, &time); + + cmdobj->submit_ticks = time.ticks; + + /* Send interrupt to GMU to receive the message */ + gmu_core_regwrite(KGSL_DEVICE(adreno_dev), GEN7_GMU_HOST2GMU_INTR_SET, + DISPQ_IRQ_BIT(drawobj->context->gmu_dispatch_queue)); + + /* Put the profiling information in the user profiling buffer */ + adreno_profile_submit_time(&time); + +free: + kfree(cmd); + + return ret; +} + +static int send_context_unregister_hfi(struct adreno_device *adreno_dev, + struct kgsl_context *context, u32 ts) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); + struct pending_cmd pending_ack; + struct hfi_unregister_ctxt_cmd cmd; + u32 seqnum; + int rc, ret; + + /* Only send HFI if device is not in SLUMBER */ + if (!context->gmu_registered || + !test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) + return 0; + + ret = CMD_MSG_HDR(cmd, 
H2F_MSG_UNREGISTER_CONTEXT); + if (ret) + return ret; + + cmd.ctxt_id = context->id, + cmd.ts = ts, + + seqnum = atomic_inc_return(&gmu->hfi.seqnum); + cmd.hdr = MSG_HDR_SET_SEQNUM(cmd.hdr, seqnum); + + add_waiter(hfi, cmd.hdr, &pending_ack); + + /* + * Although we know device is powered on, we can still enter SLUMBER + * because the wait for ack below is done without holding the mutex. So + * take an active count before releasing the mutex so as to avoid a + * concurrent SLUMBER sequence while GMU is un-registering this context. + */ + gen7_hwsched_active_count_get(adreno_dev); + + rc = gen7_hfi_cmdq_write(adreno_dev, (u32 *)&cmd); + if (rc) + goto done; + + mutex_unlock(&device->mutex); + + rc = wait_for_completion_timeout(&pending_ack.complete, + msecs_to_jiffies(30 * 1000)); + if (!rc) { + dev_err(&gmu->pdev->dev, + "Ack timeout for context unregister seq: %d ctx: %d ts: %d\n", + MSG_HDR_GET_SEQNUM(pending_ack.sent_hdr), + context->id, ts); + rc = -ETIMEDOUT; + + mutex_lock(&device->mutex); + + gmu_core_fault_snapshot(device); + + /* + * Trigger dispatcher based reset and recovery. Invalidate the + * context so that any un-finished inflight submissions are not + * replayed after recovery. + */ + adreno_drawctxt_set_guilty(device, context); + + adreno_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); + + goto done; + } + + mutex_lock(&device->mutex); + + rc = check_ack_failure(adreno_dev, &pending_ack); +done: + gen7_hwsched_active_count_put(adreno_dev); + + del_waiter(hfi, &pending_ack); + + return rc; +} + +void gen7_hwsched_context_detach(struct adreno_context *drawctxt) +{ + struct kgsl_context *context = &drawctxt->base; + struct kgsl_device *device = context->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + int ret = 0; + + mutex_lock(&device->mutex); + + ret = send_context_unregister_hfi(adreno_dev, context, + drawctxt->internal_timestamp); + + if (!ret) { + kgsl_sharedmem_writel(device->memstore, + KGSL_MEMSTORE_OFFSET(context->id, soptimestamp), + drawctxt->timestamp); + + kgsl_sharedmem_writel(device->memstore, + KGSL_MEMSTORE_OFFSET(context->id, eoptimestamp), + drawctxt->timestamp); + + adreno_profile_process_results(adreno_dev); + } + + context->gmu_registered = false; + + mutex_unlock(&device->mutex); +} + +u32 gen7_hwsched_preempt_count_get(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct hfi_get_value_cmd cmd; + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); + u32 seqnum = atomic_inc_return(&gmu->hfi.seqnum); + struct pending_cmd pending_ack; + int rc; + + if (device->state != KGSL_STATE_ACTIVE) + return 0; + + rc = CMD_MSG_HDR(cmd, H2F_MSG_GET_VALUE); + if (rc) + return 0; + + cmd.hdr = MSG_HDR_SET_SEQNUM(cmd.hdr, seqnum); + cmd.type = HFI_VALUE_PREEMPT_COUNT; + cmd.subtype = 0; + + add_waiter(hfi, cmd.hdr, &pending_ack); + + rc = gen7_hfi_cmdq_write(adreno_dev, (u32 *)&cmd); + if (rc) + goto done; + + rc = wait_ack_completion(adreno_dev, &pending_ack); + if (rc) + goto done; + + rc = check_ack_failure(adreno_dev, &pending_ack); + +done: + del_waiter(hfi, &pending_ack); + + return rc ? 0 : pending_ack.results[2]; +} diff --git a/adreno_gen7_hwsched_hfi.h b/adreno_gen7_hwsched_hfi.h new file mode 100644 index 0000000000..a756f0ded1 --- /dev/null +++ b/adreno_gen7_hwsched_hfi.h @@ -0,0 +1,155 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. 
+ */
+
+#ifndef _ADRENO_GEN7_HWSCHED_HFI_H_
+#define _ADRENO_GEN7_HWSCHED_HFI_H_
+
+/* Maximum number of IBs in a submission */
+#define HWSCHED_MAX_NUMIBS \
+	((HFI_MAX_MSG_SIZE - offsetof(struct hfi_issue_cmd_cmd, ibs)) \
+		/ sizeof(struct hfi_issue_ib))
+
+struct gen7_hwsched_hfi {
+	struct hfi_mem_alloc_entry mem_alloc_table[32];
+	u32 mem_alloc_entries;
+	/** @irq_mask: Store the hfi interrupt mask */
+	u32 irq_mask;
+	/** @msglock: To protect the list of un-ACKed hfi packets */
+	rwlock_t msglock;
+	/** @msglist: List of un-ACKed hfi packets */
+	struct list_head msglist;
+	/** @f2h_task: Task for processing gmu fw to host packets */
+	struct task_struct *f2h_task;
+	/** @f2h_wq: Waitqueue for the f2h_task */
+	wait_queue_head_t f2h_wq;
+	/** @big_ib: GMU buffer to hold big IBs */
+	struct kgsl_memdesc *big_ib;
+};
+
+struct kgsl_drawobj_cmd;
+
+/**
+ * gen7_hwsched_hfi_probe - Probe hwsched hfi resources
+ * @adreno_dev: Pointer to adreno device structure
+ *
+ * Return: 0 on success and negative error on failure.
+ */
+int gen7_hwsched_hfi_probe(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_hwsched_hfi_remove - Release hwsched hfi resources
+ * @adreno_dev: Pointer to adreno device structure
+ */
+void gen7_hwsched_hfi_remove(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_hwsched_hfi_init - Initialize hfi resources
+ * @adreno_dev: Pointer to adreno device structure
+ *
+ * This function is used to initialize hfi resources
+ * once before the very first gmu boot
+ *
+ * Return: 0 on success and negative error on failure.
+ */
+int gen7_hwsched_hfi_init(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_hwsched_hfi_start - Start hfi resources
+ * @adreno_dev: Pointer to adreno device structure
+ *
+ * Send the various hfi packets before booting the gpu
+ *
+ * Return: 0 on success and negative error on failure.
+ */
+int gen7_hwsched_hfi_start(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_hwsched_hfi_stop - Stop the hfi resources
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * This function does the hfi cleanup when powering down the gmu
+ */
+void gen7_hwsched_hfi_stop(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_hwsched_cp_init - Send CP_INIT via HFI
+ * @adreno_dev: Pointer to adreno device structure
+ *
+ * This function is used to send the CP INIT packet and bring
+ * the GPU out of secure mode using hfi raw packets.
+ *
+ * Return: 0 on success and negative error on failure.
+ */
+int gen7_hwsched_cp_init(struct adreno_device *adreno_dev);
+
+/**
+ * gen7_hfi_send_cmd_async - Send an hfi packet
+ * @adreno_dev: Pointer to adreno device structure
+ * @data: Data to be sent in the hfi packet
+ *
+ * Send data in the form of an HFI packet to the gmu and wait for
+ * its ack asynchronously
+ *
+ * Return: 0 on success and negative error on failure.
+ */
+int gen7_hfi_send_cmd_async(struct adreno_device *adreno_dev, void *data);
+
+/**
+ * gen7_hwsched_submit_cmdobj - Dispatch IBs to dispatch queues
+ * @adreno_dev: Pointer to adreno device structure
+ * @cmdobj: The command object which needs to be submitted
+ *
+ * This function is used to register the context if needed and submit
+ * IBs to the hfi dispatch queues.
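+ * Submissions with more IBs than fit inline in a single HFI packet are
+ * passed indirectly through the preallocated big_ib GMU buffer.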
+ + * Return: 0 on success and negative error on failure + */ +int gen7_hwsched_submit_cmdobj(struct adreno_device *adreno_dev, + struct kgsl_drawobj_cmd *cmdobj); + +/** + * gen7_hwsched_context_detach - Unregister a context with GMU + * @drawctxt: Pointer to the adreno context + * + * This function sends context unregister HFI and waits for the ack + * to ensure all submissions from this context have retired + */ +void gen7_hwsched_context_detach(struct adreno_context *drawctxt); + +/* Helper function to get to gen7 hwsched hfi device from adreno device */ +struct gen7_hwsched_hfi *to_gen7_hwsched_hfi(struct adreno_device *adreno_dev); + +/** + * gen7_hwsched_preempt_count_get - Get preemption count from GMU + * @adreno_dev: Pointer to adreno device + * + * This function sends a GET_VALUE HFI packet to get the number of + * preemptions completed since last SLUMBER exit. + * + * Return: Preemption count + */ +u32 gen7_hwsched_preempt_count_get(struct adreno_device *adreno_dev); + +/** + * gen7_hwsched_parse_payload - Parse payload to look up a key + * @payload: Pointer to a payload section + * @key: The key who's value is to be looked up + * + * This function parses the payload data which is a sequence + * of key-value pairs. + * + * Return: The value of the key or 0 if key is not found + */ +u32 gen7_hwsched_parse_payload(struct payload_section *payload, u32 key); + +/** + * gen7_hwsched_process_msgq - Process hfi msg queue + * @adreno_dev: Pointer to adreno device + * + * Process any pending firmware to host packets in the message + * queue + */ +void gen7_hwsched_process_msgq(struct adreno_device *adreno_dev); +#endif diff --git a/adreno_gen7_perfcounter.c b/adreno_gen7_perfcounter.c new file mode 100644 index 0000000000..f088856da3 --- /dev/null +++ b/adreno_gen7_perfcounter.c @@ -0,0 +1,896 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. 
+ */ + +#include "adreno.h" +#include "adreno_gen7.h" +#include "adreno_perfcounter.h" +#include "adreno_pm4types.h" +#include "kgsl_device.h" + +/* + * For registers that do not get restored on power cycle, read the value and add + * the stored shadow value + */ +static u64 gen7_counter_read_norestore(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + u32 hi, lo; + + kgsl_regread(device, reg->offset, &lo); + kgsl_regread(device, reg->offset_hi, &hi); + + return ((((u64) hi) << 32) | lo) + reg->value; +} + +static int gen7_counter_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter, unsigned int countable) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + int ret = 0; + + if (group->flags & ADRENO_PERFCOUNTER_GROUP_RESTORE) + ret = gen7_perfcounter_update(adreno_dev, reg, true); + else + kgsl_regwrite(device, reg->select, countable); + + if (!ret) + reg->value = 0; + + return ret; +} + +static int gen7_counter_inline_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter, unsigned int countable) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + struct adreno_ringbuffer *rb = &adreno_dev->ringbuffers[0]; + u32 cmds[3]; + int ret; + + if (!(device->state == KGSL_STATE_ACTIVE)) + return gen7_counter_enable(adreno_dev, group, counter, + countable); + + if (group->flags & ADRENO_PERFCOUNTER_GROUP_RESTORE) + gen7_perfcounter_update(adreno_dev, reg, false); + + cmds[0] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0); + cmds[1] = cp_type4_packet(reg->select, 1); + cmds[2] = countable; + + /* submit to highest priority RB always */ + ret = gen7_ringbuffer_addcmds(adreno_dev, rb, NULL, + F_NOTPROTECTED, cmds, 3, 0, NULL); + if (ret) + return ret; + + /* + * schedule dispatcher to make sure rb[0] is run, because + * if the current RB is not rb[0] and gpu is idle then + * rb[0] will not get scheduled to run + */ + if (adreno_dev->cur_rb != rb) + adreno_dispatcher_schedule(device); + + /* wait for the above commands submitted to complete */ + ret = adreno_ringbuffer_waittimestamp(rb, rb->timestamp, + ADRENO_IDLE_TIMEOUT); + + if (ret) { + /* + * If we were woken up because of cancelling rb events + * either due to soft reset or adreno_stop, ignore the + * error and return 0 here. 
The perfcounter is already + * set up in software and it will be programmed in + * hardware when we wake up or come up after soft reset + */ + if (ret == -EAGAIN) + ret = 0; + else + dev_err(device->dev, + "Perfcounter %s/%u/%u start via commands failed %d\n", + group->name, counter, countable, ret); + } + + if (!ret) + reg->value = 0; + + return ret; +} + +static u64 gen7_counter_read(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + u32 hi, lo; + + kgsl_regread(device, reg->offset, &lo); + kgsl_regread(device, reg->offset_hi, &hi); + + /* These registers are restored on power resume */ + return (((u64) hi) << 32) | lo; +} + +static int gen7_counter_gbif_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter, unsigned int countable) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + unsigned int shift = counter << 3; + unsigned int select = BIT(counter); + + if (countable > 0xff) + return -EINVAL; + + /* + * Write 1, followed by 0 to CLR register for + * clearing the counter + */ + kgsl_regrmw(device, GEN7_GBIF_PERF_PWR_CNT_CLR, select, select); + kgsl_regrmw(device, GEN7_GBIF_PERF_PWR_CNT_CLR, select, 0); + + /* select the desired countable */ + kgsl_regrmw(device, reg->select, 0xff << shift, countable << shift); + + /* enable counter */ + kgsl_regrmw(device, GEN7_GBIF_PERF_PWR_CNT_EN, select, select); + + reg->value = 0; + return 0; +} + +static int gen7_counter_gbif_pwr_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter, unsigned int countable) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + unsigned int shift = counter << 3; + unsigned int select = BIT(16 + counter); + + if (countable > 0xff) + return -EINVAL; + + /* + * Write 1, followed by 0 to CLR register for + * clearing the counter + */ + kgsl_regrmw(device, GEN7_GBIF_PERF_PWR_CNT_CLR, select, select); + kgsl_regrmw(device, GEN7_GBIF_PERF_PWR_CNT_CLR, select, 0); + + /* select the desired countable */ + kgsl_regrmw(device, reg->select, 0xff << shift, countable << shift); + + /* Enable the counter */ + kgsl_regrmw(device, GEN7_GBIF_PERF_PWR_CNT_EN, select, select); + + reg->value = 0; + return 0; +} + +static int gen7_counter_alwayson_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter, unsigned int countable) +{ + return 0; +} + +static u64 gen7_counter_alwayson_read(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter) +{ + struct adreno_perfcount_register *reg = &group->regs[counter]; + + return gen7_read_alwayson(adreno_dev) + reg->value; +} + +static void gen7_write_gmu_counter_enable(struct kgsl_device *device, + struct adreno_perfcount_register *reg, u32 bit, u32 countable) +{ + kgsl_regrmw(device, reg->select, 0xff << bit, countable << bit); +} + +static int gen7_counter_gmu_xoclk_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter, unsigned int countable) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + + if (countable > 0xff) + return -EINVAL; 
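+	/* Each select field is 8 bits wide, hence the 0xff countable limit */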
+ + /* + * Counters [0:3] are in select 1 bit offsets 0, 8, 16 and 24 + * Counters [4:5] are in select 2 bit offset 0, 8 + * Counters [6:9] are in select 3 bit offset 0, 8, 16 and 24 + */ + + if (counter == 4 || counter == 5) + counter -= 4; + else if (counter >= 6) + counter -= 6; + + gen7_write_gmu_counter_enable(device, reg, counter * 8, countable); + + reg->value = 0; + + kgsl_regwrite(device, GEN7_GMU_CX_GMU_POWER_COUNTER_ENABLE, 1); + + return 0; +} + +static int gen7_counter_gmu_gmuclk_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter, unsigned int countable) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + + if (countable > 0xff) + return -EINVAL; + + /* + * The two counters are stuck into GMU_CX_GMU_POWER_COUNTER_SELECT_1 + * at bit offset 16 and 24 + */ + gen7_write_gmu_counter_enable(device, reg, + 16 + (counter * 8), countable); + + kgsl_regwrite(device, GEN7_GMU_CX_GMU_POWER_COUNTER_ENABLE, 1); + + reg->value = 0; + return 0; +} + +static int gen7_counter_gmu_perf_enable(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter, unsigned int countable) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg = &group->regs[counter]; + + if (countable > 0xff) + return -EINVAL; + + /* + * Counters [0:3] are in select 1 bit offsets 0, 8, 16 and 24 + * Counters [4:5] are in select 2 bit offset 0, 8 + */ + + if (counter >= 4) + counter -= 4; + + gen7_write_gmu_counter_enable(device, reg, counter * 8, countable); + + kgsl_regwrite(device, GEN7_GMU_CX_GMU_PERF_COUNTER_ENABLE, 1); + + reg->value = 0; + return 0; +} + +static struct adreno_perfcount_register gen7_perfcounters_cp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CP_0_LO, + GEN7_RBBM_PERFCTR_CP_0_HI, -1, GEN7_CP_PERFCTR_CP_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CP_1_LO, + GEN7_RBBM_PERFCTR_CP_1_HI, -1, GEN7_CP_PERFCTR_CP_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CP_2_LO, + GEN7_RBBM_PERFCTR_CP_2_HI, -1, GEN7_CP_PERFCTR_CP_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CP_3_LO, + GEN7_RBBM_PERFCTR_CP_3_HI, -1, GEN7_CP_PERFCTR_CP_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CP_4_LO, + GEN7_RBBM_PERFCTR_CP_4_HI, -1, GEN7_CP_PERFCTR_CP_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CP_5_LO, + GEN7_RBBM_PERFCTR_CP_5_HI, -1, GEN7_CP_PERFCTR_CP_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CP_6_LO, + GEN7_RBBM_PERFCTR_CP_6_HI, -1, GEN7_CP_PERFCTR_CP_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CP_7_LO, + GEN7_RBBM_PERFCTR_CP_7_HI, -1, GEN7_CP_PERFCTR_CP_SEL_7 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CP_8_LO, + GEN7_RBBM_PERFCTR_CP_8_HI, -1, GEN7_CP_PERFCTR_CP_SEL_8 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CP_9_LO, + GEN7_RBBM_PERFCTR_CP_9_HI, -1, GEN7_CP_PERFCTR_CP_SEL_9 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CP_10_LO, + GEN7_RBBM_PERFCTR_CP_10_HI, -1, GEN7_CP_PERFCTR_CP_SEL_10 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CP_11_LO, + GEN7_RBBM_PERFCTR_CP_11_HI, -1, GEN7_CP_PERFCTR_CP_SEL_11 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CP_12_LO, + GEN7_RBBM_PERFCTR_CP_12_HI, -1, GEN7_CP_PERFCTR_CP_SEL_12 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CP_13_LO, + 
GEN7_RBBM_PERFCTR_CP_13_HI, -1, GEN7_CP_PERFCTR_CP_SEL_13 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_bv_cp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_CP_0_LO, + GEN7_RBBM_PERFCTR2_CP_0_HI, -1, GEN7_CP_BV_PERFCTR_CP_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_CP_1_LO, + GEN7_RBBM_PERFCTR2_CP_1_HI, -1, GEN7_CP_BV_PERFCTR_CP_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_CP_2_LO, + GEN7_RBBM_PERFCTR2_CP_2_HI, -1, GEN7_CP_BV_PERFCTR_CP_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_CP_3_LO, + GEN7_RBBM_PERFCTR2_CP_3_HI, -1, GEN7_CP_BV_PERFCTR_CP_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_CP_4_LO, + GEN7_RBBM_PERFCTR2_CP_4_HI, -1, GEN7_CP_BV_PERFCTR_CP_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_CP_5_LO, + GEN7_RBBM_PERFCTR2_CP_5_HI, -1, GEN7_CP_BV_PERFCTR_CP_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_CP_6_LO, + GEN7_RBBM_PERFCTR2_CP_6_HI, -1, GEN7_CP_BV_PERFCTR_CP_SEL_6 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_rbbm[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RBBM_0_LO, + GEN7_RBBM_PERFCTR_RBBM_0_HI, -1, GEN7_RBBM_PERFCTR_RBBM_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RBBM_1_LO, + GEN7_RBBM_PERFCTR_RBBM_1_HI, -1, GEN7_RBBM_PERFCTR_RBBM_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RBBM_2_LO, + GEN7_RBBM_PERFCTR_RBBM_2_HI, -1, GEN7_RBBM_PERFCTR_RBBM_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RBBM_3_LO, + GEN7_RBBM_PERFCTR_RBBM_3_HI, -1, GEN7_RBBM_PERFCTR_RBBM_SEL_3 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_pc[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_PC_0_LO, + GEN7_RBBM_PERFCTR_PC_0_HI, -1, GEN7_PC_PERFCTR_PC_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_PC_1_LO, + GEN7_RBBM_PERFCTR_PC_1_HI, -1, GEN7_PC_PERFCTR_PC_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_PC_2_LO, + GEN7_RBBM_PERFCTR_PC_2_HI, -1, GEN7_PC_PERFCTR_PC_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_PC_3_LO, + GEN7_RBBM_PERFCTR_PC_3_HI, -1, GEN7_PC_PERFCTR_PC_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_PC_4_LO, + GEN7_RBBM_PERFCTR_PC_4_HI, -1, GEN7_PC_PERFCTR_PC_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_PC_5_LO, + GEN7_RBBM_PERFCTR_PC_5_HI, -1, GEN7_PC_PERFCTR_PC_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_PC_6_LO, + GEN7_RBBM_PERFCTR_PC_6_HI, -1, GEN7_PC_PERFCTR_PC_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_PC_7_LO, + GEN7_RBBM_PERFCTR_PC_7_HI, -1, GEN7_PC_PERFCTR_PC_SEL_7 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_bv_pc[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_PC_0_LO, + GEN7_RBBM_PERFCTR_BV_PC_0_HI, -1, GEN7_PC_PERFCTR_PC_SEL_8 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_PC_1_LO, + GEN7_RBBM_PERFCTR_BV_PC_1_HI, -1, GEN7_PC_PERFCTR_PC_SEL_9 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_PC_2_LO, + GEN7_RBBM_PERFCTR_BV_PC_2_HI, -1, GEN7_PC_PERFCTR_PC_SEL_10 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_PC_3_LO, + GEN7_RBBM_PERFCTR_BV_PC_3_HI, -1, GEN7_PC_PERFCTR_PC_SEL_11 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_PC_4_LO, + GEN7_RBBM_PERFCTR_BV_PC_4_HI, -1, GEN7_PC_PERFCTR_PC_SEL_12 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_PC_5_LO, + 
GEN7_RBBM_PERFCTR_BV_PC_5_HI, -1, GEN7_PC_PERFCTR_PC_SEL_13 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_PC_6_LO, + GEN7_RBBM_PERFCTR_BV_PC_6_HI, -1, GEN7_PC_PERFCTR_PC_SEL_14 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_PC_7_LO, + GEN7_RBBM_PERFCTR_BV_PC_7_HI, -1, GEN7_PC_PERFCTR_PC_SEL_15 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_vfd[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VFD_0_LO, + GEN7_RBBM_PERFCTR_VFD_0_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VFD_1_LO, + GEN7_RBBM_PERFCTR_VFD_1_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VFD_2_LO, + GEN7_RBBM_PERFCTR_VFD_2_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VFD_3_LO, + GEN7_RBBM_PERFCTR_VFD_3_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VFD_4_LO, + GEN7_RBBM_PERFCTR_VFD_4_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VFD_5_LO, + GEN7_RBBM_PERFCTR_VFD_5_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VFD_6_LO, + GEN7_RBBM_PERFCTR_VFD_6_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VFD_7_LO, + GEN7_RBBM_PERFCTR_VFD_7_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_7 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_bv_vfd[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_VFD_0_LO, + GEN7_RBBM_PERFCTR_BV_VFD_0_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_8 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_VFD_1_LO, + GEN7_RBBM_PERFCTR_BV_VFD_1_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_9 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_VFD_2_LO, + GEN7_RBBM_PERFCTR_BV_VFD_2_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_10 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_VFD_3_LO, + GEN7_RBBM_PERFCTR_BV_VFD_3_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_11 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_VFD_4_LO, + GEN7_RBBM_PERFCTR_BV_VFD_4_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_12 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_VFD_5_LO, + GEN7_RBBM_PERFCTR_BV_VFD_5_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_13 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_VFD_6_LO, + GEN7_RBBM_PERFCTR_BV_VFD_6_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_14 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_VFD_7_LO, + GEN7_RBBM_PERFCTR_BV_VFD_7_HI, -1, GEN7_VFD_PERFCTR_VFD_SEL_15 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_hlsq[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_HLSQ_0_LO, + GEN7_RBBM_PERFCTR_HLSQ_0_HI, -1, GEN7_SP_PERFCTR_HLSQ_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_HLSQ_1_LO, + GEN7_RBBM_PERFCTR_HLSQ_1_HI, -1, GEN7_SP_PERFCTR_HLSQ_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_HLSQ_2_LO, + GEN7_RBBM_PERFCTR_HLSQ_2_HI, -1, GEN7_SP_PERFCTR_HLSQ_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_HLSQ_3_LO, + GEN7_RBBM_PERFCTR_HLSQ_3_HI, -1, GEN7_SP_PERFCTR_HLSQ_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_HLSQ_4_LO, + GEN7_RBBM_PERFCTR_HLSQ_4_HI, -1, GEN7_SP_PERFCTR_HLSQ_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_HLSQ_5_LO, + GEN7_RBBM_PERFCTR_HLSQ_5_HI, -1, GEN7_SP_PERFCTR_HLSQ_SEL_5 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_vpc[] = { + { 
KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VPC_0_LO, + GEN7_RBBM_PERFCTR_VPC_0_HI, -1, GEN7_VPC_PERFCTR_VPC_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VPC_1_LO, + GEN7_RBBM_PERFCTR_VPC_1_HI, -1, GEN7_VPC_PERFCTR_VPC_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VPC_2_LO, + GEN7_RBBM_PERFCTR_VPC_2_HI, -1, GEN7_VPC_PERFCTR_VPC_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VPC_3_LO, + GEN7_RBBM_PERFCTR_VPC_3_HI, -1, GEN7_VPC_PERFCTR_VPC_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VPC_4_LO, + GEN7_RBBM_PERFCTR_VPC_4_HI, -1, GEN7_VPC_PERFCTR_VPC_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VPC_5_LO, + GEN7_RBBM_PERFCTR_VPC_5_HI, -1, GEN7_VPC_PERFCTR_VPC_SEL_5 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_bv_vpc[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_VPC_0_LO, + GEN7_RBBM_PERFCTR_BV_VPC_0_HI, -1, GEN7_VPC_PERFCTR_VPC_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_VPC_1_LO, + GEN7_RBBM_PERFCTR_BV_VPC_1_HI, -1, GEN7_VPC_PERFCTR_VPC_SEL_7 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_VPC_2_LO, + GEN7_RBBM_PERFCTR_BV_VPC_2_HI, -1, GEN7_VPC_PERFCTR_VPC_SEL_8 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_VPC_3_LO, + GEN7_RBBM_PERFCTR_BV_VPC_3_HI, -1, GEN7_VPC_PERFCTR_VPC_SEL_9 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_VPC_4_LO, + GEN7_RBBM_PERFCTR_BV_VPC_4_HI, -1, GEN7_VPC_PERFCTR_VPC_SEL_10 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_BV_VPC_5_LO, + GEN7_RBBM_PERFCTR_BV_VPC_5_HI, -1, GEN7_VPC_PERFCTR_VPC_SEL_11 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_ccu[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CCU_0_LO, + GEN7_RBBM_PERFCTR_CCU_0_HI, -1, GEN7_RB_PERFCTR_CCU_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CCU_1_LO, + GEN7_RBBM_PERFCTR_CCU_1_HI, -1, GEN7_RB_PERFCTR_CCU_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CCU_2_LO, + GEN7_RBBM_PERFCTR_CCU_2_HI, -1, GEN7_RB_PERFCTR_CCU_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CCU_3_LO, + GEN7_RBBM_PERFCTR_CCU_3_HI, -1, GEN7_RB_PERFCTR_CCU_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CCU_4_LO, + GEN7_RBBM_PERFCTR_CCU_4_HI, -1, GEN7_RB_PERFCTR_CCU_SEL_4 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_tse[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TSE_0_LO, + GEN7_RBBM_PERFCTR_TSE_0_HI, -1, GEN7_GRAS_PERFCTR_TSE_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TSE_1_LO, + GEN7_RBBM_PERFCTR_TSE_1_HI, -1, GEN7_GRAS_PERFCTR_TSE_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TSE_2_LO, + GEN7_RBBM_PERFCTR_TSE_2_HI, -1, GEN7_GRAS_PERFCTR_TSE_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TSE_3_LO, + GEN7_RBBM_PERFCTR_TSE_3_HI, -1, GEN7_GRAS_PERFCTR_TSE_SEL_3 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_ras[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RAS_0_LO, + GEN7_RBBM_PERFCTR_RAS_0_HI, -1, GEN7_GRAS_PERFCTR_RAS_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RAS_1_LO, + GEN7_RBBM_PERFCTR_RAS_1_HI, -1, GEN7_GRAS_PERFCTR_RAS_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RAS_2_LO, + GEN7_RBBM_PERFCTR_RAS_2_HI, -1, GEN7_GRAS_PERFCTR_RAS_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RAS_3_LO, + GEN7_RBBM_PERFCTR_RAS_3_HI, -1, 
GEN7_GRAS_PERFCTR_RAS_SEL_3 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_uche[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_0_LO, + GEN7_RBBM_PERFCTR_UCHE_0_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_1_LO, + GEN7_RBBM_PERFCTR_UCHE_1_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_2_LO, + GEN7_RBBM_PERFCTR_UCHE_2_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_3_LO, + GEN7_RBBM_PERFCTR_UCHE_3_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_4_LO, + GEN7_RBBM_PERFCTR_UCHE_4_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_5_LO, + GEN7_RBBM_PERFCTR_UCHE_5_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_6_LO, + GEN7_RBBM_PERFCTR_UCHE_6_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_7_LO, + GEN7_RBBM_PERFCTR_UCHE_7_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_7 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_8_LO, + GEN7_RBBM_PERFCTR_UCHE_8_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_8 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_9_LO, + GEN7_RBBM_PERFCTR_UCHE_9_HI, -1, GEN7_UCHE_PERFCTR_UCHE_SEL_9 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_10_LO, + GEN7_RBBM_PERFCTR_UCHE_10_HI, -1, + GEN7_UCHE_PERFCTR_UCHE_SEL_10 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UCHE_11_LO, + GEN7_RBBM_PERFCTR_UCHE_11_HI, -1, + GEN7_UCHE_PERFCTR_UCHE_SEL_11 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_tp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TP_0_LO, + GEN7_RBBM_PERFCTR_TP_0_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TP_1_LO, + GEN7_RBBM_PERFCTR_TP_1_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TP_2_LO, + GEN7_RBBM_PERFCTR_TP_2_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TP_3_LO, + GEN7_RBBM_PERFCTR_TP_3_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TP_4_LO, + GEN7_RBBM_PERFCTR_TP_4_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TP_5_LO, + GEN7_RBBM_PERFCTR_TP_5_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TP_6_LO, + GEN7_RBBM_PERFCTR_TP_6_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TP_7_LO, + GEN7_RBBM_PERFCTR_TP_7_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_7 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TP_8_LO, + GEN7_RBBM_PERFCTR_TP_8_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_8 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TP_9_LO, + GEN7_RBBM_PERFCTR_TP_9_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_9 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TP_10_LO, + GEN7_RBBM_PERFCTR_TP_10_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_10 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_TP_11_LO, + GEN7_RBBM_PERFCTR_TP_11_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_11 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_bv_tp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_TP_0_LO, + GEN7_RBBM_PERFCTR2_TP_0_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_12 }, + { KGSL_PERFCOUNTER_NOT_USED, 
0, 0, GEN7_RBBM_PERFCTR2_TP_1_LO, + GEN7_RBBM_PERFCTR2_TP_1_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_13 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_TP_2_LO, + GEN7_RBBM_PERFCTR2_TP_2_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_14 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_TP_3_LO, + GEN7_RBBM_PERFCTR2_TP_3_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_15 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_TP_4_LO, + GEN7_RBBM_PERFCTR2_TP_4_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_16 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_TP_5_LO, + GEN7_RBBM_PERFCTR2_TP_5_HI, -1, GEN7_TPL1_PERFCTR_TP_SEL_17 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_sp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_0_LO, + GEN7_RBBM_PERFCTR_SP_0_HI, -1, GEN7_SP_PERFCTR_SP_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_1_LO, + GEN7_RBBM_PERFCTR_SP_1_HI, -1, GEN7_SP_PERFCTR_SP_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_2_LO, + GEN7_RBBM_PERFCTR_SP_2_HI, -1, GEN7_SP_PERFCTR_SP_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_3_LO, + GEN7_RBBM_PERFCTR_SP_3_HI, -1, GEN7_SP_PERFCTR_SP_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_4_LO, + GEN7_RBBM_PERFCTR_SP_4_HI, -1, GEN7_SP_PERFCTR_SP_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_5_LO, + GEN7_RBBM_PERFCTR_SP_5_HI, -1, GEN7_SP_PERFCTR_SP_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_6_LO, + GEN7_RBBM_PERFCTR_SP_6_HI, -1, GEN7_SP_PERFCTR_SP_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_7_LO, + GEN7_RBBM_PERFCTR_SP_7_HI, -1, GEN7_SP_PERFCTR_SP_SEL_7 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_8_LO, + GEN7_RBBM_PERFCTR_SP_8_HI, -1, GEN7_SP_PERFCTR_SP_SEL_8 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_9_LO, + GEN7_RBBM_PERFCTR_SP_9_HI, -1, GEN7_SP_PERFCTR_SP_SEL_9 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_10_LO, + GEN7_RBBM_PERFCTR_SP_10_HI, -1, GEN7_SP_PERFCTR_SP_SEL_10 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_11_LO, + GEN7_RBBM_PERFCTR_SP_11_HI, -1, GEN7_SP_PERFCTR_SP_SEL_11 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_12_LO, + GEN7_RBBM_PERFCTR_SP_12_HI, -1, GEN7_SP_PERFCTR_SP_SEL_12 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_13_LO, + GEN7_RBBM_PERFCTR_SP_13_HI, -1, GEN7_SP_PERFCTR_SP_SEL_13 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_14_LO, + GEN7_RBBM_PERFCTR_SP_14_HI, -1, GEN7_SP_PERFCTR_SP_SEL_14 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_15_LO, + GEN7_RBBM_PERFCTR_SP_15_HI, -1, GEN7_SP_PERFCTR_SP_SEL_15 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_16_LO, + GEN7_RBBM_PERFCTR_SP_16_HI, -1, GEN7_SP_PERFCTR_SP_SEL_16 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_17_LO, + GEN7_RBBM_PERFCTR_SP_17_HI, -1, GEN7_SP_PERFCTR_SP_SEL_17 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_18_LO, + GEN7_RBBM_PERFCTR_SP_18_HI, -1, GEN7_SP_PERFCTR_SP_SEL_18 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_19_LO, + GEN7_RBBM_PERFCTR_SP_19_HI, -1, GEN7_SP_PERFCTR_SP_SEL_19 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_20_LO, + GEN7_RBBM_PERFCTR_SP_20_HI, -1, GEN7_SP_PERFCTR_SP_SEL_20 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_21_LO, + GEN7_RBBM_PERFCTR_SP_21_HI, -1, GEN7_SP_PERFCTR_SP_SEL_21 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_22_LO, + 
GEN7_RBBM_PERFCTR_SP_22_HI, -1, GEN7_SP_PERFCTR_SP_SEL_22 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_SP_23_LO, + GEN7_RBBM_PERFCTR_SP_23_HI, -1, GEN7_SP_PERFCTR_SP_SEL_23 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_bv_sp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_SP_0_LO, + GEN7_RBBM_PERFCTR2_SP_0_HI, -1, GEN7_SP_PERFCTR_SP_SEL_24 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_SP_1_LO, + GEN7_RBBM_PERFCTR2_SP_1_HI, -1, GEN7_SP_PERFCTR_SP_SEL_25 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_SP_2_LO, + GEN7_RBBM_PERFCTR2_SP_2_HI, -1, GEN7_SP_PERFCTR_SP_SEL_26 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_SP_3_LO, + GEN7_RBBM_PERFCTR2_SP_3_HI, -1, GEN7_SP_PERFCTR_SP_SEL_27 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_SP_4_LO, + GEN7_RBBM_PERFCTR2_SP_4_HI, -1, GEN7_SP_PERFCTR_SP_SEL_28 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_SP_5_LO, + GEN7_RBBM_PERFCTR2_SP_5_HI, -1, GEN7_SP_PERFCTR_SP_SEL_29 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_SP_6_LO, + GEN7_RBBM_PERFCTR2_SP_6_HI, -1, GEN7_SP_PERFCTR_SP_SEL_30 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_SP_7_LO, + GEN7_RBBM_PERFCTR2_SP_7_HI, -1, GEN7_SP_PERFCTR_SP_SEL_31 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_SP_8_LO, + GEN7_RBBM_PERFCTR2_SP_8_HI, -1, GEN7_SP_PERFCTR_SP_SEL_32 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_SP_9_LO, + GEN7_RBBM_PERFCTR2_SP_9_HI, -1, GEN7_SP_PERFCTR_SP_SEL_33 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_SP_10_LO, + GEN7_RBBM_PERFCTR2_SP_10_HI, -1, GEN7_SP_PERFCTR_SP_SEL_34 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_SP_11_LO, + GEN7_RBBM_PERFCTR2_SP_11_HI, -1, GEN7_SP_PERFCTR_SP_SEL_35 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_rb[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RB_0_LO, + GEN7_RBBM_PERFCTR_RB_0_HI, -1, GEN7_RB_PERFCTR_RB_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RB_1_LO, + GEN7_RBBM_PERFCTR_RB_1_HI, -1, GEN7_RB_PERFCTR_RB_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RB_2_LO, + GEN7_RBBM_PERFCTR_RB_2_HI, -1, GEN7_RB_PERFCTR_RB_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RB_3_LO, + GEN7_RBBM_PERFCTR_RB_3_HI, -1, GEN7_RB_PERFCTR_RB_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RB_4_LO, + GEN7_RBBM_PERFCTR_RB_4_HI, -1, GEN7_RB_PERFCTR_RB_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RB_5_LO, + GEN7_RBBM_PERFCTR_RB_5_HI, -1, GEN7_RB_PERFCTR_RB_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RB_6_LO, + GEN7_RBBM_PERFCTR_RB_6_HI, -1, GEN7_RB_PERFCTR_RB_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_RB_7_LO, + GEN7_RBBM_PERFCTR_RB_7_HI, -1, GEN7_RB_PERFCTR_RB_SEL_7 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_vsc[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VSC_0_LO, + GEN7_RBBM_PERFCTR_VSC_0_HI, -1, GEN7_VSC_PERFCTR_VSC_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_VSC_1_LO, + GEN7_RBBM_PERFCTR_VSC_1_HI, -1, GEN7_VSC_PERFCTR_VSC_SEL_1 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_lrz[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_LRZ_0_LO, + GEN7_RBBM_PERFCTR_LRZ_0_HI, -1, GEN7_GRAS_PERFCTR_LRZ_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_LRZ_1_LO, + GEN7_RBBM_PERFCTR_LRZ_1_HI, -1, GEN7_GRAS_PERFCTR_LRZ_SEL_1 }, + { 
KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_LRZ_2_LO, + GEN7_RBBM_PERFCTR_LRZ_2_HI, -1, GEN7_GRAS_PERFCTR_LRZ_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_LRZ_3_LO, + GEN7_RBBM_PERFCTR_LRZ_3_HI, -1, GEN7_GRAS_PERFCTR_LRZ_SEL_3 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_cmp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CMP_0_LO, + GEN7_RBBM_PERFCTR_CMP_0_HI, -1, GEN7_RB_PERFCTR_CMP_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CMP_1_LO, + GEN7_RBBM_PERFCTR_CMP_1_HI, -1, GEN7_RB_PERFCTR_CMP_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CMP_2_LO, + GEN7_RBBM_PERFCTR_CMP_2_HI, -1, GEN7_RB_PERFCTR_CMP_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_CMP_3_LO, + GEN7_RBBM_PERFCTR_CMP_3_HI, -1, GEN7_RB_PERFCTR_CMP_SEL_3 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_ufc[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UFC_0_LO, + GEN7_RBBM_PERFCTR_UFC_0_HI, -1, GEN7_RB_PERFCTR_UFC_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UFC_1_LO, + GEN7_RBBM_PERFCTR_UFC_1_HI, -1, GEN7_RB_PERFCTR_UFC_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UFC_2_LO, + GEN7_RBBM_PERFCTR_UFC_2_HI, -1, GEN7_RB_PERFCTR_UFC_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR_UFC_3_LO, + GEN7_RBBM_PERFCTR_UFC_3_HI, -1, GEN7_RB_PERFCTR_UFC_SEL_3 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_bv_ufc[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_UFC_0_LO, + GEN7_RBBM_PERFCTR2_UFC_0_HI, -1, GEN7_RB_PERFCTR_UFC_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_RBBM_PERFCTR2_UFC_1_LO, + GEN7_RBBM_PERFCTR2_UFC_1_HI, -1, GEN7_RB_PERFCTR_UFC_SEL_5 }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_gbif[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_GBIF_PERF_CNT_LOW0, + GEN7_GBIF_PERF_CNT_HIGH0, -1, GEN7_GBIF_PERF_CNT_SEL }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_GBIF_PERF_CNT_LOW1, + GEN7_GBIF_PERF_CNT_HIGH1, -1, GEN7_GBIF_PERF_CNT_SEL }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_GBIF_PERF_CNT_LOW2, + GEN7_GBIF_PERF_CNT_HIGH2, -1, GEN7_GBIF_PERF_CNT_SEL }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_GBIF_PERF_CNT_LOW3, + GEN7_GBIF_PERF_CNT_HIGH3, -1, GEN7_GBIF_PERF_CNT_SEL }, +}; + +static struct adreno_perfcount_register gen7_perfcounters_gbif_pwr[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_GBIF_PWR_CNT_LOW0, + GEN7_GBIF_PWR_CNT_HIGH0, -1, GEN7_GBIF_PERF_PWR_CNT_SEL }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_GBIF_PWR_CNT_LOW1, + GEN7_GBIF_PWR_CNT_HIGH1, -1, GEN7_GBIF_PERF_PWR_CNT_SEL }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_GBIF_PWR_CNT_LOW2, + GEN7_GBIF_PWR_CNT_HIGH2, -1, GEN7_GBIF_PERF_PWR_CNT_SEL }, +}; + +#define GMU_COUNTER(lo, hi, sel) \ + { .countable = KGSL_PERFCOUNTER_NOT_USED, \ + .offset = lo, .offset_hi = hi, .select = sel } + +#define GMU_COUNTER_RESERVED(lo, hi, sel) \ + { .countable = KGSL_PERFCOUNTER_BROKEN, \ + .offset = lo, .offset_hi = hi, .select = sel } + +static struct adreno_perfcount_register gen7_perfcounters_gmu_xoclk[] = { + /* + * COUNTER_XOCLK_0 and COUNTER_XOCLK_4 are used for the GPU + * busy and ifpc count. Mark them as reserved to ensure they + * are not re-used. 
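+ * (GMU_COUNTER_RESERVED above seeds these two entries with
+ * KGSL_PERFCOUNTER_BROKEN rather than KGSL_PERFCOUNTER_NOT_USED, which is
+ * intended to keep the perfcounter core from ever handing them out to clients.)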
+ */ + GMU_COUNTER_RESERVED(GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L, + GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H, + GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_0), + GMU_COUNTER(GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_1_L, + GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_1_H, + GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_0), + GMU_COUNTER(GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_2_L, + GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_2_H, + GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_0), + GMU_COUNTER(GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_3_L, + GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_3_H, + GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_0), + GMU_COUNTER_RESERVED(GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_4_L, + GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_4_H, + GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_1), + GMU_COUNTER(GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_5_L, + GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_5_H, + GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_1), + GMU_COUNTER(GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_6_L, + GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_6_H, + GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_2), + GMU_COUNTER(GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_7_L, + GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_7_H, + GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_2), + GMU_COUNTER(GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_8_L, + GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_8_H, + GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_2), + GMU_COUNTER(GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_9_L, + GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_9_H, + GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_2), +}; + +static struct adreno_perfcount_register gen7_perfcounters_gmu_gmuclk[] = { + GMU_COUNTER(GEN7_GMU_CX_GMU_POWER_COUNTER_GMUCLK_0_L, + GEN7_GMU_CX_GMU_POWER_COUNTER_GMUCLK_0_H, + GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_1), + GMU_COUNTER(GEN7_GMU_CX_GMU_POWER_COUNTER_GMUCLK_1_L, + GEN7_GMU_CX_GMU_POWER_COUNTER_GMUCLK_1_H, + GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_1), +}; + +static struct adreno_perfcount_register gen7_perfcounters_gmu_perf[] = { + GMU_COUNTER(GEN7_GMU_CX_GMU_PERF_COUNTER_0_L, + GEN7_GMU_CX_GMU_PERF_COUNTER_0_H, + GEN7_GMU_CX_GMU_PERF_COUNTER_SELECT_0), + GMU_COUNTER(GEN7_GMU_CX_GMU_PERF_COUNTER_1_L, + GEN7_GMU_CX_GMU_PERF_COUNTER_1_H, + GEN7_GMU_CX_GMU_PERF_COUNTER_SELECT_0), + GMU_COUNTER(GEN7_GMU_CX_GMU_PERF_COUNTER_2_L, + GEN7_GMU_CX_GMU_PERF_COUNTER_2_H, + GEN7_GMU_CX_GMU_PERF_COUNTER_SELECT_0), + GMU_COUNTER(GEN7_GMU_CX_GMU_PERF_COUNTER_3_L, + GEN7_GMU_CX_GMU_PERF_COUNTER_3_H, + GEN7_GMU_CX_GMU_PERF_COUNTER_SELECT_0), + GMU_COUNTER(GEN7_GMU_CX_GMU_PERF_COUNTER_4_L, + GEN7_GMU_CX_GMU_PERF_COUNTER_4_H, + GEN7_GMU_CX_GMU_PERF_COUNTER_SELECT_1), + GMU_COUNTER(GEN7_GMU_CX_GMU_PERF_COUNTER_5_L, + GEN7_GMU_CX_GMU_PERF_COUNTER_5_H, + GEN7_GMU_CX_GMU_PERF_COUNTER_SELECT_1), +}; + +static struct adreno_perfcount_register gen7_perfcounters_alwayson[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, GEN7_CP_ALWAYS_ON_COUNTER_LO, + GEN7_CP_ALWAYS_ON_COUNTER_HI, -1 }, +}; + +/* + * ADRENO_PERFCOUNTER_GROUP_RESTORE flag is enabled by default + * because most of the perfcounter groups need to be restored + * as part of preemption and IFPC. 
Perfcounter groups that are + * not restored as part of preemption and IFPC should be defined + * using GEN7_PERFCOUNTER_GROUP_FLAGS macro + */ + +#define GEN7_PERFCOUNTER_GROUP_FLAGS(core, offset, name, flags, \ + enable, read) \ + [KGSL_PERFCOUNTER_GROUP_##offset] = { core##_perfcounters_##name, \ + ARRAY_SIZE(core##_perfcounters_##name), __stringify(name), flags, \ + enable, read } + +#define GEN7_PERFCOUNTER_GROUP(offset, name, enable, read) \ + GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, offset, name, \ + ADRENO_PERFCOUNTER_GROUP_RESTORE, enable, read) + +#define GEN7_REGULAR_PERFCOUNTER_GROUP(offset, name) \ + GEN7_PERFCOUNTER_GROUP(offset, name, \ + gen7_counter_enable, gen7_counter_read) + +#define GEN7_BV_PERFCOUNTER_GROUP(offset, name, enable, read) \ + GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, BV_##offset, bv_##name, \ + ADRENO_PERFCOUNTER_GROUP_RESTORE, enable, read) + +#define GEN7_BV_REGULAR_PERFCOUNTER_GROUP(offset, name) \ + GEN7_BV_PERFCOUNTER_GROUP(offset, name, \ + gen7_counter_enable, gen7_counter_read) + +static const struct adreno_perfcount_group gen7_perfcounter_groups + [KGSL_PERFCOUNTER_GROUP_MAX] = { + GEN7_REGULAR_PERFCOUNTER_GROUP(CP, cp), + GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, RBBM, rbbm, 0, + gen7_counter_enable, gen7_counter_read), + GEN7_REGULAR_PERFCOUNTER_GROUP(PC, pc), + GEN7_REGULAR_PERFCOUNTER_GROUP(VFD, vfd), + GEN7_PERFCOUNTER_GROUP(HLSQ, hlsq, + gen7_counter_inline_enable, gen7_counter_read), + GEN7_REGULAR_PERFCOUNTER_GROUP(VPC, vpc), + GEN7_REGULAR_PERFCOUNTER_GROUP(CCU, ccu), + GEN7_REGULAR_PERFCOUNTER_GROUP(CMP, cmp), + GEN7_REGULAR_PERFCOUNTER_GROUP(TSE, tse), + GEN7_REGULAR_PERFCOUNTER_GROUP(RAS, ras), + GEN7_REGULAR_PERFCOUNTER_GROUP(LRZ, lrz), + GEN7_REGULAR_PERFCOUNTER_GROUP(UCHE, uche), + GEN7_PERFCOUNTER_GROUP(TP, tp, + gen7_counter_inline_enable, gen7_counter_read), + GEN7_PERFCOUNTER_GROUP(SP, sp, + gen7_counter_inline_enable, gen7_counter_read), + GEN7_REGULAR_PERFCOUNTER_GROUP(RB, rb), + GEN7_REGULAR_PERFCOUNTER_GROUP(VSC, vsc), + GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, VBIF, gbif, 0, + gen7_counter_gbif_enable, gen7_counter_read_norestore), + GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, VBIF_PWR, gbif_pwr, + ADRENO_PERFCOUNTER_GROUP_FIXED, + gen7_counter_gbif_pwr_enable, gen7_counter_read_norestore), + GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, ALWAYSON, alwayson, + ADRENO_PERFCOUNTER_GROUP_FIXED, + gen7_counter_alwayson_enable, gen7_counter_alwayson_read), + GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, GMU_XOCLK, gmu_xoclk, 0, + gen7_counter_gmu_xoclk_enable, gen7_counter_read_norestore), + GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, GMU_GMUCLK, gmu_gmuclk, 0, + gen7_counter_gmu_gmuclk_enable, gen7_counter_read_norestore), + GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, GMU_PERF, gmu_perf, 0, + gen7_counter_gmu_perf_enable, gen7_counter_read_norestore), + GEN7_REGULAR_PERFCOUNTER_GROUP(UFC, ufc), + GEN7_BV_REGULAR_PERFCOUNTER_GROUP(CP, cp), + GEN7_BV_REGULAR_PERFCOUNTER_GROUP(PC, pc), + GEN7_BV_REGULAR_PERFCOUNTER_GROUP(VFD, vfd), + GEN7_BV_REGULAR_PERFCOUNTER_GROUP(VPC, vpc), + GEN7_BV_PERFCOUNTER_GROUP(TP, tp, + gen7_counter_inline_enable, gen7_counter_read), + GEN7_BV_PERFCOUNTER_GROUP(SP, sp, + gen7_counter_inline_enable, gen7_counter_read), + GEN7_BV_REGULAR_PERFCOUNTER_GROUP(UFC, ufc), +}; + +const struct adreno_perfcounters adreno_gen7_perfcounters = { + gen7_perfcounter_groups, + ARRAY_SIZE(gen7_perfcounter_groups), +}; diff --git a/adreno_gen7_preempt.c b/adreno_gen7_preempt.c new file mode 100644 index 0000000000..4c5da6d497 --- /dev/null +++ b/adreno_gen7_preempt.c @@ -0,0 +1,746 @@ +// 
SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + */ + +#include "adreno.h" +#include "adreno_gen7.h" +#include "adreno_pm4types.h" +#include "adreno_trace.h" + +#define PREEMPT_RECORD(_field) \ + offsetof(struct gen7_cp_preemption_record, _field) + +#define PREEMPT_SMMU_RECORD(_field) \ + offsetof(struct gen7_cp_smmu_info, _field) + +enum { + SET_PSEUDO_REGISTER_SAVE_REGISTER_SMMU_INFO = 0, + SET_PSEUDO_REGISTER_SAVE_REGISTER_PRIV_NON_SECURE_SAVE_ADDR, + SET_PSEUDO_REGISTER_SAVE_REGISTER_PRIV_SECURE_SAVE_ADDR, + SET_PSEUDO_REGISTER_SAVE_REGISTER_NON_PRIV_SAVE_ADDR, + SET_PSEUDO_REGISTER_SAVE_REGISTER_COUNTER, +}; + +static void _update_wptr(struct adreno_device *adreno_dev, bool reset_timer, + bool atomic) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_ringbuffer *rb = adreno_dev->cur_rb; + unsigned long flags; + int ret = 0; + + spin_lock_irqsave(&rb->preempt_lock, flags); + + if (!atomic) { + /* + * We might have skipped updating the wptr in case we are in + * dispatcher context. Do it now. + */ + if (rb->skip_inline_wptr) { + + ret = gen7_fenced_write(adreno_dev, + GEN7_CP_RB_WPTR, rb->wptr, + FENCE_STATUS_WRITEDROPPED0_MASK); + + reset_timer = true; + rb->skip_inline_wptr = false; + } + } else { + unsigned int wptr; + + kgsl_regread(device, GEN7_CP_RB_WPTR, &wptr); + if (wptr != rb->wptr) { + kgsl_regwrite(device, GEN7_CP_RB_WPTR, rb->wptr); + reset_timer = true; + } + } + + if (reset_timer) + rb->dispatch_q.expires = jiffies + + msecs_to_jiffies(adreno_drawobj_timeout); + + spin_unlock_irqrestore(&rb->preempt_lock, flags); + + if (!atomic) { + /* If WPTR update fails, set the fault and trigger recovery */ + if (ret) { + gmu_core_fault_snapshot(device); + adreno_dispatcher_fault(adreno_dev, + ADRENO_GMU_FAULT_SKIP_SNAPSHOT); + } + } +} + +static void _power_collapse_set(struct adreno_device *adreno_dev, bool val) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + gmu_core_regwrite(device, + GEN7_GMU_PWR_COL_PREEMPT_KEEPALIVE, (val ? 
1 : 0)); +} + +static void _gen7_preemption_done(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned int status; + + /* + * In the very unlikely case that the power is off, do nothing - the + * state will be reset on power up and everybody will be happy + */ + + if (!kgsl_state_is_awake(device)) + return; + + kgsl_regread(device, GEN7_CP_CONTEXT_SWITCH_CNTL, &status); + + if (status & 0x1) { + dev_err(device->dev, + "Preemption not complete: status=%X cur=%d R/W=%X/%X next=%d R/W=%X/%X\n", + status, adreno_dev->cur_rb->id, + adreno_get_rptr(adreno_dev->cur_rb), + adreno_dev->cur_rb->wptr, + adreno_dev->next_rb->id, + adreno_get_rptr(adreno_dev->next_rb), + adreno_dev->next_rb->wptr); + + /* Set a fault and restart */ + adreno_dispatcher_fault(adreno_dev, ADRENO_PREEMPT_FAULT); + + return; + } + + adreno_dev->preempt.count++; + + del_timer_sync(&adreno_dev->preempt.timer); + + kgsl_regread(device, GEN7_CP_CONTEXT_SWITCH_LEVEL_STATUS, &status); + + trace_adreno_preempt_done(adreno_dev->cur_rb, adreno_dev->next_rb, + status); + + /* Clean up all the bits */ + adreno_dev->prev_rb = adreno_dev->cur_rb; + adreno_dev->cur_rb = adreno_dev->next_rb; + adreno_dev->next_rb = NULL; + + /* Update the wptr for the new command queue */ + _update_wptr(adreno_dev, true, false); + + /* Update the dispatcher timer for the new command queue */ + mod_timer(&adreno_dev->dispatcher.timer, + adreno_dev->cur_rb->dispatch_q.expires); + + /* Clear the preempt state */ + adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE); +} + +static void _gen7_preemption_fault(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned int status; + + /* + * If the power is on check the preemption status one more time - if it + * was successful then just transition to the complete state + */ + if (kgsl_state_is_awake(device)) { + kgsl_regread(device, GEN7_CP_CONTEXT_SWITCH_CNTL, &status); + + if (!(status & 0x1)) { + adreno_set_preempt_state(adreno_dev, + ADRENO_PREEMPT_COMPLETE); + + adreno_dispatcher_schedule(device); + return; + } + } + + dev_err(device->dev, + "Preemption Fault: cur=%d R/W=0x%x/0x%x, next=%d R/W=0x%x/0x%x\n", + adreno_dev->cur_rb->id, + adreno_get_rptr(adreno_dev->cur_rb), + adreno_dev->cur_rb->wptr, + adreno_dev->next_rb->id, + adreno_get_rptr(adreno_dev->next_rb), + adreno_dev->next_rb->wptr); + + adreno_dispatcher_fault(adreno_dev, ADRENO_PREEMPT_FAULT); +} + +static void _gen7_preemption_worker(struct work_struct *work) +{ + struct adreno_preemption *preempt = container_of(work, + struct adreno_preemption, work); + struct adreno_device *adreno_dev = container_of(preempt, + struct adreno_device, preempt); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + /* Need to take the mutex to make sure that the power stays on */ + mutex_lock(&device->mutex); + + if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_FAULTED)) + _gen7_preemption_fault(adreno_dev); + + mutex_unlock(&device->mutex); +} + +/* Find the highest priority active ringbuffer */ +static struct adreno_ringbuffer *gen7_next_ringbuffer( + struct adreno_device *adreno_dev) +{ + struct adreno_ringbuffer *rb; + unsigned long flags; + unsigned int i; + + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + bool empty; + + spin_lock_irqsave(&rb->preempt_lock, flags); + empty = adreno_rb_empty(rb); + spin_unlock_irqrestore(&rb->preempt_lock, flags); + + if (!empty) + return rb; + } + + return NULL; +} + +void gen7_preemption_trigger(struct adreno_device 
*adreno_dev, bool atomic)
+{
+ struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+ struct kgsl_iommu *iommu = KGSL_IOMMU(device);
+ struct adreno_ringbuffer *next;
+ u64 ttbr0, gpuaddr;
+ u32 contextidr, cntl;
+ unsigned long flags;
+ struct adreno_preemption *preempt = &adreno_dev->preempt;
+
+ /* Put ourselves into a possible trigger state */
+ if (!adreno_move_preempt_state(adreno_dev,
+ ADRENO_PREEMPT_NONE, ADRENO_PREEMPT_START))
+ return;
+
+ /* Get the next ringbuffer to preempt in */
+ next = gen7_next_ringbuffer(adreno_dev);
+
+ /*
+ * Nothing to do if every ringbuffer is empty or if the current
+ * ringbuffer is the only active one
+ */
+ if (next == NULL || next == adreno_dev->cur_rb) {
+ /*
+ * Update any critical things that might have been skipped while
+ * we were looking for a new ringbuffer
+ */
+
+ if (next != NULL) {
+ _update_wptr(adreno_dev, false, atomic);
+
+ mod_timer(&adreno_dev->dispatcher.timer,
+ adreno_dev->cur_rb->dispatch_q.expires);
+ }
+
+ adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
+ return;
+ }
+
+ /* Turn off the dispatcher timer */
+ del_timer(&adreno_dev->dispatcher.timer);
+
+ /*
+ * This is the most critical section - we need to take care not to race
+ * until we have programmed the CP for the switch
+ */
+
+ spin_lock_irqsave(&next->preempt_lock, flags);
+
+ /*
+ * Get the pagetable from the pagetable info.
+ * The pagetable_desc is allocated and mapped at probe time, and
+ * preemption_desc at init time, so no need to check if
+ * sharedmem accesses to these memdescs succeed.
+ */
+ kgsl_sharedmem_readq(next->pagetable_desc, &ttbr0,
+ PT_INFO_OFFSET(ttbr0));
+ kgsl_sharedmem_readl(next->pagetable_desc, &contextidr,
+ PT_INFO_OFFSET(contextidr));
+
+ kgsl_sharedmem_writel(next->preemption_desc,
+ PREEMPT_RECORD(wptr), next->wptr);
+
+ spin_unlock_irqrestore(&next->preempt_lock, flags);
+
+ /* And write it to the smmu info */
+ if (kgsl_mmu_is_perprocess(&device->mmu)) {
+ kgsl_sharedmem_writeq(iommu->smmu_info,
+ PREEMPT_SMMU_RECORD(ttbr0), ttbr0);
+ kgsl_sharedmem_writel(iommu->smmu_info,
+ PREEMPT_SMMU_RECORD(context_idr), contextidr);
+ }
+
+ kgsl_sharedmem_readq(preempt->scratch, &gpuaddr,
+ next->id * sizeof(u64));
+
+ /*
+ * Set a keepalive bit before the first preemption register write.
+ * This is required since while each individual write to the context
+ * switch registers will wake the GPU from collapse, it will not in
+ * itself cause GPU activity. Thus, the GPU could technically be
+ * re-collapsed between subsequent register writes leading to a
+ * prolonged preemption sequence. The keepalive bit prevents any
+ * further power collapse while it is set.
+ * It is more efficient to use a keepalive+wake-on-fence approach here
+ * rather than an OOB. Both keepalive and the fence are effectively
+ * free when the GPU is already powered on, whereas an OOB requires an
+ * unconditional handshake with the GMU.
+ */
+ _power_collapse_set(adreno_dev, true);
+
+ /*
+ * Fenced writes on this path will make sure the GPU is woken up
+ * in case it was power collapsed by the GMU.
+ */
+ if (gen7_fenced_write(adreno_dev,
+ GEN7_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_LO,
+ lower_32_bits(next->preemption_desc->gpuaddr),
+ FENCE_STATUS_WRITEDROPPED1_MASK))
+ goto err;
+
+ /*
+ * Above fence writes will make sure GMU comes out of
+ * IFPC state if it was in IFPC state but it doesn't
+ * guarantee that GMU FW actually moved to ACTIVE state
+ * i.e. wake-up from IFPC is complete.
+ * Wait for GMU to move to ACTIVE state before triggering
+ * preemption. This is required to make sure CP doesn't
+ * interrupt GMU during wake-up from IFPC.
+ */
+ if (gmu_core_dev_wait_for_active_transition(device))
+ goto err;
+
+ if (gen7_fenced_write(adreno_dev,
+ GEN7_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_HI,
+ upper_32_bits(next->preemption_desc->gpuaddr),
+ FENCE_STATUS_WRITEDROPPED1_MASK))
+ goto err;
+
+ if (gen7_fenced_write(adreno_dev,
+ GEN7_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_LO,
+ lower_32_bits(next->secure_preemption_desc->gpuaddr),
+ FENCE_STATUS_WRITEDROPPED1_MASK))
+ goto err;
+
+ if (gen7_fenced_write(adreno_dev,
+ GEN7_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_HI,
+ upper_32_bits(next->secure_preemption_desc->gpuaddr),
+ FENCE_STATUS_WRITEDROPPED1_MASK))
+ goto err;
+
+ if (gen7_fenced_write(adreno_dev,
+ GEN7_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_LO,
+ lower_32_bits(gpuaddr),
+ FENCE_STATUS_WRITEDROPPED1_MASK))
+ goto err;
+
+ if (gen7_fenced_write(adreno_dev,
+ GEN7_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_HI,
+ upper_32_bits(gpuaddr),
+ FENCE_STATUS_WRITEDROPPED1_MASK))
+ goto err;
+
+ adreno_dev->next_rb = next;
+
+ /* Start the timer to detect a stuck preemption */
+ mod_timer(&adreno_dev->preempt.timer,
+ jiffies + msecs_to_jiffies(ADRENO_PREEMPT_TIMEOUT));
+
+ cntl = (preempt->preempt_level << 6) | 0x01;
+
+ /* Skip save/restore during L1 preemption */
+ if (preempt->skipsaverestore)
+ cntl |= (1 << 9);
+
+ /* Enable GMEM save/restore across preemption */
+ if (preempt->usesgmem)
+ cntl |= (1 << 8);
+
+ trace_adreno_preempt_trigger(adreno_dev->cur_rb, adreno_dev->next_rb,
+ cntl);
+
+ adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_TRIGGERED);
+
+ /* Trigger the preemption */
+ if (gen7_fenced_write(adreno_dev, GEN7_CP_CONTEXT_SWITCH_CNTL, cntl,
+ FENCE_STATUS_WRITEDROPPED1_MASK)) {
+ adreno_dev->next_rb = NULL;
+ del_timer(&adreno_dev->preempt.timer);
+ goto err;
+ }
+
+ return;
+err:
+ /* If fenced write fails, take inline snapshot and trigger recovery */
+ if (!in_interrupt()) {
+ gmu_core_fault_snapshot(device);
+ adreno_dispatcher_fault(adreno_dev,
+ ADRENO_GMU_FAULT_SKIP_SNAPSHOT);
+ } else {
+ adreno_dispatcher_fault(adreno_dev, ADRENO_GMU_FAULT);
+ }
+ adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
+ /* Clear the keep alive */
+ _power_collapse_set(adreno_dev, false);
+
+}
+
+void gen7_preemption_callback(struct adreno_device *adreno_dev, int bit)
+{
+ struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+ unsigned int status;
+
+ if (!adreno_move_preempt_state(adreno_dev,
+ ADRENO_PREEMPT_TRIGGERED, ADRENO_PREEMPT_PENDING))
+ return;
+
+ kgsl_regread(device, GEN7_CP_CONTEXT_SWITCH_CNTL, &status);
+
+ if (status & 0x1) {
+ dev_err(KGSL_DEVICE(adreno_dev)->dev,
+ "preempt interrupt with non-zero status: %X\n",
+ status);
+
+ /*
+ * Under the assumption that this is a race between the
+ * interrupt and the register, schedule the worker to clean up.
+ * If the status still hasn't resolved itself by the time we get
+ * there then we have to assume something bad happened
+ */
+ adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_COMPLETE);
+ adreno_dispatcher_schedule(KGSL_DEVICE(adreno_dev));
+ return;
+ }
+
+ adreno_dev->preempt.count++;
+
+ /*
+ * We can now safely clear the preemption keepalive bit, allowing
+ * power collapse to resume its regular activity.
+ */ + _power_collapse_set(adreno_dev, false); + + del_timer(&adreno_dev->preempt.timer); + + kgsl_regread(device, GEN7_CP_CONTEXT_SWITCH_LEVEL_STATUS, &status); + + trace_adreno_preempt_done(adreno_dev->cur_rb, adreno_dev->next_rb, + status); + + adreno_dev->prev_rb = adreno_dev->cur_rb; + adreno_dev->cur_rb = adreno_dev->next_rb; + adreno_dev->next_rb = NULL; + + /* Update the wptr if it changed while preemption was ongoing */ + _update_wptr(adreno_dev, true, true); + + /* Update the dispatcher timer for the new command queue */ + mod_timer(&adreno_dev->dispatcher.timer, + adreno_dev->cur_rb->dispatch_q.expires); + + adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE); + + gen7_preemption_trigger(adreno_dev, true); +} + +void gen7_preemption_schedule(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (!adreno_is_preemption_enabled(adreno_dev)) + return; + + mutex_lock(&device->mutex); + + if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_COMPLETE)) + _gen7_preemption_done(adreno_dev); + + gen7_preemption_trigger(adreno_dev, false); + + mutex_unlock(&device->mutex); +} + +u32 gen7_preemption_pre_ibsubmit(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, struct adreno_context *drawctxt, + u32 *cmds) +{ + u32 *cmds_orig = cmds; + u64 gpuaddr = 0; + + if (!adreno_is_preemption_enabled(adreno_dev)) + return 0; + + *cmds++ = cp_type7_packet(CP_THREAD_CONTROL, 1); + *cmds++ = CP_SET_THREAD_BR; + + if (drawctxt) { + gpuaddr = drawctxt->base.user_ctxt_record->memdesc.gpuaddr; + *cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 15); + } else { + *cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 12); + } + + /* NULL SMMU_INFO buffer - we track in KMD */ + *cmds++ = SET_PSEUDO_REGISTER_SAVE_REGISTER_SMMU_INFO; + cmds += cp_gpuaddr(adreno_dev, cmds, 0x0); + + *cmds++ = SET_PSEUDO_REGISTER_SAVE_REGISTER_PRIV_NON_SECURE_SAVE_ADDR; + cmds += cp_gpuaddr(adreno_dev, cmds, rb->preemption_desc->gpuaddr); + + *cmds++ = SET_PSEUDO_REGISTER_SAVE_REGISTER_PRIV_SECURE_SAVE_ADDR; + cmds += cp_gpuaddr(adreno_dev, cmds, + rb->secure_preemption_desc->gpuaddr); + + if (drawctxt) { + *cmds++ = SET_PSEUDO_REGISTER_SAVE_REGISTER_NON_PRIV_SAVE_ADDR; + cmds += cp_gpuaddr(adreno_dev, cmds, gpuaddr); + } + + /* + * There is no need to specify this address when we are about to + * trigger preemption. This is because CP internally stores this + * address specified here in the CP_SET_PSEUDO_REGISTER payload to + * the context record and thus knows from where to restore + * the saved perfcounters for the new ringbuffer. 
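+ * (The address written below is rb->perfcounter_save_restore_desc->gpuaddr,
+ * a per-ringbuffer buffer allocated in gen7_preemption_ringbuffer_init().)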
+ */ + *cmds++ = SET_PSEUDO_REGISTER_SAVE_REGISTER_COUNTER; + cmds += cp_gpuaddr(adreno_dev, cmds, + rb->perfcounter_save_restore_desc->gpuaddr); + + if (drawctxt) { + struct adreno_ringbuffer *rb = drawctxt->rb; + u64 dest = adreno_dev->preempt.scratch->gpuaddr + + (rb->id * sizeof(u64)); + + *cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 2); + cmds += cp_gpuaddr(adreno_dev, cmds, dest); + *cmds++ = lower_32_bits(gpuaddr); + *cmds++ = upper_32_bits(gpuaddr); + } + + return (unsigned int) (cmds - cmds_orig); +} + +u32 gen7_preemption_post_ibsubmit(struct adreno_device *adreno_dev, + u32 *cmds) +{ + u32 index = 0; + + if (!adreno_is_preemption_enabled(adreno_dev)) + return 0; + + if (adreno_dev->cur_rb) { + u64 dest = adreno_dev->preempt.scratch->gpuaddr + + (adreno_dev->cur_rb->id * sizeof(u64)); + + cmds[index++] = cp_type7_packet(CP_MEM_WRITE, 4); + cmds[index++] = lower_32_bits(dest); + cmds[index++] = upper_32_bits(dest); + cmds[index++] = 0; + cmds[index++] = 0; + } + + cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1); + cmds[index++] = CP_SET_THREAD_BOTH; + cmds[index++] = cp_type7_packet(CP_CONTEXT_SWITCH_YIELD, 4); + cmds[index++] = 0; + cmds[index++] = 0; + cmds[index++] = 1; + cmds[index++] = 0; + + return index; +} + +void gen7_preemption_start(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_iommu *iommu = KGSL_IOMMU(device); + struct adreno_ringbuffer *rb; + unsigned int i; + + if (!adreno_is_preemption_enabled(adreno_dev)) + return; + + /* Force the state to be clear */ + adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE); + + if (kgsl_mmu_is_perprocess(&device->mmu)) { + /* smmu_info is allocated and mapped in gen7_preemption_iommu_init */ + kgsl_sharedmem_writel(iommu->smmu_info, + PREEMPT_SMMU_RECORD(magic), GEN7_CP_SMMU_INFO_MAGIC_REF); + kgsl_sharedmem_writeq(iommu->smmu_info, + PREEMPT_SMMU_RECORD(ttbr0), MMU_DEFAULT_TTBR0(device)); + + /* The CP doesn't use the asid record, so poison it */ + kgsl_sharedmem_writel(iommu->smmu_info, + PREEMPT_SMMU_RECORD(asid), 0xdecafbad); + kgsl_sharedmem_writel(iommu->smmu_info, + PREEMPT_SMMU_RECORD(context_idr), 0); + + kgsl_regwrite(device, GEN7_CP_CONTEXT_SWITCH_SMMU_INFO_LO, + lower_32_bits(iommu->smmu_info->gpuaddr)); + + kgsl_regwrite(device, GEN7_CP_CONTEXT_SWITCH_SMMU_INFO_HI, + upper_32_bits(iommu->smmu_info->gpuaddr)); + } + + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + kgsl_sharedmem_writel(rb->preemption_desc, + PREEMPT_RECORD(rptr), 0); + kgsl_sharedmem_writel(rb->preemption_desc, + PREEMPT_RECORD(wptr), 0); + + adreno_ringbuffer_set_pagetable(rb, + device->mmu.defaultpagetable); + } +} + +static void reset_rb_preempt_record(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb) +{ + memset(rb->preemption_desc->hostptr, 0x0, rb->preemption_desc->size); + + kgsl_sharedmem_writel(rb->preemption_desc, + PREEMPT_RECORD(magic), GEN7_CP_CTXRECORD_MAGIC_REF); + kgsl_sharedmem_writel(rb->preemption_desc, + PREEMPT_RECORD(cntl), GEN7_CP_RB_CNTL_DEFAULT); + kgsl_sharedmem_writeq(rb->preemption_desc, + PREEMPT_RECORD(rptr_addr), SCRATCH_RPTR_GPU_ADDR( + KGSL_DEVICE(adreno_dev), rb->id)); + kgsl_sharedmem_writeq(rb->preemption_desc, + PREEMPT_RECORD(rbase), rb->buffer_desc->gpuaddr); + kgsl_sharedmem_writeq(rb->preemption_desc, + PREEMPT_RECORD(bv_rptr_addr), SCRATCH_BV_RPTR_GPU_ADDR( + KGSL_DEVICE(adreno_dev), rb->id)); +} + +void gen7_reset_preempt_records(struct adreno_device *adreno_dev) +{ + int i; + struct adreno_ringbuffer *rb; + + if 
(!adreno_is_preemption_enabled(adreno_dev)) + return; + + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + reset_rb_preempt_record(adreno_dev, rb); + } +} + +static int gen7_preemption_ringbuffer_init(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + const struct adreno_gen7_core *gen7_core = to_gen7_core(adreno_dev); + u64 ctxt_record_size = GEN7_CP_CTXRECORD_SIZE_IN_BYTES; + int ret; + + if (gen7_core->ctxt_record_size) + ctxt_record_size = gen7_core->ctxt_record_size; + + ret = adreno_allocate_global(device, &rb->preemption_desc, + ctxt_record_size, SZ_16K, 0, + KGSL_MEMDESC_PRIVILEGED, "preemption_desc"); + if (ret) + return ret; + + ret = adreno_allocate_global(device, &rb->secure_preemption_desc, + ctxt_record_size, 0, + KGSL_MEMFLAGS_SECURE, KGSL_MEMDESC_PRIVILEGED, + "secure_preemption_desc"); + if (ret) + return ret; + + ret = adreno_allocate_global(device, &rb->perfcounter_save_restore_desc, + GEN7_CP_PERFCOUNTER_SAVE_RESTORE_SIZE, 0, 0, + KGSL_MEMDESC_PRIVILEGED, + "perfcounter_save_restore_desc"); + if (ret) + return ret; + + reset_rb_preempt_record(adreno_dev, rb); + + return 0; +} + +int gen7_preemption_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_iommu *iommu = KGSL_IOMMU(device); + struct adreno_preemption *preempt = &adreno_dev->preempt; + struct adreno_ringbuffer *rb; + int ret; + unsigned int i; + + /* We are dependent on IOMMU to make preemption go on the CP side */ + if (kgsl_mmu_get_mmutype(device) != KGSL_MMU_TYPE_IOMMU) + return -ENODEV; + + INIT_WORK(&preempt->work, _gen7_preemption_worker); + + /* Allocate mem for storing preemption switch record */ + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + ret = gen7_preemption_ringbuffer_init(adreno_dev, rb); + if (ret) + return ret; + } + + ret = adreno_allocate_global(device, &preempt->scratch, PAGE_SIZE, + 0, 0, 0, "preempt_scratch"); + if (ret) + return ret; + + /* Allocate mem for storing preemption smmu record */ + if (kgsl_mmu_is_perprocess(&device->mmu)) { + ret = adreno_allocate_global(device, &iommu->smmu_info, PAGE_SIZE, 0, + KGSL_MEMFLAGS_GPUREADONLY, KGSL_MEMDESC_PRIVILEGED, + "smmu_info"); + if (ret) + return ret; + } + + set_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv); + return 0; +} + +int gen7_preemption_context_init(struct kgsl_context *context) +{ + struct kgsl_device *device = context->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + u64 flags = 0; + + if (!adreno_is_preemption_enabled(adreno_dev)) + return 0; + + if (context->flags & KGSL_CONTEXT_SECURE) + flags |= KGSL_MEMFLAGS_SECURE; + + if (is_compat_task()) + flags |= KGSL_MEMFLAGS_FORCE_32BIT; + + /* + * gpumem_alloc_entry takes an extra refcount. Put it only when + * destroying the context to keep the context record valid + */ + context->user_ctxt_record = gpumem_alloc_entry(context->dev_priv, + GEN7_CP_CTXRECORD_USER_RESTORE_SIZE, flags); + if (IS_ERR(context->user_ctxt_record)) { + int ret = PTR_ERR(context->user_ctxt_record); + + context->user_ctxt_record = NULL; + return ret; + } + + return 0; +} diff --git a/adreno_gen7_ringbuffer.c b/adreno_gen7_ringbuffer.c new file mode 100644 index 0000000000..47277fc51f --- /dev/null +++ b/adreno_gen7_ringbuffer.c @@ -0,0 +1,556 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. 
+ */ + +#include "adreno.h" +#include "adreno_gen7.h" +#include "adreno_pm4types.h" +#include "adreno_ringbuffer.h" +#include "adreno_trace.h" +#include "kgsl_trace.h" + +static bool is_concurrent_binning(struct adreno_context *drawctxt) +{ + if (!drawctxt) + return false; + + return !(drawctxt->base.flags & KGSL_CONTEXT_SECURE); +} + +static int gen7_rb_pagetable_switch(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, struct adreno_context *drawctxt, + struct kgsl_pagetable *pagetable, u32 *cmds) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u64 ttbr0 = kgsl_mmu_pagetable_get_ttbr0(pagetable); + int count = 0; + u32 id = drawctxt ? drawctxt->base.id : 0; + + if (pagetable == device->mmu.defaultpagetable) + return 0; + + /* CP switches the pagetable and flushes the Caches */ + cmds[count++] = cp_type7_packet(CP_SMMU_TABLE_UPDATE, 3); + cmds[count++] = lower_32_bits(ttbr0); + cmds[count++] = upper_32_bits(ttbr0); + cmds[count++] = id; + + cmds[count++] = cp_type7_packet(CP_MEM_WRITE, 5); + cmds[count++] = lower_32_bits(rb->pagetable_desc->gpuaddr + + PT_INFO_OFFSET(ttbr0)); + cmds[count++] = upper_32_bits(rb->pagetable_desc->gpuaddr + + PT_INFO_OFFSET(ttbr0)); + cmds[count++] = lower_32_bits(ttbr0); + cmds[count++] = upper_32_bits(ttbr0); + cmds[count++] = id; + + /* + * Sync both threads after switching pagetables and enable BR only + * to make sure BV doesn't race ahead while BR is still switching + * pagetables. + */ + cmds[count++] = cp_type7_packet(CP_THREAD_CONTROL, 1); + cmds[count++] = CP_SYNC_THREADS | CP_SET_THREAD_BR; + + return count; +} + +static int gen7_rb_context_switch(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, + struct adreno_context *drawctxt) +{ + struct kgsl_pagetable *pagetable = + adreno_drawctxt_get_pagetable(drawctxt); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int count = 0; + u32 cmds[42]; + + /* Sync both threads */ + cmds[count++] = cp_type7_packet(CP_THREAD_CONTROL, 1); + cmds[count++] = CP_SYNC_THREADS | CP_SET_THREAD_BOTH; + /* Reset context state */ + cmds[count++] = cp_type7_packet(CP_RESET_CONTEXT_STATE, 1); + cmds[count++] = CP_CLEAR_BV_BR_COUNTER | CP_CLEAR_RESOURCE_TABLE | + CP_CLEAR_ON_CHIP_TS; + /* + * Enable/disable concurrent binning for pagetable switch and + * set the thread to BR since only BR can execute the pagetable + * switch packets. + */ + /* Sync both threads and enable BR only */ + cmds[count++] = cp_type7_packet(CP_THREAD_CONTROL, 1); + cmds[count++] = CP_SYNC_THREADS | CP_SET_THREAD_BR; + + if (adreno_drawctxt_get_pagetable(rb->drawctxt_active) != pagetable) + count += gen7_rb_pagetable_switch(adreno_dev, rb, + drawctxt, pagetable, &cmds[count]); + else { + struct kgsl_iommu *iommu = KGSL_IOMMU(device); + u32 id = drawctxt ? drawctxt->base.id : 0; + u32 offset = GEN7_SMMU_BASE + (iommu->cb0_offset >> 2) + 0x0d; + + /* + * Set the CONTEXTIDR register to the current context id so we + * can use it in pagefault debugging. 
Unlike TTBR0 we don't + * need any special sequence or locking to change it + */ + cmds[count++] = cp_type4_packet(offset, 1); + cmds[count++] = id; + } + + cmds[count++] = cp_type7_packet(CP_NOP, 1); + cmds[count++] = CONTEXT_TO_MEM_IDENTIFIER; + + cmds[count++] = cp_type7_packet(CP_MEM_WRITE, 3); + cmds[count++] = lower_32_bits(MEMSTORE_RB_GPU_ADDR(device, rb, + current_context)); + cmds[count++] = upper_32_bits(MEMSTORE_RB_GPU_ADDR(device, rb, + current_context)); + cmds[count++] = drawctxt->base.id; + + cmds[count++] = cp_type7_packet(CP_MEM_WRITE, 3); + cmds[count++] = lower_32_bits(MEMSTORE_ID_GPU_ADDR(device, + KGSL_MEMSTORE_GLOBAL, current_context)); + cmds[count++] = upper_32_bits(MEMSTORE_ID_GPU_ADDR(device, + KGSL_MEMSTORE_GLOBAL, current_context)); + cmds[count++] = drawctxt->base.id; + + cmds[count++] = cp_type7_packet(CP_EVENT_WRITE, 1); + cmds[count++] = 0x31; + + return gen7_ringbuffer_addcmds(adreno_dev, rb, NULL, F_NOTPROTECTED, + cmds, count, 0, NULL); +} + +#define RB_SOPTIMESTAMP(device, rb) \ + MEMSTORE_RB_GPU_ADDR(device, rb, soptimestamp) +#define CTXT_SOPTIMESTAMP(device, drawctxt) \ + MEMSTORE_ID_GPU_ADDR(device, (drawctxt)->base.id, soptimestamp) + +#define RB_EOPTIMESTAMP(device, rb) \ + MEMSTORE_RB_GPU_ADDR(device, rb, eoptimestamp) +#define CTXT_EOPTIMESTAMP(device, drawctxt) \ + MEMSTORE_ID_GPU_ADDR(device, (drawctxt)->base.id, eoptimestamp) + +int gen7_ringbuffer_submit(struct adreno_ringbuffer *rb, + struct adreno_submit_time *time) +{ + struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret = 0; + unsigned long flags; + + adreno_get_submit_time(adreno_dev, rb, time); + adreno_profile_submit_time(time); + + spin_lock_irqsave(&rb->preempt_lock, flags); + if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE)) { + if (adreno_dev->cur_rb == rb) { + kgsl_pwrscale_busy(device); + ret = gen7_fenced_write(adreno_dev, + GEN7_CP_RB_WPTR, rb->_wptr, + FENCE_STATUS_WRITEDROPPED0_MASK); + rb->skip_inline_wptr = false; + } + } else { + if (adreno_dev->cur_rb == rb) + rb->skip_inline_wptr = true; + } + + rb->wptr = rb->_wptr; + spin_unlock_irqrestore(&rb->preempt_lock, flags); + + if (ret) { + /* + * If WPTR update fails, take inline snapshot and trigger + * recovery. 
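+ * (The fault is raised with ADRENO_GMU_FAULT_SKIP_SNAPSHOT because a GMU
+ * snapshot has already been captured inline via gmu_core_fault_snapshot(),
+ * mirroring _update_wptr() and the error path in gen7_preemption_trigger().)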
+ */ + gmu_core_fault_snapshot(device); + adreno_dispatcher_fault(adreno_dev, + ADRENO_GMU_FAULT_SKIP_SNAPSHOT); + } + + return ret; +} + +int gen7_ringbuffer_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int i, ret; + + ret = adreno_allocate_global(device, &device->scratch, PAGE_SIZE, + 0, 0, KGSL_MEMDESC_RANDOM | KGSL_MEMDESC_PRIVILEGED, + "scratch"); + if (ret) + return ret; + + adreno_dev->cur_rb = &(adreno_dev->ringbuffers[0]); + + if (!ADRENO_FEATURE(adreno_dev, ADRENO_PREEMPTION)) { + adreno_dev->num_ringbuffers = 1; + return adreno_ringbuffer_setup(adreno_dev, + &adreno_dev->ringbuffers[0], 0); + } + + adreno_dev->num_ringbuffers = ARRAY_SIZE(adreno_dev->ringbuffers); + + for (i = 0; i < adreno_dev->num_ringbuffers; i++) { + int ret; + + ret = adreno_ringbuffer_setup(adreno_dev, + &adreno_dev->ringbuffers[i], i); + if (ret) + return ret; + } + + timer_setup(&adreno_dev->preempt.timer, adreno_preemption_timer, 0); + gen7_preemption_init(adreno_dev); + return 0; +} + +#define GEN7_SUBMIT_MAX 100 + +int gen7_ringbuffer_addcmds(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, struct adreno_context *drawctxt, + u32 flags, u32 *in, u32 dwords, u32 timestamp, + struct adreno_submit_time *time) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 size = GEN7_SUBMIT_MAX + dwords; + u32 *cmds, index = 0; + u64 profile_gpuaddr; + u32 profile_dwords; + + if (adreno_drawctxt_detached(drawctxt)) + return -ENOENT; + + if (adreno_gpu_fault(adreno_dev) != 0) + return -EPROTO; + + rb->timestamp++; + + if (drawctxt) + drawctxt->internal_timestamp = rb->timestamp; + + /* All submissions are run with protected mode off due to APRIV */ + flags &= ~F_NOTPROTECTED; + + cmds = adreno_ringbuffer_allocspace(rb, size); + if (IS_ERR(cmds)) + return PTR_ERR(cmds); + + /* Identify the start of a command */ + cmds[index++] = cp_type7_packet(CP_NOP, 1); + cmds[index++] = drawctxt ? 
CMD_IDENTIFIER : CMD_INTERNAL_IDENTIFIER; + + /* This is 21 dwords when drawctxt is not NULL */ + index += gen7_preemption_pre_ibsubmit(adreno_dev, rb, drawctxt, + &cmds[index]); + + cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1); + cmds[index++] = CP_SET_THREAD_BOTH; + + cmds[index++] = cp_type7_packet(CP_SET_MARKER, 1); + cmds[index++] = 0x101; /* IFPC disable */ + + cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1); + cmds[index++] = CP_SET_THREAD_BR; + + profile_gpuaddr = adreno_profile_preib_processing(adreno_dev, + drawctxt, &profile_dwords); + + if (profile_gpuaddr) { + cmds[index++] = cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3); + cmds[index++] = lower_32_bits(profile_gpuaddr); + cmds[index++] = upper_32_bits(profile_gpuaddr); + cmds[index++] = profile_dwords; + } + + if (drawctxt) { + cmds[index++] = cp_type7_packet(CP_MEM_WRITE, 3); + cmds[index++] = lower_32_bits(CTXT_SOPTIMESTAMP(device, + drawctxt)); + cmds[index++] = upper_32_bits(CTXT_SOPTIMESTAMP(device, + drawctxt)); + cmds[index++] = timestamp; + } + + cmds[index++] = cp_type7_packet(CP_MEM_WRITE, 3); + cmds[index++] = lower_32_bits(RB_SOPTIMESTAMP(device, rb)); + cmds[index++] = upper_32_bits(RB_SOPTIMESTAMP(device, rb)); + cmds[index++] = rb->timestamp; + + if (IS_SECURE(flags)) { + /* Sync BV and BR if entering secure mode */ + cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1); + cmds[index++] = CP_SYNC_THREADS | CP_CONCURRENT_BIN_DISABLE; + cmds[index++] = cp_type7_packet(CP_SET_SECURE_MODE, 1); + cmds[index++] = 1; + } + + memcpy(&cmds[index], in, dwords << 2); + index += dwords; + + profile_gpuaddr = adreno_profile_postib_processing(adreno_dev, + drawctxt, &dwords); + + if (profile_gpuaddr) { + cmds[index++] = cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3); + cmds[index++] = lower_32_bits(profile_gpuaddr); + cmds[index++] = upper_32_bits(profile_gpuaddr); + cmds[index++] = profile_dwords; + } + + if (test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, &device->mmu.pfpolicy)) + cmds[index++] = cp_type7_packet(CP_WAIT_MEM_WRITES, 0); + + /* + * If this is an internal command, just write the ringbuffer timestamp, + * otherwise, write both + */ + if (!drawctxt) { + cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4); + cmds[index++] = CACHE_CLEAN | BIT(31) | BIT(27); + cmds[index++] = lower_32_bits(RB_EOPTIMESTAMP(device, rb)); + cmds[index++] = upper_32_bits(RB_EOPTIMESTAMP(device, rb)); + cmds[index++] = rb->timestamp; + } else { + cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4); + cmds[index++] = CACHE_CLEAN | BIT(31) | BIT(27); + cmds[index++] = lower_32_bits(CTXT_EOPTIMESTAMP(device, + drawctxt)); + cmds[index++] = upper_32_bits(CTXT_EOPTIMESTAMP(device, + drawctxt)); + cmds[index++] = timestamp; + + cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4); + cmds[index++] = CACHE_CLEAN | BIT(27); + cmds[index++] = lower_32_bits(RB_EOPTIMESTAMP(device, rb)); + cmds[index++] = upper_32_bits(RB_EOPTIMESTAMP(device, rb)); + cmds[index++] = rb->timestamp; + } + + if (IS_WFI(flags)) + cmds[index++] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0); + + if (IS_SECURE(flags)) { + cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1); + cmds[index++] = CP_CONCURRENT_BIN_DISABLE; + cmds[index++] = cp_type7_packet(CP_SET_SECURE_MODE, 1); + cmds[index++] = 0; + cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1); + cmds[index++] = CP_SYNC_THREADS; + } + + cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1); + cmds[index++] = CP_SET_THREAD_BOTH; + + cmds[index++] = cp_type7_packet(CP_SET_MARKER, 1); + cmds[index++] = 0x100; /* IFPC 
enable */ + + cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1); + cmds[index++] = CP_SET_THREAD_BR; + + /* 10 dwords */ + index += gen7_preemption_post_ibsubmit(adreno_dev, &cmds[index]); + + /* Adjust the thing for the number of bytes we actually wrote */ + rb->_wptr -= (size - index); + + return gen7_ringbuffer_submit(rb, time); +} + +static u32 gen7_get_alwayson_counter(u32 *cmds, u64 gpuaddr) +{ + cmds[0] = cp_type7_packet(CP_REG_TO_MEM, 3); + cmds[1] = GEN7_CP_ALWAYS_ON_COUNTER_LO | (1 << 30) | (2 << 18); + cmds[2] = lower_32_bits(gpuaddr); + cmds[3] = upper_32_bits(gpuaddr); + + return 4; +} + +#define PROFILE_IB_DWORDS 4 +#define PROFILE_IB_SLOTS (PAGE_SIZE / (PROFILE_IB_DWORDS << 2)) + +static u64 gen7_get_user_profiling_ib(struct adreno_ringbuffer *rb, + struct kgsl_drawobj_cmd *cmdobj, u32 target_offset, u32 *cmds) +{ + u32 offset = rb->profile_index * (PROFILE_IB_DWORDS << 2); + u32 *ib = rb->profile_desc->hostptr + offset; + u32 dwords = gen7_get_alwayson_counter(ib, + cmdobj->profiling_buffer_gpuaddr + target_offset); + + cmds[0] = cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3); + cmds[1] = lower_32_bits(rb->profile_desc->gpuaddr + offset); + cmds[2] = upper_32_bits(rb->profile_desc->gpuaddr + offset); + cmds[3] = dwords; + + rb->profile_index = (rb->profile_index + 1) % PROFILE_IB_SLOTS; + + return 4; +} + +static int gen7_drawctxt_switch(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, + struct adreno_context *drawctxt) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (rb->drawctxt_active == drawctxt) + return 0; + + if (kgsl_context_detached(&drawctxt->base)) + return -ENOENT; + + if (!_kgsl_context_get(&drawctxt->base)) + return -ENOENT; + + trace_adreno_drawctxt_switch(rb, drawctxt); + + gen7_rb_context_switch(adreno_dev, rb, drawctxt); + + /* Release the current drawctxt as soon as the new one is switched */ + adreno_put_drawctxt_on_timestamp(device, rb->drawctxt_active, + rb, rb->timestamp); + + rb->drawctxt_active = drawctxt; + return 0; +} + + +#define GEN7_USER_PROFILE_IB(rb, cmdobj, cmds, field) \ + gen7_get_user_profiling_ib((rb), (cmdobj), \ + offsetof(struct kgsl_drawobj_profiling_buffer, field), \ + (cmds)) + +#define GEN7_KERNEL_PROFILE(dev, cmdobj, cmds, field) \ + gen7_get_alwayson_counter((cmds), \ + (dev)->profile_buffer->gpuaddr + \ + ADRENO_DRAWOBJ_PROFILE_OFFSET((cmdobj)->profile_index, \ + field)) + +#define GEN7_COMMAND_DWORDS 38 + +int gen7_ringbuffer_submitcmd(struct adreno_device *adreno_dev, + struct kgsl_drawobj_cmd *cmdobj, u32 flags, + struct adreno_submit_time *time) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj); + struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context); + struct adreno_ringbuffer *rb = drawctxt->rb; + int ret = 0, numibs = 0, index = 0; + u32 *cmds; + + /* Count the number of IBs (if we are not skipping) */ + if (!IS_SKIP(flags)) { + struct list_head *tmp; + + list_for_each(tmp, &cmdobj->cmdlist) + numibs++; + } + + cmds = kmalloc((GEN7_COMMAND_DWORDS + (numibs * 5)) << 2, GFP_KERNEL); + if (!cmds) { + ret = -ENOMEM; + goto done; + } + + cmds[index++] = cp_type7_packet(CP_NOP, 1); + cmds[index++] = START_IB_IDENTIFIER; + + /* Kernel profiling: 4 dwords */ + if (IS_KERNEL_PROFILE(flags)) + index += GEN7_KERNEL_PROFILE(adreno_dev, cmdobj, &cmds[index], + started); + + /* User profiling: 4 dwords */ + if (IS_USER_PROFILE(flags)) + index += GEN7_USER_PROFILE_IB(rb, cmdobj, &cmds[index], + gpu_ticks_submitted); + + if 
(is_concurrent_binning(drawctxt)) { + cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1); + cmds[index++] = CP_SET_THREAD_BOTH; + } + if (numibs) { + struct kgsl_memobj_node *ib; + + cmds[index++] = cp_type7_packet(CP_SET_MARKER, 1); + cmds[index++] = 0x00d; /* IB1LIST start */ + + list_for_each_entry(ib, &cmdobj->cmdlist, node) { + if (ib->priv & MEMOBJ_SKIP || + (ib->flags & KGSL_CMDLIST_CTXTSWITCH_PREAMBLE && + !IS_PREAMBLE(flags))) + cmds[index++] = cp_type7_packet(CP_NOP, 4); + + cmds[index++] = + cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3); + cmds[index++] = lower_32_bits(ib->gpuaddr); + cmds[index++] = upper_32_bits(ib->gpuaddr); + + /* Double check that IB_PRIV is never set */ + cmds[index++] = (ib->size >> 2) & 0xfffff; + } + + cmds[index++] = cp_type7_packet(CP_SET_MARKER, 1); + cmds[index++] = 0x00e; /* IB1LIST end */ + } + + if (is_concurrent_binning(drawctxt)) { + cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1); + cmds[index++] = CP_SET_THREAD_BR; + } + /* CCU invalidate depth */ + cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 1); + cmds[index++] = 24; + + /* CCU invalidate color */ + cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 1); + cmds[index++] = 25; + + /* 4 dwords */ + if (IS_KERNEL_PROFILE(flags)) + index += GEN7_KERNEL_PROFILE(adreno_dev, cmdobj, &cmds[index], + retired); + + /* 4 dwords */ + if (IS_USER_PROFILE(flags)) + index += GEN7_USER_PROFILE_IB(rb, cmdobj, &cmds[index], + gpu_ticks_retired); + + cmds[index++] = cp_type7_packet(CP_NOP, 1); + cmds[index++] = END_IB_IDENTIFIER; + + ret = gen7_drawctxt_switch(adreno_dev, rb, drawctxt); + + /* + * In the unlikely event of an error in the drawctxt switch, + * treat it like a hang + */ + if (ret) { + /* + * It is "normal" to get a -ENOSPC or a -ENOENT. Don't log it, + * the upper layers know how to handle it + */ + if (ret != -ENOSPC && ret != -ENOENT) + dev_err(device->dev, + "Unable to switch draw context: %d\n", ret); + goto done; + } + + adreno_drawobj_set_constraint(device, drawobj); + + ret = gen7_ringbuffer_addcmds(adreno_dev, drawctxt->rb, drawctxt, + flags, cmds, index, drawobj->timestamp, time); + +done: + trace_kgsl_issueibcmds(device, drawctxt->base.id, numibs, + drawobj->timestamp, drawobj->flags, ret, drawctxt->type); + + kfree(cmds); + return ret; +} diff --git a/adreno_gen7_rpmh.c b/adreno_gen7_rpmh.c new file mode 100644 index 0000000000..37e3fcdaf1 --- /dev/null +++ b/adreno_gen7_rpmh.c @@ -0,0 +1,469 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + */ + +#include +#include +#include +#include + +#include "adreno.h" +#include "adreno_gen7.h" +#include "kgsl_bus.h" +#include "kgsl_device.h" + +struct rpmh_arc_vals { + u32 num; + const u16 *val; +}; + +struct bcm { + const char *name; + u32 buswidth; + u32 channels; + u32 unit; + u16 width; + u8 vcd; + bool fixed; +}; + +struct bcm_data { + __le32 unit; + __le16 width; + u8 vcd; + u8 reserved; +}; + +struct rpmh_bw_votes { + u32 wait_bitmask; + u32 num_cmds; + u32 *addrs; + u32 num_levels; + u32 **cmds; +}; + +#define ARC_VOTE_SET(pri, sec, vlvl) \ + (FIELD_PREP(GENMASK(31, 16), vlvl) | \ + FIELD_PREP(GENMASK(15, 8), sec) | \ + FIELD_PREP(GENMASK(7, 0), pri)) + +static int rpmh_arc_cmds(struct rpmh_arc_vals *arc, const char *res_id) +{ + size_t len = 0; + + arc->val = cmd_db_read_aux_data(res_id, &len); + + /* + * cmd_db_read_aux_data() gives us a zero-padded table of + * size len that contains the arc values. 
To determine the + * number of arc values, we loop through the table and count + * them until we get to the end of the buffer or hit the + * zero padding. + */ + for (arc->num = 1; arc->num < (len >> 1); arc->num++) { + if (arc->val[arc->num - 1] != 0 && arc->val[arc->num] == 0) + break; + } + + return 0; +} + +static int setup_volt_dependency_tbl(u32 *votes, + struct rpmh_arc_vals *pri_rail, struct rpmh_arc_vals *sec_rail, + u16 *vlvl, unsigned int num_entries) +{ + int i, j, k; + uint16_t cur_vlvl; + bool found_match; + + /* i tracks current KGSL GPU frequency table entry + * j tracks secondary rail voltage table entry + * k tracks primary rail voltage table entry + */ + for (i = 0; i < num_entries; i++) { + found_match = false; + + /* Look for a primary rail voltage that matches a VLVL level */ + for (k = 0; k < pri_rail->num; k++) { + if (pri_rail->val[k] >= vlvl[i]) { + cur_vlvl = pri_rail->val[k]; + found_match = true; + break; + } + } + + /* If we did not find a matching VLVL level then abort */ + if (!found_match) + return -EINVAL; + + /* + * Look for a secondary rail index whose VLVL value + * is greater than or equal to the VLVL value of the + * corresponding index of the primary rail + */ + for (j = 0; j < sec_rail->num; j++) { + if (sec_rail->val[j] >= cur_vlvl || + j + 1 == sec_rail->num) + break; + } + + if (j == sec_rail->num) + j = 0; + + votes[i] = ARC_VOTE_SET(k, j, cur_vlvl); + } + + return 0; +} + +/* Generate a set of bandwidth votes for the list of BCMs */ +static void tcs_cmd_data(struct bcm *bcms, int count, u32 ab, u32 ib, + u32 *data) +{ + int i; + + for (i = 0; i < count; i++) { + bool valid = true; + bool commit = false; + u64 avg, peak, x, y; + + if (i == count - 1 || bcms[i].vcd != bcms[i + 1].vcd) + commit = true; + + if (bcms[i].fixed) { + if (!ab && !ib) + data[i] = BCM_TCS_CMD(commit, false, 0x0, 0x0); + else + data[i] = BCM_TCS_CMD(commit, true, 0x0, 0x8); + continue; + } + + /* Multiple the bandwidth by the width of the connection */ + avg = ((u64) ab) * bcms[i].width; + + /* And then divide by the total width across channels */ + do_div(avg, bcms[i].buswidth * bcms[i].channels); + + peak = ((u64) ib) * bcms[i].width; + do_div(peak, bcms[i].buswidth); + + /* Input bandwidth value is in KBps */ + x = avg * 1000ULL; + do_div(x, bcms[i].unit); + + /* Input bandwidth value is in KBps */ + y = peak * 1000ULL; + do_div(y, bcms[i].unit); + + /* + * If a bandwidth value was specified but the calculation ends + * rounding down to zero, set a minimum level + */ + if (ab && x == 0) + x = 1; + + if (ib && y == 0) + y = 1; + + x = min_t(u64, x, BCM_TCS_CMD_VOTE_MASK); + y = min_t(u64, y, BCM_TCS_CMD_VOTE_MASK); + + if (!x && !y) + valid = false; + + data[i] = BCM_TCS_CMD(commit, valid, x, y); + } +} + +static void free_rpmh_bw_votes(struct rpmh_bw_votes *votes) +{ + int i; + + if (!votes) + return; + + for (i = 0; votes->cmds && i < votes->num_levels; i++) + kfree(votes->cmds[i]); + + kfree(votes->cmds); + kfree(votes->addrs); + kfree(votes); +} + +/* Build the votes table from the specified bandwidth levels */ +static struct rpmh_bw_votes *build_rpmh_bw_votes(struct bcm *bcms, + int bcm_count, u32 *levels, int levels_count) +{ + struct rpmh_bw_votes *votes; + int i; + + votes = kzalloc(sizeof(*votes), GFP_KERNEL); + if (!votes) + return ERR_PTR(-ENOMEM); + + votes->addrs = kcalloc(bcm_count, sizeof(*votes->cmds), GFP_KERNEL); + if (!votes->addrs) { + free_rpmh_bw_votes(votes); + return ERR_PTR(-ENOMEM); + } + + votes->cmds = kcalloc(levels_count, 
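+ /*
+ * cmds ends up as a [num_levels][num_cmds] table: one TCS data word per
+ * BCM for every bandwidth level, filled in by tcs_cmd_data(). As a rough
+ * illustration with hypothetical values: for a non-fixed BCM whose width
+ * equals its buswidth and whose unit is 1000, a peak (ib) request of
+ * 1000 KBps reduces to a vote value of 1000 before clamping to
+ * BCM_TCS_CMD_VOTE_MASK.
+ */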
sizeof(*votes->cmds), GFP_KERNEL); + if (!votes->cmds) { + free_rpmh_bw_votes(votes); + return ERR_PTR(-ENOMEM); + } + + votes->num_cmds = bcm_count; + votes->num_levels = levels_count; + + /* Get the cmd-db information for each BCM */ + for (i = 0; i < bcm_count; i++) { + size_t l; + const struct bcm_data *data; + + data = cmd_db_read_aux_data(bcms[i].name, &l); + + votes->addrs[i] = cmd_db_read_addr(bcms[i].name); + + bcms[i].unit = le32_to_cpu(data->unit); + bcms[i].width = le16_to_cpu(data->width); + bcms[i].vcd = data->vcd; + } + + for (i = 0; i < bcm_count; i++) { + if (i == (bcm_count - 1) || bcms[i].vcd != bcms[i + 1].vcd) + votes->wait_bitmask |= (1 << i); + } + + for (i = 0; i < levels_count; i++) { + votes->cmds[i] = kcalloc(bcm_count, sizeof(u32), GFP_KERNEL); + if (!votes->cmds[i]) { + free_rpmh_bw_votes(votes); + return ERR_PTR(-ENOMEM); + } + + tcs_cmd_data(bcms, bcm_count, 0, levels[i], votes->cmds[i]); + } + + return votes; +} + +/* + * setup_gmu_arc_votes - Build the gmu voting table + * @hfi: Pointer to hfi device + * @pri_rail: Pointer to primary power rail vlvl table + * @sec_rail: Pointer to second/dependent power rail vlvl table + * + * This function initializes the cx votes for all gmu frequencies + * for gmu dcvs + */ +static int setup_cx_arc_votes(struct gen7_hfi *hfi, + struct rpmh_arc_vals *pri_rail, struct rpmh_arc_vals *sec_rail) +{ + /* Hardcoded values of GMU CX voltage levels */ + u16 gmu_cx_vlvl[MAX_CX_LEVELS]; + u32 cx_votes[MAX_CX_LEVELS]; + struct hfi_dcvstable_cmd *table = &hfi->dcvs_table; + int ret, i; + + gmu_cx_vlvl[0] = 0; + gmu_cx_vlvl[1] = RPMH_REGULATOR_LEVEL_LOW_SVS; + gmu_cx_vlvl[2] = RPMH_REGULATOR_LEVEL_SVS; + + table->gmu_level_num = 3; + + table->cx_votes[0].freq = 0; + table->cx_votes[1].freq = GMU_FREQ_MIN / 1000; + table->cx_votes[2].freq = GMU_FREQ_MAX / 1000; + + ret = setup_volt_dependency_tbl(cx_votes, pri_rail, + sec_rail, gmu_cx_vlvl, table->gmu_level_num); + if (!ret) { + for (i = 0; i < table->gmu_level_num; i++) + table->cx_votes[i].vote = cx_votes[i]; + } + + return ret; +} + +/* + * setup_gx_arc_votes - Build the gpu dcvs voting table + * @hfi: Pointer to hfi device + * @pri_rail: Pointer to primary power rail vlvl table + * @sec_rail: Pointer to second/dependent power rail vlvl table + * + * This function initializes the gx votes for all gpu frequencies + * for gpu dcvs + */ +static int setup_gx_arc_votes(struct adreno_device *adreno_dev, + struct rpmh_arc_vals *pri_rail, struct rpmh_arc_vals *sec_rail) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct hfi_dcvstable_cmd *table = &gmu->hfi.dcvs_table; + u32 index; + u16 vlvl_tbl[MAX_GX_LEVELS]; + u32 gx_votes[MAX_GX_LEVELS]; + int ret, i; + + /* Add the zero powerlevel for the perf table */ + table->gpu_level_num = device->pwrctrl.num_pwrlevels + 1; + + if (table->gpu_level_num > pri_rail->num || + table->gpu_level_num > ARRAY_SIZE(vlvl_tbl)) { + dev_err(&gmu->pdev->dev, + "Defined more GPU DCVS levels than RPMh can support\n"); + return -ERANGE; + } + + memset(vlvl_tbl, 0, sizeof(vlvl_tbl)); + + table->gx_votes[0].freq = 0; + + /* GMU power levels are in ascending order */ + for (index = 1, i = pwr->num_pwrlevels - 1; i >= 0; i--, index++) { + vlvl_tbl[index] = pwr->pwrlevels[i].voltage_level; + table->gx_votes[index].freq = pwr->pwrlevels[i].gpu_freq / 1000; + } + + ret = setup_volt_dependency_tbl(gx_votes, pri_rail, + sec_rail, vlvl_tbl, 
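+ /*
+ * Each entry produced here packs (primary index, secondary index, vlvl)
+ * via ARC_VOTE_SET(): the vlvl goes in bits [31:16], the secondary rail
+ * index in [15:8] and the primary rail index in [7:0]. For example, with
+ * hypothetical indices, ARC_VOTE_SET(2, 1, 0x80) evaluates to 0x00800102.
+ */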
table->gpu_level_num); + if (!ret) { + for (i = 0; i < table->gpu_level_num; i++) { + table->gx_votes[i].vote = gx_votes[i]; + table->gx_votes[i].acd = 0xffffffff; + } + } + + return ret; + +} + +static int build_dcvs_table(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct gen7_hfi *hfi = &gmu->hfi; + struct rpmh_arc_vals gx_arc, cx_arc, mx_arc; + int ret; + + ret = CMD_MSG_HDR(hfi->dcvs_table, H2F_MSG_PERF_TBL); + if (ret) + return ret; + + ret = rpmh_arc_cmds(&gx_arc, "gfx.lvl"); + if (ret) + return ret; + + ret = rpmh_arc_cmds(&cx_arc, "cx.lvl"); + if (ret) + return ret; + + ret = rpmh_arc_cmds(&mx_arc, "mx.lvl"); + if (ret) + return ret; + + ret = setup_cx_arc_votes(hfi, &cx_arc, &mx_arc); + if (ret) + return ret; + + return setup_gx_arc_votes(adreno_dev, &gx_arc, &mx_arc); +} + +/* + * List of Bus Control Modules (BCMs) that need to be configured for the GPU + * to access DDR. For each bus level we will generate a vote each BC + */ +static struct bcm gen7_ddr_bcms[] = { + { .name = "SH0", .buswidth = 16 }, + { .name = "MC0", .buswidth = 4 }, + { .name = "ACV", .fixed = true }, +}; + +/* Same as above, but for the CNOC BCMs */ +static struct bcm gen7_cnoc_bcms[] = { + { .name = "CN0", .buswidth = 4 }, +}; + +static void build_bw_table_cmd(struct hfi_bwtable_cmd *cmd, + struct rpmh_bw_votes *ddr, struct rpmh_bw_votes *cnoc) +{ + u32 i, j; + + cmd->bw_level_num = ddr->num_levels; + cmd->ddr_cmds_num = ddr->num_cmds; + cmd->ddr_wait_bitmask = ddr->wait_bitmask; + + for (i = 0; i < ddr->num_cmds; i++) + cmd->ddr_cmd_addrs[i] = ddr->addrs[i]; + + for (i = 0; i < ddr->num_levels; i++) + for (j = 0; j < ddr->num_cmds; j++) + cmd->ddr_cmd_data[i][j] = (u32) ddr->cmds[i][j]; + + if (!cnoc) + return; + + cmd->cnoc_cmds_num = cnoc->num_cmds; + cmd->cnoc_wait_bitmask = cnoc->wait_bitmask; + + for (i = 0; i < cnoc->num_cmds; i++) + cmd->cnoc_cmd_addrs[i] = cnoc->addrs[i]; + + for (i = 0; i < cnoc->num_levels; i++) + for (j = 0; j < cnoc->num_cmds; j++) + cmd->cnoc_cmd_data[i][j] = (u32) cnoc->cmds[i][j]; +} + +static int build_bw_table(struct adreno_device *adreno_dev) +{ + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct rpmh_bw_votes *ddr, *cnoc = NULL; + u32 *cnoc_table; + u32 count; + int ret; + + ddr = build_rpmh_bw_votes(gen7_ddr_bcms, ARRAY_SIZE(gen7_ddr_bcms), + pwr->ddr_table, pwr->ddr_table_count); + if (IS_ERR(ddr)) + return PTR_ERR(ddr); + + cnoc_table = kgsl_bus_get_table(device->pdev, "qcom,bus-table-cnoc", + &count); + + if (count > 0) + cnoc = build_rpmh_bw_votes(gen7_cnoc_bcms, + ARRAY_SIZE(gen7_cnoc_bcms), cnoc_table, count); + + kfree(cnoc_table); + + if (IS_ERR(cnoc)) { + free_rpmh_bw_votes(ddr); + return PTR_ERR(cnoc); + } + + ret = CMD_MSG_HDR(gmu->hfi.bw_table, H2F_MSG_BW_VOTE_TBL); + if (ret) + return ret; + + build_bw_table_cmd(&gmu->hfi.bw_table, ddr, cnoc); + + free_rpmh_bw_votes(ddr); + free_rpmh_bw_votes(cnoc); + + return 0; +} + +int gen7_build_rpmh_tables(struct adreno_device *adreno_dev) +{ + int ret; + + ret = build_dcvs_table(adreno_dev); + if (ret) + return ret; + + return build_bw_table(adreno_dev); +} diff --git a/adreno_gen7_snapshot.c b/adreno_gen7_snapshot.c new file mode 100644 index 0000000000..71a2d37c22 --- /dev/null +++ b/adreno_gen7_snapshot.c @@ -0,0 +1,1254 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. 
+ */ + +#include "adreno_gen7_snapshot.h" +#include "adreno.h" +#include "adreno_snapshot.h" + +static struct kgsl_memdesc *gen7_capturescript; +static struct kgsl_memdesc *gen7_crashdump_registers; +static u32 *gen7_cd_reg_end; + +#define GEN7_DEBUGBUS_BLOCK_SIZE 0x100 + +#define GEN7_SP_READ_SEL_VAL(_location, _pipe, _statetype, _usptp, _sptp) \ + (FIELD_PREP(GENMASK(19, 18), _location) | \ + FIELD_PREP(GENMASK(17, 16), _pipe) | \ + FIELD_PREP(GENMASK(15, 8), _statetype) | \ + FIELD_PREP(GENMASK(7, 4), _usptp) | \ + FIELD_PREP(GENMASK(3, 0), _sptp)) + +#define GEN7_CP_APERTURE_REG_VAL(_pipe, _cluster, _context) \ + (FIELD_PREP(GENMASK(13, 12), _pipe) | \ + FIELD_PREP(GENMASK(10, 8), _cluster) | \ + FIELD_PREP(GENMASK(5, 4), _context)) + +#define GEN7_DEBUGBUS_SECTION_SIZE (sizeof(struct kgsl_snapshot_debugbus) \ + + (GEN7_DEBUGBUS_BLOCK_SIZE << 3)) + +#define CD_REG_END 0xaaaaaaaa + +static int CD_WRITE(u64 *ptr, u32 offset, u64 val) +{ + ptr[0] = val; + ptr[1] = FIELD_PREP(GENMASK(63, 44), offset) | BIT(21) | BIT(0); + + return 2; +} + +static int CD_READ(u64 *ptr, u32 offset, u32 size, u64 target) +{ + ptr[0] = target; + ptr[1] = FIELD_PREP(GENMASK(63, 44), offset) | size; + + return 2; +} + +static void CD_FINISH(u64 *ptr, u32 offset) +{ + gen7_cd_reg_end = gen7_crashdump_registers->hostptr + offset; + *gen7_cd_reg_end = CD_REG_END; + ptr[0] = gen7_crashdump_registers->gpuaddr + offset; + ptr[1] = FIELD_PREP(GENMASK(63, 44), GEN7_CP_CRASH_DUMP_STATUS) | BIT(0); + ptr[2] = 0; + ptr[3] = 0; +} + +static bool CD_SCRIPT_CHECK(struct kgsl_device *device) +{ + return (gen7_is_smmu_stalled(device) || (!device->snapshot_crashdumper) || + IS_ERR_OR_NULL(gen7_capturescript) || + IS_ERR_OR_NULL(gen7_crashdump_registers)); +} + +static bool _gen7_do_crashdump(struct kgsl_device *device) +{ + unsigned int reg = 0; + ktime_t timeout; + + kgsl_regwrite(device, GEN7_CP_CRASH_SCRIPT_BASE_LO, + lower_32_bits(gen7_capturescript->gpuaddr)); + kgsl_regwrite(device, GEN7_CP_CRASH_SCRIPT_BASE_HI, + upper_32_bits(gen7_capturescript->gpuaddr)); + kgsl_regwrite(device, GEN7_CP_CRASH_DUMP_CNTL, 1); + + timeout = ktime_add_ms(ktime_get(), CP_CRASH_DUMPER_TIMEOUT); + + if (!device->snapshot_atomic) + might_sleep(); + for (;;) { + /* make sure we're reading the latest value */ + rmb(); + if ((*gen7_cd_reg_end) != CD_REG_END) + break; + if (ktime_compare(ktime_get(), timeout) > 0) + break; + /* Wait 1msec to avoid unnecessary looping */ + if (!device->snapshot_atomic) + usleep_range(100, 1000); + } + + kgsl_regread(device, GEN7_CP_CRASH_DUMP_STATUS, ®); + + /* + * Writing to the GEN7_CP_CRASH_DUMP_CNTL also resets the + * GEN7_CP_CRASH_DUMP_STATUS. 
Make sure the read above is + * complete before we change the value + */ + rmb(); + + kgsl_regwrite(device, GEN7_CP_CRASH_DUMP_CNTL, 0); + + if (WARN(!(reg & 0x2), "Crashdumper timed out\n")) + return false; + + return true; +} + +static size_t gen7_legacy_snapshot_registers(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct reg_list *regs = priv; + + if (regs->sel) + kgsl_regwrite(device, regs->sel->host_reg, regs->sel->val); + + return adreno_snapshot_registers_v2(device, buf, remain, (void *)regs->regs); +} + +static size_t gen7_snapshot_registers(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct reg_list *regs = (struct reg_list *)priv; + const u32 *ptr = regs->regs; + unsigned int *data = (unsigned int *)buf; + unsigned int *src; + unsigned int size = adreno_snapshot_regs_count(ptr) * 4; + + if (remain < size) { + SNAPSHOT_ERR_NOMEM(device, "REGISTERS"); + return 0; + } + + src = gen7_crashdump_registers->hostptr + regs->offset; + + for (ptr = regs->regs; ptr[0] != UINT_MAX; ptr += 2) { + unsigned int cnt = REG_COUNT(ptr); + + if (cnt == 1) + *data++ = BIT(31) | ptr[0]; + else { + *data++ = ptr[0]; + *data++ = cnt; + } + memcpy(data, src, cnt << 2); + data += cnt; + src += cnt; + } + + /* Return the size of the section */ + return size; +} + +static size_t gen7_legacy_snapshot_shader(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_shader_v2 *header = + (struct kgsl_snapshot_shader_v2 *) buf; + struct gen7_shader_block_info *info = (struct gen7_shader_block_info *) priv; + struct gen7_shader_block *block = info->block; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + unsigned int read_sel; + int i; + + if (remain < (sizeof(*header) + (block->size << 2))) { + SNAPSHOT_ERR_NOMEM(device, "SHADER MEMORY"); + return 0; + } + + header->type = block->statetype; + header->index = block->sp_id; + header->size = block->size; + header->usptp = block->usptp; + header->location = block->location; + header->pipe_id = block->pipeid; + + read_sel = GEN7_SP_READ_SEL_VAL(block->location, block->pipeid, + block->statetype, block->usptp, block->sp_id); + + kgsl_regwrite(device, GEN7_SP_READ_SEL, read_sel); + + /* + * An explicit barrier is needed so that reads do not happen before + * the register write. 
+ */ + mb(); + + for (i = 0; i < block->size; i++) + data[i] = kgsl_regmap_read(&device->regmap, GEN7_SP_AHB_READ_APERTURE + i); + + return (sizeof(*header) + (block->size << 2)); +} + +static size_t gen7_snapshot_shader_memory(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_shader_v2 *header = + (struct kgsl_snapshot_shader_v2 *) buf; + struct gen7_shader_block_info *info = (struct gen7_shader_block_info *) priv; + struct gen7_shader_block *block = info->block; + unsigned int *data = (unsigned int *) (buf + sizeof(*header)); + + if (remain < (sizeof(*header) + (block->size << 2))) { + SNAPSHOT_ERR_NOMEM(device, "SHADER MEMORY"); + return 0; + } + + header->type = block->statetype; + header->index = block->sp_id; + header->size = block->size; + header->usptp = block->usptp; + header->location = block->location; + header->pipe_id = block->pipeid; + + memcpy(data, gen7_crashdump_registers->hostptr + info->offset, + (block->size << 2)); + + return (sizeof(*header) + (block->size << 2)); +} + +static void gen7_snapshot_shader(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + unsigned int i; + struct gen7_shader_block_info info; + u64 *ptr; + u32 offset = 0; + size_t (*func)(struct kgsl_device *device, u8 *buf, size_t remain, + void *priv) = gen7_legacy_snapshot_shader; + + if (CD_SCRIPT_CHECK(device)) { + for (i = 0; i < ARRAY_SIZE(gen7_shader_blocks); i++) { + info.block = &gen7_shader_blocks[i]; + info.offset = offset; + offset += gen7_shader_blocks[i].size << 2; + + /* Shader working/shadow memory */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_SHADER_V2, + snapshot, func, &info); + } + return; + } + + /* Build the crash script */ + ptr = gen7_capturescript->hostptr; + offset = 0; + + for (i = 0; i < ARRAY_SIZE(gen7_shader_blocks); i++) { + struct gen7_shader_block *block = &gen7_shader_blocks[i]; + + /* Program the aperture */ + ptr += CD_WRITE(ptr, GEN7_SP_READ_SEL, + GEN7_SP_READ_SEL_VAL(block->location, block->pipeid, + block->statetype, block->usptp, block->sp_id)); + + /* Read all the data in one chunk */ + ptr += CD_READ(ptr, GEN7_SP_AHB_READ_APERTURE, block->size, + gen7_crashdump_registers->gpuaddr + offset); + + offset += block->size << 2; + } + + /* Marker for end of script */ + CD_FINISH(ptr, offset); + + /* Try to run the crash dumper */ + if (_gen7_do_crashdump(device)) + func = gen7_snapshot_shader_memory; + + offset = 0; + + for (i = 0; i < ARRAY_SIZE(gen7_shader_blocks); i++) { + info.block = &gen7_shader_blocks[i]; + info.offset = offset; + offset += gen7_shader_blocks[i].size << 2; + + /* Shader working/shadow memory */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_SHADER_V2, + snapshot, func, &info); + } +} + +static void gen7_snapshot_mempool(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + /* set CP_CHICKEN_DBG[StabilizeMVC] to stabilize it while dumping */ + kgsl_regrmw(device, GEN7_CP_CHICKEN_DBG, 0x4, 0x4); + kgsl_regrmw(device, GEN7_CP_BV_CHICKEN_DBG, 0x4, 0x4); + + kgsl_snapshot_indexed_registers(device, snapshot, + GEN7_CP_MEM_POOL_DBG_ADDR, GEN7_CP_MEM_POOL_DBG_DATA, + 0, 0x2100); + + kgsl_snapshot_indexed_registers(device, snapshot, + GEN7_CP_BV_MEM_POOL_DBG_ADDR, GEN7_CP_BV_MEM_POOL_DBG_DATA, + 0, 0x2100); + + kgsl_regrmw(device, GEN7_CP_CHICKEN_DBG, 0x4, 0x0); + kgsl_regrmw(device, GEN7_CP_BV_CHICKEN_DBG, 0x4, 0x0); +} + +static unsigned int gen7_read_dbgahb(struct kgsl_device *device, + unsigned int regbase, unsigned int reg) +{ + unsigned int val; + + 
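+ /*
+ * The block previously selected through GEN7_SP_READ_SEL is mirrored at
+ * GEN7_SP_AHB_READ_APERTURE, so register 'reg' of that block is read back
+ * at offset (reg - regbase) into the aperture.
+ */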
kgsl_regread(device, (GEN7_SP_AHB_READ_APERTURE + reg - regbase), &val); + return val; +} + +static size_t gen7_legacy_snapshot_cluster_dbgahb(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_mvc_regs_v2 *header = + (struct kgsl_snapshot_mvc_regs_v2 *)buf; + struct gen7_sptp_cluster_registers *cluster = + (struct gen7_sptp_cluster_registers *)priv; + const u32 *ptr = cluster->regs; + unsigned int read_sel; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + int j; + unsigned int size = adreno_snapshot_regs_count(ptr) * 4; + + if (remain < (sizeof(*header) + size)) { + SNAPSHOT_ERR_NOMEM(device, "MVC REGISTERS"); + return 0; + } + + header->ctxt_id = cluster->context_id; + header->cluster_id = cluster->cluster_id; + header->pipe_id = cluster->pipe_id; + header->location_id = cluster->location_id; + + read_sel = GEN7_SP_READ_SEL_VAL(cluster->location_id, cluster->pipe_id, + cluster->statetype, 0, 0); + + kgsl_regwrite(device, GEN7_SP_READ_SEL, read_sel); + + for (ptr = cluster->regs; ptr[0] != UINT_MAX; ptr += 2) { + unsigned int count = REG_COUNT(ptr); + + if (count == 1) + *data++ = ptr[0]; + else { + *data++ = ptr[0] | (1 << 31); + *data++ = ptr[1]; + } + for (j = ptr[0]; j <= ptr[1]; j++) + *data++ = gen7_read_dbgahb(device, cluster->regbase, j); + } + + return (size + sizeof(*header)); +} + +static size_t gen7_snapshot_cluster_dbgahb(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct kgsl_snapshot_mvc_regs_v2 *header = + (struct kgsl_snapshot_mvc_regs_v2 *)buf; + struct gen7_sptp_cluster_registers *cluster = + (struct gen7_sptp_cluster_registers *)priv; + const u32 *ptr = cluster->regs; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + unsigned int *src; + unsigned int size = adreno_snapshot_regs_count(ptr) * 4; + + if (remain < (sizeof(*header) + size)) { + SNAPSHOT_ERR_NOMEM(device, "REGISTERS"); + return 0; + } + + header->ctxt_id = cluster->context_id; + header->cluster_id = cluster->cluster_id; + header->pipe_id = cluster->pipe_id; + header->location_id = cluster->location_id; + + src = gen7_crashdump_registers->hostptr + cluster->offset; + + for (ptr = cluster->regs; ptr[0] != UINT_MAX; ptr += 2) { + unsigned int cnt = REG_COUNT(ptr); + + if (cnt == 1) + *data++ = ptr[0]; + else { + *data++ = ptr[0] | (1 << 31); + *data++ = ptr[1]; + } + memcpy(data, src, cnt << 2); + data += cnt; + src += cnt; + } + + return (size + sizeof(*header)); +} + +static void gen7_snapshot_dbgahb_regs(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + int i; + u64 *ptr, offset = 0; + unsigned int count; + size_t (*func)(struct kgsl_device *device, u8 *buf, size_t remain, + void *priv) = gen7_legacy_snapshot_cluster_dbgahb; + + if (CD_SCRIPT_CHECK(device)) { + for (i = 0; i < ARRAY_SIZE(gen7_sptp_clusters); i++) + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_MVC_V2, snapshot, func, + &gen7_sptp_clusters[i]); + return; + } + + /* Build the crash script */ + ptr = gen7_capturescript->hostptr; + + for (i = 0; i < ARRAY_SIZE(gen7_sptp_clusters); i++) { + struct gen7_sptp_cluster_registers *cluster = &gen7_sptp_clusters[i]; + const u32 *regs = cluster->regs; + + cluster->offset = offset; + + /* Program the aperture */ + ptr += CD_WRITE(ptr, GEN7_SP_READ_SEL, GEN7_SP_READ_SEL_VAL + (cluster->location_id, cluster->pipe_id, cluster->statetype, 0, 0)); + + for (; regs[0] != UINT_MAX; regs += 2) { + count = REG_COUNT(regs); + ptr += CD_READ(ptr, (GEN7_SP_AHB_READ_APERTURE + + regs[0] - 
cluster->regbase), count, + (gen7_crashdump_registers->gpuaddr + offset)); + + offset += count * sizeof(unsigned int); + } + } + /* Marker for end of script */ + CD_FINISH(ptr, offset); + + /* Try to run the crash dumper */ + if (_gen7_do_crashdump(device)) + func = gen7_snapshot_cluster_dbgahb; + + /* Capture the registers in snapshot */ + for (i = 0; i < ARRAY_SIZE(gen7_sptp_clusters); i++) + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_MVC_V2, snapshot, func, &gen7_sptp_clusters[i]); +} + +static size_t gen7_legacy_snapshot_mvc(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct kgsl_snapshot_mvc_regs_v2 *header = + (struct kgsl_snapshot_mvc_regs_v2 *)buf; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + struct gen7_cluster_registers *cluster = + (struct gen7_cluster_registers *)priv; + const u32 *ptr = cluster->regs; + unsigned int j; + unsigned int size = adreno_snapshot_regs_count(ptr) * 4; + + if (remain < (sizeof(*header) + size)) { + SNAPSHOT_ERR_NOMEM(device, "MVC REGISTERS"); + return 0; + } + + header->ctxt_id = (cluster->context_id == STATE_FORCE_CTXT_1) ? 1 : 0; + header->cluster_id = cluster->cluster_id; + header->pipe_id = cluster->pipe_id; + header->location_id = UINT_MAX; + + /* + * Set the AHB control for the Host to read from the + * cluster/context for this iteration. + */ + kgsl_regwrite(device, GEN7_CP_APERTURE_CNTL_HOST, GEN7_CP_APERTURE_REG_VAL + (cluster->pipe_id, cluster->cluster_id, cluster->context_id)); + + if (cluster->sel) + kgsl_regwrite(device, cluster->sel->host_reg, cluster->sel->val); + + for (ptr = cluster->regs; ptr[0] != UINT_MAX; ptr += 2) { + unsigned int count = REG_COUNT(ptr); + + if (count == 1) + *data++ = ptr[0]; + else { + *data++ = ptr[0] | (1 << 31); + *data++ = ptr[1]; + } + for (j = ptr[0]; j <= ptr[1]; j++) { + kgsl_regread(device, j, data); + data++; + } + } + + return (size + sizeof(*header)); +} + +static size_t gen7_snapshot_mvc(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct kgsl_snapshot_mvc_regs_v2 *header = + (struct kgsl_snapshot_mvc_regs_v2 *)buf; + struct gen7_cluster_registers *cluster = + (struct gen7_cluster_registers *)priv; + const u32 *ptr = cluster->regs; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + unsigned int *src; + unsigned int cnt; + unsigned int size = adreno_snapshot_regs_count(ptr) * 4; + + if (remain < (sizeof(*header) + size)) { + SNAPSHOT_ERR_NOMEM(device, "MVC REGISTERS"); + return 0; + } + + header->ctxt_id = (cluster->context_id == STATE_FORCE_CTXT_1) ? 
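+ /*
+ * Ranges are emitted in the same compact form as the legacy path: a
+ * single register is stored as just its offset, while a span is stored as
+ * (start | BIT(31), end) followed by the register values copied out of
+ * the crashdumper buffer.
+ */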
1 : 0; + header->cluster_id = cluster->cluster_id; + header->pipe_id = cluster->pipe_id; + header->location_id = UINT_MAX; + + src = gen7_crashdump_registers->hostptr + cluster->offset; + + for (ptr = cluster->regs; ptr[0] != UINT_MAX; ptr += 2) { + cnt = REG_COUNT(ptr); + + if (cnt == 1) + *data++ = ptr[0]; + else { + *data++ = ptr[0] | (1 << 31); + *data++ = ptr[1]; + } + memcpy(data, src, cnt << 2); + src += cnt; + data += cnt; + } + + return (size + sizeof(*header)); + +} + +static void gen7_snapshot_mvc_regs(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + int i; + u64 *ptr, offset = 0; + unsigned int count; + size_t (*func)(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) = gen7_legacy_snapshot_mvc; + + if (CD_SCRIPT_CHECK(device)) { + for (i = 0; i < ARRAY_SIZE(gen7_clusters); i++) + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_MVC_V2, snapshot, func, &gen7_clusters[i]); + return; + } + + /* Build the crash script */ + ptr = gen7_capturescript->hostptr; + + for (i = 0; i < ARRAY_SIZE(gen7_clusters); i++) { + struct gen7_cluster_registers *cluster = &gen7_clusters[i]; + const u32 *regs = cluster->regs; + + cluster->offset = offset; + ptr += CD_WRITE(ptr, GEN7_CP_APERTURE_CNTL_CD, GEN7_CP_APERTURE_REG_VAL + (cluster->pipe_id, cluster->cluster_id, cluster->context_id)); + + if (cluster->sel) + ptr += CD_WRITE(ptr, cluster->sel->cd_reg, cluster->sel->val); + + for (; regs[0] != UINT_MAX; regs += 2) { + count = REG_COUNT(regs); + + ptr += CD_READ(ptr, regs[0], + count, (gen7_crashdump_registers->gpuaddr + offset)); + + offset += count * sizeof(unsigned int); + } + } + + /* Marker for end of script */ + CD_FINISH(ptr, offset); + + /* Try to run the crash dumper */ + if (_gen7_do_crashdump(device)) + func = gen7_snapshot_mvc; + + for (i = 0; i < ARRAY_SIZE(gen7_clusters); i++) + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_MVC_V2, snapshot, func, &gen7_clusters[i]); +} + +/* gen7_dbgc_debug_bus_read() - Read data from trace bus */ +static void gen7_dbgc_debug_bus_read(struct kgsl_device *device, + unsigned int block_id, unsigned int index, unsigned int *val) +{ + unsigned int reg; + + reg = FIELD_PREP(GENMASK(7, 0), index) | + FIELD_PREP(GENMASK(24, 16), block_id); + + kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_SEL_A, reg); + kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_SEL_B, reg); + kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_SEL_C, reg); + kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_SEL_D, reg); + + /* + * There needs to be a delay of 1 us to ensure enough time for correct + * data is funneled into the trace buffer + */ + udelay(1); + + kgsl_regread(device, GEN7_DBGC_CFG_DBGBUS_TRACE_BUF2, val); + val++; + kgsl_regread(device, GEN7_DBGC_CFG_DBGBUS_TRACE_BUF1, val); +} + +/* gen7_snapshot_dbgc_debugbus_block() - Capture debug data for a gpu block */ +static size_t gen7_snapshot_dbgc_debugbus_block(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_debugbus *header = + (struct kgsl_snapshot_debugbus *)buf; + const u32 *block = priv; + int i; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + + if (remain < GEN7_DEBUGBUS_SECTION_SIZE) { + SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS"); + return 0; + } + + header->id = *block; + header->count = GEN7_DEBUGBUS_BLOCK_SIZE * 2; + + for (i = 0; i < GEN7_DEBUGBUS_BLOCK_SIZE; i++) + gen7_dbgc_debug_bus_read(device, *block, i, &data[i*2]); + + return GEN7_DEBUGBUS_SECTION_SIZE; +} + +static u32 gen7_dbgc_side_debug_bus_read(struct kgsl_device 
*device, + unsigned int block_id, unsigned int index) +{ + u32 val; + unsigned int reg = FIELD_PREP(GENMASK(7, 0), index) | + FIELD_PREP(GENMASK(24, 16), block_id); + + kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_SEL_A, reg); + kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_SEL_B, reg); + kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_SEL_C, reg); + kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_SEL_D, reg); + + /* + * There needs to be a delay of 1 us to ensure enough time for correct + * data is funneled into the trace buffer + */ + udelay(1); + + val = kgsl_regmap_read(&device->regmap, GEN7_DBGC_CFG_DBGBUS_OVER); + + return FIELD_GET(GENMASK(27, 24), val); +} + +static size_t gen7_snapshot_dbgc_side_debugbus_block(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_side_debugbus *header = + (struct kgsl_snapshot_side_debugbus *)buf; + const u32 *block = priv; + int i; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + size_t size = (GEN7_DEBUGBUS_BLOCK_SIZE * sizeof(unsigned int)) + sizeof(*header); + + if (remain < size) { + SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS"); + return 0; + } + + header->id = *block; + header->size = GEN7_DEBUGBUS_BLOCK_SIZE; + header->valid_data = 0x4; + + for (i = 0; i < GEN7_DEBUGBUS_BLOCK_SIZE; i++) + data[i] = gen7_dbgc_side_debug_bus_read(device, *block, i); + + return size; +} + +/* gen7_cx_dbgc_debug_bus_read() - Read data from trace bus */ +static void gen7_cx_debug_bus_read(struct kgsl_device *device, + unsigned int block_id, unsigned int index, unsigned int *val) +{ + unsigned int reg; + + reg = FIELD_PREP(GENMASK(7, 0), index) | + FIELD_PREP(GENMASK(24, 16), block_id); + + adreno_cx_dbgc_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_SEL_A, reg); + adreno_cx_dbgc_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_SEL_B, reg); + adreno_cx_dbgc_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_SEL_C, reg); + adreno_cx_dbgc_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_SEL_D, reg); + + /* + * There needs to be a delay of 1 us to ensure enough time for correct + * data is funneled into the trace buffer + */ + udelay(1); + + adreno_cx_dbgc_regread(device, GEN7_CX_DBGC_CFG_DBGBUS_TRACE_BUF2, val); + val++; + adreno_cx_dbgc_regread(device, GEN7_CX_DBGC_CFG_DBGBUS_TRACE_BUF1, val); +} + +/* + * gen7_snapshot_cx_dbgc_debugbus_block() - Capture debug data for a gpu + * block from the CX DBGC block + */ +static size_t gen7_snapshot_cx_dbgc_debugbus_block(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_debugbus *header = + (struct kgsl_snapshot_debugbus *)buf; + const u32 *block = priv; + int i; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + + if (remain < GEN7_DEBUGBUS_SECTION_SIZE) { + SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS"); + return 0; + } + + header->id = *block; + header->count = GEN7_DEBUGBUS_BLOCK_SIZE * 2; + + for (i = 0; i < GEN7_DEBUGBUS_BLOCK_SIZE; i++) + gen7_cx_debug_bus_read(device, *block, i, &data[i*2]); + + return GEN7_DEBUGBUS_SECTION_SIZE; +} + +/* gen7_cx_side_dbgc_debug_bus_read() - Read data from trace bus */ +static void gen7_cx_side_debug_bus_read(struct kgsl_device *device, + unsigned int block_id, unsigned int index, unsigned int *val) +{ + unsigned int reg = FIELD_PREP(GENMASK(7, 0), index) | + FIELD_PREP(GENMASK(24, 16), block_id); + + adreno_cx_dbgc_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_SEL_A, reg); + adreno_cx_dbgc_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_SEL_B, reg); + adreno_cx_dbgc_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_SEL_C, reg); + 
adreno_cx_dbgc_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_SEL_D, reg); + + /* + * There needs to be a delay of 1 us to ensure enough time for correct + * data is funneled into the trace buffer + */ + udelay(1); + + adreno_cx_dbgc_regread(device, GEN7_CX_DBGC_CFG_DBGBUS_OVER, ®); + *val = FIELD_GET(GENMASK(27, 24), reg); +} + +/* + * gen7_snapshot_cx_dbgc_debugbus_block() - Capture debug data for a gpu + * block from the CX DBGC block + */ +static size_t gen7_snapshot_cx_side_dbgc_debugbus_block(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_side_debugbus *header = + (struct kgsl_snapshot_side_debugbus *)buf; + const u32 *block = priv; + int i; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + size_t size = (GEN7_DEBUGBUS_BLOCK_SIZE * sizeof(unsigned int)) + sizeof(*header); + + if (remain < size) { + SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS"); + return 0; + } + + header->id = *block; + header->size = GEN7_DEBUGBUS_BLOCK_SIZE; + header->valid_data = 0x4; + + for (i = 0; i < GEN7_DEBUGBUS_BLOCK_SIZE; i++) + gen7_cx_side_debug_bus_read(device, *block, i, &data[i]); + + return size; +} + +/* gen7_snapshot_debugbus() - Capture debug bus data */ +static void gen7_snapshot_debugbus(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot) +{ + int i; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_CNTLT, + FIELD_PREP(GENMASK(31, 28), 0xf)); + + kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_CNTLM, + FIELD_PREP(GENMASK(27, 24), 0xf)); + + kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_IVTL_0, 0); + kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_IVTL_1, 0); + kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_IVTL_2, 0); + kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_IVTL_3, 0); + + kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_BYTEL_0, + FIELD_PREP(GENMASK(3, 0), 0x0) | + FIELD_PREP(GENMASK(7, 4), 0x1) | + FIELD_PREP(GENMASK(11, 8), 0x2) | + FIELD_PREP(GENMASK(15, 12), 0x3) | + FIELD_PREP(GENMASK(19, 16), 0x4) | + FIELD_PREP(GENMASK(23, 20), 0x5) | + FIELD_PREP(GENMASK(27, 24), 0x6) | + FIELD_PREP(GENMASK(31, 28), 0x7)); + kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_BYTEL_1, + FIELD_PREP(GENMASK(3, 0), 0x8) | + FIELD_PREP(GENMASK(7, 4), 0x9) | + FIELD_PREP(GENMASK(11, 8), 0xa) | + FIELD_PREP(GENMASK(15, 12), 0xb) | + FIELD_PREP(GENMASK(19, 16), 0xc) | + FIELD_PREP(GENMASK(23, 20), 0xd) | + FIELD_PREP(GENMASK(27, 24), 0xe) | + FIELD_PREP(GENMASK(31, 28), 0xf)); + + kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_MASKL_0, 0); + kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_MASKL_1, 0); + kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_MASKL_2, 0); + kgsl_regwrite(device, GEN7_DBGC_CFG_DBGBUS_MASKL_3, 0); + + adreno_cx_dbgc_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_CNTLT, + FIELD_PREP(GENMASK(31, 28), 0xf)); + + adreno_cx_dbgc_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_CNTLM, + FIELD_PREP(GENMASK(27, 24), 0xf)); + + adreno_cx_dbgc_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_IVTL_0, 0); + adreno_cx_dbgc_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_IVTL_1, 0); + adreno_cx_dbgc_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_IVTL_2, 0); + adreno_cx_dbgc_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_IVTL_3, 0); + + adreno_cx_dbgc_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_BYTEL_0, + FIELD_PREP(GENMASK(3, 0), 0x0) | + FIELD_PREP(GENMASK(7, 4), 0x1) | + FIELD_PREP(GENMASK(11, 8), 0x2) | + FIELD_PREP(GENMASK(15, 12), 0x3) | + FIELD_PREP(GENMASK(19, 16), 0x4) | + FIELD_PREP(GENMASK(23, 20), 0x5) | + FIELD_PREP(GENMASK(27, 24), 0x6) | + 
FIELD_PREP(GENMASK(31, 28), 0x7)); + adreno_cx_dbgc_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_BYTEL_1, + FIELD_PREP(GENMASK(3, 0), 0x8) | + FIELD_PREP(GENMASK(7, 4), 0x9) | + FIELD_PREP(GENMASK(11, 8), 0xa) | + FIELD_PREP(GENMASK(15, 12), 0xb) | + FIELD_PREP(GENMASK(19, 16), 0xc) | + FIELD_PREP(GENMASK(23, 20), 0xd) | + FIELD_PREP(GENMASK(27, 24), 0xe) | + FIELD_PREP(GENMASK(31, 28), 0xf)); + + adreno_cx_dbgc_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_MASKL_0, 0); + adreno_cx_dbgc_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_MASKL_1, 0); + adreno_cx_dbgc_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_MASKL_2, 0); + adreno_cx_dbgc_regwrite(device, GEN7_CX_DBGC_CFG_DBGBUS_MASKL_3, 0); + + for (i = 0; i < ARRAY_SIZE(gen7_debugbus_blocks); i++) { + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_DEBUGBUS, + snapshot, gen7_snapshot_dbgc_debugbus_block, + (void *) &gen7_debugbus_blocks[i]); + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_SIDE_DEBUGBUS, + snapshot, gen7_snapshot_dbgc_side_debugbus_block, + (void *) &gen7_debugbus_blocks[i]); + } + + /* + * GBIF has same debugbus as of other GPU blocks hence fall back to + * default path if GPU uses GBIF. + * GBIF uses exactly same ID as of VBIF so use it as it is. + */ + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_DEBUGBUS, + snapshot, gen7_snapshot_dbgc_debugbus_block, + (void *) &gen7_gbif_debugbus_blocks[0]); + + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_DEBUGBUS, + snapshot, gen7_snapshot_dbgc_debugbus_block, + (void *) &gen7_gbif_debugbus_blocks[1]); + + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_SIDE_DEBUGBUS, + snapshot, gen7_snapshot_dbgc_side_debugbus_block, + (void *) &gen7_gbif_debugbus_blocks[0]); + + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_SIDE_DEBUGBUS, + snapshot, gen7_snapshot_dbgc_side_debugbus_block, + (void *) &gen7_gbif_debugbus_blocks[1]); + + /* Dump the CX debugbus data if the block exists */ + if (adreno_is_cx_dbgc_register(device, GEN7_CX_DBGC_CFG_DBGBUS_SEL_A)) { + for (i = 0; i < ARRAY_SIZE(gen7_cx_dbgc_debugbus_blocks); i++) { + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_DEBUGBUS, + snapshot, gen7_snapshot_cx_dbgc_debugbus_block, + (void *) &gen7_cx_dbgc_debugbus_blocks[i]); + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_SIDE_DEBUGBUS, + snapshot, gen7_snapshot_cx_side_dbgc_debugbus_block, + (void *) &gen7_cx_dbgc_debugbus_blocks[i]); + } + /* + * Get debugbus for GBIF CX part if GPU has GBIF block + * GBIF uses exactly same ID as of VBIF so use + * it as it is. 
+ */ + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_DEBUGBUS, snapshot, + gen7_snapshot_cx_dbgc_debugbus_block, + (void *) &gen7_gbif_debugbus_blocks[0]); + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_SIDE_DEBUGBUS, snapshot, + gen7_snapshot_cx_side_dbgc_debugbus_block, + (void *) &gen7_gbif_debugbus_blocks[0]); + } +} + + + +/* gen7_snapshot_sqe() - Dump SQE data in snapshot */ +static size_t gen7_snapshot_sqe(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_SQE); + + if (remain < DEBUG_SECTION_SZ(1)) { + SNAPSHOT_ERR_NOMEM(device, "SQE VERSION DEBUG"); + return 0; + } + + /* Dump the SQE firmware version */ + header->type = SNAPSHOT_DEBUG_SQE_VERSION; + header->size = 1; + *data = fw->version; + + return DEBUG_SECTION_SZ(1); +} + +/* Snapshot the preemption related buffers */ +static size_t snapshot_preemption_record(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_memdesc *memdesc = priv; + struct kgsl_snapshot_gpu_object_v2 *header = + (struct kgsl_snapshot_gpu_object_v2 *)buf; + u8 *ptr = buf + sizeof(*header); + const struct adreno_gen7_core *gpucore = to_gen7_core(ADRENO_DEVICE(device)); + u64 ctxt_record_size = GEN7_CP_CTXRECORD_SIZE_IN_BYTES; + + if (gpucore->ctxt_record_size) + ctxt_record_size = gpucore->ctxt_record_size; + + ctxt_record_size = min_t(u64, ctxt_record_size, device->snapshot_ctxt_record_size); + + if (remain < (ctxt_record_size + sizeof(*header))) { + SNAPSHOT_ERR_NOMEM(device, "PREEMPTION RECORD"); + return 0; + } + + header->size = ctxt_record_size >> 2; + header->gpuaddr = memdesc->gpuaddr; + header->ptbase = + kgsl_mmu_pagetable_get_ttbr0(device->mmu.defaultpagetable); + header->type = SNAPSHOT_GPU_OBJECT_GLOBAL; + + memcpy(ptr, memdesc->hostptr, ctxt_record_size); + + return ctxt_record_size + sizeof(*header); +} + +static void gen7_reglist_snapshot(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + u64 *ptr, offset = 0; + int i; + u32 r; + size_t (*func)(struct kgsl_device *device, u8 *buf, size_t remain, + void *priv) = gen7_legacy_snapshot_registers; + + if (CD_SCRIPT_CHECK(device)) { + for (i = 0; i < ARRAY_SIZE(gen7_reg_list); i++) + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2, + snapshot, func, &gen7_reg_list[i]); + return; + } + + /* Build the crash script */ + ptr = (u64 *)gen7_capturescript->hostptr; + + for (i = 0; i < ARRAY_SIZE(gen7_reg_list); i++) { + struct reg_list *regs = &gen7_reg_list[i]; + const u32 *regs_ptr = regs->regs; + + regs->offset = offset; + + /* Program the SEL_CNTL_CD register appropriately */ + if (regs->sel) + ptr += CD_WRITE(ptr, regs->sel->cd_reg, regs->sel->val); + + for (; regs_ptr[0] != UINT_MAX; regs_ptr += 2) { + r = REG_COUNT(regs_ptr); + ptr += CD_READ(ptr, regs_ptr[0], r, + (gen7_crashdump_registers->gpuaddr + offset)); + offset += r * sizeof(u32); + } + } + + /* Marker for end of script */ + CD_FINISH(ptr, offset); + + /* Try to run the crash dumper */ + if (_gen7_do_crashdump(device)) + func = gen7_snapshot_registers; + + for (i = 0; i < ARRAY_SIZE(gen7_reg_list); i++) + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2, + snapshot, func, &gen7_reg_list[i]); + +} + +static void gen7_snapshot_br_roq(struct kgsl_device *device, + 
struct kgsl_snapshot *snapshot) +{ + unsigned int roq_size; + + /* + * CP ROQ dump units is 4 dwords. The number of units is stored + * in CP_ROQ_THRESHOLDS_2[31:20], but it is not accessible to + * host. Program the GEN7_CP_SQE_UCODE_DBG_ADDR with 0x70d3 offset + * and read the value CP_ROQ_THRESHOLDS_2 from + * GEN7_CP_SQE_UCODE_DBG_DATA + */ + kgsl_regwrite(device, GEN7_CP_SQE_UCODE_DBG_ADDR, 0x70d3); + kgsl_regread(device, GEN7_CP_SQE_UCODE_DBG_DATA, &roq_size); + roq_size = roq_size >> 20; + kgsl_snapshot_indexed_registers(device, snapshot, + GEN7_CP_ROQ_DBG_ADDR, GEN7_CP_ROQ_DBG_DATA, 0, (roq_size << 2)); +} + +static void gen7_snapshot_bv_roq(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + unsigned int roq_size; + + /* + * CP ROQ dump units is 4 dwords. The number of units is stored + * in CP_BV_ROQ_THRESHOLDS_2[31:20], but it is not accessible to + * host. Program the GEN7_CP_BV_SQE_UCODE_DBG_ADDR with 0x70d3 offset + * (at which CP stores the roq values) and read the value of + * CP_BV_ROQ_THRESHOLDS_2 from GEN7_CP_BV_SQE_UCODE_DBG_DATA + */ + kgsl_regwrite(device, GEN7_CP_BV_SQE_UCODE_DBG_ADDR, 0x70d3); + kgsl_regread(device, GEN7_CP_BV_SQE_UCODE_DBG_DATA, &roq_size); + roq_size = roq_size >> 20; + kgsl_snapshot_indexed_registers(device, snapshot, + GEN7_CP_BV_ROQ_DBG_ADDR, GEN7_CP_BV_ROQ_DBG_DATA, 0, (roq_size << 2)); +} + +static void gen7_snapshot_lpac_roq(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + unsigned int roq_size; + + /* + * CP ROQ dump units is 4 dwords. The number of units is stored + * in CP_LPAC_ROQ_THRESHOLDS_2[31:20], but it is not accessible to + * host. Program the GEN7_CP_SQE_AC_UCODE_DBG_ADDR with 0x70d3 offset + * (at which CP stores the roq values) and read the value of + * CP_LPAC_ROQ_THRESHOLDS_2 from GEN7_CP_SQE_AC_UCODE_DBG_DATA + */ + kgsl_regwrite(device, GEN7_CP_SQE_AC_UCODE_DBG_ADDR, 0x70d3); + kgsl_regread(device, GEN7_CP_SQE_AC_UCODE_DBG_DATA, &roq_size); + roq_size = roq_size >> 20; + kgsl_snapshot_indexed_registers(device, snapshot, + GEN7_CP_LPAC_ROQ_DBG_ADDR, GEN7_CP_LPAC_ROQ_DBG_DATA, 0, (roq_size << 2)); +} + +/* + * gen7_snapshot() - GEN7 GPU snapshot function + * @adreno_dev: Device being snapshotted + * @snapshot: Pointer to the snapshot instance + * + * This is where all of the GEN7 specific bits and pieces are grabbed + * into the snapshot memory + */ +void gen7_snapshot(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_ringbuffer *rb; + unsigned int i; + u32 hi, lo, cgc, cgc1, cgc2; + + /* + * Dump debugbus data here to capture it for both + * GMU and GPU snapshot. Debugbus data can be accessed + * even if the gx headswitch is off. If gx + * headswitch is off, data for gx blocks will show as + * 0x5c00bd00. Disable clock gating for SP and TP to capture + * debugbus data. 
+ */ + if (device->ftbl->is_hwcg_on(device)) { + kgsl_regread(device, GEN7_RBBM_CLOCK_CNTL2_SP0, &cgc); + kgsl_regread(device, GEN7_RBBM_CLOCK_CNTL_TP0, &cgc1); + kgsl_regread(device, GEN7_RBBM_CLOCK_CNTL3_TP0, &cgc2); + kgsl_regrmw(device, GEN7_RBBM_CLOCK_CNTL2_SP0, GENMASK(22, 20), 0); + kgsl_regrmw(device, GEN7_RBBM_CLOCK_CNTL_TP0, GENMASK(2, 0), 0); + kgsl_regrmw(device, GEN7_RBBM_CLOCK_CNTL3_TP0, GENMASK(14, 12), 0); + } + + gen7_snapshot_debugbus(adreno_dev, snapshot); + + /* Restore the value of the clockgating registers */ + if (device->ftbl->is_hwcg_on(device)) { + kgsl_regwrite(device, GEN7_RBBM_CLOCK_CNTL2_SP0, cgc); + kgsl_regwrite(device, GEN7_RBBM_CLOCK_CNTL_TP0, cgc1); + kgsl_regwrite(device, GEN7_RBBM_CLOCK_CNTL3_TP0, cgc2); + } + + if (!gmu_core_dev_gx_is_on(device)) + return; + + kgsl_regread(device, GEN7_CP_IB1_BASE, &lo); + kgsl_regread(device, GEN7_CP_IB1_BASE_HI, &hi); + + snapshot->ib1base = (((u64) hi) << 32) | lo; + + kgsl_regread(device, GEN7_CP_IB2_BASE, &lo); + kgsl_regread(device, GEN7_CP_IB2_BASE_HI, &hi); + + snapshot->ib2base = (((u64) hi) << 32) | lo; + + kgsl_regread(device, GEN7_CP_IB1_REM_SIZE, &snapshot->ib1size); + kgsl_regread(device, GEN7_CP_IB2_REM_SIZE, &snapshot->ib2size); + + /* Assert the isStatic bit before triggering snapshot */ + kgsl_regwrite(device, GEN7_RBBM_SNAPSHOT_STATUS, 0x1); + + /* Dump the registers which get affected by crash dumper trigger */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2, + snapshot, adreno_snapshot_registers_v2, + (void *)gen7_pre_crashdumper_registers); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2, + snapshot, adreno_snapshot_registers_v2, + (void *)gen7_gpucc_registers); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2, + snapshot, adreno_snapshot_registers_v2, + (void *)gen7_cpr_registers); + + gen7_reglist_snapshot(device, snapshot); + + /* + * Need to program and save this register before capturing resource table + * to workaround a CGC issue + */ + if (device->ftbl->is_hwcg_on(device)) { + kgsl_regread(device, GEN7_RBBM_CLOCK_MODE_CP, &cgc); + kgsl_regrmw(device, GEN7_RBBM_CLOCK_MODE_CP, 0x7, 0); + } + kgsl_snapshot_indexed_registers(device, snapshot, + GEN7_CP_RESOURCE_TBL_DBG_ADDR, GEN7_CP_RESOURCE_TBL_DBG_DATA, + 0, 0x4100); + + /* Reprogram the register back to the original stored value */ + if (device->ftbl->is_hwcg_on(device)) + kgsl_regwrite(device, GEN7_RBBM_CLOCK_MODE_CP, cgc); + + for (i = 0; i < ARRAY_SIZE(gen7_cp_indexed_reg_list); i++) + kgsl_snapshot_indexed_registers(device, snapshot, + gen7_cp_indexed_reg_list[i].addr, + gen7_cp_indexed_reg_list[i].data, 0, + gen7_cp_indexed_reg_list[i].size); + + gen7_snapshot_br_roq(device, snapshot); + + gen7_snapshot_bv_roq(device, snapshot); + + gen7_snapshot_lpac_roq(device, snapshot); + + /* SQE Firmware */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, gen7_snapshot_sqe, NULL); + + /* Mempool debug data */ + gen7_snapshot_mempool(device, snapshot); + + /* Shader memory */ + gen7_snapshot_shader(device, snapshot); + + /* MVC register section */ + gen7_snapshot_mvc_regs(device, snapshot); + + /* registers dumped through DBG AHB */ + gen7_snapshot_dbgahb_regs(device, snapshot); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2, + snapshot, adreno_snapshot_registers_v2, + (void *)gen7_post_crashdumper_registers); + + kgsl_regwrite(device, GEN7_RBBM_SNAPSHOT_STATUS, 0x0); + + /* Preemption record */ + if (adreno_is_preemption_enabled(adreno_dev)) 
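+ /*
+ * One GPU_OBJECT_V2 section is added per ringbuffer, containing that
+ * ringbuffer's preemption context record (sized from the gpucore's
+ * ctxt_record_size when set, clamped to the configured
+ * snapshot_ctxt_record_size).
+ */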
{ + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2, + snapshot, snapshot_preemption_record, + rb->preemption_desc); + } + } +} + +void gen7_crashdump_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (IS_ERR_OR_NULL(gen7_capturescript)) + gen7_capturescript = kgsl_allocate_global(device, + 4 * PAGE_SIZE, 0, KGSL_MEMFLAGS_GPUREADONLY, + KGSL_MEMDESC_PRIVILEGED, "capturescript"); + + if (IS_ERR(gen7_capturescript)) + return; + + if (IS_ERR_OR_NULL(gen7_crashdump_registers)) + gen7_crashdump_registers = kgsl_allocate_global(device, + 300 * PAGE_SIZE, 0, 0, KGSL_MEMDESC_PRIVILEGED, + "capturescript_regs"); + + if (IS_ERR(gen7_crashdump_registers)) + return; +} diff --git a/adreno_gen7_snapshot.h b/adreno_gen7_snapshot.h new file mode 100644 index 0000000000..93c0144414 --- /dev/null +++ b/adreno_gen7_snapshot.h @@ -0,0 +1,1311 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. + */ +#ifndef __ADRENO_GEN7_SNAPSHOT_H +#define __ADRENO_GEN7_SNAPSHOT_H + +#include "adreno.h" +#include "adreno_gen7.h" + +#define PIPE_NONE 0 +#define PIPE_BR 1 +#define PIPE_BV 2 +#define PIPE_LPAC 3 + +#define CLUSTER_NONE 0 +#define CLUSTER_FE 1 +#define CLUSTER_SP_VS 2 +#define CLUSTER_PC_VS 3 +#define CLUSTER_GRAS 4 +#define CLUSTER_SP_PS 5 +#define CLUSTER_VPC_PS 6 +#define CLUSTER_PS 7 + +#define HLSQ_State 0 +#define HLSQ_DP 1 +#define SP_TOP 2 +#define USPTP 3 + +#define STATE_NON_CONTEXT 0 +#define STATE_TOGGLE_CTXT 1 +#define STATE_FORCE_CTXT_0 2 +#define STATE_FORCE_CTXT_1 3 + +enum gen7_debugbus_ids { + DEBUGBUS_CP_0_0 = 1, + DEBUGBUS_CP_0_1 = 2, + DEBUGBUS_RBBM = 3, + DEBUGBUS_GBIF_GX = 5, + DEBUGBUS_GBIF_CX = 6, + DEBUGBUS_HLSQ = 7, + DEBUGBUS_UCHE_0 = 9, + DEBUGBUS_TESS_BR = 13, + DEBUGBUS_TESS_BV = 14, + DEBUGBUS_PC_BR = 17, + DEBUGBUS_PC_BV = 18, + DEBUGBUS_VFDP_BR = 21, + DEBUGBUS_VFDP_BV = 22, + DEBUGBUS_VPC_BR = 25, + DEBUGBUS_VPC_BV = 26, + DEBUGBUS_TSE_BR = 29, + DEBUGBUS_TSE_BV = 30, + DEBUGBUS_RAS_BR = 33, + DEBUGBUS_RAS_BV = 34, + DEBUGBUS_VSC = 37, + DEBUGBUS_COM_0 = 39, + DEBUGBUS_LRZ_BR = 43, + DEBUGBUS_LRZ_BV = 44, + DEBUGBUS_UFC_0 = 47, + DEBUGBUS_UFC_1 = 48, + DEBUGBUS_GMU_GX = 55, + DEBUGBUS_DBGC = 59, + DEBUGBUS_CX = 60, + DEBUGBUS_GMU_CX = 61, + DEBUGBUS_GPC_BR = 62, + DEBUGBUS_GPC_BV = 63, + DEBUGBUS_LARC = 66, + DEBUGBUS_HLSQ_SPTP = 68, + DEBUGBUS_RB_0 = 70, + DEBUGBUS_RB_1 = 71, + DEBUGBUS_RB_2 = 72, + DEBUGBUS_RB_3 = 73, + DEBUGBUS_UCHE_WRAPPER = 102, + DEBUGBUS_CCU_0 = 106, + DEBUGBUS_CCU_1 = 107, + DEBUGBUS_CCU_2 = 108, + DEBUGBUS_CCU_3 = 109, + DEBUGBUS_VFD_BR_0 = 138, + DEBUGBUS_VFD_BR_1 = 139, + DEBUGBUS_VFD_BR_2 = 140, + DEBUGBUS_VFD_BR_3 = 141, + DEBUGBUS_VFD_BR_4 = 142, + DEBUGBUS_VFD_BR_5 = 143, + DEBUGBUS_VFD_BR_6 = 144, + DEBUGBUS_VFD_BR_7 = 145, + DEBUGBUS_VFD_BV_0 = 202, + DEBUGBUS_VFD_BV_1 = 203, + DEBUGBUS_VFD_BV_2 = 204, + DEBUGBUS_VFD_BV_3 = 205, + DEBUGBUS_USP_0 = 234, + DEBUGBUS_USP_1 = 235, + DEBUGBUS_USP_2 = 236, + DEBUGBUS_USP_3 = 237, + DEBUGBUS_TP_0 = 266, + DEBUGBUS_TP_1 = 267, + DEBUGBUS_TP_2 = 268, + DEBUGBUS_TP_3 = 269, + DEBUGBUS_TP_4 = 270, + DEBUGBUS_TP_5 = 271, + DEBUGBUS_TP_6 = 272, + DEBUGBUS_TP_7 = 273, + DEBUGBUS_USPTP_0 = 330, + DEBUGBUS_USPTP_1 = 331, + DEBUGBUS_USPTP_2 = 332, + DEBUGBUS_USPTP_3 = 333, + DEBUGBUS_USPTP_4 = 334, + DEBUGBUS_USPTP_5 = 335, + DEBUGBUS_USPTP_6 = 336, + DEBUGBUS_USPTP_7 = 337, +}; + +static const u32 gen7_debugbus_blocks[] 
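+/*
+ * Each ID below selects a block on the DBGC trace bus; for every listed
+ * block the snapshot captures GEN7_DEBUGBUS_BLOCK_SIZE (0x100) indexed
+ * reads of two dwords each, plus a side-bus section of one dword per
+ * index.
+ */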
= { + DEBUGBUS_CP_0_0, + DEBUGBUS_CP_0_1, + DEBUGBUS_RBBM, + DEBUGBUS_HLSQ, + DEBUGBUS_UCHE_0, + DEBUGBUS_TESS_BR, + DEBUGBUS_TESS_BV, + DEBUGBUS_PC_BR, + DEBUGBUS_PC_BV, + DEBUGBUS_VFDP_BR, + DEBUGBUS_VFDP_BV, + DEBUGBUS_VPC_BR, + DEBUGBUS_VPC_BV, + DEBUGBUS_TSE_BR, + DEBUGBUS_TSE_BV, + DEBUGBUS_RAS_BR, + DEBUGBUS_RAS_BV, + DEBUGBUS_VSC, + DEBUGBUS_COM_0, + DEBUGBUS_LRZ_BR, + DEBUGBUS_LRZ_BV, + DEBUGBUS_UFC_0, + DEBUGBUS_UFC_1, + DEBUGBUS_GMU_GX, + DEBUGBUS_DBGC, + DEBUGBUS_GPC_BR, + DEBUGBUS_GPC_BV, + DEBUGBUS_LARC, + DEBUGBUS_HLSQ_SPTP, + DEBUGBUS_RB_0, + DEBUGBUS_RB_1, + DEBUGBUS_RB_2, + DEBUGBUS_RB_3, + DEBUGBUS_UCHE_WRAPPER, + DEBUGBUS_CCU_0, + DEBUGBUS_CCU_1, + DEBUGBUS_CCU_2, + DEBUGBUS_CCU_3, + DEBUGBUS_VFD_BR_0, + DEBUGBUS_VFD_BR_1, + DEBUGBUS_VFD_BR_2, + DEBUGBUS_VFD_BR_3, + DEBUGBUS_VFD_BR_4, + DEBUGBUS_VFD_BR_5, + DEBUGBUS_VFD_BR_6, + DEBUGBUS_VFD_BR_7, + DEBUGBUS_VFD_BV_0, + DEBUGBUS_VFD_BV_1, + DEBUGBUS_VFD_BV_2, + DEBUGBUS_VFD_BV_3, + DEBUGBUS_USP_0, + DEBUGBUS_USP_1, + DEBUGBUS_USP_2, + DEBUGBUS_USP_3, + DEBUGBUS_TP_0, + DEBUGBUS_TP_1, + DEBUGBUS_TP_2, + DEBUGBUS_TP_3, + DEBUGBUS_TP_4, + DEBUGBUS_TP_5, + DEBUGBUS_TP_6, + DEBUGBUS_TP_7, + DEBUGBUS_USPTP_0, + DEBUGBUS_USPTP_1, + DEBUGBUS_USPTP_2, + DEBUGBUS_USPTP_3, + DEBUGBUS_USPTP_4, + DEBUGBUS_USPTP_5, + DEBUGBUS_USPTP_6, + DEBUGBUS_USPTP_7, +}; + +enum gen7_statetype_ids { + TP0_NCTX_REG = 0, + TP0_CTX0_3D_CVS_REG = 1, + TP0_CTX0_3D_CPS_REG = 2, + TP0_CTX1_3D_CVS_REG = 3, + TP0_CTX1_3D_CPS_REG = 4, + TP0_CTX2_3D_CPS_REG = 5, + TP0_CTX3_3D_CPS_REG = 6, + TP0_TMO_DATA = 9, + TP0_SMO_DATA = 10, + TP0_MIPMAP_BASE_DATA = 11, + SP_NCTX_REG = 32, + SP_CTX0_3D_CVS_REG = 33, + SP_CTX0_3D_CPS_REG = 34, + SP_CTX1_3D_CVS_REG = 35, + SP_CTX1_3D_CPS_REG = 36, + SP_CTX2_3D_CPS_REG = 37, + SP_CTX3_3D_CPS_REG = 38, + SP_INST_DATA = 39, + SP_INST_DATA_1 = 40, + SP_LB_0_DATA = 41, + SP_LB_1_DATA = 42, + SP_LB_2_DATA = 43, + SP_LB_3_DATA = 44, + SP_LB_4_DATA = 45, + SP_LB_5_DATA = 46, + SP_LB_6_DATA = 47, + SP_LB_7_DATA = 48, + SP_CB_RAM = 49, + SP_INST_TAG = 52, + SP_INST_DATA_2 = 53, + SP_TMO_TAG = 54, + SP_SMO_TAG = 55, + SP_STATE_DATA = 56, + SP_HWAVE_RAM = 57, + SP_L0_INST_BUF = 58, + SP_LB_8_DATA = 59, + SP_LB_9_DATA = 60, + SP_LB_10_DATA = 61, + SP_LB_11_DATA = 62, + SP_LB_12_DATA = 63, + HLSQ_CVS_BE_CTXT_BUF_RAM_TAG = 69, + HLSQ_CPS_BE_CTXT_BUF_RAM_TAG = 70, + HLSQ_GFX_CVS_BE_CTXT_BUF_RAM = 71, + HLSQ_GFX_CPS_BE_CTXT_BUF_RAM = 72, + HLSQ_CHUNK_CVS_RAM = 73, + HLSQ_CHUNK_CPS_RAM = 74, + HLSQ_CHUNK_CVS_RAM_TAG = 75, + HLSQ_CHUNK_CPS_RAM_TAG = 76, + HLSQ_ICB_CVS_CB_BASE_TAG = 77, + HLSQ_ICB_CPS_CB_BASE_TAG = 78, + HLSQ_CVS_MISC_RAM = 79, + HLSQ_CPS_MISC_RAM = 80, + HLSQ_CPS_MISC_RAM_1 = 81, + HLSQ_INST_RAM = 82, + HLSQ_GFX_CVS_CONST_RAM = 83, + HLSQ_GFX_CPS_CONST_RAM = 84, + HLSQ_CVS_MISC_RAM_TAG = 85, + HLSQ_CPS_MISC_RAM_TAG = 86, + HLSQ_INST_RAM_TAG = 87, + HLSQ_GFX_CVS_CONST_RAM_TAG = 88, + HLSQ_GFX_CPS_CONST_RAM_TAG = 89, + HLSQ_INST_RAM_1 = 92, + HLSQ_STPROC_META = 93, + HLSQ_BV_BE_META = 94, + HLSQ_DATAPATH_META = 96, + HLSQ_FRONTEND_META = 97, + HLSQ_INDIRECT_META = 98, + HLSQ_BACKEND_META = 99, +}; + +static const struct sel_reg { + unsigned int host_reg; + unsigned int cd_reg; + unsigned int val; +} gen7_0_0_rb_rac_sel = { + .host_reg = GEN7_RB_RB_SUB_BLOCK_SEL_CNTL_HOST, + .cd_reg = GEN7_RB_RB_SUB_BLOCK_SEL_CNTL_CD, + .val = 0x0, +}, +gen7_0_0_rb_rbp_sel = { + .host_reg = GEN7_RB_RB_SUB_BLOCK_SEL_CNTL_HOST, + .cd_reg = GEN7_RB_RB_SUB_BLOCK_SEL_CNTL_CD, + .val = 0x9, +}; + +static const u32 gen7_pre_crashdumper_registers[] = { 
+ 0x00210, 0x00210, 0x00212, 0x00213, 0x03c00, 0x03c0b, 0x03c40, 0x03c42, + 0x03c45, 0x03c47, 0x03c49, 0x03c4a, 0x03cc0, 0x03cd1, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_pre_crashdumper_registers), 8)); + +static const u32 gen7_post_crashdumper_registers[] = { + 0x00535, 0x00535, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_post_crashdumper_registers), 8)); + +static const u32 gen7_gpu_registers[] = { + 0x00000, 0x00000, 0x00002, 0x00002, 0x00011, 0x00012, 0x00016, 0x0001b, + 0x0001f, 0x00032, 0x00038, 0x0003c, 0x00042, 0x00042, 0x00044, 0x00044, + 0x00047, 0x00047, 0x00049, 0x0004a, 0x0004c, 0x0004c, 0x00050, 0x00050, + 0x00056, 0x00056, 0x00073, 0x00075, 0x000ad, 0x000ae, 0x000b0, 0x000b0, + 0x000b4, 0x000b4, 0x000b8, 0x000b8, 0x000bc, 0x000bc, 0x000c0, 0x000c0, + 0x000c4, 0x000c4, 0x000c8, 0x000c8, 0x000cc, 0x000cc, 0x000d0, 0x000d0, + 0x000d4, 0x000d4, 0x000d8, 0x000d8, 0x000dc, 0x000dc, 0x000e0, 0x000e0, + 0x000e4, 0x000e4, 0x000e8, 0x000e8, 0x000ec, 0x000ec, 0x000f0, 0x000f0, + 0x000f4, 0x000f4, 0x000f8, 0x000f8, 0x00100, 0x00100, 0x00104, 0x0010b, + 0x0010f, 0x0011d, 0x0012f, 0x0012f, 0x00200, 0x0020d, 0x00211, 0x00211, + 0x00215, 0x00243, 0x00260, 0x00268, 0x00272, 0x00274, 0x00281, 0x0028d, + 0x00300, 0x00401, 0x00410, 0x00451, 0x00460, 0x004a3, 0x004c0, 0x004d1, + 0x00500, 0x00500, 0x00507, 0x0050b, 0x0050f, 0x0050f, 0x00511, 0x00511, + 0x00533, 0x00534, 0x00536, 0x00536, 0x00540, 0x00555, 0x00564, 0x00567, + 0x00574, 0x00577, 0x005fb, 0x005ff, 0x00800, 0x00808, 0x00810, 0x00813, + 0x00820, 0x00821, 0x00823, 0x00827, 0x00830, 0x00834, 0x0083f, 0x00841, + 0x00843, 0x00847, 0x0084f, 0x00886, 0x008a0, 0x008ab, 0x008c0, 0x008c0, + 0x008c4, 0x008c5, 0x008d0, 0x008dd, 0x008e0, 0x008e6, 0x008f0, 0x008f3, + 0x00900, 0x00903, 0x00908, 0x00911, 0x00928, 0x0093e, 0x00942, 0x0094d, + 0x00980, 0x00984, 0x0098d, 0x0098f, 0x009b0, 0x009b4, 0x009c2, 0x009c9, + 0x009ce, 0x009d7, 0x009e0, 0x009e7, 0x00a00, 0x00a00, 0x00a02, 0x00a03, + 0x00a10, 0x00a4f, 0x00a61, 0x00a9f, 0x00ad0, 0x00adb, 0x00b00, 0x00b31, + 0x00b35, 0x00b3c, 0x00b40, 0x00b40, 0x00c00, 0x00c00, 0x00c02, 0x00c04, + 0x00c06, 0x00c06, 0x00c10, 0x00cd9, 0x00ce0, 0x00d0c, 0x00df0, 0x00df4, + 0x00e01, 0x00e02, 0x00e07, 0x00e0e, 0x00e10, 0x00e13, 0x00e17, 0x00e19, + 0x00e1b, 0x00e2b, 0x00e30, 0x00e32, 0x00e38, 0x00e3c, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_gpu_registers), 8)); + +static const u32 gen7_cx_misc_registers[] = { + 0x27800, 0x27800, 0x27810, 0x27814, 0x27820, 0x27824, 0x27832, 0x27857, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_cx_misc_registers), 8)); + +static const u32 gen7_cpr_registers[] = { + 0x26800, 0x26805, 0x26808, 0x2680c, 0x26814, 0x26814, 0x2681c, 0x2681c, + 0x26820, 0x26838, 0x26840, 0x26840, 0x26848, 0x26848, 0x26850, 0x26850, + 0x26880, 0x26898, 0x26980, 0x269b0, 0x269c0, 0x269c8, 0x269e0, 0x269ee, + 0x269fb, 0x269ff, 0x26a02, 0x26a07, 0x26a09, 0x26a0b, 0x26a10, 0x26b0f, + 0x27440, 0x27441, 0x27444, 0x27444, 0x27480, 0x274a2, 0x274ac, 0x274ac, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_cpr_registers), 8)); + +static const u32 gen7_dpm_registers[] = { + 0x1aa00, 0x1aa06, 0x1aa09, 0x1aa0a, 0x1aa0c, 0x1aa0d, 0x1aa0f, 0x1aa12, + 0x1aa14, 0x1aa47, 0x1aa50, 0x1aa51, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_dpm_registers), 8)); + +static const u32 gen7_gpucc_registers[] = { + 0x24000, 0x2400e, 0x24400, 0x2440e, 0x24800, 0x24805, 0x24c00, 0x24cff, + 0x25800, 0x25804, 0x25c00, 0x25c04, 
0x26000, 0x26004, 0x26400, 0x26405, + 0x26414, 0x2641d, 0x2642a, 0x26430, 0x26432, 0x26432, 0x26441, 0x26455, + 0x26466, 0x26468, 0x26478, 0x2647a, 0x26489, 0x2648a, 0x2649c, 0x2649e, + 0x264a0, 0x264a3, 0x264b3, 0x264b5, 0x264c5, 0x264c7, 0x264d6, 0x264d8, + 0x264e8, 0x264e9, 0x264f9, 0x264fc, 0x2650b, 0x2650c, 0x2651c, 0x2651e, + 0x26540, 0x26570, 0x26600, 0x26616, 0x26620, 0x2662d, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_gpucc_registers), 8)); + +static const u32 gen7_0_0_noncontext_pipe_br_registers[] = { + 0x00887, 0x0088c, 0x08600, 0x08600, 0x08602, 0x08602, 0x08610, 0x0861b, + 0x08620, 0x08620, 0x08630, 0x08630, 0x08637, 0x08639, 0x08640, 0x08640, + 0x09600, 0x09600, 0x09602, 0x09603, 0x0960a, 0x09616, 0x09624, 0x0963a, + 0x09640, 0x09640, 0x09e00, 0x09e00, 0x09e02, 0x09e07, 0x09e0a, 0x09e16, + 0x09e19, 0x09e19, 0x09e1c, 0x09e1c, 0x09e20, 0x09e25, 0x09e30, 0x09e31, + 0x09e40, 0x09e51, 0x09e64, 0x09e64, 0x09e70, 0x09e72, 0x09e78, 0x09e79, + 0x09e80, 0x09fff, 0x0a600, 0x0a600, 0x0a603, 0x0a603, 0x0a610, 0x0a61f, + 0x0a630, 0x0a631, 0x0a638, 0x0a638, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_noncontext_pipe_br_registers), 8)); + +static const u32 gen7_0_0_noncontext_pipe_bv_registers[] = { + 0x00887, 0x0088c, 0x08600, 0x08600, 0x08602, 0x08602, 0x08610, 0x0861b, + 0x08620, 0x08620, 0x08630, 0x08630, 0x08637, 0x08639, 0x08640, 0x08640, + 0x09600, 0x09600, 0x09602, 0x09603, 0x0960a, 0x09616, 0x09624, 0x0963a, + 0x09640, 0x09640, 0x09e00, 0x09e00, 0x09e02, 0x09e07, 0x09e0a, 0x09e16, + 0x09e19, 0x09e19, 0x09e1c, 0x09e1c, 0x09e20, 0x09e25, 0x09e30, 0x09e31, + 0x09e40, 0x09e51, 0x09e64, 0x09e64, 0x09e70, 0x09e72, 0x09e78, 0x09e79, + 0x09e80, 0x09fff, 0x0a600, 0x0a600, 0x0a603, 0x0a603, 0x0a610, 0x0a61f, + 0x0a630, 0x0a631, 0x0a638, 0x0a638, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_noncontext_pipe_bv_registers), 8)); + +static const u32 gen7_0_0_noncontext_pipe_lpac_registers[] = { + 0x00887, 0x0088c, 0x00f80, 0x00f80, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_noncontext_pipe_lpac_registers), 8)); + +static const u32 gen7_0_0_noncontext_rb_rac_pipe_br_registers[] = { + 0x08e10, 0x08e1c, 0x08e20, 0x08e25, 0x08e51, 0x08e5a, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_noncontext_rb_rac_pipe_br_registers), 8)); + +static const u32 gen7_0_0_noncontext_rb_rbp_pipe_br_registers[] = { + 0x08e01, 0x08e01, 0x08e04, 0x08e04, 0x08e06, 0x08e09, 0x08e0c, 0x08e0c, + 0x08e28, 0x08e28, 0x08e2c, 0x08e35, 0x08e3b, 0x08e3f, 0x08e50, 0x08e50, + 0x08e5b, 0x08e5d, 0x08e5f, 0x08e5f, 0x08e61, 0x08e61, 0x08e63, 0x08e65, + 0x08e68, 0x08e68, 0x08e70, 0x08e79, 0x08e80, 0x08e8f, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_noncontext_rb_rbp_pipe_br_registers), 8)); + +/* Block: GRAS Cluster: CLUSTER_GRAS Pipeline: PIPE_BR */ +static const u32 gen7_0_0_gras_cluster_gras_pipe_br_registers[] = { + 0x08000, 0x08008, 0x08010, 0x08092, 0x08094, 0x08099, 0x0809b, 0x0809d, + 0x080a0, 0x080a7, 0x080af, 0x080f1, 0x080f4, 0x080f6, 0x080f8, 0x080fa, + 0x08100, 0x08107, 0x08109, 0x0810b, 0x08110, 0x08110, 0x08120, 0x0813f, + 0x08400, 0x08406, 0x0840a, 0x0840b, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_gras_cluster_gras_pipe_br_registers), 8)); + +/* Block: GRAS Cluster: CLUSTER_GRAS Pipeline: PIPE_BV */ +static const u32 gen7_0_0_gras_cluster_gras_pipe_bv_registers[] = { + 0x08000, 0x08008, 0x08010, 0x08092, 0x08094, 0x08099, 0x0809b, 0x0809d, + 0x080a0, 
0x080a7, 0x080af, 0x080f1, 0x080f4, 0x080f6, 0x080f8, 0x080fa, + 0x08100, 0x08107, 0x08109, 0x0810b, 0x08110, 0x08110, 0x08120, 0x0813f, + 0x08400, 0x08406, 0x0840a, 0x0840b, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_gras_cluster_gras_pipe_bv_registers), 8)); + +/* Block: PC Cluster: CLUSTER_FE Pipeline: PIPE_BR */ +static const u32 gen7_0_0_pc_cluster_fe_pipe_br_registers[] = { + 0x09800, 0x09804, 0x09806, 0x0980a, 0x09810, 0x09811, 0x09884, 0x09886, + 0x09b00, 0x09b08, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_pc_cluster_fe_pipe_br_registers), 8)); + +/* Block: PC Cluster: CLUSTER_FE Pipeline: PIPE_BV */ +static const u32 gen7_0_0_pc_cluster_fe_pipe_bv_registers[] = { + 0x09800, 0x09804, 0x09806, 0x0980a, 0x09810, 0x09811, 0x09884, 0x09886, + 0x09b00, 0x09b08, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_pc_cluster_fe_pipe_bv_registers), 8)); + +/* Block: RB_RAC Cluster: CLUSTER_PS Pipeline: PIPE_BR */ +static const u32 gen7_0_0_rb_rac_cluster_ps_pipe_br_registers[] = { + 0x08802, 0x08802, 0x08804, 0x08806, 0x08809, 0x0880a, 0x0880e, 0x08811, + 0x08818, 0x0881e, 0x08821, 0x08821, 0x08823, 0x08826, 0x08829, 0x08829, + 0x0882b, 0x0882e, 0x08831, 0x08831, 0x08833, 0x08836, 0x08839, 0x08839, + 0x0883b, 0x0883e, 0x08841, 0x08841, 0x08843, 0x08846, 0x08849, 0x08849, + 0x0884b, 0x0884e, 0x08851, 0x08851, 0x08853, 0x08856, 0x08859, 0x08859, + 0x0885b, 0x0885e, 0x08860, 0x08864, 0x08870, 0x08870, 0x08873, 0x08876, + 0x08878, 0x08879, 0x08882, 0x08885, 0x08887, 0x08889, 0x08891, 0x08891, + 0x08898, 0x08898, 0x088c0, 0x088c1, 0x088e5, 0x088e5, 0x088f4, 0x088f5, + 0x08a00, 0x08a05, 0x08a10, 0x08a15, 0x08a20, 0x08a25, 0x08a30, 0x08a35, + 0x08c00, 0x08c01, 0x08c18, 0x08c1f, 0x08c26, 0x08c34, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_rb_rac_cluster_ps_pipe_br_registers), 8)); + +/* Block: RB_RBP Cluster: CLUSTER_PS Pipeline: PIPE_BR */ +static const u32 gen7_0_0_rb_rbp_cluster_ps_pipe_br_registers[] = { + 0x08800, 0x08801, 0x08803, 0x08803, 0x0880b, 0x0880d, 0x08812, 0x08812, + 0x08820, 0x08820, 0x08822, 0x08822, 0x08827, 0x08828, 0x0882a, 0x0882a, + 0x0882f, 0x08830, 0x08832, 0x08832, 0x08837, 0x08838, 0x0883a, 0x0883a, + 0x0883f, 0x08840, 0x08842, 0x08842, 0x08847, 0x08848, 0x0884a, 0x0884a, + 0x0884f, 0x08850, 0x08852, 0x08852, 0x08857, 0x08858, 0x0885a, 0x0885a, + 0x0885f, 0x0885f, 0x08865, 0x08865, 0x08871, 0x08872, 0x08877, 0x08877, + 0x08880, 0x08881, 0x08886, 0x08886, 0x08890, 0x08890, 0x088d0, 0x088e4, + 0x088e8, 0x088ea, 0x088f0, 0x088f0, 0x08900, 0x0891a, 0x08927, 0x08928, + 0x08c17, 0x08c17, 0x08c20, 0x08c25, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_rb_rbp_cluster_ps_pipe_br_registers), 8)); + +/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_BR Location: HLSQ_State */ +static const u32 gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_state_registers[] = { + 0x0a980, 0x0a980, 0x0a982, 0x0a984, 0x0a99e, 0x0a99e, 0x0a9a7, 0x0a9a7, + 0x0a9aa, 0x0a9aa, 0x0a9ae, 0x0a9b0, 0x0a9b3, 0x0a9b5, 0x0a9ba, 0x0a9ba, + 0x0a9bc, 0x0a9bc, 0x0a9c4, 0x0a9c4, 0x0a9cd, 0x0a9cd, 0x0a9e0, 0x0a9fc, + 0x0aa00, 0x0aa00, 0x0aa30, 0x0aa31, 0x0aa40, 0x0aabf, 0x0ab00, 0x0ab03, + 0x0ab05, 0x0ab05, 0x0ab0a, 0x0ab1b, 0x0ab20, 0x0ab20, 0x0ab40, 0x0abbf, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_state_registers), 8)); + +/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_BR Location: HLSQ_DP */ +static const u32 
gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_dp_registers[] = { + 0x0a9b1, 0x0a9b1, 0x0a9c6, 0x0a9cb, 0x0a9d4, 0x0a9df, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_dp_registers), 8)); + +/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_BR Location: SP_TOP */ +static const u32 gen7_0_0_sp_cluster_sp_ps_pipe_br_sp_top_registers[] = { + 0x0a980, 0x0a980, 0x0a982, 0x0a984, 0x0a99e, 0x0a9a2, 0x0a9a7, 0x0a9a8, + 0x0a9aa, 0x0a9aa, 0x0a9ae, 0x0a9ae, 0x0a9b0, 0x0a9b1, 0x0a9b3, 0x0a9b5, + 0x0a9ba, 0x0a9bc, 0x0a9e0, 0x0a9f9, 0x0aa00, 0x0aa00, 0x0ab00, 0x0ab00, + 0x0ab02, 0x0ab02, 0x0ab04, 0x0ab05, 0x0ab0a, 0x0ab1b, 0x0ab20, 0x0ab20, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_ps_pipe_br_sp_top_registers), 8)); + +/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_BR Location: uSPTP */ +static const u32 gen7_0_0_sp_cluster_sp_ps_pipe_br_usptp_registers[] = { + 0x0a980, 0x0a982, 0x0a985, 0x0a9a6, 0x0a9a8, 0x0a9a9, 0x0a9ab, 0x0a9ae, + 0x0a9b0, 0x0a9b3, 0x0a9b6, 0x0a9b9, 0x0a9bb, 0x0a9bf, 0x0a9c2, 0x0a9c3, + 0x0a9cd, 0x0a9cd, 0x0a9d0, 0x0a9d3, 0x0aa30, 0x0aa31, 0x0aa40, 0x0aabf, + 0x0ab00, 0x0ab05, 0x0ab21, 0x0ab22, 0x0ab40, 0x0abbf, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_ps_pipe_br_usptp_registers), 8)); + +/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_BV Location: HLSQ_State */ +static const u32 gen7_0_0_sp_cluster_sp_ps_pipe_bv_hlsq_state_registers[] = { + 0x0ab00, 0x0ab02, 0x0ab0a, 0x0ab1b, 0x0ab20, 0x0ab20, 0x0ab40, 0x0abbf, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_ps_pipe_bv_hlsq_state_registers), 8)); + +/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_BV Location: SP_TOP */ +static const u32 gen7_0_0_sp_cluster_sp_ps_pipe_bv_sp_top_registers[] = { + 0x0ab00, 0x0ab00, 0x0ab02, 0x0ab02, 0x0ab0a, 0x0ab1b, 0x0ab20, 0x0ab20, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_ps_pipe_bv_sp_top_registers), 8)); + +/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_BV Location: uSPTP */ +static const u32 gen7_0_0_sp_cluster_sp_ps_pipe_bv_usptp_registers[] = { + 0x0ab00, 0x0ab02, 0x0ab21, 0x0ab22, 0x0ab40, 0x0abbf, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_ps_pipe_bv_usptp_registers), 8)); + +/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_LPAC Location: HLSQ_State */ +static const u32 gen7_0_0_sp_cluster_sp_ps_pipe_lpac_hlsq_state_registers[] = { + 0x0a9b0, 0x0a9b0, 0x0a9b3, 0x0a9b5, 0x0a9ba, 0x0a9ba, 0x0a9bc, 0x0a9bc, + 0x0a9c4, 0x0a9c4, 0x0a9cd, 0x0a9cd, 0x0a9e2, 0x0a9e3, 0x0a9e6, 0x0a9fc, + 0x0aa00, 0x0aa00, 0x0aa31, 0x0aa31, 0x0ab00, 0x0ab01, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_ps_pipe_lpac_hlsq_state_registers), 8)); + +/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_LPAC Location: HLSQ_DP */ +static const u32 gen7_0_0_sp_cluster_sp_ps_pipe_lpac_hlsq_dp_registers[] = { + 0x0a9b1, 0x0a9b1, 0x0a9d4, 0x0a9df, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_ps_pipe_lpac_hlsq_dp_registers), 8)); + +/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_LPAC Location: SP_TOP */ +static const u32 gen7_0_0_sp_cluster_sp_ps_pipe_lpac_sp_top_registers[] = { + 0x0a9b0, 0x0a9b1, 0x0a9b3, 0x0a9b5, 0x0a9ba, 0x0a9bc, 0x0a9e2, 0x0a9e3, + 0x0a9e6, 0x0a9f9, 0x0aa00, 0x0aa00, 0x0ab00, 0x0ab00, + UINT_MAX, UINT_MAX, +}; 
+static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_ps_pipe_lpac_sp_top_registers), 8)); + +/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_LPAC Location: uSPTP */ +static const u32 gen7_0_0_sp_cluster_sp_ps_pipe_lpac_usptp_registers[] = { + 0x0a9b0, 0x0a9b3, 0x0a9b6, 0x0a9b9, 0x0a9bb, 0x0a9be, 0x0a9c2, 0x0a9c3, + 0x0a9cd, 0x0a9cd, 0x0a9d0, 0x0a9d3, 0x0aa31, 0x0aa31, 0x0ab00, 0x0ab01, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_ps_pipe_lpac_usptp_registers), 8)); + +/* Block: SP Cluster: CLUSTER_SP_VS Pipeline: PIPE_BR Location: HLSQ_State */ +static const u32 gen7_0_0_sp_cluster_sp_vs_pipe_br_hlsq_state_registers[] = { + 0x0a800, 0x0a800, 0x0a81b, 0x0a81d, 0x0a822, 0x0a822, 0x0a824, 0x0a824, + 0x0a827, 0x0a82a, 0x0a830, 0x0a830, 0x0a833, 0x0a835, 0x0a83a, 0x0a83a, + 0x0a83c, 0x0a83c, 0x0a83f, 0x0a840, 0x0a85b, 0x0a85d, 0x0a862, 0x0a862, + 0x0a864, 0x0a864, 0x0a867, 0x0a867, 0x0a870, 0x0a870, 0x0a88c, 0x0a88e, + 0x0a893, 0x0a893, 0x0a895, 0x0a895, 0x0a898, 0x0a898, 0x0a89a, 0x0a89d, + 0x0a8a0, 0x0a8af, 0x0a8c0, 0x0a8c3, 0x0ab00, 0x0ab03, 0x0ab05, 0x0ab05, + 0x0ab0a, 0x0ab1b, 0x0ab20, 0x0ab20, 0x0ab40, 0x0abbf, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_vs_pipe_br_hlsq_state_registers), 8)); + +/* Block: SP Cluster: CLUSTER_SP_VS Pipeline: PIPE_BR Location: SP_TOP */ +static const u32 gen7_0_0_sp_cluster_sp_vs_pipe_br_sp_top_registers[] = { + 0x0a800, 0x0a800, 0x0a81c, 0x0a81d, 0x0a822, 0x0a824, 0x0a830, 0x0a831, + 0x0a834, 0x0a835, 0x0a83a, 0x0a83c, 0x0a840, 0x0a840, 0x0a85c, 0x0a85d, + 0x0a862, 0x0a864, 0x0a870, 0x0a871, 0x0a88d, 0x0a88e, 0x0a893, 0x0a895, + 0x0a8a0, 0x0a8af, 0x0ab00, 0x0ab00, 0x0ab02, 0x0ab02, 0x0ab04, 0x0ab05, + 0x0ab0a, 0x0ab1b, 0x0ab20, 0x0ab20, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_vs_pipe_br_sp_top_registers), 8)); + +/* Block: SP Cluster: CLUSTER_SP_VS Pipeline: PIPE_BR Location: uSPTP */ +static const u32 gen7_0_0_sp_cluster_sp_vs_pipe_br_usptp_registers[] = { + 0x0a800, 0x0a81b, 0x0a81e, 0x0a821, 0x0a823, 0x0a827, 0x0a830, 0x0a833, + 0x0a836, 0x0a839, 0x0a83b, 0x0a85b, 0x0a85e, 0x0a861, 0x0a863, 0x0a867, + 0x0a870, 0x0a88c, 0x0a88f, 0x0a892, 0x0a894, 0x0a898, 0x0a8c0, 0x0a8c3, + 0x0ab00, 0x0ab05, 0x0ab21, 0x0ab22, 0x0ab40, 0x0abbf, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_vs_pipe_br_usptp_registers), 8)); + +/* Block: SP Cluster: CLUSTER_SP_VS Pipeline: PIPE_BV Location: HLSQ_State */ +static const u32 gen7_0_0_sp_cluster_sp_vs_pipe_bv_hlsq_state_registers[] = { + 0x0a800, 0x0a800, 0x0a81b, 0x0a81d, 0x0a822, 0x0a822, 0x0a824, 0x0a824, + 0x0a827, 0x0a82a, 0x0a830, 0x0a830, 0x0a833, 0x0a835, 0x0a83a, 0x0a83a, + 0x0a83c, 0x0a83c, 0x0a83f, 0x0a840, 0x0a85b, 0x0a85d, 0x0a862, 0x0a862, + 0x0a864, 0x0a864, 0x0a867, 0x0a867, 0x0a870, 0x0a870, 0x0a88c, 0x0a88e, + 0x0a893, 0x0a893, 0x0a895, 0x0a895, 0x0a898, 0x0a898, 0x0a89a, 0x0a89d, + 0x0a8a0, 0x0a8af, 0x0a8c0, 0x0a8c3, 0x0ab00, 0x0ab02, 0x0ab0a, 0x0ab1b, + 0x0ab20, 0x0ab20, 0x0ab40, 0x0abbf, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_vs_pipe_bv_hlsq_state_registers), 8)); + +/* Block: SP Cluster: CLUSTER_SP_VS Pipeline: PIPE_BV Location: SP_TOP */ +static const u32 gen7_0_0_sp_cluster_sp_vs_pipe_bv_sp_top_registers[] = { + 0x0a800, 0x0a800, 0x0a81c, 0x0a81d, 0x0a822, 0x0a824, 0x0a830, 0x0a831, + 0x0a834, 0x0a835, 0x0a83a, 0x0a83c, 0x0a840, 0x0a840, 0x0a85c, 0x0a85d, + 0x0a862, 0x0a864, 0x0a870, 0x0a871, 0x0a88d, 
0x0a88e, 0x0a893, 0x0a895, + 0x0a8a0, 0x0a8af, 0x0ab00, 0x0ab00, 0x0ab02, 0x0ab02, 0x0ab0a, 0x0ab1b, + 0x0ab20, 0x0ab20, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_vs_pipe_bv_sp_top_registers), 8)); + +/* Block: SP Cluster: CLUSTER_SP_VS Pipeline: PIPE_BV Location: uSPTP */ +static const u32 gen7_0_0_sp_cluster_sp_vs_pipe_bv_usptp_registers[] = { + 0x0a800, 0x0a81b, 0x0a81e, 0x0a821, 0x0a823, 0x0a827, 0x0a830, 0x0a833, + 0x0a836, 0x0a839, 0x0a83b, 0x0a85b, 0x0a85e, 0x0a861, 0x0a863, 0x0a867, + 0x0a870, 0x0a88c, 0x0a88f, 0x0a892, 0x0a894, 0x0a898, 0x0a8c0, 0x0a8c3, + 0x0ab00, 0x0ab02, 0x0ab21, 0x0ab22, 0x0ab40, 0x0abbf, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_vs_pipe_bv_usptp_registers), 8)); + +/* Block: TPL1 Cluster: CLUSTER_SP_PS Pipeline: PIPE_BR */ +static const u32 gen7_0_0_tpl1_cluster_sp_ps_pipe_br_registers[] = { + 0x0b180, 0x0b183, 0x0b190, 0x0b195, 0x0b2c0, 0x0b2d5, 0x0b300, 0x0b307, + 0x0b309, 0x0b309, 0x0b310, 0x0b310, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_tpl1_cluster_sp_ps_pipe_br_registers), 8)); + +/* Block: TPL1 Cluster: CLUSTER_SP_PS Pipeline: PIPE_BV */ +static const u32 gen7_0_0_tpl1_cluster_sp_ps_pipe_bv_registers[] = { + 0x0b300, 0x0b307, 0x0b309, 0x0b309, 0x0b310, 0x0b310, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_tpl1_cluster_sp_ps_pipe_bv_registers), 8)); + +/* Block: TPL1 Cluster: CLUSTER_SP_PS Pipeline: PIPE_LPAC */ +static const u32 gen7_0_0_tpl1_cluster_sp_ps_pipe_lpac_registers[] = { + 0x0b180, 0x0b181, 0x0b300, 0x0b301, 0x0b307, 0x0b307, 0x0b309, 0x0b309, + 0x0b310, 0x0b310, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_tpl1_cluster_sp_ps_pipe_lpac_registers), 8)); + +/* Block: TPL1 Cluster: CLUSTER_SP_VS Pipeline: PIPE_BR */ +static const u32 gen7_0_0_tpl1_cluster_sp_vs_pipe_br_registers[] = { + 0x0b300, 0x0b307, 0x0b309, 0x0b309, 0x0b310, 0x0b310, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_tpl1_cluster_sp_vs_pipe_br_registers), 8)); + +/* Block: TPL1 Cluster: CLUSTER_SP_VS Pipeline: PIPE_BV */ +static const u32 gen7_0_0_tpl1_cluster_sp_vs_pipe_bv_registers[] = { + 0x0b300, 0x0b307, 0x0b309, 0x0b309, 0x0b310, 0x0b310, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_tpl1_cluster_sp_vs_pipe_bv_registers), 8)); + +/* Block: VFD Cluster: CLUSTER_FE Pipeline: PIPE_BR */ +static const u32 gen7_0_0_vfd_cluster_fe_pipe_br_registers[] = { + 0x0a000, 0x0a009, 0x0a00e, 0x0a0ef, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_vfd_cluster_fe_pipe_br_registers), 8)); + +/* Block: VFD Cluster: CLUSTER_FE Pipeline: PIPE_BV */ +static const u32 gen7_0_0_vfd_cluster_fe_pipe_bv_registers[] = { + 0x0a000, 0x0a009, 0x0a00e, 0x0a0ef, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_vfd_cluster_fe_pipe_bv_registers), 8)); + +/* Block: VPC Cluster: CLUSTER_FE Pipeline: PIPE_BR */ +static const u32 gen7_0_0_vpc_cluster_fe_pipe_br_registers[] = { + 0x09300, 0x09307, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_vpc_cluster_fe_pipe_br_registers), 8)); + +/* Block: VPC Cluster: CLUSTER_FE Pipeline: PIPE_BV */ +static const u32 gen7_0_0_vpc_cluster_fe_pipe_bv_registers[] = { + 0x09300, 0x09307, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_vpc_cluster_fe_pipe_bv_registers), 8)); + +/* Block: VPC Cluster: CLUSTER_PC_VS Pipeline: PIPE_BR */ +static const u32 gen7_0_0_vpc_cluster_pc_vs_pipe_br_registers[] = 
{ + 0x09101, 0x0910c, 0x09300, 0x09307, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_vpc_cluster_pc_vs_pipe_br_registers), 8)); + +/* Block: VPC Cluster: CLUSTER_PC_VS Pipeline: PIPE_BV */ +static const u32 gen7_0_0_vpc_cluster_pc_vs_pipe_bv_registers[] = { + 0x09101, 0x0910c, 0x09300, 0x09307, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_vpc_cluster_pc_vs_pipe_bv_registers), 8)); + +/* Block: VPC Cluster: CLUSTER_VPC_PS Pipeline: PIPE_BR */ +static const u32 gen7_0_0_vpc_cluster_vpc_ps_pipe_br_registers[] = { + 0x09200, 0x0920f, 0x09212, 0x09216, 0x09218, 0x09236, 0x09300, 0x09307, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_vpc_cluster_vpc_ps_pipe_br_registers), 8)); + +/* Block: VPC Cluster: CLUSTER_VPC_PS Pipeline: PIPE_BV */ +static const u32 gen7_0_0_vpc_cluster_vpc_ps_pipe_bv_registers[] = { + 0x09200, 0x0920f, 0x09212, 0x09216, 0x09218, 0x09236, 0x09300, 0x09307, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_vpc_cluster_vpc_ps_pipe_bv_registers), 8)); + +/* Block: SP Cluster: noncontext Pipeline: PIPE_BR Location: HLSQ_State */ +static const u32 gen7_0_0_sp_noncontext_pipe_br_hlsq_state_registers[] = { + 0x0ae52, 0x0ae52, 0x0ae60, 0x0ae67, 0x0ae69, 0x0ae73, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_noncontext_pipe_br_hlsq_state_registers), 8)); + +/* Block: SP Cluster: noncontext Pipeline: PIPE_BR Location: SP_TOP */ +static const u32 gen7_0_0_sp_noncontext_pipe_br_sp_top_registers[] = { + 0x0ae00, 0x0ae00, 0x0ae02, 0x0ae04, 0x0ae06, 0x0ae09, 0x0ae0c, 0x0ae0c, + 0x0ae0f, 0x0ae0f, 0x0ae28, 0x0ae2b, 0x0ae35, 0x0ae35, 0x0ae3a, 0x0ae3f, + 0x0ae50, 0x0ae52, 0x0ae80, 0x0aea3, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_noncontext_pipe_br_sp_top_registers), 8)); + +/* Block: SP Cluster: noncontext Pipeline: PIPE_BR Location: uSPTP */ +static const u32 gen7_0_0_sp_noncontext_pipe_br_usptp_registers[] = { + 0x0ae00, 0x0ae00, 0x0ae02, 0x0ae04, 0x0ae06, 0x0ae09, 0x0ae0c, 0x0ae0c, + 0x0ae0f, 0x0ae0f, 0x0ae30, 0x0ae32, 0x0ae35, 0x0ae35, 0x0ae3a, 0x0ae3b, + 0x0ae3e, 0x0ae3f, 0x0ae50, 0x0ae52, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_noncontext_pipe_br_usptp_registers), 8)); + +/* Block: SP Cluster: noncontext Pipeline: PIPE_LPAC Location: HLSQ_State */ +static const u32 gen7_0_0_sp_noncontext_pipe_lpac_hlsq_state_registers[] = { + 0x0af88, 0x0af8a, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_noncontext_pipe_lpac_hlsq_state_registers), 8)); + +/* Block: SP Cluster: noncontext Pipeline: PIPE_LPAC Location: SP_TOP */ +static const u32 gen7_0_0_sp_noncontext_pipe_lpac_sp_top_registers[] = { + 0x0af80, 0x0af84, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_noncontext_pipe_lpac_sp_top_registers), 8)); + +/* Block: SP Cluster: noncontext Pipeline: PIPE_LPAC Location: uSPTP */ +static const u32 gen7_0_0_sp_noncontext_pipe_lpac_usptp_registers[] = { + 0x0af80, 0x0af84, 0x0af90, 0x0af92, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_noncontext_pipe_lpac_usptp_registers), 8)); + +/* Block: TPl1 Cluster: noncontext Pipeline: PIPE_BR */ +static const u32 gen7_0_0_tpl1_noncontext_pipe_br_registers[] = { + 0x0b600, 0x0b600, 0x0b602, 0x0b602, 0x0b604, 0x0b604, 0x0b608, 0x0b60c, + 0x0b60f, 0x0b621, 0x0b630, 0x0b633, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_tpl1_noncontext_pipe_br_registers), 8)); + +/* Block: TPl1 Cluster: noncontext 
Pipeline: PIPE_LPAC */ +static const u32 gen7_0_0_tpl1_noncontext_pipe_lpac_registers[] = { + 0x0b780, 0x0b780, + UINT_MAX, UINT_MAX, +}; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_tpl1_noncontext_pipe_lpac_registers), 8)); + +struct gen7_cluster_registers { + /* cluster_id: Cluster identifier */ + int cluster_id; + /* pipe_id: Pipe Identifier */ + int pipe_id; + /* context_id: one of STATE_ that identifies the context to dump */ + int context_id; + /* regs: Pointer to an array of register pairs */ + const u32 *regs; + /* sel: Pointer to a selector register to write before reading */ + const struct sel_reg *sel; + /* offset: Internal variable to track the state of the crashdump */ + unsigned int offset; +}; + +static struct gen7_cluster_registers gen7_clusters[] = { + { CLUSTER_NONE, PIPE_BR, STATE_NON_CONTEXT, + gen7_0_0_noncontext_pipe_br_registers, }, + { CLUSTER_NONE, PIPE_BV, STATE_NON_CONTEXT, + gen7_0_0_noncontext_pipe_bv_registers, }, + { CLUSTER_NONE, PIPE_LPAC, STATE_NON_CONTEXT, + gen7_0_0_noncontext_pipe_lpac_registers, }, + { CLUSTER_NONE, PIPE_BR, STATE_NON_CONTEXT, + gen7_0_0_noncontext_rb_rac_pipe_br_registers, &gen7_0_0_rb_rac_sel, }, + { CLUSTER_NONE, PIPE_BR, STATE_NON_CONTEXT, + gen7_0_0_noncontext_rb_rbp_pipe_br_registers, &gen7_0_0_rb_rbp_sel, }, + { CLUSTER_GRAS, PIPE_BR, STATE_FORCE_CTXT_0, + gen7_0_0_gras_cluster_gras_pipe_br_registers, }, + { CLUSTER_GRAS, PIPE_BV, STATE_FORCE_CTXT_0, + gen7_0_0_gras_cluster_gras_pipe_bv_registers, }, + { CLUSTER_GRAS, PIPE_BR, STATE_FORCE_CTXT_1, + gen7_0_0_gras_cluster_gras_pipe_br_registers, }, + { CLUSTER_GRAS, PIPE_BV, STATE_FORCE_CTXT_1, + gen7_0_0_gras_cluster_gras_pipe_bv_registers, }, + { CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_0, + gen7_0_0_pc_cluster_fe_pipe_br_registers, }, + { CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_0, + gen7_0_0_pc_cluster_fe_pipe_bv_registers, }, + { CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_1, + gen7_0_0_pc_cluster_fe_pipe_br_registers, }, + { CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_1, + gen7_0_0_pc_cluster_fe_pipe_bv_registers, }, + { CLUSTER_PS, PIPE_BR, STATE_FORCE_CTXT_0, + gen7_0_0_rb_rac_cluster_ps_pipe_br_registers, &gen7_0_0_rb_rac_sel, }, + { CLUSTER_PS, PIPE_BR, STATE_FORCE_CTXT_1, + gen7_0_0_rb_rac_cluster_ps_pipe_br_registers, &gen7_0_0_rb_rac_sel, }, + { CLUSTER_PS, PIPE_BR, STATE_FORCE_CTXT_0, + gen7_0_0_rb_rbp_cluster_ps_pipe_br_registers, &gen7_0_0_rb_rbp_sel, }, + { CLUSTER_PS, PIPE_BR, STATE_FORCE_CTXT_1, + gen7_0_0_rb_rbp_cluster_ps_pipe_br_registers, &gen7_0_0_rb_rbp_sel, }, + { CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_0, + gen7_0_0_vfd_cluster_fe_pipe_br_registers, }, + { CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_0, + gen7_0_0_vfd_cluster_fe_pipe_bv_registers, }, + { CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_1, + gen7_0_0_vfd_cluster_fe_pipe_br_registers, }, + { CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_1, + gen7_0_0_vfd_cluster_fe_pipe_bv_registers, }, + { CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_0, + gen7_0_0_vpc_cluster_fe_pipe_br_registers, }, + { CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_0, + gen7_0_0_vpc_cluster_fe_pipe_bv_registers, }, + { CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_1, + gen7_0_0_vpc_cluster_fe_pipe_br_registers, }, + { CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_1, + gen7_0_0_vpc_cluster_fe_pipe_bv_registers, }, + { CLUSTER_PC_VS, PIPE_BR, STATE_FORCE_CTXT_0, + gen7_0_0_vpc_cluster_pc_vs_pipe_br_registers, }, + { CLUSTER_PC_VS, PIPE_BV, STATE_FORCE_CTXT_0, + gen7_0_0_vpc_cluster_pc_vs_pipe_bv_registers, }, + { CLUSTER_PC_VS, PIPE_BR, STATE_FORCE_CTXT_1, + 
gen7_0_0_vpc_cluster_pc_vs_pipe_br_registers, }, + { CLUSTER_PC_VS, PIPE_BV, STATE_FORCE_CTXT_1, + gen7_0_0_vpc_cluster_pc_vs_pipe_bv_registers, }, + { CLUSTER_VPC_PS, PIPE_BR, STATE_FORCE_CTXT_0, + gen7_0_0_vpc_cluster_vpc_ps_pipe_br_registers, }, + { CLUSTER_VPC_PS, PIPE_BV, STATE_FORCE_CTXT_0, + gen7_0_0_vpc_cluster_vpc_ps_pipe_bv_registers, }, + { CLUSTER_VPC_PS, PIPE_BR, STATE_FORCE_CTXT_1, + gen7_0_0_vpc_cluster_vpc_ps_pipe_br_registers, }, + { CLUSTER_VPC_PS, PIPE_BV, STATE_FORCE_CTXT_1, + gen7_0_0_vpc_cluster_vpc_ps_pipe_bv_registers, }, +}; + +struct gen7_sptp_cluster_registers { + /* cluster_id: Cluster identifier */ + int cluster_id; + /* cluster_id: SP block state type for the cluster */ + int statetype; + /* pipe_id: Pipe identifier */ + int pipe_id; + /* context_id: Context identifier */ + int context_id; + /* location_id: Location identifier */ + int location_id; + /* regs: Pointer to the list of register pairs to read */ + const u32 *regs; + /* regbase: Dword offset of the register block in the GPu register space */ + unsigned int regbase; + /* offset: Internal variable used to track the crashdump state */ + unsigned int offset; +}; + +static struct gen7_sptp_cluster_registers gen7_sptp_clusters[] = { + { CLUSTER_NONE, SP_NCTX_REG, PIPE_BR, 0, HLSQ_State, + gen7_0_0_sp_noncontext_pipe_br_hlsq_state_registers, 0xae00 }, + { CLUSTER_NONE, SP_NCTX_REG, PIPE_BR, 0, SP_TOP, + gen7_0_0_sp_noncontext_pipe_br_sp_top_registers, 0xae00 }, + { CLUSTER_NONE, SP_NCTX_REG, PIPE_BR, 0, USPTP, + gen7_0_0_sp_noncontext_pipe_br_usptp_registers, 0xae00 }, + { CLUSTER_NONE, SP_NCTX_REG, PIPE_LPAC, 0, HLSQ_State, + gen7_0_0_sp_noncontext_pipe_lpac_hlsq_state_registers, 0xaf80 }, + { CLUSTER_NONE, SP_NCTX_REG, PIPE_LPAC, 0, SP_TOP, + gen7_0_0_sp_noncontext_pipe_lpac_sp_top_registers, 0xaf80 }, + { CLUSTER_NONE, SP_NCTX_REG, PIPE_LPAC, 0, USPTP, + gen7_0_0_sp_noncontext_pipe_lpac_usptp_registers, 0xaf80 }, + { CLUSTER_NONE, TP0_NCTX_REG, PIPE_BR, 0, USPTP, + gen7_0_0_tpl1_noncontext_pipe_br_registers, 0xb600 }, + { CLUSTER_NONE, TP0_NCTX_REG, PIPE_LPAC, 0, USPTP, + gen7_0_0_tpl1_noncontext_pipe_lpac_registers, 0xb780 }, + { CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, HLSQ_State, + gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_state_registers, 0xa800 }, + { CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, HLSQ_DP, + gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_dp_registers, 0xa800 }, + { CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, SP_TOP, + gen7_0_0_sp_cluster_sp_ps_pipe_br_sp_top_registers, 0xa800 }, + { CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, USPTP, + gen7_0_0_sp_cluster_sp_ps_pipe_br_usptp_registers, 0xa800 }, + { CLUSTER_SP_PS, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, HLSQ_State, + gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_state_registers, 0xa800 }, + { CLUSTER_SP_PS, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, HLSQ_DP, + gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_dp_registers, 0xa800 }, + { CLUSTER_SP_PS, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, SP_TOP, + gen7_0_0_sp_cluster_sp_ps_pipe_br_sp_top_registers, 0xa800 }, + { CLUSTER_SP_PS, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, USPTP, + gen7_0_0_sp_cluster_sp_ps_pipe_br_usptp_registers, 0xa800 }, + { CLUSTER_SP_PS, SP_CTX2_3D_CPS_REG, PIPE_BR, 2, HLSQ_State, + gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_state_registers, 0xa800 }, + { CLUSTER_SP_PS, SP_CTX2_3D_CPS_REG, PIPE_BR, 2, HLSQ_DP, + gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_dp_registers, 0xa800 }, + { CLUSTER_SP_PS, SP_CTX2_3D_CPS_REG, PIPE_BR, 2, SP_TOP, + gen7_0_0_sp_cluster_sp_ps_pipe_br_sp_top_registers, 0xa800 }, + { CLUSTER_SP_PS, 
SP_CTX2_3D_CPS_REG, PIPE_BR, 2, USPTP, + gen7_0_0_sp_cluster_sp_ps_pipe_br_usptp_registers, 0xa800 }, + { CLUSTER_SP_PS, SP_CTX3_3D_CPS_REG, PIPE_BR, 3, HLSQ_State, + gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_state_registers, 0xa800 }, + { CLUSTER_SP_PS, SP_CTX3_3D_CPS_REG, PIPE_BR, 3, HLSQ_DP, + gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_dp_registers, 0xa800 }, + { CLUSTER_SP_PS, SP_CTX3_3D_CPS_REG, PIPE_BR, 3, SP_TOP, + gen7_0_0_sp_cluster_sp_ps_pipe_br_sp_top_registers, 0xa800 }, + { CLUSTER_SP_PS, SP_CTX3_3D_CPS_REG, PIPE_BR, 3, USPTP, + gen7_0_0_sp_cluster_sp_ps_pipe_br_usptp_registers, 0xa800 }, + { CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, HLSQ_State, + gen7_0_0_sp_cluster_sp_ps_pipe_lpac_hlsq_state_registers, 0xa800 }, + { CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, HLSQ_DP, + gen7_0_0_sp_cluster_sp_ps_pipe_lpac_hlsq_dp_registers, 0xa800 }, + { CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, SP_TOP, + gen7_0_0_sp_cluster_sp_ps_pipe_lpac_sp_top_registers, 0xa800 }, + { CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, USPTP, + gen7_0_0_sp_cluster_sp_ps_pipe_lpac_usptp_registers, 0xa800 }, + { CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, HLSQ_State, + gen7_0_0_sp_cluster_sp_vs_pipe_br_hlsq_state_registers, 0xa800 }, + { CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, HLSQ_State, + gen7_0_0_sp_cluster_sp_vs_pipe_bv_hlsq_state_registers, 0xa800 }, + { CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, SP_TOP, + gen7_0_0_sp_cluster_sp_vs_pipe_br_sp_top_registers, 0xa800 }, + { CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, SP_TOP, + gen7_0_0_sp_cluster_sp_vs_pipe_bv_sp_top_registers, 0xa800 }, + { CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, USPTP, + gen7_0_0_sp_cluster_sp_vs_pipe_br_usptp_registers, 0xa800 }, + { CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, USPTP, + gen7_0_0_sp_cluster_sp_vs_pipe_bv_usptp_registers, 0xa800 }, + { CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, HLSQ_State, + gen7_0_0_sp_cluster_sp_vs_pipe_br_hlsq_state_registers, 0xa800 }, + { CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, HLSQ_State, + gen7_0_0_sp_cluster_sp_vs_pipe_bv_hlsq_state_registers, 0xa800 }, + { CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, SP_TOP, + gen7_0_0_sp_cluster_sp_vs_pipe_br_sp_top_registers, 0xa800 }, + { CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, SP_TOP, + gen7_0_0_sp_cluster_sp_vs_pipe_bv_sp_top_registers, 0xa800 }, + { CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, USPTP, + gen7_0_0_sp_cluster_sp_vs_pipe_br_usptp_registers, 0xa800 }, + { CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, USPTP, + gen7_0_0_sp_cluster_sp_vs_pipe_bv_usptp_registers, 0xa800 }, + { CLUSTER_SP_PS, TP0_CTX0_3D_CPS_REG, PIPE_BR, 0, USPTP, + gen7_0_0_tpl1_cluster_sp_ps_pipe_br_registers, 0xb000 }, + { CLUSTER_SP_PS, TP0_CTX1_3D_CPS_REG, PIPE_BR, 1, USPTP, + gen7_0_0_tpl1_cluster_sp_ps_pipe_br_registers, 0xb000 }, + { CLUSTER_SP_PS, TP0_CTX2_3D_CPS_REG, PIPE_BR, 2, USPTP, + gen7_0_0_tpl1_cluster_sp_ps_pipe_br_registers, 0xb000 }, + { CLUSTER_SP_PS, TP0_CTX3_3D_CPS_REG, PIPE_BR, 3, USPTP, + gen7_0_0_tpl1_cluster_sp_ps_pipe_br_registers, 0xb000 }, + { CLUSTER_SP_PS, TP0_CTX0_3D_CPS_REG, PIPE_LPAC, 0, USPTP, + gen7_0_0_tpl1_cluster_sp_ps_pipe_lpac_registers, 0xb000 }, + { CLUSTER_SP_VS, TP0_CTX0_3D_CVS_REG, PIPE_BR, 0, USPTP, + gen7_0_0_tpl1_cluster_sp_vs_pipe_br_registers, 0xb000 }, + { CLUSTER_SP_VS, TP0_CTX0_3D_CVS_REG, PIPE_BV, 0, USPTP, + gen7_0_0_tpl1_cluster_sp_vs_pipe_bv_registers, 0xb000 }, + { CLUSTER_SP_VS, TP0_CTX1_3D_CVS_REG, PIPE_BR, 1, USPTP, + 
gen7_0_0_tpl1_cluster_sp_vs_pipe_br_registers, 0xb000 },
+	{ CLUSTER_SP_VS, TP0_CTX1_3D_CVS_REG, PIPE_BV, 1, USPTP,
+		gen7_0_0_tpl1_cluster_sp_vs_pipe_bv_registers, 0xb000 },
+};
+
+struct gen7_shader_block {
+	/* statetype: Type identifier for the block */
+	u32 statetype;
+	/* size: Size of the block (in dwords) */
+	u32 size;
+	/* sp_id: The SP id to dump */
+	u32 sp_id;
+	/* usptp: The usptp id to dump */
+	u32 usptp;
+	/* pipe_id: Pipe identifier for the block data */
+	u32 pipeid;
+	/* location: Location identifier for the block data */
+	u32 location;
+	/* offset: The offset in the snapshot dump */
+	u64 offset;
+};
+
+static struct gen7_shader_block gen7_shader_blocks[] = {
+	{TP0_TMO_DATA, 0x200, 0, 0, PIPE_BR, USPTP},
+	{TP0_SMO_DATA, 0x80, 0, 0, PIPE_BR, USPTP},
+	{TP0_MIPMAP_BASE_DATA, 0x3c0, 0, 0, PIPE_BR, USPTP},
+	{SP_INST_DATA, 0x800, 0, 0, PIPE_BR, USPTP},
+	{SP_INST_DATA_1, 0x800, 0, 0, PIPE_BR, USPTP},
+	{SP_LB_0_DATA, 0x800, 0, 0, PIPE_BR, USPTP},
+	{SP_LB_1_DATA, 0x800, 0, 0, PIPE_BR, USPTP},
+	{SP_LB_2_DATA, 0x800, 0, 0, PIPE_BR, USPTP},
+	{SP_LB_3_DATA, 0x800, 0, 0, PIPE_BR, USPTP},
+	{SP_LB_4_DATA, 0x800, 0, 0, PIPE_BR, USPTP},
+	{SP_LB_5_DATA, 0x800, 0, 0, PIPE_BR, USPTP},
+	{SP_LB_6_DATA, 0x800, 0, 0, PIPE_BR, USPTP},
+	{SP_LB_7_DATA, 0x800, 0, 0, PIPE_BR, USPTP},
+	{SP_CB_RAM, 0x390, 0, 0, PIPE_BR, USPTP,},
+	{SP_INST_TAG, 0x90, 0, 0, PIPE_BR, USPTP},
+	{SP_INST_DATA_2, 0x200, 0, 0, PIPE_BR, USPTP},
+	{SP_TMO_TAG, 0x80, 0, 0, PIPE_BR, USPTP},
+	{SP_SMO_TAG, 0x80, 0, 0, PIPE_BR, USPTP},
+	{SP_STATE_DATA, 0x40, 0, 0, PIPE_BR, USPTP},
+	{SP_HWAVE_RAM, 0x100, 0, 0, PIPE_BR, USPTP},
+	{SP_L0_INST_BUF, 0x50, 0, 0, PIPE_BR, USPTP},
+	{SP_LB_8_DATA, 0x800, 0, 0, PIPE_BR, USPTP},
+	{SP_LB_9_DATA, 0x800, 0, 0, PIPE_BR, USPTP},
+	{SP_LB_10_DATA, 0x800, 0, 0, PIPE_BR, USPTP},
+	{SP_LB_11_DATA, 0x800, 0, 0, PIPE_BR, USPTP},
+	{SP_LB_12_DATA, 0x200, 0, 0, PIPE_BR, USPTP},
+	{HLSQ_CVS_BE_CTXT_BUF_RAM_TAG, 0x10, 0, 0, PIPE_BV, HLSQ_State},
+	{HLSQ_CVS_BE_CTXT_BUF_RAM_TAG, 0x10, 0, 0, PIPE_BR, HLSQ_State},
+	{HLSQ_CPS_BE_CTXT_BUF_RAM_TAG, 0x10, 0, 0, PIPE_BR, HLSQ_State},
+	{HLSQ_GFX_CVS_BE_CTXT_BUF_RAM, 0x300, 0, 0, PIPE_BR, HLSQ_State},
+	{HLSQ_GFX_CVS_BE_CTXT_BUF_RAM, 0x300, 0, 0, PIPE_BV, HLSQ_State},
+	{HLSQ_GFX_CPS_BE_CTXT_BUF_RAM, 0x300, 0, 0, PIPE_BR, HLSQ_State},
+	{HLSQ_CHUNK_CVS_RAM, 0x1c0, 0, 0, PIPE_BV, HLSQ_State},
+	{HLSQ_CHUNK_CVS_RAM, 0x1c0, 0, 0, PIPE_BR, HLSQ_State},
+	{HLSQ_CHUNK_CPS_RAM, 0x300, 0, 0, PIPE_BR, HLSQ_State},
+	{HLSQ_CHUNK_CPS_RAM, 0x300, 0, 0, PIPE_LPAC, HLSQ_State},
+	{HLSQ_CHUNK_CVS_RAM_TAG, 0x40, 0, 0, PIPE_BR, HLSQ_State},
+	{HLSQ_CHUNK_CVS_RAM_TAG, 0x40, 0, 0, PIPE_BV, HLSQ_State},
+	{HLSQ_CHUNK_CPS_RAM_TAG, 0x40, 0, 0, PIPE_BR, HLSQ_State},
+	{HLSQ_CHUNK_CPS_RAM_TAG, 0x40, 0, 0, PIPE_LPAC, HLSQ_State},
+	{HLSQ_ICB_CVS_CB_BASE_TAG, 0x10, 0, 0, PIPE_BR, HLSQ_State},
+	{HLSQ_ICB_CVS_CB_BASE_TAG, 0x10, 0, 0, PIPE_BV, HLSQ_State},
+	{HLSQ_ICB_CPS_CB_BASE_TAG, 0x10, 0, 0, PIPE_BR, HLSQ_State},
+	{HLSQ_ICB_CPS_CB_BASE_TAG, 0x10, 0, 0, PIPE_LPAC, HLSQ_State},
+	{HLSQ_CVS_MISC_RAM, 0x280, 0, 0, PIPE_BR, HLSQ_State},
+	{HLSQ_CVS_MISC_RAM, 0x280, 0, 0, PIPE_BV, HLSQ_State},
+	{HLSQ_CPS_MISC_RAM, 0x800, 0, 0, PIPE_BR, HLSQ_State},
+	{HLSQ_CPS_MISC_RAM, 0x800, 0, 0, PIPE_LPAC, HLSQ_State},
+	{HLSQ_CPS_MISC_RAM_1, 0x200, 0, 0, PIPE_BR, HLSQ_State},
+	{HLSQ_INST_RAM, 0x800, 0, 0, PIPE_BR, HLSQ_State},
+	{HLSQ_INST_RAM, 0x800, 0, 0, PIPE_BV, HLSQ_State},
+	{HLSQ_INST_RAM, 0x800, 0, 0, PIPE_LPAC, HLSQ_State},
+	{HLSQ_GFX_CVS_CONST_RAM, 0x800, 0, 0, PIPE_BR,
HLSQ_State}, + {HLSQ_GFX_CVS_CONST_RAM, 0x800, 0, 0, PIPE_BV, HLSQ_State}, + {HLSQ_GFX_CPS_CONST_RAM, 0x800, 0, 0, PIPE_BR, HLSQ_State}, + {HLSQ_GFX_CPS_CONST_RAM, 0x800, 0, 0, PIPE_LPAC, HLSQ_State}, + {HLSQ_CVS_MISC_RAM_TAG, 0x10, 0, 0, PIPE_BR, HLSQ_State}, + {HLSQ_CVS_MISC_RAM_TAG, 0x10, 0, 0, PIPE_BV, HLSQ_State}, + {HLSQ_CPS_MISC_RAM_TAG, 0x10, 0, 0, PIPE_BR, HLSQ_State}, + {HLSQ_CPS_MISC_RAM_TAG, 0x10, 0, 0, PIPE_LPAC, HLSQ_State}, + {HLSQ_INST_RAM_TAG, 0x80, 0, 0, PIPE_BR, HLSQ_State}, + {HLSQ_INST_RAM_TAG, 0x80, 0, 0, PIPE_BV, HLSQ_State}, + {HLSQ_INST_RAM_TAG, 0x80, 0, 0, PIPE_LPAC, HLSQ_State}, + {HLSQ_GFX_CVS_CONST_RAM_TAG, 0x64, 0, 0, PIPE_BR, HLSQ_State}, + {HLSQ_GFX_CVS_CONST_RAM_TAG, 0x64, 0, 0, PIPE_BV, HLSQ_State}, + {HLSQ_GFX_CPS_CONST_RAM_TAG, 0x64, 0, 0, PIPE_BR, HLSQ_State}, + {HLSQ_GFX_CPS_CONST_RAM_TAG, 0x64, 0, 0, PIPE_LPAC, HLSQ_State}, + {HLSQ_INST_RAM_1, 0x800, 0, 0, PIPE_BR, HLSQ_State}, + {HLSQ_STPROC_META, 0x10, 0, 0, PIPE_BR, HLSQ_State}, + {HLSQ_BV_BE_META, 0x10, 0, 0, PIPE_BR, HLSQ_State}, + {HLSQ_BV_BE_META, 0x10, 0, 0, PIPE_BV, HLSQ_State}, + {HLSQ_DATAPATH_META, 0x20, 0, 0, PIPE_BR, HLSQ_State}, + {HLSQ_FRONTEND_META, 0x40, 0, 0, PIPE_BR, HLSQ_State}, + {HLSQ_FRONTEND_META, 0x40, 0, 0, PIPE_BV, HLSQ_State}, + {HLSQ_FRONTEND_META, 0x40, 0, 0, PIPE_LPAC, HLSQ_State}, + {HLSQ_INDIRECT_META, 0x10, 0, 0, PIPE_BR, HLSQ_State}, + {HLSQ_BACKEND_META, 0x40, 0, 0, PIPE_BR, HLSQ_State}, + {HLSQ_BACKEND_META, 0x40, 0, 0, PIPE_BV, HLSQ_State}, + {HLSQ_BACKEND_META, 0x40, 0, 0, PIPE_LPAC, HLSQ_State}, + /* SP 0 USPTP 1 */ + {TP0_TMO_DATA, 0x200, 0, 1, PIPE_BR, USPTP}, + {TP0_SMO_DATA, 0x80, 0, 1, PIPE_BR, USPTP}, + {TP0_MIPMAP_BASE_DATA, 0x3c0, 0, 1, PIPE_BR, USPTP}, + {SP_INST_DATA, 0x800, 0, 1, PIPE_BR, USPTP}, + {SP_INST_DATA_1, 0x800, 0, 1, PIPE_BR, USPTP}, + {SP_LB_0_DATA, 0x800, 0, 1, PIPE_BR, USPTP}, + {SP_LB_1_DATA, 0x800, 0, 1, PIPE_BR, USPTP}, + {SP_LB_2_DATA, 0x800, 0, 1, PIPE_BR, USPTP}, + {SP_LB_3_DATA, 0x800, 0, 1, PIPE_BR, USPTP}, + {SP_LB_4_DATA, 0x800, 0, 1, PIPE_BR, USPTP}, + {SP_LB_5_DATA, 0x800, 0, 1, PIPE_BR, USPTP}, + {SP_LB_6_DATA, 0x800, 0, 1, PIPE_BR, USPTP}, + {SP_LB_7_DATA, 0x800, 0, 1, PIPE_BR, USPTP}, + {SP_CB_RAM, 0x390, 0, 1, PIPE_BR, USPTP,}, + {SP_INST_TAG, 0x90, 0, 1, PIPE_BR, USPTP}, + {SP_INST_DATA_2, 0x200, 0, 1, PIPE_BR, USPTP}, + {SP_TMO_TAG, 0x80, 0, 1, PIPE_BR, USPTP}, + {SP_SMO_TAG, 0x80, 0, 1, PIPE_BR, USPTP}, + {SP_STATE_DATA, 0x40, 0, 1, PIPE_BR, USPTP}, + {SP_HWAVE_RAM, 0x100, 0, 1, PIPE_BR, USPTP}, + {SP_L0_INST_BUF, 0x50, 0, 1, PIPE_BR, USPTP}, + {SP_LB_8_DATA, 0x800, 0, 1, PIPE_BR, USPTP}, + {SP_LB_9_DATA, 0x800, 0, 1, PIPE_BR, USPTP}, + {SP_LB_10_DATA, 0x800, 0, 1, PIPE_BR, USPTP}, + {SP_LB_11_DATA, 0x800, 0, 1, PIPE_BR, USPTP}, + {SP_LB_12_DATA, 0x200, 0, 1, PIPE_BR, USPTP}, + /* SP 1 USPTP 0 */ + {TP0_TMO_DATA, 0x200, 1, 0, PIPE_BR, USPTP}, + {TP0_SMO_DATA, 0x80, 1, 0, PIPE_BR, USPTP}, + {TP0_MIPMAP_BASE_DATA, 0x3c0, 1, 0, PIPE_BR, USPTP}, + {SP_INST_DATA, 0x800, 1, 0, PIPE_BR, USPTP}, + {SP_INST_DATA_1, 0x800, 1, 0, PIPE_BR, USPTP}, + {SP_LB_0_DATA, 0x800, 1, 0, PIPE_BR, USPTP}, + {SP_LB_1_DATA, 0x800, 1, 0, PIPE_BR, USPTP}, + {SP_LB_2_DATA, 0x800, 1, 0, PIPE_BR, USPTP}, + {SP_LB_3_DATA, 0x800, 1, 0, PIPE_BR, USPTP}, + {SP_LB_4_DATA, 0x800, 1, 0, PIPE_BR, USPTP}, + {SP_LB_5_DATA, 0x800, 1, 0, PIPE_BR, USPTP}, + {SP_LB_6_DATA, 0x800, 1, 0, PIPE_BR, USPTP}, + {SP_LB_7_DATA, 0x800, 1, 0, PIPE_BR, USPTP}, + {SP_CB_RAM, 0x390, 1, 0, PIPE_BR, USPTP,}, + {SP_INST_TAG, 0x90, 1, 0, PIPE_BR, USPTP,}, + {SP_INST_DATA_2, 
0x200, 1, 0, PIPE_BR, USPTP,}, + {SP_TMO_TAG, 0x80, 1, 0, PIPE_BR, USPTP,}, + {SP_SMO_TAG, 0x80, 1, 0, PIPE_BR, USPTP,}, + {SP_STATE_DATA, 0x40, 1, 0, PIPE_BR, USPTP,}, + {SP_HWAVE_RAM, 0x100, 1, 0, PIPE_BR, USPTP}, + {SP_L0_INST_BUF, 0x50, 1, 0, PIPE_BR, USPTP,}, + {SP_LB_8_DATA, 0x800, 1, 0, PIPE_BR, USPTP}, + {SP_LB_9_DATA, 0x800, 1, 0, PIPE_BR, USPTP}, + {SP_LB_10_DATA, 0x800, 1, 0, PIPE_BR, USPTP}, + {SP_LB_11_DATA, 0x800, 1, 0, PIPE_BR, USPTP}, + {SP_LB_12_DATA, 0x200, 1, 0, PIPE_BR, USPTP}, + /* SP 1 USPTP 1 */ + {TP0_TMO_DATA, 0x200, 1, 1, PIPE_BR, USPTP}, + {TP0_SMO_DATA, 0x80, 1, 1, PIPE_BR, USPTP}, + {TP0_MIPMAP_BASE_DATA, 0x3c0, 1, 1, PIPE_BR, USPTP}, + {SP_INST_DATA, 0x800, 1, 1, PIPE_BR, USPTP}, + {SP_INST_DATA_1, 0x800, 1, 1, PIPE_BR, USPTP}, + {SP_LB_0_DATA, 0x800, 1, 1, PIPE_BR, USPTP}, + {SP_LB_1_DATA, 0x800, 1, 1, PIPE_BR, USPTP}, + {SP_LB_2_DATA, 0x800, 1, 1, PIPE_BR, USPTP}, + {SP_LB_3_DATA, 0x800, 1, 1, PIPE_BR, USPTP}, + {SP_LB_4_DATA, 0x800, 1, 1, PIPE_BR, USPTP}, + {SP_LB_5_DATA, 0x800, 1, 1, PIPE_BR, USPTP}, + {SP_LB_6_DATA, 0x800, 1, 1, PIPE_BR, USPTP}, + {SP_LB_7_DATA, 0x800, 1, 1, PIPE_BR, USPTP}, + {SP_CB_RAM, 0x390, 1, 1, PIPE_BR, USPTP,}, + {SP_INST_TAG, 0x90, 1, 1, PIPE_BR, USPTP,}, + {SP_INST_DATA_2, 0x200, 1, 1, PIPE_BR, USPTP,}, + {SP_TMO_TAG, 0x80, 1, 1, PIPE_BR, USPTP,}, + {SP_SMO_TAG, 0x80, 1, 1, PIPE_BR, USPTP,}, + {SP_STATE_DATA, 0x40, 1, 1, PIPE_BR, USPTP,}, + {SP_HWAVE_RAM, 0x100, 1, 1, PIPE_BR, USPTP}, + {SP_L0_INST_BUF, 0x50, 1, 1, PIPE_BR, USPTP,}, + {SP_LB_8_DATA, 0x800, 1, 1, PIPE_BR, USPTP}, + {SP_LB_9_DATA, 0x800, 1, 1, PIPE_BR, USPTP}, + {SP_LB_10_DATA, 0x800, 1, 1, PIPE_BR, USPTP}, + {SP_LB_11_DATA, 0x800, 1, 1, PIPE_BR, USPTP}, + {SP_LB_12_DATA, 0x200, 1, 1, PIPE_BR, USPTP}, + /* SP 2 USPTP 0 */ + {TP0_TMO_DATA, 0x200, 2, 0, PIPE_BR, USPTP}, + {TP0_SMO_DATA, 0x80, 2, 0, PIPE_BR, USPTP}, + {TP0_MIPMAP_BASE_DATA, 0x3c0, 2, 0, PIPE_BR, USPTP}, + {SP_INST_DATA, 0x800, 2, 0, PIPE_BR, USPTP}, + {SP_INST_DATA_1, 0x800, 2, 0, PIPE_BR, USPTP}, + {SP_LB_0_DATA, 0x800, 2, 0, PIPE_BR, USPTP}, + {SP_LB_1_DATA, 0x800, 2, 0, PIPE_BR, USPTP}, + {SP_LB_2_DATA, 0x800, 2, 0, PIPE_BR, USPTP}, + {SP_LB_3_DATA, 0x800, 2, 0, PIPE_BR, USPTP}, + {SP_LB_4_DATA, 0x800, 2, 0, PIPE_BR, USPTP}, + {SP_LB_5_DATA, 0x800, 2, 0, PIPE_BR, USPTP}, + {SP_LB_6_DATA, 0x800, 2, 0, PIPE_BR, USPTP}, + {SP_LB_7_DATA, 0x800, 2, 0, PIPE_BR, USPTP}, + {SP_CB_RAM, 0x390, 2, 0, PIPE_BR, USPTP,}, + {SP_INST_TAG, 0x90, 2, 0, PIPE_BR, USPTP,}, + {SP_INST_DATA_2, 0x200, 2, 0, PIPE_BR, USPTP,}, + {SP_TMO_TAG, 0x80, 2, 0, PIPE_BR, USPTP,}, + {SP_SMO_TAG, 0x80, 2, 0, PIPE_BR, USPTP,}, + {SP_STATE_DATA, 0x40, 2, 0, PIPE_BR, USPTP,}, + {SP_HWAVE_RAM, 0x100, 2, 0, PIPE_BR, USPTP}, + {SP_L0_INST_BUF, 0x50, 2, 0, PIPE_BR, USPTP,}, + {SP_LB_8_DATA, 0x800, 2, 0, PIPE_BR, USPTP}, + {SP_LB_9_DATA, 0x800, 2, 0, PIPE_BR, USPTP}, + {SP_LB_10_DATA, 0x800, 2, 0, PIPE_BR, USPTP}, + {SP_LB_11_DATA, 0x800, 2, 0, PIPE_BR, USPTP}, + {SP_LB_12_DATA, 0x200, 2, 0, PIPE_BR, USPTP}, + /* SP 2 USPTP 1 */ + {TP0_TMO_DATA, 0x200, 2, 1, PIPE_BR, USPTP}, + {TP0_SMO_DATA, 0x80, 2, 1, PIPE_BR, USPTP}, + {TP0_MIPMAP_BASE_DATA, 0x3c0, 2, 1, PIPE_BR, USPTP}, + {SP_INST_DATA, 0x800, 2, 1, PIPE_BR, USPTP}, + {SP_INST_DATA_1, 0x800, 2, 1, PIPE_BR, USPTP}, + {SP_LB_0_DATA, 0x800, 2, 1, PIPE_BR, USPTP}, + {SP_LB_1_DATA, 0x800, 2, 1, PIPE_BR, USPTP}, + {SP_LB_2_DATA, 0x800, 2, 1, PIPE_BR, USPTP}, + {SP_LB_3_DATA, 0x800, 2, 1, PIPE_BR, USPTP}, + {SP_LB_4_DATA, 0x800, 2, 1, PIPE_BR, USPTP}, + {SP_LB_5_DATA, 0x800, 2, 1, PIPE_BR, 
USPTP}, + {SP_LB_6_DATA, 0x800, 2, 1, PIPE_BR, USPTP}, + {SP_LB_7_DATA, 0x800, 2, 1, PIPE_BR, USPTP}, + {SP_CB_RAM, 0x390, 2, 1, PIPE_BR, USPTP,}, + {SP_INST_TAG, 0x90, 2, 1, PIPE_BR, USPTP,}, + {SP_INST_DATA_2, 0x200, 2, 1, PIPE_BR, USPTP,}, + {SP_TMO_TAG, 0x80, 2, 1, PIPE_BR, USPTP,}, + {SP_SMO_TAG, 0x80, 2, 1, PIPE_BR, USPTP,}, + {SP_STATE_DATA, 0x40, 2, 1, PIPE_BR, USPTP,}, + {SP_HWAVE_RAM, 0x100, 2, 1, PIPE_BR, USPTP}, + {SP_L0_INST_BUF, 0x50, 2, 1, PIPE_BR, USPTP,}, + {SP_LB_8_DATA, 0x800, 2, 1, PIPE_BR, USPTP}, + {SP_LB_9_DATA, 0x800, 2, 1, PIPE_BR, USPTP}, + {SP_LB_10_DATA, 0x800, 2, 1, PIPE_BR, USPTP}, + {SP_LB_11_DATA, 0x800, 2, 1, PIPE_BR, USPTP}, + {SP_LB_12_DATA, 0x200, 2, 1, PIPE_BR, USPTP}, + /* SP 3 USPTP 0 */ + {TP0_TMO_DATA, 0x200, 3, 0, PIPE_BR, USPTP}, + {TP0_SMO_DATA, 0x80, 3, 0, PIPE_BR, USPTP}, + {TP0_MIPMAP_BASE_DATA, 0x3c0, 3, 0, PIPE_BR, USPTP}, + {SP_INST_DATA, 0x800, 3, 0, PIPE_BR, USPTP}, + {SP_INST_DATA_1, 0x800, 3, 0, PIPE_BR, USPTP}, + {SP_LB_0_DATA, 0x800, 3, 0, PIPE_BR, USPTP}, + {SP_LB_1_DATA, 0x800, 3, 0, PIPE_BR, USPTP}, + {SP_LB_2_DATA, 0x800, 3, 0, PIPE_BR, USPTP}, + {SP_LB_3_DATA, 0x800, 3, 0, PIPE_BR, USPTP}, + {SP_LB_4_DATA, 0x800, 3, 0, PIPE_BR, USPTP}, + {SP_LB_5_DATA, 0x800, 3, 0, PIPE_BR, USPTP}, + {SP_LB_6_DATA, 0x800, 3, 0, PIPE_BR, USPTP}, + {SP_LB_7_DATA, 0x800, 3, 0, PIPE_BR, USPTP}, + {SP_CB_RAM, 0x390, 3, 0, PIPE_BR, USPTP,}, + {SP_INST_TAG, 0x90, 3, 0, PIPE_BR, USPTP,}, + {SP_INST_DATA_2, 0x200, 3, 0, PIPE_BR, USPTP,}, + {SP_TMO_TAG, 0x80, 3, 0, PIPE_BR, USPTP,}, + {SP_SMO_TAG, 0x80, 3, 0, PIPE_BR, USPTP,}, + {SP_STATE_DATA, 0x40, 3, 0, PIPE_BR, USPTP,}, + {SP_HWAVE_RAM, 0x100, 3, 0, PIPE_BR, USPTP}, + {SP_L0_INST_BUF, 0x50, 3, 0, PIPE_BR, USPTP,}, + {SP_LB_8_DATA, 0x800, 3, 0, PIPE_BR, USPTP}, + {SP_LB_9_DATA, 0x800, 3, 0, PIPE_BR, USPTP}, + {SP_LB_10_DATA, 0x800, 3, 0, PIPE_BR, USPTP}, + {SP_LB_11_DATA, 0x800, 3, 0, PIPE_BR, USPTP}, + {SP_LB_12_DATA, 0x200, 3, 0, PIPE_BR, USPTP}, + /* SP 3 USPTP 1 */ + {TP0_TMO_DATA, 0x200, 3, 1, PIPE_BR, USPTP}, + {TP0_SMO_DATA, 0x80, 3, 1, PIPE_BR, USPTP}, + {TP0_MIPMAP_BASE_DATA, 0x3c0, 3, 1, PIPE_BR, USPTP}, + {SP_INST_DATA, 0x800, 3, 1, PIPE_BR, USPTP}, + {SP_INST_DATA_1, 0x800, 3, 1, PIPE_BR, USPTP}, + {SP_LB_0_DATA, 0x800, 3, 1, PIPE_BR, USPTP}, + {SP_LB_1_DATA, 0x800, 3, 1, PIPE_BR, USPTP}, + {SP_LB_2_DATA, 0x800, 3, 1, PIPE_BR, USPTP}, + {SP_LB_3_DATA, 0x800, 3, 1, PIPE_BR, USPTP}, + {SP_LB_4_DATA, 0x800, 3, 1, PIPE_BR, USPTP}, + {SP_LB_5_DATA, 0x800, 3, 1, PIPE_BR, USPTP}, + {SP_LB_6_DATA, 0x800, 3, 1, PIPE_BR, USPTP}, + {SP_LB_7_DATA, 0x800, 3, 1, PIPE_BR, USPTP}, + {SP_CB_RAM, 0x390, 3, 1, PIPE_BR, USPTP,}, + {SP_INST_TAG, 0x90, 3, 1, PIPE_BR, USPTP,}, + {SP_INST_DATA_2, 0x200, 3, 1, PIPE_BR, USPTP,}, + {SP_TMO_TAG, 0x80, 3, 1, PIPE_BR, USPTP,}, + {SP_SMO_TAG, 0x80, 3, 1, PIPE_BR, USPTP,}, + {SP_STATE_DATA, 0x40, 3, 1, PIPE_BR, USPTP,}, + {SP_HWAVE_RAM, 0x100, 3, 1, PIPE_BR, USPTP}, + {SP_L0_INST_BUF, 0x50, 3, 1, PIPE_BR, USPTP,}, + {SP_LB_8_DATA, 0x800, 3, 1, PIPE_BR, USPTP}, + {SP_LB_9_DATA, 0x800, 3, 1, PIPE_BR, USPTP}, + {SP_LB_10_DATA, 0x800, 3, 1, PIPE_BR, USPTP}, + {SP_LB_11_DATA, 0x800, 3, 1, PIPE_BR, USPTP}, + {SP_LB_12_DATA, 0x200, 3, 1, PIPE_BR, USPTP}, +}; + +static const u32 gen7_gbif_debugbus_blocks[] = { + DEBUGBUS_GBIF_CX, + DEBUGBUS_GBIF_GX, +}; + +static const u32 gen7_cx_dbgc_debugbus_blocks[] = { + DEBUGBUS_GMU_CX, + DEBUGBUS_CX, +}; + +struct gen7_shader_block_info { + struct gen7_shader_block *block; + u32 bank; + u64 offset; +}; + +static struct reg_list { + const 
u32 *regs; + const struct sel_reg *sel; + u64 offset; +} gen7_reg_list[] = { + { gen7_gpu_registers, NULL }, + { gen7_cx_misc_registers, NULL }, + { gen7_dpm_registers, NULL }, +}; + +static struct cp_indexed_reg_list { + u32 addr; + u32 data; + u32 size; +} gen7_cp_indexed_reg_list[] = { + { GEN7_CP_SQE_STAT_ADDR, GEN7_CP_SQE_STAT_DATA, 0x33}, + { GEN7_CP_DRAW_STATE_ADDR, GEN7_CP_DRAW_STATE_DATA, 0x100}, + { GEN7_CP_SQE_UCODE_DBG_ADDR, GEN7_CP_SQE_UCODE_DBG_DATA, 0x8000}, + { GEN7_CP_BV_SQE_STAT_ADDR, GEN7_CP_BV_SQE_STAT_DATA, 0x33}, + { GEN7_CP_BV_DRAW_STATE_ADDR, GEN7_CP_BV_DRAW_STATE_DATA, 0x100}, + { GEN7_CP_BV_SQE_UCODE_DBG_ADDR, GEN7_CP_BV_SQE_UCODE_DBG_DATA, 0x8000}, + { GEN7_CP_SQE_AC_STAT_ADDR, GEN7_CP_SQE_AC_STAT_DATA, 0x33}, + { GEN7_CP_LPAC_DRAW_STATE_ADDR, GEN7_CP_LPAC_DRAW_STATE_DATA, 0x100}, + { GEN7_CP_SQE_AC_UCODE_DBG_ADDR, GEN7_CP_SQE_AC_UCODE_DBG_DATA, 0x8000}, + { GEN7_CP_LPAC_FIFO_DBG_ADDR, GEN7_CP_LPAC_FIFO_DBG_DATA, 0x40}, +}; +#endif /*_ADRENO_GEN7_SNAPSHOT_H */ diff --git a/adreno_hfi.h b/adreno_hfi.h new file mode 100644 index 0000000000..6b171de136 --- /dev/null +++ b/adreno_hfi.h @@ -0,0 +1,869 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. + */ +#ifndef __ADRENO_HFI_H +#define __ADRENO_HFI_H + +#define HFI_QUEUE_SIZE SZ_4K /* bytes, must be base 4dw */ +#define MAX_RCVD_PAYLOAD_SIZE 16 /* dwords */ +#define MAX_RCVD_SIZE (MAX_RCVD_PAYLOAD_SIZE + 3) /* dwords */ +#define HFI_MAX_MSG_SIZE (SZ_1K) + +#define HFI_CMD_ID 0 +#define HFI_MSG_ID 1 +#define HFI_DBG_ID 2 +#define HFI_DSP_ID_0 3 + +#define HFI_CMD_IDX 0 +#define HFI_MSG_IDX 1 +#define HFI_DBG_IDX 2 +#define HFI_DSP_IDX_BASE 3 +#define HFI_DSP_IDX_0 3 + +#define HFI_CMD_IDX_LEGACY 0 +#define HFI_DSP_IDX_0_LEGACY 1 +#define HFI_MSG_IDX_LEGACY 4 +#define HFI_DBG_IDX_LEGACY 5 + +#define HFI_QUEUE_STATUS_DISABLED 0 +#define HFI_QUEUE_STATUS_ENABLED 1 + +/* HTOF queue priority, 1 is highest priority */ +#define HFI_CMD_PRI 10 +#define HFI_MSG_PRI 10 +#define HFI_DBG_PRI 40 +#define HFI_DSP_PRI_0 20 + +#define HFI_IRQ_SIDEMSGQ_MASK BIT(1) +#define HFI_IRQ_DBGQ_MASK BIT(2) +#define HFI_IRQ_CM3_FAULT_MASK BIT(15) +#define HFI_IRQ_OOB_MASK GENMASK(31, 16) +#define HFI_IRQ_MASK (HFI_IRQ_SIDEMSGQ_MASK |\ + HFI_IRQ_DBGQ_MASK |\ + HFI_IRQ_CM3_FAULT_MASK) + +#define DCVS_ACK_NONBLOCK 0 +#define DCVS_ACK_BLOCK 1 + +#define HFI_FEATURE_DCVS 0 +#define HFI_FEATURE_HWSCHED 1 +#define HFI_FEATURE_PREEMPTION 2 +#define HFI_FEATURE_CLOCKS_ON 3 +#define HFI_FEATURE_BUS_ON 4 +#define HFI_FEATURE_RAIL_ON 5 +#define HFI_FEATURE_HWCG 6 +#define HFI_FEATURE_LM 7 +#define HFI_FEATURE_THROTTLE 8 +#define HFI_FEATURE_IFPC 9 +#define HFI_FEATURE_NAP 10 +#define HFI_FEATURE_BCL 11 +#define HFI_FEATURE_ACD 12 +#define HFI_FEATURE_DIDT 13 +#define HFI_FEATURE_DEPRECATED 14 +#define HFI_FEATURE_CB 15 +#define HFI_FEATURE_KPROF 16 +#define HFI_FEATURE_BAIL_OUT_TIMER 17 +#define HFI_FEATURE_GMU_STATS 18 +#define HFI_FEATURE_DBQ 19 +#define HFI_FEATURE_MINBW 20 +#define HFI_FEATURE_CLX 21 + +/* A6xx uses a different value for KPROF */ +#define HFI_FEATURE_A6XX_KPROF 14 + +#define HFI_VALUE_FT_POLICY 100 +#define HFI_VALUE_RB_MAX_CMDS 101 +#define HFI_VALUE_CTX_MAX_CMDS 102 +#define HFI_VALUE_ADDRESS 103 +#define HFI_VALUE_MAX_GPU_PERF_INDEX 104 +#define HFI_VALUE_MIN_GPU_PERF_INDEX 105 +#define HFI_VALUE_MAX_BW_PERF_INDEX 106 +#define HFI_VALUE_MIN_BW_PERF_INDEX 107 +#define HFI_VALUE_MAX_GPU_THERMAL_INDEX 108 +#define HFI_VALUE_GPUCLK 109 +#define HFI_VALUE_CLK_TIME 
110 +#define HFI_VALUE_LOG_GROUP 111 +#define HFI_VALUE_LOG_EVENT_ON 112 +#define HFI_VALUE_LOG_EVENT_OFF 113 +#define HFI_VALUE_DCVS_OBJ 114 +#define HFI_VALUE_LM_CS0 115 +#define HFI_VALUE_BIN_TIME 117 +#define HFI_VALUE_LOG_STREAM_ENABLE 119 +#define HFI_VALUE_PREEMPT_COUNT 120 + +#define HFI_VALUE_GLOBAL_TOKEN 0xFFFFFFFF + +#define HFI_CTXT_FLAG_PMODE BIT(0) +#define HFI_CTXT_FLAG_SWITCH_INTERNAL BIT(1) +#define HFI_CTXT_FLAG_SWITCH BIT(3) +#define HFI_CTXT_FLAG_NOTIFY BIT(5) +#define HFI_CTXT_FLAG_NO_FAULT_TOLERANCE BIT(9) +#define HFI_CTXT_FLAG_PWR_RULE BIT(11) +#define HFI_CTXT_FLAG_PRIORITY_MASK GENMASK(15, 12) +#define HFI_CTXT_FLAG_IFH_NOP BIT(16) +#define HFI_CTXT_FLAG_SECURE BIT(17) +#define HFI_CTXT_FLAG_TYPE_MASK GENMASK(24, 20) +#define HFI_CTXT_FLAG_TYPE_ANY 0 +#define HFI_CTXT_FLAG_TYPE_GL 1 +#define HFI_CTXT_FLAG_TYPE_CL 2 +#define HFI_CTXT_FLAG_TYPE_C2D 3 +#define HFI_CTXT_FLAG_TYPE_RS 4 +#define HFI_CTXT_FLAG_TYPE_VK 5 +#define HFI_CTXT_FLAG_TYPE_UNKNOWN 0x1e +#define HFI_CTXT_FLAG_PREEMPT_STYLE_MASK GENMASK(27, 25) +#define HFI_CTXT_FLAG_PREEMPT_STYLE_ANY 0 +#define HFI_CTXT_FLAG_PREEMPT_STYLE_RB 1 +#define HFI_CTXT_FLAG_PREEMPT_STYLE_FG 2 +#define CMDBATCH_INDIRECT 0x00000200 + +enum hfi_mem_kind { + /** @HFI_MEMKIND_GENERIC: Used for requesting generic memory */ + HFI_MEMKIND_GENERIC = 0, + /** @HFI_MEMKIND_RB: Used for requesting ringbuffer memory */ + HFI_MEMKIND_RB, + /** @HFI_MEMKIND_SCRATCH: Used for requesting scratch memory */ + HFI_MEMKIND_SCRATCH, + /** + * @HFI_MEMKIND_CSW_SMMU_INFO: Used for requesting SMMU record for + * preemption context switching + */ + HFI_MEMKIND_CSW_SMMU_INFO, + /** + * @HFI_MEMKIND_CSW_PRIV_NON_SECURE: Used for requesting privileged non + * secure preemption records + */ + HFI_MEMKIND_CSW_PRIV_NON_SECURE, + /** + * @HFI_MEMKIND_CSW_PRIV_SECURE: Used for requesting privileged secure + * preemption records + */ + HFI_MEMKIND_CSW_PRIV_SECURE, + /** + * @HFI_MEMKIND_CSW_NON_PRIV: Used for requesting non privileged per + * context preemption buffer + */ + HFI_MEMKIND_CSW_NON_PRIV, + /** + * @HFI_MEMKIND_CSW_COUNTER: Used for requesting preemption performance + * counter save/restore buffer + */ + HFI_MEMKIND_CSW_COUNTER, + /** + * @HFI_MEMKIND_CTXTREC_PREEMPT_CNTR: Used for requesting preemption + * counter buffer + */ + HFI_MEMKIND_CTXTREC_PREEMPT_CNTR, + /** @HFI_MEMKIND_SYSLOG: Used for requesting system log memory */ + HFI_MEMKIND_SYS_LOG, + /** @HFI_MEMKIND_CRASH_DUMP: Used for requesting carsh dumper memory */ + HFI_MEMKIND_CRASH_DUMP, + /** + * @HFI_MEMKIND_MMIO_DPU: Used for requesting Display processing unit's + * register space + */ + HFI_MEMKIND_MMIO_DPU, + /** + * @HFI_MEMKIND_MMIO_TCSR: Used for requesting Top CSR(contains SoC + * doorbells) register space + */ + HFI_MEMKIND_MMIO_TCSR, + /** + * @HFI_MEMKIND_MMIO_QDSS_STM: Used for requesting QDSS STM register + * space + */ + HFI_MEMKIND_MMIO_QDSS_STM, + /** @HFI_MEMKIND_PROFILE: Used for kernel profiling */ + HFI_MEMKIND_PROFILE, + /** @HFI_MEMKIND_USER_PROFILING_IBS: Used for user profiling */ + HFI_MEMKIND_USER_PROFILE_IBS, + /** @MEMKIND_CMD_BUFFER: Used for composing ringbuffer content */ + HFI_MEMKIND_CMD_BUFFER, + HFI_MEMKIND_MAX, +}; + +static const char * const hfi_memkind_strings[] = { + [HFI_MEMKIND_GENERIC] = "GMU GENERIC", + [HFI_MEMKIND_RB] = "GMU RB", + [HFI_MEMKIND_SCRATCH] = "GMU SCRATCH", + [HFI_MEMKIND_CSW_SMMU_INFO] = "GMU SMMU INFO", + [HFI_MEMKIND_CSW_PRIV_NON_SECURE] = "GMU CSW PRIV NON SECURE", + [HFI_MEMKIND_CSW_PRIV_SECURE] = "GMU CSW PRIV 
SECURE", + [HFI_MEMKIND_CSW_NON_PRIV] = "GMU CSW NON PRIV", + [HFI_MEMKIND_CSW_COUNTER] = "GMU CSW COUNTER", + [HFI_MEMKIND_CTXTREC_PREEMPT_CNTR] = "GMU PREEMPT CNTR", + [HFI_MEMKIND_SYS_LOG] = "GMU SYS LOG", + [HFI_MEMKIND_CRASH_DUMP] = "GMU CRASHDUMP", + [HFI_MEMKIND_MMIO_DPU] = "GMU MMIO DPU", + [HFI_MEMKIND_MMIO_TCSR] = "GMU MMIO TCSR", + [HFI_MEMKIND_MMIO_QDSS_STM] = "GMU MMIO QDSS STM", + [HFI_MEMKIND_PROFILE] = "GMU KERNEL PROFILING", + [HFI_MEMKIND_USER_PROFILE_IBS] = "GMU USER PROFILING", + [HFI_MEMKIND_CMD_BUFFER] = "GMU CMD BUFFER", +}; + +/* CP/GFX pipeline can access */ +#define HFI_MEMFLAG_GFX_ACC BIT(0) + +/* Buffer has APRIV protection in GFX PTEs */ +#define HFI_MEMFLAG_GFX_PRIV BIT(1) + +/* Buffer is read-write for GFX PTEs. A 0 indicates read-only */ +#define HFI_MEMFLAG_GFX_WRITEABLE BIT(2) + +/* GMU can access */ +#define HFI_MEMFLAG_GMU_ACC BIT(3) + +/* Buffer has APRIV protection in GMU PTEs */ +#define HFI_MEMFLAG_GMU_PRIV BIT(4) + +/* Buffer is read-write for GMU PTEs. A 0 indicates read-only */ +#define HFI_MEMFLAG_GMU_WRITEABLE BIT(5) + +/* Buffer is located in GMU's non-cached bufferable VA range */ +#define HFI_MEMFLAG_GMU_BUFFERABLE BIT(6) + +/* Buffer is located in GMU's cacheable VA range */ +#define HFI_MEMFLAG_GMU_CACHEABLE BIT(7) + +/* Host can access */ +#define HFI_MEMFLAG_HOST_ACC BIT(8) + +/* Host initializes the buffer */ +#define HFI_MEMFLAG_HOST_INIT BIT(9) + +/* Gfx buffer needs to be secure */ +#define HFI_MEMFLAG_GFX_SECURE BIT(12) + +/** + * struct hfi_queue_table_header - HFI queue table structure + * @version: HFI protocol version + * @size: queue table size in dwords + * @qhdr0_offset: first queue header offset (dwords) in this table + * @qhdr_size: queue header size + * @num_q: number of queues defined in this table + * @num_active_q: number of active queues + */ +struct hfi_queue_table_header { + u32 version; + u32 size; + u32 qhdr0_offset; + u32 qhdr_size; + u32 num_q; + u32 num_active_q; +} __packed; + +/** + * struct hfi_queue_header - HFI queue header structure + * @status: active: 1; inactive: 0 + * @start_addr: starting address of the queue in GMU VA space + * @type: queue type encoded the priority, ID and send/recevie types + * @queue_size: size of the queue + * @msg_size: size of the message if each message has fixed size. + * Otherwise, 0 means variable size of message in the queue. 
+ * @read_index: read index of the queue + * @write_index: write index of the queue + */ +struct hfi_queue_header { + u32 status; + u32 start_addr; + u32 type; + u32 queue_size; + u32 msg_size; + u32 unused0; + u32 unused1; + u32 unused2; + u32 unused3; + u32 unused4; + u32 read_index; + u32 write_index; +} __packed; + +#define HFI_MSG_CMD 0 /* V1 and V2 */ +#define HFI_MSG_ACK 1 /* V2 only */ +#define HFI_V1_MSG_POST 1 /* V1 only */ +#define HFI_V1_MSG_ACK 2/* V1 only */ + +/* Size is converted from Bytes to DWords */ +#define CREATE_MSG_HDR(id, size, type) \ + (((type) << 16) | ((((size) >> 2) & 0xFF) << 8) | ((id) & 0xFF)) +#define ACK_MSG_HDR(id, size) CREATE_MSG_HDR(id, size, HFI_MSG_ACK) + +#define HFI_QUEUE_DEFAULT_CNT 3 +#define HFI_QUEUE_DISPATCH_MAX_CNT 14 +#define HFI_QUEUE_HDR_MAX (HFI_QUEUE_DEFAULT_CNT + HFI_QUEUE_DISPATCH_MAX_CNT) + +struct hfi_queue_table { + struct hfi_queue_table_header qtbl_hdr; + struct hfi_queue_header qhdr[HFI_QUEUE_HDR_MAX]; +} __packed; + +#define HFI_QUEUE_OFFSET(i) \ + (ALIGN(sizeof(struct hfi_queue_table), SZ_16) + \ + ((i) * HFI_QUEUE_SIZE)) + +#define GMU_QUEUE_START_ADDR(gmuaddr, i) \ + (gmuaddr + HFI_QUEUE_OFFSET(i)) + +#define HOST_QUEUE_START_ADDR(hfi_mem, i) \ + ((hfi_mem)->hostptr + HFI_QUEUE_OFFSET(i)) + +#define MSG_HDR_GET_ID(hdr) ((hdr) & 0xFF) +#define MSG_HDR_GET_SIZE(hdr) (((hdr) >> 8) & 0xFF) +#define MSG_HDR_GET_TYPE(hdr) (((hdr) >> 16) & 0xF) +#define MSG_HDR_GET_SEQNUM(hdr) (((hdr) >> 20) & 0xFFF) + +#define MSG_HDR_GET_SIZE(hdr) (((hdr) >> 8) & 0xFF) +#define MSG_HDR_GET_SEQNUM(hdr) (((hdr) >> 20) & 0xFFF) + +#define HDR_CMP_SEQNUM(out_hdr, in_hdr) \ + (MSG_HDR_GET_SEQNUM(out_hdr) == MSG_HDR_GET_SEQNUM(in_hdr)) + +#define MSG_HDR_SET_SEQNUM(hdr, num) \ + (((hdr) & 0xFFFFF) | ((num) << 20)) + +#define QUEUE_HDR_TYPE(id, prio, rtype, stype) \ + (((id) & 0xFF) | (((prio) & 0xFF) << 8) | \ + (((rtype) & 0xFF) << 16) | (((stype) & 0xFF) << 24)) + +#define HFI_RSP_TIMEOUT 100 /* msec */ + +#define HFI_IRQ_MSGQ_MASK BIT(0) + +#define H2F_MSG_INIT 0 +#define H2F_MSG_FW_VER 1 +#define H2F_MSG_LM_CFG 2 +#define H2F_MSG_BW_VOTE_TBL 3 +#define H2F_MSG_PERF_TBL 4 +#define H2F_MSG_TEST 5 +#define H2F_MSG_ACD_TBL 7 +#define H2F_MSG_START 10 +#define H2F_MSG_FEATURE_CTRL 11 +#define H2F_MSG_GET_VALUE 12 +#define H2F_MSG_SET_VALUE 13 +#define H2F_MSG_CORE_FW_START 14 +#define F2H_MSG_MEM_ALLOC 20 +#define H2F_MSG_GX_BW_PERF_VOTE 30 +#define H2F_MSG_FW_HALT 32 +#define H2F_MSG_PREPARE_SLUMBER 33 +#define F2H_MSG_ERR 100 +#define F2H_MSG_DEBUG 101 +#define F2H_MSG_LOG_BLOCK 102 +#define F2H_MSG_GMU_CNTR_REGISTER 110 +#define F2H_MSG_GMU_CNTR_RELEASE 111 +#define F2H_MSG_ACK 126 /* Deprecated for v2.0*/ +#define H2F_MSG_ACK 127 /* Deprecated for v2.0*/ +#define H2F_MSG_REGISTER_CONTEXT 128 +#define H2F_MSG_UNREGISTER_CONTEXT 129 +#define H2F_MSG_ISSUE_CMD 130 +#define H2F_MSG_ISSUE_CMD_RAW 131 +#define H2F_MSG_TS_NOTIFY 132 +#define F2H_MSG_TS_RETIRE 133 +#define H2F_MSG_CONTEXT_POINTERS 134 +#define H2F_MSG_CONTEXT_RULE 140 /* AKA constraint */ +#define F2H_MSG_CONTEXT_BAD 150 + +/* H2F */ +struct hfi_gmu_init_cmd { + u32 hdr; + u32 seg_id; + u32 dbg_buffer_addr; + u32 dbg_buffer_size; + u32 boot_state; +} __packed; + +/* H2F */ +struct hfi_fw_version_cmd { + u32 hdr; + u32 supported_ver; +} __packed; + +/* H2F */ +struct hfi_bwtable_cmd { + u32 hdr; + u32 bw_level_num; + u32 cnoc_cmds_num; + u32 ddr_cmds_num; + u32 cnoc_wait_bitmask; + u32 ddr_wait_bitmask; + u32 cnoc_cmd_addrs[MAX_CNOC_CMDS]; + u32 cnoc_cmd_data[MAX_CNOC_LEVELS][MAX_CNOC_CMDS]; 
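+ /* DDR bandwidth vote commands follow, mirroring the CNOC entries above */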
+ u32 ddr_cmd_addrs[MAX_BW_CMDS]; + u32 ddr_cmd_data[MAX_GX_LEVELS][MAX_BW_CMDS]; +} __packed; + +struct opp_gx_desc { + u32 vote; + u32 acd; + u32 freq; +} __packed; + +struct opp_desc { + u32 vote; + u32 freq; +} __packed; + +/* H2F */ +struct hfi_dcvstable_v1_cmd { + u32 hdr; + u32 gpu_level_num; + u32 gmu_level_num; + struct opp_desc gx_votes[MAX_GX_LEVELS]; + struct opp_desc cx_votes[MAX_CX_LEVELS]; +} __packed; + +/* H2F */ +struct hfi_dcvstable_cmd { + u32 hdr; + u32 gpu_level_num; + u32 gmu_level_num; + struct opp_gx_desc gx_votes[MAX_GX_LEVELS]; + struct opp_desc cx_votes[MAX_CX_LEVELS]; +} __packed; + +#define MAX_ACD_STRIDE 2 +#define MAX_ACD_NUM_LEVELS 6 + +/* H2F */ +struct hfi_acd_table_cmd { + u32 hdr; + u32 version; + u32 enable_by_level; + u32 stride; + u32 num_levels; + u32 data[MAX_ACD_NUM_LEVELS * MAX_ACD_STRIDE]; +} __packed; + +/* H2F */ +struct hfi_test_cmd { + u32 hdr; + u32 data; +} __packed; + +/* H2F */ +struct hfi_start_cmd { + u32 hdr; +} __packed; + +/* H2F */ +struct hfi_feature_ctrl_cmd { + u32 hdr; + u32 feature; + u32 enable; + u32 data; +} __packed; + +/* H2F */ +struct hfi_get_value_cmd { + u32 hdr; + u32 type; + u32 subtype; +} __packed; + +/* Internal */ +struct hfi_get_value_req { + struct hfi_get_value_cmd cmd; + u32 data[16]; +} __packed; + +/* F2H */ +struct hfi_get_value_reply_cmd { + u32 hdr; + u32 req_hdr; + u32 data[16]; +} __packed; + +/* H2F */ +struct hfi_set_value_cmd { + u32 hdr; + u32 type; + u32 subtype; + u32 data; +} __packed; + +/* H2F */ +struct hfi_core_fw_start_cmd { + u32 hdr; + u32 handle; +} __packed; + +struct hfi_mem_alloc_desc { + u64 gpu_addr; + u32 flags; + u32 mem_kind; + u32 host_mem_handle; + u32 gmu_mem_handle; + u32 gmu_addr; + u32 size; /* Bytes */ +} __packed; + +struct hfi_mem_alloc_entry { + struct hfi_mem_alloc_desc desc; + struct kgsl_memdesc *md; +}; + +/* F2H */ +struct hfi_mem_alloc_cmd { + u32 hdr; + u32 reserved; /* Padding to ensure alignment of 'desc' below */ + struct hfi_mem_alloc_desc desc; +} __packed; + +/* H2F */ +struct hfi_mem_alloc_reply_cmd { + u32 hdr; + u32 req_hdr; + struct hfi_mem_alloc_desc desc; +} __packed; + +/* H2F */ +struct hfi_gx_bw_perf_vote_cmd { + u32 hdr; + u32 ack_type; + u32 freq; + u32 bw; +} __packed; + +/* H2F */ +struct hfi_fw_halt_cmd { + u32 hdr; + u32 en_halt; +} __packed; + +/* H2F */ +struct hfi_prep_slumber_cmd { + u32 hdr; + u32 bw; + u32 freq; +} __packed; + +/* F2H */ +struct hfi_err_cmd { + u32 hdr; + u32 error_code; + u32 data[16]; +} __packed; + +/* F2H */ +struct hfi_debug_cmd { + u32 hdr; + u32 type; + u32 timestamp; + u32 data; +} __packed; + +/* F2H */ +struct hfi_gmu_cntr_register_cmd { + u32 hdr; + u32 group_id; + u32 countable; +} __packed; + +/* H2F */ +struct hfi_gmu_cntr_register_reply_cmd { + u32 hdr; + u32 req_hdr; + u32 group_id; + u32 countable; + u64 counter_addr; +} __packed; + +/* F2H */ +struct hfi_gmu_cntr_release_cmd { + u32 hdr; + u32 group_id; + u32 countable; +} __packed; + +/* H2F */ +struct hfi_register_ctxt_cmd { + u32 hdr; + u32 ctxt_id; + u32 flags; + u64 pt_addr; + u32 ctxt_idr; + u32 ctxt_bank; +} __packed; + +/* H2F */ +struct hfi_unregister_ctxt_cmd { + u32 hdr; + u32 ctxt_id; + u32 ts; +} __packed; + +struct hfi_issue_ib { + u64 addr; + u32 size; +} __packed; + +/* H2F */ +struct hfi_issue_cmd_cmd { + u32 hdr; + u32 ctxt_id; + u32 flags; + u32 ts; + u32 count; + struct hfi_issue_ib *ibs[]; +} __packed; + +/* Internal */ +struct hfi_issue_cmd_req { + u32 queue; + u32 ctxt_id; + struct hfi_issue_cmd_cmd cmd; +} __packed; + +/* H2F 
*/ +/* The length of *buf will be embedded in the hdr */ +struct hfi_issue_cmd_raw_cmd { + u32 hdr; + u32 *buf; +} __packed; + +/* Internal */ +struct hfi_issue_cmd_raw_req { + u32 queue; + u32 ctxt_id; + u32 len; + u32 *buf; +} __packed; + +/* H2F */ +struct hfi_ts_notify_cmd { + u32 hdr; + u32 ctxt_id; + u32 ts; +} __packed; + +#define CMDBATCH_SUCCESS 0 +#define CMDBATCH_RETIRED 1 +#define CMDBATCH_ERROR 2 +#define CMDBATCH_SKIP 3 + +#define CMDBATCH_PROFILING BIT(4) + +/* F2H */ +struct hfi_ts_retire_cmd { + u32 hdr; + u32 ctxt_id; + u32 ts; + u32 type; + u64 submitted_to_rb; + u64 sop; + u64 eop; + u64 retired_on_gmu; +} __packed; + +/* H2F */ +struct hfi_context_pointers_cmd { + u32 hdr; + u32 ctxt_id; + u64 sop_addr; + u64 eop_addr; + u64 user_ctxt_record_addr; +} __packed; + +/* H2F */ +struct hfi_context_rule_cmd { + u32 hdr; + u32 ctxt_id; + u32 type; + u32 status; +} __packed; + +/* F2H */ +struct hfi_context_bad_cmd { + u32 hdr; + u32 ctxt_id; + u32 policy; + u32 ts; + u32 error; + u32 payload[]; +} __packed; + +/* H2F */ +struct hfi_context_bad_reply_cmd { + u32 hdr; + u32 req_hdr; +} __packed; + +/* H2F */ +struct hfi_submit_cmd { + u32 hdr; + u32 ctxt_id; + u32 flags; + u32 ts; + u32 profile_gpuaddr_lo; + u32 profile_gpuaddr_hi; + u32 numibs; + u32 big_ib_gmu_va; +} __packed; + +struct hfi_log_block { + u32 hdr; + u32 version; + u32 start_index; + u32 stop_index; +} __packed; + +/** + * struct pending_cmd - data structure to track outstanding HFI + * command messages + */ +struct pending_cmd { + /** @sent_hdr: Header of the un-ack'd hfi packet */ + u32 sent_hdr; + /** @results: Array to store the ack packet */ + u32 results[MAX_RCVD_SIZE]; + /** @complete: Completion to signal hfi ack has been received */ + struct completion complete; + /** @node: to add it to the list of hfi packets waiting for ack */ + struct list_head node; +}; + +static inline int _CMD_MSG_HDR(u32 *hdr, int id, size_t size) +{ + if (WARN_ON(size > HFI_MAX_MSG_SIZE)) + return -EMSGSIZE; + + *hdr = CREATE_MSG_HDR(id, size, HFI_MSG_CMD); + return 0; +} + +#define CMD_MSG_HDR(cmd, id) \ + _CMD_MSG_HDR(&(cmd).hdr, id, sizeof(cmd)) + +/* Maximum number of IBs in a submission */ +#define HWSCHED_MAX_DISPATCH_NUMIBS \ + ((HFI_MAX_MSG_SIZE - offsetof(struct hfi_issue_cmd_cmd, ibs)) \ + / sizeof(struct hfi_issue_ib)) + +/** + * struct payload_section - Container of keys values + * + * There may be a variable number of payload sections appended + * to the context bad HFI message. Each payload section contains + * a variable number of key-value pairs, both key and value being + * single dword each. + */ +struct payload_section { + /** @type: Type of the payload */ + u16 type; + /** @dwords: Number of dwords in the data array. */ + u16 dwords; + /** @data: A sequence of key-value pairs. Each pair is 2 dwords. 
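+ * Keys come from the KEY_* defines below for the given payload @type.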
*/ + u32 data[]; +} __packed; + +/* IDs for context bad hfi payloads */ +#define PAYLOAD_FAULT_REGS 1 +#define PAYLOAD_RB 2 +#define PAYLOAD_PREEMPT_TIMEOUT 3 + +/* Keys for PAYLOAD_FAULT_REGS type payload */ +#define KEY_CP_OPCODE_ERROR 1 +#define KEY_CP_PROTECTED_ERROR 2 +#define KEY_CP_HW_FAULT 3 +#define KEY_CP_BV_OPCODE_ERROR 4 +#define KEY_CP_BV_PROTECTED_ERROR 5 +#define KEY_CP_BV_HW_FAULT 6 + +/* Keys for PAYLOAD_RB type payload */ +#define KEY_RB_ID 1 +#define KEY_RB_RPTR 2 +#define KEY_RB_WPTR 3 +#define KEY_RB_SIZEDWORDS 4 +#define KEY_RB_QUEUED_TS 5 +#define KEY_RB_RETIRED_TS 6 +#define KEY_RB_GPUADDR_LO 7 +#define KEY_RB_GPUADDR_HI 8 + +/* Keys for PAYLOAD_PREEMPT_TIMEOUT type payload */ +#define KEY_PREEMPT_TIMEOUT_CUR_RB_ID 1 +#define KEY_PREEMPT_TIMEOUT_NEXT_RB_ID 2 + +/* Types of errors that trigger context bad HFI */ + +/* GPU encountered a CP HW error */ +#define GMU_CP_HW_ERROR 600 +/* GPU encountered a GPU Hang interrupt */ +#define GMU_GPU_HW_HANG 601 +/* Preemption didn't complete in given time */ +#define GMU_GPU_PREEMPT_TIMEOUT 602 +/* Fault due to Long IB timeout */ +#define GMU_GPU_SW_HANG 603 +/* GPU encountered a bad opcode */ +#define GMU_CP_OPCODE_ERROR 604 +/* GPU encountered protected mode error */ +#define GMU_CP_PROTECTED_ERROR 605 +/* GPU encountered an illegal instruction */ +#define GMU_CP_ILLEGAL_INST_ERROR 606 +/* GPU encountered a CP ucode error */ +#define GMU_CP_UCODE_ERROR 607 +/* GPU encountered a CP hw fault error */ +#define GMU_CP_HW_FAULT_ERROR 608 +/* GPU encountered a GPC error */ +#define GMU_CP_GPC_ERROR 609 +/* GPU BV encountered a bad opcode */ +#define GMU_CP_BV_OPCODE_ERROR 610 +/* GPU BV encountered protected mode error */ +#define GMU_CP_BV_PROTECTED_ERROR 611 +/* GPU BV encountered a CP hw fault error */ +#define GMU_CP_BV_HW_FAULT_ERROR 612 +/* GPU BV encountered a CP ucode error */ +#define GMU_CP_BV_UCODE_ERROR 613 +/* GPU BV encountered an illegal instruction */ +#define GMU_CP_BV_ILLEGAL_INST_ERROR 614 +/* GPU encountered an unknown CP error */ +#define GMU_CP_UNKNOWN_ERROR 700 + +/** + * hfi_update_read_idx - Update the read index of an hfi queue + * hdr: Pointer to the hfi queue header + * index: New read index + * + * This function makes sure that kgsl has consumed f2h packets + * before GMU sees the updated read index. This avoids a corner + * case where GMU might over-write f2h packets that have not yet + * been consumed by kgsl. + */ +static inline void hfi_update_read_idx(struct hfi_queue_header *hdr, u32 index) +{ + /* + * This is to make sure packets are consumed before gmu sees the updated + * read index + */ + mb(); + + hdr->read_index = index; +} + +/** + * hfi_update_write_idx - Update the write index of an hfi queue + * hdr: Pointer to the hfi queue header + * index: New write index + * + * This function makes sure that the h2f packets are written out + * to memory before GMU sees the updated write index. This avoids + * corner cases where GMU might fetch stale entries that can happen + * if write index is updated before new packets have been written + * out to memory. 
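+ * A second barrier after the update makes sure the new index is visible
+ * before the interrupt that notifies the GMU is raised.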
+ */ +static inline void hfi_update_write_idx(struct hfi_queue_header *hdr, u32 index) +{ + /* + * This is to make sure packets are written out before gmu sees the + * updated write index + */ + wmb(); + + hdr->write_index = index; + + /* + * Memory barrier to make sure write index is written before an + * interrupt is raised + */ + wmb(); +} +#endif diff --git a/adreno_hwsched.c b/adreno_hwsched.c new file mode 100644 index 0000000000..84a21aec8c --- /dev/null +++ b/adreno_hwsched.c @@ -0,0 +1,1714 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. + */ + +#include "adreno.h" +#include "adreno_hfi.h" +#include "adreno_snapshot.h" +#include "adreno_sysfs.h" +#include "adreno_trace.h" +#include "kgsl_timeline.h" + +/* This structure represents inflight command object */ +struct cmd_list_obj { + /** @cmdobj: Handle to the command object */ + struct kgsl_drawobj_cmd *cmdobj; + /** @node: List node to put it in the list of inflight commands */ + struct list_head node; +}; + +/* + * Number of commands that can be queued in a context before it sleeps + * + * Our code that "puts back" a command from the context is much cleaner + * if we are sure that there will always be enough room in the ringbuffer + * so restrict the size of the context queue to ADRENO_CONTEXT_DRAWQUEUE_SIZE - 1 + */ +static u32 _context_drawqueue_size = ADRENO_CONTEXT_DRAWQUEUE_SIZE - 1; + +/* Number of milliseconds to wait for the context queue to clear */ +static unsigned int _context_queue_wait = 10000; + +/* + * GFT throttle parameters. If GFT recovered more than + * X times in Y ms invalidate the context and do not attempt recovery. + * X -> _fault_throttle_burst + * Y -> _fault_throttle_time + */ +static unsigned int _fault_throttle_time = 2000; +static unsigned int _fault_throttle_burst = 3; + +/* Use a kmem cache to speed up allocations for dispatcher jobs */ +static struct kmem_cache *jobs_cache; +/* Use a kmem cache to speed up allocations for inflight command objects */ +static struct kmem_cache *obj_cache; + +static bool _check_context_queue(struct adreno_context *drawctxt, u32 count) +{ + bool ret; + + spin_lock(&drawctxt->lock); + + /* + * Wake up if there is room in the context or if the whole thing got + * invalidated while we were asleep + */ + + if (kgsl_context_invalid(&drawctxt->base)) + ret = false; + else + ret = ((drawctxt->queued + count) < _context_drawqueue_size) ? 1 : 0; + + spin_unlock(&drawctxt->lock); + + return ret; +} + +static void _pop_drawobj(struct adreno_context *drawctxt) +{ + drawctxt->drawqueue_head = DRAWQUEUE_NEXT(drawctxt->drawqueue_head, + ADRENO_CONTEXT_DRAWQUEUE_SIZE); + drawctxt->queued--; +} + +static int _retire_syncobj(struct kgsl_drawobj_sync *syncobj, + struct adreno_context *drawctxt) +{ + if (!kgsl_drawobj_events_pending(syncobj)) { + _pop_drawobj(drawctxt); + kgsl_drawobj_destroy(DRAWOBJ(syncobj)); + return 0; + } + + /* + * If we got here, there are pending events for sync object. + * Start the canary timer if it hasnt been started already. 
+ */ + if (!syncobj->timeout_jiffies) { + syncobj->timeout_jiffies = jiffies + msecs_to_jiffies(5000); + mod_timer(&syncobj->timer, syncobj->timeout_jiffies); + } + + return -EAGAIN; +} + +static bool _marker_expired(struct kgsl_drawobj_cmd *markerobj) +{ + struct kgsl_drawobj *drawobj = DRAWOBJ(markerobj); + + return (drawobj->flags & KGSL_DRAWOBJ_MARKER) && + kgsl_check_timestamp(drawobj->device, drawobj->context, + markerobj->marker_timestamp); +} + +static void _retire_timestamp(struct kgsl_drawobj *drawobj) +{ + struct kgsl_context *context = drawobj->context; + struct kgsl_device *device = context->device; + + /* + * Write the start and end timestamp to the memstore to keep the + * accounting sane + */ + kgsl_sharedmem_writel(device->memstore, + KGSL_MEMSTORE_OFFSET(context->id, soptimestamp), + drawobj->timestamp); + + kgsl_sharedmem_writel(device->memstore, + KGSL_MEMSTORE_OFFSET(context->id, eoptimestamp), + drawobj->timestamp); + + if (drawobj->flags & KGSL_DRAWOBJ_END_OF_FRAME) + atomic64_inc(&drawobj->context->proc_priv->frame_count); + + /* Retire pending GPU events for the object */ + kgsl_process_event_group(device, &context->events); + + kgsl_drawobj_destroy(drawobj); +} + +static int _retire_markerobj(struct kgsl_drawobj_cmd *cmdobj, + struct adreno_context *drawctxt) +{ + if (_marker_expired(cmdobj)) { + _pop_drawobj(drawctxt); + _retire_timestamp(DRAWOBJ(cmdobj)); + return 0; + } + + /* + * If the marker isn't expired but the SKIP bit + * is set then there are real commands following + * this one in the queue. This means that we + * need to dispatch the command so that we can + * keep the timestamp accounting correct. If + * skip isn't set then we block this queue + * until the dependent timestamp expires + */ + + return test_bit(CMDOBJ_SKIP, &cmdobj->priv) ? 1 : -EAGAIN; +} + +static int _retire_timelineobj(struct kgsl_drawobj *drawobj, + struct adreno_context *drawctxt) +{ + struct kgsl_drawobj_timeline *timelineobj = TIMELINEOBJ(drawobj); + int i; + + for (i = 0; i < timelineobj->count; i++) + kgsl_timeline_signal(timelineobj->timelines[i].timeline, + timelineobj->timelines[i].seqno); + + _pop_drawobj(drawctxt); + _retire_timestamp(drawobj); + + return 0; +} + +static int drawqueue_retire_bindobj(struct kgsl_drawobj *drawobj, + struct adreno_context *drawctxt) +{ + struct kgsl_drawobj_bind *bindobj = BINDOBJ(drawobj); + + if (test_bit(KGSL_BINDOBJ_STATE_DONE, &bindobj->state)) { + _pop_drawobj(drawctxt); + _retire_timestamp(drawobj); + return 0; + } + + if (!test_and_set_bit(KGSL_BINDOBJ_STATE_START, &bindobj->state)) { + /* + * Take a reference to the drawobj and the context because both + * get referenced in the bind callback + */ + _kgsl_context_get(&drawctxt->base); + kref_get(&drawobj->refcount); + + kgsl_sharedmem_bind_ranges(bindobj->bind); + } + + return -EAGAIN; +} + +/* + * Retires all expired marker and sync objs from the context + * queue and returns one of the below + * a) next drawobj that needs to be sent to ringbuffer + * b) -EAGAIN for syncobj with syncpoints pending. + * c) -EAGAIN for markerobj whose marker timestamp has not expired yet. + * c) NULL for no commands remaining in drawqueue. 
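+ * d) -ENOSPC if the next command needs the big IB path while another big IB is already inflight.
+ * e) -EINVAL for an unrecognized draw object type.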
+ */ +static struct kgsl_drawobj *_process_drawqueue_get_next_drawobj( + struct adreno_device *adreno_dev, struct adreno_context *drawctxt) +{ + struct kgsl_drawobj *drawobj; + unsigned int i = drawctxt->drawqueue_head; + struct kgsl_drawobj_cmd *cmdobj; + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + int ret = 0; + + if (drawctxt->drawqueue_head == drawctxt->drawqueue_tail) + return NULL; + + for (i = drawctxt->drawqueue_head; i != drawctxt->drawqueue_tail; + i = DRAWQUEUE_NEXT(i, ADRENO_CONTEXT_DRAWQUEUE_SIZE)) { + + drawobj = drawctxt->drawqueue[i]; + + if (!drawobj) + return NULL; + + switch (drawobj->type) { + case CMDOBJ_TYPE: + cmdobj = CMDOBJ(drawobj); + + /* We only support one big IB inflight */ + if ((cmdobj->numibs > HWSCHED_MAX_DISPATCH_NUMIBS) && + hwsched->big_cmdobj) + return ERR_PTR(-ENOSPC); + + return drawobj; + case SYNCOBJ_TYPE: + ret = _retire_syncobj(SYNCOBJ(drawobj), drawctxt); + break; + case MARKEROBJ_TYPE: + ret = _retire_markerobj(CMDOBJ(drawobj), drawctxt); + /* Special case where marker needs to be sent to GPU */ + if (ret == 1) + return drawobj; + break; + case BINDOBJ_TYPE: + ret = drawqueue_retire_bindobj(drawobj, drawctxt); + break; + case TIMELINEOBJ_TYPE: + ret = _retire_timelineobj(drawobj, drawctxt); + break; + default: + ret = -EINVAL; + break; + } + + if (ret) + return ERR_PTR(ret); + } + + return NULL; +} + +/** + * hwsched_dispatcher_requeue_cmdobj() - Put a command back on the context + * queue + * @drawctxt: Pointer to the adreno draw context + * @cmdobj: Pointer to the KGSL command object to requeue + * + * Failure to submit a command to the ringbuffer isn't the fault of the command + * being submitted so if a failure happens, push it back on the head of the + * context queue to be reconsidered again unless the context got detached. + */ +static inline int hwsched_dispatcher_requeue_cmdobj( + struct adreno_context *drawctxt, + struct kgsl_drawobj_cmd *cmdobj) +{ + unsigned int prev; + struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj); + + spin_lock(&drawctxt->lock); + + if (kgsl_context_is_bad(&drawctxt->base)) { + spin_unlock(&drawctxt->lock); + /* get rid of this drawobj since the context is bad */ + kgsl_drawobj_destroy(drawobj); + return -ENOENT; + } + + prev = drawctxt->drawqueue_head == 0 ? + (ADRENO_CONTEXT_DRAWQUEUE_SIZE - 1) : + (drawctxt->drawqueue_head - 1); + + /* + * The maximum queue size always needs to be one less then the size of + * the ringbuffer queue so there is "room" to put the drawobj back in + */ + + WARN_ON(prev == drawctxt->drawqueue_tail); + + drawctxt->drawqueue[prev] = drawobj; + drawctxt->queued++; + + /* Reset the command queue head to reflect the newly requeued change */ + drawctxt->drawqueue_head = prev; + spin_unlock(&drawctxt->lock); + return 0; +} + +/** + * hwsched_queue_context() - Queue a context in the dispatcher list of jobs + * @adreno_dev: Pointer to the adreno device structure + * @drawctxt: Pointer to the adreno draw context + * + * Add a context to the dispatcher list of jobs. 
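+ * Return: 0 on success (or if the context is detached), -ENOMEM if a job could not be allocated.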
+ */ +static int hwsched_queue_context(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + struct adreno_dispatch_job *job; + + /* Refuse to queue a detached context */ + if (kgsl_context_detached(&drawctxt->base)) + return 0; + + if (!_kgsl_context_get(&drawctxt->base)) + return 0; + + job = kmem_cache_alloc(jobs_cache, GFP_ATOMIC); + if (!job) { + kgsl_context_put(&drawctxt->base); + return -ENOMEM; + } + + job->drawctxt = drawctxt; + + trace_dispatch_queue_context(drawctxt); + llist_add(&job->node, &hwsched->jobs[drawctxt->base.priority]); + + return 0; +} + +void adreno_hwsched_flush(struct adreno_device *adreno_dev) +{ + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + + kthread_flush_worker(hwsched->worker); +} + +static bool hwsched_in_fault(struct adreno_hwsched *hwsched) +{ + /* make sure we're reading the latest value */ + smp_rmb(); + return atomic_read(&hwsched->fault) != 0; +} + +/** + * sendcmd() - Send a drawobj to the GPU hardware + * @dispatcher: Pointer to the adreno dispatcher struct + * @drawobj: Pointer to the KGSL drawobj being sent + * + * Send a KGSL drawobj to the GPU hardware + */ +static int hwsched_sendcmd(struct adreno_device *adreno_dev, + struct kgsl_drawobj_cmd *cmdobj) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj); + struct kgsl_context *context = drawobj->context; + struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context); + int ret; + struct cmd_list_obj *obj; + + obj = kmem_cache_alloc(obj_cache, GFP_KERNEL); + if (!obj) + return -ENOMEM; + + mutex_lock(&device->mutex); + + if (adreno_gpu_halt(adreno_dev) || hwsched_in_fault(hwsched)) { + mutex_unlock(&device->mutex); + kmem_cache_free(obj_cache, obj); + return -EBUSY; + } + + + if (kgsl_context_detached(context)) { + mutex_unlock(&device->mutex); + kmem_cache_free(obj_cache, obj); + return -ENOENT; + } + + hwsched->inflight++; + + if (hwsched->inflight == 1 && + !test_bit(ADRENO_HWSCHED_POWER, &hwsched->flags)) { + ret = adreno_active_count_get(adreno_dev); + if (ret) { + hwsched->inflight--; + mutex_unlock(&device->mutex); + kmem_cache_free(obj_cache, obj); + return ret; + } + set_bit(ADRENO_HWSCHED_POWER, &hwsched->flags); + } + + ret = hwsched->hwsched_ops->submit_cmdobj(adreno_dev, cmdobj); + if (ret) { + /* + * If the first submission failed, then put back the active + * count to relinquish active vote + */ + if (hwsched->inflight == 1) { + adreno_active_count_put(adreno_dev); + clear_bit(ADRENO_HWSCHED_POWER, &hwsched->flags); + } + + hwsched->inflight--; + kmem_cache_free(obj_cache, obj); + mutex_unlock(&device->mutex); + return ret; + } + + if ((hwsched->inflight == 1) && + !test_and_set_bit(ADRENO_HWSCHED_ACTIVE, &hwsched->flags)) + reinit_completion(&hwsched->idle_gate); + + if (cmdobj->numibs > HWSCHED_MAX_DISPATCH_NUMIBS) { + hwsched->big_cmdobj = cmdobj; + kref_get(&drawobj->refcount); + } + + drawctxt->internal_timestamp = drawobj->timestamp; + + obj->cmdobj = cmdobj; + list_add_tail(&obj->node, &hwsched->cmd_list); + mutex_unlock(&device->mutex); + + return 0; +} + +/** + * hwsched_sendcmds() - Send commands from a context to the GPU + * @adreno_dev: Pointer to the adreno device struct + * @drawctxt: Pointer to the adreno context to dispatch commands from + * + * Dequeue and send a burst of commands from the specified context to the GPU + * Returns postive if the context needs 
to be put back on the pending queue + * 0 if the context is empty or detached and negative on error + */ +static int hwsched_sendcmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + int count = 0; + int ret = 0; + unsigned int timestamp; + + while (1) { + struct kgsl_drawobj *drawobj; + struct kgsl_drawobj_cmd *cmdobj; + + spin_lock(&drawctxt->lock); + drawobj = _process_drawqueue_get_next_drawobj(adreno_dev, + drawctxt); + + /* + * adreno_context_get_drawobj returns -EAGAIN if the current + * drawobj has pending sync points so no more to do here. + * When the sync points are satisfied then the context will get + * reqeueued + */ + + if (IS_ERR_OR_NULL(drawobj)) { + if (IS_ERR(drawobj)) + ret = PTR_ERR(drawobj); + spin_unlock(&drawctxt->lock); + break; + } + _pop_drawobj(drawctxt); + spin_unlock(&drawctxt->lock); + + timestamp = drawobj->timestamp; + cmdobj = CMDOBJ(drawobj); + ret = hwsched_sendcmd(adreno_dev, cmdobj); + + /* + * On error from hwsched_sendcmd() try to requeue the cmdobj + * unless we got back -ENOENT which means that the context has + * been detached and there will be no more deliveries from here + */ + if (ret != 0) { + /* Destroy the cmdobj on -ENOENT */ + if (ret == -ENOENT) + kgsl_drawobj_destroy(drawobj); + else { + /* + * If we couldn't put it on dispatch queue + * then return it to the context queue + */ + int r = hwsched_dispatcher_requeue_cmdobj( + drawctxt, cmdobj); + if (r) + ret = r; + } + + break; + } + + drawctxt->submitted_timestamp = timestamp; + + count++; + } + + /* + * Wake up any snoozing threads if we have consumed any real commands + * or marker commands and we have room in the context queue. + */ + + if (_check_context_queue(drawctxt, 0)) + wake_up_all(&drawctxt->wq); + + if (!ret) + ret = count; + + /* Return error or the number of commands queued */ + return ret; +} + +static void hwsched_handle_jobs_list(struct adreno_device *adreno_dev, + int id, unsigned long *map, struct llist_node *list) +{ + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + struct adreno_dispatch_job *job, *next; + + if (!list) + return; + + /* Reverse the list so we deal with oldest submitted contexts first */ + list = llist_reverse_order(list); + + llist_for_each_entry_safe(job, next, list, node) { + int ret; + + if (kgsl_context_is_bad(&job->drawctxt->base)) { + kgsl_context_put(&job->drawctxt->base); + kmem_cache_free(jobs_cache, job); + continue; + } + + /* + * Due to the nature of the lockless queue the same context + * might have multiple jobs on the list. We allow this so we + * don't have to query the list on the producer side but on the + * consumer side we only want each context to be considered + * once. Use a bitmap to remember which contexts we've already + * seen and quietly discard duplicate jobs + */ + if (test_and_set_bit(job->drawctxt->base.id, map)) { + kgsl_context_put(&job->drawctxt->base); + kmem_cache_free(jobs_cache, job); + continue; + } + + ret = hwsched_sendcmds(adreno_dev, job->drawctxt); + + /* + * If the context had nothing queued or the context has been + * destroyed then drop the job + */ + if (!ret || ret == -ENOENT) { + kgsl_context_put(&job->drawctxt->base); + kmem_cache_free(jobs_cache, job); + continue; + } + + /* + * If the dispatch queue is full then requeue the job to be + * considered first next time. 
Otherwise the context + * either successfully submmitted to the GPU or another error + * happened and it should go back on the regular queue + */ + if (ret == -ENOSPC) + llist_add(&job->node, &hwsched->requeue[id]); + else + llist_add(&job->node, &hwsched->jobs[id]); + } +} + +static void hwsched_handle_jobs(struct adreno_device *adreno_dev, int id) +{ + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + unsigned long map[BITS_TO_LONGS(KGSL_MEMSTORE_MAX)]; + struct llist_node *requeue, *jobs; + + memset(map, 0, sizeof(map)); + + requeue = llist_del_all(&hwsched->requeue[id]); + jobs = llist_del_all(&hwsched->jobs[id]); + + hwsched_handle_jobs_list(adreno_dev, id, map, requeue); + hwsched_handle_jobs_list(adreno_dev, id, map, jobs); +} + +/** + * hwsched_issuecmds() - Issue commmands from pending contexts + * @adreno_dev: Pointer to the adreno device struct + * + * Issue as many commands as possible (up to inflight) from the pending contexts + * This function assumes the dispatcher mutex has been locked. + */ +static void hwsched_issuecmds(struct adreno_device *adreno_dev) +{ + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + int i; + + for (i = 0; i < ARRAY_SIZE(hwsched->jobs); i++) + hwsched_handle_jobs(adreno_dev, i); +} + +void adreno_hwsched_trigger(struct adreno_device *adreno_dev) +{ + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + + kthread_queue_work(hwsched->worker, &hwsched->work); +} + +/** + * adreno_hwsched_issuecmds() - Issue commmands from pending contexts + * @adreno_dev: Pointer to the adreno device struct + * + * Lock the dispatcher and call hwsched_issuecmds + */ +static void adreno_hwsched_issuecmds(struct adreno_device *adreno_dev) +{ + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + + /* If the dispatcher is busy then schedule the work for later */ + if (!mutex_trylock(&hwsched->mutex)) { + adreno_hwsched_trigger(adreno_dev); + return; + } + + if (!hwsched_in_fault(hwsched)) + hwsched_issuecmds(adreno_dev); + + mutex_unlock(&hwsched->mutex); +} + +/** + * get_timestamp() - Return the next timestamp for the context + * @drawctxt - Pointer to an adreno draw context struct + * @drawobj - Pointer to a drawobj + * @timestamp - Pointer to a timestamp value possibly passed from the user + * @user_ts - user generated timestamp + * + * Assign a timestamp based on the settings of the draw context and the command + * batch. 
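+ * Return: 0 on success or -ERANGE if a user supplied timestamp does not advance past the last timestamp issued on this context.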
+ */ +static int get_timestamp(struct adreno_context *drawctxt, + struct kgsl_drawobj *drawobj, unsigned int *timestamp, + unsigned int user_ts) +{ + + if (drawctxt->base.flags & KGSL_CONTEXT_USER_GENERATED_TS) { + /* + * User specified timestamps need to be greater than the last + * issued timestamp in the context + */ + if (timestamp_cmp(drawctxt->timestamp, user_ts) >= 0) + return -ERANGE; + + drawctxt->timestamp = user_ts; + } else + drawctxt->timestamp++; + + *timestamp = drawctxt->timestamp; + drawobj->timestamp = *timestamp; + return 0; +} + +static inline int _check_context_state(struct kgsl_context *context) +{ + if (kgsl_context_invalid(context)) + return -EDEADLK; + + if (kgsl_context_detached(context)) + return -ENOENT; + + return 0; +} + +static inline bool _verify_ib(struct kgsl_device_private *dev_priv, + struct kgsl_context *context, struct kgsl_memobj_node *ib) +{ + struct kgsl_device *device = dev_priv->device; + struct kgsl_process_private *private = dev_priv->process_priv; + + /* The maximum allowable size for an IB in the CP is 0xFFFFF dwords */ + if (ib->size == 0 || ((ib->size >> 2) > 0xFFFFF)) { + pr_context(device, context, "ctxt %d invalid ib size %lld\n", + context->id, ib->size); + return false; + } + + /* Make sure that the address is mapped */ + if (!kgsl_mmu_gpuaddr_in_range(private->pagetable, ib->gpuaddr)) { + pr_context(device, context, "ctxt %d invalid ib gpuaddr %llX\n", + context->id, ib->gpuaddr); + return false; + } + + return true; +} + +static inline int _verify_cmdobj(struct kgsl_device_private *dev_priv, + struct kgsl_context *context, struct kgsl_drawobj *drawobj[], + uint32_t count) +{ + struct kgsl_device *device = dev_priv->device; + struct kgsl_memobj_node *ib; + unsigned int i; + + for (i = 0; i < count; i++) { + /* Verify the IBs before they get queued */ + if (drawobj[i]->type == CMDOBJ_TYPE) { + struct kgsl_drawobj_cmd *cmdobj = CMDOBJ(drawobj[i]); + + list_for_each_entry(ib, &cmdobj->cmdlist, node) + if (!_verify_ib(dev_priv, + &ADRENO_CONTEXT(context)->base, ib)) + return -EINVAL; + + /* + * Clear the wake on touch bit to indicate an IB has + * been submitted since the last time we set it. + * But only clear it when we have rendering commands. + */ + ADRENO_DEVICE(device)->wake_on_touch = false; + } + } + + return 0; +} + +static inline int _wait_for_room_in_context_queue( + struct adreno_context *drawctxt, u32 count) +{ + int ret = 0; + + /* + * There is always a possibility that dispatcher may end up pushing + * the last popped draw object back to the context drawqueue. Hence, + * we can only queue up to _context_drawqueue_size - 1 here to make + * sure we never let drawqueue->queued exceed _context_drawqueue_size. 
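+ * If we do sleep, the caller gets 0 back once there is room again, -ETIMEDOUT if the queue never drained, or another negative error if the wait was interrupted or the context went bad in the meantime.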
+ */ + if ((drawctxt->queued + count) > (_context_drawqueue_size - 1)) { + trace_adreno_drawctxt_sleep(drawctxt); + spin_unlock(&drawctxt->lock); + + ret = wait_event_interruptible_timeout(drawctxt->wq, + _check_context_queue(drawctxt, count), + msecs_to_jiffies(_context_queue_wait)); + + spin_lock(&drawctxt->lock); + trace_adreno_drawctxt_wake(drawctxt); + + /* + * Account for the possibility that the context got invalidated + * while we were sleeping + */ + if (ret > 0) + ret = _check_context_state(&drawctxt->base); + else if (ret == 0) + ret = -ETIMEDOUT; + } + + return ret; +} + +static unsigned int _check_context_state_to_queue_cmds( + struct adreno_context *drawctxt, u32 count) +{ + int ret = _check_context_state(&drawctxt->base); + + if (ret) + return ret; + + return _wait_for_room_in_context_queue(drawctxt, count); +} + +static void _queue_drawobj(struct adreno_context *drawctxt, + struct kgsl_drawobj *drawobj) +{ + /* Put the command into the queue */ + drawctxt->drawqueue[drawctxt->drawqueue_tail] = drawobj; + drawctxt->drawqueue_tail = (drawctxt->drawqueue_tail + 1) % + ADRENO_CONTEXT_DRAWQUEUE_SIZE; + drawctxt->queued++; + trace_adreno_cmdbatch_queued(drawobj, drawctxt->queued); +} + +static int _queue_cmdobj(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, struct kgsl_drawobj_cmd *cmdobj, + uint32_t *timestamp, unsigned int user_ts) +{ + struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj); + u32 j; + int ret; + + ret = get_timestamp(drawctxt, drawobj, timestamp, user_ts); + if (ret) + return ret; + + /* + * If this is a real command then we need to force any markers + * queued before it to dispatch to keep time linear - set the + * skip bit so the commands get NOPed. + */ + j = drawctxt->drawqueue_head; + + while (j != drawctxt->drawqueue_tail) { + if (drawctxt->drawqueue[j]->type == MARKEROBJ_TYPE) { + struct kgsl_drawobj_cmd *markerobj = + CMDOBJ(drawctxt->drawqueue[j]); + + set_bit(CMDOBJ_SKIP, &markerobj->priv); + } + + j = DRAWQUEUE_NEXT(j, ADRENO_CONTEXT_DRAWQUEUE_SIZE); + } + + drawctxt->queued_timestamp = *timestamp; + + _queue_drawobj(drawctxt, drawobj); + + return 0; +} + +static void _queue_syncobj(struct adreno_context *drawctxt, + struct kgsl_drawobj_sync *syncobj, uint32_t *timestamp) +{ + struct kgsl_drawobj *drawobj = DRAWOBJ(syncobj); + + *timestamp = 0; + drawobj->timestamp = 0; + + _queue_drawobj(drawctxt, drawobj); +} + +static int _queue_markerobj(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, struct kgsl_drawobj_cmd *markerobj, + u32 *timestamp, u32 user_ts) +{ + struct kgsl_drawobj *drawobj = DRAWOBJ(markerobj); + int ret; + + ret = get_timestamp(drawctxt, drawobj, timestamp, user_ts); + if (ret) + return ret; + + /* + * See if we can fastpath this thing - if nothing is queued + * and nothing is inflight retire without bothering the GPU + */ + if (!drawctxt->queued && kgsl_check_timestamp(drawobj->device, + drawobj->context, drawctxt->queued_timestamp)) { + _retire_timestamp(drawobj); + return 1; + } + + /* + * Remember the last queued timestamp - the marker will block + * until that timestamp is expired (unless another command + * comes along and forces the marker to execute) + */ + markerobj->marker_timestamp = drawctxt->queued_timestamp; + drawctxt->queued_timestamp = *timestamp; + + _queue_drawobj(drawctxt, drawobj); + + return 0; +} + +static int _queue_auxobj(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, struct kgsl_drawobj *drawobj, + u32 *timestamp, u32 user_ts) +{ + int ret; + + ret = 
get_timestamp(drawctxt, drawobj, timestamp, user_ts); + if (ret) + return ret; + + drawctxt->queued_timestamp = *timestamp; + _queue_drawobj(drawctxt, drawobj); + + return 0; +} + +static int adreno_hwsched_queue_cmds(struct kgsl_device_private *dev_priv, + struct kgsl_context *context, struct kgsl_drawobj *drawobj[], + u32 count, u32 *timestamp) + +{ + struct kgsl_device *device = dev_priv->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_context *drawctxt = ADRENO_CONTEXT(context); + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + struct adreno_dispatch_job *job; + int ret; + unsigned int i, user_ts; + + /* + * There is always a possibility that dispatcher may end up pushing + * the last popped draw object back to the context drawqueue. Hence, + * we can only queue up to _context_drawqueue_size - 1 here to make + * sure we never let drawqueue->queued exceed _context_drawqueue_size. + */ + if (!count || count > _context_drawqueue_size - 1) + return -EINVAL; + + for (i = 0; i < count; i++) { + struct kgsl_drawobj_cmd *cmdobj; + struct kgsl_memobj_node *ib; + + if (drawobj[i]->type != CMDOBJ_TYPE) + continue; + + cmdobj = CMDOBJ(drawobj[i]); + + list_for_each_entry(ib, &cmdobj->cmdlist, node) + cmdobj->numibs++; + + if (cmdobj->numibs > HWSCHED_MAX_IBS) + return -EINVAL; + } + + ret = _check_context_state(&drawctxt->base); + if (ret) + return ret; + + ret = _verify_cmdobj(dev_priv, context, drawobj, count); + if (ret) + return ret; + + /* wait for the suspend gate */ + wait_for_completion(&device->halt_gate); + + job = kmem_cache_alloc(jobs_cache, GFP_KERNEL); + if (!job) + return -ENOMEM; + + job->drawctxt = drawctxt; + + spin_lock(&drawctxt->lock); + + ret = _check_context_state_to_queue_cmds(drawctxt, count); + if (ret) { + spin_unlock(&drawctxt->lock); + kmem_cache_free(jobs_cache, job); + return ret; + } + + user_ts = *timestamp; + + for (i = 0; i < count; i++) { + + switch (drawobj[i]->type) { + case MARKEROBJ_TYPE: + ret = _queue_markerobj(adreno_dev, drawctxt, + CMDOBJ(drawobj[i]), + timestamp, user_ts); + if (ret == 1) { + spin_unlock(&drawctxt->lock); + kmem_cache_free(jobs_cache, job); + return 0; + } else if (ret) { + spin_unlock(&drawctxt->lock); + kmem_cache_free(jobs_cache, job); + return ret; + } + break; + case CMDOBJ_TYPE: + ret = _queue_cmdobj(adreno_dev, drawctxt, + CMDOBJ(drawobj[i]), + timestamp, user_ts); + if (ret) { + spin_unlock(&drawctxt->lock); + kmem_cache_free(jobs_cache, job); + return ret; + } + break; + case SYNCOBJ_TYPE: + _queue_syncobj(drawctxt, SYNCOBJ(drawobj[i]), + timestamp); + break; + case BINDOBJ_TYPE: + case TIMELINEOBJ_TYPE: + ret = _queue_auxobj(adreno_dev, drawctxt, drawobj[i], + timestamp, user_ts); + if (ret) { + spin_unlock(&drawctxt->lock); + kmem_cache_free(jobs_cache, job); + return ret; + } + break; + default: + spin_unlock(&drawctxt->lock); + kmem_cache_free(jobs_cache, job); + return -EINVAL; + } + + } + + spin_unlock(&drawctxt->lock); + + /* Add the context to the dispatcher pending list */ + if (_kgsl_context_get(&drawctxt->base)) { + trace_dispatch_queue_context(drawctxt); + llist_add(&job->node, &hwsched->jobs[drawctxt->base.priority]); + adreno_hwsched_issuecmds(adreno_dev); + + } else + kmem_cache_free(jobs_cache, job); + + return 0; +} + +static void retire_cmdobj(struct adreno_hwsched *hwsched, + struct kgsl_drawobj_cmd *cmdobj) +{ + struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj); + struct kgsl_mem_entry *entry; + struct kgsl_drawobj_profiling_buffer *profile_buffer; + + if (cmdobj != 
NULL) { + if (drawobj->flags & KGSL_DRAWOBJ_END_OF_FRAME) + atomic64_inc(&drawobj->context->proc_priv->frame_count); + + entry = cmdobj->profiling_buf_entry; + if (entry) { + profile_buffer = kgsl_gpuaddr_to_vaddr(&entry->memdesc, + cmdobj->profiling_buffer_gpuaddr); + + if (profile_buffer == NULL) + return; + + kgsl_memdesc_unmap(&entry->memdesc); + } + } + + if (hwsched->big_cmdobj == cmdobj) { + hwsched->big_cmdobj = NULL; + kgsl_drawobj_put(drawobj); + } + + kgsl_drawobj_destroy(drawobj); +} + +static int retire_cmd_list(struct adreno_device *adreno_dev) +{ + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int count = 0; + struct cmd_list_obj *obj, *tmp; + + list_for_each_entry_safe(obj, tmp, &hwsched->cmd_list, node) { + struct kgsl_drawobj_cmd *cmdobj = obj->cmdobj; + struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj); + + if (!kgsl_check_timestamp(device, drawobj->context, + drawobj->timestamp)) + continue; + + retire_cmdobj(hwsched, cmdobj); + + list_del_init(&obj->node); + + kmem_cache_free(obj_cache, obj); + + hwsched->inflight--; + + count++; + } + + return count; +} + +/* Take down the dispatcher and release any power states */ +static void hwsched_power_down(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + + mutex_lock(&device->mutex); + + if (test_and_clear_bit(ADRENO_HWSCHED_ACTIVE, &hwsched->flags)) + complete_all(&hwsched->idle_gate); + + if (test_bit(ADRENO_HWSCHED_POWER, &hwsched->flags)) { + adreno_active_count_put(adreno_dev); + clear_bit(ADRENO_HWSCHED_POWER, &hwsched->flags); + } + + mutex_unlock(&device->mutex); +} + +static void adreno_hwsched_queue_context(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + hwsched_queue_context(adreno_dev, drawctxt); + adreno_hwsched_trigger(adreno_dev); +} + +void adreno_hwsched_start(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + complete_all(&device->halt_gate); + + adreno_hwsched_trigger(adreno_dev); +} + +static int _skipsaverestore_store(struct adreno_device *adreno_dev, bool val) +{ + return adreno_power_cycle_bool(adreno_dev, + &adreno_dev->preempt.skipsaverestore, val); +} + +static bool _skipsaverestore_show(struct adreno_device *adreno_dev) +{ + return adreno_dev->preempt.skipsaverestore; +} + +static int _usesgmem_store(struct adreno_device *adreno_dev, bool val) +{ + return adreno_power_cycle_bool(adreno_dev, + &adreno_dev->preempt.usesgmem, val); +} + +static bool _usesgmem_show(struct adreno_device *adreno_dev) +{ + return adreno_dev->preempt.usesgmem; +} + +static int _preempt_level_store(struct adreno_device *adreno_dev, + unsigned int val) +{ + return adreno_power_cycle_u32(adreno_dev, + &adreno_dev->preempt.preempt_level, + min_t(unsigned int, val, 2)); +} + +static unsigned int _preempt_level_show(struct adreno_device *adreno_dev) +{ + return adreno_dev->preempt.preempt_level; +} + +static void change_preemption(struct adreno_device *adreno_dev, void *priv) +{ + change_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv); +} + +static int _preemption_store(struct adreno_device *adreno_dev, bool val) +{ + if (!(ADRENO_FEATURE(adreno_dev, ADRENO_PREEMPTION)) || + (test_bit(ADRENO_DEVICE_PREEMPTION, + &adreno_dev->priv) == val)) + return 0; + + return adreno_power_cycle(adreno_dev, change_preemption, NULL); +} + +static bool _preemption_show(struct adreno_device *adreno_dev) +{ + 
return adreno_is_preemption_enabled(adreno_dev); +} + +static unsigned int _preempt_count_show(struct adreno_device *adreno_dev) +{ + const struct adreno_hwsched_ops *hwsched_ops = + adreno_dev->hwsched.hwsched_ops; + + return hwsched_ops->preempt_count(adreno_dev); +} + +static int _ft_long_ib_detect_store(struct adreno_device *adreno_dev, bool val) +{ + return adreno_power_cycle_bool(adreno_dev, &adreno_dev->long_ib_detect, + val); +} + +static bool _ft_long_ib_detect_show(struct adreno_device *adreno_dev) +{ + return adreno_dev->long_ib_detect; +} + +static ADRENO_SYSFS_BOOL(preemption); +static ADRENO_SYSFS_U32(preempt_level); +static ADRENO_SYSFS_BOOL(usesgmem); +static ADRENO_SYSFS_BOOL(skipsaverestore); +static ADRENO_SYSFS_RO_U32(preempt_count); +static ADRENO_SYSFS_BOOL(ft_long_ib_detect); + +static const struct attribute *_hwsched_attr_list[] = { + &adreno_attr_preemption.attr.attr, + &adreno_attr_preempt_level.attr.attr, + &adreno_attr_usesgmem.attr.attr, + &adreno_attr_skipsaverestore.attr.attr, + &adreno_attr_preempt_count.attr.attr, + &adreno_attr_ft_long_ib_detect.attr.attr, + NULL, +}; + +static void adreno_hwsched_dispatcher_close(struct adreno_device *adreno_dev) +{ + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (!IS_ERR_OR_NULL(hwsched->worker)) + kthread_destroy_worker(hwsched->worker); + + adreno_set_dispatch_ops(adreno_dev, NULL); + + kmem_cache_destroy(jobs_cache); + kmem_cache_destroy(obj_cache); + + sysfs_remove_files(&device->dev->kobj, _hwsched_attr_list); + + kfree(hwsched->ctxt_bad); +} + +static void force_retire_timestamp(struct kgsl_device *device, + struct kgsl_drawobj *drawobj) +{ + kgsl_sharedmem_writel(device->memstore, + KGSL_MEMSTORE_OFFSET(drawobj->context->id, soptimestamp), + drawobj->timestamp); + + kgsl_sharedmem_writel(device->memstore, + KGSL_MEMSTORE_OFFSET(drawobj->context->id, eoptimestamp), + drawobj->timestamp); +} + +static void adreno_hwsched_replay(struct adreno_device *adreno_dev) +{ + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct cmd_list_obj *obj, *tmp; + u32 retired = 0; + + list_for_each_entry_safe(obj, tmp, &hwsched->cmd_list, node) { + struct kgsl_drawobj_cmd *cmdobj = obj->cmdobj; + struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj); + struct kgsl_context *context = drawobj->context; + + /* + * Get rid of retired objects or objects that belong to detached + * or invalidated contexts + */ + if ((kgsl_check_timestamp(device, context, drawobj->timestamp)) + || kgsl_context_is_bad(context)) { + + retire_cmdobj(hwsched, cmdobj); + retired++; + list_del_init(&obj->node); + kmem_cache_free(obj_cache, obj); + hwsched->inflight--; + + continue; + } + + hwsched->hwsched_ops->submit_cmdobj(adreno_dev, cmdobj); + } + + /* Signal fences */ + if (retired) + kgsl_process_event_groups(device); +} + +static void do_fault_header(struct adreno_device *adreno_dev, + struct kgsl_drawobj *drawobj) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context); + u32 status, rptr, wptr, ib1sz, ib2sz; + u64 ib1base, ib2base; + + adreno_readreg(adreno_dev, ADRENO_REG_RBBM_STATUS, &status); + adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_RPTR, &rptr); + adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_WPTR, &wptr); + adreno_readreg64(adreno_dev, ADRENO_REG_CP_IB1_BASE, + ADRENO_REG_CP_IB1_BASE_HI, &ib1base); + adreno_readreg(adreno_dev, 
ADRENO_REG_CP_IB1_BUFSZ, &ib1sz); + adreno_readreg64(adreno_dev, ADRENO_REG_CP_IB2_BASE, + ADRENO_REG_CP_IB2_BASE_HI, &ib2base); + adreno_readreg(adreno_dev, ADRENO_REG_CP_IB2_BUFSZ, &ib2sz); + + drawobj->context->last_faulted_cmd_ts = drawobj->timestamp; + drawobj->context->total_fault_count++; + + pr_context(device, drawobj->context, + "ctx %d ctx_type %s ts %d status %8.8X dispatch_queue=%d rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n", + drawobj->context->id, kgsl_context_type(drawctxt->type), + drawobj->timestamp, status, + drawobj->context->gmu_dispatch_queue, rptr, wptr, + ib1base, ib1sz, ib2base, ib2sz); + + trace_adreno_gpu_fault(drawobj->context->id, drawobj->timestamp, status, + rptr, wptr, ib1base, ib1sz, ib2base, ib2sz, + adreno_get_level(drawobj->context->priority)); +} + +static struct cmd_list_obj *get_active_cmdobj( + struct adreno_device *adreno_dev) +{ + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + struct cmd_list_obj *obj, *tmp, *active_obj = NULL; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 consumed = 0, retired = 0, prio = UINT_MAX; + struct kgsl_drawobj *drawobj = NULL; + + list_for_each_entry_safe(obj, tmp, &hwsched->cmd_list, node) { + drawobj = DRAWOBJ(obj->cmdobj); + + kgsl_readtimestamp(device, drawobj->context, + KGSL_TIMESTAMP_CONSUMED, &consumed); + kgsl_readtimestamp(device, drawobj->context, + KGSL_TIMESTAMP_RETIRED, &retired); + + if (!consumed) + continue; + + if (consumed == retired) + continue; + + /* Find the first submission that started but didn't finish */ + if (!active_obj) { + active_obj = obj; + prio = adreno_get_level(drawobj->context->priority); + continue; + } + + /* Find the highest priority active submission */ + if (adreno_get_level(drawobj->context->priority) < prio) { + active_obj = obj; + prio = adreno_get_level(drawobj->context->priority); + } + } + + if (active_obj) { + drawobj = DRAWOBJ(active_obj->cmdobj); + + if (kref_get_unless_zero(&drawobj->refcount)) { + set_bit(CMDOBJ_FAULT, &active_obj->cmdobj->priv); + return active_obj; + } + } + + return NULL; +} + +static struct cmd_list_obj *get_fault_cmdobj(struct adreno_device *adreno_dev) +{ + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + struct cmd_list_obj *obj, *tmp; + struct hfi_context_bad_cmd *bad = hwsched->ctxt_bad; + + list_for_each_entry_safe(obj, tmp, &hwsched->cmd_list, node) { + struct kgsl_drawobj *drawobj = DRAWOBJ(obj->cmdobj); + + if ((bad->ctxt_id == drawobj->context->id) && + (bad->ts == drawobj->timestamp)) { + if (kref_get_unless_zero(&drawobj->refcount)) { + set_bit(CMDOBJ_FAULT, &obj->cmdobj->priv); + return obj; + } + } + } + + return NULL; +} + +static bool context_is_throttled(struct kgsl_device *device, + struct kgsl_context *context) +{ + if (ktime_ms_delta(ktime_get(), context->fault_time) > + _fault_throttle_time) { + context->fault_time = ktime_get(); + context->fault_count = 1; + return false; + } + + context->fault_count++; + + if (context->fault_count > _fault_throttle_burst) { + pr_context(device, context, + "gpu fault threshold exceeded %d faults in %d msecs\n", + _fault_throttle_burst, _fault_throttle_time); + return true; + } + + return false; +} +static void reset_and_snapshot(struct adreno_device *adreno_dev, int fault) +{ + struct kgsl_drawobj *drawobj = NULL; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_context *context = NULL; + struct cmd_list_obj *obj; + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + struct hfi_context_bad_cmd *cmd = 
adreno_dev->hwsched.ctxt_bad; + + if (device->state != KGSL_STATE_ACTIVE) + return; + + /* + * First, try to see if the faulted command object is marked + * in case there was a context bad hfi. But, with stall-on-fault, + * we know that GMU cannot send context bad hfi. Hence, attempt + * to walk the list of active submissions to find the one that + * faulted. + */ + obj = get_fault_cmdobj(adreno_dev); + if (!obj && (fault & ADRENO_IOMMU_PAGE_FAULT)) + obj = get_active_cmdobj(adreno_dev); + + if (!obj) { + kgsl_device_snapshot(device, NULL, false); + goto done; + } + + drawobj = DRAWOBJ(obj->cmdobj); + + context = drawobj->context; + + do_fault_header(adreno_dev, drawobj); + + kgsl_device_snapshot(device, context, false); + + force_retire_timestamp(device, drawobj); + + if ((context->flags & KGSL_CONTEXT_INVALIDATE_ON_FAULT) || + (context->flags & KGSL_CONTEXT_NO_FAULT_TOLERANCE) || + (cmd->error == GMU_GPU_SW_HANG) || + context_is_throttled(device, context)) { + adreno_drawctxt_set_guilty(device, context); + } + + /* + * Put back the reference which we incremented while trying to find + * faulted command object + */ + kgsl_drawobj_put(drawobj); +done: + memset(adreno_dev->hwsched.ctxt_bad, 0x0, HFI_MAX_MSG_SIZE); + gpudev->reset(adreno_dev); +} + +static bool adreno_hwsched_do_fault(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + int fault; + + fault = atomic_xchg(&hwsched->fault, 0); + if (fault == 0) + return false; + + mutex_lock(&device->mutex); + + reset_and_snapshot(adreno_dev, fault); + + adreno_hwsched_replay(adreno_dev); + + adreno_hwsched_trigger(adreno_dev); + + mutex_unlock(&device->mutex); + + return true; +} + +static void adreno_hwsched_work(struct kthread_work *work) +{ + struct adreno_hwsched *hwsched = container_of(work, + struct adreno_hwsched, work); + struct adreno_device *adreno_dev = container_of(hwsched, + struct adreno_device, hwsched); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int count = 0; + + mutex_lock(&hwsched->mutex); + + if (adreno_hwsched_do_fault(adreno_dev)) { + mutex_unlock(&hwsched->mutex); + return; + } + + /* + * As long as there are inflight commands, process retired comamnds from + * all drawqueues + */ + count += retire_cmd_list(adreno_dev); + + /* Signal fences */ + kgsl_process_event_groups(device); + + /* Run the scheduler for to dispatch new commands */ + hwsched_issuecmds(adreno_dev); + + if (hwsched->inflight == 0) { + hwsched_power_down(adreno_dev); + } else { + mutex_lock(&device->mutex); + kgsl_pwrscale_update(device); + kgsl_start_idle_timer(device); + mutex_unlock(&device->mutex); + } + + mutex_unlock(&hwsched->mutex); +} + +void adreno_hwsched_fault(struct adreno_device *adreno_dev, + u32 fault) +{ + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + u32 curr = atomic_read(&hwsched->fault); + + atomic_set(&hwsched->fault, curr | fault); + + /* make sure fault is written before triggering dispatcher */ + smp_wmb(); + + adreno_hwsched_trigger(adreno_dev); +} + +static const struct adreno_dispatch_ops hwsched_ops = { + .close = adreno_hwsched_dispatcher_close, + .queue_cmds = adreno_hwsched_queue_cmds, + .queue_context = adreno_hwsched_queue_context, + .fault = adreno_hwsched_fault, + .idle = adreno_hwsched_idle, +}; + +int adreno_hwsched_init(struct adreno_device *adreno_dev, + const struct adreno_hwsched_ops *target_hwsched_ops) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_hwsched 
*hwsched = &adreno_dev->hwsched; + int i; + + memset(hwsched, 0, sizeof(*hwsched)); + + hwsched->ctxt_bad = kzalloc(HFI_MAX_MSG_SIZE, GFP_KERNEL); + if (!hwsched->ctxt_bad) + return -ENOMEM; + + hwsched->worker = kthread_create_worker(0, "kgsl_hwsched"); + if (IS_ERR(hwsched->worker)) { + kfree(hwsched->ctxt_bad); + return PTR_ERR(hwsched->worker); + } + + mutex_init(&hwsched->mutex); + + kthread_init_work(&hwsched->work, adreno_hwsched_work); + + jobs_cache = KMEM_CACHE(adreno_dispatch_job, 0); + obj_cache = KMEM_CACHE(cmd_list_obj, 0); + + INIT_LIST_HEAD(&hwsched->cmd_list); + + for (i = 0; i < ARRAY_SIZE(hwsched->jobs); i++) { + init_llist_head(&hwsched->jobs[i]); + init_llist_head(&hwsched->requeue[i]); + } + + sched_set_fifo(hwsched->worker->task); + + sysfs_create_files(&device->dev->kobj, _hwsched_attr_list); + adreno_set_dispatch_ops(adreno_dev, &hwsched_ops); + hwsched->hwsched_ops = target_hwsched_ops; + init_completion(&hwsched->idle_gate); + complete_all(&hwsched->idle_gate); + return 0; +} + +void adreno_hwsched_parse_fault_cmdobj(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot) +{ + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + struct cmd_list_obj *obj, *tmp; + + list_for_each_entry_safe(obj, tmp, &hwsched->cmd_list, node) { + struct kgsl_drawobj_cmd *cmdobj = obj->cmdobj; + + if (test_bit(CMDOBJ_FAULT, &cmdobj->priv)) { + struct kgsl_memobj_node *ib; + + list_for_each_entry(ib, &cmdobj->cmdlist, node) { + adreno_parse_ib(KGSL_DEVICE(adreno_dev), + snapshot, snapshot->process, + ib->gpuaddr, ib->size >> 2); + } + clear_bit(CMDOBJ_FAULT, &cmdobj->priv); + } + } +} + +static int unregister_context(int id, void *ptr, void *data) +{ + struct kgsl_context *context = ptr; + + /* + * We don't need to send the unregister hfi packet because + * we are anyway going to lose the gmu state of registered + * contexts. So just reset the flag so that the context + * registers with gmu on its first submission post slumber. 
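+ *
+ * Editor's note (illustrative, not part of the original patch): this
+ * callback is applied to every context in the device IDR, so after a
+ * recovery each context simply re-registers with the GMU on its next
+ * submission. The minimal calling pattern is:
+ *
+ *	read_lock(&device->context_lock);
+ *	idr_for_each(&device->context_idr, unregister_context, NULL);
+ *	read_unlock(&device->context_lock);
+ *
+ * which is exactly what adreno_hwsched_unregister_contexts() below does.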
+ */ + context->gmu_registered = false; + + return 0; +} + +void adreno_hwsched_unregister_contexts(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + read_lock(&device->context_lock); + idr_for_each(&device->context_idr, unregister_context, NULL); + read_unlock(&device->context_lock); +} + +static int hwsched_idle(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + int ret; + + /* Block any new submissions from being submitted */ + adreno_get_gpu_halt(adreno_dev); + + mutex_unlock(&device->mutex); + + /* + * Flush the worker to make sure all executing + * or pending dispatcher works on worker are + * finished + */ + adreno_hwsched_flush(adreno_dev); + + ret = wait_for_completion_timeout(&hwsched->idle_gate, + msecs_to_jiffies(ADRENO_IDLE_TIMEOUT)); + if (ret == 0) { + ret = -ETIMEDOUT; + WARN(1, "hwsched halt timeout\n"); + } else if (ret < 0) { + dev_err(device->dev, "hwsched halt failed %d\n", ret); + } else { + ret = 0; + } + + mutex_lock(&device->mutex); + + /* + * This will allow the dispatcher to start submitting to + * hardware once device mutex is released + */ + adreno_put_gpu_halt(adreno_dev); + + /* + * Requeue dispatcher work to resubmit pending commands + * that may have been blocked due to this idling request + */ + adreno_hwsched_trigger(adreno_dev); + return ret; +} + +int adreno_hwsched_idle(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; + unsigned long wait = jiffies + msecs_to_jiffies(ADRENO_IDLE_TIMEOUT); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + int ret; + + if (WARN_ON(!mutex_is_locked(&device->mutex))) + return -EDEADLK; + + if (!kgsl_state_is_awake(device)) + return 0; + + ret = hwsched_idle(adreno_dev); + if (ret) + return ret; + + do { + if (hwsched_in_fault(hwsched)) + return -EDEADLK; + + if (gpudev->hw_isidle(adreno_dev)) + return 0; + } while (time_before(jiffies, wait)); + + /* + * Under rare conditions, preemption can cause the while loop to exit + * without checking if the gpu is idle. check one last time before we + * return failure. + */ + if (hwsched_in_fault(hwsched)) + return -EDEADLK; + + if (gpudev->hw_isidle(adreno_dev)) + return 0; + + return -ETIMEDOUT; +} diff --git a/adreno_hwsched.h b/adreno_hwsched.h new file mode 100644 index 0000000000..4ae34fad48 --- /dev/null +++ b/adreno_hwsched.h @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. 
+ */ + +#ifndef _ADRENO_HWSCHED_H_ +#define _ADRENO_HWSCHED_H_ + +/** + * struct adreno_hwsched_ops - Function table to hook hwscheduler things + * to target specific routines + */ +struct adreno_hwsched_ops { + /** + * @submit_cmdobj - Target specific function to submit IBs to hardware + */ + int (*submit_cmdobj)(struct adreno_device *adreno_dev, + struct kgsl_drawobj_cmd *cmdobj); + /** + * @preempt_count - Target specific function to get preemption count + */ + u32 (*preempt_count)(struct adreno_device *adreno_dev); +}; + +/** + * struct adreno_hwsched - Container for the hardware scheduler + */ +struct adreno_hwsched { + /** @mutex: Mutex needed to run dispatcher function */ + struct mutex mutex; + /** @flags: Container for the dispatcher internal flags */ + unsigned long flags; + /** @inflight: Number of active submissions to the dispatch queues */ + u32 inflight; + /** @jobs - Array of dispatch job lists for each priority level */ + struct llist_head jobs[16]; + /** @requeue - Array of lists for dispatch jobs that got requeued */ + struct llist_head requeue[16]; + /** @work: The work structure to execute dispatcher function */ + struct kthread_work work; + /** @cmd_list: List of objects submitted to dispatch queues */ + struct list_head cmd_list; + /** @fault: Atomic to record a fault */ + atomic_t fault; + struct kthread_worker *worker; + /** @hwsched_ops: Container for target specific hwscheduler ops */ + const struct adreno_hwsched_ops *hwsched_ops; + /** @ctxt_bad: Container for the context bad hfi packet */ + void *ctxt_bad; + /** @idle_gate: Gate to wait on for hwscheduler to idle */ + struct completion idle_gate; + /** @big_cmdobj = Points to the big IB that is inflight */ + struct kgsl_drawobj_cmd *big_cmdobj; +}; + +/* + * This value is based on maximum number of IBs that can fit + * in the ringbuffer. + */ +#define HWSCHED_MAX_IBS 2000 + +enum adreno_hwsched_flags { + ADRENO_HWSCHED_POWER = 0, + ADRENO_HWSCHED_ACTIVE, +}; + +/** + * adreno_hwsched_trigger - Function to schedule the hwsched thread + * @adreno_dev: A handle to adreno device + * + * Schedule the hw dispatcher for retiring and submitting command objects + */ +void adreno_hwsched_trigger(struct adreno_device *adreno_dev); + +/** + * adreno_hwsched_start() - activate the hwsched dispatcher + * @adreno_dev: pointer to the adreno device + * + * Enable dispatcher thread to execute + */ +void adreno_hwsched_start(struct adreno_device *adreno_dev); +/** + * adreno_hwsched_dispatcher_init() - Initialize the hwsched dispatcher + * @adreno_dev: pointer to the adreno device + * @hwsched_ops: Pointer to target specific hwsched ops + * + * Set up the dispatcher resources. + * Return: 0 on success or negative on failure. 
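+ *
+ * Editor's note (illustrative sketch; the ops names here are assumed, not
+ * taken from this patch): a target back end provides its own
+ * adreno_hwsched_ops table and passes it in at probe time, roughly:
+ *
+ *	static const struct adreno_hwsched_ops a6xx_hwsched_ops = {
+ *		.submit_cmdobj = a6xx_hwsched_submit_cmdobj,
+ *		.preempt_count = a6xx_hwsched_preempt_count,
+ *	};
+ *
+ *	ret = adreno_hwsched_init(adreno_dev, &a6xx_hwsched_ops);
+ *	if (ret)
+ *		return ret;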
+ */ +int adreno_hwsched_init(struct adreno_device *adreno_dev, + const struct adreno_hwsched_ops *hwsched_ops); + +/** + * adreno_hwsched_fault - Set hwsched fault to request recovery + * @adreno_dev: A handle to adreno device + * @fault: The type of fault + */ +void adreno_hwsched_fault(struct adreno_device *adreno_dev, u32 fault); + +/** + * adreno_hwsched_parse_fault_ib - Parse the faulty submission + * @adreno_dev: pointer to the adreno device + * @snapshot: Pointer to the snapshot structure + * + * Walk the list of active submissions to find the one that faulted and + * parse it so that relevant command buffers can be added to the snapshot + */ +void adreno_hwsched_parse_fault_cmdobj(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot); + +void adreno_hwsched_flush(struct adreno_device *adreno_dev); + +/** + * adreno_hwsched_unregister_contexts - Reset context gmu_registered bit + * @adreno_dev: pointer to the adreno device + * + * Walk the list of contexts and reset the gmu_registered for all + * contexts + */ +void adreno_hwsched_unregister_contexts(struct adreno_device *adreno_dev); + +/** + * adreno_hwsched_idle - Wait for dispatcher and hardware to become idle + * @adreno_dev: A handle to adreno device + * + * Return: 0 on success or negative error on failure + */ +int adreno_hwsched_idle(struct adreno_device *adreno_dev); +#endif diff --git a/adreno_ioctl.c b/adreno_ioctl.c new file mode 100644 index 0000000000..2b69d7934d --- /dev/null +++ b/adreno_ioctl.c @@ -0,0 +1,227 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. + */ + +#include + +#include "adreno.h" +#include "adreno_a5xx.h" + +/* + * Add a perfcounter to the per-fd list. + * Call with the device mutex held + */ +static int adreno_process_perfcounter_add(struct kgsl_device_private *dev_priv, + unsigned int groupid, unsigned int countable) +{ + struct adreno_device_private *adreno_priv = container_of(dev_priv, + struct adreno_device_private, dev_priv); + struct adreno_perfcounter_list_node *perfctr; + + perfctr = kmalloc(sizeof(*perfctr), GFP_KERNEL); + if (!perfctr) + return -ENOMEM; + + perfctr->groupid = groupid; + perfctr->countable = countable; + + /* add the pair to process perfcounter list */ + list_add(&perfctr->node, &adreno_priv->perfcounter_list); + return 0; +} + +/* + * Remove a perfcounter from the per-fd list. + * Call with the device mutex held + */ +static int adreno_process_perfcounter_del(struct kgsl_device_private *dev_priv, + unsigned int groupid, unsigned int countable) +{ + struct adreno_device_private *adreno_priv = container_of(dev_priv, + struct adreno_device_private, dev_priv); + struct adreno_perfcounter_list_node *p; + + list_for_each_entry(p, &adreno_priv->perfcounter_list, node) { + if (p->groupid == groupid && p->countable == countable) { + list_del(&p->node); + kfree(p); + return 0; + } + } + return -ENODEV; +} + +long adreno_ioctl_perfcounter_get(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_device *device = dev_priv->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_perfcounter_get *get = data; + int result; + + mutex_lock(&device->mutex); + + /* + * adreno_perfcounter_get() is called by kernel clients + * during start(), so it is not safe to take an + * active count inside that function. 
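+ *
+ * Editor's note (explanatory, derived from the code below): the ioctl
+ * path therefore takes the active count itself and brackets the whole
+ * request:
+ *
+ *	adreno_perfcntr_active_oob_get(adreno_dev);
+ *	adreno_perfcounter_get(adreno_dev, ...);
+ *	adreno_perfcntr_active_oob_put(adreno_dev);
+ *
+ * so the hardware stays powered while the counter is programmed.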
+ */ + + result = adreno_perfcntr_active_oob_get(adreno_dev); + if (result) { + mutex_unlock(&device->mutex); + return (long)result; + } + + result = adreno_perfcounter_get(adreno_dev, + get->groupid, get->countable, &get->offset, + &get->offset_hi, PERFCOUNTER_FLAG_NONE); + + /* Add the perfcounter into the list */ + if (!result) { + result = adreno_process_perfcounter_add(dev_priv, get->groupid, + get->countable); + if (result) + adreno_perfcounter_put(adreno_dev, get->groupid, + get->countable, PERFCOUNTER_FLAG_NONE); + } + + adreno_perfcntr_active_oob_put(adreno_dev); + + mutex_unlock(&device->mutex); + + return (long) result; +} + +long adreno_ioctl_perfcounter_put(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_device *device = dev_priv->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_perfcounter_put *put = data; + int result; + + mutex_lock(&device->mutex); + + /* Delete the perfcounter from the process list */ + result = adreno_process_perfcounter_del(dev_priv, put->groupid, + put->countable); + + /* Put the perfcounter refcount */ + if (!result) + adreno_perfcounter_put(adreno_dev, put->groupid, + put->countable, PERFCOUNTER_FLAG_NONE); + mutex_unlock(&device->mutex); + + return (long) result; +} + +static long adreno_ioctl_perfcounter_query(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(dev_priv->device); + struct kgsl_perfcounter_query *query = data; + + return (long) adreno_perfcounter_query_group(adreno_dev, query->groupid, + query->countables, query->count, &query->max_counters); +} + +static long adreno_ioctl_perfcounter_read(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(dev_priv->device); + struct kgsl_perfcounter_read *read = data; + + return (long) adreno_perfcounter_read_group(adreno_dev, read->reads, + read->count); +} + +static long adreno_ioctl_preemption_counters_query( + struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(dev_priv->device); + struct kgsl_preemption_counters_query *read = data; + int size_level = A5XX_CP_CTXRECORD_PREEMPTION_COUNTER_SIZE; + int levels_to_copy; + + if (!adreno_is_a5xx(adreno_dev) || + !adreno_is_preemption_enabled(adreno_dev)) + return -EOPNOTSUPP; + + if (read->size_user < size_level) + return -EINVAL; + + /* Calculate number of preemption counter levels to copy to userspace */ + levels_to_copy = (read->size_user / size_level); + + levels_to_copy = min_t(int, levels_to_copy, + ARRAY_SIZE(adreno_dev->ringbuffers)); + + if (copy_to_user(u64_to_user_ptr(read->counters), + adreno_dev->preempt.scratch->hostptr, + levels_to_copy * size_level)) + return -EFAULT; + + read->max_priority_level = levels_to_copy; + read->size_priority_level = size_level; + + return 0; +} + +long adreno_ioctl_helper(struct kgsl_device_private *dev_priv, + unsigned int cmd, unsigned long arg, + const struct kgsl_ioctl *cmds, int len) +{ + unsigned char data[128] = { 0 }; + long ret; + int i; + + for (i = 0; i < len; i++) { + if (_IOC_NR(cmd) == _IOC_NR(cmds[i].cmd)) + break; + } + + if (i == len) + return -ENOIOCTLCMD; + + if (_IOC_SIZE(cmds[i].cmd > sizeof(data))) { + dev_err_ratelimited(dev_priv->device->dev, + "data too big for ioctl 0x%08x: %d/%zu\n", + cmd, _IOC_SIZE(cmds[i].cmd), sizeof(data)); + return -EINVAL; + } + + if (_IOC_SIZE(cmds[i].cmd)) { + ret = 
kgsl_ioctl_copy_in(cmds[i].cmd, cmd, arg, data); + + if (ret) + return ret; + } else { + memset(data, 0, sizeof(data)); + } + + ret = cmds[i].func(dev_priv, cmd, data); + + if (ret == 0 && _IOC_SIZE(cmds[i].cmd)) + ret = kgsl_ioctl_copy_out(cmds[i].cmd, cmd, arg, data); + + return ret; +} + +static struct kgsl_ioctl adreno_ioctl_funcs[] = { + { IOCTL_KGSL_PERFCOUNTER_GET, adreno_ioctl_perfcounter_get }, + { IOCTL_KGSL_PERFCOUNTER_PUT, adreno_ioctl_perfcounter_put }, + { IOCTL_KGSL_PERFCOUNTER_QUERY, adreno_ioctl_perfcounter_query }, + { IOCTL_KGSL_PERFCOUNTER_READ, adreno_ioctl_perfcounter_read }, + { IOCTL_KGSL_PREEMPTIONCOUNTER_QUERY, + adreno_ioctl_preemption_counters_query }, +}; + +long adreno_ioctl(struct kgsl_device_private *dev_priv, + unsigned int cmd, unsigned long arg) +{ + return adreno_ioctl_helper(dev_priv, cmd, arg, + adreno_ioctl_funcs, ARRAY_SIZE(adreno_ioctl_funcs)); +} diff --git a/adreno_perfcounter.c b/adreno_perfcounter.c new file mode 100644 index 0000000000..c9260e9d6e --- /dev/null +++ b/adreno_perfcounter.c @@ -0,0 +1,580 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2002,2007-2020, The Linux Foundation. All rights reserved. + */ + +#include + +#include "adreno.h" +#include "adreno_perfcounter.h" + +static inline int active_countable(unsigned int countable) +{ + return ((countable != KGSL_PERFCOUNTER_NOT_USED) && + (countable != KGSL_PERFCOUNTER_BROKEN)); +} + +/** + * adreno_perfcounter_restore() - Restore performance counters + * @adreno_dev: adreno device to configure + * + * Load the physical performance counters with 64 bit value which are + * saved on GPU power collapse. + */ +void adreno_perfcounter_restore(struct adreno_device *adreno_dev) +{ + const struct adreno_perfcounters *counters = + ADRENO_PERFCOUNTERS(adreno_dev); + const struct adreno_perfcount_group *group; + unsigned int counter, groupid; + + if (counters == NULL) + return; + + for (groupid = 0; groupid < counters->group_count; groupid++) { + group = &(counters->groups[groupid]); + + if (!group->load) + continue; + + /* Restore the counters for the group */ + for (counter = 0; counter < group->reg_count; counter++) { + /* If not active or broken, skip this counter */ + if (!active_countable(group->regs[counter].countable)) + continue; + + group->load(adreno_dev, &group->regs[counter]); + } + } +} + +/** + * adreno_perfcounter_save() - Save performance counters + * @adreno_dev: adreno device to configure + * + * Save the performance counter values before GPU power collapse. + * The saved values are restored on restart. + * This ensures physical counters are coherent across power-collapse. + * This function must be called with the oob_gpu set request. 
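+ *
+ * Editor's note (illustrative, call sites assumed): save/restore are
+ * intended to bracket a power collapse in the target power paths,
+ * something like:
+ *
+ *	adreno_perfcounter_save(adreno_dev);
+ *	...power collapse / slumber...
+ *	adreno_perfcounter_restore(adreno_dev);
+ *
+ * so that the accumulated 64-bit counter values survive the collapse.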
+ */ +inline void adreno_perfcounter_save(struct adreno_device *adreno_dev) +{ + const struct adreno_perfcounters *counters = + ADRENO_PERFCOUNTERS(adreno_dev); + const struct adreno_perfcount_group *group; + unsigned int counter, groupid; + + if (counters == NULL) + return; + + for (groupid = 0; groupid < counters->group_count; groupid++) { + group = &(counters->groups[groupid]); + + /* Save the counter values for the group */ + for (counter = 0; counter < group->reg_count; counter++) { + /* If not active or broken, skip this counter */ + if (!active_countable(group->regs[counter].countable)) + continue; + + /* accumulate values for non-loadable counters */ + if (group->regs[counter].load_bit >= 0) + group->regs[counter].value = 0; + + group->regs[counter].value = + group->regs[counter].value + + adreno_perfcounter_read(adreno_dev, groupid, + counter); + } + } +} + +static int adreno_perfcounter_enable(struct adreno_device *adreno_dev, + unsigned int group, unsigned int counter, unsigned int countable); + +/** + * adreno_perfcounter_start: Enable performance counters + * @adreno_dev: Adreno device to configure + * + * Ensure all performance counters are enabled that are allocated. Since + * the device was most likely stopped, we can't trust that the counters + * are still valid so make it so. + */ + +void adreno_perfcounter_start(struct adreno_device *adreno_dev) +{ + const struct adreno_perfcounters *counters = + ADRENO_PERFCOUNTERS(adreno_dev); + const struct adreno_perfcount_group *group; + unsigned int i, j; + + if (counters == NULL) + return; + /* group id iter */ + for (i = 0; i < counters->group_count; i++) { + group = &(counters->groups[i]); + + /* countable iter */ + for (j = 0; j < group->reg_count; j++) { + if (!active_countable(group->regs[j].countable)) + continue; + + /* + * The GPU has to be idle before calling the perfcounter + * enable function, but since this function is called + * during start we already know the GPU is idle. + * Since the countable/counter pairs have already been + * validated, there is no way for _enable() to fail so + * no need to check the return code. 
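+ *
+ * Editor's note (for reference): _enable() below just dispatches to the
+ * group's ->enable() hook, i.e. effectively
+ *
+ *	group->enable(adreno_dev, group, j, group->regs[j].countable);
+ *
+ * so every allocated counter gets its countable reprogrammed after the
+ * restart.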
+ */ + adreno_perfcounter_enable(adreno_dev, i, j, + group->regs[j].countable); + } + } +} + +/** + * adreno_perfcounter_read_group() - Determine which countables are in counters + * @adreno_dev: Adreno device to configure + * @reads: List of kgsl_perfcounter_read_groups + * @count: Length of list + * + * Read the performance counters for the groupid/countable pairs and return + * the 64 bit result for each pair + */ + +int adreno_perfcounter_read_group(struct adreno_device *adreno_dev, + struct kgsl_perfcounter_read_group __user *reads, unsigned int count) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + const struct adreno_perfcounters *counters = + ADRENO_PERFCOUNTERS(adreno_dev); + const struct adreno_perfcount_group *group; + struct kgsl_perfcounter_read_group *list = NULL; + unsigned int i, j; + int ret = 0; + + if (counters == NULL) + return -EINVAL; + + /* sanity check params passed in */ + if (reads == NULL || count == 0 || count > 100) + return -EINVAL; + + list = kmalloc_array(count, sizeof(struct kgsl_perfcounter_read_group), + GFP_KERNEL); + if (!list) + return -ENOMEM; + + if (copy_from_user(list, reads, + sizeof(struct kgsl_perfcounter_read_group) * count)) { + ret = -EFAULT; + goto done; + } + + mutex_lock(&device->mutex); + + ret = adreno_perfcntr_active_oob_get(adreno_dev); + if (ret) { + mutex_unlock(&device->mutex); + goto done; + } + + /* list iterator */ + for (j = 0; j < count; j++) { + + list[j].value = 0; + + /* Verify that the group ID is within range */ + if (list[j].groupid >= counters->group_count) { + ret = -EINVAL; + break; + } + + group = &(counters->groups[list[j].groupid]); + + /* group/counter iterator */ + for (i = 0; i < group->reg_count; i++) { + if (group->regs[i].countable == list[j].countable) { + list[j].value = adreno_perfcounter_read( + adreno_dev, list[j].groupid, i); + break; + } + } + } + + adreno_perfcntr_active_oob_put(adreno_dev); + + mutex_unlock(&device->mutex); + + /* write the data */ + if (ret == 0) + if (copy_to_user(reads, list, + sizeof(struct kgsl_perfcounter_read_group) * count)) + ret = -EFAULT; + +done: + kfree(list); + return ret; +} + +/** + * adreno_perfcounter_get_groupid() - Get the performance counter ID + * @adreno_dev: Adreno device + * @name: Performance counter group name string + * + * Get the groupid based on the name and return this ID + */ + +int adreno_perfcounter_get_groupid(struct adreno_device *adreno_dev, + const char *name) +{ + const struct adreno_perfcounters *counters = + ADRENO_PERFCOUNTERS(adreno_dev); + const struct adreno_perfcount_group *group; + int i; + + if (name == NULL || counters == NULL) + return -EINVAL; + + for (i = 0; i < counters->group_count; ++i) { + group = &(counters->groups[i]); + + /* make sure there is a name for this group */ + if (group->name == NULL) + continue; + + /* verify name and length */ + if (strlen(name) == strlen(group->name) && + strcmp(group->name, name) == 0) + return i; + } + + return -EINVAL; +} + +/** + * adreno_perfcounter_get_name() - Get the group name + * @adreno_dev: Adreno device + * @groupid: Desired performance counter groupid + * + * Get the name based on the groupid and return it + */ + +const char *adreno_perfcounter_get_name(struct adreno_device *adreno_dev, + unsigned int groupid) +{ + const struct adreno_perfcounters *counters = + ADRENO_PERFCOUNTERS(adreno_dev); + + if (counters != NULL && groupid < counters->group_count) + return counters->groups[groupid].name; + + return NULL; +} + +/** + * adreno_perfcounter_query_group: Determine 
which countables are in counters + * @adreno_dev: Adreno device to configure + * @groupid: Desired performance counter group + * @countables: Return list of all countables in the groups counters + * @count: Max length of the array + * @max_counters: max counters for the groupid + * + * Query the current state of counters for the group. + */ + +int adreno_perfcounter_query_group(struct adreno_device *adreno_dev, + unsigned int groupid, unsigned int __user *countables, + unsigned int count, unsigned int *max_counters) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + const struct adreno_perfcounters *counters = + ADRENO_PERFCOUNTERS(adreno_dev); + const struct adreno_perfcount_group *group; + unsigned int i, t; + int ret = 0; + unsigned int *buf; + + *max_counters = 0; + + if (counters == NULL || groupid >= counters->group_count) + return -EINVAL; + + mutex_lock(&device->mutex); + + group = &(counters->groups[groupid]); + *max_counters = group->reg_count; + + /* + * if NULL countable or *count of zero, return max reg_count in + * *max_counters and return success + */ + if (countables == NULL || count == 0) { + mutex_unlock(&device->mutex); + return 0; + } + + t = min_t(unsigned int, group->reg_count, count); + + buf = kmalloc_array(t, sizeof(unsigned int), GFP_KERNEL); + if (buf == NULL) { + mutex_unlock(&device->mutex); + return -ENOMEM; + } + + for (i = 0; i < t; i++) + buf[i] = group->regs[i].countable; + + mutex_unlock(&device->mutex); + + if (copy_to_user(countables, buf, sizeof(unsigned int) * t)) + ret = -EFAULT; + + kfree(buf); + + return ret; +} + +static inline void refcount_group(const struct adreno_perfcount_group *group, + unsigned int reg, unsigned int flags, + unsigned int *lo, unsigned int *hi) +{ + if (flags & PERFCOUNTER_FLAG_KERNEL) + group->regs[reg].kernelcount++; + else + group->regs[reg].usercount++; + + if (lo) + *lo = group->regs[reg].offset; + + if (hi) + *hi = group->regs[reg].offset_hi; +} + +/** + * adreno_perfcounter_get: Try to put a countable in an available counter + * @adreno_dev: Adreno device to configure + * @groupid: Desired performance counter group + * @countable: Countable desired to be in a counter + * @offset: Return offset of the LO counter assigned + * @offset_hi: Return offset of the HI counter assigned + * @flags: Used to setup kernel perf counters + * + * Try to place a countable in an available counter. If the countable is + * already in a counter, reference count the counter/countable pair resource + * and return success + */ + +int adreno_perfcounter_get(struct adreno_device *adreno_dev, + unsigned int groupid, unsigned int countable, unsigned int *offset, + unsigned int *offset_hi, unsigned int flags) +{ + const struct adreno_perfcounters *counters = + ADRENO_PERFCOUNTERS(adreno_dev); + const struct adreno_perfcount_group *group; + unsigned int empty = -1; + int ret = 0; + + /* always clear return variables */ + if (offset) + *offset = 0; + if (offset_hi) + *offset_hi = 0; + + if (counters == NULL) + return -EINVAL; + + if (groupid >= counters->group_count) + return -EINVAL; + + group = &(counters->groups[groupid]); + + if (group->flags & ADRENO_PERFCOUNTER_GROUP_FIXED) { + /* + * In fixed groups the countable equals the fixed register the + * user wants. 
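+ * (Editor's note, illustrative: for such a group a request like
+ *	adreno_perfcounter_get(adreno_dev, groupid, 3, &lo, &hi, flags)
+ * simply reserves fixed register index 3 of that group.)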
First make sure it is in range + */ + + if (countable >= group->reg_count) + return -EINVAL; + + /* If it is already reserved, just increase the refcounts */ + if ((group->regs[countable].kernelcount != 0) || + (group->regs[countable].usercount != 0)) { + refcount_group(group, countable, flags, + offset, offset_hi); + return 0; + } + + empty = countable; + } else { + unsigned int i; + + /* + * Check if the countable is already associated with a counter. + * Refcount and return the offset, otherwise, try and find an + * empty counter and assign the countable to it. + */ + + for (i = 0; i < group->reg_count; i++) { + if (group->regs[i].countable == countable) { + refcount_group(group, i, flags, + offset, offset_hi); + return 0; + } else if (group->regs[i].countable == + KGSL_PERFCOUNTER_NOT_USED) { + /* keep track of unused counter */ + empty = i; + } + } + } + + /* no available counters, so do nothing else */ + if (empty == -1) + return -EBUSY; + + /* initialize the new counter */ + group->regs[empty].countable = countable; + + /* enable the new counter */ + ret = adreno_perfcounter_enable(adreno_dev, groupid, empty, countable); + if (ret) { + /* Put back the perfcounter */ + if (!(group->flags & ADRENO_PERFCOUNTER_GROUP_FIXED)) + group->regs[empty].countable = + KGSL_PERFCOUNTER_NOT_USED; + return ret; + } + + /* set initial kernel and user count */ + if (flags & PERFCOUNTER_FLAG_KERNEL) { + group->regs[empty].kernelcount = 1; + group->regs[empty].usercount = 0; + } else { + group->regs[empty].kernelcount = 0; + group->regs[empty].usercount = 1; + } + + if (offset) + *offset = group->regs[empty].offset; + if (offset_hi) + *offset_hi = group->regs[empty].offset_hi; + + return ret; +} + + +/** + * adreno_perfcounter_put: Release a countable from counter resource + * @adreno_dev: Adreno device to configure + * @groupid: Desired performance counter group + * @countable: Countable desired to be freed from a counter + * @flags: Flag to determine if kernel or user space request + * + * Put a performance counter/countable pair that was previously received. If + * noone else is using the countable, free up the counter for others. + */ +int adreno_perfcounter_put(struct adreno_device *adreno_dev, + unsigned int groupid, unsigned int countable, unsigned int flags) +{ + const struct adreno_perfcounters *counters = + ADRENO_PERFCOUNTERS(adreno_dev); + const struct adreno_perfcount_group *group; + unsigned int i; + + if (counters == NULL || groupid >= counters->group_count) + return -EINVAL; + + group = &(counters->groups[groupid]); + + /* + * Find if the counter/countable pair is used currently. + * Start cycling through registers in the bank. 
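+ *
+ * Editor's note (illustrative, derived from the loop below): every
+ * successful get must eventually be balanced by a put with the same
+ * groupid/countable/flags, e.g.
+ *
+ *	adreno_perfcounter_get(adreno_dev, gid, ctbl, &lo, &hi,
+ *			PERFCOUNTER_FLAG_KERNEL);
+ *	...
+ *	adreno_perfcounter_put(adreno_dev, gid, ctbl,
+ *			PERFCOUNTER_FLAG_KERNEL);
+ *
+ * The register is only marked KGSL_PERFCOUNTER_NOT_USED again once both
+ * the kernel and user refcounts reach zero.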
+ */ + for (i = 0; i < group->reg_count; i++) { + /* check if countable assigned is what we are looking for */ + if (group->regs[i].countable == countable) { + /* found pair, book keep count based on request type */ + if (flags & PERFCOUNTER_FLAG_KERNEL && + group->regs[i].kernelcount > 0) + group->regs[i].kernelcount--; + else if (group->regs[i].usercount > 0) + group->regs[i].usercount--; + else + break; + + /* mark available if not used anymore */ + if (group->regs[i].kernelcount == 0 && + group->regs[i].usercount == 0) + group->regs[i].countable = + KGSL_PERFCOUNTER_NOT_USED; + + return 0; + } + } + + return -EINVAL; +} + +/** + * adreno_perfcounter_enable - Configure a performance counter for a countable + * @adreno_dev - Adreno device to configure + * @group - Desired performance counter group + * @counter - Desired performance counter in the group + * @countable - Desired countable + * + * Function is used for adreno cores + * Physically set up a counter within a group with the desired countable + * Return 0 on success else error code + */ +static int adreno_perfcounter_enable(struct adreno_device *adreno_dev, + unsigned int groupid, unsigned int counter, unsigned int countable) +{ + const struct adreno_perfcounters *counters = + ADRENO_PERFCOUNTERS(adreno_dev); + const struct adreno_perfcount_group *group; + + if (counters == NULL) + return -EINVAL; + + if (groupid >= counters->group_count) + return -EINVAL; + + group = &counters->groups[groupid]; + + if (counter >= group->reg_count) + return -EINVAL; + + return group->enable(adreno_dev, group, counter, countable); +} + +/** + * adreno_perfcounter_read() - Reads a performance counter + * @adreno_dev: The device on which the counter is running + * @group: The group of the counter + * @counter: The counter within the group + * + * Function is used to read the counter of adreno devices + * Returns the 64 bit counter value on success else 0. + */ +uint64_t adreno_perfcounter_read(struct adreno_device *adreno_dev, + unsigned int groupid, unsigned int counter) +{ + const struct adreno_perfcounters *counters = + ADRENO_PERFCOUNTERS(adreno_dev); + const struct adreno_perfcount_group *group; + + /* Lets hope this doesn't fail. Now subfunctions don't need to check */ + if (counters == NULL) + return 0; + + if (groupid >= counters->group_count) + return 0; + + group = &counters->groups[groupid]; + + if (counter >= group->reg_count) + return 0; + + return group->read(adreno_dev, group, counter); +} diff --git a/adreno_perfcounter.h b/adreno_perfcounter.h new file mode 100644 index 0000000000..85006b0174 --- /dev/null +++ b/adreno_perfcounter.h @@ -0,0 +1,137 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2008-2015,2017,2019-2021 The Linux Foundation. All rights reserved. 
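+ *
+ * Editor's note (illustrative overview, not part of the original header;
+ * the group and countable values are assumed): in-kernel users typically
+ * reserve a counter once and cache the returned register offsets, e.g.
+ *
+ *	static u32 lo, hi;
+ *	adreno_perfcounter_kernel_get(adreno_dev,
+ *			KGSL_PERFCOUNTER_GROUP_SP, 0, &lo, &hi);
+ *
+ * adreno_perfcounter_kernel_get() below returns early when *lo is already
+ * non-zero, so repeated calls are cheap once the counter is assigned.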
+ */ +#ifndef __ADRENO_PERFCOUNTER_H +#define __ADRENO_PERFCOUNTER_H + +struct adreno_device; + +/* ADRENO_PERFCOUNTERS - Given an adreno device, return the perfcounters list */ +#define ADRENO_PERFCOUNTERS(_a) ((_a)->gpucore->perfcounters) + +#define PERFCOUNTER_FLAG_NONE 0x0 +#define PERFCOUNTER_FLAG_KERNEL 0x1 + +/* Structs to maintain the list of active performance counters */ + +/** + * struct adreno_perfcount_register: register state + * @countable: countable the register holds + * @kernelcount: number of user space users of the register + * @usercount: number of kernel users of the register + * @offset: register hardware offset + * @load_bit: The bit number in LOAD register which corresponds to this counter + * @select: The countable register offset + * @value: The 64 bit countable register value + */ +struct adreno_perfcount_register { + unsigned int countable; + unsigned int kernelcount; + unsigned int usercount; + unsigned int offset; + unsigned int offset_hi; + int load_bit; + unsigned int select; + uint64_t value; +}; + +/** + * struct adreno_perfcount_group: registers for a hardware group + * @regs: available registers for this group + * @reg_count: total registers for this group + * @name: group name for this group + */ +struct adreno_perfcount_group { + struct adreno_perfcount_register *regs; + unsigned int reg_count; + const char *name; + unsigned long flags; + int (*enable)(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter, unsigned int countable); + u64 (*read)(struct adreno_device *adreno_dev, + const struct adreno_perfcount_group *group, + unsigned int counter); + void (*load)(struct adreno_device *adreno_dev, + struct adreno_perfcount_register *reg); +}; + +/* + * ADRENO_PERFCOUNTER_GROUP_FIXED indicates that a perfcounter group is fixed - + * instead of having configurable countables like the other groups, registers in + * fixed groups have a hardwired countable. 
So when the user requests a + * countable in one of these groups, that countable should be used as the + * register offset to return + */ + +#define ADRENO_PERFCOUNTER_GROUP_FIXED BIT(0) + +/* + * ADRENO_PERFCOUNTER_GROUP_RESTORE indicates CP needs to restore the select + * registers of this perfcounter group as part of preemption and IFPC + */ +#define ADRENO_PERFCOUNTER_GROUP_RESTORE BIT(1) + + +/** + * adreno_perfcounts: all available perfcounter groups + * @groups: available groups for this device + * @group_count: total groups for this device + */ +struct adreno_perfcounters { + const struct adreno_perfcount_group *groups; + unsigned int group_count; +}; + +#define ADRENO_PERFCOUNTER_GROUP_FLAGS(core, offset, name, flags, \ + enable, read, load) \ + [KGSL_PERFCOUNTER_GROUP_##offset] = { core##_perfcounters_##name, \ + ARRAY_SIZE(core##_perfcounters_##name), __stringify(name), flags, \ + enable, read, load } + +#define ADRENO_PERFCOUNTER_GROUP(core, offset, name, enable, read, load) \ + ADRENO_PERFCOUNTER_GROUP_FLAGS(core, offset, name, 0, enable, read, \ + load) + +int adreno_perfcounter_query_group(struct adreno_device *adreno_dev, + unsigned int groupid, unsigned int __user *countables, + unsigned int count, unsigned int *max_counters); + +int adreno_perfcounter_read_group(struct adreno_device *adreno_dev, + struct kgsl_perfcounter_read_group __user *reads, unsigned int count); + +void adreno_perfcounter_restore(struct adreno_device *adreno_dev); + +void adreno_perfcounter_save(struct adreno_device *adreno_dev); + +void adreno_perfcounter_start(struct adreno_device *adreno_dev); + +int adreno_perfcounter_get_groupid(struct adreno_device *adreno_dev, + const char *name); + +uint64_t adreno_perfcounter_read(struct adreno_device *adreno_dev, + unsigned int group, unsigned int counter); + +const char *adreno_perfcounter_get_name(struct adreno_device + *adreno_dev, unsigned int groupid); + +int adreno_perfcounter_get(struct adreno_device *adreno_dev, + unsigned int groupid, unsigned int countable, unsigned int *offset, + unsigned int *offset_hi, unsigned int flags); + +int adreno_perfcounter_put(struct adreno_device *adreno_dev, + unsigned int groupid, unsigned int countable, unsigned int flags); + +static inline int adreno_perfcounter_kernel_get( + struct adreno_device *adreno_dev, + int group, int countable, u32 *lo, u32 *hi) +{ + if (*lo) + return 0; + + return adreno_perfcounter_get(adreno_dev, group, countable, lo, hi, + PERFCOUNTER_FLAG_KERNEL); +} + +#endif /* __ADRENO_PERFCOUNTER_H */ diff --git a/adreno_pm4types.h b/adreno_pm4types.h new file mode 100644 index 0000000000..1d5ab43fa9 --- /dev/null +++ b/adreno_pm4types.h @@ -0,0 +1,404 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. 
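+ *
+ * Editor's note (illustrative overview, not part of the original header):
+ * these helpers build the raw PM4 packet headers that the driver writes
+ * into the ringbuffer or an IB. For example, emitting a NOP identifier on
+ * a type7-capable core boils down to:
+ *
+ *	cmds[0] = cp_type7_packet(CP_NOP, 1);
+ *	cmds[1] = identifier;
+ *
+ * which is what cp_identifier() at the end of this header expands to via
+ * cp_packet().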
+ */ +#ifndef __ADRENO_PM4TYPES_H +#define __ADRENO_PM4TYPES_H + +#include "adreno.h" + +#define CP_TYPE0_PKT (0 << 30) +#define CP_TYPE3_PKT (3 << 30) +#define CP_TYPE4_PKT (4 << 28) +#define CP_TYPE7_PKT (7 << 28) + +#define PM4_TYPE4_PKT_SIZE_MAX 128 + +/* type3 packets */ + +/* Enable preemption flag */ +#define CP_PREEMPT_ENABLE 0x1C +/* Preemption token command on which preemption occurs */ +#define CP_PREEMPT_TOKEN 0x1E +/* Bit to set in CP_PREEMPT_TOKEN ordinal for interrupt on preemption */ +#define CP_PREEMPT_ORDINAL_INTERRUPT 24 + +/* Wait for memory writes to complete */ +#define CP_WAIT_MEM_WRITES 0x12 + +/* initialize CP's micro-engine */ +#define CP_ME_INIT 0x48 + +/* skip N 32-bit words to get to the next packet */ +#define CP_NOP 0x10 + +/* indirect buffer dispatch. same as IB, but init is pipelined */ +#define CP_INDIRECT_BUFFER_PFD 0x37 + +/* wait for the IDLE state of the engine */ +#define CP_WAIT_FOR_IDLE 0x26 + +/* wait until a register or memory location is a specific value */ +#define CP_WAIT_REG_MEM 0x3c + +/* wait until a register location is equal to a specific value */ +#define CP_WAIT_REG_EQ 0x52 + +/* switches SMMU pagetable, used on a5xx only */ +#define CP_SMMU_TABLE_UPDATE 0x53 + +/* Set internal CP registers, used to indicate context save data addresses */ +#define CP_SET_PSEUDO_REGISTER 0x56 + +/* Tell CP the current operation mode, indicates save and restore procedure */ +#define CP_SET_MARKER 0x65 + +/* register read/modify/write */ +#define CP_REG_RMW 0x21 + +/* Set binning configuration registers */ +#define CP_SET_BIN_DATA 0x2f + +/* reads register in chip and writes to memory */ +#define CP_REG_TO_MEM 0x3e + +/* write N 32-bit words to memory */ +#define CP_MEM_WRITE 0x3d + +/* conditional execution of a sequence of packets */ +#define CP_COND_EXEC 0x44 + +/* conditional write to memory or register */ +#define CP_COND_WRITE 0x45 + +/* generate an event that creates a write to memory when completed */ +#define CP_EVENT_WRITE 0x46 + +/* initiate fetch of index buffer and draw */ +#define CP_DRAW_INDX 0x22 + +/* New draw packets defined for A4XX */ +#define CP_DRAW_INDX_OFFSET 0x38 +#define CP_DRAW_INDIRECT 0x28 +#define CP_DRAW_INDX_INDIRECT 0x29 +#define CP_DRAW_AUTO 0x24 + +/* load constant into chip and to memory */ +#define CP_SET_CONSTANT 0x2d + +/* selective invalidation of state pointers */ +#define CP_INVALIDATE_STATE 0x3b + +/* generate interrupt from the command stream */ +#define CP_INTERRUPT 0x40 + +/* A5XX Enable yield in RB only */ +#define CP_YIELD_ENABLE 0x1C + +#define CP_WHERE_AM_I 0x62 + +/* Enable/Disable/Defer A5x global preemption model */ +#define CP_PREEMPT_ENABLE_GLOBAL 0x69 + +/* Enable/Disable A5x local preemption model */ +#define CP_PREEMPT_ENABLE_LOCAL 0x6A + +/* Yeild token on a5xx similar to CP_PREEMPT on a4xx */ +#define CP_CONTEXT_SWITCH_YIELD 0x6B + +/* Inform CP about current render mode (needed for a5xx preemption) */ +#define CP_SET_RENDER_MODE 0x6C + +/* Write register, ignoring context state for context sensitive registers */ +#define CP_REG_WR_NO_CTXT 0x78 + +/* + * for A4xx + * Write to register with address that does not fit into type-0 pkt + */ +#define CP_WIDE_REG_WRITE 0x74 + + +/* PFP waits until the FIFO between the PFP and the ME is empty */ +#define CP_WAIT_FOR_ME 0x13 + +/* Stall the SQE until the CP processing pipeline is empty */ +#define CP_WAIT_FOR_CP_FLUSH 0x13 + +#define CP_SET_PROTECTED_MODE 0x5f /* sets the register protection mode */ + +/* Used to switch GPU between secure and non-secure 
modes */ +#define CP_SET_SECURE_MODE 0x66 + +#define CP_BOOTSTRAP_UCODE 0x6f /* bootstraps microcode */ + +/* + * for a3xx + */ + +#define CP_LOAD_STATE 0x30 /* load high level sequencer command */ + +/* Conditionally load a IB based on a flag */ +#define CP_COND_INDIRECT_BUFFER_PFE 0x3A /* prefetch enabled */ +#define CP_COND_INDIRECT_BUFFER_PFD 0x32 /* prefetch disabled */ + +/* Load a buffer with pre-fetch enabled */ +#define CP_INDIRECT_BUFFER_PFE 0x3F + +#define CP_EXEC_CL 0x31 + +/* (A4x) save PM4 stream pointers to execute upon a visible draw */ +#define CP_SET_DRAW_STATE 0x43 + +#define CP_LOADSTATE_DSTOFFSET_SHIFT 0x00000000 +#define CP_LOADSTATE_STATESRC_SHIFT 0x00000010 +#define CP_LOADSTATE_STATEBLOCKID_SHIFT 0x00000013 +#define CP_LOADSTATE_NUMOFUNITS_SHIFT 0x00000016 +#define CP_LOADSTATE_STATETYPE_SHIFT 0x00000000 +#define CP_LOADSTATE_EXTSRCADDR_SHIFT 0x00000002 + +/* This is a commonly used CP_EVENT_WRITE */ +#define CACHE_FLUSH_TS 4 +#define CACHE_CLEAN 0x31 + +/* Controls which threads execute the PM4 commands the follow this packet */ +#define CP_THREAD_CONTROL 0x17 + +#define CP_SET_THREAD_BR FIELD_PREP(GENMASK(1, 0), 1) +#define CP_SET_THREAD_BOTH FIELD_PREP(GENMASK(1, 0), 3) +#define CP_SYNC_THREADS BIT(31) +#define CP_CONCURRENT_BIN_DISABLE BIT(27) + +#define CP_RESET_CONTEXT_STATE 0x1F + +#define CP_CLEAR_BV_BR_COUNTER BIT(2) +#define CP_CLEAR_RESOURCE_TABLE BIT(1) +#define CP_CLEAR_ON_CHIP_TS BIT(0) + +static inline uint pm4_calc_odd_parity_bit(uint val) +{ + return (0x9669 >> (0xf & ((val) ^ + ((val) >> 4) ^ ((val) >> 8) ^ ((val) >> 12) ^ + ((val) >> 16) ^ ((val) >> 20) ^ ((val) >> 24) ^ + ((val) >> 28)))) & 1; +} + +/* + * PM4 packet header functions + * For all the packet functions the passed in count should be the size of the + * payload excluding the header + */ +static inline uint cp_type0_packet(uint regindx, uint cnt) +{ + return CP_TYPE0_PKT | ((cnt-1) << 16) | ((regindx) & 0x7FFF); +} + +static inline uint cp_type3_packet(uint opcode, uint cnt) +{ + return CP_TYPE3_PKT | ((cnt-1) << 16) | (((opcode) & 0xFF) << 8); +} + +static inline uint cp_type4_packet(uint opcode, uint cnt) +{ + return CP_TYPE4_PKT | ((cnt) << 0) | + (pm4_calc_odd_parity_bit(cnt) << 7) | + (((opcode) & 0x3FFFF) << 8) | + ((pm4_calc_odd_parity_bit(opcode) << 27)); +} + +static inline uint cp_type7_packet(uint opcode, uint cnt) +{ + return CP_TYPE7_PKT | ((cnt) << 0) | + (pm4_calc_odd_parity_bit(cnt) << 15) | + (((opcode) & 0x7F) << 16) | + ((pm4_calc_odd_parity_bit(opcode) << 23)); + +} + +#define pkt_is_type0(pkt) (((pkt) & 0XC0000000) == CP_TYPE0_PKT) + +#define type0_pkt_size(pkt) ((((pkt) >> 16) & 0x3FFF) + 1) +#define type0_pkt_offset(pkt) ((pkt) & 0x7FFF) + +/* + * Check both for the type3 opcode and make sure that the reserved bits [1:7] + * and 15 are 0 + */ + +#define pkt_is_type3(pkt) \ + ((((pkt) & 0xC0000000) == CP_TYPE3_PKT) && \ + (((pkt) & 0x80FE) == 0)) + +#define cp_type3_opcode(pkt) (((pkt) >> 8) & 0xFF) +#define type3_pkt_size(pkt) ((((pkt) >> 16) & 0x3FFF) + 1) + +#define pkt_is_type4(pkt) \ + ((((pkt) & 0xF0000000) == CP_TYPE4_PKT) && \ + ((((pkt) >> 27) & 0x1) == \ + pm4_calc_odd_parity_bit(cp_type4_base_index_one_reg_wr(pkt))) \ + && ((((pkt) >> 7) & 0x1) == \ + pm4_calc_odd_parity_bit(type4_pkt_size(pkt)))) + +#define cp_type4_base_index_one_reg_wr(pkt) (((pkt) >> 8) & 0x7FFFF) +#define type4_pkt_size(pkt) ((pkt) & 0x7F) + +#define pkt_is_type7(pkt) \ + ((((pkt) & 0xF0000000) == CP_TYPE7_PKT) && \ + (((pkt) & 0x0F000000) == 0) && \ + ((((pkt) >> 23) & 0x1) == \ 
+ pm4_calc_odd_parity_bit(cp_type7_opcode(pkt))) \ + && ((((pkt) >> 15) & 0x1) == \ + pm4_calc_odd_parity_bit(type7_pkt_size(pkt)))) + +#define cp_type7_opcode(pkt) (((pkt) >> 16) & 0x7F) +#define type7_pkt_size(pkt) ((pkt) & 0x3FFF) + +/* dword base address of the GFX decode space */ +#define SUBBLOCK_OFFSET(reg) ((unsigned int)((reg) - (0x2000))) + +/* gmem command buffer length */ +#define CP_REG(reg) ((0x4 << 16) | (SUBBLOCK_OFFSET(reg))) + +/* Return true if the hardware uses the legacy (A4XX and older) PM4 format */ +#define ADRENO_LEGACY_PM4(_d) (ADRENO_GPUREV(_d) < 500) + +/** + * cp_packet - Generic CP packet to support different opcodes on + * different GPU cores. + * @adreno_dev: The adreno device + * @opcode: Operation for cp packet + * @size: size for cp packet + */ +static inline uint cp_packet(struct adreno_device *adreno_dev, + int opcode, uint size) +{ + if (ADRENO_LEGACY_PM4(adreno_dev)) + return cp_type3_packet(opcode, size); + + return cp_type7_packet(opcode, size); +} + +/** + * cp_mem_packet - Generic CP memory packet to support different + * opcodes on different GPU cores. + * @adreno_dev: The adreno device + * @opcode: mem operation for cp packet + * @size: size for cp packet + * @num_mem: num of mem access + */ +static inline uint cp_mem_packet(struct adreno_device *adreno_dev, + int opcode, uint size, uint num_mem) +{ + if (ADRENO_LEGACY_PM4(adreno_dev)) + return cp_type3_packet(opcode, size); + + return cp_type7_packet(opcode, size + num_mem); +} + +/* Return 1 if the command is an indirect buffer of any kind */ +static inline int adreno_cmd_is_ib(struct adreno_device *adreno_dev, + unsigned int cmd) +{ + return cmd == cp_mem_packet(adreno_dev, + CP_INDIRECT_BUFFER_PFE, 2, 1) || + cmd == cp_mem_packet(adreno_dev, + CP_INDIRECT_BUFFER_PFD, 2, 1) || + cmd == cp_mem_packet(adreno_dev, + CP_COND_INDIRECT_BUFFER_PFE, 2, 1) || + cmd == cp_mem_packet(adreno_dev, + CP_COND_INDIRECT_BUFFER_PFD, 2, 1); +} + +/** + * cp_gpuaddr - Generic function to add 64bit and 32bit gpuaddr + * to pm4 commands + * @adreno_dev: The adreno device + * @cmds: command pointer to add gpuaddr + * @gpuaddr: gpuaddr to add + */ +static inline uint cp_gpuaddr(struct adreno_device *adreno_dev, + uint *cmds, uint64_t gpuaddr) +{ + uint *start = cmds; + + if (ADRENO_LEGACY_PM4(adreno_dev)) + *cmds++ = (uint)gpuaddr; + else { + *cmds++ = lower_32_bits(gpuaddr); + *cmds++ = upper_32_bits(gpuaddr); + } + return cmds - start; +} + +/** + * cp_register - Generic function for gpu register operation + * @adreno_dev: The adreno device + * @reg: GPU register + * @size: count for PM4 operation + */ +static inline uint cp_register(struct adreno_device *adreno_dev, + unsigned int reg, unsigned int size) +{ + if (ADRENO_LEGACY_PM4(adreno_dev)) + return cp_type0_packet(reg, size); + + return cp_type4_packet(reg, size); +} + +/** + * cp_wait_for_me - common function for WAIT_FOR_ME + * @adreno_dev: The adreno device + * @cmds: command pointer to add gpuaddr + */ +static inline uint cp_wait_for_me(struct adreno_device *adreno_dev, + uint *cmds) +{ + uint *start = cmds; + + if (ADRENO_LEGACY_PM4(adreno_dev)) { + *cmds++ = cp_type3_packet(CP_WAIT_FOR_ME, 1); + *cmds++ = 0; + } else + *cmds++ = cp_type7_packet(CP_WAIT_FOR_ME, 0); + + return cmds - start; +} + +/** + * cp_wait_for_idle - common function for WAIT_FOR_IDLE + * @adreno_dev: The adreno device + * @cmds: command pointer to add gpuaddr + */ +static inline uint cp_wait_for_idle(struct adreno_device *adreno_dev, + uint *cmds) +{ + uint *start = cmds; + + if 
(ADRENO_LEGACY_PM4(adreno_dev)) { + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0; + } else + *cmds++ = cp_type7_packet(CP_WAIT_FOR_IDLE, 0); + + return cmds - start; +} + +static inline u32 cp_protected_mode(struct adreno_device *adreno_dev, + u32 *cmds, int on) +{ + cmds[0] = cp_packet(adreno_dev, CP_SET_PROTECTED_MODE, 1); + cmds[1] = on; + + return 2; +} + +static inline u32 cp_identifier(struct adreno_device *adreno_dev, + u32 *cmds, u32 id) +{ + cmds[0] = cp_packet(adreno_dev, CP_NOP, 1); + cmds[1] = id; + + return 2; +} + +#endif /* __ADRENO_PM4TYPES_H */ diff --git a/adreno_profile.c b/adreno_profile.c new file mode 100644 index 0000000000..925d9e844d --- /dev/null +++ b/adreno_profile.c @@ -0,0 +1,1130 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2013-2021, The Linux Foundation. All rights reserved. + */ + +#include +#include +#include + +#include "adreno.h" +#include "adreno_hwsched.h" +#include "adreno_profile.h" +#include "adreno_pm4types.h" +#include "adreno_ringbuffer.h" + +#define ASSIGNS_STR_FORMAT "%.8s:%u " + +/* + * Raw Data for processing later: + * : 3 - timestamp, count, context id + * [per counter] - data for each counter + * : 1 - Register offset + * : 2 - Pre IB register hi/lo value + * : 2 - Post IB register hi/lo value + * [per counter end] + */ +#define SIZE_DATA(cnt) (6 + (cnt) * 5) + +/* + * Pre-IB command size (in dwords): + * : 2 - NOP start identifier + * : 4 - timestamp + * : 4 - count + * : 4 - context id + * : 4 - pid + * : 4 - tid + * : 4 - type + * [loop count start] - for each counter to watch + * : 4 - Register offset + * : 4 - Register read lo + * : 4 - Register read high + * [loop end] + * : 2 - NOP end identifier + */ +#define SIZE_PREIB(cnt) (28 + (cnt) * 12) + +/* + * Post-IB command size (in dwords): + * : 2 - NOP start identifier + * [loop count start] - for each counter to watch + * : 4 - Register read lo + * : 4 - Register read high + * [loop end] + * : 2 - NOP end identifier + */ +#define SIZE_POSTIB(cnt) (4 + (cnt) * 8) + +/* Counter data + Pre size + post size = total size */ +#define SIZE_SHARED_ENTRY(cnt) (SIZE_DATA(cnt) + SIZE_PREIB(cnt) \ + + SIZE_POSTIB(cnt)) + +/* + * Space for following string :"%u %u %u %.5s %u " + * [count iterations]: "%.8s:%u %llu %llu%c" + */ +#define SIZE_PIPE_ENTRY(cnt) (50 + (cnt) * 62) +#define SIZE_LOG_ENTRY(cnt) (6 + (cnt) * 5) + +static inline uint _ib_cmd_mem_write(struct adreno_device *adreno_dev, + uint *cmds, uint64_t gpuaddr, uint val, uint *off) +{ + unsigned int *start = cmds; + + *cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 1); + cmds += cp_gpuaddr(adreno_dev, cmds, gpuaddr); + *cmds++ = val; + + *off += sizeof(unsigned int); + return cmds - start; +} + +static inline uint _ib_cmd_reg_to_mem(struct adreno_device *adreno_dev, + uint *cmds, uint64_t gpuaddr, uint val, uint *off) +{ + unsigned int *start = cmds; + + *cmds++ = cp_mem_packet(adreno_dev, CP_REG_TO_MEM, 2, 1); + *cmds++ = val; + cmds += cp_gpuaddr(adreno_dev, cmds, gpuaddr); + + *off += sizeof(unsigned int); + return cmds - start; +} + +static u64 _build_pre_ib_cmds(struct adreno_device *adreno_dev, + struct adreno_profile *profile, + unsigned int head, unsigned int timestamp, + struct adreno_context *drawctxt, + u32 *dwords) +{ + struct adreno_profile_assigns_list *entry; + unsigned int *start, *ibcmds; + unsigned int count = profile->assignment_count; + uint64_t gpuaddr = profile->shared_buffer->gpuaddr; + unsigned int ib_offset = head + SIZE_DATA(count); + unsigned int data_offset = head 
* sizeof(unsigned int); + + ibcmds = ib_offset + ((unsigned int *) profile->shared_buffer->hostptr); + start = ibcmds; + + ibcmds += cp_identifier(adreno_dev, ibcmds, START_PROFILE_IDENTIFIER); + + /* + * Write ringbuffer commands to save the following to memory: + * timestamp, count, context_id, pid, tid, context type + */ + ibcmds += _ib_cmd_mem_write(adreno_dev, ibcmds, gpuaddr + data_offset, + timestamp, &data_offset); + ibcmds += _ib_cmd_mem_write(adreno_dev, ibcmds, gpuaddr + data_offset, + profile->assignment_count, &data_offset); + ibcmds += _ib_cmd_mem_write(adreno_dev, ibcmds, gpuaddr + data_offset, + drawctxt->base.id, &data_offset); + ibcmds += _ib_cmd_mem_write(adreno_dev, ibcmds, gpuaddr + data_offset, + pid_nr(drawctxt->base.proc_priv->pid), &data_offset); + ibcmds += _ib_cmd_mem_write(adreno_dev, ibcmds, gpuaddr + data_offset, + drawctxt->base.tid, &data_offset); + ibcmds += _ib_cmd_mem_write(adreno_dev, ibcmds, gpuaddr + data_offset, + drawctxt->type, &data_offset); + + /* loop for each countable assigned */ + list_for_each_entry(entry, &profile->assignments_list, list) { + ibcmds += _ib_cmd_mem_write(adreno_dev, ibcmds, + gpuaddr + data_offset, entry->offset, + &data_offset); + ibcmds += _ib_cmd_reg_to_mem(adreno_dev, ibcmds, + gpuaddr + data_offset, entry->offset, + &data_offset); + ibcmds += _ib_cmd_reg_to_mem(adreno_dev, ibcmds, + gpuaddr + data_offset, entry->offset_hi, + &data_offset); + + /* skip over post_ib counter data */ + data_offset += sizeof(unsigned int) * 2; + } + + + ibcmds += cp_identifier(adreno_dev, ibcmds, END_PROFILE_IDENTIFIER); + + *dwords = (ibcmds - start); + return profile->shared_buffer->gpuaddr + (ib_offset * sizeof(u32)); +} + +static u64 _build_post_ib_cmds(struct adreno_device *adreno_dev, + struct adreno_profile *profile, unsigned int head, + u32 *dwords) +{ + struct adreno_profile_assigns_list *entry; + unsigned int *start, *ibcmds; + unsigned int count = profile->assignment_count; + uint64_t gpuaddr = profile->shared_buffer->gpuaddr; + unsigned int ib_offset = head + SIZE_DATA(count) + SIZE_PREIB(count); + unsigned int data_offset = head * sizeof(unsigned int); + + ibcmds = ib_offset + ((unsigned int *) profile->shared_buffer->hostptr); + start = ibcmds; + + /* start of profile identifier */ + ibcmds += cp_identifier(adreno_dev, ibcmds, START_PROFILE_IDENTIFIER); + + /* skip over pre_ib preamble */ + data_offset += sizeof(unsigned int) * 6; + + /* loop for each countable assigned */ + list_for_each_entry(entry, &profile->assignments_list, list) { + /* skip over pre_ib counter data */ + data_offset += sizeof(unsigned int) * 3; + ibcmds += _ib_cmd_reg_to_mem(adreno_dev, ibcmds, + gpuaddr + data_offset, entry->offset, + &data_offset); + ibcmds += _ib_cmd_reg_to_mem(adreno_dev, ibcmds, + gpuaddr + data_offset, entry->offset_hi, + &data_offset); + } + + /* end of profile identifier */ + ibcmds += cp_identifier(adreno_dev, ibcmds, END_PROFILE_IDENTIFIER); + + *dwords = (ibcmds - start); + return profile->shared_buffer->gpuaddr + (ib_offset * sizeof(u32)); +} + +static bool shared_buf_empty(struct adreno_profile *profile) +{ + if (profile->shared_buffer->hostptr == NULL || + profile->shared_buffer->size == 0) + return true; + + if (profile->shared_head == profile->shared_tail) + return true; + + return false; +} + +static inline void shared_buf_inc(unsigned int max_size, + unsigned int *offset, size_t inc) +{ + *offset = (*offset + inc) % max_size; +} + +static inline void log_buf_wrapcnt(unsigned int cnt, uintptr_t *off) +{ + *off = (*off + 
cnt) % ADRENO_PROFILE_LOG_BUF_SIZE_DWORDS; +} + +static inline void log_buf_wrapinc_len(unsigned int *profile_log_buffer, + unsigned int **ptr, unsigned int len) +{ + *ptr += len; + if (*ptr >= (profile_log_buffer + + ADRENO_PROFILE_LOG_BUF_SIZE_DWORDS)) + *ptr -= ADRENO_PROFILE_LOG_BUF_SIZE_DWORDS; +} + +static inline void log_buf_wrapinc(unsigned int *profile_log_buffer, + unsigned int **ptr) +{ + log_buf_wrapinc_len(profile_log_buffer, ptr, 1); +} + +static inline unsigned int log_buf_available(struct adreno_profile *profile, + unsigned int *head_ptr) +{ + uintptr_t tail, head; + + tail = (uintptr_t) profile->log_tail - + (uintptr_t) profile->log_buffer; + head = (uintptr_t)head_ptr - (uintptr_t) profile->log_buffer; + if (tail > head) + return (tail - head) / sizeof(uintptr_t); + else + return ADRENO_PROFILE_LOG_BUF_SIZE_DWORDS - ((head - tail) / + sizeof(uintptr_t)); +} + +static inline unsigned int shared_buf_available(struct adreno_profile *profile) +{ + if (profile->shared_tail > profile->shared_head) + return profile->shared_tail - profile->shared_head; + else + return profile->shared_size - + (profile->shared_head - profile->shared_tail); +} + +static struct adreno_profile_assigns_list *_find_assignment_by_offset( + struct adreno_profile *profile, unsigned int offset) +{ + struct adreno_profile_assigns_list *entry; + + list_for_each_entry(entry, &profile->assignments_list, list) { + if (entry->offset == offset) + return entry; + } + + return NULL; +} + +static bool _in_assignments_list(struct adreno_profile *profile, + unsigned int groupid, unsigned int countable) +{ + struct adreno_profile_assigns_list *entry; + + list_for_each_entry(entry, &profile->assignments_list, list) { + if (entry->groupid == groupid && entry->countable == + countable) + return true; + } + + return false; +} + +static bool _add_to_assignments_list(struct adreno_profile *profile, + const char *str, unsigned int groupid, unsigned int countable, + unsigned int offset, unsigned int offset_hi) +{ + struct adreno_profile_assigns_list *entry; + + /* first make sure we can alloc memory */ + entry = kmalloc(sizeof(struct adreno_profile_assigns_list), GFP_KERNEL); + if (!entry) + return false; + + list_add_tail(&entry->list, &profile->assignments_list); + + entry->countable = countable; + entry->groupid = groupid; + entry->offset = offset; + entry->offset_hi = offset_hi; + + strlcpy(entry->name, str, sizeof(entry->name)); + + profile->assignment_count++; + + return true; +} + +static bool results_available(struct adreno_device *adreno_dev, + struct adreno_profile *profile, unsigned int *shared_buf_tail) +{ + unsigned int global_eop; + unsigned int off = profile->shared_tail; + unsigned int *shared_ptr = (unsigned int *) + profile->shared_buffer->hostptr; + unsigned int ts, cnt; + int ts_cmp; + + /* + * If shared_buffer empty or Memstore EOP timestamp is less than + * outstanding counter buffer timestamps then no results available + */ + if (shared_buf_empty(profile)) + return false; + + if (adreno_rb_readtimestamp(adreno_dev, + adreno_dev->cur_rb, + KGSL_TIMESTAMP_RETIRED, &global_eop)) + return false; + do { + cnt = *(shared_ptr + off + 1); + if (cnt == 0) + return false; + + ts = *(shared_ptr + off); + ts_cmp = timestamp_cmp(ts, global_eop); + if (ts_cmp >= 0) { + *shared_buf_tail = off; + if (off == profile->shared_tail) + return false; + else + return true; + } + shared_buf_inc(profile->shared_size, &off, + SIZE_SHARED_ENTRY(cnt)); + } while (off != profile->shared_head); + + *shared_buf_tail = 
profile->shared_head; + + return true; +} + +static void transfer_results(struct adreno_profile *profile, + unsigned int shared_buf_tail) +{ + unsigned int buf_off; + unsigned int ts, cnt, ctxt_id, pid, tid, client_type; + unsigned int *ptr = (unsigned int *) profile->shared_buffer->hostptr; + unsigned int *log_ptr, *log_base; + struct adreno_profile_assigns_list *assigns_list; + int i, tmp_tail; + + log_ptr = profile->log_head; + log_base = profile->log_buffer; + if (log_ptr == NULL) + return; + + /* + * go through counter buffers and format for write into log_buffer + * if log buffer doesn't have space just overwrite it circularly + * shared_buf is guaranteed to not wrap within an entry so can use + * ptr increment + */ + while (profile->shared_tail != shared_buf_tail) { + buf_off = profile->shared_tail; + /* + * format: timestamp, count, context_id + * count entries: pc_off, pc_start, pc_end + */ + ts = *(ptr + buf_off++); + cnt = *(ptr + buf_off++); + ctxt_id = *(ptr + buf_off++); + pid = *(ptr + buf_off++); + tid = *(ptr + buf_off++); + client_type = *(ptr + buf_off++); + + /* + * if entry overwrites the tail of log_buffer then adjust tail + * ptr to make room for the new entry, discarding old entry + */ + while (log_buf_available(profile, log_ptr) <= + SIZE_LOG_ENTRY(cnt)) { + unsigned int size_tail; + uintptr_t boff; + + size_tail = SIZE_LOG_ENTRY(0xffff & + *(profile->log_tail)); + boff = ((uintptr_t) profile->log_tail - + (uintptr_t) log_base) / sizeof(uintptr_t); + log_buf_wrapcnt(size_tail, &boff); + profile->log_tail = log_base + boff; + } + + *log_ptr = cnt; + log_buf_wrapinc(log_base, &log_ptr); + *log_ptr = client_type; + log_buf_wrapinc(log_base, &log_ptr); + *log_ptr = pid; + log_buf_wrapinc(log_base, &log_ptr); + *log_ptr = tid; + log_buf_wrapinc(log_base, &log_ptr); + *log_ptr = ctxt_id; + log_buf_wrapinc(log_base, &log_ptr); + *log_ptr = ts; + log_buf_wrapinc(log_base, &log_ptr); + + for (i = 0; i < cnt; i++) { + assigns_list = _find_assignment_by_offset( + profile, *(ptr + buf_off++)); + if (assigns_list == NULL) { + *log_ptr = (unsigned int) -1; + + shared_buf_inc(profile->shared_size, + &profile->shared_tail, + SIZE_SHARED_ENTRY(cnt)); + goto err; + } else { + *log_ptr = assigns_list->groupid << 16 | + (assigns_list->countable & 0xffff); + } + log_buf_wrapinc(log_base, &log_ptr); + *log_ptr = *(ptr + buf_off++); /* perf cntr start hi */ + log_buf_wrapinc(log_base, &log_ptr); + *log_ptr = *(ptr + buf_off++); /* perf cntr start lo */ + log_buf_wrapinc(log_base, &log_ptr); + *log_ptr = *(ptr + buf_off++); /* perf cntr end hi */ + log_buf_wrapinc(log_base, &log_ptr); + *log_ptr = *(ptr + buf_off++); /* perf cntr end lo */ + log_buf_wrapinc(log_base, &log_ptr); + + } + + tmp_tail = profile->shared_tail; + shared_buf_inc(profile->shared_size, + &profile->shared_tail, + SIZE_SHARED_ENTRY(cnt)); + /* + * Possibly lost some room as we cycled around, so it's safe to + * reset the max size + */ + if (profile->shared_tail < tmp_tail) + profile->shared_size = + ADRENO_PROFILE_SHARED_BUF_SIZE_DWORDS; + + } + profile->log_head = log_ptr; + return; +err: + /* reset head/tail to same on error in hopes we work correctly later */ + profile->log_head = profile->log_tail; +} + +static int profile_enable_get(void *data, u64 *val) +{ + struct kgsl_device *device = data; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + mutex_lock(&device->mutex); + *val = adreno_profile_enabled(&adreno_dev->profile); + mutex_unlock(&device->mutex); + + return 0; +} + +static int 
profile_enable_set(void *data, u64 val) +{ + struct kgsl_device *device = data; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_profile *profile = &adreno_dev->profile; + + mutex_lock(&device->mutex); + + if (val && profile->log_buffer == NULL) { + /* allocate profile_log_buffer the first time enabled */ + profile->log_buffer = vmalloc(ADRENO_PROFILE_LOG_BUF_SIZE); + if (profile->log_buffer == NULL) { + mutex_unlock(&device->mutex); + return -ENOMEM; + } + profile->log_tail = profile->log_buffer; + profile->log_head = profile->log_buffer; + } + + profile->enabled = val; + + mutex_unlock(&device->mutex); + + return 0; +} + +static ssize_t profile_assignments_read(struct file *filep, + char __user *ubuf, size_t max, loff_t *ppos) +{ + struct kgsl_device *device = (struct kgsl_device *) filep->private_data; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_profile *profile = &adreno_dev->profile; + struct adreno_profile_assigns_list *entry; + int len = 0, max_size = PAGE_SIZE; + char *buf, *pos; + ssize_t size = 0; + + mutex_lock(&device->mutex); + + if (profile->assignment_count == 0) { + mutex_unlock(&device->mutex); + return 0; + } + + buf = kzalloc(max_size, GFP_KERNEL); + if (!buf) { + mutex_unlock(&device->mutex); + return -ENOMEM; + } + + pos = buf; + + /* copy all assingments from list to str */ + list_for_each_entry(entry, &profile->assignments_list, list) { + len = scnprintf(pos, max_size, ASSIGNS_STR_FORMAT, + entry->name, entry->countable); + + max_size -= len; + pos += len; + } + + size = simple_read_from_buffer(ubuf, max, ppos, buf, + pos - buf); + + kfree(buf); + + mutex_unlock(&device->mutex); + return size; +} + +static void _remove_assignment(struct adreno_device *adreno_dev, + unsigned int groupid, unsigned int countable) +{ + struct adreno_profile *profile = &adreno_dev->profile; + struct adreno_profile_assigns_list *entry, *tmp; + + list_for_each_entry_safe(entry, tmp, &profile->assignments_list, list) { + if (entry->groupid == groupid && + entry->countable == countable) { + list_del(&entry->list); + + profile->assignment_count--; + + kfree(entry); + + /* remove from perf counter allocation */ + adreno_perfcounter_put(adreno_dev, groupid, countable, + PERFCOUNTER_FLAG_KERNEL); + } + } +} + +static void _add_assignment(struct adreno_device *adreno_dev, + unsigned int groupid, unsigned int countable) +{ + struct adreno_profile *profile = &adreno_dev->profile; + unsigned int offset, offset_hi; + const char *name = NULL; + + name = adreno_perfcounter_get_name(adreno_dev, groupid); + if (!name) + return; + + /* if already in assigned list skip it */ + if (_in_assignments_list(profile, groupid, countable)) + return; + + /* add to perf counter allocation, if fail skip it */ + if (adreno_perfcounter_get(adreno_dev, groupid, countable, + &offset, &offset_hi, PERFCOUNTER_FLAG_NONE)) + return; + + /* add to assignments list, put counter back if error */ + if (!_add_to_assignments_list(profile, name, groupid, + countable, offset, offset_hi)) + adreno_perfcounter_put(adreno_dev, groupid, + countable, PERFCOUNTER_FLAG_KERNEL); +} + +static char *_parse_next_assignment(struct adreno_device *adreno_dev, + char *str, int *groupid, int *countable, bool *remove) +{ + char *groupid_str, *countable_str, *next_str = NULL; + int ret; + + *groupid = -EINVAL; + *countable = -EINVAL; + *remove = false; + + /* remove spaces */ + while (*str == ' ') + str++; + + /* check if it's a remove assignment */ + if (*str == '-') { + *remove = true; + 
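+ /*
+ * Assignment strings are space-separated "groupid:countable" tokens;
+ * a leading '-' (consumed just below) turns the token into a removal
+ * request instead of an add.
+ */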
str++; + } + + /* get the groupid string */ + groupid_str = str; + while (*str != ':') { + if (*str == '\0') + return NULL; + *str = tolower(*str); + str++; + } + if (groupid_str == str) + return NULL; + + *str = '\0'; + str++; + + /* get the countable string */ + countable_str = str; + while (*str != ' ' && *str != '\0') + str++; + if (countable_str == str) + return NULL; + + /* + * If we have reached the end of the original string then make sure we + * return NULL from this function or we could accidently overrun + */ + + if (*str != '\0') { + *str = '\0'; + next_str = str + 1; + } + + /* set results */ + *groupid = adreno_perfcounter_get_groupid(adreno_dev, + groupid_str); + if (*groupid < 0) + return NULL; + ret = kstrtou32(countable_str, 10, countable); + if (ret) + return NULL; + + return next_str; +} + +static ssize_t profile_assignments_write(struct file *filep, + const char __user *user_buf, size_t len, loff_t *off) +{ + struct kgsl_device *device = (struct kgsl_device *) filep->private_data; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_profile *profile = &adreno_dev->profile; + size_t size = 0; + char *buf, *pbuf; + bool remove_assignment = false; + int groupid, countable, ret; + + if (len >= PAGE_SIZE || len == 0) + return -EINVAL; + + buf = kmalloc(len + 1, GFP_KERNEL); + if (buf == NULL) + return -ENOMEM; + + if (copy_from_user(buf, user_buf, len)) { + size = -EFAULT; + goto error_free; + } + + mutex_lock(&device->mutex); + + if (adreno_profile_enabled(profile)) { + size = -EINVAL; + goto error_unlock; + } + + ret = adreno_perfcntr_active_oob_get(adreno_dev); + if (ret) { + size = ret; + goto error_unlock; + } + + /* + * When adding/removing assignments, ensure that the GPU is done with + * all it's work. This helps to synchronize the work flow to the + * GPU and avoid racey conditions. 
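+ * The dispatcher's idle() op is used when one is registered;
+ * otherwise we fall back to adreno_idle() on the device.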
+ */ + if (adreno_dev->dispatch_ops && adreno_dev->dispatch_ops->idle) + ret = adreno_dev->dispatch_ops->idle(adreno_dev); + else + ret = adreno_idle(device); + if (ret) { + size = -ETIMEDOUT; + goto error_put; + } + + /* clear all shared buffer results */ + adreno_profile_process_results(adreno_dev); + + pbuf = buf; + + /* clear the log buffer */ + if (profile->log_buffer != NULL) { + profile->log_head = profile->log_buffer; + profile->log_tail = profile->log_buffer; + } + + + /* for sanity and parsing, ensure it is null terminated */ + buf[len] = '\0'; + + /* parse file buf and add(remove) to(from) appropriate lists */ + while (pbuf) { + pbuf = _parse_next_assignment(adreno_dev, pbuf, &groupid, + &countable, &remove_assignment); + if (groupid < 0 || countable < 0) + break; + + if (remove_assignment) + _remove_assignment(adreno_dev, groupid, countable); + else + _add_assignment(adreno_dev, groupid, countable); + } + + size = len; + +error_put: + adreno_perfcntr_active_oob_put(adreno_dev); +error_unlock: + mutex_unlock(&device->mutex); +error_free: + kfree(buf); + return size; +} + +static int _pipe_print_pending(char __user *ubuf, size_t max) +{ + loff_t unused = 0; + char str[] = "Operation Would Block!"; + + return simple_read_from_buffer(ubuf, max, + &unused, str, strlen(str)); +} + +static int _pipe_print_results(struct adreno_device *adreno_dev, + char __user *ubuf, size_t max) +{ + struct adreno_profile *profile = &adreno_dev->profile; + const char *grp_name; + char __user *usr_buf = ubuf; + unsigned int *log_ptr = NULL, *tmp_log_ptr = NULL; + int len, i; + int status = 0; + ssize_t size, total_size = 0; + unsigned int cnt, api_type, ctxt_id, pid, tid, ts, cnt_reg; + unsigned long long pc_start, pc_end; + const char *api_str; + char format_space; + loff_t unused = 0; + char pipe_hdr_buf[51]; /* 4 uint32 + 5 space + 5 API type + '\0' */ + char pipe_cntr_buf[63]; /* 2 uint64 + 1 uint32 + 4 spaces + 8 group */ + + /* convert unread entries to ASCII, copy to user-space */ + log_ptr = profile->log_tail; + + do { + /* store the tmp var for error cases so we can skip */ + tmp_log_ptr = log_ptr; + + /* Too many to output to pipe, so skip this data */ + cnt = *log_ptr; + log_buf_wrapinc(profile->log_buffer, &log_ptr); + + if (SIZE_PIPE_ENTRY(cnt) > max) { + log_buf_wrapinc_len(profile->log_buffer, + &tmp_log_ptr, SIZE_PIPE_ENTRY(cnt)); + log_ptr = tmp_log_ptr; + goto done; + } + + /* + * Not enough space left in pipe, return without doing + * anything + */ + if ((max - (usr_buf - ubuf)) < SIZE_PIPE_ENTRY(cnt)) { + log_ptr = tmp_log_ptr; + goto done; + } + + api_type = *log_ptr; + api_str = kgsl_context_type(api_type); + log_buf_wrapinc(profile->log_buffer, &log_ptr); + pid = *log_ptr; + log_buf_wrapinc(profile->log_buffer, &log_ptr); + tid = *log_ptr; + log_buf_wrapinc(profile->log_buffer, &log_ptr); + ctxt_id = *log_ptr; + log_buf_wrapinc(profile->log_buffer, &log_ptr); + ts = *log_ptr; + log_buf_wrapinc(profile->log_buffer, &log_ptr); + len = scnprintf(pipe_hdr_buf, sizeof(pipe_hdr_buf) - 1, + "%u %u %u %.5s %u ", + pid, tid, ctxt_id, api_str, ts); + size = simple_read_from_buffer(usr_buf, + max - (usr_buf - ubuf), + &unused, pipe_hdr_buf, len); + + /* non-fatal error, so skip rest of entry and return */ + if (size < 0) { + log_buf_wrapinc_len(profile->log_buffer, + &tmp_log_ptr, SIZE_PIPE_ENTRY(cnt)); + log_ptr = tmp_log_ptr; + goto done; + } + + unused = 0; + usr_buf += size; + total_size += size; + + for (i = 0; i < cnt; i++) { + unsigned int start_lo, start_hi; + unsigned int end_lo, 
end_hi; + + grp_name = adreno_perfcounter_get_name( + adreno_dev, (*log_ptr >> 16) & 0xffff); + + /* non-fatal error, so skip rest of entry and return */ + if (grp_name == NULL) { + log_buf_wrapinc_len(profile->log_buffer, + &tmp_log_ptr, SIZE_PIPE_ENTRY(cnt)); + log_ptr = tmp_log_ptr; + goto done; + } + + if (i == cnt - 1) + format_space = '\n'; + else + format_space = ' '; + + cnt_reg = *log_ptr & 0xffff; + log_buf_wrapinc(profile->log_buffer, &log_ptr); + start_lo = *log_ptr; + log_buf_wrapinc(profile->log_buffer, &log_ptr); + start_hi = *log_ptr; + log_buf_wrapinc(profile->log_buffer, &log_ptr); + end_lo = *log_ptr; + log_buf_wrapinc(profile->log_buffer, &log_ptr); + end_hi = *log_ptr; + log_buf_wrapinc(profile->log_buffer, &log_ptr); + + pc_start = (((uint64_t) start_hi) << 32) | start_lo; + pc_end = (((uint64_t) end_hi) << 32) | end_lo; + + len = scnprintf(pipe_cntr_buf, + sizeof(pipe_cntr_buf) - 1, + "%.8s:%u %llu %llu%c", + grp_name, cnt_reg, pc_start, + pc_end, format_space); + + size = simple_read_from_buffer(usr_buf, + max - (usr_buf - ubuf), + &unused, pipe_cntr_buf, len); + + /* non-fatal error, so skip rest of entry and return */ + if (size < 0) { + log_buf_wrapinc_len(profile->log_buffer, + &tmp_log_ptr, SIZE_PIPE_ENTRY(cnt)); + log_ptr = tmp_log_ptr; + goto done; + } + unused = 0; + usr_buf += size; + total_size += size; + } + } while (log_ptr != profile->log_head); + +done: + status = total_size; + profile->log_tail = log_ptr; + + return status; +} + +static ssize_t profile_pipe_print(struct file *filep, char __user *ubuf, + size_t max, loff_t *ppos) +{ + struct kgsl_device *device = (struct kgsl_device *) filep->private_data; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_profile *profile = &adreno_dev->profile; + char __user *usr_buf = ubuf; + int status = 0; + + /* + * this file not seekable since it only supports streaming, ignore + * ppos <> 0 + */ + /* + * format + * for each perf counter + */ + + mutex_lock(&device->mutex); + + while (1) { + /* process any results that are available into the log_buffer */ + status = adreno_profile_process_results(adreno_dev); + if (status > 0) { + /* if we have results, print them and exit */ + status = _pipe_print_results(adreno_dev, usr_buf, max); + break; + } + + /* there are no unread results, act accordingly */ + if (filep->f_flags & O_NONBLOCK) { + if (profile->shared_tail != profile->shared_head) { + status = _pipe_print_pending(usr_buf, max); + break; + } + + status = 0; + break; + } + + mutex_unlock(&device->mutex); + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(msecs_to_jiffies(100)); + mutex_lock(&device->mutex); + + if (signal_pending(current)) { + status = 0; + break; + } + } + + mutex_unlock(&device->mutex); + + return status; +} + +static int profile_groups_show(struct seq_file *s, void *unused) +{ + struct kgsl_device *device = (struct kgsl_device *) s->private; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_perfcounters *counters = + ADRENO_PERFCOUNTERS(adreno_dev); + const struct adreno_perfcount_group *group; + int i, j, used; + + mutex_lock(&device->mutex); + + for (i = 0; i < counters->group_count; ++i) { + group = &(counters->groups[i]); + /* get number of counters used for this group */ + used = 0; + for (j = 0; j < group->reg_count; j++) { + if (group->regs[j].countable != + KGSL_PERFCOUNTER_NOT_USED) + used++; + } + + seq_printf(s, "%s %d %d\n", group->name, + group->reg_count, used); + } + + mutex_unlock(&device->mutex); + + return 
0; +} + +DEFINE_SHOW_ATTRIBUTE(profile_groups); + +static const struct file_operations profile_pipe_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = profile_pipe_print, + .llseek = noop_llseek, +}; + +static const struct file_operations profile_assignments_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = profile_assignments_read, + .write = profile_assignments_write, + .llseek = noop_llseek, +}; + +DEFINE_DEBUGFS_ATTRIBUTE(profile_enable_fops, + profile_enable_get, + profile_enable_set, "%llu\n"); + +void adreno_profile_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct adreno_profile *profile = &adreno_dev->profile; + struct dentry *profile_dir; + + profile->enabled = false; + + /* allocate shared_buffer, which includes pre_ib and post_ib */ + profile->shared_size = ADRENO_PROFILE_SHARED_BUF_SIZE_DWORDS; + profile->shared_buffer = kgsl_allocate_global(device, + profile->shared_size * sizeof(unsigned int), + 0, 0, 0, "profile"); + if (IS_ERR(profile->shared_buffer)) { + profile->shared_size = 0; + return; + } + + INIT_LIST_HEAD(&profile->assignments_list); + + /* Create perf counter debugfs */ + profile_dir = debugfs_create_dir("profiling", device->d_debugfs); + if (IS_ERR(profile_dir)) + return; + + debugfs_create_file("enable", 0644, profile_dir, device, + &profile_enable_fops); + debugfs_create_file("blocks", 0444, profile_dir, device, + &profile_groups_fops); + debugfs_create_file("pipe", 0444, profile_dir, device, + &profile_pipe_fops); + debugfs_create_file("assignments", 0644, profile_dir, device, + &profile_assignments_fops); +} + +void adreno_profile_close(struct adreno_device *adreno_dev) +{ + struct adreno_profile *profile = &adreno_dev->profile; + struct adreno_profile_assigns_list *entry, *tmp; + + profile->enabled = false; + vfree(profile->log_buffer); + profile->log_buffer = NULL; + profile->log_head = NULL; + profile->log_tail = NULL; + profile->shared_head = 0; + profile->shared_tail = 0; + profile->shared_size = 0; + + profile->assignment_count = 0; + + list_for_each_entry_safe(entry, tmp, &profile->assignments_list, list) { + list_del(&entry->list); + kfree(entry); + } +} + +int adreno_profile_process_results(struct adreno_device *adreno_dev) +{ + struct adreno_profile *profile = &adreno_dev->profile; + unsigned int shared_buf_tail = profile->shared_tail; + + if (!results_available(adreno_dev, profile, &shared_buf_tail)) + return 0; + + /* + * transfer retired results to log_buffer + * update shared_buffer tail ptr + */ + transfer_results(profile, shared_buf_tail); + + return 1; +} + +u64 adreno_profile_preib_processing(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, u32 *dwords) +{ + struct adreno_profile *profile = &adreno_dev->profile; + int count = profile->assignment_count; + unsigned int entry_head = profile->shared_head; + unsigned int *shared_ptr; + struct adreno_ringbuffer *rb = ADRENO_CURRENT_RINGBUFFER(adreno_dev); + + if (!drawctxt || !adreno_profile_assignments_ready(profile)) + return 0; + + /* + * check if space available, include the post_ib in space available + * check so don't have to handle trying to undo the pre_ib insertion in + * ringbuffer in the case where only the post_ib fails enough space + */ + if (SIZE_SHARED_ENTRY(count) >= shared_buf_available(profile)) + return 0; + + if (entry_head + SIZE_SHARED_ENTRY(count) >= profile->shared_size) { + /* entry_head would wrap, start entry_head at 0 in buffer */ + entry_head = 0; + 
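+ /*
+ * Clamp shared_size to the old head so readers stop at the last
+ * complete entry; transfer_results() restores the full buffer size
+ * once the tail wraps back around.
+ */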
profile->shared_size = profile->shared_head; + profile->shared_head = 0; + + /* recheck space available */ + if (SIZE_SHARED_ENTRY(count) >= shared_buf_available(profile)) + return 0; + } + + /* zero out the counter area of shared_buffer entry_head */ + shared_ptr = entry_head + ((unsigned int *) + profile->shared_buffer->hostptr); + memset(shared_ptr, 0, SIZE_SHARED_ENTRY(count) * sizeof(unsigned int)); + + /* reserve space for the pre ib shared buffer */ + shared_buf_inc(profile->shared_size, &profile->shared_head, + SIZE_SHARED_ENTRY(count)); + + /* create the shared ibdesc */ + return _build_pre_ib_cmds(adreno_dev, profile, entry_head, + rb->timestamp + 1, drawctxt, dwords); +} + +u64 adreno_profile_postib_processing(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, u32 *dwords) +{ + struct adreno_profile *profile = &adreno_dev->profile; + int count = profile->assignment_count; + unsigned int entry_head = profile->shared_head - + SIZE_SHARED_ENTRY(count); + + if (!drawctxt || !adreno_profile_assignments_ready(profile)) + return 0; + + /* create the shared ibdesc */ + return _build_post_ib_cmds(adreno_dev, profile, entry_head, dwords); +} diff --git a/adreno_profile.h b/adreno_profile.h new file mode 100644 index 0000000000..1408d91a9b --- /dev/null +++ b/adreno_profile.h @@ -0,0 +1,107 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2013-2014,2019-2021 The Linux Foundation. All rights reserved. + */ +#ifndef __ADRENO_PROFILE_H +#define __ADRENO_PROFILE_H + +/** + * struct adreno_profile_assigns_list: linked list for assigned perf counters + * @list: linkage for nodes in list + * @name: group name or GPU name name + * @groupid: group id + * @countable: countable assigned to perfcounter + * @offset: perfcounter register address offset + */ +struct adreno_profile_assigns_list { + struct list_head list; + char name[25]; + unsigned int groupid; + unsigned int countable; + unsigned int offset; /* LO offset */ + unsigned int offset_hi; /* HI offset */ +}; + +struct adreno_profile { + struct list_head assignments_list; /* list of all assignments */ + unsigned int assignment_count; /* Number of assigned counters */ + unsigned int *log_buffer; + unsigned int *log_head; + unsigned int *log_tail; + bool enabled; + /* counter, pre_ib, and post_ib held in one large circular buffer + * shared between kgsl and GPU + * counter entry 0 + * pre_ib entry 0 + * post_ib entry 0 + * ... 
+ * counter entry N + * pre_ib entry N + * post_ib entry N + */ + struct kgsl_memdesc *shared_buffer; + unsigned int shared_head; + unsigned int shared_tail; + unsigned int shared_size; +}; + +#define ADRENO_PROFILE_SHARED_BUF_SIZE_DWORDS (48 * 4096 / sizeof(uint)) +/* sized @ 48 pages should allow for over 50 outstanding IBs minimum, 1755 max*/ + +#define ADRENO_PROFILE_LOG_BUF_SIZE (1024 * 920) +/* sized for 1024 entries of fully assigned 45 cnters in log buffer, 230 pages*/ +#define ADRENO_PROFILE_LOG_BUF_SIZE_DWORDS (ADRENO_PROFILE_LOG_BUF_SIZE / \ + sizeof(unsigned int)) + +#ifdef CONFIG_DEBUG_FS +void adreno_profile_init(struct adreno_device *adreno_dev); +void adreno_profile_close(struct adreno_device *adreno_dev); +int adreno_profile_process_results(struct adreno_device *adreno_dev); +u64 adreno_profile_preib_processing(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, u32 *dwords); +u64 adreno_profile_postib_processing(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, u32 *dwords); +#else +static inline void adreno_profile_init(struct adreno_device *adreno_dev) { } +static inline void adreno_profile_close(struct adreno_device *adreno_dev) { } +static inline int adreno_profile_process_results( + struct adreno_device *adreno_dev) +{ + return 0; +} + +static inline u64 +adreno_profile_preib_processing(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, u32 *dwords) +{ + return 0; +} + +static inline u64 +adreno_profile_postib_processing(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, u32 *dwords) +{ + return 0; +} + +#endif + +static inline bool adreno_profile_enabled(struct adreno_profile *profile) +{ + return profile->enabled; +} + +static inline bool adreno_profile_has_assignments( + struct adreno_profile *profile) +{ + return list_empty(&profile->assignments_list) ? false : true; +} + +static inline bool adreno_profile_assignments_ready( + struct adreno_profile *profile) +{ + return adreno_profile_enabled(profile) && + adreno_profile_has_assignments(profile); +} + +#endif diff --git a/adreno_ringbuffer.c b/adreno_ringbuffer.c new file mode 100644 index 0000000000..5721bb4fb1 --- /dev/null +++ b/adreno_ringbuffer.c @@ -0,0 +1,435 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. + */ + +#include +#include +#include +#include + +#include "a3xx_reg.h" +#include "a5xx_reg.h" +#include "a6xx_reg.h" +#include "adreno.h" +#include "adreno_pm4types.h" +#include "adreno_ringbuffer.h" +#include "adreno_trace.h" +#include "kgsl_trace.h" + + +#define RB_HOSTPTR(_rb, _pos) \ + ((unsigned int *) ((_rb)->buffer_desc->hostptr + \ + ((_pos) * sizeof(unsigned int)))) + +#define RB_GPUADDR(_rb, _pos) \ + ((_rb)->buffer_desc->gpuaddr + ((_pos) * sizeof(unsigned int))) + +void adreno_get_submit_time(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, + struct adreno_submit_time *time) +{ + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + unsigned long flags; + struct adreno_context *drawctxt = rb->drawctxt_active; + struct kgsl_context *context = &drawctxt->base; + + if (!time) + return; + + /* + * Here we are attempting to create a mapping between the + * GPU time domain (alwayson counter) and the CPU time domain + * (local_clock) by sampling both values as close together as + * possible. This is useful for many types of debugging and + * profiling. 
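+ * (One alwayson tick is nominally 1/19.2 MHz, i.e. roughly 52 ns;
+ * see struct adreno_submit_time.)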
In order to make this mapping as accurate as + * possible, we must turn off interrupts to avoid running + * interrupt handlers between the two samples. + */ + + local_irq_save(flags); + + time->ticks = gpudev->read_alwayson(adreno_dev); + + /* Trace the GPU time to create a mapping to ftrace time */ + trace_adreno_cmdbatch_sync(context->id, context->priority, + drawctxt->timestamp, time->ticks); + + /* Get the kernel clock for time since boot */ + time->ktime = local_clock(); + + /* Get the timeofday for the wall time (for the user) */ + ktime_get_real_ts64(&time->utime); + + local_irq_restore(flags); +} + +unsigned int *adreno_ringbuffer_allocspace(struct adreno_ringbuffer *rb, + unsigned int dwords) +{ + struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb); + unsigned int rptr = adreno_get_rptr(rb); + unsigned int ret; + + if (rptr <= rb->_wptr) { + unsigned int *cmds; + + if (rb->_wptr + dwords <= (KGSL_RB_DWORDS - 2)) { + ret = rb->_wptr; + rb->_wptr = (rb->_wptr + dwords) % KGSL_RB_DWORDS; + return RB_HOSTPTR(rb, ret); + } + + /* + * There isn't enough space toward the end of ringbuffer. So + * look for space from the beginning of ringbuffer upto the + * read pointer. + */ + if (dwords < rptr) { + cmds = RB_HOSTPTR(rb, rb->_wptr); + *cmds = cp_packet(adreno_dev, CP_NOP, + KGSL_RB_DWORDS - rb->_wptr - 1); + rb->_wptr = dwords; + return RB_HOSTPTR(rb, 0); + } + } + + if (rb->_wptr + dwords < rptr) { + ret = rb->_wptr; + rb->_wptr = (rb->_wptr + dwords) % KGSL_RB_DWORDS; + return RB_HOSTPTR(rb, ret); + } + + return ERR_PTR(-ENOSPC); +} + +void adreno_ringbuffer_stop(struct adreno_device *adreno_dev) +{ + struct adreno_ringbuffer *rb; + int i; + + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) + kgsl_cancel_events(KGSL_DEVICE(adreno_dev), &(rb->events)); +} + +static int _rb_readtimestamp(struct kgsl_device *device, + void *priv, enum kgsl_timestamp_type type, + unsigned int *timestamp) +{ + return adreno_rb_readtimestamp(ADRENO_DEVICE(device), priv, type, + timestamp); +} + +int adreno_ringbuffer_setup(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, int id) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + unsigned int priv = 0; + int ret; + + /* + * Allocate mem for storing RB pagetables and commands to + * switch pagetable + */ + ret = adreno_allocate_global(device, &rb->pagetable_desc, PAGE_SIZE, + SZ_16K, 0, KGSL_MEMDESC_PRIVILEGED, "pagetable_desc"); + if (ret) + return ret; + + /* allocate a chunk of memory to create user profiling IB1s */ + adreno_allocate_global(device, &rb->profile_desc, PAGE_SIZE, + 0, KGSL_MEMFLAGS_GPUREADONLY, 0, "profile_desc"); + + if (ADRENO_FEATURE(adreno_dev, ADRENO_APRIV)) + priv |= KGSL_MEMDESC_PRIVILEGED; + + ret = adreno_allocate_global(device, &rb->buffer_desc, KGSL_RB_SIZE, + SZ_4K, KGSL_MEMFLAGS_GPUREADONLY, priv, "ringbuffer"); + if (ret) + return ret; + + if (!list_empty(&rb->events.group)) + return 0; + + rb->id = id; + kgsl_add_event_group(device, &rb->events, NULL, _rb_readtimestamp, rb, + "rb_events-%d", id); + + rb->timestamp = 0; + init_waitqueue_head(&rb->ts_expire_waitq); + + spin_lock_init(&rb->preempt_lock); + + return 0; +} + +void adreno_preemption_timer(struct timer_list *t) +{ + struct adreno_preemption *preempt = from_timer(preempt, t, timer); + struct adreno_device *adreno_dev = container_of(preempt, + struct adreno_device, preempt); + + /* We should only be here from a triggered state */ + if (!adreno_move_preempt_state(adreno_dev, + ADRENO_PREEMPT_TRIGGERED, ADRENO_PREEMPT_FAULTED)) + return; + + /* 
Schedule the worker to take care of the details */ + queue_work(system_unbound_wq, &adreno_dev->preempt.work); +} + +void adreno_drawobj_set_constraint(struct kgsl_device *device, + struct kgsl_drawobj *drawobj) +{ + struct kgsl_context *context = drawobj->context; + unsigned long flags = drawobj->flags; + + /* + * Check if the context has a constraint and constraint flags are + * set. + */ + if (context->pwr_constraint.type && + ((context->flags & KGSL_CONTEXT_PWR_CONSTRAINT) || + (drawobj->flags & KGSL_CONTEXT_PWR_CONSTRAINT))) + kgsl_pwrctrl_set_constraint(device, &context->pwr_constraint, + context->id, drawobj->timestamp); + + if (context->l3_pwr_constraint.type && + ((context->flags & KGSL_CONTEXT_PWR_CONSTRAINT) || + (flags & KGSL_CONTEXT_PWR_CONSTRAINT))) { + + if (!device->num_l3_pwrlevels) { + dev_err_once(device->dev, + "l3 voting not available\n"); + return; + } + + switch (context->l3_pwr_constraint.type) { + case KGSL_CONSTRAINT_L3_PWRLEVEL: { + unsigned int sub_type; + unsigned int new_l3; + int ret = 0; + struct dcvs_freq freq = {0}; + + if (!device->l3_vote) + return; + + sub_type = context->l3_pwr_constraint.sub_type; + + /* + * If an L3 constraint is already set, set the new + * one only if it is higher. + */ + new_l3 = max_t(unsigned int, sub_type + 1, + device->cur_l3_pwrlevel); + new_l3 = min_t(unsigned int, new_l3, + device->num_l3_pwrlevels - 1); + + if (device->cur_l3_pwrlevel == new_l3) + return; + + freq.ib = device->l3_freq[new_l3]; + freq.hw_type = DCVS_L3; + ret = qcom_dcvs_update_votes(KGSL_L3_DEVICE, &freq, 1, + DCVS_SLOW_PATH); + if (!ret) { + trace_kgsl_constraint(device, + KGSL_CONSTRAINT_L3_PWRLEVEL, new_l3, 1); + device->cur_l3_pwrlevel = new_l3; + } else { + dev_err_ratelimited(device->dev, + "Could not set l3_vote: %d\n", + ret); + } + break; + } + } + } +} + +int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev, + struct kgsl_drawobj_cmd *cmdobj, + struct adreno_submit_time *time) +{ + struct adreno_submit_time local = { 0 }; + struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj); + struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context); + struct adreno_ringbuffer *rb = drawctxt->rb; + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + u32 flags = 0; + int ret; + + /* + * If SKIP CMD flag is set for current context + * a) set SKIPCMD as fault_recovery for current commandbatch + * b) store context's commandbatch fault_policy in current + * commandbatch fault_policy and clear context's commandbatch + * fault_policy + * c) force preamble for commandbatch + */ + if (test_bit(ADRENO_CONTEXT_SKIP_CMD, &drawctxt->base.priv) && + (!test_bit(CMDOBJ_SKIP, &cmdobj->priv))) { + + set_bit(KGSL_FT_SKIPCMD, &cmdobj->fault_recovery); + cmdobj->fault_policy = drawctxt->fault_policy; + set_bit(CMDOBJ_FORCE_PREAMBLE, &cmdobj->priv); + + /* if context is detached print fault recovery */ + adreno_fault_skipcmd_detached(adreno_dev, drawctxt, drawobj); + + /* clear the drawctxt flags */ + clear_bit(ADRENO_CONTEXT_SKIP_CMD, &drawctxt->base.priv); + drawctxt->fault_policy = 0; + } + + /* Check if user profiling should be enabled */ + + if ((drawobj->flags & KGSL_DRAWOBJ_PROFILING) && + cmdobj->profiling_buf_entry) { + flags |= F_USER_PROFILE; + + /* + * we want to use an adreno_submit_time struct to get the + * precise moment when the command is submitted to the + * ringbuffer. 
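+ * (adreno_get_submit_time() samples the alwayson counter and
+ * local_clock() back to back with interrupts disabled.)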
If an upstream caller already passed down a + * pointer piggyback on that otherwise use a local struct + */ + if (!time) + time = &local; + + time->drawobj = drawobj; + } + + flags |= F_PREAMBLE; + + /* + * When preamble is enabled, the preamble buffer with state restoration + * commands are stored in the first node of the IB chain. + * We can skip that if a context switch hasn't occurred. + */ + if ((drawctxt->base.flags & KGSL_CONTEXT_PREAMBLE) && + !test_bit(CMDOBJ_FORCE_PREAMBLE, &cmdobj->priv) && + (rb->drawctxt_active == drawctxt)) + flags &= ~F_PREAMBLE; + + /* + * In skip mode don't issue the draw IBs but keep all the other + * accoutrements of a submision (including the interrupt) to keep + * the accounting sane. Set start_index and numibs to 0 to just + * generate the start and end markers and skip everything else + */ + if (test_bit(CMDOBJ_SKIP, &cmdobj->priv)) { + flags &= ~F_PREAMBLE; + flags |= F_SKIP; + } + + /* Enable kernel profiling */ + if (test_bit(CMDOBJ_PROFILE, &cmdobj->priv)) + flags |= F_KERNEL_PROFILE; + + /* Add a WFI to the end of the submission */ + if (test_bit(CMDOBJ_WFI, &cmdobj->priv)) + flags |= F_WFI; + + /* + * For some targets, we need to execute a dummy shader operation after a + * power collapse + */ + if (test_and_clear_bit(ADRENO_DEVICE_PWRON, &adreno_dev->priv) && + test_bit(ADRENO_DEVICE_PWRON_FIXUP, &adreno_dev->priv)) + flags |= F_PWRON_FIXUP; + + /* Check to see the submission should be secure */ + if (drawobj->context->flags & KGSL_CONTEXT_SECURE) + flags |= F_SECURE; + + /* process any profiling results that are available into the log_buf */ + adreno_profile_process_results(adreno_dev); + + ret = gpudev->ringbuffer_submitcmd(adreno_dev, cmdobj, + flags, time); + + if (!ret) { + set_bit(KGSL_CONTEXT_PRIV_SUBMITTED, &drawobj->context->priv); + cmdobj->global_ts = drawctxt->internal_timestamp; + } + + return ret; +} + +/** + * adreno_ringbuffer_wait_callback() - Callback function for event registered + * on a ringbuffer timestamp + * @device: Device for which the the callback is valid + * @context: The context of the event + * @priv: The private parameter of the event + * @result: Result of the event trigger + */ +static void adreno_ringbuffer_wait_callback(struct kgsl_device *device, + struct kgsl_event_group *group, + void *priv, int result) +{ + struct adreno_ringbuffer *rb = group->priv; + + wake_up_all(&rb->ts_expire_waitq); +} + +/* check if timestamp is greater than the current rb timestamp */ +static inline int adreno_ringbuffer_check_timestamp( + struct adreno_ringbuffer *rb, + unsigned int timestamp, int type) +{ + struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb); + unsigned int ts; + + adreno_rb_readtimestamp(adreno_dev, rb, type, &ts); + return (timestamp_cmp(ts, timestamp) >= 0); +} + + +/** + * adreno_ringbuffer_waittimestamp() - Wait for a RB timestamp + * @rb: The ringbuffer to wait on + * @timestamp: The timestamp to wait for + * @msecs: The wait timeout period + */ +int adreno_ringbuffer_waittimestamp(struct adreno_ringbuffer *rb, + unsigned int timestamp, + unsigned int msecs) +{ + struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + unsigned long wait_time; + + /* check immediately if timeout is 0 */ + if (msecs == 0) + return adreno_ringbuffer_check_timestamp(rb, + timestamp, KGSL_TIMESTAMP_RETIRED) ? 
0 : -EBUSY; + + ret = kgsl_add_event(device, &rb->events, timestamp, + adreno_ringbuffer_wait_callback, NULL); + if (ret) + return ret; + + mutex_unlock(&device->mutex); + + wait_time = msecs_to_jiffies(msecs); + if (wait_event_timeout(rb->ts_expire_waitq, + !kgsl_event_pending(device, &rb->events, timestamp, + adreno_ringbuffer_wait_callback, NULL), + wait_time) == 0) + ret = -ETIMEDOUT; + + mutex_lock(&device->mutex); + /* + * after wake up make sure that expected timestamp has retired + * because the wakeup could have happened due to a cancel event + */ + if (!ret && !adreno_ringbuffer_check_timestamp(rb, + timestamp, KGSL_TIMESTAMP_RETIRED)) { + ret = -EAGAIN; + } + + return ret; +} diff --git a/adreno_ringbuffer.h b/adreno_ringbuffer.h new file mode 100644 index 0000000000..447586e72d --- /dev/null +++ b/adreno_ringbuffer.h @@ -0,0 +1,247 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2002,2007-2020, The Linux Foundation. All rights reserved. + */ +#ifndef __ADRENO_RINGBUFFER_H +#define __ADRENO_RINGBUFFER_H + +/* Given a ringbuffer, return the adreno device that owns it */ + +#define _RB_OFFSET(_id) (offsetof(struct adreno_device, ringbuffers) + \ + ((_id) * sizeof(struct adreno_ringbuffer))) + +#define ADRENO_RB_DEVICE(_rb) \ + ((struct adreno_device *) (((void *) (_rb)) - _RB_OFFSET((_rb)->id))) + +/* Adreno ringbuffer size in bytes */ +#define KGSL_RB_SIZE (32 * 1024) + +/* + * A handy macro to convert the RB size to dwords since most ringbuffer + * operations happen in dword increments + */ +#define KGSL_RB_DWORDS (KGSL_RB_SIZE >> 2) + +/* Specifies that the command should be run in protected mode */ +#define F_NOTPROTECTED BIT(0) +/* Indicates that the CP should wait for idle after executing the command */ +#define F_WFI BIT(1) +/* Indicates that the poweron fixup should be executed before the command */ +#define F_PWRON_FIXUP BIT(2) +/* Indicates that the submission should be secure */ +#define F_SECURE BIT(3) +/* Indicates that the IBs in the submission should be skipped */ +#define F_SKIP BIT(4) +/* Indicates that user always on timer profiling is enabled */ +#define F_USER_PROFILE BIT(5) +/* Indicates that kernel always on timer profiling is enabled */ +#define F_KERNEL_PROFILE BIT(6) +/* Indicates that the submission has a preamble */ +#define F_PREAMBLE BIT(7) + +#define IS_NOTPROTECTED(flags) ((flags) & F_NOTPROTECTED) +#define IS_WFI(flags) ((flags) & F_WFI) +#define IS_PWRON_FIXUP(flags) ((flags) & F_PWRON_FIXUP) +#define IS_SECURE(flags) ((flags) & F_SECURE) +#define IS_SKIP(flags) ((flags) & F_SKIP) +#define IS_USER_PROFILE(flags) ((flags) & F_USER_PROFILE) +#define IS_KERNEL_PROFILE(flags) ((flags) & F_KERNEL_PROFILE) +#define IS_PREAMBLE(flags) ((flags) & F_PREAMBLE) + +struct kgsl_device; +struct kgsl_device_private; + +/** + * struct adreno_submit_time - utility structure to store the wall clock / GPU + * ticks at command submit time + * @ticks: GPU ticks at submit time (from the 19.2Mhz timer) + * @ktime: local clock time (in nanoseconds) + * @utime: Wall clock time + * @drawobj: the object that we want to profile + */ +struct adreno_submit_time { + uint64_t ticks; + u64 ktime; + struct timespec64 utime; + struct kgsl_drawobj *drawobj; +}; + +/** + * struct adreno_ringbuffer_pagetable_info - Contains fields used during a + * pagetable switch. + * @current_global_ptname: The current pagetable id being used by the GPU. 
+ * Only the ringbuffers[0] current_global_ptname is used to keep track of + * the current pagetable id + * @current_rb_ptname: The current pagetable active on the given RB + * @incoming_ptname: Contains the incoming pagetable we are switching to. After + * switching of pagetable this value equals current_rb_ptname. + * @switch_pt_enable: Flag used during pagetable switch to check if pt + * switch can be skipped + * @ttbr0: value to program into TTBR0 during pagetable switch. + * @contextidr: value to program into CONTEXTIDR during pagetable switch. + */ +struct adreno_ringbuffer_pagetable_info { + int current_global_ptname; + int current_rb_ptname; + int incoming_ptname; + int switch_pt_enable; + uint64_t ttbr0; + unsigned int contextidr; +}; + +#define PT_INFO_OFFSET(_field) \ + offsetof(struct adreno_ringbuffer_pagetable_info, _field) + +/** + * struct adreno_ringbuffer - Definition for an adreno ringbuffer object + * @flags: Internal control flags for the ringbuffer + * @buffer_desc: Pointer to the ringbuffer memory descriptor + * @_wptr: The next value of wptr to be written to the hardware on submit + * @wptr: Local copy of the wptr offset last written to hardware + * @last_wptr: offset of the last wptr that was written to CFF + * @rb_ctx: The context that represents a ringbuffer + * @id: Priority level of the ringbuffer, also used as an ID + * @fault_detect_ts: The last retired global timestamp read during fault detect + * @timestamp: The RB's global timestamp + * @events: A kgsl_event_group for this context - contains the list of GPU + * events + * @drawctxt_active: The last pagetable that this ringbuffer is set to + * @preemption_desc: The memory descriptor containing + * preemption info written/read by CP + * @secure_preemption_desc: The memory descriptor containing + * preemption info written/read by CP for secure contexts + * @perfcounter_save_restore_desc: Used by CP to save/restore the perfcounter + * values across preemption + * @pagetable_desc: Memory to hold information about the pagetables being used + * and the commands to switch pagetable on the RB + * @dispatch_q: The dispatcher side queue for this ringbuffer + * @ts_expire_waitq: Wait queue to wait for rb timestamp to expire + * @ts_expire_waitq: Wait q to wait for rb timestamp to expire + * @wptr_preempt_end: Used during preemption to check that preemption occurred + * at the right rptr + * @gpr11: The gpr11 value of this RB + * @preempted_midway: Indicates that the RB was preempted before rptr = wptr + * @preempt_lock: Lock to protect the wptr pointer while it is being updated + * @skip_inline_wptr: Used during preemption to make sure wptr is updated in + * hardware + */ +struct adreno_ringbuffer { + uint32_t flags; + struct kgsl_memdesc *buffer_desc; + unsigned int _wptr; + unsigned int wptr; + unsigned int last_wptr; + int id; + unsigned int fault_detect_ts; + unsigned int timestamp; + struct kgsl_event_group events; + struct adreno_context *drawctxt_active; + struct kgsl_memdesc *preemption_desc; + struct kgsl_memdesc *secure_preemption_desc; + struct kgsl_memdesc *perfcounter_save_restore_desc; + struct kgsl_memdesc *pagetable_desc; + struct adreno_dispatcher_drawqueue dispatch_q; + wait_queue_head_t ts_expire_waitq; + unsigned int wptr_preempt_end; + unsigned int gpr11; + int preempted_midway; + spinlock_t preempt_lock; + bool skip_inline_wptr; + /** + * @profile_desc: global memory to construct IB1s to do user side + * profiling + */ + struct kgsl_memdesc *profile_desc; + /** + * @profile_index: Pointer to the 
next "slot" in profile_desc for a user + * profiling IB1. This allows for PAGE_SIZE / 16 = 256 simultaneous + * commands per ringbuffer with user profiling enabled + * enough. + */ + u32 profile_index; +}; + +/* Returns the current ringbuffer */ +#define ADRENO_CURRENT_RINGBUFFER(a) ((a)->cur_rb) + +int adreno_ringbuffer_issueibcmds(struct kgsl_device_private *dev_priv, + struct kgsl_context *context, + struct kgsl_drawobj *drawobj, + uint32_t *timestamp); + +/** + * adreno_ringbuffer_setup - Do generic set up on a ringbuffer + * @adreno_dev: Pointer to an Adreno GPU handle + * @rb: Pointer to the ringbuffer struct to set up + * @id: Index of the ringbuffer + * + * Set up generic memory and other bits of a ringbuffer. + * Return: 0 on success or negative on error. + */ +int adreno_ringbuffer_setup(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, int id); + +int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev, + struct kgsl_drawobj_cmd *cmdobj, + struct adreno_submit_time *time); + + +void adreno_ringbuffer_stop(struct adreno_device *adreno_dev); + +void adreno_ringbuffer_submit(struct adreno_ringbuffer *rb, + struct adreno_submit_time *time); + +void kgsl_cp_intrcallback(struct kgsl_device *device); + +unsigned int *adreno_ringbuffer_allocspace(struct adreno_ringbuffer *rb, + unsigned int numcmds); + +void adreno_ringbuffer_read_pfp_ucode(struct kgsl_device *device); + +void adreno_ringbuffer_read_pm4_ucode(struct kgsl_device *device); + +int adreno_ringbuffer_waittimestamp(struct adreno_ringbuffer *rb, + unsigned int timestamp, + unsigned int msecs); + +int adreno_rb_readtimestamp(struct adreno_device *adreno_dev, + void *priv, enum kgsl_timestamp_type type, + unsigned int *timestamp); + +static inline int adreno_ringbuffer_count(struct adreno_ringbuffer *rb, + unsigned int rptr) +{ + if (rb->wptr >= rptr) + return rb->wptr - rptr; + return rb->wptr + KGSL_RB_DWORDS - rptr; +} + +/* Increment a value by 4 bytes with wrap-around based on size */ +static inline unsigned int adreno_ringbuffer_inc_wrapped(unsigned int val, + unsigned int size) +{ + return (val + sizeof(unsigned int)) % size; +} + +/* Decrement a value by 4 bytes with wrap-around based on size */ +static inline unsigned int adreno_ringbuffer_dec_wrapped(unsigned int val, + unsigned int size) +{ + return (val + size - sizeof(unsigned int)) % size; +} + +/** + * adreno_ringbuffer_set_constraint - Set a system constraint before submission + * @device: A KGSL GPU device handle + * @drawobj: Pointer to the drawobj being sbumitted + * + * Check the drawobj to see if a constraint is applied and apply it. + */ +void adreno_ringbuffer_set_constraint(struct kgsl_device *device, + struct kgsl_drawobj *drawobj); + +void adreno_get_submit_time(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, + struct adreno_submit_time *time); + +#endif /* __ADRENO_RINGBUFFER_H */ diff --git a/adreno_snapshot.c b/adreno_snapshot.c new file mode 100644 index 0000000000..ec6defa94f --- /dev/null +++ b/adreno_snapshot.c @@ -0,0 +1,1134 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2012-2021, The Linux Foundation. All rights reserved. 
+ */ + +#include + +#include "adreno.h" +#include "adreno_cp_parser.h" +#include "adreno_pm4types.h" +#include "adreno_snapshot.h" + +/* Maintain a list of the objects we see during parsing */ + +#define SNAPSHOT_OBJ_BUFSIZE 64 + +/* Used to print error message if an IB has too many objects in it */ +static int ib_max_objs; + +struct snapshot_rb_params { + struct kgsl_snapshot *snapshot; + struct adreno_ringbuffer *rb; +}; + +/* Keep track of how many bytes are frozen after a snapshot and tell the user */ +static size_t snapshot_frozen_objsize; + +static struct kgsl_snapshot_object objbuf[SNAPSHOT_OBJ_BUFSIZE]; + +/* Pointer to the next open entry in the object list */ +static unsigned int objbufptr; + +static inline int adreno_rb_ctxtswitch(struct adreno_device *adreno_dev, + unsigned int *cmd) +{ + return cmd[0] == cp_packet(adreno_dev, CP_NOP, 1) && + cmd[1] == CONTEXT_TO_MEM_IDENTIFIER; +} + +/* Push a new buffer object onto the list */ +void kgsl_snapshot_push_object(struct kgsl_device *device, + struct kgsl_process_private *process, + uint64_t gpuaddr, uint64_t dwords) +{ + int index; + struct kgsl_mem_entry *entry; + + if (process == NULL || gpuaddr == 0) + return; + + /* + * Sometimes IBs can be reused in the same dump. Because we parse from + * oldest to newest, if we come across an IB that has already been used, + * assume that it has been reused and update the list with the newest + * size. + */ + + for (index = 0; index < objbufptr; index++) { + if (objbuf[index].gpuaddr == gpuaddr && + objbuf[index].entry->priv == process) { + /* + * Check if newly requested size is within the + * allocated range or not, otherwise continue + * with previous size. + */ + if (!kgsl_gpuaddr_in_memdesc( + &objbuf[index].entry->memdesc, + gpuaddr, dwords << 2)) { + dev_err(device->dev, + "snapshot: gpuaddr 0x%016llX size is less than requested\n", + gpuaddr); + return; + } + + objbuf[index].size = max_t(uint64_t, + objbuf[index].size, + dwords << 2); + return; + } + } + + if (objbufptr == SNAPSHOT_OBJ_BUFSIZE) { + dev_err(device->dev, "snapshot: too many snapshot objects\n"); + return; + } + + entry = kgsl_sharedmem_find(process, gpuaddr); + if (entry == NULL) { + dev_err(device->dev, + "snapshot: Can't find entry for 0x%016llX\n", gpuaddr); + return; + } + + if (!kgsl_gpuaddr_in_memdesc(&entry->memdesc, gpuaddr, dwords << 2)) { + dev_err(device->dev, + "snapshot: Mem entry 0x%016llX is too small\n", + gpuaddr); + kgsl_mem_entry_put(entry); + return; + } + + /* Put it on the list of things to parse */ + objbuf[objbufptr].gpuaddr = gpuaddr; + objbuf[objbufptr].size = dwords << 2; + objbuf[objbufptr++].entry = entry; +} + +/* + * Returns index of the specified object is already on the list of buffers + * to be dumped + */ + +static int find_object(uint64_t gpuaddr, struct kgsl_process_private *process) +{ + int index; + + for (index = 0; index < objbufptr; index++) { + if (objbuf[index].gpuaddr == gpuaddr && + objbuf[index].entry->priv == process) + return index; + } + return -ENOENT; +} + +/* + * snapshot_freeze_obj_list() - Take a list of ib objects and freeze their + * memory for snapshot + * @snapshot: The snapshot data. 
+ * @process: The process to which the IB belongs + * @ib_obj_list: List of the IB objects + * + * Returns 0 on success else error code + */ +static int snapshot_freeze_obj_list(struct kgsl_snapshot *snapshot, + struct kgsl_process_private *process, + struct adreno_ib_object_list *ib_obj_list) +{ + int ret = 0; + struct adreno_ib_object *ib_objs; + int i; + + for (i = 0; i < ib_obj_list->num_objs; i++) { + int temp_ret; + int index; + int freeze = 1; + + ib_objs = &(ib_obj_list->obj_list[i]); + /* Make sure this object is not going to be saved statically */ + for (index = 0; index < objbufptr; index++) { + if ((objbuf[index].gpuaddr <= ib_objs->gpuaddr) && + ((objbuf[index].gpuaddr + + (objbuf[index].size)) >= + (ib_objs->gpuaddr + ib_objs->size)) && + (objbuf[index].entry->priv == process)) { + freeze = 0; + objbuf[index].entry->memdesc.priv &= + ~KGSL_MEMDESC_SKIP_RECLAIM; + break; + } + } + + if (freeze) { + temp_ret = kgsl_snapshot_get_object(snapshot, + process, ib_objs->gpuaddr, + ib_objs->size, + ib_objs->snapshot_obj_type); + if (temp_ret < 0) { + if (ret >= 0) + ret = temp_ret; + } else { + snapshot_frozen_objsize += temp_ret; + } + } + } + return ret; +} + +void adreno_parse_ib(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, + struct kgsl_process_private *process, + uint64_t gpuaddr, uint64_t dwords) +{ + struct adreno_ib_object_list *ib_obj_list; + + /* + * Check the IB address - if it is either the last executed IB1 + * then push it into the static blob otherwise put it in the dynamic + * list + */ + if (kgsl_addr_range_overlap(gpuaddr, dwords, + snapshot->ib1base, snapshot->ib1size)) { + /* + * During restore after preemption, ib1base in the register + * can be updated by CP. In such scenarios, to dump complete + * IB1 in snapshot, we should consider ib1base from ringbuffer. 
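+ * The kgsl_addr_range_overlap() check above already matched this
+ * buffer against the saved IB1 range, so it is safe to re-anchor
+ * ib1base/ib1size here.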
+ */ + if (gpuaddr != snapshot->ib1base) { + snapshot->ib1base = gpuaddr; + snapshot->ib1size = dwords; + } + kgsl_snapshot_push_object(device, process, gpuaddr, dwords); + return; + } + + if (kgsl_snapshot_have_object(snapshot, process, + gpuaddr, dwords << 2)) + return; + + if (-E2BIG == adreno_ib_create_object_list(device, process, + gpuaddr, dwords, snapshot->ib2base, + &ib_obj_list)) + ib_max_objs = 1; + + if (ib_obj_list) + kgsl_snapshot_add_ib_obj_list(snapshot, ib_obj_list); + +} + +static void dump_all_ibs(struct kgsl_device *device, + struct adreno_ringbuffer *rb, + struct kgsl_snapshot *snapshot) +{ + int index = 0; + unsigned int *rbptr; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_iommu *iommu = KGSL_IOMMU(device); + + rbptr = rb->buffer_desc->hostptr; + + for (index = 0; index < KGSL_RB_DWORDS;) { + + if (adreno_cmd_is_ib(adreno_dev, rbptr[index])) { + uint64_t ibaddr; + uint64_t ibsize; + + if (ADRENO_LEGACY_PM4(adreno_dev)) { + ibaddr = rbptr[index + 1]; + ibsize = rbptr[index + 2]; + index += 3; + } else { + ibaddr = rbptr[index + 2]; + ibaddr = ibaddr << 32 | rbptr[index + 1]; + ibsize = rbptr[index + 3]; + index += 4; + } + + /* Don't parse known global IBs */ + if (kgsl_gpuaddr_in_memdesc(iommu->setstate, + ibaddr, ibsize)) + continue; + + if (kgsl_gpuaddr_in_memdesc(adreno_dev->pwron_fixup, + ibaddr, ibsize)) + continue; + + adreno_parse_ib(device, snapshot, snapshot->process, + ibaddr, ibsize); + } else + index = index + 1; + } +} + +/** + * snapshot_rb_ibs() - Dump rb data and capture the IB's in the RB as well + * @device: Pointer to a KGSL device + * @rb: The RB to dump + * @data: Pointer to memory where the RB data is to be dumped + * @snapshot: Pointer to information about the current snapshot being taken + */ +static void snapshot_rb_ibs(struct kgsl_device *device, + struct adreno_ringbuffer *rb, + struct kgsl_snapshot *snapshot) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + unsigned int *rbptr, rptr = adreno_get_rptr(rb); + int index, i; + int parse_ibs = 0, ib_parse_start; + + /* + * Figure out the window of ringbuffer data to dump. First we need to + * find where the last processed IB ws submitted. Start walking back + * from the rptr + */ + index = rptr; + rbptr = rb->buffer_desc->hostptr; + + do { + index--; + + if (index < 0) { + if (ADRENO_LEGACY_PM4(adreno_dev)) + index = KGSL_RB_DWORDS - 3; + else + index = KGSL_RB_DWORDS - 4; + + /* We wrapped without finding what we wanted */ + if (index < rb->wptr) { + index = rb->wptr; + break; + } + } + + if (adreno_cmd_is_ib(adreno_dev, rbptr[index])) { + uint64_t ibaddr; + uint64_t ibsize; + + if (ADRENO_LEGACY_PM4(adreno_dev)) { + ibaddr = rbptr[index + 1]; + ibsize = rbptr[index + 2]; + } else { + ibaddr = rbptr[index + 2]; + ibaddr = ibaddr << 32 | rbptr[index + 1]; + ibsize = rbptr[index + 3]; + } + + if (kgsl_addr_range_overlap(ibaddr, ibsize, + snapshot->ib1base, snapshot->ib1size)) { + /* + * During restore after preemption, ib1base in + * the register can be updated by CP. In such + * scenario, to dump complete IB1 in snapshot, + * we should consider ib1base from ringbuffer. + */ + snapshot->ib1base = ibaddr; + snapshot->ib1size = ibsize; + break; + } + } + } while (index != rb->wptr); + + /* + * If the ib1 was not found, for example, if ib1base was restored + * incorrectly after preemption, then simply dump the entire + * ringbuffer along with all the IBs in the ringbuffer. 
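+ * dump_all_ibs() walks the RB from index 0 and parses every IB it
+ * finds, skipping only the known global setstate and pwron_fixup
+ * buffers.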
+ */ + + if (index == rb->wptr) { + dump_all_ibs(device, rb, snapshot); + return; + } + + /* + * index points at the last submitted IB. We can only trust that the + * memory between the context switch and the hanging IB is valid, so + * the next step is to find the context switch before the submission + */ + + while (index != rb->wptr) { + index--; + + if (index < 0) { + index = KGSL_RB_DWORDS - 2; + + /* + * Wrapped without finding the context switch. This is + * harmless - we should still have enough data to dump a + * valid state + */ + + if (index < rb->wptr) { + index = rb->wptr; + break; + } + } + + /* Break if the current packet is a context switch identifier */ + if ((rbptr[index] == cp_packet(adreno_dev, CP_NOP, 1)) && + (rbptr[index + 1] == CONTEXT_TO_MEM_IDENTIFIER)) + break; + } + + /* + * Index represents the start of the window of interest. We will try + * to dump all buffers between here and the rptr + */ + + ib_parse_start = index; + + /* + * Loop through the RB, looking for indirect buffers and MMU pagetable + * changes + */ + + index = rb->wptr; + for (i = 0; i < KGSL_RB_DWORDS; i++) { + /* + * Only parse IBs between the start and the rptr or the next + * context switch, whichever comes first + */ + + if (parse_ibs == 0 && index == ib_parse_start) + parse_ibs = 1; + else if (index == rptr || adreno_rb_ctxtswitch(adreno_dev, + &rbptr[index])) + parse_ibs = 0; + + if (parse_ibs && adreno_cmd_is_ib(adreno_dev, rbptr[index])) { + struct kgsl_iommu *iommu = KGSL_IOMMU(device); + uint64_t ibaddr; + uint64_t ibsize; + + if (ADRENO_LEGACY_PM4(adreno_dev)) { + ibaddr = rbptr[index + 1]; + ibsize = rbptr[index + 2]; + } else { + ibaddr = rbptr[index + 2]; + ibaddr = ibaddr << 32 | rbptr[index + 1]; + ibsize = rbptr[index + 3]; + } + + index = (index + 1) % KGSL_RB_DWORDS; + + /* Don't parse known global IBs */ + if (kgsl_gpuaddr_in_memdesc(iommu->setstate, + ibaddr, ibsize)) + continue; + + if (kgsl_gpuaddr_in_memdesc(adreno_dev->pwron_fixup, + ibaddr, ibsize)) + continue; + + adreno_parse_ib(device, snapshot, snapshot->process, + ibaddr, ibsize); + } else + index = (index + 1) % KGSL_RB_DWORDS; + } + +} + +/* Snapshot the ringbuffer memory */ +static size_t snapshot_rb(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct kgsl_snapshot_rb_v2 *header = (struct kgsl_snapshot_rb_v2 *)buf; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct snapshot_rb_params *snap_rb_params = priv; + struct kgsl_snapshot *snapshot = snap_rb_params->snapshot; + struct adreno_ringbuffer *rb = snap_rb_params->rb; + + /* + * Dump the entire ringbuffer - the parser can choose how much of it to + * process + */ + + if (remain < KGSL_RB_SIZE + sizeof(*header)) { + dev_err(device->dev, + "snapshot: Not enough memory for the rb section\n"); + return 0; + } + + /* Write the sub-header for the section */ + header->start = 0; + header->end = KGSL_RB_DWORDS; + header->wptr = rb->wptr; + header->rptr = adreno_get_rptr(rb); + header->rbsize = KGSL_RB_DWORDS; + header->count = KGSL_RB_DWORDS; + adreno_rb_readtimestamp(adreno_dev, rb, KGSL_TIMESTAMP_QUEUED, + &header->timestamp_queued); + adreno_rb_readtimestamp(adreno_dev, rb, KGSL_TIMESTAMP_RETIRED, + &header->timestamp_retired); + header->gpuaddr = rb->buffer_desc->gpuaddr; + header->id = rb->id; + + if (rb == adreno_dev->cur_rb) + snapshot_rb_ibs(device, rb, snapshot); + + /* Just copy the ringbuffer, there are no active IBs */ + memcpy(data, 
rb->buffer_desc->hostptr, KGSL_RB_SIZE); + + /* Return the size of the section */ + return KGSL_RB_SIZE + sizeof(*header); +} + +static int _count_mem_entries(int id, void *ptr, void *data) +{ + int *count = data; + *count = *count + 1; + return 0; +} + +struct mem_entry { + uint64_t gpuaddr; + uint64_t size; + unsigned int type; +} __packed; + +static int _save_mem_entries(int id, void *ptr, void *data) +{ + struct kgsl_mem_entry *entry = ptr; + struct mem_entry *m = (struct mem_entry *) data; + unsigned int index = id - 1; + + m[index].gpuaddr = entry->memdesc.gpuaddr; + m[index].size = entry->memdesc.size; + m[index].type = kgsl_memdesc_get_memtype(&entry->memdesc); + + return 0; +} + +static size_t snapshot_capture_mem_list(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_mem_list_v2 *header = + (struct kgsl_snapshot_mem_list_v2 *)buf; + int num_mem = 0; + int ret = 0; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + struct kgsl_process_private *process = priv; + + /* we need a process to search! */ + if (process == NULL) + return 0; + + spin_lock(&process->mem_lock); + + /* We need to know the number of memory objects that the process has */ + idr_for_each(&process->mem_idr, _count_mem_entries, &num_mem); + + if (num_mem == 0) + goto out; + + if (remain < ((num_mem * sizeof(struct mem_entry)) + sizeof(*header))) { + dev_err(device->dev, + "snapshot: Not enough memory for the mem list\n"); + goto out; + } + + header->num_entries = num_mem; + header->ptbase = kgsl_mmu_pagetable_get_ttbr0(process->pagetable); + + /* + * Walk through the memory list and store the + * tuples(gpuaddr, size, memtype) in snapshot + */ + idr_for_each(&process->mem_idr, _save_mem_entries, data); + + ret = sizeof(*header) + (num_mem * sizeof(struct mem_entry)); +out: + spin_unlock(&process->mem_lock); + return ret; +} + +struct snapshot_ib_meta { + struct kgsl_snapshot *snapshot; + struct kgsl_snapshot_object *obj; + uint64_t ib1base; + uint64_t ib1size; + uint64_t ib2base; + uint64_t ib2size; +}; + +static void kgsl_snapshot_add_active_ib_obj_list(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + struct adreno_ib_object_list *ib_obj_list; + int index = -ENOENT; + + if (!snapshot->ib1dumped) + index = find_object(snapshot->ib1base, snapshot->process); + + /* only do this for IB1 because the IB2's are part of IB1 objects */ + if ((index != -ENOENT) && + (snapshot->ib1base == objbuf[index].gpuaddr)) { + if (-E2BIG == adreno_ib_create_object_list(device, + objbuf[index].entry->priv, + objbuf[index].gpuaddr, + objbuf[index].size >> 2, + snapshot->ib2base, + &ib_obj_list)) + ib_max_objs = 1; + if (ib_obj_list) { + /* freeze the IB objects in the IB */ + snapshot_freeze_obj_list(snapshot, + objbuf[index].entry->priv, + ib_obj_list); + adreno_ib_destroy_obj_list(ib_obj_list); + } + } else { + /* Get the IB2 index from parsed object */ + index = find_object(snapshot->ib2base, snapshot->process); + + if (index != -ENOENT) + adreno_parse_ib(device, snapshot, snapshot->process, + snapshot->ib2base, objbuf[index].size >> 2); + } +} + +/* + * active_ib_is_parsed() - Checks if active ib is already parsed + * @gpuaddr: Active IB base address at the time of fault + * @size: Active IB size + * @process: The process to which the IB belongs + * + * Function returns true if the active is already is parsed + * else false + */ +static bool active_ib_is_parsed(uint64_t gpuaddr, uint64_t size, + struct kgsl_process_private *process) +{ + int index; + /* go 
through the static list for gpuaddr is in list or not */ + for (index = 0; index < objbufptr; index++) { + if ((objbuf[index].gpuaddr <= gpuaddr) && + ((objbuf[index].gpuaddr + + (objbuf[index].size)) >= + (gpuaddr + size)) && + (objbuf[index].entry->priv == process)) + return true; + } + return false; +} +/* Snapshot the memory for an indirect buffer */ +static size_t snapshot_ib(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct kgsl_snapshot_ib_v2 *header = (struct kgsl_snapshot_ib_v2 *)buf; + struct snapshot_ib_meta *meta = priv; + unsigned int *src; + unsigned int *dst = (unsigned int *)(buf + sizeof(*header)); + struct adreno_ib_object_list *ib_obj_list; + struct kgsl_snapshot *snapshot; + struct kgsl_snapshot_object *obj; + struct kgsl_memdesc *memdesc; + + if (meta == NULL || meta->snapshot == NULL || meta->obj == NULL) { + dev_err(device->dev, "snapshot: bad metadata\n"); + return 0; + } + snapshot = meta->snapshot; + obj = meta->obj; + memdesc = &obj->entry->memdesc; + + /* If size is zero get it from the medesc size */ + if (!obj->size) + obj->size = (memdesc->size - (obj->gpuaddr - memdesc->gpuaddr)); + + if (remain < (obj->size + sizeof(*header))) { + dev_err(device->dev, "snapshot: Not enough memory for the ib\n"); + return 0; + } + + src = kgsl_gpuaddr_to_vaddr(memdesc, obj->gpuaddr); + if (src == NULL) { + dev_err(device->dev, + "snapshot: Unable to map GPU memory object 0x%016llX into the kernel\n", + obj->gpuaddr); + return 0; + } + + /* only do this for IB1 because the IB2's are part of IB1 objects */ + if (meta->ib1base == obj->gpuaddr) { + + snapshot->ib1dumped = active_ib_is_parsed(obj->gpuaddr, + obj->size, obj->entry->priv); + if (-E2BIG == adreno_ib_create_object_list(device, + obj->entry->priv, + obj->gpuaddr, obj->size >> 2, + snapshot->ib2base, + &ib_obj_list)) + ib_max_objs = 1; + if (ib_obj_list) { + /* freeze the IB objects in the IB */ + snapshot_freeze_obj_list(snapshot, + obj->entry->priv, + ib_obj_list); + adreno_ib_destroy_obj_list(ib_obj_list); + } + } + + + if (meta->ib2base == obj->gpuaddr) + snapshot->ib2dumped = active_ib_is_parsed(obj->gpuaddr, + obj->size, obj->entry->priv); + + /* Write the sub-header for the section */ + header->gpuaddr = obj->gpuaddr; + header->ptbase = + kgsl_mmu_pagetable_get_ttbr0(obj->entry->priv->pagetable); + header->size = obj->size >> 2; + + /* Write the contents of the ib */ + memcpy((void *)dst, (void *)src, (size_t) obj->size); + /* Write the contents of the ib */ + + return obj->size + sizeof(*header); +} + +/* Dump another item on the current pending list */ +static void dump_object(struct kgsl_device *device, int obj, + struct kgsl_snapshot *snapshot) +{ + struct snapshot_ib_meta meta; + + meta.snapshot = snapshot; + meta.obj = &objbuf[obj]; + meta.ib1base = snapshot->ib1base; + meta.ib1size = snapshot->ib1size; + meta.ib2base = snapshot->ib2base; + meta.ib2size = snapshot->ib2size; + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_IB_V2, + snapshot, snapshot_ib, &meta); + if (objbuf[obj].entry) { + kgsl_memdesc_unmap(&(objbuf[obj].entry->memdesc)); + kgsl_mem_entry_put(objbuf[obj].entry); + } +} + +/* setup_fault process - Find kgsl_process_private struct that caused the fault + * + * Find the faulting process based what the dispatcher thinks happened and + * what the hardware is using for the current pagetable. The process struct + * will be used to look up GPU addresses that are encountered while parsing + * the GPU state. 
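+ * If the pagetable of the supplied process does not match the TTBR0 currently programmed in the hardware, the per-process pagetable list is walked to find the process that does match.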
+ */ +static void setup_fault_process(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, + struct kgsl_process_private *process) +{ + u64 hw_ptbase, proc_ptbase; + + if (process != NULL && !kgsl_process_private_get(process)) + process = NULL; + + /* Get the physical address of the MMU pagetable */ + hw_ptbase = kgsl_mmu_get_current_ttbr0(&device->mmu); + + /* if we have an input process, make sure the ptbases match */ + if (process) { + proc_ptbase = kgsl_mmu_pagetable_get_ttbr0(process->pagetable); + /* agreement! No need to check further */ + if (hw_ptbase == proc_ptbase) + goto done; + + kgsl_process_private_put(process); + process = NULL; + dev_err(device->dev, + "snapshot: ptbase mismatch hw %llx sw %llx\n", + hw_ptbase, proc_ptbase); + } + + /* try to find the right pagetable by walking the process list */ + if (kgsl_mmu_is_perprocess(&device->mmu)) { + struct kgsl_process_private *tmp; + + read_lock(&kgsl_driver.proclist_lock); + list_for_each_entry(tmp, &kgsl_driver.process_list, list) { + u64 pt_ttbr0; + + pt_ttbr0 = kgsl_mmu_pagetable_get_ttbr0(tmp->pagetable); + if ((pt_ttbr0 == hw_ptbase) + && kgsl_process_private_get(tmp)) { + process = tmp; + break; + } + } + read_unlock(&kgsl_driver.proclist_lock); + } +done: + snapshot->process = process; +} + +/* Snapshot a global memory buffer */ +size_t adreno_snapshot_global(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct kgsl_memdesc *memdesc = priv; + + struct kgsl_snapshot_gpu_object_v2 *header = + (struct kgsl_snapshot_gpu_object_v2 *)buf; + + u8 *ptr = buf + sizeof(*header); + + if (!memdesc || memdesc->size == 0) + return 0; + + if (remain < (memdesc->size + sizeof(*header))) { + dev_err(device->dev, + "snapshot: Not enough memory for the memdesc\n"); + return 0; + } + + if (memdesc->hostptr == NULL) { + dev_err(device->dev, + "snapshot: no kernel mapping for global object 0x%016llX\n", + memdesc->gpuaddr); + return 0; + } + + header->size = memdesc->size >> 2; + header->gpuaddr = memdesc->gpuaddr; + header->ptbase = MMU_DEFAULT_TTBR0(device); + header->type = SNAPSHOT_GPU_OBJECT_GLOBAL; + + memcpy(ptr, memdesc->hostptr, memdesc->size); + + return memdesc->size + sizeof(*header); +} + +/* Snapshot IOMMU specific buffers */ +static void adreno_snapshot_iommu(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_iommu *iommu = KGSL_IOMMU(device); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2, + snapshot, adreno_snapshot_global, iommu->setstate); + + if (ADRENO_FEATURE(adreno_dev, ADRENO_PREEMPTION)) + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2, + snapshot, adreno_snapshot_global, iommu->smmu_info); +} + +static void adreno_snapshot_ringbuffer(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, struct adreno_ringbuffer *rb) +{ + struct snapshot_rb_params params = { + .snapshot = snapshot, + .rb = rb, + }; + + if (rb == NULL) + return; + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_RB_V2, snapshot, + snapshot_rb, ¶ms); +} + +static void adreno_snapshot_os(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, struct kgsl_context *guilty, + bool dump_contexts) +{ + struct kgsl_snapshot_section_header *sect = + (struct kgsl_snapshot_section_header *) snapshot->ptr; + struct kgsl_snapshot_linux_v2 *header = (struct kgsl_snapshot_linux_v2 *) + (snapshot->ptr + sizeof(*sect)); + + if (snapshot->remain < (sizeof(*sect) + 
sizeof(*header))) { + SNAPSHOT_ERR_NOMEM(device, "OS"); + return; + } + + header->osid = KGSL_SNAPSHOT_OS_LINUX_V3; + + strlcpy(header->release, init_utsname()->release, sizeof(header->release)); + strlcpy(header->version, init_utsname()->version, sizeof(header->version)); + + header->seconds = get_seconds(); + header->power_flags = device->pwrctrl.power_flags; + header->power_level = device->pwrctrl.active_pwrlevel; + header->power_interval_timeout = device->pwrctrl.interval_timeout; + header->grpclk = clk_get_rate(device->pwrctrl.grp_clks[0]); + + /* Get the current PT base */ + header->ptbase = kgsl_mmu_get_current_ttbr0(&device->mmu); + header->ctxtcount = 0; + + /* If we know the guilty context then dump it */ + if (guilty) { + header->pid = guilty->tid; + strlcpy(header->comm, guilty->proc_priv->comm, + sizeof(header->comm)); + } + + if (dump_contexts) { + u32 remain = snapshot->remain - sizeof(*sect) + sizeof(*header); + void *mem = snapshot->ptr + sizeof(*sect) + sizeof(*header); + struct kgsl_context *context; + int id; + + read_lock(&device->context_lock); + idr_for_each_entry(&device->context_idr, context, id) { + struct kgsl_snapshot_linux_context_v2 *c = mem; + + if (remain < sizeof(*c)) + break; + + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_QUEUED, + &c->timestamp_queued); + + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_CONSUMED, + &c->timestamp_consumed); + + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED, + &c->timestamp_retired); + + header->ctxtcount++; + + mem += sizeof(*c); + remain -= sizeof(*c); + + } + read_unlock(&device->context_lock); + } + + sect->magic = SNAPSHOT_SECTION_MAGIC; + sect->id = KGSL_SNAPSHOT_SECTION_OS; + sect->size = sizeof(*sect) + sizeof(*header) + + header->ctxtcount * sizeof(struct kgsl_snapshot_linux_context_v2); + + snapshot->ptr += sect->size; + snapshot->remain -= sect->size; + snapshot->size += sect->size; +} + +/* adreno_snapshot - Snapshot the Adreno GPU state + * @device - KGSL device to snapshot + * @snapshot - Pointer to the snapshot instance + * @context - context that caused the fault, if known by the driver + * This is a hook function called by kgsl_snapshot to snapshot the + * Adreno specific information for the GPU snapshot. In turn, this function + * calls the GPU specific snapshot function to get core specific information. + */ +void adreno_snapshot(struct kgsl_device *device, struct kgsl_snapshot *snapshot, + struct kgsl_context *context) +{ + unsigned int i; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + struct kgsl_snapshot_header *header = (struct kgsl_snapshot_header *) + snapshot->ptr; + + /* Set up the master header */ + header->magic = SNAPSHOT_MAGIC; + /* gpuid is deprecated so initialize it to an obviously wrong value */ + header->gpuid = UINT_MAX; + header->chipid = adreno_dev->chipid; + + snapshot->ptr += sizeof(*header); + snapshot->remain -= sizeof(*header); + snapshot->size += sizeof(*header); + + /* Write the OS section */ + adreno_snapshot_os(device, snapshot, context, device->gmu_fault); + + ib_max_objs = 0; + /* Reset the list of objects */ + objbufptr = 0; + + snapshot_frozen_objsize = 0; + + setup_fault_process(device, snapshot, + context ? 
context->proc_priv : NULL); + + /* Add GPU specific sections - registers mainly, but other stuff too */ + if (gpudev->snapshot) + gpudev->snapshot(adreno_dev, snapshot); + + snapshot->ib1dumped = false; + snapshot->ib2dumped = false; + + adreno_snapshot_ringbuffer(device, snapshot, adreno_dev->cur_rb); + + /* Dump the prev ringbuffer */ + if (adreno_dev->prev_rb != adreno_dev->cur_rb) + adreno_snapshot_ringbuffer(device, snapshot, + adreno_dev->prev_rb); + + if ((adreno_dev->next_rb != adreno_dev->prev_rb) && + (adreno_dev->next_rb != adreno_dev->cur_rb)) + adreno_snapshot_ringbuffer(device, snapshot, + adreno_dev->next_rb); + + if (device->snapshot_atomic) + return; + + /* Dump selected global buffers */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2, + snapshot, adreno_snapshot_global, device->memstore); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2, + snapshot, adreno_snapshot_global, device->scratch); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2, + snapshot, adreno_snapshot_global, + adreno_dev->pwron_fixup); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2, + snapshot, adreno_snapshot_global, + adreno_dev->profile_buffer); + + if (kgsl_mmu_get_mmutype(device) == KGSL_MMU_TYPE_IOMMU) + adreno_snapshot_iommu(device, snapshot); + + /* + * Add a section that lists (gpuaddr, size, memtype) tuples of the + * hanging process + */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_MEMLIST_V2, + snapshot, snapshot_capture_mem_list, snapshot->process); + /* + * Make sure that the last IB1 that was being executed is dumped. + * Since this was the last IB1 that was processed, we should have + * already added it to the list during the ringbuffer parse but we + * want to be double plus sure. + * The problem is that IB size from the register is the unprocessed size + * of the buffer not the original size, so if we didn't catch this + * buffer being directly used in the RB, then we might not be able to + * dump the whole thing. Try to dump the maximum possible size from the + * IB1 base address till the end of memdesc size so that we dont miss + * what we are interested in. Print a warning message so we can try to + * figure how often this really happens. + */ + + if (-ENOENT == find_object(snapshot->ib1base, snapshot->process)) { + struct kgsl_mem_entry *entry; + u64 ibsize; + + entry = kgsl_sharedmem_find(snapshot->process, + snapshot->ib1base); + if (entry == NULL) { + dev_err(device->dev, + "Can't find a memory entry containing IB1BASE %16llx\n", + snapshot->ib1base); + } else { + ibsize = entry->memdesc.size - + (snapshot->ib1base - entry->memdesc.gpuaddr); + kgsl_mem_entry_put(entry); + + kgsl_snapshot_push_object(device, snapshot->process, + snapshot->ib1base, ibsize >> 2); + dev_err(device->dev, + "CP_IB1_BASE is not found in the ringbuffer. Dumping %llx dwords of the buffer\n", + ibsize >> 2); + } + } + + /* + * Add the last parsed IB2 to the list. The IB2 should be found as we + * parse the objects below, but we try to add it to the list first, so + * it too can be parsed. Don't print an error message in this case - if + * the IB2 is found during parsing, the list will be updated with the + * correct size. + */ + + if (-ENOENT == find_object(snapshot->ib2base, snapshot->process)) + kgsl_snapshot_push_object(device, snapshot->process, + snapshot->ib2base, snapshot->ib2size); + + /* + * Go through the list of found objects and dump each one. 
As the IBs + * are parsed, more objects might be found, and objbufptr will increase + */ + for (i = 0; i < objbufptr; i++) + dump_object(device, i, snapshot); + + /* + * Incase snapshot static blob is running out of memory, Add Active IB1 + * and IB2 entries to obj_list so that active ib's can be dumped to + * snapshot dynamic blob. + */ + if (!snapshot->ib1dumped || !snapshot->ib2dumped) + kgsl_snapshot_add_active_ib_obj_list(device, snapshot); + + if (ib_max_objs) + dev_err(device->dev, "Max objects found in IB\n"); + if (snapshot_frozen_objsize) + dev_err(device->dev, + "GPU snapshot froze %zdKb of GPU buffers\n", + snapshot_frozen_objsize / 1024); + +} + +void adreno_snapshot_registers(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, + const unsigned int *regs, unsigned int count) +{ + struct kgsl_snapshot_registers r; + + r.regs = regs; + r.count = count; + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS, snapshot, + kgsl_snapshot_dump_registers, &r); +} + +int adreno_snapshot_regs_count(const u32 *ptr) +{ + unsigned int count = 0; + unsigned int group_count; + + for ( ; ptr[0] != UINT_MAX; ptr += 2) { + group_count = REG_COUNT(ptr); + if (group_count == 1) + count += group_count + 1; + else + count += group_count + 2; + } + return count; +} + +/* + * This is a new format for dumping the registers, where we dump just the first + * address of the register along with the count of the contiguous registers + * which we going to dump. This helps us save memory by not dumping the + * address for each register + */ +size_t adreno_snapshot_registers_v2(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + const u32 *ptr = (const u32 *)priv; + unsigned int *data = (unsigned int *)buf; + int count = 0, k; + + /* Figure out how many registers we are going to dump */ + count = adreno_snapshot_regs_count(ptr); + + if (remain < (count * 4)) { + SNAPSHOT_ERR_NOMEM(device, "REGISTERS"); + return 0; + } + + for (ptr = (const u32 *)priv; ptr[0] != UINT_MAX; ptr += 2) { + int cnt = REG_COUNT(ptr); + + if (cnt == 1) + *data++ = BIT(31) | ptr[0]; + else { + *data++ = ptr[0]; + *data++ = cnt; + } + for (k = ptr[0]; k <= ptr[1]; k++) { + kgsl_regread(device, k, data); + data++; + } + } + + /* Return the size of the section */ + return (count * 4); +} diff --git a/adreno_snapshot.h b/adreno_snapshot.h new file mode 100644 index 0000000000..254c0c4a5c --- /dev/null +++ b/adreno_snapshot.h @@ -0,0 +1,85 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2013-2015,2020-2021, The Linux Foundation. All rights reserved. 
+ */ +#ifndef __ADRENO_SNAPSHOT_H +#define __ADRENO_SNAPSHOT_H + +#include "kgsl_snapshot.h" + +#define CP_CRASH_DUMPER_TIMEOUT 500 + +#define DEBUG_SECTION_SZ(_dwords) (((_dwords) * sizeof(unsigned int)) \ + + sizeof(struct kgsl_snapshot_debug)) + +#define SHADER_SECTION_SZ(_dwords) (((_dwords) * sizeof(unsigned int)) \ + + sizeof(struct kgsl_snapshot_shader)) + +/* Macro to make it super easy to dump registers */ +#define SNAPSHOT_REGISTERS(_d, _s, _r) \ + adreno_snapshot_registers((_d), (_s), \ + (unsigned int *) _r, ARRAY_SIZE(_r) / 2) + +#define REG_COUNT(_ptr) ((_ptr[1] - _ptr[0]) + 1) + +void adreno_snapshot_registers(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, + const unsigned int *regs, unsigned int count); + +/** + * adreno_snapshot_regs_count - Helper function to calculate register and + * header size + * @ptr: Pointer to the register array + * + * Return: Number of registers in the array + * + * Helper function to count the total number of regsiters + * in a given array plus the header space needed for each group. + */ +int adreno_snapshot_regs_count(const u32 *ptr); + +/** + * adreno_snapshot_registers_v2 - Dump a series of registers + * @device: Pointer to the kgsl device + * @buf: The snapshot buffer + * @remain: The size remaining in the snapshot buffer + * @priv: Pointer to the register array to be dumped + * + * Return: Number of bytes written to the snapshot + * + * This function dumps the registers in a way that we need to + * only dump the start address and count for each pair of register + * in the array. This helps us save some memory in snapshot. + */ +size_t adreno_snapshot_registers_v2(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv); + +/** + * adreno_parse_ib - Parse the given IB + * @device: Pointer to the kgsl device + * @snapshot: Pointer to the snapshot structure + * @process: Process to which this IB belongs + * @gpuaddr: Gpu address of the IB + * @dwords: Size in dwords of the IB + * + * We want to store the last executed IB1 and IB2 in the static region to ensure + * that we get at least some information out of the snapshot even if we can't + * access the dynamic data from the sysfs file. Push all other IBs on the + * dynamic list + */ +void adreno_parse_ib(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, + struct kgsl_process_private *process, + u64 gpuaddr, u64 dwords); +/** + * adreno_snapshot_global - Add global buffer to snapshot + * @device: Pointer to the kgsl device + * @buf: Where the global buffer section is to be written + * @remain: Remaining bytes in snapshot buffer + * @priv: Opaque data + * + * Return: Number of bytes written to the snapshot buffer + */ +size_t adreno_snapshot_global(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv); +#endif /*__ADRENO_SNAPSHOT_H */ diff --git a/adreno_sysfs.c b/adreno_sysfs.c new file mode 100644 index 0000000000..5fb55aad09 --- /dev/null +++ b/adreno_sysfs.c @@ -0,0 +1,337 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2014-2021, The Linux Foundation. All rights reserved. 
+ */ + +#include + +#include "adreno.h" +#include "adreno_sysfs.h" +#include "kgsl_sysfs.h" + +static ssize_t _gpu_model_show(struct kgsl_device *device, char *buf) +{ + return scnprintf(buf, PAGE_SIZE, adreno_get_gpu_model(device)); +} + +static ssize_t gpu_model_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + + return _gpu_model_show(device, buf); +} + +static int _l3_vote_store(struct adreno_device *adreno_dev, bool val) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + if (ADRENO_FEATURE(adreno_dev, ADRENO_L3_VOTE)) + device->l3_vote = val; + + return 0; +} + +static bool _l3_vote_show(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + return device->l3_vote; +} + +static int _ft_policy_store(struct adreno_device *adreno_dev, + unsigned int val) +{ + adreno_dev->ft_policy = val & KGSL_FT_POLICY_MASK; + return 0; +} + +static unsigned int _ft_policy_show(struct adreno_device *adreno_dev) +{ + return adreno_dev->ft_policy; +} + +static int _ft_pagefault_policy_store(struct adreno_device *adreno_dev, + unsigned int val) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret = 0; + + mutex_lock(&device->mutex); + val &= KGSL_FT_PAGEFAULT_MASK; + + if (device->state == KGSL_STATE_ACTIVE) + ret = kgsl_mmu_set_pagefault_policy(&device->mmu, + (unsigned long) val); + + if (ret == 0) + device->mmu.pfpolicy = val; + + mutex_unlock(&device->mutex); + + return 0; +} + +static unsigned int _ft_pagefault_policy_show(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + + return device->mmu.pfpolicy; +} + +static int _gpu_llc_slice_enable_store(struct adreno_device *adreno_dev, + bool val) +{ + if (!IS_ERR_OR_NULL(adreno_dev->gpu_llc_slice)) + adreno_dev->gpu_llc_slice_enable = val; + return 0; +} + +static bool _gpu_llc_slice_enable_show(struct adreno_device *adreno_dev) +{ + return adreno_dev->gpu_llc_slice_enable; +} + +static int _gpuhtw_llc_slice_enable_store(struct adreno_device *adreno_dev, + bool val) +{ + if (!IS_ERR_OR_NULL(adreno_dev->gpuhtw_llc_slice)) + adreno_dev->gpuhtw_llc_slice_enable = val; + return 0; +} + +static bool _gpuhtw_llc_slice_enable_show(struct adreno_device *adreno_dev) +{ + return adreno_dev->gpuhtw_llc_slice_enable; +} + +static bool _ft_hang_intr_status_show(struct adreno_device *adreno_dev) +{ + /* Hang interrupt is always on on all targets */ + return true; +} + +static int _hwcg_store(struct adreno_device *adreno_dev, bool val) +{ + if (adreno_dev->hwcg_enabled == val) + return 0; + + return adreno_power_cycle_bool(adreno_dev, &adreno_dev->hwcg_enabled, + val); +} + +static bool _hwcg_show(struct adreno_device *adreno_dev) +{ + return adreno_dev->hwcg_enabled; +} + +static int _throttling_store(struct adreno_device *adreno_dev, bool val) +{ + if (!adreno_is_a540(adreno_dev) || + adreno_dev->throttling_enabled == val) + return 0; + + return adreno_power_cycle_bool(adreno_dev, + &adreno_dev->throttling_enabled, val); +} + +static bool _throttling_show(struct adreno_device *adreno_dev) +{ + return adreno_dev->throttling_enabled; +} + +static int _sptp_pc_store(struct adreno_device *adreno_dev, bool val) +{ + if (!ADRENO_FEATURE(adreno_dev, ADRENO_SPTP_PC) || + adreno_dev->sptp_pc_enabled == val) + return 0; + + return adreno_power_cycle_bool(adreno_dev, &adreno_dev->sptp_pc_enabled, + val); +} + +static bool _sptp_pc_show(struct adreno_device *adreno_dev) +{ + return 
adreno_dev->sptp_pc_enabled; +} + +static int _lm_store(struct adreno_device *adreno_dev, bool val) +{ + if (!ADRENO_FEATURE(adreno_dev, ADRENO_LM) || + adreno_dev->lm_enabled == val) + return 0; + + return adreno_power_cycle_bool(adreno_dev, &adreno_dev->lm_enabled, + val); +} + +static bool _lm_show(struct adreno_device *adreno_dev) +{ + return adreno_dev->lm_enabled; +} + +static int _ifpc_store(struct adreno_device *adreno_dev, bool val) +{ + return gmu_core_dev_ifpc_store(KGSL_DEVICE(adreno_dev), val); +} + +static bool _ifpc_show(struct adreno_device *adreno_dev) +{ + return gmu_core_dev_ifpc_show(KGSL_DEVICE(adreno_dev)); +} + +static unsigned int _ifpc_count_show(struct adreno_device *adreno_dev) +{ + return adreno_dev->ifpc_count; +} + +static bool _acd_show(struct adreno_device *adreno_dev) +{ + return adreno_dev->acd_enabled; +} + +static int _acd_store(struct adreno_device *adreno_dev, bool val) +{ + return gmu_core_dev_acd_set(KGSL_DEVICE(adreno_dev), val); +} + +static bool _bcl_show(struct adreno_device *adreno_dev) +{ + return adreno_dev->bcl_enabled; +} + +static int _bcl_store(struct adreno_device *adreno_dev, bool val) +{ + if (!ADRENO_FEATURE(adreno_dev, ADRENO_BCL) || + adreno_dev->bcl_enabled == val) + return 0; + + return adreno_power_cycle_bool(adreno_dev, &adreno_dev->bcl_enabled, + val); +} + +ssize_t adreno_sysfs_store_u32(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(dev_get_drvdata(dev)); + const struct adreno_sysfs_attribute_u32 *_attr = + container_of(attr, struct adreno_sysfs_attribute_u32, attr); + u32 val; + int ret; + + ret = kstrtou32(buf, 0, &val); + if (ret) + return ret; + + ret = _attr->store(adreno_dev, val); + if (ret) + return ret; + + return count; +} + +ssize_t adreno_sysfs_show_u32(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(dev_get_drvdata(dev)); + const struct adreno_sysfs_attribute_u32 *_attr = + container_of(attr, struct adreno_sysfs_attribute_u32, attr); + + return scnprintf(buf, PAGE_SIZE, "0x%X\n", _attr->show(adreno_dev)); +} + +ssize_t adreno_sysfs_store_bool(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(dev_get_drvdata(dev)); + const struct adreno_sysfs_attribute_bool *_attr = + container_of(attr, struct adreno_sysfs_attribute_bool, attr); + bool val; + int ret; + + ret = kstrtobool(buf, &val); + if (ret) + return ret; + + ret = _attr->store(adreno_dev, val); + if (ret) + return ret; + + return count; +} + +ssize_t adreno_sysfs_show_bool(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(dev_get_drvdata(dev)); + const struct adreno_sysfs_attribute_bool *_attr = + container_of(attr, struct adreno_sysfs_attribute_bool, attr); + + return scnprintf(buf, PAGE_SIZE, "%d\n", _attr->show(adreno_dev)); +} + +static ADRENO_SYSFS_U32(ft_policy); +static ADRENO_SYSFS_U32(ft_pagefault_policy); +static ADRENO_SYSFS_RO_BOOL(ft_hang_intr_status); +static ADRENO_SYSFS_BOOL(gpu_llc_slice_enable); +static ADRENO_SYSFS_BOOL(gpuhtw_llc_slice_enable); + +static DEVICE_INT_ATTR(wake_nice, 0644, adreno_wake_nice); +static DEVICE_INT_ATTR(wake_timeout, 0644, adreno_wake_timeout); + +static ADRENO_SYSFS_BOOL(sptp_pc); +static ADRENO_SYSFS_BOOL(lm); +static ADRENO_SYSFS_BOOL(hwcg); +static ADRENO_SYSFS_BOOL(throttling); +static 
ADRENO_SYSFS_BOOL(ifpc); +static ADRENO_SYSFS_RO_U32(ifpc_count); +static ADRENO_SYSFS_BOOL(acd); +static ADRENO_SYSFS_BOOL(bcl); +static ADRENO_SYSFS_BOOL(l3_vote); + +static DEVICE_ATTR_RO(gpu_model); + +static const struct attribute *_attr_list[] = { + &adreno_attr_ft_policy.attr.attr, + &adreno_attr_ft_pagefault_policy.attr.attr, + &adreno_attr_ft_hang_intr_status.attr.attr, + &dev_attr_wake_nice.attr.attr, + &dev_attr_wake_timeout.attr.attr, + &adreno_attr_sptp_pc.attr.attr, + &adreno_attr_lm.attr.attr, + &adreno_attr_hwcg.attr.attr, + &adreno_attr_throttling.attr.attr, + &adreno_attr_gpu_llc_slice_enable.attr.attr, + &adreno_attr_gpuhtw_llc_slice_enable.attr.attr, + &adreno_attr_ifpc.attr.attr, + &adreno_attr_ifpc_count.attr.attr, + &adreno_attr_acd.attr.attr, + &adreno_attr_bcl.attr.attr, + &dev_attr_gpu_model.attr, + &adreno_attr_l3_vote.attr.attr, + NULL, +}; + +static GPU_SYSFS_ATTR(gpu_model, 0444, _gpu_model_show, NULL); + +/** + * adreno_sysfs_init() - Initialize adreno sysfs files + * @adreno_dev: Pointer to the adreno device + * + * Initialize many of the adreno specific sysfs files especially for fault + * tolerance and power control + */ +int adreno_sysfs_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + int ret; + + ret = sysfs_create_files(&device->dev->kobj, _attr_list); + + if (!ret) + ret = sysfs_create_file(&device->gpu_sysfs_kobj, + &gpu_sysfs_attr_gpu_model.attr); + + return ret; +} + diff --git a/adreno_sysfs.h b/adreno_sysfs.h new file mode 100644 index 0000000000..a60b5ab47d --- /dev/null +++ b/adreno_sysfs.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2020, The Linux Foundation. All rights reserved. + */ + +#ifndef _ADRENO_SYSFS_H_ +#define _ADRENO_SYSFS_H_ + +/* + * struct adreno_sysfs_attribute_u32 - Container for accessing and modifying + * integers in kgsl via sysfs + */ +struct adreno_sysfs_attribute_u32 { + /** #attr: The device attribute corresponding to the sysfs node */ + struct device_attribute attr; + /** @show: Function to show the value of the integer */ + u32 (*show)(struct adreno_device *adreno_dev); + /** @store: Function to store the value of the integer */ + int (*store)(struct adreno_device *adreno_dev, u32 val); +}; + +/* + * struct adreno_sysfs_attribute_bool - Container for accessing and modifying + * booleans in kgsl via sysfs + */ +struct adreno_sysfs_attribute_bool { + /** #attr: The device attribute corresponding to the sysfs node */ + struct device_attribute attr; + /** @show: Function to show the value of the boolean */ + bool (*show)(struct adreno_device *adreno_dev); + /** @store: Function to store the value of the boolean */ + int (*store)(struct adreno_device *adreno_dev, bool val); +}; + +/* Helper function to modify an integer in kgsl */ +ssize_t adreno_sysfs_store_u32(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count); + +/* Helper function to read an integer in kgsl */ +ssize_t adreno_sysfs_show_u32(struct device *dev, + struct device_attribute *attr, char *buf); + +/* Helper function to modify a boolean in kgsl */ +ssize_t adreno_sysfs_store_bool(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count); + +/* Helper function to read a boolean in kgsl */ +ssize_t adreno_sysfs_show_bool(struct device *dev, + struct device_attribute *attr, char *buf); + +#define ADRENO_SYSFS_BOOL(_name) \ +const struct adreno_sysfs_attribute_bool adreno_attr_##_name = { \ + .attr = __ATTR(_name, 
0644, adreno_sysfs_show_bool, \ + adreno_sysfs_store_bool), \ + .show = _ ## _name ## _show, \ + .store = _ ## _name ## _store, \ +} + +#define ADRENO_SYSFS_RO_BOOL(_name) \ +const struct adreno_sysfs_attribute_bool adreno_attr_##_name = { \ + .attr = __ATTR(_name, 0444, adreno_sysfs_show_bool, NULL), \ + .show = _ ## _name ## _show, \ +} + +#define ADRENO_SYSFS_U32(_name) \ +const struct adreno_sysfs_attribute_u32 adreno_attr_##_name = { \ + .attr = __ATTR(_name, 0644, adreno_sysfs_show_u32, \ + adreno_sysfs_store_u32), \ + .show = _ ## _name ## _show, \ + .store = _ ## _name ## _store, \ +} + +#define ADRENO_SYSFS_RO_U32(_name) \ +const struct adreno_sysfs_attribute_u32 adreno_attr_##_name = { \ + .attr = __ATTR(_name, 0444, adreno_sysfs_show_u32, NULL), \ + .show = _ ## _name ## _show, \ +} +#endif diff --git a/adreno_trace.c b/adreno_trace.c new file mode 100644 index 0000000000..84577f2441 --- /dev/null +++ b/adreno_trace.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021 The Linux Foundation. All rights reserved. + */ + +#include +#include "adreno.h" + +/* Instantiate tracepoints */ +#define CREATE_TRACE_POINTS +#include "adreno_trace.h" + +static const char * const kgsl_fence_trace_events[] = { + "adreno_cmdbatch_submitted", + "adreno_cmdbatch_retired", + "syncpoint_fence", + "syncpoint_fence_expire", + "kgsl_fire_event", + "kgsl_timeline_fence_alloc", + "kgsl_timeline_fence_release", +}; + +void adreno_fence_trace_array_init(struct kgsl_device *device) +{ + int i; + + device->fence_trace_array = trace_array_get_by_name("kgsl-fence"); + + if (!device->fence_trace_array) + return; + + for (i = 0; i < ARRAY_SIZE(kgsl_fence_trace_events); i++) + trace_array_set_clr_event(device->fence_trace_array, + "kgsl", kgsl_fence_trace_events[i], true); + +} diff --git a/adreno_trace.h b/adreno_trace.h new file mode 100644 index 0000000000..3890dfc501 --- /dev/null +++ b/adreno_trace.h @@ -0,0 +1,786 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2013-2021, The Linux Foundation. All rights reserved. + */ + +#if !defined(_ADRENO_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _ADRENO_TRACE_H + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kgsl +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE adreno_trace + +#include +#include "adreno_a3xx.h" +#include "adreno_a5xx.h" +#include "adreno_gen7.h" + +#define ADRENO_FT_TYPES \ + { BIT(KGSL_FT_OFF), "off" }, \ + { BIT(KGSL_FT_REPLAY), "replay" }, \ + { BIT(KGSL_FT_SKIPIB), "skipib" }, \ + { BIT(KGSL_FT_SKIPFRAME), "skipframe" }, \ + { BIT(KGSL_FT_DISABLE), "disable" }, \ + { BIT(KGSL_FT_TEMP_DISABLE), "temp" }, \ + { BIT(KGSL_FT_THROTTLE), "throttle"}, \ + { BIT(KGSL_FT_SKIPCMD), "skipcmd" } + +TRACE_EVENT(adreno_cmdbatch_queued, + TP_PROTO(struct kgsl_drawobj *drawobj, unsigned int queued), + TP_ARGS(drawobj, queued), + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, timestamp) + __field(unsigned int, queued) + __field(unsigned int, flags) + __field(unsigned int, prio) + ), + TP_fast_assign( + __entry->id = drawobj->context->id; + __entry->timestamp = drawobj->timestamp; + __entry->queued = queued; + __entry->flags = drawobj->flags; + __entry->prio = drawobj->context->priority; + ), + TP_printk( + "ctx=%u ctx_prio=%u ts=%u queued=%u flags=%s", + __entry->id, __entry->prio, + __entry->timestamp, __entry->queued, + __entry->flags ? 
__print_flags(__entry->flags, "|", + KGSL_DRAWOBJ_FLAGS) : "none" + ) +); + +TRACE_EVENT(adreno_cmdbatch_submitted, + TP_PROTO(struct kgsl_drawobj *drawobj, struct submission_info *info, + uint64_t ticks, unsigned long secs, unsigned long usecs, + int q_inflight), + TP_ARGS(drawobj, info, ticks, secs, usecs, q_inflight), + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, timestamp) + __field(int, inflight) + __field(unsigned int, flags) + __field(uint64_t, ticks) + __field(unsigned long, secs) + __field(unsigned long, usecs) + __field(int, prio) + __field(int, rb_id) + __field(unsigned int, rptr) + __field(unsigned int, wptr) + __field(int, q_inflight) + __field(int, dispatch_queue) + ), + TP_fast_assign( + __entry->id = drawobj->context->id; + __entry->timestamp = drawobj->timestamp; + __entry->inflight = info->inflight; + __entry->flags = drawobj->flags; + __entry->ticks = ticks; + __entry->secs = secs; + __entry->usecs = usecs; + __entry->prio = drawobj->context->priority; + __entry->rb_id = info->rb_id; + __entry->rptr = info->rptr; + __entry->wptr = info->wptr; + __entry->q_inflight = q_inflight; + __entry->dispatch_queue = info->gmu_dispatch_queue; + ), + TP_printk( + "ctx=%u ctx_prio=%d ts=%u inflight=%d flags=%s ticks=%lld time=%lu.%0lu rb_id=%d r/w=%x/%x, q_inflight=%d dq_id=%d", + __entry->id, __entry->prio, __entry->timestamp, + __entry->inflight, + __entry->flags ? __print_flags(__entry->flags, "|", + KGSL_DRAWOBJ_FLAGS) : "none", + __entry->ticks, __entry->secs, __entry->usecs, + __entry->rb_id, __entry->rptr, __entry->wptr, + __entry->q_inflight, __entry->dispatch_queue + ) +); + +TRACE_EVENT(adreno_cmdbatch_retired, + TP_PROTO(struct kgsl_context *context, struct retire_info *info, + unsigned int flags, int q_inflight, + unsigned long fault_recovery), + TP_ARGS(context, info, flags, q_inflight, fault_recovery), + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, timestamp) + __field(int, inflight) + __field(unsigned int, recovery) + __field(unsigned int, flags) + __field(uint64_t, start) + __field(uint64_t, retire) + __field(int, prio) + __field(int, rb_id) + __field(unsigned int, rptr) + __field(unsigned int, wptr) + __field(int, q_inflight) + __field(unsigned long, fault_recovery) + __field(unsigned int, dispatch_queue) + __field(uint64_t, submitted_to_rb) + __field(uint64_t, retired_on_gmu) + ), + TP_fast_assign( + __entry->id = context->id; + __entry->timestamp = info->timestamp; + __entry->inflight = info->inflight; + __entry->recovery = fault_recovery; + __entry->flags = flags; + __entry->start = info->sop; + __entry->retire = info->eop; + __entry->prio = context->priority; + __entry->rb_id = info->rb_id; + __entry->rptr = info->rptr; + __entry->wptr = info->wptr; + __entry->q_inflight = q_inflight; + __entry->dispatch_queue = info->gmu_dispatch_queue; + __entry->submitted_to_rb = info->submitted_to_rb; + __entry->retired_on_gmu = info->retired_on_gmu; + ), + + TP_printk( + "ctx=%u ctx_prio=%d ts=%u inflight=%d recovery=%s flags=%s start=%llu retire=%llu rb_id=%d, r/w=%x/%x, q_inflight=%d, dq_id=%u, submitted_to_rb=%llu retired_on_gmu=%llu", + __entry->id, __entry->prio, __entry->timestamp, + __entry->inflight, + __entry->recovery ? + __print_flags(__entry->fault_recovery, "|", + ADRENO_FT_TYPES) : "none", + __entry->flags ? 
__print_flags(__entry->flags, "|", + KGSL_DRAWOBJ_FLAGS) : "none", + __entry->start, + __entry->retire, + __entry->rb_id, __entry->rptr, __entry->wptr, + __entry->q_inflight, + __entry->dispatch_queue, + __entry->submitted_to_rb, __entry->retired_on_gmu + ) +); + +TRACE_EVENT(gmu_ao_sync, + TP_PROTO(u64 ticks), + TP_ARGS(ticks), + TP_STRUCT__entry( + __field(u64, ticks) + ), + TP_fast_assign( + __entry->ticks = ticks; + ), + TP_printk( + "ticks=%llu", __entry->ticks + ) +); + +TRACE_EVENT(gmu_event, + TP_PROTO(u32 *event_info), + TP_ARGS(event_info), + TP_STRUCT__entry( + __field(u32, event) + __field(u32, ticks) + __field(u32, data1) + __field(u32, data2) + ), + TP_fast_assign( + __entry->event = event_info[0]; + __entry->ticks = event_info[1]; + __entry->data1 = event_info[2]; + __entry->data2 = event_info[3]; + ), + TP_printk( + "event=%08u ticks=%08u data1=0x%08x data2=0x%08x", + __entry->event, __entry->ticks, __entry->data1, __entry->data2 + ) +); + +TRACE_EVENT(adreno_cmdbatch_sync, + TP_PROTO(unsigned int ctx_id, unsigned int ctx_prio, + unsigned int timestamp, uint64_t ticks), + TP_ARGS(ctx_id, ctx_prio, timestamp, ticks), + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, timestamp) + __field(uint64_t, ticks) + __field(int, prio) + ), + TP_fast_assign( + __entry->id = ctx_id; + __entry->timestamp = timestamp; + __entry->ticks = ticks; + __entry->prio = ctx_prio; + ), + TP_printk( + "ctx=%u ctx_prio=%d ts=%u ticks=%lld", + __entry->id, __entry->prio, __entry->timestamp, + __entry->ticks + ) +); + +TRACE_EVENT(adreno_cmdbatch_fault, + TP_PROTO(struct kgsl_drawobj_cmd *cmdobj, unsigned int fault), + TP_ARGS(cmdobj, fault), + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, timestamp) + __field(unsigned int, fault) + ), + TP_fast_assign( + __entry->id = cmdobj->base.context->id; + __entry->timestamp = cmdobj->base.timestamp; + __entry->fault = fault; + ), + TP_printk( + "ctx=%u ts=%u type=%s", + __entry->id, __entry->timestamp, + __print_symbolic(__entry->fault, + { 0, "none" }, + { ADRENO_SOFT_FAULT, "soft" }, + { ADRENO_HARD_FAULT, "hard" }, + { ADRENO_TIMEOUT_FAULT, "timeout" }) + ) +); + +TRACE_EVENT(adreno_cmdbatch_recovery, + TP_PROTO(struct kgsl_drawobj_cmd *cmdobj, unsigned int action), + TP_ARGS(cmdobj, action), + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, timestamp) + __field(unsigned int, action) + ), + TP_fast_assign( + __entry->id = cmdobj->base.context->id; + __entry->timestamp = cmdobj->base.timestamp; + __entry->action = action; + ), + TP_printk( + "ctx=%u ts=%u action=%s", + __entry->id, __entry->timestamp, + __print_symbolic(__entry->action, ADRENO_FT_TYPES) + ) +); + +DECLARE_EVENT_CLASS(adreno_drawctxt_template, + TP_PROTO(struct adreno_context *drawctxt), + TP_ARGS(drawctxt), + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, priority) + ), + TP_fast_assign( + __entry->id = drawctxt->base.id; + __entry->priority = drawctxt->base.priority; + ), + TP_printk("ctx=%u priority=%u", __entry->id, __entry->priority) +); + +DEFINE_EVENT(adreno_drawctxt_template, adreno_drawctxt_sleep, + TP_PROTO(struct adreno_context *drawctxt), + TP_ARGS(drawctxt) +); + +DEFINE_EVENT(adreno_drawctxt_template, adreno_drawctxt_wake, + TP_PROTO(struct adreno_context *drawctxt), + TP_ARGS(drawctxt) +); + +DEFINE_EVENT(adreno_drawctxt_template, dispatch_queue_context, + TP_PROTO(struct adreno_context *drawctxt), + TP_ARGS(drawctxt) +); + +DEFINE_EVENT(adreno_drawctxt_template, adreno_drawctxt_invalidate, 
+ TP_PROTO(struct adreno_context *drawctxt), + TP_ARGS(drawctxt) +); + +TRACE_EVENT(adreno_drawctxt_wait_start, + TP_PROTO(unsigned int rb_id, unsigned int ctx_id, unsigned int ts), + TP_ARGS(rb_id, ctx_id, ts), + TP_STRUCT__entry( + __field(unsigned int, rb_id) + __field(unsigned int, ctx_id) + __field(unsigned int, ts) + ), + TP_fast_assign( + __entry->rb_id = rb_id; + __entry->ctx_id = ctx_id; + __entry->ts = ts; + ), + TP_printk( + "rb=%u ctx=%u ts=%u", + __entry->rb_id, __entry->ctx_id, __entry->ts + ) +); + +TRACE_EVENT(adreno_drawctxt_wait_done, + TP_PROTO(unsigned int rb_id, unsigned int ctx_id, + unsigned int ts, int status), + TP_ARGS(rb_id, ctx_id, ts, status), + TP_STRUCT__entry( + __field(unsigned int, rb_id) + __field(unsigned int, ctx_id) + __field(unsigned int, ts) + __field(int, status) + ), + TP_fast_assign( + __entry->rb_id = rb_id; + __entry->ctx_id = ctx_id; + __entry->ts = ts; + __entry->status = status; + ), + TP_printk( + "rb=%u ctx=%u ts=%u status=%d", + __entry->rb_id, __entry->ctx_id, __entry->ts, __entry->status + ) +); + +TRACE_EVENT(adreno_drawctxt_switch, + TP_PROTO(struct adreno_ringbuffer *rb, + struct adreno_context *newctx), + TP_ARGS(rb, newctx), + TP_STRUCT__entry( + __field(int, rb_level) + __field(unsigned int, oldctx) + __field(unsigned int, newctx) + __field(unsigned int, flags) + ), + TP_fast_assign( + __entry->rb_level = rb->id; + __entry->oldctx = rb->drawctxt_active ? + rb->drawctxt_active->base.id : 0; + __entry->newctx = newctx ? newctx->base.id : 0; + ), + TP_printk( + "rb level=%d oldctx=%u newctx=%u", + __entry->rb_level, __entry->oldctx, __entry->newctx + ) +); + +TRACE_EVENT(adreno_gpu_fault, + TP_PROTO(unsigned int ctx, unsigned int ts, + unsigned int status, unsigned int rptr, unsigned int wptr, + unsigned int ib1base, unsigned int ib1size, + unsigned int ib2base, unsigned int ib2size, int rb_id), + TP_ARGS(ctx, ts, status, rptr, wptr, ib1base, ib1size, ib2base, + ib2size, rb_id), + TP_STRUCT__entry( + __field(unsigned int, ctx) + __field(unsigned int, ts) + __field(unsigned int, status) + __field(unsigned int, rptr) + __field(unsigned int, wptr) + __field(unsigned int, ib1base) + __field(unsigned int, ib1size) + __field(unsigned int, ib2base) + __field(unsigned int, ib2size) + __field(int, rb_id) + ), + TP_fast_assign( + __entry->ctx = ctx; + __entry->ts = ts; + __entry->status = status; + __entry->rptr = rptr; + __entry->wptr = wptr; + __entry->ib1base = ib1base; + __entry->ib1size = ib1size; + __entry->ib2base = ib2base; + __entry->ib2size = ib2size; + __entry->rb_id = rb_id; + ), + TP_printk( + "ctx=%d ts=%d rb_id=%d status=%X RB=%X/%X IB1=%X/%X IB2=%X/%X", + __entry->ctx, __entry->ts, __entry->rb_id, __entry->status, + __entry->wptr, __entry->rptr, __entry->ib1base, + __entry->ib1size, __entry->ib2base, __entry->ib2size) +); + +TRACE_EVENT(adreno_sp_tp, + + TP_PROTO(unsigned long ip), + + TP_ARGS(ip), + + TP_STRUCT__entry( + __field(unsigned long, ip) + ), + + TP_fast_assign( + __entry->ip = ip; + ), + + TP_printk( + "func=%pS", (void *) __entry->ip + ) +); + +/* + * Tracepoint for a3xx irq. 
Includes status info + */ +TRACE_EVENT(kgsl_a3xx_irq_status, + + TP_PROTO(struct adreno_device *adreno_dev, unsigned int status), + + TP_ARGS(adreno_dev, status), + + TP_STRUCT__entry( + __string(device_name, adreno_dev->dev.name) + __field(unsigned int, status) + ), + + TP_fast_assign( + __assign_str(device_name, adreno_dev->dev.name); + __entry->status = status; + ), + + TP_printk( + "d_name=%s status=%s", + __get_str(device_name), + __entry->status ? __print_flags(__entry->status, "|", + { BIT(A3XX_INT_RBBM_GPU_IDLE), "RBBM_GPU_IDLE" }, + { BIT(A3XX_INT_RBBM_AHB_ERROR), "RBBM_AHB_ERR" }, + { BIT(A3XX_INT_RBBM_REG_TIMEOUT), "RBBM_REG_TIMEOUT" }, + { BIT(A3XX_INT_RBBM_ME_MS_TIMEOUT), + "RBBM_ME_MS_TIMEOUT" }, + { BIT(A3XX_INT_RBBM_PFP_MS_TIMEOUT), + "RBBM_PFP_MS_TIMEOUT" }, + { BIT(A3XX_INT_RBBM_ATB_BUS_OVERFLOW), + "RBBM_ATB_BUS_OVERFLOW" }, + { BIT(A3XX_INT_VFD_ERROR), "RBBM_VFD_ERROR" }, + { BIT(A3XX_INT_CP_SW_INT), "CP_SW" }, + { BIT(A3XX_INT_CP_T0_PACKET_IN_IB), + "CP_T0_PACKET_IN_IB" }, + { BIT(A3XX_INT_CP_OPCODE_ERROR), "CP_OPCODE_ERROR" }, + { BIT(A3XX_INT_CP_RESERVED_BIT_ERROR), + "CP_RESERVED_BIT_ERROR" }, + { BIT(A3XX_INT_CP_HW_FAULT), "CP_HW_FAULT" }, + { BIT(A3XX_INT_CP_DMA), "CP_DMA" }, + { BIT(A3XX_INT_CP_IB2_INT), "CP_IB2_INT" }, + { BIT(A3XX_INT_CP_IB1_INT), "CP_IB1_INT" }, + { BIT(A3XX_INT_CP_RB_INT), "CP_RB_INT" }, + { BIT(A3XX_INT_CP_REG_PROTECT_FAULT), + "CP_REG_PROTECT_FAULT" }, + { BIT(A3XX_INT_CP_RB_DONE_TS), "CP_RB_DONE_TS" }, + { BIT(A3XX_INT_CP_VS_DONE_TS), "CP_VS_DONE_TS" }, + { BIT(A3XX_INT_CP_PS_DONE_TS), "CP_PS_DONE_TS" }, + { BIT(A3XX_INT_CACHE_FLUSH_TS), "CACHE_FLUSH_TS" }, + { BIT(A3XX_INT_CP_AHB_ERROR_HALT), + "CP_AHB_ERROR_HALT" }, + { BIT(A3XX_INT_MISC_HANG_DETECT), "MISC_HANG_DETECT" }, + { BIT(A3XX_INT_UCHE_OOB_ACCESS), "UCHE_OOB_ACCESS" }) + : "None" + ) +); + +/* + * Tracepoint for a5xx irq. Includes status info + */ +TRACE_EVENT(kgsl_a5xx_irq_status, + + TP_PROTO(struct adreno_device *adreno_dev, unsigned int status), + + TP_ARGS(adreno_dev, status), + + TP_STRUCT__entry( + __string(device_name, adreno_dev->dev.name) + __field(unsigned int, status) + ), + + TP_fast_assign( + __assign_str(device_name, adreno_dev->dev.name); + __entry->status = status; + ), + + TP_printk( + "d_name=%s status=%s", + __get_str(device_name), + __entry->status ? 
__print_flags(__entry->status, "|", + { BIT(A5XX_INT_RBBM_GPU_IDLE), "RBBM_GPU_IDLE" }, + { BIT(A5XX_INT_RBBM_AHB_ERROR), "RBBM_AHB_ERR" }, + { BIT(A5XX_INT_RBBM_TRANSFER_TIMEOUT), + "RBBM_TRANSFER_TIMEOUT" }, + { BIT(A5XX_INT_RBBM_ME_MS_TIMEOUT), + "RBBM_ME_MS_TIMEOUT" }, + { BIT(A5XX_INT_RBBM_PFP_MS_TIMEOUT), + "RBBM_PFP_MS_TIMEOUT" }, + { BIT(A5XX_INT_RBBM_ETS_MS_TIMEOUT), + "RBBM_ETS_MS_TIMEOUT" }, + { BIT(A5XX_INT_RBBM_ATB_ASYNC_OVERFLOW), + "RBBM_ATB_ASYNC_OVERFLOW" }, + { BIT(A5XX_INT_RBBM_GPC_ERROR), "RBBM_GPC_ERR" }, + { BIT(A5XX_INT_CP_SW), "CP_SW" }, + { BIT(A5XX_INT_CP_HW_ERROR), "CP_OPCODE_ERROR" }, + { BIT(A5XX_INT_CP_CCU_FLUSH_DEPTH_TS), + "CP_CCU_FLUSH_DEPTH_TS" }, + { BIT(A5XX_INT_CP_CCU_FLUSH_COLOR_TS), + "CP_CCU_FLUSH_COLOR_TS" }, + { BIT(A5XX_INT_CP_CCU_RESOLVE_TS), + "CP_CCU_RESOLVE_TS" }, + { BIT(A5XX_INT_CP_IB2), "CP_IB2_INT" }, + { BIT(A5XX_INT_CP_IB1), "CP_IB1_INT" }, + { BIT(A5XX_INT_CP_RB), "CP_RB_INT" }, + { BIT(A5XX_INT_CP_UNUSED_1), "CP_UNUSED_1" }, + { BIT(A5XX_INT_CP_RB_DONE_TS), "CP_RB_DONE_TS" }, + { BIT(A5XX_INT_CP_WT_DONE_TS), "CP_WT_DONE_TS" }, + { BIT(A5XX_INT_UNKNOWN_1), "UNKNOWN_1" }, + { BIT(A5XX_INT_CP_CACHE_FLUSH_TS), + "CP_CACHE_FLUSH_TS" }, + { BIT(A5XX_INT_UNUSED_2), "UNUSED_2" }, + { BIT(A5XX_INT_RBBM_ATB_BUS_OVERFLOW), + "RBBM_ATB_BUS_OVERFLOW" }, + { BIT(A5XX_INT_MISC_HANG_DETECT), "MISC_HANG_DETECT" }, + { BIT(A5XX_INT_UCHE_OOB_ACCESS), "UCHE_OOB_ACCESS" }, + { BIT(A5XX_INT_UCHE_TRAP_INTR), "UCHE_TRAP_INTR" }, + { BIT(A5XX_INT_DEBBUS_INTR_0), "DEBBUS_INTR_0" }, + { BIT(A5XX_INT_DEBBUS_INTR_1), "DEBBUS_INTR_1" }, + { BIT(A5XX_INT_GPMU_VOLTAGE_DROOP), + "GPMU_VOLTAGE_DROOP" }, + { BIT(A5XX_INT_GPMU_FIRMWARE), "GPMU_FIRMWARE" }, + { BIT(A5XX_INT_ISDB_CPU_IRQ), "ISDB_CPU_IRQ" }, + { BIT(A5XX_INT_ISDB_UNDER_DEBUG), "ISDB_UNDER_DEBUG" }) + : "None" + ) +); + +/* + * Tracepoint for gen7 irq. Includes status info + */ +TRACE_EVENT(kgsl_gen7_irq_status, + + TP_PROTO(struct adreno_device *adreno_dev, unsigned int status), + + TP_ARGS(adreno_dev, status), + + TP_STRUCT__entry( + __string(device_name, adreno_dev->dev.name) + __field(unsigned int, status) + ), + + TP_fast_assign( + __assign_str(device_name, adreno_dev->dev.name); + __entry->status = status; + ), + + TP_printk( + "d_name=%s status=%s", + __get_str(device_name), + __entry->status ? 
__print_flags(__entry->status, "|", + { BIT(GEN7_INT_GPUIDLE), "GPUIDLE" }, + { BIT(GEN7_INT_AHBERROR), "AHBERROR" }, + { BIT(GEN7_INT_CPIPCINT0), "CPIPCINT0" }, + { BIT(GEN7_INT_CPIPCINT1), "CPIPCINT1" }, + { BIT(GEN7_INT_ATBASYNCFIFOOVERFLOW), + "ATBASYNCFIFOOVERFLOW" }, + { BIT(GEN7_INT_GPCERROR), "GPCERROR" }, + { BIT(GEN7_INT_SWINTERRUPT), "SWINTERRUPT" }, + { BIT(GEN7_INT_HWERROR), "HWERROR" }, + { BIT(GEN7_INT_CCU_CLEAN_DEPTH_TS), + "CCU_CLEAN_DEPTH_TS" }, + { BIT(GEN7_INT_CCU_CLEAN_COLOR_TS), + "CCU_CLEAN_COLOR_TS" }, + { BIT(GEN7_INT_CCU_RESOLVE_CLEAN_TS), + "CCU_RESOLVE_CLEAN_TS" }, + { BIT(GEN7_INT_PM4CPINTERRUPT), "PM4CPINTERRUPT" }, + { BIT(GEN7_INT_PM4CPINTERRUPTLPAC), + "PM4CPINTERRUPTLPAC" }, + { BIT(GEN7_INT_RB_DONE_TS), "RB_DONE_TS" }, + { BIT(GEN7_INT_CACHE_CLEAN_TS), "CACHE_CLEAN_TS" }, + { BIT(GEN7_INT_CACHE_CLEAN_TS_LPAC), + "CACHE_CLEAN_TS_LPAC" }, + { BIT(GEN7_INT_ATBBUSOVERFLOW), "ATBBUSOVERFLOW" }, + { BIT(GEN7_INT_HANGDETECTINTERRUPT), + "HANGDETECTINTERRUPT" }, + { BIT(GEN7_INT_OUTOFBOUNDACCESS), + "OUTOFBOUNDACCESS" }, + { BIT(GEN7_INT_UCHETRAPINTERRUPT), + "UCHETRAPINTERRUPT" }, + { BIT(GEN7_INT_DEBUGBUSINTERRUPT0), + "DEBUGBUSINTERRUPT0" }, + { BIT(GEN7_INT_DEBUGBUSINTERRUPT1), + "DEBUGBUSINTERRUPT1" }, + { BIT(GEN7_INT_TSBWRITEERROR), "TSBWRITEERROR" }, + { BIT(GEN7_INT_ISDBCPUIRQ), "ISDBCPUIRQ" }, + { BIT(GEN7_INT_ISDBUNDERDEBUG), "ISDBUNDERDEBUG" }, + { BIT(GEN7_INT_ISDBUNDERDEBUG), "ISDBUNDERDEBUG" }) + : "None" + ) +); + +DECLARE_EVENT_CLASS(adreno_hw_preempt_template, + TP_PROTO(struct adreno_ringbuffer *cur_rb, + struct adreno_ringbuffer *new_rb, + unsigned int cur_rptr, unsigned int new_rptr), + TP_ARGS(cur_rb, new_rb, cur_rptr, new_rptr), + TP_STRUCT__entry(__field(int, cur_level) + __field(int, new_level) + __field(unsigned int, cur_rptr) + __field(unsigned int, new_rptr) + __field(unsigned int, cur_wptr) + __field(unsigned int, new_wptr) + __field(unsigned int, cur_rbbase) + __field(unsigned int, new_rbbase) + ), + TP_fast_assign(__entry->cur_level = cur_rb->id; + __entry->new_level = new_rb->id; + __entry->cur_rptr = cur_rptr; + __entry->new_rptr = new_rptr; + __entry->cur_wptr = cur_rb->wptr; + __entry->new_wptr = new_rb->wptr; + __entry->cur_rbbase = cur_rb->buffer_desc->gpuaddr; + __entry->new_rbbase = new_rb->buffer_desc->gpuaddr; + ), + TP_printk( + "cur_rb_lvl=%d rptr=%x wptr=%x rbbase=%x new_rb_lvl=%d rptr=%x wptr=%x rbbase=%x", + __entry->cur_level, __entry->cur_rptr, + __entry->cur_wptr, __entry->cur_rbbase, + __entry->new_level, __entry->new_rptr, + __entry->new_wptr, __entry->new_rbbase + ) +); + +DEFINE_EVENT(adreno_hw_preempt_template, adreno_hw_preempt_clear_to_trig, + TP_PROTO(struct adreno_ringbuffer *cur_rb, + struct adreno_ringbuffer *new_rb, + unsigned int cur_rptr, unsigned int new_rptr), + TP_ARGS(cur_rb, new_rb, cur_rptr, new_rptr) +); + +DEFINE_EVENT(adreno_hw_preempt_template, adreno_hw_preempt_trig_to_comp, + TP_PROTO(struct adreno_ringbuffer *cur_rb, + struct adreno_ringbuffer *new_rb, + unsigned int cur_rptr, unsigned int new_rptr), + TP_ARGS(cur_rb, new_rb, cur_rptr, new_rptr) +); + +DEFINE_EVENT(adreno_hw_preempt_template, adreno_hw_preempt_trig_to_comp_int, + TP_PROTO(struct adreno_ringbuffer *cur_rb, + struct adreno_ringbuffer *new_rb, + unsigned int cur_rptr, unsigned int new_rptr), + TP_ARGS(cur_rb, new_rb, cur_rptr, new_rptr) +); + +TRACE_EVENT(adreno_hw_preempt_comp_to_clear, + TP_PROTO(struct adreno_ringbuffer *cur_rb, + struct adreno_ringbuffer *new_rb, + unsigned int cur_rptr, unsigned int new_rptr), + 
TP_ARGS(cur_rb, new_rb, cur_rptr, new_rptr), + TP_STRUCT__entry(__field(int, cur_level) + __field(int, new_level) + __field(unsigned int, cur_rptr) + __field(unsigned int, new_rptr) + __field(unsigned int, cur_wptr) + __field(unsigned int, new_wptr_end) + __field(unsigned int, new_wptr) + __field(unsigned int, cur_rbbase) + __field(unsigned int, new_rbbase) + ), + TP_fast_assign(__entry->cur_level = cur_rb->id; + __entry->new_level = new_rb->id; + __entry->cur_rptr = cur_rptr; + __entry->new_rptr = new_rptr; + __entry->cur_wptr = cur_rb->wptr; + __entry->new_wptr_end = new_rb->wptr_preempt_end; + __entry->new_wptr = new_rb->wptr; + __entry->cur_rbbase = cur_rb->buffer_desc->gpuaddr; + __entry->new_rbbase = new_rb->buffer_desc->gpuaddr; + ), + TP_printk( + "cur_rb_lvl=%d rptr=%x wptr=%x rbbase=%x prev_rb_lvl=%d rptr=%x wptr_preempt_end=%x wptr=%x rbbase=%x", + __entry->cur_level, __entry->cur_rptr, + __entry->cur_wptr, __entry->cur_rbbase, + __entry->new_level, __entry->new_rptr, + __entry->new_wptr_end, __entry->new_wptr, __entry->new_rbbase + ) +); + +TRACE_EVENT(adreno_hw_preempt_token_submit, + TP_PROTO(struct adreno_ringbuffer *cur_rb, + struct adreno_ringbuffer *new_rb, + unsigned int cur_rptr, unsigned int new_rptr), + TP_ARGS(cur_rb, new_rb, cur_rptr, new_rptr), + TP_STRUCT__entry(__field(int, cur_level) + __field(int, new_level) + __field(unsigned int, cur_rptr) + __field(unsigned int, new_rptr) + __field(unsigned int, cur_wptr) + __field(unsigned int, cur_wptr_end) + __field(unsigned int, new_wptr) + __field(unsigned int, cur_rbbase) + __field(unsigned int, new_rbbase) + ), + TP_fast_assign(__entry->cur_level = cur_rb->id; + __entry->new_level = new_rb->id; + __entry->cur_rptr = cur_rptr; + __entry->new_rptr = new_rptr; + __entry->cur_wptr = cur_rb->wptr; + __entry->cur_wptr_end = cur_rb->wptr_preempt_end; + __entry->new_wptr = new_rb->wptr; + __entry->cur_rbbase = cur_rb->buffer_desc->gpuaddr; + __entry->new_rbbase = new_rb->buffer_desc->gpuaddr; + ), + TP_printk( + "cur_rb_lvl=%d rptr=%x wptr_preempt_end=%x wptr=%x rbbase=%x new_rb_lvl=%d rptr=%x wptr=%x rbbase=%x", + __entry->cur_level, __entry->cur_rptr, + __entry->cur_wptr_end, __entry->cur_wptr, + __entry->cur_rbbase, + __entry->new_level, __entry->new_rptr, + __entry->new_wptr, __entry->new_rbbase + ) +); + +TRACE_EVENT(adreno_preempt_trigger, + TP_PROTO(struct adreno_ringbuffer *cur, struct adreno_ringbuffer *next, + unsigned int cntl), + TP_ARGS(cur, next, cntl), + TP_STRUCT__entry( + __field(unsigned int, cur) + __field(unsigned int, next) + __field(unsigned int, cntl) + ), + TP_fast_assign( + __entry->cur = cur->id; + __entry->next = next->id; + __entry->cntl = cntl; + ), + TP_printk("trigger from id=%d to id=%d cntl=%x", + __entry->cur, __entry->next, __entry->cntl + ) +); + +TRACE_EVENT(adreno_preempt_done, + TP_PROTO(struct adreno_ringbuffer *cur, struct adreno_ringbuffer *next, + unsigned int level), + TP_ARGS(cur, next, level), + TP_STRUCT__entry( + __field(unsigned int, cur) + __field(unsigned int, next) + __field(unsigned int, level) + ), + TP_fast_assign( + __entry->cur = cur->id; + __entry->next = next->id; + __entry->level = level; + ), + TP_printk("done switch to id=%d from id=%d level=%x", + __entry->next, __entry->cur, __entry->level + ) +); + +TRACE_EVENT(adreno_ifpc_count, + TP_PROTO(unsigned int ifpc_count), + TP_ARGS(ifpc_count), + TP_STRUCT__entry( + __field(unsigned int, ifpc_count) + ), + TP_fast_assign( + __entry->ifpc_count = ifpc_count; + ), + TP_printk("total times GMU entered IFPC = %d", 
__entry->ifpc_count) +); + +#endif /* _ADRENO_TRACE_H */ + +/* This part must be outside protection */ +#include diff --git a/build.config.msm_kgsl b/build.config.msm_kgsl new file mode 100644 index 0000000000..f30c524b08 --- /dev/null +++ b/build.config.msm_kgsl @@ -0,0 +1 @@ +EXT_MODULES+=msm_kgsl diff --git a/config/gki_waipiodisp.conf b/config/gki_waipiodisp.conf new file mode 100644 index 0000000000..87097eadd1 --- /dev/null +++ b/config/gki_waipiodisp.conf @@ -0,0 +1,15 @@ +CONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ = y +CONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON = y +CONFIG_QCOM_KGSL_IDLE_TIMEOUT = 80 +CONFIG_QCOM_KGSL_SORT_POOL = y +CONFIG_QCOM_KGSL_CONTEXT_DEBUG = y +CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT = y +CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR = \"msm-adreno-tz\" + +ccflags-y += -DCONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ=1 \ + -DCONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON=1 \ + -DCONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 \ + -DCONFIG_QCOM_KGSL_SORT_POOL=1 \ + -DCONFIG_QCOM_KGSL_CONTEXT_DEBUG=1 \ + -DCONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT=1 \ + -DCONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR=\"msm-adreno-tz\" diff --git a/gen7_reg.h b/gen7_reg.h new file mode 100644 index 0000000000..e530a02854 --- /dev/null +++ b/gen7_reg.h @@ -0,0 +1,1158 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + */ + +#ifndef _GEN7_REG_H +#define _GEN7_REG_H + +/* GEN7 interrupt bits */ +#define GEN7_INT_GPUIDLE 0 +#define GEN7_INT_AHBERROR 1 +#define GEN7_INT_CPIPCINT0 4 +#define GEN7_INT_CPIPCINT1 5 +#define GEN7_INT_ATBASYNCFIFOOVERFLOW 6 +#define GEN7_INT_GPCERROR 7 +#define GEN7_INT_SWINTERRUPT 8 +#define GEN7_INT_HWERROR 9 +#define GEN7_INT_CCU_CLEAN_DEPTH_TS 10 +#define GEN7_INT_CCU_CLEAN_COLOR_TS 11 +#define GEN7_INT_CCU_RESOLVE_CLEAN_TS 12 +#define GEN7_INT_PM4CPINTERRUPT 15 +#define GEN7_INT_PM4CPINTERRUPTLPAC 16 +#define GEN7_INT_RB_DONE_TS 17 +#define GEN7_INT_CACHE_CLEAN_TS 20 +#define GEN7_INT_CACHE_CLEAN_TS_LPAC 21 +#define GEN7_INT_ATBBUSOVERFLOW 22 +#define GEN7_INT_HANGDETECTINTERRUPT 23 +#define GEN7_INT_OUTOFBOUNDACCESS 24 +#define GEN7_INT_UCHETRAPINTERRUPT 25 +#define GEN7_INT_DEBUGBUSINTERRUPT0 26 +#define GEN7_INT_DEBUGBUSINTERRUPT1 27 +#define GEN7_INT_TSBWRITEERROR 28 +#define GEN7_INT_ISDBCPUIRQ 30 +#define GEN7_INT_ISDBUNDERDEBUG 31 + +/* CP registers */ +#define GEN7_CP_RB_BASE 0x800 +#define GEN7_CP_RB_BASE_HI 0x801 +#define GEN7_CP_RB_CNTL 0x802 +#define GEN7_CP_RB_RPTR_ADDR_LO 0x804 +#define GEN7_CP_RB_RPTR_ADDR_HI 0x805 +#define GEN7_CP_RB_RPTR 0x806 +#define GEN7_CP_RB_WPTR 0x807 +#define GEN7_CP_SQE_CNTL 0x808 +#define GEN7_CP_CP2GMU_STATUS 0x812 +#define GEN7_CP_HW_FAULT 0x821 +#define GEN7_CP_INTERRUPT_STATUS 0x823 +#define GEN7_CP_PROTECT_STATUS 0x824 +#define GEN7_CP_STATUS_1 0x825 +#define GEN7_CP_SQE_INSTR_BASE_LO 0x830 +#define GEN7_CP_SQE_INSTR_BASE_HI 0x831 +#define GEN7_CP_MISC_CNTL 0x840 +#define GEN7_CP_CHICKEN_DBG 0x841 +#define GEN7_CP_DBG_ECO_CNTL 0x843 +#define GEN7_CP_APRIV_CNTL 0x844 +#define GEN7_CP_PROTECT_CNTL 0x84f +#define GEN7_CP_PROTECT_REG 0x850 +#define GEN7_CP_CONTEXT_SWITCH_CNTL 0x8a0 +#define GEN7_CP_CONTEXT_SWITCH_SMMU_INFO_LO 0x8a1 +#define GEN7_CP_CONTEXT_SWITCH_SMMU_INFO_HI 0x8a2 +#define GEN7_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_LO 0x8a3 +#define GEN7_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_HI 0x8a4 +#define GEN7_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_LO 0x8a5 +#define GEN7_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_HI 0x8a6 +#define GEN7_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_LO 0x8a7 +#define 
GEN7_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_HI 0x8a8 +#define GEN7_CP_CONTEXT_SWITCH_LEVEL_STATUS 0x8ab +#define GEN7_CP_PERFCTR_CP_SEL_0 0x8d0 +#define GEN7_CP_PERFCTR_CP_SEL_1 0x8d1 +#define GEN7_CP_PERFCTR_CP_SEL_2 0x8d2 +#define GEN7_CP_PERFCTR_CP_SEL_3 0x8d3 +#define GEN7_CP_PERFCTR_CP_SEL_4 0x8d4 +#define GEN7_CP_PERFCTR_CP_SEL_5 0x8d5 +#define GEN7_CP_PERFCTR_CP_SEL_6 0x8d6 +#define GEN7_CP_PERFCTR_CP_SEL_7 0x8d7 +#define GEN7_CP_PERFCTR_CP_SEL_8 0x8d8 +#define GEN7_CP_PERFCTR_CP_SEL_9 0x8d9 +#define GEN7_CP_PERFCTR_CP_SEL_10 0x8da +#define GEN7_CP_PERFCTR_CP_SEL_11 0x8db +#define GEN7_CP_PERFCTR_CP_SEL_12 0x8dc +#define GEN7_CP_PERFCTR_CP_SEL_13 0x8dd +#define GEN7_CP_BV_PERFCTR_CP_SEL_0 0x8e0 +#define GEN7_CP_BV_PERFCTR_CP_SEL_1 0x8e1 +#define GEN7_CP_BV_PERFCTR_CP_SEL_2 0x8e2 +#define GEN7_CP_BV_PERFCTR_CP_SEL_3 0x8e3 +#define GEN7_CP_BV_PERFCTR_CP_SEL_4 0x8e4 +#define GEN7_CP_BV_PERFCTR_CP_SEL_5 0x8e5 +#define GEN7_CP_BV_PERFCTR_CP_SEL_6 0x8e6 +#define GEN7_CP_CRASH_SCRIPT_BASE_LO 0x900 +#define GEN7_CP_CRASH_SCRIPT_BASE_HI 0x901 +#define GEN7_CP_CRASH_DUMP_CNTL 0x902 +#define GEN7_CP_CRASH_DUMP_STATUS 0x903 +#define GEN7_CP_SQE_STAT_ADDR 0x908 +#define GEN7_CP_SQE_STAT_DATA 0x909 +#define GEN7_CP_DRAW_STATE_ADDR 0x90a +#define GEN7_CP_DRAW_STATE_DATA 0x90b +#define GEN7_CP_ROQ_DBG_ADDR 0x90c +#define GEN7_CP_ROQ_DBG_DATA 0x90d +#define GEN7_CP_MEM_POOL_DBG_ADDR 0x90e +#define GEN7_CP_MEM_POOL_DBG_DATA 0x90f +#define GEN7_CP_SQE_UCODE_DBG_ADDR 0x910 +#define GEN7_CP_SQE_UCODE_DBG_DATA 0x911 +#define GEN7_CP_IB1_BASE 0x928 +#define GEN7_CP_IB1_BASE_HI 0x929 +#define GEN7_CP_IB1_REM_SIZE 0x92a +#define GEN7_CP_IB2_BASE 0x92b +#define GEN7_CP_IB2_BASE_HI 0x92c +#define GEN7_CP_IB2_REM_SIZE 0x92d +#define GEN7_CP_ALWAYS_ON_COUNTER_LO 0x980 +#define GEN7_CP_ALWAYS_ON_COUNTER_HI 0x981 +#define GEN7_CP_AHB_CNTL 0x98d +#define GEN7_CP_APERTURE_CNTL_HOST 0xa00 +#define GEN7_CP_APERTURE_CNTL_CD 0xa03 +#define GEN7_CP_BV_PROTECT_STATUS 0xa61 +#define GEN7_CP_BV_HW_FAULT 0xa64 +#define GEN7_CP_BV_DRAW_STATE_ADDR 0xa81 +#define GEN7_CP_BV_DRAW_STATE_DATA 0xa82 +#define GEN7_CP_BV_ROQ_DBG_ADDR 0xa83 +#define GEN7_CP_BV_ROQ_DBG_DATA 0xa84 +#define GEN7_CP_BV_SQE_UCODE_DBG_ADDR 0xa85 +#define GEN7_CP_BV_SQE_UCODE_DBG_DATA 0xa86 +#define GEN7_CP_BV_SQE_STAT_ADDR 0xa87 +#define GEN7_CP_BV_SQE_STAT_DATA 0xa88 +#define GEN7_CP_BV_RB_RPTR_ADDR_LO 0xa98 +#define GEN7_CP_BV_RB_RPTR_ADDR_HI 0xa99 +#define GEN7_CP_RESOURCE_TBL_DBG_ADDR 0xa9a +#define GEN7_CP_RESOURCE_TBL_DBG_DATA 0xa9b +#define GEN7_CP_BV_MEM_POOL_DBG_ADDR 0xa96 +#define GEN7_CP_BV_MEM_POOL_DBG_DATA 0xa97 +#define GEN7_CP_BV_APRIV_CNTL 0xad0 +#define GEN7_CP_BV_CHICKEN_DBG 0xada + +/* LPAC registers */ +#define GEN7_CP_LPAC_DRAW_STATE_ADDR 0xb0a +#define GEN7_CP_LPAC_DRAW_STATE_DATA 0xb0b +#define GEN7_CP_LPAC_ROQ_DBG_ADDR 0xb0c +#define GEN7_CP_SQE_AC_UCODE_DBG_ADDR 0xb27 +#define GEN7_CP_SQE_AC_UCODE_DBG_DATA 0xb28 +#define GEN7_CP_SQE_AC_STAT_ADDR 0xb29 +#define GEN7_CP_SQE_AC_STAT_DATA 0xb2a +#define GEN7_CP_LPAC_APRIV_CNTL 0xb31 +#define GEN7_CP_LPAC_ROQ_DBG_DATA 0xb35 +#define GEN7_CP_LPAC_FIFO_DBG_DATA 0xb36 +#define GEN7_CP_LPAC_FIFO_DBG_ADDR 0xb40 + +/* RBBM registers */ +#define GEN7_RBBM_INT_0_STATUS 0x201 +#define GEN7_RBBM_STATUS 0x210 +#define GEN7_RBBM_STATUS3 0x213 +#define GEN7_RBBM_PERFCTR_CP_0_LO 0x300 +#define GEN7_RBBM_PERFCTR_CP_0_HI 0x301 +#define GEN7_RBBM_PERFCTR_CP_1_LO 0x302 +#define GEN7_RBBM_PERFCTR_CP_1_HI 0x303 +#define GEN7_RBBM_PERFCTR_CP_2_LO 0x304 +#define GEN7_RBBM_PERFCTR_CP_2_HI 0x305 +#define 
GEN7_RBBM_PERFCTR_CP_3_LO 0x306 +#define GEN7_RBBM_PERFCTR_CP_3_HI 0x307 +#define GEN7_RBBM_PERFCTR_CP_4_LO 0x308 +#define GEN7_RBBM_PERFCTR_CP_4_HI 0x309 +#define GEN7_RBBM_PERFCTR_CP_5_LO 0x30a +#define GEN7_RBBM_PERFCTR_CP_5_HI 0x30b +#define GEN7_RBBM_PERFCTR_CP_6_LO 0x30c +#define GEN7_RBBM_PERFCTR_CP_6_HI 0x30d +#define GEN7_RBBM_PERFCTR_CP_7_LO 0x30e +#define GEN7_RBBM_PERFCTR_CP_7_HI 0x30f +#define GEN7_RBBM_PERFCTR_CP_8_LO 0x310 +#define GEN7_RBBM_PERFCTR_CP_8_HI 0x311 +#define GEN7_RBBM_PERFCTR_CP_9_LO 0x312 +#define GEN7_RBBM_PERFCTR_CP_9_HI 0x313 +#define GEN7_RBBM_PERFCTR_CP_10_LO 0x314 +#define GEN7_RBBM_PERFCTR_CP_10_HI 0x315 +#define GEN7_RBBM_PERFCTR_CP_11_LO 0x316 +#define GEN7_RBBM_PERFCTR_CP_11_HI 0x317 +#define GEN7_RBBM_PERFCTR_CP_12_LO 0x318 +#define GEN7_RBBM_PERFCTR_CP_12_HI 0x319 +#define GEN7_RBBM_PERFCTR_CP_13_LO 0x31a +#define GEN7_RBBM_PERFCTR_CP_13_HI 0x31b +#define GEN7_RBBM_PERFCTR_RBBM_0_LO 0x31c +#define GEN7_RBBM_PERFCTR_RBBM_0_HI 0x31d +#define GEN7_RBBM_PERFCTR_RBBM_1_LO 0x31e +#define GEN7_RBBM_PERFCTR_RBBM_1_HI 0x31f +#define GEN7_RBBM_PERFCTR_RBBM_2_LO 0x320 +#define GEN7_RBBM_PERFCTR_RBBM_2_HI 0x321 +#define GEN7_RBBM_PERFCTR_RBBM_3_LO 0x322 +#define GEN7_RBBM_PERFCTR_RBBM_3_HI 0x323 +#define GEN7_RBBM_PERFCTR_PC_0_LO 0x324 +#define GEN7_RBBM_PERFCTR_PC_0_HI 0x325 +#define GEN7_RBBM_PERFCTR_PC_1_LO 0x326 +#define GEN7_RBBM_PERFCTR_PC_1_HI 0x327 +#define GEN7_RBBM_PERFCTR_PC_2_LO 0x328 +#define GEN7_RBBM_PERFCTR_PC_2_HI 0x329 +#define GEN7_RBBM_PERFCTR_PC_3_LO 0x32a +#define GEN7_RBBM_PERFCTR_PC_3_HI 0x32b +#define GEN7_RBBM_PERFCTR_PC_4_LO 0x32c +#define GEN7_RBBM_PERFCTR_PC_4_HI 0x32d +#define GEN7_RBBM_PERFCTR_PC_5_LO 0x32e +#define GEN7_RBBM_PERFCTR_PC_5_HI 0x32f +#define GEN7_RBBM_PERFCTR_PC_6_LO 0x330 +#define GEN7_RBBM_PERFCTR_PC_6_HI 0x331 +#define GEN7_RBBM_PERFCTR_PC_7_LO 0x332 +#define GEN7_RBBM_PERFCTR_PC_7_HI 0x333 +#define GEN7_RBBM_PERFCTR_VFD_0_LO 0x334 +#define GEN7_RBBM_PERFCTR_VFD_0_HI 0x335 +#define GEN7_RBBM_PERFCTR_VFD_1_LO 0x336 +#define GEN7_RBBM_PERFCTR_VFD_1_HI 0x337 +#define GEN7_RBBM_PERFCTR_VFD_2_LO 0x338 +#define GEN7_RBBM_PERFCTR_VFD_2_HI 0x339 +#define GEN7_RBBM_PERFCTR_VFD_3_LO 0x33a +#define GEN7_RBBM_PERFCTR_VFD_3_HI 0x33b +#define GEN7_RBBM_PERFCTR_VFD_4_LO 0x33c +#define GEN7_RBBM_PERFCTR_VFD_4_HI 0x33d +#define GEN7_RBBM_PERFCTR_VFD_5_LO 0x33e +#define GEN7_RBBM_PERFCTR_VFD_5_HI 0x33f +#define GEN7_RBBM_PERFCTR_VFD_6_LO 0x340 +#define GEN7_RBBM_PERFCTR_VFD_6_HI 0x341 +#define GEN7_RBBM_PERFCTR_VFD_7_LO 0x342 +#define GEN7_RBBM_PERFCTR_VFD_7_HI 0x343 +#define GEN7_RBBM_PERFCTR_HLSQ_0_LO 0x344 +#define GEN7_RBBM_PERFCTR_HLSQ_0_HI 0x345 +#define GEN7_RBBM_PERFCTR_HLSQ_1_LO 0x346 +#define GEN7_RBBM_PERFCTR_HLSQ_1_HI 0x347 +#define GEN7_RBBM_PERFCTR_HLSQ_2_LO 0x348 +#define GEN7_RBBM_PERFCTR_HLSQ_2_HI 0x349 +#define GEN7_RBBM_PERFCTR_HLSQ_3_LO 0x34a +#define GEN7_RBBM_PERFCTR_HLSQ_3_HI 0x34b +#define GEN7_RBBM_PERFCTR_HLSQ_4_LO 0x34c +#define GEN7_RBBM_PERFCTR_HLSQ_4_HI 0x34d +#define GEN7_RBBM_PERFCTR_HLSQ_5_LO 0x34e +#define GEN7_RBBM_PERFCTR_HLSQ_5_HI 0x34f +#define GEN7_RBBM_PERFCTR_VPC_0_LO 0x350 +#define GEN7_RBBM_PERFCTR_VPC_0_HI 0x351 +#define GEN7_RBBM_PERFCTR_VPC_1_LO 0x352 +#define GEN7_RBBM_PERFCTR_VPC_1_HI 0x353 +#define GEN7_RBBM_PERFCTR_VPC_2_LO 0x354 +#define GEN7_RBBM_PERFCTR_VPC_2_HI 0x355 +#define GEN7_RBBM_PERFCTR_VPC_3_LO 0x356 +#define GEN7_RBBM_PERFCTR_VPC_3_HI 0x357 +#define GEN7_RBBM_PERFCTR_VPC_4_LO 0x358 +#define GEN7_RBBM_PERFCTR_VPC_4_HI 0x359 +#define GEN7_RBBM_PERFCTR_VPC_5_LO 0x35a 
+#define GEN7_RBBM_PERFCTR_VPC_5_HI 0x35b +#define GEN7_RBBM_PERFCTR_CCU_0_LO 0x35c +#define GEN7_RBBM_PERFCTR_CCU_0_HI 0x35d +#define GEN7_RBBM_PERFCTR_CCU_1_LO 0x35e +#define GEN7_RBBM_PERFCTR_CCU_1_HI 0x35f +#define GEN7_RBBM_PERFCTR_CCU_2_LO 0x360 +#define GEN7_RBBM_PERFCTR_CCU_2_HI 0x361 +#define GEN7_RBBM_PERFCTR_CCU_3_LO 0x362 +#define GEN7_RBBM_PERFCTR_CCU_3_HI 0x363 +#define GEN7_RBBM_PERFCTR_CCU_4_LO 0x364 +#define GEN7_RBBM_PERFCTR_CCU_4_HI 0x365 +#define GEN7_RBBM_PERFCTR_TSE_0_LO 0x366 +#define GEN7_RBBM_PERFCTR_TSE_0_HI 0x367 +#define GEN7_RBBM_PERFCTR_TSE_1_LO 0x368 +#define GEN7_RBBM_PERFCTR_TSE_1_HI 0x369 +#define GEN7_RBBM_PERFCTR_TSE_2_LO 0x36a +#define GEN7_RBBM_PERFCTR_TSE_2_HI 0x36b +#define GEN7_RBBM_PERFCTR_TSE_3_LO 0x36c +#define GEN7_RBBM_PERFCTR_TSE_3_HI 0x36d +#define GEN7_RBBM_PERFCTR_RAS_0_LO 0x36e +#define GEN7_RBBM_PERFCTR_RAS_0_HI 0x36f +#define GEN7_RBBM_PERFCTR_RAS_1_LO 0x370 +#define GEN7_RBBM_PERFCTR_RAS_1_HI 0x371 +#define GEN7_RBBM_PERFCTR_RAS_2_LO 0x372 +#define GEN7_RBBM_PERFCTR_RAS_2_HI 0x373 +#define GEN7_RBBM_PERFCTR_RAS_3_LO 0x374 +#define GEN7_RBBM_PERFCTR_RAS_3_HI 0x375 +#define GEN7_RBBM_PERFCTR_UCHE_0_LO 0x376 +#define GEN7_RBBM_PERFCTR_UCHE_0_HI 0x377 +#define GEN7_RBBM_PERFCTR_UCHE_1_LO 0x378 +#define GEN7_RBBM_PERFCTR_UCHE_1_HI 0x379 +#define GEN7_RBBM_PERFCTR_UCHE_2_LO 0x37a +#define GEN7_RBBM_PERFCTR_UCHE_2_HI 0x37b +#define GEN7_RBBM_PERFCTR_UCHE_3_LO 0x37c +#define GEN7_RBBM_PERFCTR_UCHE_3_HI 0x37d +#define GEN7_RBBM_PERFCTR_UCHE_4_LO 0x37e +#define GEN7_RBBM_PERFCTR_UCHE_4_HI 0x37f +#define GEN7_RBBM_PERFCTR_UCHE_5_LO 0x380 +#define GEN7_RBBM_PERFCTR_UCHE_5_HI 0x381 +#define GEN7_RBBM_PERFCTR_UCHE_6_LO 0x382 +#define GEN7_RBBM_PERFCTR_UCHE_6_HI 0x383 +#define GEN7_RBBM_PERFCTR_UCHE_7_LO 0x384 +#define GEN7_RBBM_PERFCTR_UCHE_7_HI 0x385 +#define GEN7_RBBM_PERFCTR_UCHE_8_LO 0x386 +#define GEN7_RBBM_PERFCTR_UCHE_8_HI 0x387 +#define GEN7_RBBM_PERFCTR_UCHE_9_LO 0x388 +#define GEN7_RBBM_PERFCTR_UCHE_9_HI 0x389 +#define GEN7_RBBM_PERFCTR_UCHE_10_LO 0x38a +#define GEN7_RBBM_PERFCTR_UCHE_10_HI 0x38b +#define GEN7_RBBM_PERFCTR_UCHE_11_LO 0x38c +#define GEN7_RBBM_PERFCTR_UCHE_11_HI 0x38d +#define GEN7_RBBM_PERFCTR_TP_0_LO 0x38e +#define GEN7_RBBM_PERFCTR_TP_0_HI 0x38f +#define GEN7_RBBM_PERFCTR_TP_1_LO 0x390 +#define GEN7_RBBM_PERFCTR_TP_1_HI 0x391 +#define GEN7_RBBM_PERFCTR_TP_2_LO 0x392 +#define GEN7_RBBM_PERFCTR_TP_2_HI 0x393 +#define GEN7_RBBM_PERFCTR_TP_3_LO 0x394 +#define GEN7_RBBM_PERFCTR_TP_3_HI 0x395 +#define GEN7_RBBM_PERFCTR_TP_4_LO 0x396 +#define GEN7_RBBM_PERFCTR_TP_4_HI 0x397 +#define GEN7_RBBM_PERFCTR_TP_5_LO 0x398 +#define GEN7_RBBM_PERFCTR_TP_5_HI 0x399 +#define GEN7_RBBM_PERFCTR_TP_6_LO 0x39a +#define GEN7_RBBM_PERFCTR_TP_6_HI 0x39b +#define GEN7_RBBM_PERFCTR_TP_7_LO 0x39c +#define GEN7_RBBM_PERFCTR_TP_7_HI 0x39d +#define GEN7_RBBM_PERFCTR_TP_8_LO 0x39e +#define GEN7_RBBM_PERFCTR_TP_8_HI 0x39f +#define GEN7_RBBM_PERFCTR_TP_9_LO 0x3a0 +#define GEN7_RBBM_PERFCTR_TP_9_HI 0x3a1 +#define GEN7_RBBM_PERFCTR_TP_10_LO 0x3a2 +#define GEN7_RBBM_PERFCTR_TP_10_HI 0x3a3 +#define GEN7_RBBM_PERFCTR_TP_11_LO 0x3a4 +#define GEN7_RBBM_PERFCTR_TP_11_HI 0x3a5 +#define GEN7_RBBM_PERFCTR_SP_0_LO 0x3a6 +#define GEN7_RBBM_PERFCTR_SP_0_HI 0x3a7 +#define GEN7_RBBM_PERFCTR_SP_1_LO 0x3a8 +#define GEN7_RBBM_PERFCTR_SP_1_HI 0x3a9 +#define GEN7_RBBM_PERFCTR_SP_2_LO 0x3aa +#define GEN7_RBBM_PERFCTR_SP_2_HI 0x3ab +#define GEN7_RBBM_PERFCTR_SP_3_LO 0x3ac +#define GEN7_RBBM_PERFCTR_SP_3_HI 0x3ad +#define GEN7_RBBM_PERFCTR_SP_4_LO 0x3ae +#define 
GEN7_RBBM_PERFCTR_SP_4_HI 0x3af +#define GEN7_RBBM_PERFCTR_SP_5_LO 0x3b0 +#define GEN7_RBBM_PERFCTR_SP_5_HI 0x3b1 +#define GEN7_RBBM_PERFCTR_SP_6_LO 0x3b2 +#define GEN7_RBBM_PERFCTR_SP_6_HI 0x3b3 +#define GEN7_RBBM_PERFCTR_SP_7_LO 0x3b4 +#define GEN7_RBBM_PERFCTR_SP_7_HI 0x3b5 +#define GEN7_RBBM_PERFCTR_SP_8_LO 0x3b6 +#define GEN7_RBBM_PERFCTR_SP_8_HI 0x3b7 +#define GEN7_RBBM_PERFCTR_SP_9_LO 0x3b8 +#define GEN7_RBBM_PERFCTR_SP_9_HI 0x3b9 +#define GEN7_RBBM_PERFCTR_SP_10_LO 0x3ba +#define GEN7_RBBM_PERFCTR_SP_10_HI 0x3bb +#define GEN7_RBBM_PERFCTR_SP_11_LO 0x3bc +#define GEN7_RBBM_PERFCTR_SP_11_HI 0x3bd +#define GEN7_RBBM_PERFCTR_SP_12_LO 0x3be +#define GEN7_RBBM_PERFCTR_SP_12_HI 0x3bf +#define GEN7_RBBM_PERFCTR_SP_13_LO 0x3c0 +#define GEN7_RBBM_PERFCTR_SP_13_HI 0x3c1 +#define GEN7_RBBM_PERFCTR_SP_14_LO 0x3c2 +#define GEN7_RBBM_PERFCTR_SP_14_HI 0x3c3 +#define GEN7_RBBM_PERFCTR_SP_15_LO 0x3c4 +#define GEN7_RBBM_PERFCTR_SP_15_HI 0x3c5 +#define GEN7_RBBM_PERFCTR_SP_16_LO 0x3c6 +#define GEN7_RBBM_PERFCTR_SP_16_HI 0x3c7 +#define GEN7_RBBM_PERFCTR_SP_17_LO 0x3c8 +#define GEN7_RBBM_PERFCTR_SP_17_HI 0x3c9 +#define GEN7_RBBM_PERFCTR_SP_18_LO 0x3ca +#define GEN7_RBBM_PERFCTR_SP_18_HI 0x3cb +#define GEN7_RBBM_PERFCTR_SP_19_LO 0x3cc +#define GEN7_RBBM_PERFCTR_SP_19_HI 0x3cd +#define GEN7_RBBM_PERFCTR_SP_20_LO 0x3ce +#define GEN7_RBBM_PERFCTR_SP_20_HI 0x3cf +#define GEN7_RBBM_PERFCTR_SP_21_LO 0x3d0 +#define GEN7_RBBM_PERFCTR_SP_21_HI 0x3d1 +#define GEN7_RBBM_PERFCTR_SP_22_LO 0x3d2 +#define GEN7_RBBM_PERFCTR_SP_22_HI 0x3d3 +#define GEN7_RBBM_PERFCTR_SP_23_LO 0x3d4 +#define GEN7_RBBM_PERFCTR_SP_23_HI 0x3d5 +#define GEN7_RBBM_PERFCTR_RB_0_LO 0x3d6 +#define GEN7_RBBM_PERFCTR_RB_0_HI 0x3d7 +#define GEN7_RBBM_PERFCTR_RB_1_LO 0x3d8 +#define GEN7_RBBM_PERFCTR_RB_1_HI 0x3d9 +#define GEN7_RBBM_PERFCTR_RB_2_LO 0x3da +#define GEN7_RBBM_PERFCTR_RB_2_HI 0x3db +#define GEN7_RBBM_PERFCTR_RB_3_LO 0x3dc +#define GEN7_RBBM_PERFCTR_RB_3_HI 0x3dd +#define GEN7_RBBM_PERFCTR_RB_4_LO 0x3de +#define GEN7_RBBM_PERFCTR_RB_4_HI 0x3df +#define GEN7_RBBM_PERFCTR_RB_5_LO 0x3e0 +#define GEN7_RBBM_PERFCTR_RB_5_HI 0x3e1 +#define GEN7_RBBM_PERFCTR_RB_6_LO 0x3e2 +#define GEN7_RBBM_PERFCTR_RB_6_HI 0x3e3 +#define GEN7_RBBM_PERFCTR_RB_7_LO 0x3e4 +#define GEN7_RBBM_PERFCTR_RB_7_HI 0x3e5 +#define GEN7_RBBM_PERFCTR_VSC_0_LO 0x3e6 +#define GEN7_RBBM_PERFCTR_VSC_0_HI 0x3e7 +#define GEN7_RBBM_PERFCTR_VSC_1_LO 0x3e8 +#define GEN7_RBBM_PERFCTR_VSC_1_HI 0x3e9 +#define GEN7_RBBM_PERFCTR_LRZ_0_LO 0x3ea +#define GEN7_RBBM_PERFCTR_LRZ_0_HI 0x3eb +#define GEN7_RBBM_PERFCTR_LRZ_1_LO 0x3ec +#define GEN7_RBBM_PERFCTR_LRZ_1_HI 0x3ed +#define GEN7_RBBM_PERFCTR_LRZ_2_LO 0x3ee +#define GEN7_RBBM_PERFCTR_LRZ_2_HI 0x3ef +#define GEN7_RBBM_PERFCTR_LRZ_3_LO 0x3f0 +#define GEN7_RBBM_PERFCTR_LRZ_3_HI 0x3f1 +#define GEN7_RBBM_PERFCTR_CMP_0_LO 0x3f2 +#define GEN7_RBBM_PERFCTR_CMP_0_HI 0x3f3 +#define GEN7_RBBM_PERFCTR_CMP_1_LO 0x3f4 +#define GEN7_RBBM_PERFCTR_CMP_1_HI 0x3f5 +#define GEN7_RBBM_PERFCTR_CMP_2_LO 0x3f6 +#define GEN7_RBBM_PERFCTR_CMP_2_HI 0x3f7 +#define GEN7_RBBM_PERFCTR_CMP_3_LO 0x3f8 +#define GEN7_RBBM_PERFCTR_CMP_3_HI 0x3f9 +#define GEN7_RBBM_PERFCTR_UFC_0_LO 0x3fa +#define GEN7_RBBM_PERFCTR_UFC_0_HI 0x3fb +#define GEN7_RBBM_PERFCTR_UFC_1_LO 0x3fc +#define GEN7_RBBM_PERFCTR_UFC_1_HI 0x3fd +#define GEN7_RBBM_PERFCTR_UFC_2_LO 0x3fe +#define GEN7_RBBM_PERFCTR_UFC_2_HI 0x3ff +#define GEN7_RBBM_PERFCTR_UFC_3_LO 0x400 +#define GEN7_RBBM_PERFCTR_UFC_3_HI 0x401 +#define GEN7_RBBM_PERFCTR2_HLSQ_0_LO 0x410 +#define GEN7_RBBM_PERFCTR2_HLSQ_0_HI 0x411 +#define 
GEN7_RBBM_PERFCTR2_HLSQ_1_LO 0x412 +#define GEN7_RBBM_PERFCTR2_HLSQ_1_HI 0x413 +#define GEN7_RBBM_PERFCTR2_HLSQ_2_LO 0x414 +#define GEN7_RBBM_PERFCTR2_HLSQ_2_HI 0x415 +#define GEN7_RBBM_PERFCTR2_HLSQ_3_LO 0x416 +#define GEN7_RBBM_PERFCTR2_HLSQ_3_HI 0x417 +#define GEN7_RBBM_PERFCTR2_HLSQ_4_LO 0x418 +#define GEN7_RBBM_PERFCTR2_HLSQ_4_HI 0x419 +#define GEN7_RBBM_PERFCTR2_HLSQ_5_LO 0x41a +#define GEN7_RBBM_PERFCTR2_HLSQ_5_HI 0x41b +#define GEN7_RBBM_PERFCTR2_CP_0_LO 0x41c +#define GEN7_RBBM_PERFCTR2_CP_0_HI 0x41d +#define GEN7_RBBM_PERFCTR2_CP_1_LO 0x41e +#define GEN7_RBBM_PERFCTR2_CP_1_HI 0x41f +#define GEN7_RBBM_PERFCTR2_CP_2_LO 0x420 +#define GEN7_RBBM_PERFCTR2_CP_2_HI 0x421 +#define GEN7_RBBM_PERFCTR2_CP_3_LO 0x422 +#define GEN7_RBBM_PERFCTR2_CP_3_HI 0x423 +#define GEN7_RBBM_PERFCTR2_CP_4_LO 0x424 +#define GEN7_RBBM_PERFCTR2_CP_4_HI 0x425 +#define GEN7_RBBM_PERFCTR2_CP_5_LO 0x426 +#define GEN7_RBBM_PERFCTR2_CP_5_HI 0x427 +#define GEN7_RBBM_PERFCTR2_CP_6_LO 0x428 +#define GEN7_RBBM_PERFCTR2_CP_6_HI 0x429 +#define GEN7_RBBM_PERFCTR2_SP_0_LO 0x42a +#define GEN7_RBBM_PERFCTR2_SP_0_HI 0x42b +#define GEN7_RBBM_PERFCTR2_SP_1_LO 0x42c +#define GEN7_RBBM_PERFCTR2_SP_1_HI 0x42d +#define GEN7_RBBM_PERFCTR2_SP_2_LO 0x42e +#define GEN7_RBBM_PERFCTR2_SP_2_HI 0x42f +#define GEN7_RBBM_PERFCTR2_SP_3_LO 0x430 +#define GEN7_RBBM_PERFCTR2_SP_3_HI 0x431 +#define GEN7_RBBM_PERFCTR2_SP_4_LO 0x432 +#define GEN7_RBBM_PERFCTR2_SP_4_HI 0x433 +#define GEN7_RBBM_PERFCTR2_SP_5_LO 0x434 +#define GEN7_RBBM_PERFCTR2_SP_5_HI 0x435 +#define GEN7_RBBM_PERFCTR2_SP_6_LO 0x436 +#define GEN7_RBBM_PERFCTR2_SP_6_HI 0x437 +#define GEN7_RBBM_PERFCTR2_SP_7_LO 0x438 +#define GEN7_RBBM_PERFCTR2_SP_7_HI 0x439 +#define GEN7_RBBM_PERFCTR2_SP_8_LO 0x43a +#define GEN7_RBBM_PERFCTR2_SP_8_HI 0x43b +#define GEN7_RBBM_PERFCTR2_SP_9_LO 0x43c +#define GEN7_RBBM_PERFCTR2_SP_9_HI 0x43d +#define GEN7_RBBM_PERFCTR2_SP_10_LO 0x43e +#define GEN7_RBBM_PERFCTR2_SP_10_HI 0x43f +#define GEN7_RBBM_PERFCTR2_SP_11_LO 0x440 +#define GEN7_RBBM_PERFCTR2_SP_11_HI 0x441 +#define GEN7_RBBM_PERFCTR2_TP_0_LO 0x442 +#define GEN7_RBBM_PERFCTR2_TP_0_HI 0x443 +#define GEN7_RBBM_PERFCTR2_TP_1_LO 0x444 +#define GEN7_RBBM_PERFCTR2_TP_1_HI 0x445 +#define GEN7_RBBM_PERFCTR2_TP_2_LO 0x446 +#define GEN7_RBBM_PERFCTR2_TP_2_HI 0x447 +#define GEN7_RBBM_PERFCTR2_TP_3_LO 0x448 +#define GEN7_RBBM_PERFCTR2_TP_3_HI 0x449 +#define GEN7_RBBM_PERFCTR2_TP_4_LO 0x44a +#define GEN7_RBBM_PERFCTR2_TP_4_HI 0x44b +#define GEN7_RBBM_PERFCTR2_TP_5_LO 0x44c +#define GEN7_RBBM_PERFCTR2_TP_5_HI 0x44d +#define GEN7_RBBM_PERFCTR2_UFC_0_LO 0x44e +#define GEN7_RBBM_PERFCTR2_UFC_0_HI 0x44f +#define GEN7_RBBM_PERFCTR2_UFC_1_LO 0x450 +#define GEN7_RBBM_PERFCTR2_UFC_1_HI 0x451 +#define GEN7_RBBM_PERFCTR_BV_PC_0_LO 0x460 +#define GEN7_RBBM_PERFCTR_BV_PC_0_HI 0x461 +#define GEN7_RBBM_PERFCTR_BV_PC_1_LO 0x462 +#define GEN7_RBBM_PERFCTR_BV_PC_1_HI 0x463 +#define GEN7_RBBM_PERFCTR_BV_PC_2_LO 0x464 +#define GEN7_RBBM_PERFCTR_BV_PC_2_HI 0x465 +#define GEN7_RBBM_PERFCTR_BV_PC_3_LO 0x466 +#define GEN7_RBBM_PERFCTR_BV_PC_3_HI 0x467 +#define GEN7_RBBM_PERFCTR_BV_PC_4_LO 0x468 +#define GEN7_RBBM_PERFCTR_BV_PC_4_HI 0x469 +#define GEN7_RBBM_PERFCTR_BV_PC_5_LO 0x46a +#define GEN7_RBBM_PERFCTR_BV_PC_5_HI 0x46b +#define GEN7_RBBM_PERFCTR_BV_PC_6_LO 0x46c +#define GEN7_RBBM_PERFCTR_BV_PC_6_HI 0x46d +#define GEN7_RBBM_PERFCTR_BV_PC_7_LO 0x46e +#define GEN7_RBBM_PERFCTR_BV_PC_7_HI 0x46f +#define GEN7_RBBM_PERFCTR_BV_VFD_0_LO 0x470 +#define GEN7_RBBM_PERFCTR_BV_VFD_0_HI 0x471 +#define GEN7_RBBM_PERFCTR_BV_VFD_1_LO 0x472 +#define 
GEN7_RBBM_PERFCTR_BV_VFD_1_HI 0x473 +#define GEN7_RBBM_PERFCTR_BV_VFD_2_LO 0x474 +#define GEN7_RBBM_PERFCTR_BV_VFD_2_HI 0x475 +#define GEN7_RBBM_PERFCTR_BV_VFD_3_LO 0x476 +#define GEN7_RBBM_PERFCTR_BV_VFD_3_HI 0x477 +#define GEN7_RBBM_PERFCTR_BV_VFD_4_LO 0x478 +#define GEN7_RBBM_PERFCTR_BV_VFD_4_HI 0x479 +#define GEN7_RBBM_PERFCTR_BV_VFD_5_LO 0x47a +#define GEN7_RBBM_PERFCTR_BV_VFD_5_HI 0x47b +#define GEN7_RBBM_PERFCTR_BV_VFD_6_LO 0x47c +#define GEN7_RBBM_PERFCTR_BV_VFD_6_HI 0x47d +#define GEN7_RBBM_PERFCTR_BV_VFD_7_LO 0x47e +#define GEN7_RBBM_PERFCTR_BV_VFD_7_HI 0x47f +#define GEN7_RBBM_PERFCTR_BV_VPC_0_LO 0x480 +#define GEN7_RBBM_PERFCTR_BV_VPC_0_HI 0x481 +#define GEN7_RBBM_PERFCTR_BV_VPC_1_LO 0x482 +#define GEN7_RBBM_PERFCTR_BV_VPC_1_HI 0x483 +#define GEN7_RBBM_PERFCTR_BV_VPC_2_LO 0x484 +#define GEN7_RBBM_PERFCTR_BV_VPC_2_HI 0x485 +#define GEN7_RBBM_PERFCTR_BV_VPC_3_LO 0x486 +#define GEN7_RBBM_PERFCTR_BV_VPC_3_HI 0x487 +#define GEN7_RBBM_PERFCTR_BV_VPC_4_LO 0x488 +#define GEN7_RBBM_PERFCTR_BV_VPC_4_HI 0x489 +#define GEN7_RBBM_PERFCTR_BV_VPC_5_LO 0x48a +#define GEN7_RBBM_PERFCTR_BV_VPC_5_HI 0x48b +#define GEN7_RBBM_PERFCTR_BV_TSE_0_LO 0x48c +#define GEN7_RBBM_PERFCTR_BV_TSE_0_HI 0x48d +#define GEN7_RBBM_PERFCTR_BV_TSE_1_LO 0x48e +#define GEN7_RBBM_PERFCTR_BV_TSE_1_HI 0x48f +#define GEN7_RBBM_PERFCTR_BV_TSE_2_LO 0x490 +#define GEN7_RBBM_PERFCTR_BV_TSE_2_HI 0x491 +#define GEN7_RBBM_PERFCTR_BV_TSE_3_LO 0x492 +#define GEN7_RBBM_PERFCTR_BV_TSE_3_HI 0x493 +#define GEN7_RBBM_PERFCTR_BV_RAS_0_LO 0x494 +#define GEN7_RBBM_PERFCTR_BV_RAS_0_HI 0x495 +#define GEN7_RBBM_PERFCTR_BV_RAS_1_LO 0x496 +#define GEN7_RBBM_PERFCTR_BV_RAS_1_HI 0x497 +#define GEN7_RBBM_PERFCTR_BV_RAS_2_LO 0x498 +#define GEN7_RBBM_PERFCTR_BV_RAS_2_HI 0x499 +#define GEN7_RBBM_PERFCTR_BV_RAS_3_LO 0x49a +#define GEN7_RBBM_PERFCTR_BV_RAS_3_HI 0x49b +#define GEN7_RBBM_PERFCTR_BV_LRZ_0_LO 0x49c +#define GEN7_RBBM_PERFCTR_BV_LRZ_0_HI 0x49d +#define GEN7_RBBM_PERFCTR_BV_LRZ_1_LO 0x49e +#define GEN7_RBBM_PERFCTR_BV_LRZ_1_HI 0x49f +#define GEN7_RBBM_PERFCTR_BV_LRZ_2_LO 0x4a0 +#define GEN7_RBBM_PERFCTR_BV_LRZ_2_HI 0x4a1 +#define GEN7_RBBM_PERFCTR_BV_LRZ_3_LO 0x4a2 +#define GEN7_RBBM_PERFCTR_BV_LRZ_3_HI 0x4a3 + +#define GEN7_RBBM_PERFCTR_CNTL 0x500 +#define GEN7_RBBM_PERFCTR_RBBM_SEL_0 0x507 +#define GEN7_RBBM_PERFCTR_RBBM_SEL_1 0x508 +#define GEN7_RBBM_PERFCTR_RBBM_SEL_2 0x509 +#define GEN7_RBBM_PERFCTR_RBBM_SEL_3 0x50a +#define GEN7_RBBM_PERFCTR_GPU_BUSY_MASKED 0x50b + +#define GEN7_RBBM_ISDB_CNT 0x533 +#define GEN7_RBBM_NC_MODE_CNTL 0x534 +#define GEN7_RBBM_SNAPSHOT_STATUS 0x535 + +#define GEN7_RBBM_SECVID_TRUST_CNTL 0xf400 +#define GEN7_RBBM_SECVID_TSB_TRUSTED_BASE_LO 0xf800 +#define GEN7_RBBM_SECVID_TSB_TRUSTED_BASE_HI 0xf801 +#define GEN7_RBBM_SECVID_TSB_TRUSTED_SIZE 0xf802 +#define GEN7_RBBM_SECVID_TSB_CNTL 0xf803 + +#define GEN7_RBBM_GBIF_CLIENT_QOS_CNTL 0x00011 +#define GEN7_RBBM_GBIF_HALT 0x00016 +#define GEN7_RBBM_GBIF_HALT_ACK 0x00017 +#define GEN7_RBBM_INTERFACE_HANG_INT_CNTL 0x0001f +#define GEN7_RBBM_INT_CLEAR_CMD 0x00037 +#define GEN7_RBBM_INT_0_MASK 0x00038 +#define GEN7_RBBM_INT_2_MASK 0x0003a +#define GEN7_RBBM_SP_HYST_CNT 0x00042 +#define GEN7_RBBM_SW_RESET_CMD 0x00043 +#define GEN7_RBBM_RAC_THRESHOLD_CNT 0x00044 +#define GEN7_RBBM_CLOCK_CNTL 0x000ae +#define GEN7_RBBM_CLOCK_CNTL_SP0 0x000b0 +#define GEN7_RBBM_CLOCK_CNTL2_SP0 0x000b4 +#define GEN7_RBBM_CLOCK_DELAY_SP0 0x000b8 +#define GEN7_RBBM_CLOCK_HYST_SP0 0x000bc +#define GEN7_RBBM_CLOCK_CNTL_TP0 0x000c0 +#define GEN7_RBBM_CLOCK_CNTL2_TP0 0x000c4 +#define 
GEN7_RBBM_CLOCK_CNTL3_TP0 0x000c8 +#define GEN7_RBBM_CLOCK_CNTL4_TP0 0x000cc +#define GEN7_RBBM_CLOCK_DELAY_TP0 0x000d0 +#define GEN7_RBBM_CLOCK_DELAY2_TP0 0x000d4 +#define GEN7_RBBM_CLOCK_DELAY3_TP0 0x000d8 +#define GEN7_RBBM_CLOCK_DELAY4_TP0 0x000dc +#define GEN7_RBBM_CLOCK_HYST_TP0 0x000e0 +#define GEN7_RBBM_CLOCK_HYST2_TP0 0x000e4 +#define GEN7_RBBM_CLOCK_HYST3_TP0 0x000e8 +#define GEN7_RBBM_CLOCK_HYST4_TP0 0x000ec +#define GEN7_RBBM_CLOCK_CNTL_RB0 0x000f0 +#define GEN7_RBBM_CLOCK_CNTL2_RB0 0x000f4 +#define GEN7_RBBM_CLOCK_CNTL_CCU0 0x000f8 +#define GEN7_RBBM_CLOCK_HYST_RB_CCU0 0x00100 +#define GEN7_RBBM_CLOCK_CNTL_RAC 0x00104 +#define GEN7_RBBM_CLOCK_CNTL2_RAC 0x00105 +#define GEN7_RBBM_CLOCK_DELAY_RAC 0x00106 +#define GEN7_RBBM_CLOCK_HYST_RAC 0x00107 +#define GEN7_RBBM_CLOCK_CNTL_TSE_RAS_RBBM 0x00108 +#define GEN7_RBBM_CLOCK_DELAY_TSE_RAS_RBBM 0x00109 +#define GEN7_RBBM_CLOCK_HYST_TSE_RAS_RBBM 0x0010a +#define GEN7_RBBM_CLOCK_CNTL_UCHE 0x0010b +#define GEN7_RBBM_CLOCK_DELAY_UCHE 0x0010f +#define GEN7_RBBM_CLOCK_HYST_UCHE 0x00110 +#define GEN7_RBBM_CLOCK_MODE_VFD 0x00111 +#define GEN7_RBBM_CLOCK_DELAY_VFD 0x00112 +#define GEN7_RBBM_CLOCK_HYST_VFD 0x00113 +#define GEN7_RBBM_CLOCK_MODE_GPC 0x00114 +#define GEN7_RBBM_CLOCK_DELAY_GPC 0x00115 +#define GEN7_RBBM_CLOCK_HYST_GPC 0x00116 +#define GEN7_RBBM_CLOCK_DELAY_HLSQ_2 0x00117 +#define GEN7_RBBM_CLOCK_CNTL_GMU_GX 0x00118 +#define GEN7_RBBM_CLOCK_DELAY_GMU_GX 0x00119 +#define GEN7_RBBM_CLOCK_HYST_GMU_GX 0x0011a +#define GEN7_RBBM_CLOCK_MODE_HLSQ 0x0011b +#define GEN7_RBBM_CLOCK_DELAY_HLSQ 0x0011c +#define GEN7_RBBM_CLOCK_HYST_HLSQ 0x0011d +#define GEN7_RBBM_CLOCK_MODE_CP 0x00260 +#define GEN7_RBBM_CLOCK_MODE_BV_LRZ 0x00284 +#define GEN7_RBBM_CLOCK_MODE_BV_GRAS 0x00285 +#define GEN7_RBBM_CLOCK_MODE2_GRAS 0x00286 +#define GEN7_RBBM_CLOCK_MODE_BV_VFD 0x00287 +#define GEN7_RBBM_CLOCK_MODE_BV_GPC 0x00288 + +/* DBGC_CFG registers */ +#define GEN7_DBGC_CFG_DBGBUS_SEL_A 0x600 +#define GEN7_DBGC_CFG_DBGBUS_SEL_B 0x601 +#define GEN7_DBGC_CFG_DBGBUS_SEL_C 0x602 +#define GEN7_DBGC_CFG_DBGBUS_SEL_D 0x603 +#define GEN7_DBGC_CFG_DBGBUS_CNTLT 0x604 +#define GEN7_DBGC_CFG_DBGBUS_CNTLM 0x605 +#define GEN7_DBGC_CFG_DBGBUS_OPL 0x606 +#define GEN7_DBGC_CFG_DBGBUS_OPE 0x607 +#define GEN7_DBGC_CFG_DBGBUS_IVTL_0 0x608 +#define GEN7_DBGC_CFG_DBGBUS_IVTL_1 0x609 +#define GEN7_DBGC_CFG_DBGBUS_IVTL_2 0x60a +#define GEN7_DBGC_CFG_DBGBUS_IVTL_3 0x60b +#define GEN7_DBGC_CFG_DBGBUS_MASKL_0 0x60c +#define GEN7_DBGC_CFG_DBGBUS_MASKL_1 0x60d +#define GEN7_DBGC_CFG_DBGBUS_MASKL_2 0x60e +#define GEN7_DBGC_CFG_DBGBUS_MASKL_3 0x60f +#define GEN7_DBGC_CFG_DBGBUS_BYTEL_0 0x610 +#define GEN7_DBGC_CFG_DBGBUS_BYTEL_1 0x611 +#define GEN7_DBGC_CFG_DBGBUS_IVTE_0 0x612 +#define GEN7_DBGC_CFG_DBGBUS_IVTE_1 0x613 +#define GEN7_DBGC_CFG_DBGBUS_IVTE_2 0x614 +#define GEN7_DBGC_CFG_DBGBUS_IVTE_3 0x615 +#define GEN7_DBGC_CFG_DBGBUS_MASKE_0 0x616 +#define GEN7_DBGC_CFG_DBGBUS_MASKE_1 0x617 +#define GEN7_DBGC_CFG_DBGBUS_MASKE_2 0x618 +#define GEN7_DBGC_CFG_DBGBUS_MASKE_3 0x619 +#define GEN7_DBGC_CFG_DBGBUS_NIBBLEE 0x61a +#define GEN7_DBGC_CFG_DBGBUS_PTRC0 0x61b +#define GEN7_DBGC_CFG_DBGBUS_PTRC1 0x61c +#define GEN7_DBGC_CFG_DBGBUS_LOADREG 0x61d +#define GEN7_DBGC_CFG_DBGBUS_IDX 0x61e +#define GEN7_DBGC_CFG_DBGBUS_CLRC 0x61f +#define GEN7_DBGC_CFG_DBGBUS_LOADIVT 0x620 +#define GEN7_DBGC_VBIF_DBG_CNTL 0x621 +#define GEN7_DBGC_DBG_LO_HI_GPIO 0x622 +#define GEN7_DBGC_EXT_TRACE_BUS_CNTL 0x623 +#define GEN7_DBGC_READ_AHB_THROUGH_DBG 0x624 +#define GEN7_DBGC_CFG_DBGBUS_OVER 0x626 +#define 
GEN7_DBGC_CFG_DBGBUS_TRACE_BUF1 0x62f +#define GEN7_DBGC_CFG_DBGBUS_TRACE_BUF2 0x630 +#define GEN7_DBGC_EVT_CFG 0x640 +#define GEN7_DBGC_EVT_INTF_SEL_0 0x641 +#define GEN7_DBGC_EVT_INTF_SEL_1 0x642 +#define GEN7_DBGC_PERF_ATB_CFG 0x643 +#define GEN7_DBGC_PERF_ATB_COUNTER_SEL_0 0x644 +#define GEN7_DBGC_PERF_ATB_COUNTER_SEL_1 0x645 +#define GEN7_DBGC_PERF_ATB_COUNTER_SEL_2 0x646 +#define GEN7_DBGC_PERF_ATB_COUNTER_SEL_3 0x647 +#define GEN7_DBGC_PERF_ATB_TRIG_INTF_SEL_0 0x648 +#define GEN7_DBGC_PERF_ATB_TRIG_INTF_SEL_1 0x649 +#define GEN7_DBGC_PERF_ATB_DRAIN_CMD 0x64a +#define GEN7_DBGC_ECO_CNTL 0x650 +#define GEN7_DBGC_AHB_DBG_CNTL 0x651 + +/* VSC registers */ +#define GEN7_VSC_PERFCTR_VSC_SEL_0 0xcd8 +#define GEN7_VSC_PERFCTR_VSC_SEL_1 0xcd9 + +/* GRAS registers */ +#define GEN7_GRAS_NC_MODE_CNTL 0x8602 +#define GEN7_GRAS_PERFCTR_TSE_SEL_0 0x8610 +#define GEN7_GRAS_PERFCTR_TSE_SEL_1 0x8611 +#define GEN7_GRAS_PERFCTR_TSE_SEL_2 0x8612 +#define GEN7_GRAS_PERFCTR_TSE_SEL_3 0x8613 +#define GEN7_GRAS_PERFCTR_RAS_SEL_0 0x8614 +#define GEN7_GRAS_PERFCTR_RAS_SEL_1 0x8615 +#define GEN7_GRAS_PERFCTR_RAS_SEL_2 0x8616 +#define GEN7_GRAS_PERFCTR_RAS_SEL_3 0x8617 +#define GEN7_GRAS_PERFCTR_LRZ_SEL_0 0x8618 +#define GEN7_GRAS_PERFCTR_LRZ_SEL_1 0x8619 +#define GEN7_GRAS_PERFCTR_LRZ_SEL_2 0x861a +#define GEN7_GRAS_PERFCTR_LRZ_SEL_3 0x861b + +/* RB registers */ +#define GEN7_RB_NC_MODE_CNTL 0x8e08 +#define GEN7_RB_PERFCTR_RB_SEL_0 0x8e10 +#define GEN7_RB_PERFCTR_RB_SEL_1 0x8e11 +#define GEN7_RB_PERFCTR_RB_SEL_2 0x8e12 +#define GEN7_RB_PERFCTR_RB_SEL_3 0x8e13 +#define GEN7_RB_PERFCTR_RB_SEL_4 0x8e14 +#define GEN7_RB_PERFCTR_RB_SEL_5 0x8e15 +#define GEN7_RB_PERFCTR_RB_SEL_6 0x8e16 +#define GEN7_RB_PERFCTR_RB_SEL_7 0x8e17 +#define GEN7_RB_PERFCTR_CCU_SEL_0 0x8e18 +#define GEN7_RB_PERFCTR_CCU_SEL_1 0x8e19 +#define GEN7_RB_PERFCTR_CCU_SEL_2 0x8e1a +#define GEN7_RB_PERFCTR_CCU_SEL_3 0x8e1b +#define GEN7_RB_PERFCTR_CCU_SEL_4 0x8e1c +#define GEN7_RB_PERFCTR_CMP_SEL_0 0x8e2c +#define GEN7_RB_PERFCTR_CMP_SEL_1 0x8e2d +#define GEN7_RB_PERFCTR_CMP_SEL_2 0x8e2e +#define GEN7_RB_PERFCTR_CMP_SEL_3 0x8e2f +#define GEN7_RB_PERFCTR_UFC_SEL_0 0x8e30 +#define GEN7_RB_PERFCTR_UFC_SEL_1 0x8e31 +#define GEN7_RB_PERFCTR_UFC_SEL_2 0x8e32 +#define GEN7_RB_PERFCTR_UFC_SEL_3 0x8e33 +#define GEN7_RB_PERFCTR_UFC_SEL_4 0x8e34 +#define GEN7_RB_PERFCTR_UFC_SEL_5 0x8e35 +#define GEN7_RB_RB_SUB_BLOCK_SEL_CNTL_HOST 0x8e3b +#define GEN7_RB_RB_SUB_BLOCK_SEL_CNTL_CD 0x8e3d +#define GEN7_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE 0x8e50 + +/* PC registers */ +#define GEN7_PC_PERFCTR_PC_SEL_0 0x9e42 +#define GEN7_PC_PERFCTR_PC_SEL_1 0x9e43 +#define GEN7_PC_PERFCTR_PC_SEL_2 0x9e44 +#define GEN7_PC_PERFCTR_PC_SEL_3 0x9e45 +#define GEN7_PC_PERFCTR_PC_SEL_4 0x9e46 +#define GEN7_PC_PERFCTR_PC_SEL_5 0x9e47 +#define GEN7_PC_PERFCTR_PC_SEL_6 0x9e48 +#define GEN7_PC_PERFCTR_PC_SEL_7 0x9e49 +#define GEN7_PC_PERFCTR_PC_SEL_8 0x9e4a +#define GEN7_PC_PERFCTR_PC_SEL_9 0x9e4b +#define GEN7_PC_PERFCTR_PC_SEL_10 0x9e4c +#define GEN7_PC_PERFCTR_PC_SEL_11 0x9e4d +#define GEN7_PC_PERFCTR_PC_SEL_12 0x9e4e +#define GEN7_PC_PERFCTR_PC_SEL_13 0x9e4f +#define GEN7_PC_PERFCTR_PC_SEL_14 0x9e50 +#define GEN7_PC_PERFCTR_PC_SEL_15 0x9e51 + +/* VFD registers */ +#define GEN7_VFD_PERFCTR_VFD_SEL_0 0xa610 +#define GEN7_VFD_PERFCTR_VFD_SEL_1 0xa611 +#define GEN7_VFD_PERFCTR_VFD_SEL_2 0xa612 +#define GEN7_VFD_PERFCTR_VFD_SEL_3 0xa613 +#define GEN7_VFD_PERFCTR_VFD_SEL_4 0xa614 +#define GEN7_VFD_PERFCTR_VFD_SEL_5 0xa615 +#define GEN7_VFD_PERFCTR_VFD_SEL_6 0xa616 +#define 
GEN7_VFD_PERFCTR_VFD_SEL_7 0xa617 +#define GEN7_VFD_PERFCTR_VFD_SEL_8 0xa618 +#define GEN7_VFD_PERFCTR_VFD_SEL_9 0xa619 +#define GEN7_VFD_PERFCTR_VFD_SEL_10 0xa61a +#define GEN7_VFD_PERFCTR_VFD_SEL_11 0xa61b +#define GEN7_VFD_PERFCTR_VFD_SEL_12 0xa61c +#define GEN7_VFD_PERFCTR_VFD_SEL_13 0xa61d +#define GEN7_VFD_PERFCTR_VFD_SEL_14 0xa61e +#define GEN7_VFD_PERFCTR_VFD_SEL_15 0xa61f +#define GEN7_SP_READ_SEL 0xae6d +#define GEN7_SP_AHB_READ_APERTURE 0xc000 + +/* VPC registers */ +#define GEN7_VPC_PERFCTR_VPC_SEL_0 0x960b +#define GEN7_VPC_PERFCTR_VPC_SEL_1 0x960c +#define GEN7_VPC_PERFCTR_VPC_SEL_2 0x960c +#define GEN7_VPC_PERFCTR_VPC_SEL_3 0x960e +#define GEN7_VPC_PERFCTR_VPC_SEL_4 0x960f +#define GEN7_VPC_PERFCTR_VPC_SEL_5 0x9610 +#define GEN7_VPC_PERFCTR_VPC_SEL_6 0x9611 +#define GEN7_VPC_PERFCTR_VPC_SEL_7 0x9612 +#define GEN7_VPC_PERFCTR_VPC_SEL_8 0x9613 +#define GEN7_VPC_PERFCTR_VPC_SEL_9 0x9614 +#define GEN7_VPC_PERFCTR_VPC_SEL_10 0x9615 +#define GEN7_VPC_PERFCTR_VPC_SEL_11 0x9616 + +/* UCHE registers */ +#define GEN7_UCHE_MODE_CNTL 0xe01 +#define GEN7_UCHE_WRITE_THRU_BASE_LO 0xe07 +#define GEN7_UCHE_WRITE_THRU_BASE_HI 0xe08 +#define GEN7_UCHE_TRAP_BASE_LO 0xe09 +#define GEN7_UCHE_TRAP_BASE_HI 0xe0a +#define GEN7_UCHE_GMEM_RANGE_MIN_LO 0xe0b +#define GEN7_UCHE_GMEM_RANGE_MIN_HI 0xe0c +#define GEN7_UCHE_GMEM_RANGE_MAX_LO 0xe0d +#define GEN7_UCHE_GMEM_RANGE_MAX_HI 0xe0e +#define GEN7_UCHE_CACHE_WAYS 0xe17 +#define GEN7_UCHE_CLIENT_PF 0xe19 +#define GEN7_UCHE_PERFCTR_UCHE_SEL_0 0xe1c +#define GEN7_UCHE_PERFCTR_UCHE_SEL_1 0xe1d +#define GEN7_UCHE_PERFCTR_UCHE_SEL_2 0xe1e +#define GEN7_UCHE_PERFCTR_UCHE_SEL_3 0xe1f +#define GEN7_UCHE_PERFCTR_UCHE_SEL_4 0xe20 +#define GEN7_UCHE_PERFCTR_UCHE_SEL_5 0xe21 +#define GEN7_UCHE_PERFCTR_UCHE_SEL_6 0xe22 +#define GEN7_UCHE_PERFCTR_UCHE_SEL_7 0xe23 +#define GEN7_UCHE_PERFCTR_UCHE_SEL_8 0xe24 +#define GEN7_UCHE_PERFCTR_UCHE_SEL_9 0xe25 +#define GEN7_UCHE_PERFCTR_UCHE_SEL_10 0xe26 +#define GEN7_UCHE_PERFCTR_UCHE_SEL_11 0xe27 +#define GEN7_UCHE_GBIF_GX_CONFIG 0xe3a +#define GEN7_UCHE_CMDQ_CONFIG 0xe3c + +/* SP registers */ +#define GEN7_SP_NC_MODE_CNTL 0xae02 +#define GEN7_SP_PERFCTR_HLSQ_SEL_0 0xae60 +#define GEN7_SP_PERFCTR_HLSQ_SEL_1 0xae61 +#define GEN7_SP_PERFCTR_HLSQ_SEL_2 0xae62 +#define GEN7_SP_PERFCTR_HLSQ_SEL_3 0xae63 +#define GEN7_SP_PERFCTR_HLSQ_SEL_4 0xae64 +#define GEN7_SP_PERFCTR_HLSQ_SEL_5 0xae65 +#define GEN7_SP_PERFCTR_SP_SEL_0 0xae80 +#define GEN7_SP_PERFCTR_SP_SEL_1 0xae81 +#define GEN7_SP_PERFCTR_SP_SEL_2 0xae82 +#define GEN7_SP_PERFCTR_SP_SEL_3 0xae83 +#define GEN7_SP_PERFCTR_SP_SEL_4 0xae84 +#define GEN7_SP_PERFCTR_SP_SEL_5 0xae85 +#define GEN7_SP_PERFCTR_SP_SEL_6 0xae86 +#define GEN7_SP_PERFCTR_SP_SEL_7 0xae87 +#define GEN7_SP_PERFCTR_SP_SEL_8 0xae88 +#define GEN7_SP_PERFCTR_SP_SEL_9 0xae89 +#define GEN7_SP_PERFCTR_SP_SEL_10 0xae8a +#define GEN7_SP_PERFCTR_SP_SEL_11 0xae8b +#define GEN7_SP_PERFCTR_SP_SEL_12 0xae8c +#define GEN7_SP_PERFCTR_SP_SEL_13 0xae8d +#define GEN7_SP_PERFCTR_SP_SEL_14 0xae8e +#define GEN7_SP_PERFCTR_SP_SEL_15 0xae8f +#define GEN7_SP_PERFCTR_SP_SEL_16 0xae90 +#define GEN7_SP_PERFCTR_SP_SEL_17 0xae91 +#define GEN7_SP_PERFCTR_SP_SEL_18 0xae92 +#define GEN7_SP_PERFCTR_SP_SEL_19 0xae93 +#define GEN7_SP_PERFCTR_SP_SEL_20 0xae94 +#define GEN7_SP_PERFCTR_SP_SEL_21 0xae95 +#define GEN7_SP_PERFCTR_SP_SEL_22 0xae96 +#define GEN7_SP_PERFCTR_SP_SEL_23 0xae97 +#define GEN7_SP_PERFCTR_SP_SEL_24 0xae98 +#define GEN7_SP_PERFCTR_SP_SEL_25 0xae99 +#define GEN7_SP_PERFCTR_SP_SEL_26 0xae9a +#define GEN7_SP_PERFCTR_SP_SEL_27 
0xae9b +#define GEN7_SP_PERFCTR_SP_SEL_28 0xae9c +#define GEN7_SP_PERFCTR_SP_SEL_29 0xae9d +#define GEN7_SP_PERFCTR_SP_SEL_30 0xae9e +#define GEN7_SP_PERFCTR_SP_SEL_31 0xae9f +#define GEN7_SP_PERFCTR_SP_SEL_32 0xaea0 +#define GEN7_SP_PERFCTR_SP_SEL_33 0xaea1 +#define GEN7_SP_PERFCTR_SP_SEL_34 0xaea2 +#define GEN7_SP_PERFCTR_SP_SEL_35 0xaea3 + +/* TP registers */ +#define GEN7_TPL1_NC_MODE_CNTL 0xb604 +#define GEN7_TPL1_PERFCTR_TP_SEL_0 0xb610 +#define GEN7_TPL1_PERFCTR_TP_SEL_1 0xb611 +#define GEN7_TPL1_PERFCTR_TP_SEL_2 0xb612 +#define GEN7_TPL1_PERFCTR_TP_SEL_3 0xb613 +#define GEN7_TPL1_PERFCTR_TP_SEL_4 0xb614 +#define GEN7_TPL1_PERFCTR_TP_SEL_5 0xb615 +#define GEN7_TPL1_PERFCTR_TP_SEL_6 0xb616 +#define GEN7_TPL1_PERFCTR_TP_SEL_7 0xb617 +#define GEN7_TPL1_PERFCTR_TP_SEL_8 0xb618 +#define GEN7_TPL1_PERFCTR_TP_SEL_9 0xb619 +#define GEN7_TPL1_PERFCTR_TP_SEL_10 0xb61a +#define GEN7_TPL1_PERFCTR_TP_SEL_11 0xb61b +#define GEN7_TPL1_PERFCTR_TP_SEL_12 0xb61c +#define GEN7_TPL1_PERFCTR_TP_SEL_13 0xb61d +#define GEN7_TPL1_PERFCTR_TP_SEL_14 0xb61e +#define GEN7_TPL1_PERFCTR_TP_SEL_15 0xb61f +#define GEN7_TPL1_PERFCTR_TP_SEL_16 0xb620 +#define GEN7_TPL1_PERFCTR_TP_SEL_17 0xb621 + +/* VBIF registers */ +#define GEN7_VBIF_XIN_HALT_CTRL1 0x3081 +#define GEN7_VBIF_TEST_BUS_OUT_CTRL 0x3084 +#define GEN7_VBIF_TEST_BUS1_CTRL0 0x3085 +#define GEN7_VBIF_TEST_BUS1_CTRL1 0x3086 +#define GEN7_VBIF_TEST_BUS2_CTRL0 0x3087 +#define GEN7_VBIF_TEST_BUS2_CTRL1 0x3088 +#define GEN7_VBIF_TEST_BUS_OUT 0x308c +#define GEN7_VBIF_PERF_CNT_SEL0 0x30d0 +#define GEN7_VBIF_PERF_CNT_SEL1 0x30d1 +#define GEN7_VBIF_PERF_CNT_SEL2 0x30d2 +#define GEN7_VBIF_PERF_CNT_SEL3 0x30d3 +#define GEN7_VBIF_PERF_CNT_LOW0 0x30d8 +#define GEN7_VBIF_PERF_CNT_LOW1 0x30d9 +#define GEN7_VBIF_PERF_CNT_LOW2 0x30da +#define GEN7_VBIF_PERF_CNT_LOW3 0x30db +#define GEN7_VBIF_PERF_CNT_HIGH0 0x30e0 +#define GEN7_VBIF_PERF_CNT_HIGH1 0x30e1 +#define GEN7_VBIF_PERF_CNT_HIGH2 0x30e2 +#define GEN7_VBIF_PERF_CNT_HIGH3 0x30e3 +#define GEN7_VBIF_PERF_PWR_CNT_EN0 0x3100 +#define GEN7_VBIF_PERF_PWR_CNT_EN1 0x3101 +#define GEN7_VBIF_PERF_PWR_CNT_EN2 0x3102 +#define GEN7_VBIF_PERF_PWR_CNT_LOW0 0x3110 +#define GEN7_VBIF_PERF_PWR_CNT_LOW1 0x3111 +#define GEN7_VBIF_PERF_PWR_CNT_LOW2 0x3112 +#define GEN7_VBIF_PERF_PWR_CNT_HIGH0 0x3118 +#define GEN7_VBIF_PERF_PWR_CNT_HIGH1 0x3119 +#define GEN7_VBIF_PERF_PWR_CNT_HIGH2 0x311a + +/* GBIF countables */ +#define GBIF_AXI0_READ_DATA_TOTAL_BEATS 34 +#define GBIF_AXI1_READ_DATA_TOTAL_BEATS 35 +#define GBIF_AXI0_WRITE_DATA_TOTAL_BEATS 46 +#define GBIF_AXI1_WRITE_DATA_TOTAL_BEATS 47 + +/* GBIF registers */ +#define GEN7_GBIF_SCACHE_CNTL0 0x3c01 +#define GEN7_GBIF_SCACHE_CNTL1 0x3c02 +#define GEN7_GBIF_QSB_SIDE0 0x3c03 +#define GEN7_GBIF_QSB_SIDE1 0x3c04 +#define GEN7_GBIF_QSB_SIDE2 0x3c05 +#define GEN7_GBIF_QSB_SIDE3 0x3c06 +#define GEN7_GBIF_HALT 0x3c45 +#define GEN7_GBIF_HALT_ACK 0x3c46 + +#define GEN7_GBIF_CLIENT_HALT_MASK BIT(0) +#define GEN7_GBIF_ARB_HALT_MASK BIT(1) +#define GEN7_GBIF_GX_HALT_MASK BIT(0) + +#define GEN7_GBIF_PERF_PWR_CNT_EN 0x3cc0 +#define GEN7_GBIF_PERF_PWR_CNT_CLR 0x3cc1 +#define GEN7_GBIF_PERF_CNT_SEL 0x3cc2 +#define GEN7_GBIF_PERF_PWR_CNT_SEL 0x3cc3 +#define GEN7_GBIF_PERF_CNT_LOW0 0x3cc4 +#define GEN7_GBIF_PERF_CNT_LOW1 0x3cc5 +#define GEN7_GBIF_PERF_CNT_LOW2 0x3cc6 +#define GEN7_GBIF_PERF_CNT_LOW3 0x3cc7 +#define GEN7_GBIF_PERF_CNT_HIGH0 0x3cc8 +#define GEN7_GBIF_PERF_CNT_HIGH1 0x3cc9 +#define GEN7_GBIF_PERF_CNT_HIGH2 0x3cca +#define GEN7_GBIF_PERF_CNT_HIGH3 0x3ccb +#define GEN7_GBIF_PWR_CNT_LOW0 0x3ccc 
+#define GEN7_GBIF_PWR_CNT_LOW1 0x3ccd +#define GEN7_GBIF_PWR_CNT_LOW2 0x3cce +#define GEN7_GBIF_PWR_CNT_HIGH0 0x3ccf +#define GEN7_GBIF_PWR_CNT_HIGH1 0x3cd0 +#define GEN7_GBIF_PWR_CNT_HIGH2 0x3cd1 + + +/* CX_DBGC_CFG registers */ +#define GEN7_CX_DBGC_CFG_DBGBUS_SEL_A 0x18400 +#define GEN7_CX_DBGC_CFG_DBGBUS_SEL_B 0x18401 +#define GEN7_CX_DBGC_CFG_DBGBUS_SEL_C 0x18402 +#define GEN7_CX_DBGC_CFG_DBGBUS_SEL_D 0x18403 +#define GEN7_CX_DBGC_CFG_DBGBUS_CNTLT 0x18404 +#define GEN7_CX_DBGC_CFG_DBGBUS_CNTLM 0x18405 +#define GEN7_CX_DBGC_CFG_DBGBUS_OPL 0x18406 +#define GEN7_CX_DBGC_CFG_DBGBUS_OPE 0x18407 +#define GEN7_CX_DBGC_CFG_DBGBUS_IVTL_0 0x18408 +#define GEN7_CX_DBGC_CFG_DBGBUS_IVTL_1 0x18409 +#define GEN7_CX_DBGC_CFG_DBGBUS_IVTL_2 0x1840a +#define GEN7_CX_DBGC_CFG_DBGBUS_IVTL_3 0x1840b +#define GEN7_CX_DBGC_CFG_DBGBUS_MASKL_0 0x1840c +#define GEN7_CX_DBGC_CFG_DBGBUS_MASKL_1 0x1840d +#define GEN7_CX_DBGC_CFG_DBGBUS_MASKL_2 0x1840e +#define GEN7_CX_DBGC_CFG_DBGBUS_MASKL_3 0x1840f +#define GEN7_CX_DBGC_CFG_DBGBUS_BYTEL_0 0x18410 +#define GEN7_CX_DBGC_CFG_DBGBUS_BYTEL_1 0x18411 +#define GEN7_CX_DBGC_CFG_DBGBUS_IVTE_0 0x18412 +#define GEN7_CX_DBGC_CFG_DBGBUS_IVTE_1 0x18413 +#define GEN7_CX_DBGC_CFG_DBGBUS_IVTE_2 0x18414 +#define GEN7_CX_DBGC_CFG_DBGBUS_IVTE_3 0x18415 +#define GEN7_CX_DBGC_CFG_DBGBUS_MASKE_0 0x18416 +#define GEN7_CX_DBGC_CFG_DBGBUS_MASKE_1 0x18417 +#define GEN7_CX_DBGC_CFG_DBGBUS_MASKE_2 0x18418 +#define GEN7_CX_DBGC_CFG_DBGBUS_MASKE_3 0x18419 +#define GEN7_CX_DBGC_CFG_DBGBUS_NIBBLEE 0x1841a +#define GEN7_CX_DBGC_CFG_DBGBUS_PTRC0 0x1841b +#define GEN7_CX_DBGC_CFG_DBGBUS_PTRC1 0x1841c +#define GEN7_CX_DBGC_CFG_DBGBUS_LOADREG 0x1841d +#define GEN7_CX_DBGC_CFG_DBGBUS_IDX 0x1841e +#define GEN7_CX_DBGC_CFG_DBGBUS_CLRC 0x1841f +#define GEN7_CX_DBGC_CFG_DBGBUS_LOADIVT 0x18420 +#define GEN7_CX_DBGC_VBIF_DBG_CNTL 0x18421 +#define GEN7_CX_DBGC_DBG_LO_HI_GPIO 0x18422 +#define GEN7_CX_DBGC_EXT_TRACE_BUS_CNTL 0x18423 +#define GEN7_CX_DBGC_READ_AHB_THROUGH_DBG 0x18424 +#define GEN7_CX_DBGC_CFG_DBGBUS_OVER 0x18426 +#define GEN7_CX_DBGC_CFG_DBGBUS_TRACE_BUF1 0x1842f +#define GEN7_CX_DBGC_CFG_DBGBUS_TRACE_BUF2 0x18430 +#define GEN7_CX_DBGC_EVT_CFG 0x18440 +#define GEN7_CX_DBGC_EVT_INTF_SEL_0 0x18441 +#define GEN7_CX_DBGC_EVT_INTF_SEL_1 0x18442 +#define GEN7_CX_DBGC_PERF_ATB_CFG 0x18443 +#define GEN7_CX_DBGC_PERF_ATB_COUNTER_SEL_0 0x18444 +#define GEN7_CX_DBGC_PERF_ATB_COUNTER_SEL_1 0x18445 +#define GEN7_CX_DBGC_PERF_ATB_COUNTER_SEL_2 0x18446 +#define GEN7_CX_DBGC_PERF_ATB_COUNTER_SEL_3 0x18447 +#define GEN7_CX_DBGC_PERF_ATB_TRIG_INTF_SEL_0 0x18448 +#define GEN7_CX_DBGC_PERF_ATB_TRIG_INTF_SEL_1 0x18449 +#define GEN7_CX_DBGC_PERF_ATB_DRAIN_CMD 0x1844a +#define GEN7_CX_DBGC_ECO_CNTL 0x18450 +#define GEN7_CX_DBGC_AHB_DBG_CNTL 0x18451 + +/* GMU control registers */ +#define GEN7_GMU_CM3_ITCM_START 0x1b400 +#define GEN7_GMU_CM3_DTCM_START 0x1c400 +#define GEN7_GMU_NMI_CONTROL_STATUS 0x1cbf0 +#define GEN7_GMU_BOOT_SLUMBER_OPTION 0x1cbf8 +#define GEN7_GMU_GX_VOTE_IDX 0x1cbf9 +#define GEN7_GMU_MX_VOTE_IDX 0x1cbfa +#define GEN7_GMU_DCVS_ACK_OPTION 0x1cbfc +#define GEN7_GMU_DCVS_PERF_SETTING 0x1cbfd +#define GEN7_GMU_DCVS_BW_SETTING 0x1cbfe +#define GEN7_GMU_DCVS_RETURN 0x1cbff +#define GEN7_GMU_ICACHE_CONFIG 0x1f400 +#define GEN7_GMU_DCACHE_CONFIG 0x1f401 +#define GEN7_GMU_SYS_BUS_CONFIG 0x1f40f +#define GEN7_GMU_CM3_SYSRESET 0x1f800 +#define GEN7_GMU_CM3_BOOT_CONFIG 0x1f801 +#define GEN7_GMU_CX_GMU_WFI_CONFIG 0x1f802 +#define GEN7_GMU_CX_GMU_WDOG_CTRL 0x1f813 +#define GEN7_GMU_CM3_FW_BUSY 0x1f81a +#define 
GEN7_GMU_CM3_FW_INIT_RESULT 0x1f81c +#define GEN7_GMU_CM3_CFG 0x1f82d +#define GEN7_GMU_CX_GMU_POWER_COUNTER_ENABLE 0x1f840 +#define GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_0 0x1f841 +#define GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_1 0x1f842 +#define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L 0x1f844 +#define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H 0x1f845 +#define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_1_L 0x1f846 +#define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_1_H 0x1f847 +#define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_2_L 0x1f848 +#define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_2_H 0x1f849 +#define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_3_L 0x1f84a +#define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_3_H 0x1f84b +#define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_4_L 0x1f84c +#define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_4_H 0x1f84d +#define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_5_L 0x1f84e +#define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_5_H 0x1f84f +#define GEN7_GMU_CX_GMU_POWER_COUNTER_GMUCLK_0_L 0x1f850 +#define GEN7_GMU_CX_GMU_POWER_COUNTER_GMUCLK_0_H 0x1f851 +#define GEN7_GMU_CX_GMU_POWER_COUNTER_GMUCLK_1_L 0x1f852 +#define GEN7_GMU_CX_GMU_POWER_COUNTER_GMUCLK_1_H 0x1f853 +#define GEN7_GMU_CX_GMU_POWER_COUNTER_SELECT_2 0x1f860 +#define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_6_L 0x1f870 +#define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_6_H 0x1f871 +#define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_7_L 0x1f872 +#define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_7_H 0x1f843 +#define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_8_L 0x1f874 +#define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_8_H 0x1f875 +#define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_9_L 0x1f876 +#define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_9_H 0x1f877 +#define GEN7_GMU_CX_GMU_ALWAYS_ON_COUNTER_L 0x1f888 +#define GEN7_GMU_CX_GMU_ALWAYS_ON_COUNTER_H 0x1f889 +#define GEN7_GMU_PWR_COL_INTER_FRAME_CTRL 0x1f8c0 +#define GEN7_GMU_PWR_COL_INTER_FRAME_HYST 0x1f8c1 +#define GEN7_GMU_GFX_PWR_CLK_STATUS 0x1f8d0 +#define GEN7_GMU_CX_GMU_PERF_COUNTER_ENABLE 0x1f8a0 +#define GEN7_GMU_CX_GMU_PERF_COUNTER_SELECT_0 0x1f8a1 +#define GEN7_GMU_CX_GMU_PERF_COUNTER_SELECT_1 0x1f8a2 +#define GEN7_GMU_CX_GMU_PERF_COUNTER_0_L 0x1f8a4 +#define GEN7_GMU_CX_GMU_PERF_COUNTER_0_H 0x1f8a5 +#define GEN7_GMU_CX_GMU_PERF_COUNTER_1_L 0x1f8a6 +#define GEN7_GMU_CX_GMU_PERF_COUNTER_1_H 0x1f8a7 +#define GEN7_GMU_CX_GMU_PERF_COUNTER_2_L 0x1f8a8 +#define GEN7_GMU_CX_GMU_PERF_COUNTER_2_H 0x1f8a9 +#define GEN7_GMU_CX_GMU_PERF_COUNTER_3_L 0x1f8aa +#define GEN7_GMU_CX_GMU_PERF_COUNTER_3_H 0x1f8ab +#define GEN7_GMU_CX_GMU_PERF_COUNTER_4_L 0x1f8ac +#define GEN7_GMU_CX_GMU_PERF_COUNTER_4_H 0x1f8ad +#define GEN7_GMU_CX_GMU_PERF_COUNTER_5_L 0x1f8ae +#define GEN7_GMU_CX_GMU_PERF_COUNTER_5_H 0x1f8af +#define GEN7_GPU_GMU_CX_GMU_RPMH_POWER_STATE 0x1f8ec +#define GEN7_GMU_BOOT_KMD_LM_HANDSHAKE 0x1f9f0 + +/* HFI registers*/ +#define GEN7_GMU_ALWAYS_ON_COUNTER_L 0x1f888 +#define GEN7_GMU_ALWAYS_ON_COUNTER_H 0x1f889 +#define GEN7_GMU_GMU_PWR_COL_KEEPALIVE 0x1f8c3 +#define GEN7_GMU_PWR_COL_PREEMPT_KEEPALIVE 0x1f8c4 +#define GEN7_GMU_HFI_CTRL_STATUS 0x1f980 +#define GEN7_GMU_HFI_QTBL_INFO 0x1f984 +#define GEN7_GMU_HFI_QTBL_ADDR 0x1f985 +#define GEN7_GMU_HFI_CTRL_INIT 0x1f986 +#define GEN7_GMU_GMU2HOST_INTR_SET 0x1f990 +#define GEN7_GMU_GMU2HOST_INTR_CLR 0x1f991 +#define GEN7_GMU_GMU2HOST_INTR_INFO 0x1f992 +#define GEN7_GMU_GMU2HOST_INTR_MASK 0x1f993 +#define GEN7_GMU_HOST2GMU_INTR_SET 0x1f994 +#define GEN7_GMU_HOST2GMU_INTR_CLR 0x1f995 +#define GEN7_GMU_HOST2GMU_INTR_RAW_INFO 0x1f996 +#define GEN7_GMU_HOST2GMU_INTR_EN_0 0x1f997 +#define GEN7_GMU_HOST2GMU_INTR_EN_1 
0x1f998 +#define GEN7_GMU_HOST2GMU_INTR_EN_2 0x1f999 +#define GEN7_GMU_HOST2GMU_INTR_EN_3 0x1f99a +#define GEN7_GMU_HOST2GMU_INTR_INFO_0 0x1f99b +#define GEN7_GMU_HOST2GMU_INTR_INFO_1 0x1f99c +#define GEN7_GMU_HOST2GMU_INTR_INFO_2 0x1f99d +#define GEN7_GMU_HOST2GMU_INTR_INFO_3 0x1f99e +#define GEN7_GMU_GENERAL_0 0x1f9c5 +#define GEN7_GMU_GENERAL_1 0x1f9c6 +#define GEN7_GMU_GENERAL_6 0x1f9cb +#define GEN7_GMU_GENERAL_7 0x1f9cc +#define GEN7_GMU_GENERAL_8 0x1f9cd +#define GEN7_GMU_GENERAL_9 0x1f9ce +#define GEN7_GMU_GENERAL_10 0x1f9cf + +/* FAL10 veto register */ +#define GEN7_GPU_GMU_CX_GMU_CX_FAL_INTF 0x1f8f0 +#define GEN7_GPU_GMU_CX_GMU_CX_FALNEXT_INTF 0x1f8f1 + +#define GEN7_GMU_AO_INTERRUPT_EN 0x23b03 +#define GEN7_GMU_AO_HOST_INTERRUPT_CLR 0x23b04 +#define GEN7_GMU_AO_HOST_INTERRUPT_STATUS 0x23b05 +#define GEN7_GMU_AO_HOST_INTERRUPT_MASK 0x23b06 +#define GEN7_GPU_GMU_AO_GMU_CGC_MODE_CNTL 0x23b09 +#define GEN7_GPU_GMU_AO_GMU_CGC_DELAY_CNTL 0x23b0a +#define GEN7_GPU_GMU_AO_GMU_CGC_HYST_CNTL 0x23b0b +#define GEN7_GPU_GMU_AO_GPU_CX_BUSY_STATUS 0x23b0c +#define GEN7_GPU_GMU_AO_GPU_CX_BUSY_STATUS2 0x23b0d +#define GEN7_GPU_GMU_AO_GPU_CX_BUSY_MASK 0x23b0e +#define GEN7_GMU_AO_AHB_FENCE_CTRL 0x23b10 +#define GEN7_GMU_AHB_FENCE_STATUS 0x23b13 +#define GEN7_GMU_AHB_FENCE_STATUS_CLR 0x23b14 +#define GEN7_GMU_RBBM_INT_UNMASKED_STATUS 0x23b15 +#define GEN7_GMU_AO_SPARE_CNTL 0x23b16 + +/* GMU RSC control registers */ +#define GEN7_GMU_RSCC_CONTROL_REQ 0x23b07 +#define GEN7_GMU_RSCC_CONTROL_ACK 0x23b08 + +/* FENCE control registers */ +#define GEN7_GMU_AHB_FENCE_RANGE_0 0x23b11 + +/* GPUCC registers */ +#define GEN7_GPU_CC_GX_DOMAIN_MISC3 0x26541 +#define GEN7_GPU_CC_CX_GDSCR 0x26442 + +/* GPU RSC sequencer registers */ +#define GEN7_GPU_RSCC_RSC_STATUS0_DRV0 0x00004 +#define GEN7_RSCC_PDC_SEQ_START_ADDR 0x00008 +#define GEN7_RSCC_PDC_MATCH_VALUE_LO 0x00009 +#define GEN7_RSCC_PDC_MATCH_VALUE_HI 0x0000a +#define GEN7_RSCC_PDC_SLAVE_ID_DRV0 0x0000b +#define GEN7_RSCC_HIDDEN_TCS_CMD0_ADDR 0x0000d +#define GEN7_RSCC_HIDDEN_TCS_CMD0_DATA 0x0000e +#define GEN7_RSCC_TIMESTAMP_UNIT0_TIMESTAMP_L_DRV0 0x00082 +#define GEN7_RSCC_TIMESTAMP_UNIT0_TIMESTAMP_H_DRV0 0x00083 +#define GEN7_RSCC_TIMESTAMP_UNIT1_EN_DRV0 0x00089 +#define GEN7_RSCC_TIMESTAMP_UNIT1_OUTPUT_DRV0 0x0008c +#define GEN7_RSCC_OVERRIDE_START_ADDR 0x00100 +#define GEN7_RSCC_SEQ_BUSY_DRV0 0x00101 +#define GEN7_RSCC_SEQ_MEM_0_DRV0 0x00180 +#define GEN7_RSCC_TCS0_DRV0_STATUS 0x00346 +#define GEN7_RSCC_TCS1_DRV0_STATUS 0x003ee +#define GEN7_RSCC_TCS2_DRV0_STATUS 0x00496 +#define GEN7_RSCC_TCS3_DRV0_STATUS 0x0053e + +/* GPU PDC sequencer registers in AOSS.RPMh domain */ +#define GEN7_PDC_GPU_ENABLE_PDC 0x1140 +#define GEN7_PDC_GPU_SEQ_START_ADDR 0x1148 + +#define GEN7_SMMU_BASE 0x28000 + +/* GPU CX_MISC registers */ +#define GEN7_GPU_CX_MISC_TCM_RET_CNTL 0x39 + +#endif /* _GEN7_REG_H */ diff --git a/gfx_driver_product.mk b/gfx_driver_product.mk new file mode 100644 index 0000000000..64bee7aea5 --- /dev/null +++ b/gfx_driver_product.mk @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only + +PRODUCT_PACKAGES += msm_kgsl.ko + diff --git a/gfx_kernel_board.mk b/gfx_kernel_board.mk new file mode 100644 index 0000000000..238b8ff9ed --- /dev/null +++ b/gfx_kernel_board.mk @@ -0,0 +1,10 @@ +#SPDX-License-Identifier: GPL-2.0-only + +ifneq ($(TARGET_BOARD_AUTO),true) + ifeq ($(call is-board-platform-in-list,$(TARGET_BOARD_PLATFORM)),true) + BOARD_VENDOR_KERNEL_MODULES += $(KERNEL_MODULES_OUT)/msm_kgsl.ko + BOARD_VENDOR_RAMDISK_KERNEL_MODULES += 
$(KERNEL_MODULES_OUT)/msm_kgsl.ko + BOARD_VENDOR_RAMDISK_RECOVERY_KERNEL_MODULES_LOAD += $(KERNEL_MODULES_OUT)/msm_kgsl.ko + endif +endif + diff --git a/gfx_kernel_headers.py b/gfx_kernel_headers.py new file mode 100644 index 0000000000..ab0c06a2a5 --- /dev/null +++ b/gfx_kernel_headers.py @@ -0,0 +1,96 @@ +# Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License version 2 as published by +# the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# this program. If not, see . + +import argparse +import filecmp +import os +import re +import subprocess +import sys + +def run_headers_install(verbose, gen_dir, headers_install, unifdef, prefix, h): + if not h.startswith(prefix): + print('error: expected prefix [%s] on header [%s]' % (prefix, h)) + return False + + out_h = os.path.join(gen_dir, h[len(prefix):]) + (out_h_dirname, out_h_basename) = os.path.split(out_h) + env = os.environ.copy() + env["LOC_UNIFDEF"] = unifdef + cmd = ["sh", headers_install, h, out_h] + + if verbose: + print('run_headers_install: cmd is %s' % cmd) + + result = subprocess.call(cmd, env=env) + + if result != 0: + print('error: run_headers_install: cmd %s failed %d' % (cmd, result)) + return False + return True + +def gen_gfx_headers(verbose, gen_dir, headers_install, unifdef, gfx_include_uapi): + error_count = 0 + for h in gfx_include_uapi: + gfx_uapi_include_prefix = os.path.join(h.split('/include/uapi/')[0], + 'include', + 'uapi', + 'linux') + os.sep + + if not run_headers_install( + verbose, gen_dir, headers_install, unifdef, + gfx_uapi_include_prefix, h): error_count += 1 + return error_count + +def main(): + """Parse command line arguments and perform top level control.""" + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + + # Arguments that apply to every invocation of this script. 
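+    # Each --gfx_include_uapi entry is expected to be a techpack UAPI header
+    # under an .../include/uapi/linux/ prefix: gen_gfx_headers() strips that
+    # prefix and invokes the headers_install tool (with LOC_UNIFDEF pointing
+    # at --unifdef) to emit the sanitized copy into --gen_dir, counting any
+    # header that fails as an error in the script's exit status.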
+ parser.add_argument( + '--verbose', action='store_true', + help='Print output that describes the workings of this script.') + parser.add_argument( + '--header_arch', required=True, + help='The arch for which to generate headers.') + parser.add_argument( + '--gen_dir', required=True, + help='Where to place the generated files.') + parser.add_argument( + '--gfx_include_uapi', required=True, nargs='*', + help='The list of techpack/*/include/uapi header files.') + parser.add_argument( + '--headers_install', required=True, + help='The headers_install tool to process input headers.') + parser.add_argument( + '--unifdef', + required=True, + help='The unifdef tool used by headers_install.') + + args = parser.parse_args() + + if args.verbose: + print('header_arch [%s]' % args.header_arch) + print('gen_dir [%s]' % args.gen_dir) + print('gfx_include_uapi [%s]' % args.gfx_include_uapi) + print('headers_install [%s]' % args.headers_install) + print('unifdef [%s]' % args.unifdef) + + return gen_gfx_headers(args.verbose, args.gen_dir, + args.headers_install, args.unifdef, args.gfx_include_uapi) + +if __name__ == '__main__': + sys.exit(main()) diff --git a/governor_gpubw_mon.c b/governor_gpubw_mon.c new file mode 100644 index 0000000000..147c43511f --- /dev/null +++ b/governor_gpubw_mon.c @@ -0,0 +1,318 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2014-2021, The Linux Foundation. All rights reserved. + */ + +#include +#include + +#include "governor.h" +#include "msm_adreno_devfreq.h" + +#define MIN_BUSY 1000 +#define LONG_FLOOR 50000 +#define HIST 5 +#define TARGET 80 +#define CAP 75 +#define WAIT_THRESHOLD 10 +/* AB vote is in multiple of BW_STEP Mega bytes */ +#define BW_STEP 50 + +static void _update_cutoff(struct devfreq_msm_adreno_tz_data *priv, + unsigned int norm_max) +{ + int i; + + priv->bus.max = norm_max; + for (i = 0; i < priv->bus.num; i++) { + priv->bus.up[i] = priv->bus.p_up[i] * norm_max / 100; + priv->bus.down[i] = priv->bus.p_down[i] * norm_max / 100; + } +} + +static ssize_t cur_ab_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct devfreq *df = to_devfreq(dev); + struct msm_busmon_extended_profile *bus_profile = container_of( + (df->profile), + struct msm_busmon_extended_profile, + profile); + + return scnprintf(buf, PAGE_SIZE, "%llu\n", bus_profile->ab_mbytes); +} + +static ssize_t sampling_interval_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct devfreq *df = to_devfreq(dev); + struct msm_busmon_extended_profile *bus_profile = container_of( + (df->profile), + struct msm_busmon_extended_profile, + profile); + + return scnprintf(buf, PAGE_SIZE, "%d\n", bus_profile->sampling_ms); +} + +static ssize_t sampling_interval_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct devfreq *df = to_devfreq(dev); + struct msm_busmon_extended_profile *bus_profile = container_of( + (df->profile), + struct msm_busmon_extended_profile, + profile); + u32 value; + int ret; + + ret = kstrtou32(buf, 0, &value); + if (ret) + return ret; + + bus_profile->sampling_ms = value; + + return count; +} + +static DEVICE_ATTR_RW(sampling_interval); +static DEVICE_ATTR_RO(cur_ab); + +static const struct device_attribute *gpubw_attr_list[] = { + &dev_attr_sampling_interval, + &dev_attr_cur_ab, + NULL +}; + +static int devfreq_gpubw_get_target(struct devfreq *df, + unsigned long *freq) +{ + + struct devfreq_msm_adreno_tz_data *priv = df->data; + struct msm_busmon_extended_profile 
*bus_profile = container_of( + (df->profile), + struct msm_busmon_extended_profile, + profile); + struct devfreq_dev_status *stats = &df->last_status; + struct xstats b; + int result; + int act_level; + int norm_max_cycles; + int norm_cycles; + int wait_active_percent; + int gpu_percent; + /* + * Normalized AB should at max usage be the gpu_bimc frequency in MHz. + * Start with a reasonable value and let the system push it up to max. + */ + static int norm_ab_max = 300; + int norm_ab; + unsigned long ab_mbytes = 0; + + if (priv == NULL) + return 0; + + stats->private_data = &b; + + result = devfreq_update_stats(df); + + *freq = stats->current_frequency; + + priv->bus.total_time += stats->total_time; + priv->bus.gpu_time += stats->busy_time; + priv->bus.ram_time += b.ram_time; + priv->bus.ram_wait += b.ram_wait; + + if (priv->bus.total_time < bus_profile->sampling_ms) + return result; + + norm_max_cycles = (unsigned int)(priv->bus.ram_time) / + (unsigned int) priv->bus.total_time; + norm_cycles = (unsigned int)(priv->bus.ram_time + priv->bus.ram_wait) / + (unsigned int) priv->bus.total_time; + wait_active_percent = (100 * (unsigned int)priv->bus.ram_wait) / + (unsigned int) priv->bus.ram_time; + gpu_percent = (100 * (unsigned int)priv->bus.gpu_time) / + (unsigned int) priv->bus.total_time; + + /* + * If there's a new high watermark, update the cutoffs and send the + * FAST hint, provided that we are using a floating watermark. + * Otherwise check the current value against the current + * cutoffs. + */ + if (norm_max_cycles > priv->bus.max && priv->bus.floating) { + _update_cutoff(priv, norm_max_cycles); + bus_profile->flag = DEVFREQ_FLAG_FAST_HINT; + } else { + /* GPU votes for IB not AB so don't under vote the system */ + norm_cycles = (100 * norm_cycles) / TARGET; + act_level = b.buslevel; + act_level = (act_level < 0) ? 0 : act_level; + act_level = (act_level >= priv->bus.num) ? 
+ (priv->bus.num - 1) : act_level; + if ((norm_cycles > priv->bus.up[act_level] || + wait_active_percent > WAIT_THRESHOLD) && + gpu_percent > CAP) + bus_profile->flag = DEVFREQ_FLAG_FAST_HINT; + else if (norm_cycles < priv->bus.down[act_level] && b.buslevel) + bus_profile->flag = DEVFREQ_FLAG_SLOW_HINT; + } + + /* Calculate the AB vote based on bus width if defined */ + if (priv->bus.width) { + norm_ab = (unsigned int)priv->bus.ram_time / + (unsigned int) priv->bus.total_time; + /* Calculate AB in Mega Bytes and roundup in BW_STEP */ + ab_mbytes = (norm_ab * priv->bus.width * 1000000ULL) >> 20; + bus_profile->ab_mbytes = roundup(ab_mbytes, BW_STEP); + } else if (bus_profile->flag) { + /* Re-calculate the AB percentage for a new IB vote */ + norm_ab = (unsigned int)priv->bus.ram_time / + (unsigned int) priv->bus.total_time; + if (norm_ab > norm_ab_max) + norm_ab_max = norm_ab; + bus_profile->percent_ab = (100 * norm_ab) / norm_ab_max; + } + + priv->bus.total_time = 0; + priv->bus.gpu_time = 0; + priv->bus.ram_time = 0; + priv->bus.ram_wait = 0; + + return result; +} + +static int gpubw_start(struct devfreq *devfreq) +{ + struct devfreq_msm_adreno_tz_data *priv; + + struct msm_busmon_extended_profile *bus_profile = container_of( + (devfreq->profile), + struct msm_busmon_extended_profile, + profile); + unsigned int t1, t2 = 2 * HIST; + int i, bus_size; + + + devfreq->data = bus_profile->private_data; + priv = devfreq->data; + + bus_size = sizeof(u32) * priv->bus.num; + priv->bus.up = kzalloc(bus_size, GFP_KERNEL); + priv->bus.down = kzalloc(bus_size, GFP_KERNEL); + priv->bus.p_up = kzalloc(bus_size, GFP_KERNEL); + priv->bus.p_down = kzalloc(bus_size, GFP_KERNEL); + if (priv->bus.up == NULL || priv->bus.down == NULL || + priv->bus.p_up == NULL || priv->bus.p_down == NULL) + return -ENOMEM; + + /* Set up the cut-over percentages for the bus calculation. */ + for (i = 0; i < priv->bus.num; i++) { + t1 = (u32)(100 * priv->bus.ib_kbps[i]) / + (u32)priv->bus.ib_kbps[priv->bus.num - 1]; + priv->bus.p_up[i] = t1 - HIST; + priv->bus.p_down[i] = t2 - 2 * HIST; + t2 = t1; + } + /* Set the upper-most and lower-most bounds correctly. */ + priv->bus.p_down[0] = 0; + + for (i = 0; i < priv->bus.num; i++) { + if (priv->bus.p_down[i] < 2 * HIST) + priv->bus.p_down[i] = 2 * HIST; + } + + if (priv->bus.num >= 1) + priv->bus.p_up[priv->bus.num - 1] = 100; + _update_cutoff(priv, priv->bus.max); + + bus_profile->sampling_ms = LONG_FLOOR; + + for (i = 0; gpubw_attr_list[i] != NULL; i++) + device_create_file(&devfreq->dev, gpubw_attr_list[i]); + + return 0; +} + +static int gpubw_stop(struct devfreq *devfreq) +{ + struct devfreq_msm_adreno_tz_data *priv = devfreq->data; + int i; + + for (i = 0; gpubw_attr_list[i] != NULL; i++) + device_remove_file(&devfreq->dev, gpubw_attr_list[i]); + + if (priv) { + kfree(priv->bus.up); + kfree(priv->bus.down); + kfree(priv->bus.p_up); + kfree(priv->bus.p_down); + } + devfreq->data = NULL; + return 0; +} + +static int devfreq_gpubw_event_handler(struct devfreq *devfreq, + unsigned int event, void *data) +{ + int result = 0; + unsigned long freq; + + if (strcmp(dev_name(devfreq->dev.parent), "kgsl-busmon")) + return -EINVAL; + + mutex_lock(&devfreq->lock); + freq = devfreq->previous_freq; + switch (event) { + case DEVFREQ_GOV_START: + result = gpubw_start(devfreq); + break; + case DEVFREQ_GOV_STOP: + result = gpubw_stop(devfreq); + break; + case DEVFREQ_GOV_RESUME: + /* TODO ..... 
*/ + /* ret = update_devfreq(devfreq); */ + break; + case DEVFREQ_GOV_SUSPEND: + { + struct devfreq_msm_adreno_tz_data *priv = devfreq->data; + + if (priv) { + priv->bus.total_time = 0; + priv->bus.gpu_time = 0; + priv->bus.ram_time = 0; + } + } + break; + default: + result = 0; + break; + } + mutex_unlock(&devfreq->lock); + return result; +} + +static struct devfreq_governor devfreq_gpubw = { + .name = "gpubw_mon", + .get_target_freq = devfreq_gpubw_get_target, + .event_handler = devfreq_gpubw_event_handler, + .immutable = 1, +}; + +int devfreq_gpubw_init(void) +{ + return devfreq_add_governor(&devfreq_gpubw); +} + +void devfreq_gpubw_exit(void) +{ + int ret; + + ret = devfreq_remove_governor(&devfreq_gpubw); + if (ret) + pr_err("%s: failed remove governor %d\n", __func__, ret); + +} diff --git a/governor_msm_adreno_tz.c b/governor_msm_adreno_tz.c new file mode 100644 index 0000000000..18f4f16ead --- /dev/null +++ b/governor_msm_adreno_tz.c @@ -0,0 +1,563 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2010-2021, The Linux Foundation. All rights reserved. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "governor.h" +#include "msm_adreno_devfreq.h" + +static DEFINE_SPINLOCK(tz_lock); +static DEFINE_SPINLOCK(sample_lock); +static DEFINE_SPINLOCK(suspend_lock); +/* + * FLOOR is 5msec to capture up to 3 re-draws + * per frame for 60fps content. + */ +#define FLOOR 5000 +/* + * MIN_BUSY is 1 msec for the sample to be sent + */ +#define MIN_BUSY 1000 +#define MAX_TZ_VERSION 0 + +/* + * CEILING is 50msec, larger than any standard + * frame length, but less than the idle timer. + */ +#define CEILING 50000 +#define TZ_RESET_ID 0x3 +#define TZ_UPDATE_ID 0x4 +#define TZ_INIT_ID 0x6 + +#define TZ_RESET_ID_64 0x7 +#define TZ_UPDATE_ID_64 0x8 +#define TZ_INIT_ID_64 0x9 + +#define TZ_V2_UPDATE_ID_64 0xA +#define TZ_V2_INIT_ID_64 0xB +#define TZ_V2_INIT_CA_ID_64 0xC +#define TZ_V2_UPDATE_WITH_CA_ID_64 0xD + +#define TAG "msm_adreno_tz: " + +static u64 suspend_time; +static u64 suspend_start; +static unsigned long acc_total, acc_relative_busy; + +/* + * Returns GPU suspend time in millisecond. + */ +u64 suspend_time_ms(void) +{ + u64 suspend_sampling_time; + u64 time_diff = 0; + + if (suspend_start == 0) + return 0; + + suspend_sampling_time = (u64)ktime_to_ms(ktime_get()); + time_diff = suspend_sampling_time - suspend_start; + /* Update the suspend_start sample again */ + suspend_start = suspend_sampling_time; + return time_diff; +} + +static ssize_t gpu_load_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + unsigned long sysfs_busy_perc = 0; + /* + * Average out the samples taken since last read + * This will keep the average value in sync with + * with the client sampling duration. + */ + spin_lock(&sample_lock); + if (acc_total) + sysfs_busy_perc = (acc_relative_busy * 100) / acc_total; + + /* Reset the parameters */ + acc_total = 0; + acc_relative_busy = 0; + spin_unlock(&sample_lock); + return snprintf(buf, PAGE_SIZE, "%lu\n", sysfs_busy_perc); +} + +/* + * Returns the time in ms for which gpu was in suspend state + * since last time the entry is read. 
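+ * For example, if the GPU spent 120 ms suspended between two reads of this
+ * node, the first read returns 120 and the accumulated value is reset, so
+ * an immediate second read returns 0.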
+ */ +static ssize_t suspend_time_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + u64 time_diff = 0; + + spin_lock(&suspend_lock); + time_diff = suspend_time_ms(); + /* + * Adding the previous suspend time also as the gpu + * can go and come out of suspend states in between + * reads also and we should have the total suspend + * since last read. + */ + time_diff += suspend_time; + suspend_time = 0; + spin_unlock(&suspend_lock); + + return snprintf(buf, PAGE_SIZE, "%llu\n", time_diff); +} + +static ssize_t mod_percent_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int ret; + unsigned int val; + struct devfreq *devfreq = to_devfreq(dev); + struct devfreq_msm_adreno_tz_data *priv = devfreq->data; + + ret = kstrtou32(buf, 0, &val); + if (ret) + return ret; + + priv->mod_percent = clamp_t(u32, val, 10, 1000); + + return count; +} + +static ssize_t mod_percent_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct devfreq *devfreq = to_devfreq(dev); + struct devfreq_msm_adreno_tz_data *priv = devfreq->data; + + return scnprintf(buf, PAGE_SIZE, "%u\n", priv->mod_percent); +} + +static DEVICE_ATTR_RO(gpu_load); + +static DEVICE_ATTR_RO(suspend_time); +static DEVICE_ATTR_RW(mod_percent); + +static const struct device_attribute *adreno_tz_attr_list[] = { + &dev_attr_gpu_load, + &dev_attr_suspend_time, + &dev_attr_mod_percent, + NULL +}; + +void compute_work_load(struct devfreq_dev_status *stats, + struct devfreq_msm_adreno_tz_data *priv, + struct devfreq *devfreq) +{ + u64 busy; + + spin_lock(&sample_lock); + /* + * Keep collecting the stats till the client + * reads it. Average of all samples and reset + * is done when the entry is read + */ + acc_total += stats->total_time; + busy = (u64)stats->busy_time * stats->current_frequency; + do_div(busy, devfreq->profile->freq_table[0]); + acc_relative_busy += busy; + + spin_unlock(&sample_lock); +} + +/* Trap into the TrustZone, and call funcs there. 
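+ * Note: the legacy (32-bit) SCM path below is serialized with tz_lock,
+ * while the 64-bit DCVS interface is invoked directly; when context aware
+ * DCVS is enabled the update call also passes the active context count.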
*/ +static int __secure_tz_reset_entry2(unsigned int *scm_data, u32 size_scm_data, + bool is_64) +{ + int ret; + /* sync memory before sending the commands to tz */ + __iowmb(); + + if (!is_64) { + spin_lock(&tz_lock); + ret = qcom_scm_io_reset(); + spin_unlock(&tz_lock); + } else { + ret = qcom_scm_dcvs_reset(); + } + + return ret; +} + +static int __secure_tz_update_entry3(int level, s64 total_time, s64 busy_time, + int context_count, struct devfreq_msm_adreno_tz_data *priv) +{ + int ret; + /* sync memory before sending the commands to tz */ + __iowmb(); + + if (!priv->is_64) { + spin_lock(&tz_lock); + ret = qcom_scm_dcvs_update(level, total_time, busy_time); + spin_unlock(&tz_lock); + } else if (!priv->ctxt_aware_enable) { + ret = qcom_scm_dcvs_update_v2(level, total_time, busy_time); + } else { + ret = qcom_scm_dcvs_update_ca_v2(level, total_time, busy_time, + context_count); + } + + return ret; +} + +static int tz_init_ca(struct device *dev, + struct devfreq_msm_adreno_tz_data *priv) +{ + unsigned int tz_ca_data[2]; + phys_addr_t paddr; + u8 *tz_buf; + int ret; + struct qtee_shm shm; + + /* Set data for TZ */ + tz_ca_data[0] = priv->bin.ctxt_aware_target_pwrlevel; + tz_ca_data[1] = priv->bin.ctxt_aware_busy_penalty; + + if (!qtee_shmbridge_is_enabled()) { + tz_buf = kzalloc(PAGE_ALIGN(sizeof(tz_ca_data)), GFP_KERNEL); + if (!tz_buf) + return -ENOMEM; + paddr = virt_to_phys(tz_buf); + } else { + ret = qtee_shmbridge_allocate_shm( + PAGE_ALIGN(sizeof(tz_ca_data)), &shm); + if (ret) + return -ENOMEM; + tz_buf = shm.vaddr; + paddr = shm.paddr; + } + + memcpy(tz_buf, tz_ca_data, sizeof(tz_ca_data)); + /* Ensure memcpy completes execution */ + mb(); + dma_sync_single_for_device(dev, paddr, + PAGE_ALIGN(sizeof(tz_ca_data)), DMA_BIDIRECTIONAL); + + ret = qcom_scm_dcvs_init_ca_v2(paddr, sizeof(tz_ca_data)); + + if (!qtee_shmbridge_is_enabled()) + kfree_sensitive(tz_buf); + else + qtee_shmbridge_free_shm(&shm); + + return ret; +} + +static int tz_init(struct device *dev, struct devfreq_msm_adreno_tz_data *priv, + unsigned int *tz_pwrlevels, u32 size_pwrlevels, + unsigned int *version, u32 size_version) +{ + int ret; + phys_addr_t paddr; + + if (qcom_scm_dcvs_core_available()) { + u8 *tz_buf; + struct qtee_shm shm; + + if (!qtee_shmbridge_is_enabled()) { + tz_buf = kzalloc(PAGE_ALIGN(size_pwrlevels), + GFP_KERNEL); + if (!tz_buf) + return -ENOMEM; + paddr = virt_to_phys(tz_buf); + } else { + ret = qtee_shmbridge_allocate_shm( + PAGE_ALIGN(size_pwrlevels), &shm); + if (ret) + return -ENOMEM; + tz_buf = shm.vaddr; + paddr = shm.paddr; + } + + memcpy(tz_buf, tz_pwrlevels, size_pwrlevels); + /* Ensure memcpy completes execution */ + mb(); + dma_sync_single_for_device(dev, paddr, + PAGE_ALIGN(size_pwrlevels), DMA_BIDIRECTIONAL); + + ret = qcom_scm_dcvs_init_v2(paddr, size_pwrlevels, version); + if (!ret) + priv->is_64 = true; + if (!qtee_shmbridge_is_enabled()) + kfree_sensitive(tz_buf); + else + qtee_shmbridge_free_shm(&shm); + } else + ret = -EINVAL; + + /* Initialize context aware feature, if enabled. */ + if (!ret && priv->ctxt_aware_enable) { + if (priv->is_64 && qcom_scm_dcvs_ca_available()) { + ret = tz_init_ca(dev, priv); + /* + * If context aware feature initialization fails, + * just print an error message and return + * success as normal DCVS will still work. 
+ */ + if (ret) { + pr_err(TAG "tz: context aware DCVS init failed\n"); + priv->ctxt_aware_enable = false; + return 0; + } + } else { + pr_warn(TAG "tz: context aware DCVS not supported\n"); + priv->ctxt_aware_enable = false; + } + } + + return ret; +} + +static inline int devfreq_get_freq_level(struct devfreq *devfreq, + unsigned long freq) +{ + int lev; + + for (lev = 0; lev < devfreq->profile->max_state; lev++) + if (freq == devfreq->profile->freq_table[lev]) + return lev; + + return -EINVAL; +} + +static int tz_get_target_freq(struct devfreq *devfreq, unsigned long *freq) +{ + int result = 0; + struct devfreq_msm_adreno_tz_data *priv = devfreq->data; + struct devfreq_dev_status *stats = &devfreq->last_status; + int val, level = 0; + int context_count = 0; + u64 busy_time; + + if (!priv) + return 0; + + /* keeps stats.private_data == NULL */ + result = devfreq_update_stats(devfreq); + if (result) { + pr_err(TAG "get_status failed %d\n", result); + return result; + } + + *freq = stats->current_frequency; + priv->bin.total_time += stats->total_time; + + /* Update gpu busy time as per mod_percent */ + busy_time = stats->busy_time * priv->mod_percent; + do_div(busy_time, 100); + + /* busy_time should not go over total_time */ + stats->busy_time = min_t(u64, busy_time, stats->total_time); + + priv->bin.busy_time += stats->busy_time; + + if (stats->private_data) + context_count = *((int *)stats->private_data); + + /* Update the GPU load statistics */ + compute_work_load(stats, priv, devfreq); + /* + * Do not waste CPU cycles running this algorithm if + * the GPU just started, or if less than FLOOR time + * has passed since the last run or the gpu hasn't been + * busier than MIN_BUSY. + */ + if ((stats->total_time == 0) || + (priv->bin.total_time < FLOOR) || + (unsigned int) priv->bin.busy_time < MIN_BUSY) { + return 0; + } + + level = devfreq_get_freq_level(devfreq, stats->current_frequency); + if (level < 0) { + pr_err(TAG "bad freq %ld\n", stats->current_frequency); + return level; + } + + /* + * If there is an extended block of busy processing, + * increase frequency. Otherwise run the normal algorithm. + */ + if (!priv->disable_busy_time_burst && + priv->bin.busy_time > CEILING) { + val = -1 * level; + } else { + val = __secure_tz_update_entry3(level, priv->bin.total_time, + priv->bin.busy_time, context_count, priv); + } + + priv->bin.total_time = 0; + priv->bin.busy_time = 0; + + /* + * If the decision is to move to a different level, make sure the GPU + * frequency changes. 
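+ * Illustrative example (values assumed): with max_state == 5 and the GPU
+ * currently at level 3, a TZ return of -2 moves the vote to level 1 (a
+ * faster pwrlevel, since index 0 is the highest clock), while +1 moves it
+ * to level 4; the result is always clamped to [0, max_state - 1].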
+ */ + if (val) { + level += val; + level = max(level, 0); + level = min_t(int, level, devfreq->profile->max_state - 1); + } + + *freq = devfreq->profile->freq_table[level]; + return 0; +} + +static int tz_start(struct devfreq *devfreq) +{ + struct devfreq_msm_adreno_tz_data *priv; + unsigned int tz_pwrlevels[MSM_ADRENO_MAX_PWRLEVELS + 1]; + int i, out, ret; + unsigned int version; + + struct msm_adreno_extended_profile *gpu_profile = container_of( + (devfreq->profile), + struct msm_adreno_extended_profile, + profile); + + /* + * Assuming that we have only one instance of the adreno device + * connected to this governor, + * can safely restore the pointer to the governor private data + * from the container of the device profile + */ + devfreq->data = gpu_profile->private_data; + + priv = devfreq->data; + + out = 1; + if (devfreq->profile->max_state < ARRAY_SIZE(tz_pwrlevels)) { + for (i = 0; i < devfreq->profile->max_state; i++) + tz_pwrlevels[out++] = devfreq->profile->freq_table[i]; + tz_pwrlevels[0] = i; + } else { + pr_err(TAG "tz_pwrlevels[] is too short\n"); + return -EINVAL; + } + + ret = tz_init(&devfreq->dev, priv, tz_pwrlevels, sizeof(tz_pwrlevels), + &version, sizeof(version)); + if (ret != 0 || version > MAX_TZ_VERSION) { + pr_err(TAG "tz_init failed\n"); + return ret; + } + + for (i = 0; adreno_tz_attr_list[i] != NULL; i++) + device_create_file(&devfreq->dev, adreno_tz_attr_list[i]); + + return 0; +} + +static int tz_stop(struct devfreq *devfreq) +{ + int i; + + for (i = 0; adreno_tz_attr_list[i] != NULL; i++) + device_remove_file(&devfreq->dev, adreno_tz_attr_list[i]); + + /* leaving the governor and cleaning the pointer to private data */ + devfreq->data = NULL; + return 0; +} + +static int tz_suspend(struct devfreq *devfreq) +{ + struct devfreq_msm_adreno_tz_data *priv = devfreq->data; + unsigned int scm_data[2] = {0, 0}; + + if (!priv) + return 0; + + __secure_tz_reset_entry2(scm_data, sizeof(scm_data), priv->is_64); + + priv->bin.total_time = 0; + priv->bin.busy_time = 0; + return 0; +} + +static int tz_handler(struct devfreq *devfreq, unsigned int event, void *data) +{ + int result; + struct device_node *node = devfreq->dev.parent->of_node; + + if (!of_device_is_compatible(node, "qcom,kgsl-3d0")) + return -EINVAL; + + switch (event) { + case DEVFREQ_GOV_START: + result = tz_start(devfreq); + break; + + case DEVFREQ_GOV_STOP: + spin_lock(&suspend_lock); + suspend_start = 0; + spin_unlock(&suspend_lock); + result = tz_stop(devfreq); + break; + + case DEVFREQ_GOV_SUSPEND: + result = tz_suspend(devfreq); + if (!result) { + spin_lock(&suspend_lock); + /* Collect the start sample for suspend time */ + suspend_start = (u64)ktime_to_ms(ktime_get()); + spin_unlock(&suspend_lock); + } + break; + + case DEVFREQ_GOV_RESUME: + spin_lock(&suspend_lock); + suspend_time += suspend_time_ms(); + /* Reset the suspend_start when gpu resumes */ + suspend_start = 0; + spin_unlock(&suspend_lock); + /* fallthrough */ + case DEVFREQ_GOV_UPDATE_INTERVAL: + /* fallthrough, this governor doesn't use polling */ + default: + result = 0; + break; + } + + return result; +} + +static struct devfreq_governor msm_adreno_tz = { + .name = "msm-adreno-tz", + .get_target_freq = tz_get_target_freq, + .event_handler = tz_handler, + .immutable = 1, +}; + +int msm_adreno_tz_init(void) +{ + return devfreq_add_governor(&msm_adreno_tz); +} + +void msm_adreno_tz_exit(void) +{ + int ret = devfreq_remove_governor(&msm_adreno_tz); + + if (ret) + pr_err(TAG "failed to remove governor %d\n", ret); +} diff --git 
a/include/linux/msm_kgsl.h b/include/linux/msm_kgsl.h new file mode 100644 index 0000000000..e5950b135d --- /dev/null +++ b/include/linux/msm_kgsl.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + */ +#ifndef _MSM_KGSL_H +#define _MSM_KGSL_H + +/** + * struct kgsl_gpu_freq_stat - Per GPU freq stat struct + * @freq: GPU frequency in Hz + * @active_time: GPU busy time in usecs + * @idle_time: GPU idle time in usec + */ +struct kgsl_gpu_freq_stat { + u32 freq; + u64 active_time; + u64 idle_time; +}; + +/** + * kgsl_gpu_num_freqs - Get number of available GPU frequencies + * + * Return: number of available frequencies on success or negative error + * on failure + */ +int kgsl_gpu_num_freqs(void); + +/** + * kgsl_gpu_stat - Get per GPU freq stats + * @stats: Array of struct kgsl_gpu_freq_stat to hold stats + * @numfreq: Number of entries in @stats + * + * This function will populate @stats with per freq stats. + * Number of entries in @stats array must be greater or + * equal to value returned by function kgsl_gpu_num_freqs + * + * Return: 0 on success or negative error on failure + */ +int kgsl_gpu_stat(struct kgsl_gpu_freq_stat *stats, u32 numfreq); + +/** + * kgsl_gpu_frame_count - Get number of frames already processed by GPU + * @pid: pid of the process for which frame count is required + * @frame_count: pointer to a u64 to store frame count + * + * Return: zero on success and number of frames processed corresponding + * to @pid in @frame_count or negative error on failure + */ +int kgsl_gpu_frame_count(pid_t pid, u64 *frame_count); + +#endif /* _MSM_KGSL_H */ + diff --git a/include/uapi/linux/msm_kgsl.h b/include/uapi/linux/msm_kgsl.h new file mode 100644 index 0000000000..4b67887f09 --- /dev/null +++ b/include/uapi/linux/msm_kgsl.h @@ -0,0 +1,2001 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (c) 2018-2021, The Linux Foundation. All rights reserved. + */ + +#ifndef _UAPI_MSM_KGSL_H +#define _UAPI_MSM_KGSL_H + +#include +#include + +/* + * The KGSL version has proven not to be very useful in userspace if features + * are cherry picked into other trees out of order so it is frozen as of 3.14. + * It is left here for backwards compatabilty and as a reminder that + * software releases are never linear. Also, I like pie. + */ + +#define KGSL_VERSION_MAJOR 3 +#define KGSL_VERSION_MINOR 14 + +/* + * We have traditionally mixed context and issueibcmds / command batch flags + * together into a big flag stew. This worked fine until we started adding a + * lot more command batch flags and we started running out of bits. Turns out + * we have a bit of room in the context type / priority mask that we could use + * for command batches, but that means we need to split out the flags into two + * coherent sets. + * + * If any future definitions are for both context and cmdbatch add both defines + * and link the cmdbatch to the context define as we do below. Otherwise feel + * free to add exclusive bits to either set. 
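+ * Illustrative example (using the definitions below): a GL context
+ * requested at priority 1 carries
+ * (1 << KGSL_CONTEXT_PRIORITY_SHIFT) |
+ * (KGSL_CONTEXT_TYPE_GL << KGSL_CONTEXT_TYPE_SHIFT) == 0x00101000
+ * in its flags, leaving the low bits free for the boolean context flags.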
+ */ + +/* --- context flags --- */ +#define KGSL_CONTEXT_SAVE_GMEM 0x00000001 +#define KGSL_CONTEXT_NO_GMEM_ALLOC 0x00000002 +/* This is a cmdbatch exclusive flag - use the CMDBATCH equivalent instead */ +#define KGSL_CONTEXT_SUBMIT_IB_LIST 0x00000004 +#define KGSL_CONTEXT_CTX_SWITCH 0x00000008 +#define KGSL_CONTEXT_PREAMBLE 0x00000010 +#define KGSL_CONTEXT_TRASH_STATE 0x00000020 +#define KGSL_CONTEXT_PER_CONTEXT_TS 0x00000040 +#define KGSL_CONTEXT_USER_GENERATED_TS 0x00000080 +/* This is a cmdbatch exclusive flag - use the CMDBATCH equivalent instead */ +#define KGSL_CONTEXT_END_OF_FRAME 0x00000100 +#define KGSL_CONTEXT_NO_FAULT_TOLERANCE 0x00000200 +/* This is a cmdbatch exclusive flag - use the CMDBATCH equivalent instead */ +#define KGSL_CONTEXT_SYNC 0x00000400 +#define KGSL_CONTEXT_PWR_CONSTRAINT 0x00000800 +#define KGSL_CONTEXT_PRIORITY_MASK 0x0000F000 +#define KGSL_CONTEXT_PRIORITY_SHIFT 12 +#define KGSL_CONTEXT_PRIORITY_UNDEF 0 + +#define KGSL_CONTEXT_IFH_NOP 0x00010000 +#define KGSL_CONTEXT_SECURE 0x00020000 +#define KGSL_CONTEXT_NO_SNAPSHOT 0x00040000 +#define KGSL_CONTEXT_SPARSE 0x00080000 + +#define KGSL_CONTEXT_PREEMPT_STYLE_MASK 0x0E000000 +#define KGSL_CONTEXT_PREEMPT_STYLE_SHIFT 25 +#define KGSL_CONTEXT_PREEMPT_STYLE_DEFAULT 0x0 +#define KGSL_CONTEXT_PREEMPT_STYLE_RINGBUFFER 0x1 +#define KGSL_CONTEXT_PREEMPT_STYLE_FINEGRAIN 0x2 + +#define KGSL_CONTEXT_TYPE_MASK 0x01F00000 +#define KGSL_CONTEXT_TYPE_SHIFT 20 +#define KGSL_CONTEXT_TYPE_ANY 0 +#define KGSL_CONTEXT_TYPE_GL 1 +#define KGSL_CONTEXT_TYPE_CL 2 +#define KGSL_CONTEXT_TYPE_C2D 3 +#define KGSL_CONTEXT_TYPE_RS 4 +#define KGSL_CONTEXT_TYPE_VK 5 +#define KGSL_CONTEXT_TYPE_UNKNOWN 0x1E + +#define KGSL_CONTEXT_INVALIDATE_ON_FAULT 0x10000000 + +#define KGSL_CONTEXT_INVALID 0xffffffff + +/* + * --- command batch flags --- + * The bits that are linked to a KGSL_CONTEXT equivalent are either legacy + * definitions or bits that are valid for both contexts and cmdbatches. To be + * safe the other 8 bits that are still available in the context field should be + * omitted here in case we need to share - the other bits are available for + * cmdbatch only flags as needed + */ +#define KGSL_CMDBATCH_MEMLIST 0x00000001 +#define KGSL_CMDBATCH_MARKER 0x00000002 +#define KGSL_CMDBATCH_SUBMIT_IB_LIST KGSL_CONTEXT_SUBMIT_IB_LIST /* 0x004 */ +#define KGSL_CMDBATCH_CTX_SWITCH KGSL_CONTEXT_CTX_SWITCH /* 0x008 */ +#define KGSL_CMDBATCH_PROFILING 0x00000010 +/* + * KGSL_CMDBATCH_PROFILING must also be set for KGSL_CMDBATCH_PROFILING_KTIME + * to take effect, as the latter only affects the time data returned. + */ +#define KGSL_CMDBATCH_PROFILING_KTIME 0x00000020 +#define KGSL_CMDBATCH_END_OF_FRAME KGSL_CONTEXT_END_OF_FRAME /* 0x100 */ +#define KGSL_CMDBATCH_SYNC KGSL_CONTEXT_SYNC /* 0x400 */ +#define KGSL_CMDBATCH_PWR_CONSTRAINT KGSL_CONTEXT_PWR_CONSTRAINT /* 0x800 */ +#define KGSL_CMDBATCH_SPARSE 0x1000 /* 0x1000 */ + +/* + * Reserve bits [16:19] and bits [28:31] for possible bits shared between + * contexts and command batches. Update this comment as new flags are added. 
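+ * Illustrative example: a command batch profiled with kernel time would be
+ * submitted with KGSL_CMDBATCH_PROFILING | KGSL_CMDBATCH_PROFILING_KTIME
+ * (0x30), both of which fall outside the bits reserved above.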
+ */ + +/* + * gpu_command_object flags - these flags communicate the type of command or + * memory object being submitted for a GPU command + */ + +/* Flags for GPU command objects */ +#define KGSL_CMDLIST_IB 0x00000001U +#define KGSL_CMDLIST_CTXTSWITCH_PREAMBLE 0x00000002U +#define KGSL_CMDLIST_IB_PREAMBLE 0x00000004U + +/* Flags for GPU command memory objects */ +#define KGSL_OBJLIST_MEMOBJ 0x00000008U +#define KGSL_OBJLIST_PROFILE 0x00000010U + +/* Flags for GPU command sync points */ +#define KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP 0 +#define KGSL_CMD_SYNCPOINT_TYPE_FENCE 1 +#define KGSL_CMD_SYNCPOINT_TYPE_TIMELINE 2 + +/* --- Memory allocation flags --- */ + +/* General allocation hints */ +#define KGSL_MEMFLAGS_SECURE (1ULL << 3) +#define KGSL_MEMFLAGS_GPUREADONLY (1ULL << 24) +#define KGSL_MEMFLAGS_GPUWRITEONLY (1ULL << 25) +#define KGSL_MEMFLAGS_FORCE_32BIT (1ULL << 32) + +/* Flag for binding all the virt range to single phys data */ +#define KGSL_SPARSE_BIND_MULTIPLE_TO_PHYS 0x400000000ULL +#define KGSL_SPARSE_BIND 0x1ULL +#define KGSL_SPARSE_UNBIND 0x2ULL + +/* Memory caching hints */ +#define KGSL_CACHEMODE_MASK 0x0C000000U +#define KGSL_CACHEMODE_SHIFT 26 + +#define KGSL_CACHEMODE_WRITECOMBINE 0 +#define KGSL_CACHEMODE_UNCACHED 1 +#define KGSL_CACHEMODE_WRITETHROUGH 2 +#define KGSL_CACHEMODE_WRITEBACK 3 + +#define KGSL_MEMFLAGS_USE_CPU_MAP (1ULL << 28) +#define KGSL_MEMFLAGS_SPARSE_PHYS (1ULL << 29) +#define KGSL_MEMFLAGS_SPARSE_VIRT (1ULL << 30) +#define KGSL_MEMFLAGS_IOCOHERENT (1ULL << 31) +#define KGSL_MEMFLAGS_GUARD_PAGE (1ULL << 33) +#define KGSL_MEMFLAGS_VBO (1ULL << 34) + +/* Memory types for which allocations are made */ +#define KGSL_MEMTYPE_MASK 0x0000FF00 +#define KGSL_MEMTYPE_SHIFT 8 + +#define KGSL_MEMTYPE_OBJECTANY 0 +#define KGSL_MEMTYPE_FRAMEBUFFER 1 +#define KGSL_MEMTYPE_RENDERBUFFER 2 +#define KGSL_MEMTYPE_ARRAYBUFFER 3 +#define KGSL_MEMTYPE_ELEMENTARRAYBUFFER 4 +#define KGSL_MEMTYPE_VERTEXARRAYBUFFER 5 +#define KGSL_MEMTYPE_TEXTURE 6 +#define KGSL_MEMTYPE_SURFACE 7 +#define KGSL_MEMTYPE_EGL_SURFACE 8 +#define KGSL_MEMTYPE_GL 9 +#define KGSL_MEMTYPE_CL 10 +#define KGSL_MEMTYPE_CL_BUFFER_MAP 11 +#define KGSL_MEMTYPE_CL_BUFFER_NOMAP 12 +#define KGSL_MEMTYPE_CL_IMAGE_MAP 13 +#define KGSL_MEMTYPE_CL_IMAGE_NOMAP 14 +#define KGSL_MEMTYPE_CL_KERNEL_STACK 15 +#define KGSL_MEMTYPE_COMMAND 16 +#define KGSL_MEMTYPE_2D 17 +#define KGSL_MEMTYPE_EGL_IMAGE 18 +#define KGSL_MEMTYPE_EGL_SHADOW 19 +#define KGSL_MEMTYPE_MULTISAMPLE 20 +#define KGSL_MEMTYPE_KERNEL 255 + +/* + * Alignment hint, passed as the power of 2 exponent. + * i.e 4k (2^12) would be 12, 64k (2^16)would be 16. + */ +#define KGSL_MEMALIGN_MASK 0x00FF0000 +#define KGSL_MEMALIGN_SHIFT 16 + +enum kgsl_user_mem_type { + KGSL_USER_MEM_TYPE_PMEM = 0x00000000, + KGSL_USER_MEM_TYPE_ASHMEM = 0x00000001, + KGSL_USER_MEM_TYPE_ADDR = 0x00000002, + KGSL_USER_MEM_TYPE_ION = 0x00000003, + /* + * ION type is retained for backwards compatibility but Ion buffers are + * dma-bufs so try to use that naming if we can + */ + KGSL_USER_MEM_TYPE_DMABUF = 0x00000003, + KGSL_USER_MEM_TYPE_MAX = 0x00000007, +}; +#define KGSL_MEMFLAGS_USERMEM_MASK 0x000000e0 +#define KGSL_MEMFLAGS_USERMEM_SHIFT 5 + +/* + * Unfortunately, enum kgsl_user_mem_type starts at 0 which does not + * leave a good value for allocated memory. In the flags we use + * 0 to indicate allocated memory and thus need to add 1 to the enum + * values. 
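+ * Illustrative example: an imported ION/dma-buf buffer is tagged with
+ * KGSL_MEMFLAGS_USERMEM_ION == (3 + 1) << 5 == 0x80, while 0 in bits [7:5]
+ * means the buffer was allocated by kgsl itself.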
+ */ +#define KGSL_USERMEM_FLAG(x) (((x) + 1) << KGSL_MEMFLAGS_USERMEM_SHIFT) + +#define KGSL_MEMFLAGS_NOT_USERMEM 0 +#define KGSL_MEMFLAGS_USERMEM_PMEM KGSL_USERMEM_FLAG(KGSL_USER_MEM_TYPE_PMEM) +#define KGSL_MEMFLAGS_USERMEM_ASHMEM \ + KGSL_USERMEM_FLAG(KGSL_USER_MEM_TYPE_ASHMEM) +#define KGSL_MEMFLAGS_USERMEM_ADDR KGSL_USERMEM_FLAG(KGSL_USER_MEM_TYPE_ADDR) +#define KGSL_MEMFLAGS_USERMEM_ION KGSL_USERMEM_FLAG(KGSL_USER_MEM_TYPE_ION) + +/* --- generic KGSL flag values --- */ + +#define KGSL_FLAGS_NORMALMODE 0x00000000 +#define KGSL_FLAGS_SAFEMODE 0x00000001 +#define KGSL_FLAGS_INITIALIZED0 0x00000002 +#define KGSL_FLAGS_INITIALIZED 0x00000004 +#define KGSL_FLAGS_STARTED 0x00000008 +#define KGSL_FLAGS_ACTIVE 0x00000010 +#define KGSL_FLAGS_RESERVED0 0x00000020 +#define KGSL_FLAGS_RESERVED1 0x00000040 +#define KGSL_FLAGS_RESERVED2 0x00000080 +#define KGSL_FLAGS_SOFT_RESET 0x00000100 +#define KGSL_FLAGS_PER_CONTEXT_TIMESTAMPS 0x00000200 + +/* Server Side Sync Timeout in milliseconds */ +#define KGSL_SYNCOBJ_SERVER_TIMEOUT 2000 + +/* UBWC Modes */ +#define KGSL_UBWC_NONE 0 +#define KGSL_UBWC_1_0 1 +#define KGSL_UBWC_2_0 2 +#define KGSL_UBWC_3_0 3 +#define KGSL_UBWC_4_0 4 + +/* + * Reset status values for context + */ +enum kgsl_ctx_reset_stat { + KGSL_CTX_STAT_NO_ERROR = 0x00000000, + KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT = 0x00000001, + KGSL_CTX_STAT_INNOCENT_CONTEXT_RESET_EXT = 0x00000002, + KGSL_CTX_STAT_UNKNOWN_CONTEXT_RESET_EXT = 0x00000003 +}; + +#define KGSL_CONVERT_TO_MBPS(val) \ + (val*1000*1000U) + +struct kgsl_devinfo { + + unsigned int device_id; + /* + * chip revision id + * coreid:8 majorrev:8 minorrev:8 patch:8 + */ + unsigned int chip_id; + unsigned int mmu_enabled; + unsigned long gmem_gpubaseaddr; + /* + * This field contains the adreno revision + * number 200, 205, 220, etc... + */ + unsigned int gpu_id; + __kernel_size_t gmem_sizebytes; +}; + +/* + * struct kgsl_devmemstore - this structure defines the region of memory + * that can be mmap()ed from this driver. The timestamp fields are volatile + * because they are written by the GPU + * @soptimestamp: Start of pipeline timestamp written by GPU before the + * commands in concern are processed + * @sbz: Unused, kept for 8 byte alignment + * @eoptimestamp: End of pipeline timestamp written by GPU after the + * commands in concern are processed + * @sbz2: Unused, kept for 8 byte alignment + * @preempted: Indicates if the context was preempted + * @sbz3: Unused, kept for 8 byte alignment + * @ref_wait_ts: Timestamp on which to generate interrupt, unused now. 
+ * @sbz4: Unused, kept for 8 byte alignment + * @current_context: The current context the GPU is working on + * @sbz5: Unused, kept for 8 byte alignment + */ +struct kgsl_devmemstore { + volatile unsigned int soptimestamp; + unsigned int sbz; + volatile unsigned int eoptimestamp; + unsigned int sbz2; + volatile unsigned int preempted; + unsigned int sbz3; + volatile unsigned int ref_wait_ts; + unsigned int sbz4; + unsigned int current_context; + unsigned int sbz5; +}; + +#define KGSL_MEMSTORE_OFFSET(ctxt_id, field) \ + ((ctxt_id)*sizeof(struct kgsl_devmemstore) + \ + offsetof(struct kgsl_devmemstore, field)) + +/* timestamp id*/ +enum kgsl_timestamp_type { + KGSL_TIMESTAMP_CONSUMED = 0x00000001, /* start-of-pipeline timestamp */ + KGSL_TIMESTAMP_RETIRED = 0x00000002, /* end-of-pipeline timestamp*/ + KGSL_TIMESTAMP_QUEUED = 0x00000003, +}; + +/* property types - used with kgsl_device_getproperty */ +#define KGSL_PROP_DEVICE_INFO 0x1 +#define KGSL_PROP_DEVICE_SHADOW 0x2 +#define KGSL_PROP_DEVICE_POWER 0x3 +#define KGSL_PROP_SHMEM 0x4 +#define KGSL_PROP_SHMEM_APERTURES 0x5 +#define KGSL_PROP_MMU_ENABLE 0x6 +#define KGSL_PROP_INTERRUPT_WAITS 0x7 +#define KGSL_PROP_VERSION 0x8 +#define KGSL_PROP_GPU_RESET_STAT 0x9 +#define KGSL_PROP_PWRCTRL 0xE +#define KGSL_PROP_PWR_CONSTRAINT 0x12 +#define KGSL_PROP_UCHE_GMEM_VADDR 0x13 +#define KGSL_PROP_SP_GENERIC_MEM 0x14 +#define KGSL_PROP_UCODE_VERSION 0x15 +#define KGSL_PROP_GPMU_VERSION 0x16 +#define KGSL_PROP_HIGHEST_BANK_BIT 0x17 +#define KGSL_PROP_DEVICE_BITNESS 0x18 +#define KGSL_PROP_DEVICE_QDSS_STM 0x19 +#define KGSL_PROP_MIN_ACCESS_LENGTH 0x1A +#define KGSL_PROP_UBWC_MODE 0x1B +#define KGSL_PROP_DEVICE_QTIMER 0x20 +#define KGSL_PROP_L3_PWR_CONSTRAINT 0x22 +#define KGSL_PROP_SECURE_BUFFER_ALIGNMENT 0x23 +#define KGSL_PROP_SECURE_CTXT_SUPPORT 0x24 +#define KGSL_PROP_SPEED_BIN 0x25 +#define KGSL_PROP_GAMING_BIN 0x26 +#define KGSL_PROP_QUERY_CAPABILITIES 0x27 +#define KGSL_PROP_CONTEXT_PROPERTY 0x28 +#define KGSL_PROP_GPU_MODEL 0x29 +#define KGSL_PROP_VK_DEVICE_ID 0x2A + +/* + * kgsl_capabilities_properties returns a list of supported properties. + * If the user passes 0 for 'count' the kernel will set it to the number of + * supported properties. The list is expected to be 'count * sizeof(__u32)' + * bytes long. The kernel will return the actual number of entries copied into + * list via 'count'. + */ +struct kgsl_capabilities_properties { + __u64 list; + __u32 count; +}; + +/* + * KGSL_QUERY_CAPS_PROPERTIES returns a list of the valid properties in the + * kernel. The subtype data should be struct kgsl_capabilities_properties + */ +#define KGSL_QUERY_CAPS_PROPERTIES 1 + +/* + * kgsl_capabilities allows the user to query kernel capabilities. The 'data' + * type should be set appropriately for the querytype (see above). Pass 0 to + * 'size' and the kernel will set it to the expected size of 'data' that is + * appropriate for querytype (in bytes). 
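+ * Illustrative userspace sketch (an assumption based on the property list
+ * above: the structure is expected to be passed through
+ * IOCTL_KGSL_DEVICE_GETPROPERTY with type KGSL_PROP_QUERY_CAPABILITIES;
+ * error handling omitted, kgsl_fd is an already-open /dev/kgsl-3d0 fd):
+ *
+ *	struct kgsl_capabilities caps = {
+ *		.querytype = KGSL_QUERY_CAPS_PROPERTIES,
+ *	};
+ *	struct kgsl_device_getproperty prop = {
+ *		.type = KGSL_PROP_QUERY_CAPABILITIES,
+ *		.value = &caps,
+ *		.sizebytes = sizeof(caps),
+ *	};
+ *	ioctl(kgsl_fd, IOCTL_KGSL_DEVICE_GETPROPERTY, &prop);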
+ */ +struct kgsl_capabilities { + __u64 data; + __u64 size; + __u32 querytype; +}; + +struct kgsl_shadowprop { + unsigned long gpuaddr; + __kernel_size_t size; + unsigned int flags; /* contains KGSL_FLAGS_ values */ +}; + +struct kgsl_qdss_stm_prop { + __u64 gpuaddr; + __u64 size; +}; + +struct kgsl_qtimer_prop { + __u64 gpuaddr; + __u64 size; +}; + +struct kgsl_version { + unsigned int drv_major; + unsigned int drv_minor; + unsigned int dev_major; + unsigned int dev_minor; +}; + +struct kgsl_sp_generic_mem { + __u64 local; + __u64 pvt; +}; + +struct kgsl_ucode_version { + unsigned int pfp; + unsigned int pm4; +}; + +struct kgsl_gpmu_version { + unsigned int major; + unsigned int minor; + unsigned int features; +}; + +struct kgsl_context_property { + __u64 data; + __u32 size; + __u32 type; + __u32 contextid; +}; + +struct kgsl_context_property_fault { + __s32 faults; + __u32 timestamp; +}; + +struct kgsl_gpu_model { + char gpu_model[32]; +}; + +/* Context property sub types */ +#define KGSL_CONTEXT_PROP_FAULTS 1 + +/* Performance counter groups */ + +#define KGSL_PERFCOUNTER_GROUP_CP 0x0 +#define KGSL_PERFCOUNTER_GROUP_RBBM 0x1 +#define KGSL_PERFCOUNTER_GROUP_PC 0x2 +#define KGSL_PERFCOUNTER_GROUP_VFD 0x3 +#define KGSL_PERFCOUNTER_GROUP_HLSQ 0x4 +#define KGSL_PERFCOUNTER_GROUP_VPC 0x5 +#define KGSL_PERFCOUNTER_GROUP_TSE 0x6 +#define KGSL_PERFCOUNTER_GROUP_RAS 0x7 +#define KGSL_PERFCOUNTER_GROUP_UCHE 0x8 +#define KGSL_PERFCOUNTER_GROUP_TP 0x9 +#define KGSL_PERFCOUNTER_GROUP_SP 0xA +#define KGSL_PERFCOUNTER_GROUP_RB 0xB +#define KGSL_PERFCOUNTER_GROUP_PWR 0xC +#define KGSL_PERFCOUNTER_GROUP_VBIF 0xD +#define KGSL_PERFCOUNTER_GROUP_VBIF_PWR 0xE +#define KGSL_PERFCOUNTER_GROUP_MH 0xF +#define KGSL_PERFCOUNTER_GROUP_PA_SU 0x10 +#define KGSL_PERFCOUNTER_GROUP_SQ 0x11 +#define KGSL_PERFCOUNTER_GROUP_SX 0x12 +#define KGSL_PERFCOUNTER_GROUP_TCF 0x13 +#define KGSL_PERFCOUNTER_GROUP_TCM 0x14 +#define KGSL_PERFCOUNTER_GROUP_TCR 0x15 +#define KGSL_PERFCOUNTER_GROUP_L2 0x16 +#define KGSL_PERFCOUNTER_GROUP_VSC 0x17 +#define KGSL_PERFCOUNTER_GROUP_CCU 0x18 +#define KGSL_PERFCOUNTER_GROUP_LRZ 0x19 +#define KGSL_PERFCOUNTER_GROUP_CMP 0x1A +#define KGSL_PERFCOUNTER_GROUP_ALWAYSON 0x1B +#define KGSL_PERFCOUNTER_GROUP_SP_PWR 0x1C +#define KGSL_PERFCOUNTER_GROUP_TP_PWR 0x1D +#define KGSL_PERFCOUNTER_GROUP_RB_PWR 0x1E +#define KGSL_PERFCOUNTER_GROUP_CCU_PWR 0x1F +#define KGSL_PERFCOUNTER_GROUP_UCHE_PWR 0x20 +#define KGSL_PERFCOUNTER_GROUP_CP_PWR 0x21 +#define KGSL_PERFCOUNTER_GROUP_GPMU_PWR 0x22 +#define KGSL_PERFCOUNTER_GROUP_ALWAYSON_PWR 0x23 +#define KGSL_PERFCOUNTER_GROUP_GLC 0x24 +#define KGSL_PERFCOUNTER_GROUP_FCHE 0x25 +#define KGSL_PERFCOUNTER_GROUP_MHUB 0x26 +#define KGSL_PERFCOUNTER_GROUP_GMU_XOCLK 0x27 +#define KGSL_PERFCOUNTER_GROUP_GMU_GMUCLK 0x28 +#define KGSL_PERFCOUNTER_GROUP_GMU_PERF 0x29 +#define KGSL_PERFCOUNTER_GROUP_SW 0x2a +#define KGSL_PERFCOUNTER_GROUP_UFC 0x2b +#define KGSL_PERFCOUNTER_GROUP_BV_CP 0x2c +#define KGSL_PERFCOUNTER_GROUP_BV_PC 0x2d +#define KGSL_PERFCOUNTER_GROUP_BV_VFD 0x2e +#define KGSL_PERFCOUNTER_GROUP_BV_VPC 0x2f +#define KGSL_PERFCOUNTER_GROUP_BV_TP 0x30 +#define KGSL_PERFCOUNTER_GROUP_BV_SP 0x31 +#define KGSL_PERFCOUNTER_GROUP_BV_UFC 0x32 +#define KGSL_PERFCOUNTER_GROUP_MAX 0x33 + +#define KGSL_PERFCOUNTER_NOT_USED 0xFFFFFFFF +#define KGSL_PERFCOUNTER_BROKEN 0xFFFFFFFE + +/* structure holds list of ibs */ +struct kgsl_ibdesc { + unsigned long gpuaddr; + unsigned long __pad; + __kernel_size_t sizedwords; + unsigned int ctrl; +}; + +/** + * struct 
kgsl_cmdbatch_profiling_buffer + * @wall_clock_s: Ringbuffer submission time (seconds). + * If KGSL_CMDBATCH_PROFILING_KTIME is set, time is provided + * in kernel clocks, otherwise wall clock time is used. + * @wall_clock_ns: Ringbuffer submission time (nanoseconds). + * If KGSL_CMDBATCH_PROFILING_KTIME is set time is provided + * in kernel clocks, otherwise wall clock time is used. + * @gpu_ticks_queued: GPU ticks at ringbuffer submission + * @gpu_ticks_submitted: GPU ticks when starting cmdbatch execution + * @gpu_ticks_retired: GPU ticks when finishing cmdbatch execution + * + * This structure defines the profiling buffer used to measure cmdbatch + * execution time + */ +struct kgsl_cmdbatch_profiling_buffer { + __u64 wall_clock_s; + __u64 wall_clock_ns; + __u64 gpu_ticks_queued; + __u64 gpu_ticks_submitted; + __u64 gpu_ticks_retired; +}; + +/* ioctls */ +#define KGSL_IOC_TYPE 0x09 + +/* + * get misc info about the GPU + * type should be a value from enum kgsl_property_type + * value points to a structure that varies based on type + * sizebytes is sizeof() that structure + * for KGSL_PROP_DEVICE_INFO, use struct kgsl_devinfo + * this structure contaings hardware versioning info. + * for KGSL_PROP_DEVICE_SHADOW, use struct kgsl_shadowprop + * this is used to find mmap() offset and sizes for mapping + * struct kgsl_memstore into userspace. + */ +struct kgsl_device_getproperty { + unsigned int type; + void __user *value; + __kernel_size_t sizebytes; +}; + +#define IOCTL_KGSL_DEVICE_GETPROPERTY \ + _IOWR(KGSL_IOC_TYPE, 0x2, struct kgsl_device_getproperty) + +/* IOCTL_KGSL_DEVICE_READ (0x3) - removed 03/2012 + */ + +/* block until the GPU has executed past a given timestamp + * timeout is in milliseconds. + */ +struct kgsl_device_waittimestamp { + unsigned int timestamp; + unsigned int timeout; +}; + +#define IOCTL_KGSL_DEVICE_WAITTIMESTAMP \ + _IOW(KGSL_IOC_TYPE, 0x6, struct kgsl_device_waittimestamp) + +struct kgsl_device_waittimestamp_ctxtid { + unsigned int context_id; + unsigned int timestamp; + unsigned int timeout; +}; + +#define IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID \ + _IOW(KGSL_IOC_TYPE, 0x7, struct kgsl_device_waittimestamp_ctxtid) + +/* DEPRECATED: issue indirect commands to the GPU. + * drawctxt_id must have been created with IOCTL_KGSL_DRAWCTXT_CREATE + * ibaddr and sizedwords must specify a subset of a buffer created + * with IOCTL_KGSL_SHAREDMEM_FROM_PMEM + * flags may be a mask of KGSL_CONTEXT_ values + * timestamp is a returned counter value which can be passed to + * other ioctls to determine when the commands have been executed by + * the GPU. + * + * This function is deprecated - consider using IOCTL_KGSL_SUBMIT_COMMANDS + * instead + */ +struct kgsl_ringbuffer_issueibcmds { + unsigned int drawctxt_id; + unsigned long ibdesc_addr; + unsigned int numibs; + unsigned int timestamp; /*output param */ + unsigned int flags; +}; + +#define IOCTL_KGSL_RINGBUFFER_ISSUEIBCMDS \ + _IOWR(KGSL_IOC_TYPE, 0x10, struct kgsl_ringbuffer_issueibcmds) + +/* read the most recently executed timestamp value + * type should be a value from enum kgsl_timestamp_type + */ +struct kgsl_cmdstream_readtimestamp { + unsigned int type; + unsigned int timestamp; /*output param */ +}; + +#define IOCTL_KGSL_CMDSTREAM_READTIMESTAMP_OLD \ + _IOR(KGSL_IOC_TYPE, 0x11, struct kgsl_cmdstream_readtimestamp) + +#define IOCTL_KGSL_CMDSTREAM_READTIMESTAMP \ + _IOWR(KGSL_IOC_TYPE, 0x11, struct kgsl_cmdstream_readtimestamp) + +/* free memory when the GPU reaches a given timestamp. 
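+ * Typically used so that a buffer is only released once the commands that
+ * reference it have passed the given pipeline stage (e.g. type
+ * KGSL_TIMESTAMP_RETIRED).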
+ * gpuaddr specify a memory region created by a + * IOCTL_KGSL_SHAREDMEM_FROM_PMEM call + * type should be a value from enum kgsl_timestamp_type + */ +struct kgsl_cmdstream_freememontimestamp { + unsigned long gpuaddr; + unsigned int type; + unsigned int timestamp; +}; + +#define IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP \ + _IOW(KGSL_IOC_TYPE, 0x12, struct kgsl_cmdstream_freememontimestamp) + +/* + * Previous versions of this header had incorrectly defined + * IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP as a read-only ioctl instead + * of a write only ioctl. To ensure binary compatibility, the following + * #define will be used to intercept the incorrect ioctl + */ + +#define IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP_OLD \ + _IOR(KGSL_IOC_TYPE, 0x12, struct kgsl_cmdstream_freememontimestamp) + +/* create a draw context, which is used to preserve GPU state. + * The flags field may contain a mask KGSL_CONTEXT_* values + */ +struct kgsl_drawctxt_create { + unsigned int flags; + unsigned int drawctxt_id; /*output param */ +}; + +#define IOCTL_KGSL_DRAWCTXT_CREATE \ + _IOWR(KGSL_IOC_TYPE, 0x13, struct kgsl_drawctxt_create) + +/* destroy a draw context */ +struct kgsl_drawctxt_destroy { + unsigned int drawctxt_id; +}; + +#define IOCTL_KGSL_DRAWCTXT_DESTROY \ + _IOW(KGSL_IOC_TYPE, 0x14, struct kgsl_drawctxt_destroy) + +/* + * add a block of pmem, fb, ashmem or user allocated address + * into the GPU address space + */ +struct kgsl_map_user_mem { + int fd; + unsigned long gpuaddr; /*output param */ + __kernel_size_t len; + __kernel_size_t offset; + unsigned long hostptr; /*input param */ + enum kgsl_user_mem_type memtype; + unsigned int flags; +}; + +#define IOCTL_KGSL_MAP_USER_MEM \ + _IOWR(KGSL_IOC_TYPE, 0x15, struct kgsl_map_user_mem) + +struct kgsl_cmdstream_readtimestamp_ctxtid { + unsigned int context_id; + unsigned int type; + unsigned int timestamp; /*output param */ +}; + +#define IOCTL_KGSL_CMDSTREAM_READTIMESTAMP_CTXTID \ + _IOWR(KGSL_IOC_TYPE, 0x16, struct kgsl_cmdstream_readtimestamp_ctxtid) + +struct kgsl_cmdstream_freememontimestamp_ctxtid { + unsigned int context_id; + unsigned long gpuaddr; + unsigned int type; + unsigned int timestamp; +}; + +#define IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP_CTXTID \ + _IOW(KGSL_IOC_TYPE, 0x17, \ + struct kgsl_cmdstream_freememontimestamp_ctxtid) + +/* add a block of pmem or fb into the GPU address space */ +struct kgsl_sharedmem_from_pmem { + int pmem_fd; + unsigned long gpuaddr; /*output param */ + unsigned int len; + unsigned int offset; +}; + +#define IOCTL_KGSL_SHAREDMEM_FROM_PMEM \ + _IOWR(KGSL_IOC_TYPE, 0x20, struct kgsl_sharedmem_from_pmem) + +/* remove memory from the GPU's address space */ +struct kgsl_sharedmem_free { + unsigned long gpuaddr; +}; + +#define IOCTL_KGSL_SHAREDMEM_FREE \ + _IOW(KGSL_IOC_TYPE, 0x21, struct kgsl_sharedmem_free) + +struct kgsl_cff_user_event { + unsigned char cff_opcode; + unsigned int op1; + unsigned int op2; + unsigned int op3; + unsigned int op4; + unsigned int op5; + unsigned int __pad[2]; +}; + +#define IOCTL_KGSL_CFF_USER_EVENT \ + _IOW(KGSL_IOC_TYPE, 0x31, struct kgsl_cff_user_event) + +struct kgsl_gmem_desc { + unsigned int x; + unsigned int y; + unsigned int width; + unsigned int height; + unsigned int pitch; +}; + +struct kgsl_buffer_desc { + void *hostptr; + unsigned long gpuaddr; + int size; + unsigned int format; + unsigned int pitch; + unsigned int enabled; +}; + +struct kgsl_bind_gmem_shadow { + unsigned int drawctxt_id; + struct kgsl_gmem_desc gmem_desc; + unsigned int shadow_x; + unsigned int 
shadow_y; + struct kgsl_buffer_desc shadow_buffer; + unsigned int buffer_id; +}; + +#define IOCTL_KGSL_DRAWCTXT_BIND_GMEM_SHADOW \ + _IOW(KGSL_IOC_TYPE, 0x22, struct kgsl_bind_gmem_shadow) + +/* add a block of memory into the GPU address space */ + +/* + * IOCTL_KGSL_SHAREDMEM_FROM_VMALLOC deprecated 09/2012 + * use IOCTL_KGSL_GPUMEM_ALLOC instead + */ + +struct kgsl_sharedmem_from_vmalloc { + unsigned long gpuaddr; /*output param */ + unsigned int hostptr; + unsigned int flags; +}; + +#define IOCTL_KGSL_SHAREDMEM_FROM_VMALLOC \ + _IOWR(KGSL_IOC_TYPE, 0x23, struct kgsl_sharedmem_from_vmalloc) + +/* + * This is being deprecated in favor of IOCTL_KGSL_GPUMEM_CACHE_SYNC which + * supports both directions (flush and invalidate). This code will still + * work, but by definition it will do a flush of the cache which might not be + * what you want to have happen on a buffer following a GPU operation. It is + * safer to go with IOCTL_KGSL_GPUMEM_CACHE_SYNC + */ + +#define IOCTL_KGSL_SHAREDMEM_FLUSH_CACHE \ + _IOW(KGSL_IOC_TYPE, 0x24, struct kgsl_sharedmem_free) + +struct kgsl_drawctxt_set_bin_base_offset { + unsigned int drawctxt_id; + unsigned int offset; +}; + +#define IOCTL_KGSL_DRAWCTXT_SET_BIN_BASE_OFFSET \ + _IOW(KGSL_IOC_TYPE, 0x25, struct kgsl_drawctxt_set_bin_base_offset) + +enum kgsl_cmdwindow_type { + KGSL_CMDWINDOW_MIN = 0x00000000, + KGSL_CMDWINDOW_2D = 0x00000000, + KGSL_CMDWINDOW_3D = 0x00000001, /* legacy */ + KGSL_CMDWINDOW_MMU = 0x00000002, + KGSL_CMDWINDOW_ARBITER = 0x000000FF, + KGSL_CMDWINDOW_MAX = 0x000000FF, +}; + +/* write to the command window */ +struct kgsl_cmdwindow_write { + enum kgsl_cmdwindow_type target; + unsigned int addr; + unsigned int data; +}; + +#define IOCTL_KGSL_CMDWINDOW_WRITE \ + _IOW(KGSL_IOC_TYPE, 0x2e, struct kgsl_cmdwindow_write) + +struct kgsl_gpumem_alloc { + unsigned long gpuaddr; /* output param */ + __kernel_size_t size; + unsigned int flags; +}; + +#define IOCTL_KGSL_GPUMEM_ALLOC \ + _IOWR(KGSL_IOC_TYPE, 0x2f, struct kgsl_gpumem_alloc) + +struct kgsl_cff_syncmem { + unsigned long gpuaddr; + __kernel_size_t len; + unsigned int __pad[2]; /* For future binary compatibility */ +}; + +#define IOCTL_KGSL_CFF_SYNCMEM \ + _IOW(KGSL_IOC_TYPE, 0x30, struct kgsl_cff_syncmem) + +/* + * A timestamp event allows the user space to register an action following an + * expired timestamp. Note IOCTL_KGSL_TIMESTAMP_EVENT has been redefined to + * _IOWR to support fences which need to return a fd for the priv parameter. + */ + +struct kgsl_timestamp_event { + int type; /* Type of event (see list below) */ + unsigned int timestamp; /* Timestamp to trigger event on */ + unsigned int context_id; /* Context for the timestamp */ + void __user *priv; /* Pointer to the event specific blob */ + __kernel_size_t len; /* Size of the event specific blob */ +}; + +#define IOCTL_KGSL_TIMESTAMP_EVENT_OLD \ + _IOW(KGSL_IOC_TYPE, 0x31, struct kgsl_timestamp_event) + +/* A genlock timestamp event releases an existing lock on timestamp expire */ + +#define KGSL_TIMESTAMP_EVENT_GENLOCK 1 + +struct kgsl_timestamp_event_genlock { + int handle; /* Handle of the genlock lock to release */ +}; + +/* A fence timestamp event releases an existing lock on timestamp expire */ + +#define KGSL_TIMESTAMP_EVENT_FENCE 2 + +struct kgsl_timestamp_event_fence { + int fence_fd; /* Fence to signal */ +}; + +/* + * Set a property within the kernel. 
Uses the same structure as + * IOCTL_KGSL_GETPROPERTY + */ + +#define IOCTL_KGSL_SETPROPERTY \ + _IOW(KGSL_IOC_TYPE, 0x32, struct kgsl_device_getproperty) + +#define IOCTL_KGSL_TIMESTAMP_EVENT \ + _IOWR(KGSL_IOC_TYPE, 0x33, struct kgsl_timestamp_event) + +/** + * struct kgsl_gpumem_alloc_id - argument to IOCTL_KGSL_GPUMEM_ALLOC_ID + * @id: returned id value for this allocation. + * @flags: mask of KGSL_MEM* values requested and actual flags on return. + * @size: requested size of the allocation and actual size on return. + * @mmapsize: returned size to pass to mmap() which may be larger than 'size' + * @gpuaddr: returned GPU address for the allocation + * + * Allocate memory for access by the GPU. The flags and size fields are echoed + * back by the kernel, so that the caller can know if the request was + * adjusted. + * + * Supported flags: + * KGSL_MEMFLAGS_GPUREADONLY: the GPU will be unable to write to the buffer + * KGSL_MEMTYPE*: usage hint for debugging aid + * KGSL_MEMALIGN*: alignment hint, may be ignored or adjusted by the kernel. + * KGSL_MEMFLAGS_USE_CPU_MAP: If set on call and return, the returned GPU + * address will be 0. Calling mmap() will set the GPU address. + */ +struct kgsl_gpumem_alloc_id { + unsigned int id; + unsigned int flags; + __kernel_size_t size; + __kernel_size_t mmapsize; + unsigned long gpuaddr; +/* private: reserved for future use*/ + unsigned long __pad[2]; +}; + +#define IOCTL_KGSL_GPUMEM_ALLOC_ID \ + _IOWR(KGSL_IOC_TYPE, 0x34, struct kgsl_gpumem_alloc_id) + +/** + * struct kgsl_gpumem_free_id - argument to IOCTL_KGSL_GPUMEM_FREE_ID + * @id: GPU allocation id to free + * + * Free an allocation by id, in case a GPU address has not been assigned or + * is unknown. Freeing an allocation by id with this ioctl or by GPU address + * with IOCTL_KGSL_SHAREDMEM_FREE are equivalent. + */ +struct kgsl_gpumem_free_id { + unsigned int id; +/* private: reserved for future use*/ + unsigned int __pad; +}; + +#define IOCTL_KGSL_GPUMEM_FREE_ID \ + _IOWR(KGSL_IOC_TYPE, 0x35, struct kgsl_gpumem_free_id) + +/** + * struct kgsl_gpumem_get_info - argument to IOCTL_KGSL_GPUMEM_GET_INFO + * @gpuaddr: GPU address to query. Also set on return. + * @id: GPU allocation id to query. Also set on return. + * @flags: returned mask of KGSL_MEM* values. + * @size: returned size of the allocation. + * @mmapsize: returned size to pass mmap(), which may be larger than 'size' + * @useraddr: returned address of the userspace mapping for this buffer + * + * This ioctl allows querying of all user visible attributes of an existing + * allocation, by either the GPU address or the id returned by a previous + * call to IOCTL_KGSL_GPUMEM_ALLOC_ID. Legacy allocation ioctls may not + * return all attributes so this ioctl can be used to look them up if needed. + * + */ +struct kgsl_gpumem_get_info { + unsigned long gpuaddr; + unsigned int id; + unsigned int flags; + __kernel_size_t size; + __kernel_size_t mmapsize; + unsigned long useraddr; +/* private: reserved for future use*/ + unsigned long __pad[4]; +}; + +#define IOCTL_KGSL_GPUMEM_GET_INFO\ + _IOWR(KGSL_IOC_TYPE, 0x36, struct kgsl_gpumem_get_info) + +/** + * struct kgsl_gpumem_sync_cache - argument to IOCTL_KGSL_GPUMEM_SYNC_CACHE + * @gpuaddr: GPU address of the buffer to sync. + * @id: id of the buffer to sync. Either gpuaddr or id is sufficient. 
+ * @op: a mask of KGSL_GPUMEM_CACHE_* values + * @offset: offset into the buffer + * @length: number of bytes starting from offset to perform + * the cache operation on + * + * Sync the L2 cache for memory headed to and from the GPU - this replaces + * KGSL_SHAREDMEM_FLUSH_CACHE since it can handle cache management for both + * directions + * + */ +struct kgsl_gpumem_sync_cache { + unsigned long gpuaddr; + unsigned int id; + unsigned int op; + __kernel_size_t offset; + __kernel_size_t length; +}; + +#define KGSL_GPUMEM_CACHE_CLEAN (1 << 0) +#define KGSL_GPUMEM_CACHE_TO_GPU KGSL_GPUMEM_CACHE_CLEAN + +#define KGSL_GPUMEM_CACHE_INV (1 << 1) +#define KGSL_GPUMEM_CACHE_FROM_GPU KGSL_GPUMEM_CACHE_INV + +#define KGSL_GPUMEM_CACHE_FLUSH \ + (KGSL_GPUMEM_CACHE_CLEAN | KGSL_GPUMEM_CACHE_INV) + +/* Flag to ensure backwards compatibility of kgsl_gpumem_sync_cache struct */ +#define KGSL_GPUMEM_CACHE_RANGE (1 << 31U) + +#define IOCTL_KGSL_GPUMEM_SYNC_CACHE \ + _IOW(KGSL_IOC_TYPE, 0x37, struct kgsl_gpumem_sync_cache) + +/** + * struct kgsl_perfcounter_get - argument to IOCTL_KGSL_PERFCOUNTER_GET + * @groupid: Performance counter group ID + * @countable: Countable to select within the group + * @offset: Return offset of the reserved LO counter + * @offset_hi: Return offset of the reserved HI counter + * + * Get an available performance counter from a specified groupid. The offset + * of the performance counter will be returned after successfully assigning + * the countable to the counter for the specified group. An error will be + * returned and an offset of 0 if the groupid is invalid or there are no + * more counters left. After successfully getting a perfcounter, the user + * must call kgsl_perfcounter_put(groupid, contable) when finished with + * the perfcounter to clear up perfcounter resources. + * + */ +struct kgsl_perfcounter_get { + unsigned int groupid; + unsigned int countable; + unsigned int offset; + unsigned int offset_hi; +/* private: reserved for future use */ + unsigned int __pad; /* For future binary compatibility */ +}; + +#define IOCTL_KGSL_PERFCOUNTER_GET \ + _IOWR(KGSL_IOC_TYPE, 0x38, struct kgsl_perfcounter_get) + +/** + * struct kgsl_perfcounter_put - argument to IOCTL_KGSL_PERFCOUNTER_PUT + * @groupid: Performance counter group ID + * @countable: Countable to release within the group + * + * Put an allocated performance counter to allow others to have access to the + * resource that was previously taken. This is only to be called after + * successfully getting a performance counter from kgsl_perfcounter_get(). + * + */ +struct kgsl_perfcounter_put { + unsigned int groupid; + unsigned int countable; +/* private: reserved for future use */ + unsigned int __pad[2]; /* For future binary compatibility */ +}; + +#define IOCTL_KGSL_PERFCOUNTER_PUT \ + _IOW(KGSL_IOC_TYPE, 0x39, struct kgsl_perfcounter_put) + +/** + * struct kgsl_perfcounter_query - argument to IOCTL_KGSL_PERFCOUNTER_QUERY + * @groupid: Performance counter group ID + * @countable: Return active countables array + * @size: Size of active countables array + * @max_counters: Return total number counters for the group ID + * + * Query the available performance counters given a groupid. The array + * *countables is used to return the current active countables in counters. + * The size of the array is passed in so the kernel will only write at most + * size or counter->size for the group id. The total number of available + * counters for the group ID is returned in max_counters. 
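+ * (In the structure below the countable array pointer is named 'countables'
+ * and its length is passed in 'count'; 'size' above refers to that length.)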
+ * If the array or size passed in are invalid, then only the maximum number + * of counters will be returned, no data will be written to *countables. + * If the groupid is invalid an error code will be returned. + * + */ +struct kgsl_perfcounter_query { + unsigned int groupid; + /* Array to return the current countable for up to size counters */ + unsigned int __user *countables; + unsigned int count; + unsigned int max_counters; +/* private: reserved for future use */ + unsigned int __pad[2]; /* For future binary compatibility */ +}; + +#define IOCTL_KGSL_PERFCOUNTER_QUERY \ + _IOWR(KGSL_IOC_TYPE, 0x3A, struct kgsl_perfcounter_query) + +/** + * struct kgsl_perfcounter_query - argument to IOCTL_KGSL_PERFCOUNTER_QUERY + * @groupid: Performance counter group IDs + * @countable: Performance counter countable IDs + * @value: Return performance counter reads + * @size: Size of all arrays (groupid/countable pair and return value) + * + * Read in the current value of a performance counter given by the groupid + * and countable. + * + */ + +struct kgsl_perfcounter_read_group { + unsigned int groupid; + unsigned int countable; + unsigned long long value; +}; + +struct kgsl_perfcounter_read { + struct kgsl_perfcounter_read_group __user *reads; + unsigned int count; +/* private: reserved for future use */ + unsigned int __pad[2]; /* For future binary compatibility */ +}; + +#define IOCTL_KGSL_PERFCOUNTER_READ \ + _IOWR(KGSL_IOC_TYPE, 0x3B, struct kgsl_perfcounter_read) +/* + * struct kgsl_gpumem_sync_cache_bulk - argument to + * IOCTL_KGSL_GPUMEM_SYNC_CACHE_BULK + * @id_list: list of GPU buffer ids of the buffers to sync + * @count: number of GPU buffer ids in id_list + * @op: a mask of KGSL_GPUMEM_CACHE_* values + * + * Sync the cache for memory headed to and from the GPU. Certain + * optimizations can be made on the cache operation based on the total + * size of the working set of memory to be managed. + */ +struct kgsl_gpumem_sync_cache_bulk { + unsigned int __user *id_list; + unsigned int count; + unsigned int op; +/* private: reserved for future use */ + unsigned int __pad[2]; /* For future binary compatibility */ +}; + +#define IOCTL_KGSL_GPUMEM_SYNC_CACHE_BULK \ + _IOWR(KGSL_IOC_TYPE, 0x3C, struct kgsl_gpumem_sync_cache_bulk) + +/* + * struct kgsl_cmd_syncpoint_timestamp + * @context_id: ID of a KGSL context + * @timestamp: GPU timestamp + * + * This structure defines a syncpoint comprising a context/timestamp pair. A + * list of these may be passed by IOCTL_KGSL_SUBMIT_COMMANDS to define + * dependencies that must be met before the command can be submitted to the + * hardware + */ +struct kgsl_cmd_syncpoint_timestamp { + unsigned int context_id; + unsigned int timestamp; +}; + +struct kgsl_cmd_syncpoint_fence { + int fd; +}; + +/* + * struct kgsl_cmd_syncpoint_timeline + * @timelines: Address of an array of &struct kgsl_timeline_val + * @count: Number of entries in @timelines + * @timelines_size: Size of each entry in @timelines + * + * Define a syncpoint for a number of timelines. This syncpoint will + * be satisfied when all of the specified timelines are signaled. + */ +struct kgsl_cmd_syncpoint_timeline { + __u64 timelines; + __u32 count; + __u32 timelines_size; +}; + +/** + * struct kgsl_cmd_syncpoint - Define a sync point for a command batch + * @type: type of sync point defined here + * @priv: Pointer to the type specific buffer + * @size: Size of the type specific buffer + * + * This structure contains pointers defining a specific command sync point. 
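+ * For instance, a fence dependency would use type
+ * KGSL_CMD_SYNCPOINT_TYPE_FENCE with priv pointing at a
+ * struct kgsl_cmd_syncpoint_fence of the matching size.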
+ * The pointer and size should point to a type appropriate structure.
+ */
+struct kgsl_cmd_syncpoint {
+ int type;
+ void __user *priv;
+ __kernel_size_t size;
+};
+
+/* Flag to indicate that the cmdlist may contain memlists */
+#define KGSL_IBDESC_MEMLIST 0x1
+
+/* Flag to point out the cmdbatch profiling buffer in the memlist */
+#define KGSL_IBDESC_PROFILING_BUFFER 0x2
+
+/**
+ * struct kgsl_submit_commands - Argument to IOCTL_KGSL_SUBMIT_COMMANDS
+ * @context_id: KGSL context ID that owns the commands
+ * @flags: Flags to control the behavior of the command submission
+ * @cmdlist: User pointer to a list of kgsl_ibdesc structures
+ * @numcmds: Number of commands listed in cmdlist
+ * @synclist: User pointer to a list of kgsl_cmd_syncpoint structures
+ * @numsyncs: Number of sync points listed in synclist
+ * @timestamp: On entry, a user defined timestamp; on exit, the timestamp
+ * assigned to the command batch
+ *
+ * This structure specifies a command to send to the GPU hardware. This is
+ * similar to kgsl_issueibcmds except that it doesn't support the legacy way to
+ * submit IB lists and it adds sync points to block the IB until the
+ * dependencies are satisfied. This entry point is the new and preferred way
+ * to submit commands to the GPU. The memory list can be used to specify all
+ * memory that is referenced in the current set of commands.
+ */
+
+struct kgsl_submit_commands {
+ unsigned int context_id;
+ unsigned int flags;
+ struct kgsl_ibdesc __user *cmdlist;
+ unsigned int numcmds;
+ struct kgsl_cmd_syncpoint __user *synclist;
+ unsigned int numsyncs;
+ unsigned int timestamp;
+/* private: reserved for future use */
+ unsigned int __pad[4];
+};
+
+#define IOCTL_KGSL_SUBMIT_COMMANDS \
+ _IOWR(KGSL_IOC_TYPE, 0x3D, struct kgsl_submit_commands)
+
+/**
+ * struct kgsl_device_constraint - device constraint argument
+ * @context_id: KGSL context ID
+ * @type: type of constraint i.e. pwrlevel/none
+ * @data: constraint data
+ * @size: size of the constraint data
+ */
+struct kgsl_device_constraint {
+ unsigned int type;
+ unsigned int context_id;
+ void __user *data;
+ __kernel_size_t size;
+};
+
+/* Constraint Type */
+#define KGSL_CONSTRAINT_NONE 0
+#define KGSL_CONSTRAINT_PWRLEVEL 1
+
+/* L3 constraint Type */
+#define KGSL_CONSTRAINT_L3_NONE 2
+#define KGSL_CONSTRAINT_L3_PWRLEVEL 3
+
+/* PWRLEVEL constraint level */
+/* set to min frequency */
+#define KGSL_CONSTRAINT_PWR_MIN 0
+/* set to max frequency */
+#define KGSL_CONSTRAINT_PWR_MAX 1
+
+struct kgsl_device_constraint_pwrlevel {
+ unsigned int level;
+};
+
+/**
+ * struct kgsl_syncsource_create - Argument to IOCTL_KGSL_SYNCSOURCE_CREATE
+ * @id: returned id for the syncsource that was created.
+ *
+ * This ioctl creates a userspace sync timeline.
+ */
+
+struct kgsl_syncsource_create {
+ unsigned int id;
+/* private: reserved for future use */
+ unsigned int __pad[3];
+};
+
+#define IOCTL_KGSL_SYNCSOURCE_CREATE \
+ _IOWR(KGSL_IOC_TYPE, 0x40, struct kgsl_syncsource_create)
+
+/**
+ * struct kgsl_syncsource_destroy - Argument to IOCTL_KGSL_SYNCSOURCE_DESTROY
+ * @id: syncsource id to destroy
+ *
+ * This ioctl destroys a userspace sync timeline.
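+ *
+ * A minimal, hypothetical sketch of the create/destroy pair (assumes an
+ * already-open KGSL device fd; error handling omitted):
+ *
+ *   struct kgsl_syncsource_create create = { 0 };
+ *   struct kgsl_syncsource_destroy destroy = { 0 };
+ *
+ *   ioctl(fd, IOCTL_KGSL_SYNCSOURCE_CREATE, &create);
+ *   destroy.id = create.id;
+ *   ioctl(fd, IOCTL_KGSL_SYNCSOURCE_DESTROY, &destroy);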
+ */ + +struct kgsl_syncsource_destroy { + unsigned int id; +/* private: reserved for future use */ + unsigned int __pad[3]; +}; + +#define IOCTL_KGSL_SYNCSOURCE_DESTROY \ + _IOWR(KGSL_IOC_TYPE, 0x41, struct kgsl_syncsource_destroy) + +/** + * struct kgsl_syncsource_create_fence - Argument to + * IOCTL_KGSL_SYNCSOURCE_CREATE_FENCE + * @id: syncsource id + * @fence_fd: returned sync_fence fd + * + * Create a fence that may be signaled by userspace by calling + * IOCTL_KGSL_SYNCSOURCE_SIGNAL_FENCE. There are no order dependencies between + * these fences. + */ +struct kgsl_syncsource_create_fence { + unsigned int id; + int fence_fd; +/* private: reserved for future use */ + unsigned int __pad[4]; +}; + +/** + * struct kgsl_syncsource_signal_fence - Argument to + * IOCTL_KGSL_SYNCSOURCE_SIGNAL_FENCE + * @id: syncsource id + * @fence_fd: sync_fence fd to signal + * + * Signal a fence that was created by a IOCTL_KGSL_SYNCSOURCE_CREATE_FENCE + * call using the same syncsource id. This allows a fence to be shared + * to other processes but only signaled by the process owning the fd + * used to create the fence. + */ +#define IOCTL_KGSL_SYNCSOURCE_CREATE_FENCE \ + _IOWR(KGSL_IOC_TYPE, 0x42, struct kgsl_syncsource_create_fence) + +struct kgsl_syncsource_signal_fence { + unsigned int id; + int fence_fd; +/* private: reserved for future use */ + unsigned int __pad[4]; +}; + +#define IOCTL_KGSL_SYNCSOURCE_SIGNAL_FENCE \ + _IOWR(KGSL_IOC_TYPE, 0x43, struct kgsl_syncsource_signal_fence) + +/** + * struct kgsl_cff_sync_gpuobj - Argument to IOCTL_KGSL_CFF_SYNC_GPUOBJ + * @offset: Offset into the GPU object to sync + * @length: Number of bytes to sync + * @id: ID of the GPU object to sync + */ +struct kgsl_cff_sync_gpuobj { + __u64 offset; + __u64 length; + unsigned int id; +}; + +#define IOCTL_KGSL_CFF_SYNC_GPUOBJ \ + _IOW(KGSL_IOC_TYPE, 0x44, struct kgsl_cff_sync_gpuobj) + +/** + * struct kgsl_gpuobj_alloc - Argument to IOCTL_KGSL_GPUOBJ_ALLOC + * @size: Size in bytes of the object to allocate + * @flags: mask of KGSL_MEMFLAG_* bits + * @va_len: Size in bytes of the virtual region to allocate + * @mmapsize: Returns the mmap() size of the object + * @id: Returns the GPU object ID of the new object + * @metadata_len: Length of the metdata to copy from the user + * @metadata: Pointer to the user specified metadata to store for the object + */ +struct kgsl_gpuobj_alloc { + __u64 size; + __u64 flags; + __u64 va_len; + __u64 mmapsize; + unsigned int id; + unsigned int metadata_len; + __u64 metadata; +}; + +/* Let the user know that this header supports the gpuobj metadata */ +#define KGSL_GPUOBJ_ALLOC_METADATA_MAX 64 + +#define IOCTL_KGSL_GPUOBJ_ALLOC \ + _IOWR(KGSL_IOC_TYPE, 0x45, struct kgsl_gpuobj_alloc) + +/** + * struct kgsl_gpuobj_free - Argument to IOCTL_KGLS_GPUOBJ_FREE + * @flags: Mask of: KGSL_GUPOBJ_FREE_ON_EVENT + * @priv: Pointer to the private object if KGSL_GPUOBJ_FREE_ON_EVENT is + * specified + * @id: ID of the GPU object to free + * @type: If KGSL_GPUOBJ_FREE_ON_EVENT is specified, the type of asynchronous + * event to free on + * @len: Length of the data passed in priv + */ +struct kgsl_gpuobj_free { + __u64 flags; + __u64 __user priv; + unsigned int id; + unsigned int type; + unsigned int len; +}; + +#define KGSL_GPUOBJ_FREE_ON_EVENT 1 + +#define KGSL_GPU_EVENT_TIMESTAMP 1 +#define KGSL_GPU_EVENT_FENCE 2 + +/** + * struct kgsl_gpu_event_timestamp - Specifies a timestamp event to free a GPU + * object on + * @context_id: ID of the timestamp event to wait for + * @timestamp: Timestamp of the 
timestamp event to wait for + */ +struct kgsl_gpu_event_timestamp { + unsigned int context_id; + unsigned int timestamp; +}; + +/** + * struct kgsl_gpu_event_fence - Specifies a fence ID to to free a GPU object on + * @fd: File descriptor for the fence + */ +struct kgsl_gpu_event_fence { + int fd; +}; + +#define IOCTL_KGSL_GPUOBJ_FREE \ + _IOW(KGSL_IOC_TYPE, 0x46, struct kgsl_gpuobj_free) + +/** + * struct kgsl_gpuobj_info - argument to IOCTL_KGSL_GPUOBJ_INFO + * @gpuaddr: GPU address of the object + * @flags: Current flags for the object + * @size: Size of the object + * @va_len: VA size of the object + * @va_addr: Virtual address of the object (if it is mapped) + * id - GPU object ID of the object to query + */ +struct kgsl_gpuobj_info { + __u64 gpuaddr; + __u64 flags; + __u64 size; + __u64 va_len; + __u64 va_addr; + unsigned int id; +}; + +#define IOCTL_KGSL_GPUOBJ_INFO \ + _IOWR(KGSL_IOC_TYPE, 0x47, struct kgsl_gpuobj_info) + +/** + * struct kgsl_gpuobj_import - argument to IOCTL_KGSL_GPUOBJ_IMPORT + * @priv: Pointer to the private data for the import type + * @priv_len: Length of the private data + * @flags: Mask of KGSL_MEMFLAG_ flags + * @type: Type of the import (KGSL_USER_MEM_TYPE_*) + * @id: Returns the ID of the new GPU object + */ +struct kgsl_gpuobj_import { + __u64 __user priv; + __u64 priv_len; + __u64 flags; + unsigned int type; + unsigned int id; +}; + +/** + * struct kgsl_gpuobj_import_dma_buf - import a dmabuf object + * @fd: File descriptor for the dma-buf object + */ +struct kgsl_gpuobj_import_dma_buf { + int fd; +}; + +/** + * struct kgsl_gpuobj_import_useraddr - import an object based on a useraddr + * @virtaddr: Virtual address of the object to import + */ +struct kgsl_gpuobj_import_useraddr { + __u64 virtaddr; +}; + +#define IOCTL_KGSL_GPUOBJ_IMPORT \ + _IOWR(KGSL_IOC_TYPE, 0x48, struct kgsl_gpuobj_import) + +/** + * struct kgsl_gpuobj_sync_obj - Individual GPU object to sync + * @offset: Offset within the GPU object to sync + * @length: Number of bytes to sync + * @id: ID of the GPU object to sync + * @op: Cache operation to execute + */ + +struct kgsl_gpuobj_sync_obj { + __u64 offset; + __u64 length; + unsigned int id; + unsigned int op; +}; + +/** + * struct kgsl_gpuobj_sync - Argument for IOCTL_KGSL_GPUOBJ_SYNC + * @objs: Pointer to an array of kgsl_gpuobj_sync_obj structs + * @obj_len: Size of each item in the array + * @count: Number of items in the array + */ + +struct kgsl_gpuobj_sync { + __u64 __user objs; + unsigned int obj_len; + unsigned int count; +}; + +#define IOCTL_KGSL_GPUOBJ_SYNC \ + _IOW(KGSL_IOC_TYPE, 0x49, struct kgsl_gpuobj_sync) + +/** + * struct kgsl_command_object - GPU command object + * @offset: GPU address offset of the object + * @gpuaddr: GPU address of the object + * @size: Size of the object + * @flags: Current flags for the object + * @id - GPU command object ID + */ +struct kgsl_command_object { + __u64 offset; + __u64 gpuaddr; + __u64 size; + unsigned int flags; + unsigned int id; +}; + +/** + * struct kgsl_command_syncpoint - GPU syncpoint object + * @priv: Pointer to the type specific buffer + * @size: Size of the type specific buffer + * @type: type of sync point defined here + */ +struct kgsl_command_syncpoint { + __u64 __user priv; + __u64 size; + unsigned int type; +}; + +/** + * struct kgsl_command_object - Argument for IOCTL_KGSL_GPU_COMMAND + * @flags: Current flags for the object + * @cmdlist: List of kgsl_command_objects for submission + * @cmd_size: Size of kgsl_command_objects structure + * @numcmds: Number of 
kgsl_command_objects in command list + * @objlist: List of kgsl_command_objects for tracking + * @obj_size: Size of kgsl_command_objects structure + * @numobjs: Number of kgsl_command_objects in object list + * @synclist: List of kgsl_command_syncpoints + * @sync_size: Size of kgsl_command_syncpoint structure + * @numsyncs: Number of kgsl_command_syncpoints in syncpoint list + * @context_id: Context ID submittin ghte kgsl_gpu_command + * @timestamp: Timestamp for the submitted commands + */ +struct kgsl_gpu_command { + __u64 flags; + __u64 __user cmdlist; + unsigned int cmdsize; + unsigned int numcmds; + __u64 __user objlist; + unsigned int objsize; + unsigned int numobjs; + __u64 __user synclist; + unsigned int syncsize; + unsigned int numsyncs; + unsigned int context_id; + unsigned int timestamp; +}; + +#define IOCTL_KGSL_GPU_COMMAND \ + _IOWR(KGSL_IOC_TYPE, 0x4A, struct kgsl_gpu_command) + +/** + * struct kgsl_preemption_counters_query - argument to + * IOCTL_KGSL_PREEMPTIONCOUNTER_QUERY + * @counters: Return preemption counters array + * @size_user: Size allocated by userspace + * @size_priority_level: Size of preemption counters for each + * priority level + * @max_priority_level: Return max number of priority levels + * + * Query the available preemption counters. The array counters + * is used to return preemption counters. The size of the array + * is passed in so the kernel will only write at most size_user + * or max available preemption counters. The total number of + * preemption counters is returned in max_priority_level. If the + * array or size passed in are invalid, then an error is + * returned back. + */ +struct kgsl_preemption_counters_query { + __u64 __user counters; + unsigned int size_user; + unsigned int size_priority_level; + unsigned int max_priority_level; +}; + +#define IOCTL_KGSL_PREEMPTIONCOUNTER_QUERY \ + _IOWR(KGSL_IOC_TYPE, 0x4B, struct kgsl_preemption_counters_query) + +/** + * struct kgsl_gpuobj_set_info - argument for IOCTL_KGSL_GPUOBJ_SET_INFO + * @flags: Flags to indicate which parameters to change + * @metadata: If KGSL_GPUOBJ_SET_INFO_METADATA is set, a pointer to the new + * metadata + * @id: GPU memory object ID to change + * @metadata_len: If KGSL_GPUOBJ_SET_INFO_METADATA is set, the length of the + * new metadata string + * @type: If KGSL_GPUOBJ_SET_INFO_TYPE is set, the new type of the memory object + */ + +#define KGSL_GPUOBJ_SET_INFO_METADATA (1 << 0) +#define KGSL_GPUOBJ_SET_INFO_TYPE (1 << 1) + +struct kgsl_gpuobj_set_info { + __u64 flags; + __u64 metadata; + unsigned int id; + unsigned int metadata_len; + unsigned int type; +}; + +#define IOCTL_KGSL_GPUOBJ_SET_INFO \ + _IOW(KGSL_IOC_TYPE, 0x4C, struct kgsl_gpuobj_set_info) + +/** + * struct kgsl_sparse_phys_alloc - Argument for IOCTL_KGSL_SPARSE_PHYS_ALLOC + * @size: Size in bytes to back + * @pagesize: Pagesize alignment required + * @flags: Flags for this allocation + * @id: Returned ID for this allocation + */ +struct kgsl_sparse_phys_alloc { + __u64 size; + __u64 pagesize; + __u64 flags; + unsigned int id; +}; + +#define IOCTL_KGSL_SPARSE_PHYS_ALLOC \ + _IOWR(KGSL_IOC_TYPE, 0x50, struct kgsl_sparse_phys_alloc) + +/** + * struct kgsl_sparse_phys_free - Argument for IOCTL_KGSL_SPARSE_PHYS_FREE + * @id: ID to free + */ +struct kgsl_sparse_phys_free { + unsigned int id; +}; + +#define IOCTL_KGSL_SPARSE_PHYS_FREE \ + _IOW(KGSL_IOC_TYPE, 0x51, struct kgsl_sparse_phys_free) + +/** + * struct kgsl_sparse_virt_alloc - Argument for IOCTL_KGSL_SPARSE_VIRT_ALLOC + * @size: Size in bytes to reserve + 
* @pagesize: Pagesize alignment required + * @flags: Flags for this allocation + * @id: Returned ID for this allocation + * @gpuaddr: Returned GPU address for this allocation + */ +struct kgsl_sparse_virt_alloc { + __u64 size; + __u64 pagesize; + __u64 flags; + __u64 gpuaddr; + unsigned int id; +}; + +#define IOCTL_KGSL_SPARSE_VIRT_ALLOC \ + _IOWR(KGSL_IOC_TYPE, 0x52, struct kgsl_sparse_virt_alloc) + +/** + * struct kgsl_sparse_virt_free - Argument for IOCTL_KGSL_SPARSE_VIRT_FREE + * @id: ID to free + */ +struct kgsl_sparse_virt_free { + unsigned int id; +}; + +#define IOCTL_KGSL_SPARSE_VIRT_FREE \ + _IOW(KGSL_IOC_TYPE, 0x53, struct kgsl_sparse_virt_free) + +/** + * struct kgsl_sparse_binding_object - Argument for kgsl_sparse_bind + * @virtoffset: Offset into the virtual ID + * @physoffset: Offset into the physical ID (bind only) + * @size: Size in bytes to reserve + * @flags: Flags for this kgsl_sparse_binding_object + * @id: Physical ID to bind (bind only) + */ +struct kgsl_sparse_binding_object { + __u64 virtoffset; + __u64 physoffset; + __u64 size; + __u64 flags; + unsigned int id; +}; + +/** + * struct kgsl_sparse_bind - Argument for IOCTL_KGSL_SPARSE_BIND + * @list: List of kgsl_sparse_bind_objects to bind/unbind + * @id: Virtual ID to bind/unbind + * @size: Size of kgsl_sparse_bind_object + * @count: Number of elements in list + * + */ +struct kgsl_sparse_bind { + __u64 __user list; + unsigned int id; + unsigned int size; + unsigned int count; +}; + +#define IOCTL_KGSL_SPARSE_BIND \ + _IOW(KGSL_IOC_TYPE, 0x54, struct kgsl_sparse_bind) + +/** + * struct kgsl_gpu_sparse_command - Argument for + * IOCTL_KGSL_GPU_SPARSE_COMMAND + * @flags: Current flags for the object + * @sparselist: List of kgsl_sparse_binding_object to bind/unbind + * @synclist: List of kgsl_command_syncpoints + * @sparsesize: Size of kgsl_sparse_binding_object + * @numsparse: Number of elements in list + * @sync_size: Size of kgsl_command_syncpoint structure + * @numsyncs: Number of kgsl_command_syncpoints in syncpoint list + * @context_id: Context ID submitting the kgsl_gpu_command + * @timestamp: Timestamp for the submitted commands + * @id: Virtual ID to bind/unbind + */ +struct kgsl_gpu_sparse_command { + __u64 flags; + __u64 __user sparselist; + __u64 __user synclist; + unsigned int sparsesize; + unsigned int numsparse; + unsigned int syncsize; + unsigned int numsyncs; + unsigned int context_id; + unsigned int timestamp; + unsigned int id; +}; + +#define IOCTL_KGSL_GPU_SPARSE_COMMAND \ + _IOWR(KGSL_IOC_TYPE, 0x55, struct kgsl_gpu_sparse_command) + +#define KGSL_GPUMEM_RANGE_OP_BIND 1 +#define KGSL_GPUMEM_RANGE_OP_UNBIND 2 + +/** + * struct kgsl_gpumem_bind_range - specifies a bind operation for a virtual + * buffer object + * @child_offset: Offset to the start of memory within the child buffer object + * (not used for KGSL_GPUMEM_RANGE_OP_UNBIND operations) + * @target_offset: GPU address offset within the target VBO + * @length: Amount of memory to map/unmap (in bytes) + * @child_id: The GPU buffer ID for the child object to map/unmap in the VBO + * @op: One of KGSL_GPUMEM_RANGE_OP_BIND or KGSL_GPUMEM_RANGE_OP_UNBIND + * + * This defines a specific bind operation to a virtual buffer object specified + * in &struct kgsl_gpumem_bind_ranges. When @op is KGSL_GPUMEM_RANGE_OP_BIND the + * physical memory starting at @child_offset in the memory object identified by + * @child_id will be mapped into the target virtual buffer object starting at + * @offset for @length bytes. 
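+ *
+ * For example, a hypothetical bind of the first 64KB of a child buffer into
+ * the target VBO at a 1MB offset could be described as (child_id assumed to
+ * be a valid GPU buffer ID):
+ *
+ *   struct kgsl_gpumem_bind_range range = {
+ *       .child_id = child_id,
+ *       .child_offset = 0,
+ *       .target_offset = 1 << 20,
+ *       .length = 64 << 10,
+ *       .op = KGSL_GPUMEM_RANGE_OP_BIND,
+ *   };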
+ *
+ * When @op is KGSL_GPUMEM_RANGE_OP_UNBIND any entries in the target virtual
+ * buffer object between @target_offset and @target_offset + @length that
+ * belong to @child_id will be removed.
+ */
+struct kgsl_gpumem_bind_range {
+ __u64 child_offset;
+ __u64 target_offset;
+ __u64 length;
+ __u32 child_id;
+ __u32 op;
+};
+
+#define KGSL_GPUMEM_BIND_ASYNC (1UL << 0)
+#define KGSL_GPUMEM_BIND_FENCE_OUT (1UL << 1)
+
+/**
+ * struct kgsl_gpumem_bind_ranges - Argument to IOCTL_KGSL_GPUMEM_BIND_RANGES to
+ * either map or unmap a child buffer object in a virtual buffer object.
+ * @ranges: User memory pointer to an array of range operations of type &struct
+ * kgsl_gpumem_bind_range
+ * @ranges_nents: Number of entries in @ranges
+ * @ranges_size: Size of each entry in @ranges in bytes
+ * @id: GPU buffer object identifier for the target virtual buffer object
+ * @flags: Bitmap of KGSL_GPUMEM_BIND_ASYNC and KGSL_GPUMEM_BIND_FENCE_OUT
+ * @fence_id: If KGSL_GPUMEM_BIND_FENCE_OUT is set in @flags contains the
+ * identifier for the sync fence that will be signaled after the operation
+ * completes
+ *
+ * Describes a number of range operations to perform on a virtual buffer object
+ * identified by @id. @ranges should be a __u64 representation of an array of
+ * &struct kgsl_gpumem_bind_range entries. @ranges_nents will contain the number
+ * of entries in the array, and @ranges_size will contain the size of each entry
+ * in the array. If KGSL_GPUMEM_BIND_ASYNC is set the operation will be
+ * performed asynchronously and the call will return immediately to the
+ * user. Otherwise the calling context will block until the operation has
+ * completed.
+ *
+ * If KGSL_GPUMEM_BIND_ASYNC and KGSL_GPUMEM_BIND_FENCE_OUT are both set a sync
+ * fence will be created and returned in @fence_id. The fence will be signaled
+ * when the bind operation has completed.
+ */
+struct kgsl_gpumem_bind_ranges {
+ __u64 ranges;
+ __u32 ranges_nents;
+ __u32 ranges_size;
+ __u32 id;
+ __u32 flags;
+ int fence_id;
+ /* private: 64 bit compatibility */
+ __u32 padding;
+};
+
+#define IOCTL_KGSL_GPUMEM_BIND_RANGES \
+ _IOWR(KGSL_IOC_TYPE, 0x56, struct kgsl_gpumem_bind_ranges)
+
+#define KGSL_GPU_AUX_COMMAND_BIND (1 << 0)
+#define KGSL_GPU_AUX_COMMAND_TIMELINE (1 << 1)
+/* Reuse the same flag that GPU COMMAND uses */
+#define KGSL_GPU_AUX_COMMAND_SYNC KGSL_CMDBATCH_SYNC
+
+/**
+ * struct kgsl_gpu_aux_command_bind - Descriptor for a GPU AUX bind command
+ * @rangeslist: Pointer to a list of &struct kgsl_gpumem_bind_range items
+ * @numranges: Number of entries in @rangeslist
+ * @rangesize: Size of each entry in @rangeslist
+ * @target: The GPU memory ID for the target virtual buffer object
+ *
+ * Describe a GPU AUX command to bind ranges in a virtual buffer object.
+ * @rangeslist points to an array of &struct kgsl_gpumem_bind_range entries,
+ * the same per-range struct that is used by IOCTL_KGSL_GPUMEM_BIND_RANGES.
+ * @numranges is the number of entries in @rangeslist and @rangesize is the
+ * size of each entry in @rangeslist. @target is the GPU memory ID of the
+ * target VBO.
+ */
+struct kgsl_gpu_aux_command_bind {
+ __u64 rangeslist;
+ __u64 numranges;
+ __u64 rangesize;
+ __u32 target;
+/* private: Padding for 64 bit compatibility */
+ __u32 padding;
+};
+
+/**
+ * struct kgsl_gpu_aux_command_generic - Container for an AUX command
+ * @priv: Pointer to the type specific buffer
+ * @size: Size of the type specific buffer
+ * @type: Type of the aux command (a KGSL_GPU_AUX_COMMAND_* value)
+ *
+ * Describes a generic container for GPU aux commands.
@priv is a user pointer + * to the command struct matching @type of size @size. + */ +struct kgsl_gpu_aux_command_generic { + __u64 priv; + __u64 size; + __u32 type; +/* private: Padding for 64 bit compatibility */ + __u32 padding; +}; + +/** + * struct kgsl_gpu_aux_command - Argument for IOCTL_KGSL_GPU_AUX_COMMAND + * @flags: flags for the object + * @cmdlist: List of &struct kgsl_gpu_aux_command_generic objects + * @cmd_size: Size of each entry in @cmdlist + * @numcmds: Number of entries in @cmdlist + * @synclist: List of &struct kgsl_command_syncpoint objects + * @syncsize: Size of each entry in @synclist + * @numsyncs: Number of entries in @synclist + * @context_id: ID of the context submtting the aux command + * @timestamp: Timestamp for the command submission + * + * Describe a GPU auxiliary command. Auxiliary commands are tasks that are not + * performed on hardware but can be queued like normal GPU commands. Like GPU + * commands AUX commands are assigned a timestamp and processed in order in the + * queue. They can also have standard sync objects attached. The only + * difference is that AUX commands usually perform some sort of administrative + * task in the CPU and are retired in the dispatcher. + * + * For bind operations flags must have one of the KGSL_GPU_AUX_COMMAND_* flags + * set. If sync objects are attached KGSL_GPU_AUX_COMMAND_SYNC must be set. + * @cmdlist points to an array of &struct kgsl_gpu_aux_command_generic structs + * which in turn will have a pointer to a specific command type. + * @numcmds is the number of commands in the list and @cmdsize is the size + * of each entity in @cmdlist. + * + * If KGSL_GPU_AUX_COMMAND_SYNC is specified @synclist will point to an array of + * &struct kgsl_command_syncpoint items in the same fashion as a GPU hardware + * command. @numsyncs and @syncsize describe the list. + * + * @context_id is the context that is submitting the command and @timestamp + * contains the timestamp for the operation. + */ +struct kgsl_gpu_aux_command { + __u64 flags; + __u64 cmdlist; + __u32 cmdsize; + __u32 numcmds; + __u64 synclist; + __u32 syncsize; + __u32 numsyncs; + __u32 context_id; + __u32 timestamp; +}; + +#define IOCTL_KGSL_GPU_AUX_COMMAND \ + _IOWR(KGSL_IOC_TYPE, 0x57, struct kgsl_gpu_aux_command) + +/** + * struct kgsl_timeline_create - Argument for IOCTL_KGSL_TIMELINE_CREATE + * @seqno: Initial sequence number for the timeline + * @id: Timeline identifier [out] + * + * Create a new semaphore timeline and return the identifier in @id. + * The identifier is global for the device and can be used to + * identify the timeline in all subsequent commands. + */ +struct kgsl_timeline_create { + __u64 seqno; + __u32 id; +/* private: padding for 64 bit compatibility */ + __u32 padding; +}; + +#define IOCTL_KGSL_TIMELINE_CREATE \ + _IOWR(KGSL_IOC_TYPE, 0x58, struct kgsl_timeline_create) + +/** + * struct kgsl_timeline_val - A container to store a timeline/sequence number + * pair. + * @seqno: Sequence number to signal/query + * @timeline: The timeline identifier to signal/query + * + * A container to store a timeline/seqno pair used by the query and signal + * ioctls. 
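+ *
+ * A minimal, hypothetical sketch of signaling one point on a timeline
+ * (assumes an already-open KGSL device fd and a timeline id returned by
+ * IOCTL_KGSL_TIMELINE_CREATE; error handling omitted):
+ *
+ *   struct kgsl_timeline_val val = { .timeline = id, .seqno = 10 };
+ *   struct kgsl_timeline_signal signal = {
+ *       .timelines = (__u64)(uintptr_t)&val,
+ *       .count = 1,
+ *       .timelines_size = sizeof(val),
+ *   };
+ *
+ *   ioctl(fd, IOCTL_KGSL_TIMELINE_SIGNAL, &signal);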
+ */
+struct kgsl_timeline_val {
+ __u64 seqno;
+ __u32 timeline;
+/* private: padding for 64 bit compatibility */
+ __u32 padding;
+};
+
+#define KGSL_TIMELINE_WAIT_ALL 1
+#define KGSL_TIMELINE_WAIT_ANY 2
+
+/**
+ * struct kgsl_timeline_wait - Argument for IOCTL_KGSL_TIMELINE_WAIT
+ * @tv_sec: Number of seconds to wait for the signal
+ * @tv_nsec: Number of nanoseconds to wait for the signal
+ * @timelines: Address of an array of &struct kgsl_timeline_val entries
+ * @count: Number of entries in @timelines
+ * @timelines_size: Size of each entry in @timelines
+ * @flags: One of KGSL_TIMELINE_WAIT_ALL or KGSL_TIMELINE_WAIT_ANY
+ *
+ * Wait for the timelines listed in @timelines to be signaled. If @flags is
+ * equal to KGSL_TIMELINE_WAIT_ALL then wait for all timelines or if
+ * KGSL_TIMELINE_WAIT_ANY is specified then wait for any of the timelines to
+ * signal. @tv_sec and @tv_nsec indicate the number of seconds and nanoseconds
+ * that the process should be blocked waiting for the signal.
+ */
+struct kgsl_timeline_wait {
+ __s64 tv_sec;
+ __s64 tv_nsec;
+ __u64 timelines;
+ __u32 count;
+ __u32 timelines_size;
+ __u32 flags;
+/* private: padding for 64 bit compatibility */
+ __u32 padding;
+};
+
+#define IOCTL_KGSL_TIMELINE_WAIT \
+ _IOW(KGSL_IOC_TYPE, 0x59, struct kgsl_timeline_wait)
+
+#define IOCTL_KGSL_TIMELINE_QUERY \
+ _IOWR(KGSL_IOC_TYPE, 0x5A, struct kgsl_timeline_val)
+
+/**
+ * struct kgsl_timeline_signal - argument for IOCTL_KGSL_TIMELINE_SIGNAL
+ * @timelines: Address of an array of &struct kgsl_timeline_val entries
+ * @count: Number of entries in @timelines
+ * @timelines_size: Size of each entry in @timelines
+ *
+ * Signal an array of timelines of type &struct kgsl_timeline_val.
+ */
+struct kgsl_timeline_signal {
+ __u64 timelines;
+ __u32 count;
+ __u32 timelines_size;
+};
+
+#define IOCTL_KGSL_TIMELINE_SIGNAL \
+ _IOW(KGSL_IOC_TYPE, 0x5B, struct kgsl_timeline_signal)
+
+/**
+ * struct kgsl_timeline_fence_get - argument for IOCTL_KGSL_TIMELINE_FENCE_GET
+ * @seqno: Sequence number for the fence
+ * @timeline: Timeline to create the fence on
+ * @handle: Contains the fence fd for a successful operation [out]
+ *
+ * Create a sync file descriptor for @seqno on @timeline and return it in
+ * @handle. It can be polled and queried just like any other sync file
+ * descriptor.
+ */
+struct kgsl_timeline_fence_get {
+ __u64 seqno;
+ __u32 timeline;
+ int handle;
+};
+
+#define IOCTL_KGSL_TIMELINE_FENCE_GET \
+ _IOWR(KGSL_IOC_TYPE, 0x5C, struct kgsl_timeline_fence_get)
+/**
+ * IOCTL_KGSL_TIMELINE_DESTROY takes a __u32 identifier for the timeline to
+ * destroy
+ */
+#define IOCTL_KGSL_TIMELINE_DESTROY _IOW(KGSL_IOC_TYPE, 0x5D, __u32)
+
+/**
+ * struct kgsl_gpu_aux_command_timeline - An aux command for timeline signals
+ * @timelines: An array of &struct kgsl_timeline_val elements
+ * @count: The number of entries in @timelines
+ * @timelines_size: The size of each element in @timelines
+ *
+ * An aux command for timeline signals that can be pointed to by
+ * &struct kgsl_gpu_aux_command_generic when the type is
+ * KGSL_GPU_AUX_COMMAND_TIMELINE.
+ */
+struct kgsl_gpu_aux_command_timeline {
+ __u64 timelines;
+ __u32 count;
+ __u32 timelines_size;
+};
+
+#endif /* _UAPI_MSM_KGSL_H */
diff --git a/kgsl.c b/kgsl.c
new file mode 100644
index 0000000000..c59ccff831
--- /dev/null
+++ b/kgsl.c
@@ -0,0 +1,4809 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2008-2021, The Linux Foundation. All rights reserved.
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kgsl_compat.h" +#include "kgsl_debugfs.h" +#include "kgsl_device.h" +#include "kgsl_eventlog.h" +#include "kgsl_mmu.h" +#include "kgsl_pool.h" +#include "kgsl_reclaim.h" +#include "kgsl_sync.h" +#include "kgsl_sysfs.h" +#include "kgsl_trace.h" + +#ifndef arch_mmap_check +#define arch_mmap_check(addr, len, flags) (0) +#endif + +#ifndef pgprot_writebackcache +#define pgprot_writebackcache(_prot) (_prot) +#endif + +#ifndef pgprot_writethroughcache +#define pgprot_writethroughcache(_prot) (_prot) +#endif + +#if defined(CONFIG_ARM64) || defined(CONFIG_ARM_LPAE) +#define KGSL_DMA_BIT_MASK DMA_BIT_MASK(64) +#else +#define KGSL_DMA_BIT_MASK DMA_BIT_MASK(32) +#endif + +/* List of dmabufs mapped */ +static LIST_HEAD(kgsl_dmabuf_list); +static DEFINE_SPINLOCK(kgsl_dmabuf_lock); + +struct dmabuf_list_entry { + struct page *firstpage; + struct list_head node; + struct list_head dmabuf_list; +}; + +struct kgsl_dma_buf_meta { + struct kgsl_mem_entry *entry; + struct dma_buf_attachment *attach; + struct dma_buf *dmabuf; + struct sg_table *table; + struct dmabuf_list_entry *dle; + struct list_head node; +}; + +static inline struct kgsl_pagetable *_get_memdesc_pagetable( + struct kgsl_pagetable *pt, struct kgsl_mem_entry *entry) +{ + /* if a secured buffer, map it to secure global pagetable */ + if (kgsl_memdesc_is_secured(&entry->memdesc)) + return pt->mmu->securepagetable; + + return pt; +} + +static void kgsl_mem_entry_detach_process(struct kgsl_mem_entry *entry); + +static const struct vm_operations_struct kgsl_gpumem_vm_ops; + +/* + * The memfree list contains the last N blocks of memory that have been freed. + * On a GPU fault we walk the list to see if the faulting address had been + * recently freed and print out a message to that effect + */ + +#define MEMFREE_ENTRIES 512 + +static DEFINE_SPINLOCK(memfree_lock); + +struct memfree_entry { + pid_t ptname; + uint64_t gpuaddr; + uint64_t size; + pid_t pid; + uint64_t flags; +}; + +static struct { + struct memfree_entry *list; + int head; + int tail; +} memfree; + +static inline bool match_memfree_addr(struct memfree_entry *entry, + pid_t ptname, uint64_t gpuaddr) +{ + return ((entry->ptname == ptname) && + (entry->size > 0) && + (gpuaddr >= entry->gpuaddr && + gpuaddr < (entry->gpuaddr + entry->size))); +} +int kgsl_memfree_find_entry(pid_t ptname, uint64_t *gpuaddr, + uint64_t *size, uint64_t *flags, pid_t *pid) +{ + int ptr; + + if (memfree.list == NULL) + return 0; + + spin_lock(&memfree_lock); + + ptr = memfree.head - 1; + if (ptr < 0) + ptr = MEMFREE_ENTRIES - 1; + + /* Walk backwards through the list looking for the last match */ + while (ptr != memfree.tail) { + struct memfree_entry *entry = &memfree.list[ptr]; + + if (match_memfree_addr(entry, ptname, *gpuaddr)) { + *gpuaddr = entry->gpuaddr; + *flags = entry->flags; + *size = entry->size; + *pid = entry->pid; + + spin_unlock(&memfree_lock); + return 1; + } + + ptr = ptr - 1; + + if (ptr < 0) + ptr = MEMFREE_ENTRIES - 1; + } + + spin_unlock(&memfree_lock); + return 0; +} + +static void kgsl_memfree_purge(struct kgsl_pagetable *pagetable, + uint64_t gpuaddr, uint64_t size) +{ + pid_t ptname = pagetable ? 
pagetable->name : 0; + int i; + + if (memfree.list == NULL) + return; + + spin_lock(&memfree_lock); + + for (i = 0; i < MEMFREE_ENTRIES; i++) { + struct memfree_entry *entry = &memfree.list[i]; + + if (entry->ptname != ptname || entry->size == 0) + continue; + + if (gpuaddr > entry->gpuaddr && + gpuaddr < entry->gpuaddr + entry->size) { + /* truncate the end of the entry */ + entry->size = gpuaddr - entry->gpuaddr; + } else if (gpuaddr <= entry->gpuaddr) { + if (gpuaddr + size > entry->gpuaddr && + gpuaddr + size < entry->gpuaddr + entry->size) + /* Truncate the beginning of the entry */ + entry->gpuaddr = gpuaddr + size; + else if (gpuaddr + size >= entry->gpuaddr + entry->size) + /* Remove the entire entry */ + entry->size = 0; + } + } + spin_unlock(&memfree_lock); +} + +static void kgsl_memfree_add(pid_t pid, pid_t ptname, uint64_t gpuaddr, + uint64_t size, uint64_t flags) + +{ + struct memfree_entry *entry; + + if (memfree.list == NULL) + return; + + spin_lock(&memfree_lock); + + entry = &memfree.list[memfree.head]; + + entry->pid = pid; + entry->ptname = ptname; + entry->gpuaddr = gpuaddr; + entry->size = size; + entry->flags = flags; + + memfree.head = (memfree.head + 1) % MEMFREE_ENTRIES; + + if (memfree.head == memfree.tail) + memfree.tail = (memfree.tail + 1) % MEMFREE_ENTRIES; + + spin_unlock(&memfree_lock); +} + +int kgsl_readtimestamp(struct kgsl_device *device, void *priv, + enum kgsl_timestamp_type type, unsigned int *timestamp) +{ + if (device) + return device->ftbl->readtimestamp(device, priv, type, + timestamp); + return -EINVAL; + +} + +const char *kgsl_context_type(int type) +{ + if (type == KGSL_CONTEXT_TYPE_GL) + return "GL"; + else if (type == KGSL_CONTEXT_TYPE_CL) + return "CL"; + else if (type == KGSL_CONTEXT_TYPE_C2D) + return "C2D"; + else if (type == KGSL_CONTEXT_TYPE_RS) + return "RS"; + else if (type == KGSL_CONTEXT_TYPE_VK) + return "VK"; + + return "ANY"; +} + +/* Scheduled by kgsl_mem_entry_put_deferred() */ +static void _deferred_put(struct work_struct *work) +{ + struct kgsl_mem_entry *entry = + container_of(work, struct kgsl_mem_entry, work); + + kgsl_mem_entry_put(entry); +} + +static struct kgsl_mem_entry *kgsl_mem_entry_create(void) +{ + struct kgsl_mem_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL); + + if (entry != NULL) { + kref_init(&entry->refcount); + /* put this ref in userspace memory alloc and map ioctls */ + kref_get(&entry->refcount); + atomic_set(&entry->map_count, 0); + } + + return entry; +} + +static void add_dmabuf_list(struct kgsl_dma_buf_meta *meta) +{ + struct kgsl_device *device = dev_get_drvdata(meta->attach->dev); + struct dmabuf_list_entry *dle; + struct page *page; + + /* + * Get the first page. We will use it to identify the imported + * buffer, since the same buffer can be mapped as different + * mem entries. + */ + page = sg_page(meta->table->sgl); + + spin_lock(&kgsl_dmabuf_lock); + + /* Go through the list to see if we imported this buffer before */ + list_for_each_entry(dle, &kgsl_dmabuf_list, node) { + if (dle->firstpage == page) { + /* Add the dmabuf meta to the list for this dle */ + meta->dle = dle; + list_add(&meta->node, &dle->dmabuf_list); + spin_unlock(&kgsl_dmabuf_lock); + return; + } + } + + /* This is a new buffer. 
Add a new entry for it */ + dle = kzalloc(sizeof(*dle), GFP_ATOMIC); + if (dle) { + dle->firstpage = page; + INIT_LIST_HEAD(&dle->dmabuf_list); + list_add(&dle->node, &kgsl_dmabuf_list); + meta->dle = dle; + list_add(&meta->node, &dle->dmabuf_list); + kgsl_trace_gpu_mem_total(device, + meta->entry->memdesc.size); + } + spin_unlock(&kgsl_dmabuf_lock); +} + +static void remove_dmabuf_list(struct kgsl_dma_buf_meta *meta) +{ + struct kgsl_device *device = dev_get_drvdata(meta->attach->dev); + struct dmabuf_list_entry *dle = meta->dle; + + if (!dle) + return; + + spin_lock(&kgsl_dmabuf_lock); + list_del(&meta->node); + if (list_empty(&dle->dmabuf_list)) { + list_del(&dle->node); + kfree(dle); + kgsl_trace_gpu_mem_total(device, + -(meta->entry->memdesc.size)); + } + spin_unlock(&kgsl_dmabuf_lock); +} + +#ifdef CONFIG_DMA_SHARED_BUFFER +static void kgsl_destroy_ion(struct kgsl_memdesc *memdesc) +{ + struct kgsl_mem_entry *entry = container_of(memdesc, + struct kgsl_mem_entry, memdesc); + struct kgsl_dma_buf_meta *meta = entry->priv_data; + + if (meta != NULL) { + remove_dmabuf_list(meta); + dma_buf_detach(meta->dmabuf, meta->attach); + dma_buf_put(meta->dmabuf); + kfree(meta); + } + + memdesc->sgt = NULL; +} + +static const struct kgsl_memdesc_ops kgsl_dmabuf_ops = { + .free = kgsl_destroy_ion, + .put_gpuaddr = kgsl_unmap_and_put_gpuaddr, +}; +#endif + +static void kgsl_destroy_anon(struct kgsl_memdesc *memdesc) +{ + int i = 0, j; + struct scatterlist *sg; + struct page *page; + + for_each_sg(memdesc->sgt->sgl, sg, memdesc->sgt->nents, i) { + page = sg_page(sg); + for (j = 0; j < (sg->length >> PAGE_SHIFT); j++) { + + /* + * Mark the page in the scatterlist as dirty if they + * were writable by the GPU. + */ + if (!(memdesc->flags & KGSL_MEMFLAGS_GPUREADONLY)) + set_page_dirty_lock(nth_page(page, j)); + + /* + * Put the page reference taken using get_user_pages + * during memdesc_sg_virt. + */ + put_page(nth_page(page, j)); + } + } + + sg_free_table(memdesc->sgt); + kfree(memdesc->sgt); + memdesc->sgt = NULL; +} + +void +kgsl_mem_entry_destroy(struct kref *kref) +{ + struct kgsl_mem_entry *entry = container_of(kref, + struct kgsl_mem_entry, + refcount); + unsigned int memtype; + + if (entry == NULL) + return; + + /* pull out the memtype before the flags get cleared */ + memtype = kgsl_memdesc_usermem_type(&entry->memdesc); + + /* + * VBO allocations at gpumem_alloc_vbo_entry are not added into stats + * (using kgsl_process_add_stats) so do not subtract here. 
For all other + * allocations subtract before freeing memdesc + */ + if (!(entry->memdesc.flags & KGSL_MEMFLAGS_VBO)) + atomic64_sub(entry->memdesc.size, &entry->priv->stats[memtype].cur); + + /* Detach from process list */ + kgsl_mem_entry_detach_process(entry); + + if (memtype != KGSL_MEM_ENTRY_KERNEL) + atomic_long_sub(entry->memdesc.size, + &kgsl_driver.stats.mapped); + + kgsl_sharedmem_free(&entry->memdesc); + + kfree(entry); +} + +/* Commit the entry to the process so it can be accessed by other operations */ +static void kgsl_mem_entry_commit_process(struct kgsl_mem_entry *entry) +{ + if (!entry) + return; + + spin_lock(&entry->priv->mem_lock); + idr_replace(&entry->priv->mem_idr, entry, entry->id); + spin_unlock(&entry->priv->mem_lock); +} + +static int kgsl_mem_entry_attach_to_process(struct kgsl_device *device, + struct kgsl_process_private *process, + struct kgsl_mem_entry *entry) +{ + struct kgsl_memdesc *memdesc = &entry->memdesc; + int ret, id; + + ret = kgsl_process_private_get(process); + if (!ret) + return -EBADF; + + /* Assign a gpu address */ + if (!kgsl_memdesc_use_cpu_map(memdesc) && + kgsl_mmu_get_mmutype(device) != KGSL_MMU_TYPE_NONE) { + struct kgsl_pagetable *pagetable; + + pagetable = kgsl_memdesc_is_secured(memdesc) ? + device->mmu.securepagetable : process->pagetable; + + ret = kgsl_mmu_get_gpuaddr(pagetable, memdesc); + if (ret) { + kgsl_process_private_put(process); + return ret; + } + } + + idr_preload(GFP_KERNEL); + spin_lock(&process->mem_lock); + /* Allocate the ID but don't attach the pointer just yet */ + id = idr_alloc(&process->mem_idr, NULL, 1, 0, GFP_NOWAIT); + spin_unlock(&process->mem_lock); + idr_preload_end(); + + if (id < 0) { + if (!kgsl_memdesc_use_cpu_map(memdesc)) + kgsl_mmu_put_gpuaddr(memdesc->pagetable, memdesc); + kgsl_process_private_put(process); + return id; + } + + entry->id = id; + entry->priv = process; + + return 0; +} + +/* + * Attach the memory object to a process by (possibly) getting a GPU address and + * (possibly) mapping it + */ +static int kgsl_mem_entry_attach_and_map(struct kgsl_device *device, + struct kgsl_process_private *process, + struct kgsl_mem_entry *entry) +{ + struct kgsl_memdesc *memdesc = &entry->memdesc; + int ret; + + ret = kgsl_mem_entry_attach_to_process(device, process, entry); + if (ret) + return ret; + + if (memdesc->gpuaddr) { + /* + * Map the memory if a GPU address is already assigned, either + * through kgsl_mem_entry_attach_to_process() or via some other + * SVM process + */ + ret = kgsl_mmu_map(memdesc->pagetable, memdesc); + + if (ret) { + kgsl_mem_entry_detach_process(entry); + return ret; + } + } + + kgsl_memfree_purge(memdesc->pagetable, memdesc->gpuaddr, + memdesc->size); + + return ret; +} + +/* Detach a memory entry from a process and unmap it from the MMU */ +static void kgsl_mem_entry_detach_process(struct kgsl_mem_entry *entry) +{ + if (entry == NULL) + return; + + /* + * First remove the entry from mem_idr list + * so that no one can operate on obsolete values + */ + spin_lock(&entry->priv->mem_lock); + if (entry->id != 0) + idr_remove(&entry->priv->mem_idr, entry->id); + entry->id = 0; + + spin_unlock(&entry->priv->mem_lock); + + kgsl_sharedmem_put_gpuaddr(&entry->memdesc); + + if (entry->memdesc.priv & KGSL_MEMDESC_RECLAIMED) + atomic_sub(entry->memdesc.page_count, + &entry->priv->unpinned_page_count); + + kgsl_process_private_put(entry->priv); + + entry->priv = NULL; +} + +#ifdef CONFIG_QCOM_KGSL_CONTEXT_DEBUG +static void kgsl_context_debug_info(struct kgsl_device *device) +{ + 
struct kgsl_context *context; + struct kgsl_process_private *p; + int next; + /* + * Keep an interval between consecutive logging to avoid + * flooding the kernel log + */ + static DEFINE_RATELIMIT_STATE(_rs, 10 * HZ, 1); + + if (!__ratelimit(&_rs)) + return; + + dev_info(device->dev, "KGSL active contexts:\n"); + dev_info(device->dev, "pid process total attached detached\n"); + + read_lock(&kgsl_driver.proclist_lock); + read_lock(&device->context_lock); + + list_for_each_entry(p, &kgsl_driver.process_list, list) { + int total_contexts = 0, num_detached = 0; + + idr_for_each_entry(&device->context_idr, context, next) { + if (context->proc_priv == p) { + total_contexts++; + if (kgsl_context_detached(context)) + num_detached++; + } + } + + dev_info(device->dev, "%-8u %-15.15s %-8d %-10d %-10d\n", + pid_nr(p->pid), p->comm, total_contexts, + total_contexts - num_detached, num_detached); + } + + read_unlock(&device->context_lock); + read_unlock(&kgsl_driver.proclist_lock); +} +#else +static void kgsl_context_debug_info(struct kgsl_device *device) +{ +} +#endif + +/** + * kgsl_context_dump() - dump information about a draw context + * @device: KGSL device that owns the context + * @context: KGSL context to dump information about + * + * Dump specific information about the context to the kernel log. Used for + * fence timeout callbacks + */ +void kgsl_context_dump(struct kgsl_context *context) +{ + struct kgsl_device *device; + + if (_kgsl_context_get(context) == 0) + return; + + device = context->device; + + if (kgsl_context_detached(context)) { + dev_err(device->dev, " context[%u]: context detached\n", + context->id); + } else if (device->ftbl->drawctxt_dump != NULL) + device->ftbl->drawctxt_dump(device, context); + + kgsl_context_put(context); +} + +/* Allocate a new context ID */ +static int _kgsl_get_context_id(struct kgsl_device *device) +{ + int id; + + idr_preload(GFP_KERNEL); + write_lock(&device->context_lock); + /* Allocate the slot but don't put a pointer in it yet */ + id = idr_alloc(&device->context_idr, NULL, 1, + KGSL_MEMSTORE_MAX, GFP_NOWAIT); + write_unlock(&device->context_lock); + idr_preload_end(); + + return id; +} + +/** + * kgsl_context_init() - helper to initialize kgsl_context members + * @dev_priv: the owner of the context + * @context: the newly created context struct, should be allocated by + * the device specific drawctxt_create function. + * + * This is a helper function for the device specific drawctxt_create + * function to initialize the common members of its context struct. + * If this function succeeds, reference counting is active in the context + * struct and the caller should kgsl_context_put() it on error. + * If it fails, the caller should just free the context structure + * it passed in. + */ +int kgsl_context_init(struct kgsl_device_private *dev_priv, + struct kgsl_context *context) +{ + struct kgsl_device *device = dev_priv->device; + int ret = 0, id; + struct kgsl_process_private *proc_priv = dev_priv->process_priv; + + /* + * Read and increment the context count under lock to make sure + * no process goes beyond the specified context limit. 
+ */ + spin_lock(&proc_priv->ctxt_count_lock); + if (atomic_read(&proc_priv->ctxt_count) > KGSL_MAX_CONTEXTS_PER_PROC) { + dev_err(device->dev, + "Per process context limit reached for pid %u\n", + pid_nr(dev_priv->process_priv->pid)); + spin_unlock(&proc_priv->ctxt_count_lock); + kgsl_context_debug_info(device); + return -ENOSPC; + } + + atomic_inc(&proc_priv->ctxt_count); + spin_unlock(&proc_priv->ctxt_count_lock); + + id = _kgsl_get_context_id(device); + if (id == -ENOSPC) { + /* + * Before declaring that there are no contexts left try + * flushing the event workqueue just in case there are + * detached contexts waiting to finish + */ + + flush_workqueue(device->events_wq); + id = _kgsl_get_context_id(device); + } + + if (id < 0) { + if (id == -ENOSPC) { + dev_warn(device->dev, + "cannot have more than %zu contexts due to memstore limitation\n", + KGSL_MEMSTORE_MAX); + kgsl_context_debug_info(device); + } + atomic_dec(&proc_priv->ctxt_count); + return id; + } + + context->id = id; + + kref_init(&context->refcount); + /* + * Get a refernce to the process private so its not destroyed, until + * the context is destroyed. This will also prevent the pagetable + * from being destroyed + */ + if (!kgsl_process_private_get(dev_priv->process_priv)) { + ret = -EBADF; + goto out; + } + context->device = dev_priv->device; + context->dev_priv = dev_priv; + context->proc_priv = dev_priv->process_priv; + context->tid = task_pid_nr(current); + + ret = kgsl_sync_timeline_create(context); + if (ret) { + kgsl_process_private_put(dev_priv->process_priv); + goto out; + } + + kgsl_add_event_group(device, &context->events, context, + kgsl_readtimestamp, context, "context-%d", id); + +out: + if (ret) { + atomic_dec(&proc_priv->ctxt_count); + write_lock(&device->context_lock); + idr_remove(&dev_priv->device->context_idr, id); + write_unlock(&device->context_lock); + } + + return ret; +} + +/** + * kgsl_context_detach() - Release the "master" context reference + * @context: The context that will be detached + * + * This is called when a context becomes unusable, because userspace + * has requested for it to be destroyed. The context itself may + * exist a bit longer until its reference count goes to zero. + * Other code referencing the context can detect that it has been + * detached by checking the KGSL_CONTEXT_PRIV_DETACHED bit in + * context->priv. + */ +void kgsl_context_detach(struct kgsl_context *context) +{ + struct kgsl_device *device; + + if (context == NULL) + return; + + /* + * Mark the context as detached to keep others from using + * the context before it gets fully removed, and to make sure + * we don't try to detach twice. + */ + if (test_and_set_bit(KGSL_CONTEXT_PRIV_DETACHED, &context->priv)) + return; + + device = context->device; + + trace_kgsl_context_detach(device, context); + + context->device->ftbl->drawctxt_detach(context); + + /* + * Cancel all pending events after the device-specific context is + * detached, to avoid possibly freeing memory while it is still + * in use by the GPU. 
+ */ + kgsl_cancel_events(device, &context->events); + + /* Remove the event group from the list */ + kgsl_del_event_group(device, &context->events); + + kgsl_sync_timeline_detach(context->ktimeline); + kgsl_context_put(context); +} + +void +kgsl_context_destroy(struct kref *kref) +{ + struct kgsl_context *context = container_of(kref, struct kgsl_context, + refcount); + struct kgsl_device *device = context->device; + + trace_kgsl_context_destroy(device, context); + + /* + * It's not safe to destroy the context if it's not detached as GPU + * may still be executing commands + */ + BUG_ON(!kgsl_context_detached(context)); + + kgsl_sync_timeline_put(context->ktimeline); + + write_lock(&device->context_lock); + if (context->id != KGSL_CONTEXT_INVALID) { + + /* Clear the timestamps in the memstore during destroy */ + kgsl_sharedmem_writel(device->memstore, + KGSL_MEMSTORE_OFFSET(context->id, soptimestamp), 0); + kgsl_sharedmem_writel(device->memstore, + KGSL_MEMSTORE_OFFSET(context->id, eoptimestamp), 0); + + /* clear device power constraint */ + if (context->id == device->pwrctrl.constraint.owner_id) { + trace_kgsl_constraint(device, + device->pwrctrl.constraint.type, + device->pwrctrl.active_pwrlevel, + 0); + device->pwrctrl.constraint.type = KGSL_CONSTRAINT_NONE; + } + + atomic_dec(&context->proc_priv->ctxt_count); + idr_remove(&device->context_idr, context->id); + context->id = KGSL_CONTEXT_INVALID; + } + write_unlock(&device->context_lock); + kgsl_process_private_put(context->proc_priv); + + device->ftbl->drawctxt_destroy(context); +} + +struct kgsl_device *kgsl_get_device(int dev_idx) +{ + int i; + struct kgsl_device *ret = NULL; + + mutex_lock(&kgsl_driver.devlock); + + for (i = 0; i < ARRAY_SIZE(kgsl_driver.devp); i++) { + if (kgsl_driver.devp[i] && kgsl_driver.devp[i]->id == dev_idx) { + ret = kgsl_driver.devp[i]; + break; + } + } + + mutex_unlock(&kgsl_driver.devlock); + return ret; +} + +static struct kgsl_device *kgsl_get_minor(int minor) +{ + struct kgsl_device *ret = NULL; + + if (minor < 0 || minor >= ARRAY_SIZE(kgsl_driver.devp)) + return NULL; + + mutex_lock(&kgsl_driver.devlock); + ret = kgsl_driver.devp[minor]; + mutex_unlock(&kgsl_driver.devlock); + + return ret; +} + +/** + * kgsl_check_timestamp() - return true if the specified timestamp is retired + * @device: Pointer to the KGSL device to check + * @context: Pointer to the context for the timestamp + * @timestamp: The timestamp to compare + */ +bool kgsl_check_timestamp(struct kgsl_device *device, + struct kgsl_context *context, unsigned int timestamp) +{ + unsigned int ts_processed; + + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED, + &ts_processed); + + return (timestamp_cmp(ts_processed, timestamp) >= 0); +} + +/** + * kgsl_destroy_process_private() - Cleanup function to free process private + * @kref: - Pointer to object being destroyed's kref struct + * Free struct object and all other resources attached to it. + * Since the function can be used when not all resources inside process + * private have been allocated, there is a check to (before each resource + * cleanup) see if the struct member being cleaned is in fact allocated or not. + * If the value is not NULL, resource is freed. 
+ */ +static void kgsl_destroy_process_private(struct kref *kref) +{ + struct kgsl_process_private *private = container_of(kref, + struct kgsl_process_private, refcount); + + mutex_lock(&kgsl_driver.process_mutex); + debugfs_remove_recursive(private->debug_root); + kobject_put(&private->kobj); + + /* When using global pagetables, do not detach global pagetable */ + if (private->pagetable->name != KGSL_MMU_GLOBAL_PT) + kgsl_mmu_detach_pagetable(private->pagetable); + + /* Remove the process struct from the master list */ + write_lock(&kgsl_driver.proclist_lock); + list_del(&private->list); + write_unlock(&kgsl_driver.proclist_lock); + mutex_unlock(&kgsl_driver.process_mutex); + + put_pid(private->pid); + idr_destroy(&private->mem_idr); + idr_destroy(&private->syncsource_idr); + + /* When using global pagetables, do not put global pagetable */ + if (private->pagetable->name != KGSL_MMU_GLOBAL_PT) + kgsl_mmu_putpagetable(private->pagetable); + + kfree(private); +} + +void +kgsl_process_private_put(struct kgsl_process_private *private) +{ + if (private) + kref_put(&private->refcount, kgsl_destroy_process_private); +} + +/** + * kgsl_process_private_find() - Find the process associated with the specified + * name + * @name: pid_t of the process to search for + * Return the process struct for the given ID. + */ +struct kgsl_process_private *kgsl_process_private_find(pid_t pid) +{ + struct kgsl_process_private *p, *private = NULL; + + read_lock(&kgsl_driver.proclist_lock); + list_for_each_entry(p, &kgsl_driver.process_list, list) { + if (pid_nr(p->pid) == pid) { + if (kgsl_process_private_get(p)) + private = p; + break; + } + } + read_unlock(&kgsl_driver.proclist_lock); + + return private; +} + +static struct kgsl_process_private *kgsl_process_private_new( + struct kgsl_device *device) +{ + struct kgsl_process_private *private; + struct pid *cur_pid = get_task_pid(current->group_leader, PIDTYPE_PID); + + /* Search in the process list */ + list_for_each_entry(private, &kgsl_driver.process_list, list) { + if (private->pid == cur_pid) { + if (!kgsl_process_private_get(private)) + /* + * This will happen only if refcount is zero + * i.e. destroy is triggered but didn't complete + * yet. Return -EEXIST to indicate caller that + * destroy is pending to allow caller to take + * appropriate action. + */ + private = ERR_PTR(-EEXIST); + /* + * We need to hold only one reference to the PID for + * each process struct to avoid overflowing the + * reference counter which can lead to use-after-free. 
+ */ + put_pid(cur_pid); + return private; + } + } + + /* Create a new object */ + private = kzalloc(sizeof(struct kgsl_process_private), GFP_KERNEL); + if (private == NULL) { + put_pid(cur_pid); + return ERR_PTR(-ENOMEM); + } + + kref_init(&private->refcount); + + private->pid = cur_pid; + get_task_comm(private->comm, current->group_leader); + + spin_lock_init(&private->mem_lock); + spin_lock_init(&private->syncsource_lock); + spin_lock_init(&private->ctxt_count_lock); + + idr_init(&private->mem_idr); + idr_init(&private->syncsource_idr); + + kgsl_reclaim_proc_private_init(private); + + /* Allocate a pagetable for the new process object */ + private->pagetable = kgsl_mmu_getpagetable(&device->mmu, pid_nr(cur_pid)); + if (IS_ERR(private->pagetable)) { + int err = PTR_ERR(private->pagetable); + + idr_destroy(&private->mem_idr); + idr_destroy(&private->syncsource_idr); + put_pid(private->pid); + + kfree(private); + private = ERR_PTR(err); + return private; + } + + kgsl_process_init_sysfs(device, private); + kgsl_process_init_debugfs(private); + write_lock(&kgsl_driver.proclist_lock); + list_add(&private->list, &kgsl_driver.process_list); + write_unlock(&kgsl_driver.proclist_lock); + + return private; +} + +static void process_release_memory(struct kgsl_process_private *private) +{ + struct kgsl_mem_entry *entry; + int next = 0; + + while (1) { + spin_lock(&private->mem_lock); + entry = idr_get_next(&private->mem_idr, &next); + if (entry == NULL) { + spin_unlock(&private->mem_lock); + break; + } + /* + * If the free pending flag is not set it means that user space + * did not free it's reference to this entry, in that case + * free a reference to this entry, other references are from + * within kgsl so they will be freed eventually by kgsl + */ + if (!entry->pending_free) { + entry->pending_free = 1; + spin_unlock(&private->mem_lock); + kgsl_mem_entry_put(entry); + } else { + spin_unlock(&private->mem_lock); + } + next = next + 1; + } +} + +static void kgsl_process_private_close(struct kgsl_device_private *dev_priv, + struct kgsl_process_private *private) +{ + mutex_lock(&kgsl_driver.process_mutex); + + if (--private->fd_count > 0) { + mutex_unlock(&kgsl_driver.process_mutex); + kgsl_process_private_put(private); + return; + } + + /* + * If this is the last file on the process garbage collect + * any outstanding resources + */ + process_release_memory(private); + + /* Release all syncsource objects from process private */ + kgsl_syncsource_process_release_syncsources(private); + + mutex_unlock(&kgsl_driver.process_mutex); + + kgsl_process_private_put(private); +} + +static struct kgsl_process_private *_process_private_open( + struct kgsl_device *device) +{ + struct kgsl_process_private *private; + + mutex_lock(&kgsl_driver.process_mutex); + private = kgsl_process_private_new(device); + + if (IS_ERR(private)) + goto done; + + private->fd_count++; + +done: + mutex_unlock(&kgsl_driver.process_mutex); + return private; +} + +static struct kgsl_process_private *kgsl_process_private_open( + struct kgsl_device *device) +{ + struct kgsl_process_private *private; + int i; + + private = _process_private_open(device); + + /* + * If we get error and error is -EEXIST that means previous process + * private destroy is triggered but didn't complete. Retry creating + * process private after sometime to allow previous destroy to complete. 
+ */ + for (i = 0; (PTR_ERR_OR_ZERO(private) == -EEXIST) && (i < 5); i++) { + usleep_range(10, 100); + private = _process_private_open(device); + } + + return private; +} + +int kgsl_gpu_frame_count(pid_t pid, u64 *frame_count) +{ + struct kgsl_process_private *p; + + if (!frame_count) + return -EINVAL; + + p = kgsl_process_private_find(pid); + if (!p) + return -ENOENT; + + *frame_count = atomic64_read(&p->frame_count); + kgsl_process_private_put(p); + + return 0; +} +EXPORT_SYMBOL(kgsl_gpu_frame_count); + +static int kgsl_close_device(struct kgsl_device *device) +{ + int result = 0; + + mutex_lock(&device->mutex); + if (device->open_count == 1) + result = device->ftbl->last_close(device); + + /* + * We must decrement the open_count after last_close() has finished. + * This is because last_close() relinquishes device mutex while + * waiting for active count to become 0. This opens up a window + * where a new process can come in, see that open_count is 0, and + * initiate a first_open(). This can potentially mess up the power + * state machine. To avoid a first_open() from happening before + * last_close() has finished, decrement the open_count after + * last_close(). + */ + device->open_count--; + mutex_unlock(&device->mutex); + return result; + +} + +static void device_release_contexts(struct kgsl_device_private *dev_priv) +{ + struct kgsl_device *device = dev_priv->device; + struct kgsl_context *context; + int next = 0; + int result = 0; + + while (1) { + read_lock(&device->context_lock); + context = idr_get_next(&device->context_idr, &next); + + if (context == NULL) { + read_unlock(&device->context_lock); + break; + } else if (context->dev_priv == dev_priv) { + /* + * Hold a reference to the context in case somebody + * tries to put it while we are detaching + */ + result = _kgsl_context_get(context); + } + read_unlock(&device->context_lock); + + if (result) { + kgsl_context_detach(context); + kgsl_context_put(context); + result = 0; + } + + next = next + 1; + } +} + +static int kgsl_release(struct inode *inodep, struct file *filep) +{ + struct kgsl_device_private *dev_priv = filep->private_data; + struct kgsl_device *device = dev_priv->device; + int result; + + filep->private_data = NULL; + + /* Release the contexts for the file */ + device_release_contexts(dev_priv); + + /* Close down the process wide resources for the file */ + kgsl_process_private_close(dev_priv, dev_priv->process_priv); + + /* Destroy the device-specific structure */ + device->ftbl->device_private_destroy(dev_priv); + + result = kgsl_close_device(device); + pm_runtime_put(&device->pdev->dev); + + return result; +} + +static int kgsl_open_device(struct kgsl_device *device) +{ + int result = 0; + + mutex_lock(&device->mutex); + if (device->open_count == 0) { + result = device->ftbl->first_open(device); + if (result) + goto out; + } + device->open_count++; +out: + mutex_unlock(&device->mutex); + return result; +} + +static int kgsl_open(struct inode *inodep, struct file *filep) +{ + int result; + struct kgsl_device_private *dev_priv; + struct kgsl_device *device; + unsigned int minor = iminor(inodep); + + device = kgsl_get_minor(minor); + if (device == NULL) { + pr_err("kgsl: No device found\n"); + return -ENODEV; + } + + result = pm_runtime_get_sync(&device->pdev->dev); + if (result < 0) { + dev_err(device->dev, + "Runtime PM: Unable to wake up the device, rc = %d\n", + result); + return result; + } + result = 0; + + dev_priv = device->ftbl->device_private_create(); + if (dev_priv == NULL) { + result = -ENOMEM; + 
goto err;
+ }
+
+ dev_priv->device = device;
+ filep->private_data = dev_priv;
+
+ result = kgsl_open_device(device);
+ if (result)
+ goto err;
+
+ /*
+ * Get file (per process) private struct. This must be done
+ * after the first start so that the global pagetable mappings
+ * are set up before we create the per-process pagetable.
+ */
+ dev_priv->process_priv = kgsl_process_private_open(device);
+ if (IS_ERR(dev_priv->process_priv)) {
+ result = PTR_ERR(dev_priv->process_priv);
+ kgsl_close_device(device);
+ goto err;
+ }
+
+err:
+ if (result) {
+ filep->private_data = NULL;
+ kfree(dev_priv);
+ pm_runtime_put(&device->pdev->dev);
+ }
+ return result;
+}
+
+#define GPUADDR_IN_MEMDESC(_val, _memdesc) \
+ (((_val) >= (_memdesc)->gpuaddr) && \
+ ((_val) < ((_memdesc)->gpuaddr + (_memdesc)->size)))
+
+/**
+ * kgsl_sharedmem_find() - Find a gpu memory allocation
+ *
+ * @private: private data for the process to check.
+ * @gpuaddr: start address of the region
+ *
+ * Find a gpu allocation. Caller must kgsl_mem_entry_put()
+ * the returned entry when finished using it.
+ */
+struct kgsl_mem_entry * __must_check
+kgsl_sharedmem_find(struct kgsl_process_private *private, uint64_t gpuaddr)
+{
+ int id;
+ struct kgsl_mem_entry *entry, *ret = NULL;
+
+ if (!private)
+ return NULL;
+
+ if (!kgsl_mmu_gpuaddr_in_range(private->pagetable, gpuaddr) &&
+ !kgsl_mmu_gpuaddr_in_range(
+ private->pagetable->mmu->securepagetable, gpuaddr))
+ return NULL;
+
+ spin_lock(&private->mem_lock);
+ idr_for_each_entry(&private->mem_idr, entry, id) {
+ if (GPUADDR_IN_MEMDESC(gpuaddr, &entry->memdesc)) {
+ if (!entry->pending_free)
+ ret = kgsl_mem_entry_get(entry);
+ break;
+ }
+ }
+ spin_unlock(&private->mem_lock);
+
+ return ret;
+}
+
+static struct kgsl_mem_entry * __must_check
+kgsl_sharedmem_find_id_flags(struct kgsl_process_private *process,
+ unsigned int id, uint64_t flags)
+{
+ struct kgsl_mem_entry *entry, *ret = NULL;
+
+ spin_lock(&process->mem_lock);
+ entry = idr_find(&process->mem_idr, id);
+ if (entry)
+ if (!entry->pending_free &&
+ (flags & entry->memdesc.flags) == flags)
+ ret = kgsl_mem_entry_get(entry);
+ spin_unlock(&process->mem_lock);
+
+ return ret;
+}
+
+/**
+ * kgsl_sharedmem_find_id() - find a memory entry by id
+ * @process: the owning process
+ * @id: id to find
+ *
+ * @returns - the mem_entry or NULL
+ *
+ * Caller must kgsl_mem_entry_put() the returned entry, when finished using
+ * it.
+ */
+struct kgsl_mem_entry * __must_check
+kgsl_sharedmem_find_id(struct kgsl_process_private *process, unsigned int id)
+{
+ return kgsl_sharedmem_find_id_flags(process, id, 0);
+}
+
+/**
+ * kgsl_mem_entry_unset_pend() - Unset the pending free flag of an entry
+ * @entry - The memory entry
+ */
+static inline void kgsl_mem_entry_unset_pend(struct kgsl_mem_entry *entry)
+{
+ if (entry == NULL)
+ return;
+ spin_lock(&entry->priv->mem_lock);
+ entry->pending_free = 0;
+ spin_unlock(&entry->priv->mem_lock);
+}
+
+/**
+ * kgsl_mem_entry_set_pend() - Set the pending free flag of a memory entry
+ * @entry - The memory entry
+ *
+ * @returns - true if pending flag was 0 else false
+ *
+ * This function will set the pending free flag if it was previously unset. Used
+ * to prevent a race condition between ioctls calling free/freememontimestamp
+ * on the same entry. Whichever thread sets the flag first will do the free.
+ */ +static inline bool kgsl_mem_entry_set_pend(struct kgsl_mem_entry *entry) +{ + bool ret = false; + + if (entry == NULL) + return false; + + spin_lock(&entry->priv->mem_lock); + if (!entry->pending_free) { + entry->pending_free = 1; + ret = true; + } + spin_unlock(&entry->priv->mem_lock); + return ret; +} + +static int kgsl_get_ctxt_fault_stats(struct kgsl_context *context, + struct kgsl_context_property *ctxt_property) +{ + struct kgsl_context_property_fault fault_stats; + size_t copy; + + /* Return the size of the subtype struct */ + if (ctxt_property->size == 0) { + ctxt_property->size = sizeof(fault_stats); + return 0; + } + + memset(&fault_stats, 0, sizeof(fault_stats)); + + copy = min_t(size_t, ctxt_property->size, sizeof(fault_stats)); + + fault_stats.faults = context->total_fault_count; + fault_stats.timestamp = context->last_faulted_cmd_ts; + + /* + * Copy the context fault stats to data which also serves as + * the out parameter. + */ + if (copy_to_user(u64_to_user_ptr(ctxt_property->data), + &fault_stats, copy)) + return -EFAULT; + + return 0; +} + +static long kgsl_get_ctxt_properties(struct kgsl_device_private *dev_priv, + struct kgsl_device_getproperty *param) +{ + /* Return fault stats of given context */ + struct kgsl_context_property ctxt_property; + struct kgsl_context *context; + size_t copy; + long ret; + + /* + * If sizebytes is zero, tell the user how big the + * ctxt_property struct should be. + */ + if (param->sizebytes == 0) { + param->sizebytes = sizeof(ctxt_property); + return 0; + } + + memset(&ctxt_property, 0, sizeof(ctxt_property)); + + copy = min_t(size_t, param->sizebytes, sizeof(ctxt_property)); + + /* We expect the value passed in to contain the context id */ + if (copy_from_user(&ctxt_property, param->value, copy)) + return -EFAULT; + + /* ctxt type zero is not valid, as we consider it as uninitialized. 
*/ + if (ctxt_property.type == 0) + return -EINVAL; + + context = kgsl_context_get_owner(dev_priv, + ctxt_property.contextid); + if (!context) + return -EINVAL; + + if (ctxt_property.type == KGSL_CONTEXT_PROP_FAULTS) + ret = kgsl_get_ctxt_fault_stats(context, &ctxt_property); + else + ret = -EOPNOTSUPP; + + kgsl_context_put(context); + + return ret; +} + +static long kgsl_prop_version(struct kgsl_device_private *dev_priv, + struct kgsl_device_getproperty *param) +{ + struct kgsl_version version = { + .drv_major = KGSL_VERSION_MAJOR, + .drv_minor = KGSL_VERSION_MINOR, + .dev_major = 3, + .dev_minor = 1, + }; + + if (param->sizebytes != sizeof(version)) + return -EINVAL; + + if (copy_to_user(param->value, &version, sizeof(version))) + return -EFAULT; + + return 0; +} + +/* Return reset status of given context and clear it */ +static long kgsl_prop_gpu_reset_stat(struct kgsl_device_private *dev_priv, + struct kgsl_device_getproperty *param) +{ + u32 id; + struct kgsl_context *context; + + if (param->sizebytes != sizeof(id)) + return -EINVAL; + + /* We expect the value passed in to contain the context id */ + if (copy_from_user(&id, param->value, sizeof(id))) + return -EFAULT; + + context = kgsl_context_get_owner(dev_priv, id); + if (!context) + return -EINVAL; + + /* + * Copy the reset status to value which also serves as + * the out parameter + */ + id = context->reset_status; + + context->reset_status = KGSL_CTX_STAT_NO_ERROR; + kgsl_context_put(context); + + if (copy_to_user(param->value, &id, sizeof(id))) + return -EFAULT; + + return 0; +} + +static long kgsl_prop_secure_buf_alignment(struct kgsl_device_private *dev_priv, + struct kgsl_device_getproperty *param) +{ + u32 align = PAGE_SIZE; + + if (param->sizebytes != sizeof(align)) + return -EINVAL; + + if (copy_to_user(param->value, &align, sizeof(align))) + return -EFAULT; + + return 0; +} + +static long kgsl_prop_secure_ctxt_support(struct kgsl_device_private *dev_priv, + struct kgsl_device_getproperty *param) +{ + u32 secure; + + if (param->sizebytes != sizeof(secure)) + return -EINVAL; + + secure = dev_priv->device->mmu.secured ? 1 : 0; + + if (copy_to_user(param->value, &secure, sizeof(secure))) + return -EFAULT; + + return 0; +} + +static int kgsl_query_caps_properties(struct kgsl_device *device, + struct kgsl_capabilities *caps) +{ + struct kgsl_capabilities_properties props; + size_t copy; + u32 count, *local; + int ret; + + /* Return the size of the subtype struct */ + if (caps->size == 0) { + caps->size = sizeof(props); + return 0; + } + + memset(&props, 0, sizeof(props)); + + copy = min_t(size_t, caps->size, sizeof(props)); + + if (copy_from_user(&props, u64_to_user_ptr(caps->data), copy)) + return -EFAULT; + + /* Get the number of properties */ + count = kgsl_query_property_list(device, NULL, 0); + + /* + * If the incoming user count is zero, they are querying the number of + * available properties. Set it and return. 
+ */ + if (props.count == 0) { + props.count = count; + goto done; + } + + /* Copy the lesser of the user or kernel property count */ + if (props.count < count) + count = props.count; + + /* Create a local buffer to store the property list */ + local = kcalloc(count, sizeof(u32), GFP_KERNEL); + if (!local) + return -ENOMEM; + + /* Get the properties */ + props.count = kgsl_query_property_list(device, local, count); + + ret = copy_to_user(u64_to_user_ptr(props.list), local, + props.count * sizeof(u32)); + + kfree(local); + + if (ret) + return -EFAULT; + +done: + if (copy_to_user(u64_to_user_ptr(caps->data), &props, copy)) + return -EFAULT; + + return 0; +} + +static long kgsl_prop_query_capabilities(struct kgsl_device_private *dev_priv, + struct kgsl_device_getproperty *param) +{ + struct kgsl_capabilities caps; + long ret; + size_t copy; + + /* + * If sizebytes is zero, tell the user how big the capabilities struct + * should be + */ + if (param->sizebytes == 0) { + param->sizebytes = sizeof(caps); + return 0; + } + + memset(&caps, 0, sizeof(caps)); + + copy = min_t(size_t, param->sizebytes, sizeof(caps)); + + if (copy_from_user(&caps, param->value, copy)) + return -EFAULT; + + /* querytype must be non zero */ + if (caps.querytype == 0) + return -EINVAL; + + if (caps.querytype == KGSL_QUERY_CAPS_PROPERTIES) + ret = kgsl_query_caps_properties(dev_priv->device, &caps); + else { + /* Unsupported querytypes should return a unique return value */ + return -EOPNOTSUPP; + } + + if (copy_to_user(param->value, &caps, copy)) + return -EFAULT; + + return ret; +} + +static const struct { + int type; + long (*func)(struct kgsl_device_private *dev_priv, + struct kgsl_device_getproperty *param); +} kgsl_property_funcs[] = { + { KGSL_PROP_VERSION, kgsl_prop_version }, + { KGSL_PROP_GPU_RESET_STAT, kgsl_prop_gpu_reset_stat}, + { KGSL_PROP_SECURE_BUFFER_ALIGNMENT, kgsl_prop_secure_buf_alignment }, + { KGSL_PROP_SECURE_CTXT_SUPPORT, kgsl_prop_secure_ctxt_support }, + { KGSL_PROP_QUERY_CAPABILITIES, kgsl_prop_query_capabilities }, + { KGSL_PROP_CONTEXT_PROPERTY, kgsl_get_ctxt_properties }, +}; + +/*call all ioctl sub functions with driver locked*/ +long kgsl_ioctl_device_getproperty(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_device *device = dev_priv->device; + struct kgsl_device_getproperty *param = data; + int i; + + for (i = 0; i < ARRAY_SIZE(kgsl_property_funcs); i++) { + if (param->type == kgsl_property_funcs[i].type) + return kgsl_property_funcs[i].func(dev_priv, param); + } + + if (is_compat_task()) + return device->ftbl->getproperty_compat(device, param); + + return device->ftbl->getproperty(device, param); +} + +int kgsl_query_property_list(struct kgsl_device *device, u32 *list, u32 count) +{ + int num = 0; + + if (!list) { + num = ARRAY_SIZE(kgsl_property_funcs); + + if (device->ftbl->query_property_list) + num += device->ftbl->query_property_list(device, list, + count); + + return num; + } + + for (; num < count && num < ARRAY_SIZE(kgsl_property_funcs); num++) + list[num] = kgsl_property_funcs[num].type; + + if (device->ftbl->query_property_list) + num += device->ftbl->query_property_list(device, &list[num], + count - num); + + return num; +} + +long kgsl_ioctl_device_setproperty(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + int result = 0; + /* The getproperty struct is reused for setproperty too */ + struct kgsl_device_getproperty *param = data; + + /* Reroute to compat version if coming from compat_ioctl */ + if 
(is_compat_task()) + result = dev_priv->device->ftbl->setproperty_compat( + dev_priv, param->type, param->value, + param->sizebytes); + else if (dev_priv->device->ftbl->setproperty) + result = dev_priv->device->ftbl->setproperty( + dev_priv, param->type, param->value, + param->sizebytes); + + return result; +} + +long kgsl_ioctl_device_waittimestamp_ctxtid( + struct kgsl_device_private *dev_priv, unsigned int cmd, + void *data) +{ + struct kgsl_device_waittimestamp_ctxtid *param = data; + struct kgsl_device *device = dev_priv->device; + long result = -EINVAL; + unsigned int temp_cur_ts = 0; + struct kgsl_context *context; + + context = kgsl_context_get_owner(dev_priv, param->context_id); + if (context == NULL) + return result; + + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED, + &temp_cur_ts); + + trace_kgsl_waittimestamp_entry(device, context->id, temp_cur_ts, + param->timestamp, param->timeout); + + result = device->ftbl->waittimestamp(device, context, param->timestamp, + param->timeout); + + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED, + &temp_cur_ts); + trace_kgsl_waittimestamp_exit(device, temp_cur_ts, result); + + kgsl_context_put(context); + + return result; +} + +long kgsl_ioctl_rb_issueibcmds(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_ringbuffer_issueibcmds *param = data; + struct kgsl_device *device = dev_priv->device; + struct kgsl_context *context; + struct kgsl_drawobj *drawobj; + struct kgsl_drawobj_cmd *cmdobj; + long result = -EINVAL; + + /* The legacy functions don't support synchronization commands */ + if ((param->flags & (KGSL_DRAWOBJ_SYNC | KGSL_DRAWOBJ_MARKER))) + return -EINVAL; + + /* Sanity check the number of IBs */ + if (param->flags & KGSL_DRAWOBJ_SUBMIT_IB_LIST && + (param->numibs == 0 || param->numibs > KGSL_MAX_NUMIBS)) + return -EINVAL; + + /* Get the context */ + context = kgsl_context_get_owner(dev_priv, param->drawctxt_id); + if (context == NULL) + return -EINVAL; + + cmdobj = kgsl_drawobj_cmd_create(device, context, param->flags, + CMDOBJ_TYPE); + if (IS_ERR(cmdobj)) { + kgsl_context_put(context); + return PTR_ERR(cmdobj); + } + + drawobj = DRAWOBJ(cmdobj); + + if (param->flags & KGSL_DRAWOBJ_SUBMIT_IB_LIST) + result = kgsl_drawobj_cmd_add_ibdesc_list(device, cmdobj, + (void __user *) param->ibdesc_addr, + param->numibs); + else { + struct kgsl_ibdesc ibdesc; + /* Ultra legacy path */ + + ibdesc.gpuaddr = param->ibdesc_addr; + ibdesc.sizedwords = param->numibs; + ibdesc.ctrl = 0; + + result = kgsl_drawobj_cmd_add_ibdesc(device, cmdobj, &ibdesc); + } + + if (result == 0) + result = kgsl_reclaim_to_pinned_state(dev_priv->process_priv); + + if (result == 0) + result = dev_priv->device->ftbl->queue_cmds(dev_priv, context, + &drawobj, 1, ¶m->timestamp); + + /* + * -EPROTO is a "success" error - it just tells the user that the + * context had previously faulted + */ + if (result && result != -EPROTO) + kgsl_drawobj_destroy(drawobj); + + kgsl_context_put(context); + return result; +} + +/* Returns 0 on failure. Returns command type(s) on success */ +static unsigned int _process_command_input(struct kgsl_device *device, + unsigned int flags, unsigned int numcmds, + unsigned int numobjs, unsigned int numsyncs) +{ + if (numcmds > KGSL_MAX_NUMIBS || + numobjs > KGSL_MAX_NUMIBS || + numsyncs > KGSL_MAX_SYNCPOINTS) + return 0; + + /* + * The SYNC bit is supposed to identify a dummy sync object + * so warn the user if they specified any IBs with it. 
+ * A MARKER command can either have IBs or not but if the + * command has 0 IBs it is automatically assumed to be a marker. + */ + + /* If they specify the flag, go with what they say */ + if (flags & KGSL_DRAWOBJ_MARKER) + return MARKEROBJ_TYPE; + else if (flags & KGSL_DRAWOBJ_SYNC) + return SYNCOBJ_TYPE; + + /* If not, deduce what they meant */ + if (numsyncs && numcmds) + return SYNCOBJ_TYPE | CMDOBJ_TYPE; + else if (numsyncs) + return SYNCOBJ_TYPE; + else if (numcmds) + return CMDOBJ_TYPE; + else if (numcmds == 0) + return MARKEROBJ_TYPE; + + return 0; +} + +long kgsl_ioctl_submit_commands(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_submit_commands *param = data; + struct kgsl_device *device = dev_priv->device; + struct kgsl_context *context; + struct kgsl_drawobj *drawobj[2]; + unsigned int type; + long result; + unsigned int i = 0; + + type = _process_command_input(device, param->flags, param->numcmds, 0, + param->numsyncs); + if (!type) + return -EINVAL; + + context = kgsl_context_get_owner(dev_priv, param->context_id); + if (context == NULL) + return -EINVAL; + + if (type & SYNCOBJ_TYPE) { + struct kgsl_drawobj_sync *syncobj = + kgsl_drawobj_sync_create(device, context); + if (IS_ERR(syncobj)) { + result = PTR_ERR(syncobj); + goto done; + } + + drawobj[i++] = DRAWOBJ(syncobj); + + result = kgsl_drawobj_sync_add_syncpoints(device, syncobj, + param->synclist, param->numsyncs); + if (result) + goto done; + } + + if (type & (CMDOBJ_TYPE | MARKEROBJ_TYPE)) { + struct kgsl_drawobj_cmd *cmdobj = + kgsl_drawobj_cmd_create(device, + context, param->flags, type); + if (IS_ERR(cmdobj)) { + result = PTR_ERR(cmdobj); + goto done; + } + + drawobj[i++] = DRAWOBJ(cmdobj); + + result = kgsl_drawobj_cmd_add_ibdesc_list(device, cmdobj, + param->cmdlist, param->numcmds); + if (result) + goto done; + + /* If no profiling buffer was specified, clear the flag */ + if (cmdobj->profiling_buf_entry == NULL) + DRAWOBJ(cmdobj)->flags &= + ~(unsigned long)KGSL_DRAWOBJ_PROFILING; + + if (type & CMDOBJ_TYPE) { + result = kgsl_reclaim_to_pinned_state( + dev_priv->process_priv); + if (result) + goto done; + } + } + + result = device->ftbl->queue_cmds(dev_priv, context, drawobj, + i, ¶m->timestamp); + +done: + /* + * -EPROTO is a "success" error - it just tells the user that the + * context had previously faulted + */ + if (result && result != -EPROTO) + while (i--) + kgsl_drawobj_destroy(drawobj[i]); + + + kgsl_context_put(context); + return result; +} + +long kgsl_ioctl_gpu_command(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_gpu_command *param = data; + struct kgsl_device *device = dev_priv->device; + struct kgsl_context *context; + struct kgsl_drawobj *drawobj[2]; + unsigned int type; + long result; + unsigned int i = 0; + + type = _process_command_input(device, param->flags, param->numcmds, + param->numobjs, param->numsyncs); + if (!type) + return -EINVAL; + + context = kgsl_context_get_owner(dev_priv, param->context_id); + if (context == NULL) + return -EINVAL; + + if (type & SYNCOBJ_TYPE) { + struct kgsl_drawobj_sync *syncobj = + kgsl_drawobj_sync_create(device, context); + + if (IS_ERR(syncobj)) { + result = PTR_ERR(syncobj); + goto done; + } + + drawobj[i++] = DRAWOBJ(syncobj); + + result = kgsl_drawobj_sync_add_synclist(device, syncobj, + u64_to_user_ptr(param->synclist), + param->syncsize, param->numsyncs); + if (result) + goto done; + } + + if (type & (CMDOBJ_TYPE | MARKEROBJ_TYPE)) { + struct kgsl_drawobj_cmd *cmdobj 
= + kgsl_drawobj_cmd_create(device, + context, param->flags, type); + + if (IS_ERR(cmdobj)) { + result = PTR_ERR(cmdobj); + goto done; + } + + drawobj[i++] = DRAWOBJ(cmdobj); + + result = kgsl_drawobj_cmd_add_cmdlist(device, cmdobj, + u64_to_user_ptr(param->cmdlist), + param->cmdsize, param->numcmds); + if (result) + goto done; + + result = kgsl_drawobj_cmd_add_memlist(device, cmdobj, + u64_to_user_ptr(param->objlist), + param->objsize, param->numobjs); + if (result) + goto done; + + /* If no profiling buffer was specified, clear the flag */ + if (cmdobj->profiling_buf_entry == NULL) + DRAWOBJ(cmdobj)->flags &= + ~(unsigned long)KGSL_DRAWOBJ_PROFILING; + + if (type & CMDOBJ_TYPE) { + result = kgsl_reclaim_to_pinned_state( + dev_priv->process_priv); + if (result) + goto done; + } + } + + result = device->ftbl->queue_cmds(dev_priv, context, drawobj, + i, ¶m->timestamp); + +done: + /* + * -EPROTO is a "success" error - it just tells the user that the + * context had previously faulted + */ + if (result && result != -EPROTO) + while (i--) + kgsl_drawobj_destroy(drawobj[i]); + + kgsl_context_put(context); + return result; +} + +long kgsl_ioctl_gpu_aux_command(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_gpu_aux_command *param = data; + struct kgsl_device *device = dev_priv->device; + struct kgsl_context *context; + struct kgsl_drawobj **drawobjs; + struct kgsl_drawobj_sync *tsobj; + void __user *cmdlist; + u32 queued, count; + int i, index = 0; + long ret; + struct kgsl_gpu_aux_command_generic generic; + + /* We support only one aux command */ + if (param->numcmds != 1) + return -EINVAL; + + if (!(param->flags & + (KGSL_GPU_AUX_COMMAND_BIND | KGSL_GPU_AUX_COMMAND_TIMELINE))) + return -EINVAL; + + context = kgsl_context_get_owner(dev_priv, param->context_id); + if (!context) + return -EINVAL; + + /* + * param->numcmds is always one and we have one additional drawobj + * for the timestamp sync if KGSL_GPU_AUX_COMMAND_SYNC flag is passed. + * On top of that we make an implicit sync object for the last queued + * timestamp on this context. + */ + count = (param->flags & KGSL_GPU_AUX_COMMAND_SYNC) ? 3 : 2; + + drawobjs = kvcalloc(count, sizeof(*drawobjs), + GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); + + if (!drawobjs) { + kgsl_context_put(context); + return -ENOMEM; + } + + trace_kgsl_aux_command(context->id, param->numcmds, param->flags, + param->timestamp); + + if (param->flags & KGSL_GPU_AUX_COMMAND_SYNC) { + struct kgsl_drawobj_sync *syncobj = + kgsl_drawobj_sync_create(device, context); + + if (IS_ERR(syncobj)) { + ret = PTR_ERR(syncobj); + goto err; + } + + drawobjs[index++] = DRAWOBJ(syncobj); + + ret = kgsl_drawobj_sync_add_synclist(device, syncobj, + u64_to_user_ptr(param->synclist), + param->syncsize, param->numsyncs); + if (ret) + goto err; + } + + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_QUEUED, &queued); + + /* + * Make an implicit sync object for the last queued timestamp on this + * context + */ + tsobj = kgsl_drawobj_create_timestamp_syncobj(device, + context, queued); + + if (IS_ERR(tsobj)) { + ret = PTR_ERR(tsobj); + goto err; + } + + drawobjs[index++] = DRAWOBJ(tsobj); + + cmdlist = u64_to_user_ptr(param->cmdlist); + + /* + * Create a draw object for KGSL_GPU_AUX_COMMAND_BIND or + * KGSL_GPU_AUX_COMMAND_TIMELINE. 
+ */ + if (copy_struct_from_user(&generic, sizeof(generic), + cmdlist, param->cmdsize)) { + ret = -EFAULT; + goto err; + } + + if (generic.type == KGSL_GPU_AUX_COMMAND_BIND) { + struct kgsl_drawobj_bind *bindobj; + + bindobj = kgsl_drawobj_bind_create(device, context); + + if (IS_ERR(bindobj)) { + ret = PTR_ERR(bindobj); + goto err; + } + + drawobjs[index++] = DRAWOBJ(bindobj); + + ret = kgsl_drawobj_add_bind(dev_priv, bindobj, + cmdlist, param->cmdsize); + if (ret) + goto err; + } else if (generic.type == KGSL_GPU_AUX_COMMAND_TIMELINE) { + struct kgsl_drawobj_timeline *timelineobj; + + timelineobj = kgsl_drawobj_timeline_create(device, + context); + + if (IS_ERR(timelineobj)) { + ret = PTR_ERR(timelineobj); + goto err; + } + + drawobjs[index++] = DRAWOBJ(timelineobj); + + ret = kgsl_drawobj_add_timeline(dev_priv, timelineobj, + cmdlist, param->cmdsize); + if (ret) + goto err; + + } else { + ret = -EINVAL; + goto err; + } + + ret = device->ftbl->queue_cmds(dev_priv, context, + drawobjs, index, ¶m->timestamp); + +err: + kgsl_context_put(context); + + if (ret && ret != -EPROTO) { + for (i = 0; i < count; i++) + kgsl_drawobj_destroy(drawobjs[i]); + } + + kvfree(drawobjs); + return ret; +} + +long kgsl_ioctl_cmdstream_readtimestamp_ctxtid(struct kgsl_device_private + *dev_priv, unsigned int cmd, + void *data) +{ + struct kgsl_cmdstream_readtimestamp_ctxtid *param = data; + struct kgsl_device *device = dev_priv->device; + struct kgsl_context *context; + long result = -EINVAL; + + mutex_lock(&device->mutex); + context = kgsl_context_get_owner(dev_priv, param->context_id); + + if (context) { + result = kgsl_readtimestamp(device, context, + param->type, ¶m->timestamp); + + trace_kgsl_readtimestamp(device, context->id, + param->type, param->timestamp); + } + + kgsl_context_put(context); + mutex_unlock(&device->mutex); + return result; +} + +long kgsl_ioctl_drawctxt_create(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + int result = 0; + struct kgsl_drawctxt_create *param = data; + struct kgsl_context *context = NULL; + struct kgsl_device *device = dev_priv->device; + + context = device->ftbl->drawctxt_create(dev_priv, ¶m->flags); + if (IS_ERR(context)) { + result = PTR_ERR(context); + goto done; + } + trace_kgsl_context_create(dev_priv->device, context, param->flags); + + /* Commit the pointer to the context in context_idr */ + write_lock(&device->context_lock); + idr_replace(&device->context_idr, context, context->id); + param->drawctxt_id = context->id; + write_unlock(&device->context_lock); + +done: + return result; +} + +long kgsl_ioctl_drawctxt_destroy(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_drawctxt_destroy *param = data; + struct kgsl_context *context; + + context = kgsl_context_get_owner(dev_priv, param->drawctxt_id); + if (context == NULL) + return -EINVAL; + + kgsl_context_detach(context); + kgsl_context_put(context); + + return 0; +} + +long gpumem_free_entry(struct kgsl_mem_entry *entry) +{ + if (!kgsl_mem_entry_set_pend(entry)) + return -EBUSY; + + trace_kgsl_mem_free(entry); + kgsl_memfree_add(pid_nr(entry->priv->pid), + entry->memdesc.pagetable ? 
+ entry->memdesc.pagetable->name : 0, + entry->memdesc.gpuaddr, entry->memdesc.size, + entry->memdesc.flags); + + kgsl_mem_entry_put(entry); + + return 0; +} + +static void gpumem_free_func(struct kgsl_device *device, + struct kgsl_event_group *group, void *priv, int ret) +{ + struct kgsl_context *context = group->context; + struct kgsl_mem_entry *entry = priv; + unsigned int timestamp; + + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED, ×tamp); + + /* Free the memory for all event types */ + trace_kgsl_mem_timestamp_free(device, entry, KGSL_CONTEXT_ID(context), + timestamp, 0); + kgsl_memfree_add(pid_nr(entry->priv->pid), + entry->memdesc.pagetable ? + entry->memdesc.pagetable->name : 0, + entry->memdesc.gpuaddr, entry->memdesc.size, + entry->memdesc.flags); + + kgsl_mem_entry_put(entry); +} + +static long gpumem_free_entry_on_timestamp(struct kgsl_device *device, + struct kgsl_mem_entry *entry, + struct kgsl_context *context, unsigned int timestamp) +{ + int ret; + unsigned int temp; + + if (!kgsl_mem_entry_set_pend(entry)) + return -EBUSY; + + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED, &temp); + trace_kgsl_mem_timestamp_queue(device, entry, context->id, temp, + timestamp); + ret = kgsl_add_event(device, &context->events, + timestamp, gpumem_free_func, entry); + + if (ret) + kgsl_mem_entry_unset_pend(entry); + + return ret; +} + +long kgsl_ioctl_sharedmem_free(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_sharedmem_free *param = data; + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_mem_entry *entry; + long ret; + + entry = kgsl_sharedmem_find(private, (uint64_t) param->gpuaddr); + if (entry == NULL) + return -EINVAL; + + ret = gpumem_free_entry(entry); + kgsl_mem_entry_put(entry); + + return ret; +} + +long kgsl_ioctl_gpumem_free_id(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_gpumem_free_id *param = data; + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_mem_entry *entry; + long ret; + + entry = kgsl_sharedmem_find_id(private, param->id); + if (entry == NULL) + return -EINVAL; + + ret = gpumem_free_entry(entry); + kgsl_mem_entry_put(entry); + + return ret; +} + +static long gpuobj_free_on_timestamp(struct kgsl_device_private *dev_priv, + struct kgsl_mem_entry *entry, struct kgsl_gpuobj_free *param) +{ + struct kgsl_gpu_event_timestamp event; + struct kgsl_context *context; + long ret; + + if (copy_struct_from_user(&event, sizeof(event), + u64_to_user_ptr(param->priv), param->len)) + return -EFAULT; + + if (event.context_id == 0) + return -EINVAL; + + context = kgsl_context_get_owner(dev_priv, event.context_id); + if (context == NULL) + return -EINVAL; + + ret = gpumem_free_entry_on_timestamp(dev_priv->device, entry, context, + event.timestamp); + + kgsl_context_put(context); + return ret; +} + +static bool gpuobj_free_fence_func(void *priv) +{ + struct kgsl_mem_entry *entry = priv; + + trace_kgsl_mem_free(entry); + kgsl_memfree_add(pid_nr(entry->priv->pid), + entry->memdesc.pagetable ? 
+ entry->memdesc.pagetable->name : 0, + entry->memdesc.gpuaddr, entry->memdesc.size, + entry->memdesc.flags); + + INIT_WORK(&entry->work, _deferred_put); + queue_work(kgsl_driver.mem_workqueue, &entry->work); + return true; +} + +static long gpuobj_free_on_fence(struct kgsl_device_private *dev_priv, + struct kgsl_mem_entry *entry, struct kgsl_gpuobj_free *param) +{ + struct kgsl_sync_fence_cb *handle; + struct kgsl_gpu_event_fence event; + + if (!kgsl_mem_entry_set_pend(entry)) + return -EBUSY; + + if (copy_struct_from_user(&event, sizeof(event), + u64_to_user_ptr(param->priv), param->len)) { + kgsl_mem_entry_unset_pend(entry); + return -EFAULT; + } + + if (event.fd < 0) { + kgsl_mem_entry_unset_pend(entry); + return -EINVAL; + } + + handle = kgsl_sync_fence_async_wait(event.fd, + gpuobj_free_fence_func, entry, NULL); + + if (IS_ERR(handle)) { + kgsl_mem_entry_unset_pend(entry); + return PTR_ERR(handle); + } + + /* if handle is NULL the fence has already signaled */ + if (handle == NULL) + gpuobj_free_fence_func(entry); + + return 0; +} + +long kgsl_ioctl_gpuobj_free(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_gpuobj_free *param = data; + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_mem_entry *entry; + long ret; + + entry = kgsl_sharedmem_find_id(private, param->id); + if (entry == NULL) + return -EINVAL; + + /* If no event is specified then free immediately */ + if (!(param->flags & KGSL_GPUOBJ_FREE_ON_EVENT)) + ret = gpumem_free_entry(entry); + else if (param->type == KGSL_GPU_EVENT_TIMESTAMP) + ret = gpuobj_free_on_timestamp(dev_priv, entry, param); + else if (param->type == KGSL_GPU_EVENT_FENCE) + ret = gpuobj_free_on_fence(dev_priv, entry, param); + else + ret = -EINVAL; + + kgsl_mem_entry_put(entry); + return ret; +} + +long kgsl_ioctl_cmdstream_freememontimestamp_ctxtid( + struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_cmdstream_freememontimestamp_ctxtid *param = data; + struct kgsl_context *context = NULL; + struct kgsl_mem_entry *entry; + long ret = -EINVAL; + + if (param->type != KGSL_TIMESTAMP_RETIRED) + return -EINVAL; + + context = kgsl_context_get_owner(dev_priv, param->context_id); + if (context == NULL) + return -EINVAL; + + entry = kgsl_sharedmem_find(dev_priv->process_priv, + (uint64_t) param->gpuaddr); + if (entry == NULL) { + kgsl_context_put(context); + return -EINVAL; + } + + ret = gpumem_free_entry_on_timestamp(dev_priv->device, entry, + context, param->timestamp); + + kgsl_mem_entry_put(entry); + kgsl_context_put(context); + + return ret; +} + +static int check_vma_flags(struct vm_area_struct *vma, + unsigned int flags) +{ + unsigned long flags_requested = (VM_READ | VM_WRITE); + + if (flags & KGSL_MEMFLAGS_GPUREADONLY) + flags_requested &= ~(unsigned long)VM_WRITE; + + if ((vma->vm_flags & flags_requested) == flags_requested) + return 0; + + return -EFAULT; +} + +static int check_vma(unsigned long hostptr, u64 size) +{ + struct vm_area_struct *vma; + unsigned long cur = hostptr; + + while (cur < (hostptr + size)) { + vma = find_vma(current->mm, cur); + if (!vma) + return false; + + /* Don't remap memory that we already own */ + if (vma->vm_file && vma->vm_ops == &kgsl_gpumem_vm_ops) + return false; + + cur = vma->vm_end; + } + + return true; +} + +static int memdesc_sg_virt(struct kgsl_memdesc *memdesc, unsigned long useraddr) +{ + int ret = 0; + long npages = 0, i; + size_t sglen = (size_t) (memdesc->size / PAGE_SIZE); + struct page **pages = NULL; + 
int write = ((memdesc->flags & KGSL_MEMFLAGS_GPUREADONLY) ? 0 : + FOLL_WRITE); + + if (sglen == 0 || sglen >= LONG_MAX) + return -EINVAL; + + pages = kvcalloc(sglen, sizeof(*pages), GFP_KERNEL); + if (pages == NULL) + return -ENOMEM; + + memdesc->sgt = kmalloc(sizeof(*memdesc->sgt), GFP_KERNEL); + if (memdesc->sgt == NULL) { + ret = -ENOMEM; + goto out; + } + + mmap_read_lock(current->mm); + if (!check_vma(useraddr, memdesc->size)) { + mmap_read_unlock(current->mm); + ret = -EFAULT; + goto out; + } + + npages = get_user_pages(useraddr, sglen, write, pages, NULL); + mmap_read_unlock(current->mm); + + ret = (npages < 0) ? (int)npages : 0; + if (ret) + goto out; + + if ((unsigned long) npages != sglen) { + ret = -EINVAL; + goto out; + } + + ret = sg_alloc_table_from_pages(memdesc->sgt, pages, npages, + 0, memdesc->size, GFP_KERNEL); +out: + if (ret) { + for (i = 0; i < npages; i++) + put_page(pages[i]); + + kfree(memdesc->sgt); + memdesc->sgt = NULL; + } + kvfree(pages); + return ret; +} + +static const struct kgsl_memdesc_ops kgsl_usermem_ops = { + .free = kgsl_destroy_anon, + .put_gpuaddr = kgsl_unmap_and_put_gpuaddr, +}; + +static int kgsl_setup_anon_useraddr(struct kgsl_pagetable *pagetable, + struct kgsl_mem_entry *entry, unsigned long hostptr, + size_t offset, size_t size) +{ + /* Map an anonymous memory chunk */ + + int ret; + + if (size == 0 || offset != 0 || + !IS_ALIGNED(size, PAGE_SIZE)) + return -EINVAL; + + entry->memdesc.pagetable = pagetable; + entry->memdesc.size = (uint64_t) size; + entry->memdesc.flags |= (uint64_t)KGSL_MEMFLAGS_USERMEM_ADDR; + entry->memdesc.ops = &kgsl_usermem_ops; + + if (kgsl_memdesc_use_cpu_map(&entry->memdesc)) { + + /* Register the address in the database */ + ret = kgsl_mmu_set_svm_region(pagetable, + (uint64_t) hostptr, (uint64_t) size); + + if (ret) + return ret; + + entry->memdesc.gpuaddr = (uint64_t) hostptr; + } + + ret = memdesc_sg_virt(&entry->memdesc, hostptr); + + if (ret && kgsl_memdesc_use_cpu_map(&entry->memdesc)) + kgsl_mmu_put_gpuaddr(pagetable, &entry->memdesc); + + return ret; +} + +#ifdef CONFIG_DMA_SHARED_BUFFER +static void _setup_cache_mode(struct kgsl_mem_entry *entry, + struct vm_area_struct *vma) +{ + uint64_t mode; + pgprot_t pgprot = vma->vm_page_prot; + + if ((pgprot_val(pgprot) == pgprot_val(pgprot_noncached(pgprot))) || + (pgprot_val(pgprot) == pgprot_val(pgprot_writecombine(pgprot)))) + mode = KGSL_CACHEMODE_WRITECOMBINE; + else + mode = KGSL_CACHEMODE_WRITEBACK; + + entry->memdesc.flags |= FIELD_PREP(KGSL_CACHEMODE_MASK, mode); +} + +static int kgsl_setup_dma_buf(struct kgsl_device *device, + struct kgsl_pagetable *pagetable, + struct kgsl_mem_entry *entry, + struct dma_buf *dmabuf); + +static int kgsl_setup_dmabuf_useraddr(struct kgsl_device *device, + struct kgsl_pagetable *pagetable, + struct kgsl_mem_entry *entry, unsigned long hostptr) +{ + struct vm_area_struct *vma; + struct dma_buf *dmabuf = NULL; + int ret; + + /* + * Find the VMA containing this pointer and figure out if it + * is a dma-buf. 
+ */ + mmap_read_lock(current->mm); + vma = find_vma(current->mm, hostptr); + + if (vma && vma->vm_file) { + ret = check_vma_flags(vma, entry->memdesc.flags); + if (ret) { + mmap_read_unlock(current->mm); + return ret; + } + + /* + * Check to see that this isn't our own memory that we have + * already mapped + */ + if (vma->vm_ops == &kgsl_gpumem_vm_ops) { + mmap_read_unlock(current->mm); + return -EFAULT; + } + + if (!is_dma_buf_file(vma->vm_file)) { + mmap_read_unlock(current->mm); + return -ENODEV; + } + + /* Take a refcount because dma_buf_put() decrements the refcount */ + get_file(vma->vm_file); + + dmabuf = vma->vm_file->private_data; + } + + if (!dmabuf) { + mmap_read_unlock(current->mm); + return -ENODEV; + } + + ret = kgsl_setup_dma_buf(device, pagetable, entry, dmabuf); + if (ret) { + dma_buf_put(dmabuf); + mmap_read_unlock(current->mm); + return ret; + } + + /* Setup the cache mode for cache operations */ + _setup_cache_mode(entry, vma); + + if (kgsl_mmu_has_feature(device, KGSL_MMU_IO_COHERENT) && + (IS_ENABLED(CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT) && + kgsl_cachemode_is_cached(entry->memdesc.flags))) + entry->memdesc.flags |= KGSL_MEMFLAGS_IOCOHERENT; + else + entry->memdesc.flags &= ~((u64) KGSL_MEMFLAGS_IOCOHERENT); + + mmap_read_unlock(current->mm); + return 0; +} +#else +static int kgsl_setup_dmabuf_useraddr(struct kgsl_device *device, + struct kgsl_pagetable *pagetable, + struct kgsl_mem_entry *entry, unsigned long hostptr) +{ + return -ENODEV; +} +#endif + +static int kgsl_setup_useraddr(struct kgsl_device *device, + struct kgsl_pagetable *pagetable, + struct kgsl_mem_entry *entry, + unsigned long hostptr, size_t offset, size_t size) +{ + int ret; + + if (hostptr == 0 || !IS_ALIGNED(hostptr, PAGE_SIZE)) + return -EINVAL; + + /* Try to set up a dmabuf - if it returns -ENODEV assume anonymous */ + ret = kgsl_setup_dmabuf_useraddr(device, pagetable, entry, hostptr); + if (ret != -ENODEV) + return ret; + + /* Okay - lets go legacy */ + return kgsl_setup_anon_useraddr(pagetable, entry, + hostptr, offset, size); +} + +static long _gpuobj_map_useraddr(struct kgsl_device *device, + struct kgsl_pagetable *pagetable, + struct kgsl_mem_entry *entry, + struct kgsl_gpuobj_import *param) +{ + struct kgsl_gpuobj_import_useraddr useraddr; + + param->flags &= KGSL_MEMFLAGS_GPUREADONLY + | KGSL_CACHEMODE_MASK + | KGSL_MEMFLAGS_USE_CPU_MAP + | KGSL_MEMTYPE_MASK + | KGSL_MEMFLAGS_FORCE_32BIT + | KGSL_MEMFLAGS_IOCOHERENT; + + /* Specifying SECURE is an explicit error */ + if (param->flags & KGSL_MEMFLAGS_SECURE) + return -ENOTSUPP; + + kgsl_memdesc_init(device, &entry->memdesc, param->flags); + + if (copy_from_user(&useraddr, + u64_to_user_ptr(param->priv), sizeof(useraddr))) + return -EINVAL; + + /* Verify that the virtaddr and len are within bounds */ + if (useraddr.virtaddr > ULONG_MAX) + return -EINVAL; + + return kgsl_setup_useraddr(device, pagetable, entry, + (unsigned long) useraddr.virtaddr, 0, param->priv_len); +} + +static bool check_and_warn_secured(struct kgsl_device *device) +{ + if (kgsl_mmu_is_secured(&device->mmu)) + return true; + + dev_WARN_ONCE(device->dev, 1, "Secure buffers are not supported\n"); + return false; +} + +#ifdef CONFIG_DMA_SHARED_BUFFER +static long _gpuobj_map_dma_buf(struct kgsl_device *device, + struct kgsl_pagetable *pagetable, + struct kgsl_mem_entry *entry, + struct kgsl_gpuobj_import *param, + int *fd) +{ + bool iocoherent = (param->flags & KGSL_MEMFLAGS_IOCOHERENT); + struct kgsl_gpuobj_import_dma_buf buf; + struct dma_buf *dmabuf; + int ret; + + 
param->flags &= KGSL_MEMFLAGS_GPUREADONLY | + KGSL_MEMTYPE_MASK | + KGSL_MEMALIGN_MASK | + KGSL_MEMFLAGS_SECURE | + KGSL_MEMFLAGS_FORCE_32BIT | + KGSL_MEMFLAGS_GUARD_PAGE; + + kgsl_memdesc_init(device, &entry->memdesc, param->flags); + + /* + * If content protection is not enabled and secure buffer + * is requested to be mapped return error. + */ + if (entry->memdesc.flags & KGSL_MEMFLAGS_SECURE) { + if (!check_and_warn_secured(device)) + return -ENOTSUPP; + + entry->memdesc.priv |= KGSL_MEMDESC_SECURE; + } + + if (copy_struct_from_user(&buf, sizeof(buf), + u64_to_user_ptr(param->priv), param->priv_len)) + return -EFAULT; + + if (buf.fd < 0) + return -EINVAL; + + *fd = buf.fd; + dmabuf = dma_buf_get(buf.fd); + + if (IS_ERR(dmabuf)) + return PTR_ERR(dmabuf); + + /* + * DMA BUFS are always cached so make sure that is reflected in + * the memdesc. + */ + entry->memdesc.flags |= + FIELD_PREP(KGSL_CACHEMODE_MASK, KGSL_CACHEMODE_WRITEBACK); + + /* + * Enable I/O coherency if it is 1) a thing, and either + * 2) enabled by default or 3) enabled by the caller + */ + if (kgsl_mmu_has_feature(device, KGSL_MMU_IO_COHERENT) && + (IS_ENABLED(CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT) || + iocoherent)) + entry->memdesc.flags |= KGSL_MEMFLAGS_IOCOHERENT; + + ret = kgsl_setup_dma_buf(device, pagetable, entry, dmabuf); + if (ret) + dma_buf_put(dmabuf); + + return ret; +} +#else +static long _gpuobj_map_dma_buf(struct kgsl_device *device, + struct kgsl_pagetable *pagetable, + struct kgsl_mem_entry *entry, + struct kgsl_gpuobj_import *param, + int *fd) +{ + return -EINVAL; +} +#endif + +static void kgsl_process_add_stats(struct kgsl_process_private *priv, + unsigned int type, uint64_t size) +{ + u64 ret = atomic64_add_return(size, &priv->stats[type].cur); + + if (ret > priv->stats[type].max) + priv->stats[type].max = ret; +} + + + +long kgsl_ioctl_gpuobj_import(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_device *device = dev_priv->device; + struct kgsl_gpuobj_import *param = data; + struct kgsl_mem_entry *entry; + int ret, fd = -1; + + if (param->type != KGSL_USER_MEM_TYPE_ADDR && + param->type != KGSL_USER_MEM_TYPE_DMABUF) + return -ENOTSUPP; + + if (param->flags & KGSL_MEMFLAGS_VBO) + return -EINVAL; + + entry = kgsl_mem_entry_create(); + if (entry == NULL) + return -ENOMEM; + + if (param->type == KGSL_USER_MEM_TYPE_ADDR) + ret = _gpuobj_map_useraddr(device, private->pagetable, + entry, param); + else + ret = _gpuobj_map_dma_buf(device, private->pagetable, + entry, param, &fd); + + if (ret) + goto out; + + if (entry->memdesc.size >= SZ_1M) + kgsl_memdesc_set_align(&entry->memdesc, ilog2(SZ_1M)); + else if (entry->memdesc.size >= SZ_64K) + kgsl_memdesc_set_align(&entry->memdesc, ilog2(SZ_64K)); + + param->flags = entry->memdesc.flags; + + ret = kgsl_mem_entry_attach_and_map(device, private, entry); + if (ret) + goto unmap; + + param->id = entry->id; + + KGSL_STATS_ADD(entry->memdesc.size, &kgsl_driver.stats.mapped, + &kgsl_driver.stats.mapped_max); + + kgsl_process_add_stats(private, + kgsl_memdesc_usermem_type(&entry->memdesc), + entry->memdesc.size); + + trace_kgsl_mem_map(entry, fd); + + kgsl_mem_entry_commit_process(entry); + + /* Put the extra ref from kgsl_mem_entry_create() */ + kgsl_mem_entry_put(entry); + + return 0; + +unmap: + kgsl_sharedmem_free(&entry->memdesc); + +out: + kfree(entry); + return ret; +} + +static long _map_usermem_addr(struct kgsl_device *device, + struct kgsl_pagetable 
*pagetable, struct kgsl_mem_entry *entry,
+ unsigned long hostptr, size_t offset, size_t size)
+{
+ if (!kgsl_mmu_has_feature(device, KGSL_MMU_PAGED))
+ return -EINVAL;
+
+ /* No CPU mapped buffer could ever be secure */
+ if (entry->memdesc.flags & KGSL_MEMFLAGS_SECURE)
+ return -EINVAL;
+
+ return kgsl_setup_useraddr(device, pagetable, entry, hostptr,
+ offset, size);
+}
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+static int _map_usermem_dma_buf(struct kgsl_device *device,
+ struct kgsl_pagetable *pagetable,
+ struct kgsl_mem_entry *entry,
+ unsigned int fd)
+{
+ int ret;
+ struct dma_buf *dmabuf;
+
+ /*
+ * If content protection is not enabled and secure buffer
+ * is requested to be mapped return error.
+ */
+
+ if (entry->memdesc.flags & KGSL_MEMFLAGS_SECURE) {
+ if (!check_and_warn_secured(device))
+ return -EOPNOTSUPP;
+
+ entry->memdesc.priv |= KGSL_MEMDESC_SECURE;
+ }
+
+ dmabuf = dma_buf_get(fd);
+ if (IS_ERR_OR_NULL(dmabuf)) {
+ ret = PTR_ERR(dmabuf);
+ return ret ? ret : -EINVAL;
+ }
+ ret = kgsl_setup_dma_buf(device, pagetable, entry, dmabuf);
+ if (ret)
+ dma_buf_put(dmabuf);
+ return ret;
+}
+#else
+static int _map_usermem_dma_buf(struct kgsl_device *device,
+ struct kgsl_pagetable *pagetable,
+ struct kgsl_mem_entry *entry,
+ unsigned int fd)
+{
+ return -EINVAL;
+}
+#endif
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+static int kgsl_setup_dma_buf(struct kgsl_device *device,
+ struct kgsl_pagetable *pagetable,
+ struct kgsl_mem_entry *entry,
+ struct dma_buf *dmabuf)
+{
+ int ret = 0;
+ struct scatterlist *s;
+ struct sg_table *sg_table;
+ struct dma_buf_attachment *attach = NULL;
+ struct kgsl_dma_buf_meta *meta;
+
+ meta = kzalloc(sizeof(*meta), GFP_KERNEL);
+ if (!meta)
+ return -ENOMEM;
+
+ attach = dma_buf_attach(dmabuf, device->dev);
+
+ if (IS_ERR(attach)) {
+ ret = PTR_ERR(attach);
+ goto out;
+ }
+
+ /*
+ * If dma buffer is marked IO coherent, skip sync at attach,
+ * which involves flushing the buffer on CPU.
+ * HW manages coherency for IO coherent buffers.
+ */
+ if (entry->memdesc.flags & KGSL_MEMFLAGS_IOCOHERENT)
+ attach->dma_map_attrs |= DMA_ATTR_SKIP_CPU_SYNC;
+
+ meta->dmabuf = dmabuf;
+ meta->attach = attach;
+ meta->entry = entry;
+
+ entry->priv_data = meta;
+ entry->memdesc.pagetable = pagetable;
+ entry->memdesc.size = 0;
+ entry->memdesc.ops = &kgsl_dmabuf_ops;
+ /* USE_CPU_MAP is not implemented for ION. */
+ entry->memdesc.flags &= ~((uint64_t) KGSL_MEMFLAGS_USE_CPU_MAP);
+ entry->memdesc.flags |= (uint64_t)KGSL_MEMFLAGS_USERMEM_ION;
+
+ sg_table = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
+
+ if (IS_ERR_OR_NULL(sg_table)) {
+ ret = PTR_ERR(sg_table);
+ goto out;
+ }
+
+ dma_buf_unmap_attachment(attach, sg_table, DMA_BIDIRECTIONAL);
+
+ meta->table = sg_table;
+ entry->priv_data = meta;
+ entry->memdesc.sgt = sg_table;
+
+ if (entry->memdesc.priv & KGSL_MEMDESC_SECURE) {
+ uint32_t *vmid_list = NULL, *perms_list = NULL;
+ uint32_t nelems = 0;
+ int i;
+
+ if (mem_buf_dma_buf_exclusive_owner(dmabuf)) {
+ ret = -EPERM;
+ goto out;
+ }
+
+ ret = mem_buf_dma_buf_copy_vmperm(dmabuf, (int **)&vmid_list,
+ (int **)&perms_list, (int *)&nelems);
+ if (ret) {
+ ret = 0;
+ dev_info(device->dev, "Skipped access check\n");
+ goto skip_access_check;
+ }
+
+ /* Check if secure buffer is accessible to CP_PIXEL */
+ for (i = 0; i < nelems; i++) {
+ if (vmid_list[i] == QCOM_DMA_HEAP_FLAG_CP_PIXEL)
+ break;
+ }
+
+ kfree(vmid_list);
+ kfree(perms_list);
+
+ if (i == nelems) {
+ /*
+ * Secure buffer is not accessible to CP_PIXEL, there is no point
+ * in importing this buffer.
+ */
+ ret = -EPERM;
+ goto out;
+ }
+ }
+
+skip_access_check:
+ /* Calculate the size of the memdesc from the sglist */
+ for (s = entry->memdesc.sgt->sgl; s != NULL; s = sg_next(s))
+ entry->memdesc.size += (uint64_t) s->length;
+
+ if (!entry->memdesc.size) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ add_dmabuf_list(meta);
+ entry->memdesc.size = PAGE_ALIGN(entry->memdesc.size);
+
+out:
+ if (ret) {
+ if (!IS_ERR_OR_NULL(attach))
+ dma_buf_detach(dmabuf, attach);
+
+ kfree(meta);
+ }
+
+ return ret;
+}
+#endif
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+void kgsl_get_egl_counts(struct kgsl_mem_entry *entry,
+ int *egl_surface_count, int *egl_image_count)
+{
+ struct kgsl_dma_buf_meta *meta = entry->priv_data;
+ struct dmabuf_list_entry *dle = meta->dle;
+ struct kgsl_dma_buf_meta *scan_meta;
+ struct kgsl_mem_entry *scan_mem_entry;
+
+ if (!dle)
+ return;
+
+ spin_lock(&kgsl_dmabuf_lock);
+ list_for_each_entry(scan_meta, &dle->dmabuf_list, node) {
+ scan_mem_entry = scan_meta->entry;
+
+ switch (kgsl_memdesc_get_memtype(&scan_mem_entry->memdesc)) {
+ case KGSL_MEMTYPE_EGL_SURFACE:
+ (*egl_surface_count)++;
+ break;
+ case KGSL_MEMTYPE_EGL_IMAGE:
+ (*egl_image_count)++;
+ break;
+ }
+ }
+ spin_unlock(&kgsl_dmabuf_lock);
+}
+
+unsigned long kgsl_get_dmabuf_inode_number(struct kgsl_mem_entry *entry)
+{
+ struct kgsl_dma_buf_meta *meta = entry->priv_data;
+
+ return meta ? file_inode(meta->dmabuf->file)->i_ino : 0;
+}
+#else
+void kgsl_get_egl_counts(struct kgsl_mem_entry *entry,
+ int *egl_surface_count, int *egl_image_count)
+{
+}
+
+unsigned long kgsl_get_dmabuf_inode_number(struct kgsl_mem_entry *entry)
+{
+ return 0;
+}
+#endif
+
+long kgsl_ioctl_map_user_mem(struct kgsl_device_private *dev_priv,
+ unsigned int cmd, void *data)
+{
+ int result = -EINVAL;
+ struct kgsl_map_user_mem *param = data;
+ struct kgsl_mem_entry *entry = NULL;
+ struct kgsl_process_private *private = dev_priv->process_priv;
+ struct kgsl_device *device = dev_priv->device;
+ unsigned int memtype;
+ uint64_t flags;
+
+ /*
+ * If content protection is not enabled and secure buffer
+ * is requested to be mapped return error.
+ */
+
+ if (param->flags & KGSL_MEMFLAGS_SECURE) {
+ if (!check_and_warn_secured(device))
+ return -EOPNOTSUPP;
+
+ /*
+ * On 64 bit kernel, secure memory region is expanded and
+ * moved to 64 bit address, 32 bit apps can not access it from
+ * this IOCTL.
+ */
+ if (is_compat_task() &&
+ test_bit(KGSL_MMU_64BIT, &device->mmu.features))
+ return -EOPNOTSUPP;
+
+ /* Can't use CPU map with secure buffers */
+ if (param->flags & KGSL_MEMFLAGS_USE_CPU_MAP)
+ return -EINVAL;
+ }
+
+ entry = kgsl_mem_entry_create();
+
+ if (entry == NULL)
+ return -ENOMEM;
+
+ /*
+ * Convert from enum value to KGSL_MEM_ENTRY value, so that
+ * we can use the latter consistently everywhere.
+ */
+ memtype = param->memtype + 1;
+
+ /*
+ * Mask off unknown flags from userspace. This way the caller can
+ * check if a flag is supported by looking at the returned flags.
+ * Note: CACHEMODE is ignored for this call. Caching should be
+ * determined by type of allocation being mapped.
+ */
+ flags = param->flags & (KGSL_MEMFLAGS_GPUREADONLY
+ | KGSL_MEMTYPE_MASK
+ | KGSL_MEMALIGN_MASK
+ | KGSL_MEMFLAGS_USE_CPU_MAP
+ | KGSL_MEMFLAGS_SECURE
+ | KGSL_MEMFLAGS_IOCOHERENT);
+
+ if (is_compat_task())
+ flags |= KGSL_MEMFLAGS_FORCE_32BIT;
+
+ kgsl_memdesc_init(device, &entry->memdesc, flags);
+
+ switch (memtype) {
+ case KGSL_MEM_ENTRY_USER:
+ result = _map_usermem_addr(device, private->pagetable,
+ entry, param->hostptr, param->offset, param->len);
+ break;
+ case KGSL_MEM_ENTRY_ION:
+ if (param->offset != 0)
+ result = -EINVAL;
+ else
+ result = _map_usermem_dma_buf(device,
+ private->pagetable, entry, param->fd);
+ break;
+ default:
+ result = -EOPNOTSUPP;
+ break;
+ }
+
+ if (result)
+ goto error;
+
+ if (entry->memdesc.size >= SZ_2M)
+ kgsl_memdesc_set_align(&entry->memdesc, ilog2(SZ_2M));
+ else if (entry->memdesc.size >= SZ_1M)
+ kgsl_memdesc_set_align(&entry->memdesc, ilog2(SZ_1M));
+ else if (entry->memdesc.size >= SZ_64K)
+ kgsl_memdesc_set_align(&entry->memdesc, ilog2(SZ_64K));
+
+ /* echo back flags */
+ param->flags = (unsigned int) entry->memdesc.flags;
+
+ result = kgsl_mem_entry_attach_and_map(device, private,
+ entry);
+ if (result)
+ goto error_attach;
+
+ /* Adjust the returned value for a non 4k aligned offset */
+ param->gpuaddr = (unsigned long)
+ entry->memdesc.gpuaddr + (param->offset & PAGE_MASK);
+
+ KGSL_STATS_ADD(param->len, &kgsl_driver.stats.mapped,
+ &kgsl_driver.stats.mapped_max);
+
+ kgsl_process_add_stats(private,
+ kgsl_memdesc_usermem_type(&entry->memdesc), param->len);
+
+ trace_kgsl_mem_map(entry, param->fd);
+
+ kgsl_mem_entry_commit_process(entry);
+
+ /* Put the extra ref from kgsl_mem_entry_create() */
+ kgsl_mem_entry_put(entry);
+
+ return result;
+
+error_attach:
+ kgsl_sharedmem_free(&entry->memdesc);
+error:
+ /* Clear gpuaddr here so userspace doesn't get any wrong ideas */
+ param->gpuaddr = 0;
+
+ kfree(entry);
+ return result;
+}
+
+static int _kgsl_gpumem_sync_cache(struct kgsl_mem_entry *entry,
+ uint64_t offset, uint64_t length, unsigned int op)
+{
+ int ret = 0;
+ int cacheop;
+
+ if (!entry)
+ return 0;
+
+ /* Cache ops are not allowed on secure memory */
+ if (entry->memdesc.flags & KGSL_MEMFLAGS_SECURE)
+ return 0;
+
+ /*
+ * Flush is defined as (clean | invalidate). 
If both bits are set, then + * do a flush, otherwise check for the individual bits and clean or inv + * as requested + */ + + if ((op & KGSL_GPUMEM_CACHE_FLUSH) == KGSL_GPUMEM_CACHE_FLUSH) + cacheop = KGSL_CACHE_OP_FLUSH; + else if (op & KGSL_GPUMEM_CACHE_CLEAN) + cacheop = KGSL_CACHE_OP_CLEAN; + else if (op & KGSL_GPUMEM_CACHE_INV) + cacheop = KGSL_CACHE_OP_INV; + else { + ret = -EINVAL; + goto done; + } + + if (!(op & KGSL_GPUMEM_CACHE_RANGE)) { + offset = 0; + length = entry->memdesc.size; + } + + if (kgsl_cachemode_is_cached(entry->memdesc.flags)) { + trace_kgsl_mem_sync_cache(entry, offset, length, op); + ret = kgsl_cache_range_op(&entry->memdesc, offset, + length, cacheop); + } + +done: + return ret; +} + +/* New cache sync function - supports both directions (clean and invalidate) */ + +long kgsl_ioctl_gpumem_sync_cache(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_gpumem_sync_cache *param = data; + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_mem_entry *entry = NULL; + long ret; + + if (param->id != 0) + entry = kgsl_sharedmem_find_id(private, param->id); + else if (param->gpuaddr != 0) + entry = kgsl_sharedmem_find(private, (uint64_t) param->gpuaddr); + + if (entry == NULL) + return -EINVAL; + + ret = _kgsl_gpumem_sync_cache(entry, (uint64_t) param->offset, + (uint64_t) param->length, param->op); + kgsl_mem_entry_put(entry); + return ret; +} + +static int mem_id_cmp(const void *_a, const void *_b) +{ + const unsigned int *a = _a, *b = _b; + + if (*a == *b) + return 0; + return (*a > *b) ? 1 : -1; +} + +#ifdef CONFIG_ARM64 +/* Do not support full flush on ARM64 targets */ +static inline bool check_full_flush(size_t size, int op) +{ + return false; +} +#else +/* Support full flush if the size is bigger than the threshold */ +static inline bool check_full_flush(size_t size, int op) +{ + /* If we exceed the breakeven point, flush the entire cache */ + bool ret = (kgsl_driver.full_cache_threshold != 0) && + (size >= kgsl_driver.full_cache_threshold) && + (op == KGSL_GPUMEM_CACHE_FLUSH); + if (ret) + flush_cache_all(); + return ret; +} +#endif + +long kgsl_ioctl_gpumem_sync_cache_bulk(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + int i; + struct kgsl_gpumem_sync_cache_bulk *param = data; + struct kgsl_process_private *private = dev_priv->process_priv; + unsigned int id, last_id = 0, *id_list = NULL, actual_count = 0; + struct kgsl_mem_entry **entries = NULL; + long ret = 0; + uint64_t op_size = 0; + bool full_flush = false; + + if (param->id_list == NULL || param->count == 0 + || param->count > (PAGE_SIZE / sizeof(unsigned int))) + return -EINVAL; + + id_list = kcalloc(param->count, sizeof(unsigned int), GFP_KERNEL); + if (id_list == NULL) + return -ENOMEM; + + entries = kcalloc(param->count, sizeof(*entries), GFP_KERNEL); + if (entries == NULL) { + ret = -ENOMEM; + goto end; + } + + if (copy_from_user(id_list, param->id_list, + param->count * sizeof(unsigned int))) { + ret = -EFAULT; + goto end; + } + /* sort the ids so we can weed out duplicates */ + sort(id_list, param->count, sizeof(*id_list), mem_id_cmp, NULL); + + for (i = 0; i < param->count; i++) { + unsigned int cachemode; + struct kgsl_mem_entry *entry = NULL; + + id = id_list[i]; + /* skip 0 ids or duplicates */ + if (id == last_id) + continue; + + entry = kgsl_sharedmem_find_id(private, id); + if (entry == NULL) + continue; + + /* skip uncached memory */ + cachemode = kgsl_memdesc_get_cachemode(&entry->memdesc); + if 
(cachemode != KGSL_CACHEMODE_WRITETHROUGH && + cachemode != KGSL_CACHEMODE_WRITEBACK) { + kgsl_mem_entry_put(entry); + continue; + } + + op_size += entry->memdesc.size; + entries[actual_count++] = entry; + + full_flush = check_full_flush(op_size, param->op); + if (full_flush) { + trace_kgsl_mem_sync_full_cache(actual_count, op_size); + break; + } + + last_id = id; + } + + param->op &= ~KGSL_GPUMEM_CACHE_RANGE; + + for (i = 0; i < actual_count; i++) { + if (!full_flush) + _kgsl_gpumem_sync_cache(entries[i], 0, + entries[i]->memdesc.size, + param->op); + kgsl_mem_entry_put(entries[i]); + } +end: + kfree(entries); + kfree(id_list); + return ret; +} + +/* Legacy cache function, does a flush (clean + invalidate) */ + +long kgsl_ioctl_sharedmem_flush_cache(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_sharedmem_free *param = data; + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_mem_entry *entry = NULL; + long ret; + + entry = kgsl_sharedmem_find(private, (uint64_t) param->gpuaddr); + if (entry == NULL) + return -EINVAL; + + ret = _kgsl_gpumem_sync_cache(entry, 0, entry->memdesc.size, + KGSL_GPUMEM_CACHE_FLUSH); + kgsl_mem_entry_put(entry); + return ret; +} + +long kgsl_ioctl_gpuobj_sync(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_gpuobj_sync *param = data; + struct kgsl_gpuobj_sync_obj *objs; + struct kgsl_mem_entry **entries; + long ret = 0; + uint64_t size = 0; + int i; + void __user *ptr; + + if (param->count == 0 || param->count > 128) + return -EINVAL; + + objs = kcalloc(param->count, sizeof(*objs), GFP_KERNEL); + if (objs == NULL) + return -ENOMEM; + + entries = kcalloc(param->count, sizeof(*entries), GFP_KERNEL); + if (entries == NULL) { + kfree(objs); + return -ENOMEM; + } + + ptr = u64_to_user_ptr(param->objs); + + for (i = 0; i < param->count; i++) { + ret = copy_struct_from_user(&objs[i], sizeof(*objs), ptr, + param->obj_len); + if (ret) + goto out; + + entries[i] = kgsl_sharedmem_find_id(private, objs[i].id); + + /* Not finding the ID is not a fatal failure - just skip it */ + if (entries[i] == NULL) + continue; + + if (!(objs[i].op & KGSL_GPUMEM_CACHE_RANGE)) + size += entries[i]->memdesc.size; + else if (objs[i].offset < entries[i]->memdesc.size) + size += (entries[i]->memdesc.size - objs[i].offset); + + if (check_full_flush(size, objs[i].op)) { + trace_kgsl_mem_sync_full_cache(i, size); + goto out; + } + + ptr += sizeof(*objs); + } + + for (i = 0; !ret && i < param->count; i++) + ret = _kgsl_gpumem_sync_cache(entries[i], + objs[i].offset, objs[i].length, objs[i].op); + +out: + for (i = 0; i < param->count; i++) + kgsl_mem_entry_put(entries[i]); + + kfree(entries); + kfree(objs); + + return ret; +} + +#ifdef CONFIG_ARM64 +static uint64_t kgsl_filter_cachemode(uint64_t flags) +{ + /* + * WRITETHROUGH is not supported in arm64, so we tell the user that we + * use WRITEBACK which is the default caching policy. 
+ */ + if (FIELD_GET(KGSL_CACHEMODE_MASK, flags) == KGSL_CACHEMODE_WRITETHROUGH) { + flags &= ~((uint64_t) KGSL_CACHEMODE_MASK); + flags |= FIELD_PREP(KGSL_CACHEMODE_MASK, KGSL_CACHEMODE_WRITEBACK); + } + return flags; +} +#else +static uint64_t kgsl_filter_cachemode(uint64_t flags) +{ + return flags; +} +#endif + +/* The largest allowable alignment for a GPU object is 32MB */ +#define KGSL_MAX_ALIGN (32 * SZ_1M) + +static u64 cap_alignment(struct kgsl_device *device, u64 flags) +{ + u32 align = FIELD_GET(KGSL_MEMALIGN_MASK, flags); + + if (align >= ilog2(KGSL_MAX_ALIGN)) { + /* Cap the alignment bits to the highest number we can handle */ + dev_err(device->dev, + "Alignment too large; restricting to %dK\n", + KGSL_MAX_ALIGN >> 10); + align = ilog2(KGSL_MAX_ALIGN); + } + + flags &= ~((u64) KGSL_MEMALIGN_MASK); + return flags | FIELD_PREP(KGSL_MEMALIGN_MASK, align); +} + +static struct kgsl_mem_entry * +gpumem_alloc_vbo_entry(struct kgsl_device_private *dev_priv, + u64 size, u64 flags) +{ + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_device *device = dev_priv->device; + struct kgsl_memdesc *memdesc; + struct kgsl_mem_entry *entry; + int ret; + + /* Disallow specific flags */ + if (flags & (KGSL_MEMFLAGS_GPUREADONLY | KGSL_CACHEMODE_MASK)) + return ERR_PTR(-EINVAL); + + if (flags & (KGSL_MEMFLAGS_USE_CPU_MAP | KGSL_MEMFLAGS_IOCOHERENT)) + return ERR_PTR(-EINVAL); + + /* Quietly ignore the other flags that aren't this list */ + flags &= KGSL_MEMFLAGS_SECURE | + KGSL_MEMFLAGS_VBO | + KGSL_MEMTYPE_MASK | + KGSL_MEMALIGN_MASK | + KGSL_MEMFLAGS_FORCE_32BIT; + + if ((flags & KGSL_MEMFLAGS_SECURE) && !check_and_warn_secured(device)) + return ERR_PTR(-EOPNOTSUPP); + + flags = cap_alignment(device, flags); + + entry = kgsl_mem_entry_create(); + if (!entry) + return ERR_PTR(-ENOMEM); + + memdesc = &entry->memdesc; + + ret = kgsl_sharedmem_allocate_vbo(device, memdesc, size, flags); + if (ret) { + kfree(entry); + return ERR_PTR(ret); + } + + if (flags & KGSL_MEMFLAGS_SECURE) + entry->memdesc.priv |= KGSL_MEMDESC_SECURE; + + ret = kgsl_mem_entry_attach_to_process(device, private, entry); + if (ret) + goto out; + + ret = kgsl_mmu_map_zero_page_to_range(memdesc->pagetable, + memdesc, 0, memdesc->size); + if (!ret) { + trace_kgsl_mem_alloc(entry); + kgsl_mem_entry_commit_process(entry); + return entry; + } + +out: + kgsl_sharedmem_free(memdesc); + kfree(entry); + return ERR_PTR(ret); +} + +struct kgsl_mem_entry *gpumem_alloc_entry( + struct kgsl_device_private *dev_priv, + uint64_t size, uint64_t flags) +{ + int ret; + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_mem_entry *entry; + struct kgsl_device *device = dev_priv->device; + u32 cachemode; + + /* For 32-bit kernel world nothing to do with this flag */ + if (BITS_PER_LONG == 32) + flags &= ~((uint64_t) KGSL_MEMFLAGS_FORCE_32BIT); + + if (flags & KGSL_MEMFLAGS_VBO) + return gpumem_alloc_vbo_entry(dev_priv, size, flags); + + flags &= KGSL_MEMFLAGS_GPUREADONLY + | KGSL_CACHEMODE_MASK + | KGSL_MEMTYPE_MASK + | KGSL_MEMALIGN_MASK + | KGSL_MEMFLAGS_USE_CPU_MAP + | KGSL_MEMFLAGS_SECURE + | KGSL_MEMFLAGS_FORCE_32BIT + | KGSL_MEMFLAGS_IOCOHERENT + | KGSL_MEMFLAGS_GUARD_PAGE; + + /* Return not supported error if secure memory isn't enabled */ + if ((flags & KGSL_MEMFLAGS_SECURE) && !check_and_warn_secured(device)) + return ERR_PTR(-EOPNOTSUPP); + + flags = cap_alignment(device, flags); + + /* For now only allow allocations up to 4G */ + if (size == 0 || size > UINT_MAX) + return ERR_PTR(-EINVAL); 
+ + flags = kgsl_filter_cachemode(flags); + + entry = kgsl_mem_entry_create(); + if (entry == NULL) + return ERR_PTR(-ENOMEM); + + if (IS_ENABLED(CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT) && + kgsl_cachemode_is_cached(flags)) + flags |= KGSL_MEMFLAGS_IOCOHERENT; + + ret = kgsl_allocate_user(device, &entry->memdesc, + size, flags, 0); + if (ret != 0) + goto err; + + ret = kgsl_mem_entry_attach_and_map(device, private, entry); + if (ret != 0) { + kgsl_sharedmem_free(&entry->memdesc); + goto err; + } + + cachemode = kgsl_memdesc_get_cachemode(&entry->memdesc); + /* + * Secure buffers cannot be reclaimed. Avoid reclaim of cached buffers + * as we could get request for cache operations on these buffers when + * they are reclaimed. + */ + if (!(flags & KGSL_MEMFLAGS_SECURE) && + !(cachemode == KGSL_CACHEMODE_WRITEBACK) && + !(cachemode == KGSL_CACHEMODE_WRITETHROUGH)) + entry->memdesc.priv |= KGSL_MEMDESC_CAN_RECLAIM; + + kgsl_process_add_stats(private, + kgsl_memdesc_usermem_type(&entry->memdesc), + entry->memdesc.size); + trace_kgsl_mem_alloc(entry); + + kgsl_mem_entry_commit_process(entry); + return entry; +err: + kfree(entry); + return ERR_PTR(ret); +} + +static void copy_metadata(struct kgsl_mem_entry *entry, uint64_t metadata, + unsigned int len) +{ + unsigned int i, size; + + if (len == 0) + return; + + size = min_t(unsigned int, len, sizeof(entry->metadata) - 1); + + if (copy_from_user(entry->metadata, u64_to_user_ptr(metadata), size)) { + memset(entry->metadata, 0, sizeof(entry->metadata)); + return; + } + + /* Clean up non printable characters in the string */ + for (i = 0; i < size && entry->metadata[i] != 0; i++) { + if (!isprint(entry->metadata[i])) + entry->metadata[i] = '?'; + } +} + +long kgsl_ioctl_gpuobj_alloc(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_gpuobj_alloc *param = data; + struct kgsl_mem_entry *entry; + + entry = gpumem_alloc_entry(dev_priv, param->size, param->flags); + + if (IS_ERR(entry)) + return PTR_ERR(entry); + + copy_metadata(entry, param->metadata, param->metadata_len); + + param->size = entry->memdesc.size; + param->flags = entry->memdesc.flags; + param->mmapsize = kgsl_memdesc_footprint(&entry->memdesc); + param->id = entry->id; + + /* Put the extra ref from kgsl_mem_entry_create() */ + kgsl_mem_entry_put(entry); + + return 0; +} + +long kgsl_ioctl_gpumem_alloc(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_device *device = dev_priv->device; + struct kgsl_gpumem_alloc *param = data; + struct kgsl_mem_entry *entry; + uint64_t flags = param->flags; + + /* + * On 64 bit kernel, secure memory region is expanded and + * moved to 64 bit address, 32 bit apps can not access it from + * this IOCTL. 
+ */ + if ((param->flags & KGSL_MEMFLAGS_SECURE) && is_compat_task() + && test_bit(KGSL_MMU_64BIT, &device->mmu.features)) + return -EOPNOTSUPP; + + /* Legacy functions doesn't support these advanced features */ + flags &= ~((uint64_t) KGSL_MEMFLAGS_USE_CPU_MAP); + + if (is_compat_task()) + flags |= KGSL_MEMFLAGS_FORCE_32BIT; + + entry = gpumem_alloc_entry(dev_priv, (uint64_t) param->size, flags); + + if (IS_ERR(entry)) + return PTR_ERR(entry); + + param->gpuaddr = (unsigned long) entry->memdesc.gpuaddr; + param->size = (size_t) entry->memdesc.size; + param->flags = (unsigned int) entry->memdesc.flags; + + /* Put the extra ref from kgsl_mem_entry_create() */ + kgsl_mem_entry_put(entry); + + return 0; +} + +long kgsl_ioctl_gpumem_alloc_id(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_device *device = dev_priv->device; + struct kgsl_gpumem_alloc_id *param = data; + struct kgsl_mem_entry *entry; + uint64_t flags = param->flags; + + /* + * On 64 bit kernel, secure memory region is expanded and + * moved to 64 bit address, 32 bit apps can not access it from + * this IOCTL. + */ + if ((param->flags & KGSL_MEMFLAGS_SECURE) && is_compat_task() + && test_bit(KGSL_MMU_64BIT, &device->mmu.features)) + return -EOPNOTSUPP; + + if (is_compat_task()) + flags |= KGSL_MEMFLAGS_FORCE_32BIT; + + entry = gpumem_alloc_entry(dev_priv, (uint64_t) param->size, flags); + + if (IS_ERR(entry)) + return PTR_ERR(entry); + + param->id = entry->id; + param->flags = (unsigned int) entry->memdesc.flags; + param->size = (size_t) entry->memdesc.size; + param->mmapsize = (size_t) kgsl_memdesc_footprint(&entry->memdesc); + param->gpuaddr = (unsigned long) entry->memdesc.gpuaddr; + + /* Put the extra ref from kgsl_mem_entry_create() */ + kgsl_mem_entry_put(entry); + + return 0; +} + +long kgsl_ioctl_gpumem_get_info(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_gpumem_get_info *param = data; + struct kgsl_mem_entry *entry = NULL; + int result = 0; + + if (param->id != 0) + entry = kgsl_sharedmem_find_id(private, param->id); + else if (param->gpuaddr != 0) + entry = kgsl_sharedmem_find(private, (uint64_t) param->gpuaddr); + + if (entry == NULL) + return -EINVAL; + + /* + * If any of the 64 bit address / sizes would end up being + * truncated, return -ERANGE. That will signal the user that they + * should use a more modern API + */ + if (entry->memdesc.gpuaddr > ULONG_MAX) + result = -ERANGE; + + param->gpuaddr = (unsigned long) entry->memdesc.gpuaddr; + param->id = entry->id; + param->flags = (unsigned int) entry->memdesc.flags; + param->size = (size_t) entry->memdesc.size; + param->mmapsize = (size_t) kgsl_memdesc_footprint(&entry->memdesc); + /* + * Entries can have multiple user mappings so thre isn't any one address + * we can report. Plus, the user should already know their mappings, so + * there isn't any value in reporting it back to them. 
+ */ + param->useraddr = 0; + + kgsl_mem_entry_put(entry); + return result; +} + +long kgsl_ioctl_gpuobj_info(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_gpuobj_info *param = data; + struct kgsl_mem_entry *entry; + + if (param->id == 0) + return -EINVAL; + + entry = kgsl_sharedmem_find_id(private, param->id); + if (entry == NULL) + return -EINVAL; + + param->id = entry->id; + param->gpuaddr = entry->memdesc.gpuaddr; + param->flags = entry->memdesc.flags; + param->size = entry->memdesc.size; + + /* VBOs cannot be mapped, so don't report a va_len */ + if (entry->memdesc.flags & KGSL_MEMFLAGS_VBO) + param->va_len = 0; + else + param->va_len = kgsl_memdesc_footprint(&entry->memdesc); + + /* + * Entries can have multiple user mappings so thre isn't any one address + * we can report. Plus, the user should already know their mappings, so + * there isn't any value in reporting it back to them. + */ + param->va_addr = 0; + + kgsl_mem_entry_put(entry); + return 0; +} + +long kgsl_ioctl_gpuobj_set_info(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_gpuobj_set_info *param = data; + struct kgsl_mem_entry *entry; + int ret = 0; + + if (param->id == 0) + return -EINVAL; + + entry = kgsl_sharedmem_find_id(private, param->id); + if (entry == NULL) + return -EINVAL; + + if (param->flags & KGSL_GPUOBJ_SET_INFO_METADATA) + copy_metadata(entry, param->metadata, param->metadata_len); + + if (param->flags & KGSL_GPUOBJ_SET_INFO_TYPE) { + if (FIELD_FIT(KGSL_MEMTYPE_MASK, param->type)) { + entry->memdesc.flags &= ~((uint64_t) KGSL_MEMTYPE_MASK); + entry->memdesc.flags |= + FIELD_PREP(KGSL_MEMTYPE_MASK, param->type); + } else + ret = -EINVAL; + } + + kgsl_mem_entry_put(entry); + return ret; +} + +/** + * kgsl_ioctl_timestamp_event - Register a new timestamp event from userspace + * @dev_priv - pointer to the private device structure + * @cmd - the ioctl cmd passed from kgsl_ioctl + * @data - the user data buffer from kgsl_ioctl + * @returns 0 on success or error code on failure + */ + +long kgsl_ioctl_timestamp_event(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_timestamp_event *param = data; + int ret; + + switch (param->type) { + case KGSL_TIMESTAMP_EVENT_FENCE: + ret = kgsl_add_fence_event(dev_priv->device, + param->context_id, param->timestamp, param->priv, + param->len, dev_priv); + break; + default: + ret = -EINVAL; + } + + return ret; +} + +static vm_fault_t +kgsl_memstore_vm_fault(struct vm_fault *vmf) +{ + struct kgsl_memdesc *memdesc = vmf->vma->vm_private_data; + + return memdesc->ops->vmfault(memdesc, vmf->vma, vmf); +} + +static const struct vm_operations_struct kgsl_memstore_vm_ops = { + .fault = kgsl_memstore_vm_fault, +}; + +static int +kgsl_mmap_memstore(struct file *file, struct kgsl_device *device, + struct vm_area_struct *vma) +{ + struct kgsl_memdesc *memdesc = device->memstore; + unsigned int vma_size = vma->vm_end - vma->vm_start; + + /* The memstore can only be mapped as read only */ + + if (vma->vm_flags & VM_WRITE) + return -EPERM; + + vma->vm_flags &= ~VM_MAYWRITE; + + if (memdesc->size != vma_size) { + dev_err(device->dev, "Cannot partially map the memstore\n"); + return -EINVAL; + } + + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + vma->vm_private_data = memdesc; + vma->vm_flags |= memdesc->ops->vmflags; + vma->vm_ops = 
&kgsl_memstore_vm_ops; + vma->vm_file = file; + + return 0; +} + +/* + * kgsl_gpumem_vm_open is called whenever a vma region is copied or split. + * Increase the refcount to make sure that the accounting stays correct + */ + +static void kgsl_gpumem_vm_open(struct vm_area_struct *vma) +{ + struct kgsl_mem_entry *entry = vma->vm_private_data; + + if (!kgsl_mem_entry_get(entry)) + vma->vm_private_data = NULL; + + atomic_inc(&entry->map_count); +} + +static vm_fault_t +kgsl_gpumem_vm_fault(struct vm_fault *vmf) +{ + struct kgsl_mem_entry *entry = vmf->vma->vm_private_data; + + if (!entry) + return VM_FAULT_SIGBUS; + if (!entry->memdesc.ops || !entry->memdesc.ops->vmfault) + return VM_FAULT_SIGBUS; + + return entry->memdesc.ops->vmfault(&entry->memdesc, vmf->vma, vmf); +} + +static void +kgsl_gpumem_vm_close(struct vm_area_struct *vma) +{ + struct kgsl_mem_entry *entry = vma->vm_private_data; + + if (!entry) + return; + + /* + * Remove the memdesc from the mapped stat once all the mappings have + * gone away + */ + if (!atomic_dec_return(&entry->map_count)) + atomic64_sub(entry->memdesc.size, &entry->priv->gpumem_mapped); + + kgsl_mem_entry_put(entry); +} + +static const struct vm_operations_struct kgsl_gpumem_vm_ops = { + .open = kgsl_gpumem_vm_open, + .fault = kgsl_gpumem_vm_fault, + .close = kgsl_gpumem_vm_close, +}; + +static int +get_mmap_entry(struct kgsl_process_private *private, + struct kgsl_mem_entry **out_entry, unsigned long pgoff, + unsigned long len) +{ + int ret = 0; + struct kgsl_mem_entry *entry; + + entry = kgsl_sharedmem_find_id(private, pgoff); + if (entry == NULL) + entry = kgsl_sharedmem_find(private, pgoff << PAGE_SHIFT); + + if (!entry) + return -EINVAL; + + if (!entry->memdesc.ops || + !entry->memdesc.ops->vmflags || + !entry->memdesc.ops->vmfault) { + ret = -EINVAL; + goto err_put; + } + + /* Don't allow ourselves to remap user memory */ + if (entry->memdesc.flags & KGSL_MEMFLAGS_USERMEM_ADDR) { + ret = -EBUSY; + goto err_put; + } + + if (kgsl_memdesc_use_cpu_map(&entry->memdesc)) { + if (len != kgsl_memdesc_footprint(&entry->memdesc)) { + ret = -ERANGE; + goto err_put; + } + } else if (len != kgsl_memdesc_footprint(&entry->memdesc) && + len != entry->memdesc.size) { + /* + * If cpu_map != gpumap then user can map either the + * footprint or the entry size + */ + ret = -ERANGE; + goto err_put; + } + + *out_entry = entry; + return 0; +err_put: + kgsl_mem_entry_put(entry); + return ret; +} + +static unsigned long _gpu_set_svm_region(struct kgsl_process_private *private, + struct kgsl_mem_entry *entry, unsigned long addr, + unsigned long size) +{ + int ret; + + /* + * Protect access to the gpuaddr here to prevent multiple vmas from + * trying to map a SVM region at the same time + */ + spin_lock(&entry->memdesc.lock); + + if (entry->memdesc.gpuaddr) { + spin_unlock(&entry->memdesc.lock); + return (unsigned long) -EBUSY; + } + + ret = kgsl_mmu_set_svm_region(private->pagetable, (uint64_t) addr, + (uint64_t) size); + + if (ret != 0) { + spin_unlock(&entry->memdesc.lock); + return (unsigned long) ret; + } + + entry->memdesc.gpuaddr = (uint64_t) addr; + spin_unlock(&entry->memdesc.lock); + + entry->memdesc.pagetable = private->pagetable; + + ret = kgsl_mmu_map(private->pagetable, &entry->memdesc); + if (ret) { + kgsl_mmu_put_gpuaddr(private->pagetable, &entry->memdesc); + return (unsigned long) ret; + } + + kgsl_memfree_purge(private->pagetable, entry->memdesc.gpuaddr, + entry->memdesc.size); + + return addr; +} + +static unsigned long get_align(struct kgsl_mem_entry *entry) 
+{ + int bit = kgsl_memdesc_get_align(&entry->memdesc); + + if (bit >= ilog2(SZ_2M)) + return SZ_2M; + else if (bit >= ilog2(SZ_1M)) + return SZ_1M; + else if (bit >= ilog2(SZ_64K)) + return SZ_64K; + + return SZ_4K; +} + +static unsigned long set_svm_area(struct file *file, + struct kgsl_mem_entry *entry, + unsigned long addr, unsigned long len, + unsigned long flags) +{ + struct kgsl_device_private *dev_priv = file->private_data; + struct kgsl_process_private *private = dev_priv->process_priv; + unsigned long ret; + + /* + * Do additoinal constraints checking on the address. Passing MAP_FIXED + * ensures that the address we want gets checked + */ + ret = current->mm->get_unmapped_area(file, addr, len, 0, + flags & MAP_FIXED); + + /* If it passes, attempt to set the region in the SVM */ + if (!IS_ERR_VALUE(ret)) + return _gpu_set_svm_region(private, entry, addr, len); + + return ret; +} + +static unsigned long get_svm_unmapped_area(struct file *file, + struct kgsl_mem_entry *entry, + unsigned long addr, unsigned long len, + unsigned long flags) +{ + struct kgsl_device_private *dev_priv = file->private_data; + struct kgsl_process_private *private = dev_priv->process_priv; + unsigned long align = get_align(entry); + unsigned long ret, iova; + u64 start = 0, end = 0; + struct vm_area_struct *vma; + + if (flags & MAP_FIXED) { + /* Even fixed addresses need to obey alignment */ + if (!IS_ALIGNED(addr, align)) + return -EINVAL; + + return set_svm_area(file, entry, addr, len, flags); + } + + /* If a hint was provided, try to use that first */ + if (addr) { + if (IS_ALIGNED(addr, align)) { + ret = set_svm_area(file, entry, addr, len, flags); + if (!IS_ERR_VALUE(ret)) + return ret; + } + } + + /* Get the SVM range for the current process */ + if (kgsl_mmu_svm_range(private->pagetable, &start, &end, + entry->memdesc.flags)) + return -ERANGE; + + /* Find the first gap in the iova map */ + iova = kgsl_mmu_find_svm_region(private->pagetable, start, end, + len, align); + + while (!IS_ERR_VALUE(iova)) { + vma = find_vma_intersection(current->mm, iova, iova + len - 1); + if (vma) { + iova = vma->vm_start; + } else { + ret = set_svm_area(file, entry, iova, len, flags); + if (!IS_ERR_VALUE(ret)) + return ret; + + /* + * set_svm_area will return -EBUSY if we tried to set up + * SVM on an object that already has a GPU address. 
If + * that happens don't bother walking the rest of the + * region + */ + if ((long) ret == -EBUSY) + return -EBUSY; + + } + + iova = kgsl_mmu_find_svm_region(private->pagetable, + start, iova - 1, len, align); + } + + return -ENOMEM; +} + +static unsigned long +kgsl_get_unmapped_area(struct file *file, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags) +{ + unsigned long val; + unsigned long vma_offset = pgoff << PAGE_SHIFT; + struct kgsl_device_private *dev_priv = file->private_data; + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_device *device = dev_priv->device; + struct kgsl_mem_entry *entry = NULL; + + if (vma_offset == (unsigned long) KGSL_MEMSTORE_TOKEN_ADDRESS) + return get_unmapped_area(NULL, addr, len, pgoff, flags); + + val = get_mmap_entry(private, &entry, pgoff, len); + if (val) + return val; + + /* Do not allow CPU mappings for secure buffers */ + if (kgsl_memdesc_is_secured(&entry->memdesc)) { + kgsl_mem_entry_put(entry); + return (unsigned long) -EPERM; + } + + if (!kgsl_memdesc_use_cpu_map(&entry->memdesc)) { + val = current->mm->get_unmapped_area(file, addr, len, 0, flags); + if (IS_ERR_VALUE(val)) + dev_err_ratelimited(device->dev, + "get_unmapped_area: pid %d addr %lx pgoff %lx len %ld failed error %d\n", + pid_nr(private->pid), addr, pgoff, len, + (int) val); + } else { + val = get_svm_unmapped_area(file, entry, addr, len, flags); + if (IS_ERR_VALUE(val)) + dev_err_ratelimited(device->dev, + "_get_svm_area: pid %d addr %lx pgoff %lx len %ld failed error %d\n", + pid_nr(private->pid), addr, pgoff, len, + (int) val); + } + + kgsl_mem_entry_put(entry); + return val; +} + +static int kgsl_mmap(struct file *file, struct vm_area_struct *vma) +{ + unsigned int cache; + unsigned long vma_offset = vma->vm_pgoff << PAGE_SHIFT; + struct kgsl_device_private *dev_priv = file->private_data; + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_mem_entry *entry = NULL; + struct kgsl_device *device = dev_priv->device; + int ret; + + /* Handle leagacy behavior for memstore */ + + if (vma_offset == (unsigned long) KGSL_MEMSTORE_TOKEN_ADDRESS) + return kgsl_mmap_memstore(file, device, vma); + + /* + * The reference count on the entry that we get from + * get_mmap_entry() will be held until kgsl_gpumem_vm_close(). 
+ */ + ret = get_mmap_entry(private, &entry, vma->vm_pgoff, + vma->vm_end - vma->vm_start); + if (ret) + return ret; + + vma->vm_flags |= entry->memdesc.ops->vmflags; + + vma->vm_private_data = entry; + + /* Determine user-side caching policy */ + + cache = kgsl_memdesc_get_cachemode(&entry->memdesc); + + switch (cache) { + case KGSL_CACHEMODE_WRITETHROUGH: + vma->vm_page_prot = pgprot_writethroughcache(vma->vm_page_prot); + if (pgprot_val(vma->vm_page_prot) == + pgprot_val(pgprot_writebackcache(vma->vm_page_prot))) + WARN_ONCE(1, "WRITETHROUGH is deprecated for arm64"); + break; + case KGSL_CACHEMODE_WRITEBACK: + vma->vm_page_prot = pgprot_writebackcache(vma->vm_page_prot); + break; + case KGSL_CACHEMODE_UNCACHED: + case KGSL_CACHEMODE_WRITECOMBINE: + default: + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + break; + } + + vma->vm_ops = &kgsl_gpumem_vm_ops; + + if (cache == KGSL_CACHEMODE_WRITEBACK + || cache == KGSL_CACHEMODE_WRITETHROUGH) { + int i; + unsigned long addr = vma->vm_start; + struct kgsl_memdesc *m = &entry->memdesc; + + for (i = 0; i < m->page_count; i++) { + struct page *page = m->pages[i]; + + vm_insert_page(vma, addr, page); + addr += PAGE_SIZE; + } + } + + if (entry->memdesc.shmem_filp) { + fput(vma->vm_file); + vma->vm_file = get_file(entry->memdesc.shmem_filp); + } + + atomic64_add(entry->memdesc.size, &entry->priv->gpumem_mapped); + + atomic_inc(&entry->map_count); + + /* + * kgsl gets the entry id or the gpu address through vm_pgoff. + * It is used during mmap and never needed again. But this vm_pgoff + * has different meaning at other parts of kernel. Not setting to + * zero will let way for wrong assumption when tried to unmap a page + * from this vma. + */ + vma->vm_pgoff = 0; + + trace_kgsl_mem_mmap(entry, vma->vm_start); + return 0; +} + +#define KGSL_READ_MESSAGE "OH HAI GPU\n" + +static ssize_t kgsl_read(struct file *filep, char __user *buf, size_t count, + loff_t *pos) +{ + return simple_read_from_buffer(buf, count, pos, + KGSL_READ_MESSAGE, strlen(KGSL_READ_MESSAGE) + 1); +} + +static const struct file_operations kgsl_fops = { + .owner = THIS_MODULE, + .release = kgsl_release, + .open = kgsl_open, + .mmap = kgsl_mmap, + .read = kgsl_read, + .get_unmapped_area = kgsl_get_unmapped_area, + .unlocked_ioctl = kgsl_ioctl, + .compat_ioctl = kgsl_compat_ioctl, +}; + +struct kgsl_driver kgsl_driver = { + .process_mutex = __MUTEX_INITIALIZER(kgsl_driver.process_mutex), + .proclist_lock = __RW_LOCK_UNLOCKED(kgsl_driver.proclist_lock), + .ptlock = __SPIN_LOCK_UNLOCKED(kgsl_driver.ptlock), + .devlock = __MUTEX_INITIALIZER(kgsl_driver.devlock), + /* + * Full cache flushes are faster than line by line on at least + * 8064 and 8974 once the region to be flushed is > 16mb. 
+ */ + .full_cache_threshold = SZ_16M, + + .stats.vmalloc = ATOMIC_LONG_INIT(0), + .stats.vmalloc_max = ATOMIC_LONG_INIT(0), + .stats.page_alloc = ATOMIC_LONG_INIT(0), + .stats.page_alloc_max = ATOMIC_LONG_INIT(0), + .stats.coherent = ATOMIC_LONG_INIT(0), + .stats.coherent_max = ATOMIC_LONG_INIT(0), + .stats.secure = ATOMIC_LONG_INIT(0), + .stats.secure_max = ATOMIC_LONG_INIT(0), + .stats.mapped = ATOMIC_LONG_INIT(0), + .stats.mapped_max = ATOMIC_LONG_INIT(0), +}; + +static void _unregister_device(struct kgsl_device *device) +{ + int minor; + + mutex_lock(&kgsl_driver.devlock); + for (minor = 0; minor < ARRAY_SIZE(kgsl_driver.devp); minor++) { + if (device == kgsl_driver.devp[minor]) { + device_destroy(kgsl_driver.class, + MKDEV(MAJOR(kgsl_driver.major), minor)); + kgsl_driver.devp[minor] = NULL; + break; + } + } + mutex_unlock(&kgsl_driver.devlock); +} + +/* sysfs_ops for the /sys/kernel/gpu kobject */ +static ssize_t kgsl_gpu_sysfs_attr_show(struct kobject *kobj, + struct attribute *__attr, char *buf) +{ + struct kgsl_gpu_sysfs_attr *attr = container_of(__attr, + struct kgsl_gpu_sysfs_attr, attr); + struct kgsl_device *device = container_of(kobj, + struct kgsl_device, gpu_sysfs_kobj); + + if (attr->show) + return attr->show(device, buf); + + return -EIO; +} + +static ssize_t kgsl_gpu_sysfs_attr_store(struct kobject *kobj, + struct attribute *__attr, const char *buf, size_t count) +{ + struct kgsl_gpu_sysfs_attr *attr = container_of(__attr, + struct kgsl_gpu_sysfs_attr, attr); + struct kgsl_device *device = container_of(kobj, + struct kgsl_device, gpu_sysfs_kobj); + + if (attr->store) + return attr->store(device, buf, count); + + return -EIO; +} + +/* Dummy release function - we have nothing to do here */ +static void kgsl_gpu_sysfs_release(struct kobject *kobj) +{ +} + +static const struct sysfs_ops kgsl_gpu_sysfs_ops = { + .show = kgsl_gpu_sysfs_attr_show, + .store = kgsl_gpu_sysfs_attr_store, +}; + +static struct kobj_type kgsl_gpu_sysfs_ktype = { + .sysfs_ops = &kgsl_gpu_sysfs_ops, + .release = kgsl_gpu_sysfs_release, +}; + +static int _register_device(struct kgsl_device *device) +{ + static u64 dma_mask = DMA_BIT_MASK(64); + static struct device_dma_parameters dma_parms; + int minor, ret; + dev_t dev; + + /* Find a minor for the device */ + + mutex_lock(&kgsl_driver.devlock); + for (minor = 0; minor < ARRAY_SIZE(kgsl_driver.devp); minor++) { + if (kgsl_driver.devp[minor] == NULL) { + kgsl_driver.devp[minor] = device; + break; + } + } + mutex_unlock(&kgsl_driver.devlock); + + if (minor == ARRAY_SIZE(kgsl_driver.devp)) { + pr_err("kgsl: minor devices exhausted\n"); + return -ENODEV; + } + + /* Create the device */ + dev = MKDEV(MAJOR(kgsl_driver.major), minor); + device->dev = device_create(kgsl_driver.class, + &device->pdev->dev, + dev, device, + device->name); + + if (IS_ERR(device->dev)) { + mutex_lock(&kgsl_driver.devlock); + kgsl_driver.devp[minor] = NULL; + mutex_unlock(&kgsl_driver.devlock); + ret = PTR_ERR(device->dev); + pr_err("kgsl: device_create(%s): %d\n", device->name, ret); + return ret; + } + + device->dev->dma_mask = &dma_mask; + device->dev->dma_parms = &dma_parms; + + dma_set_max_seg_size(device->dev, DMA_BIT_MASK(32)); + + set_dma_ops(device->dev, NULL); + + kobject_init_and_add(&device->gpu_sysfs_kobj, &kgsl_gpu_sysfs_ktype, + kernel_kobj, "gpu"); + + return 0; +} + +int kgsl_request_irq(struct platform_device *pdev, const char *name, + irq_handler_t handler, void *data) +{ + int ret, num = platform_get_irq_byname(pdev, name); + + if (num < 0) + return num; + + ret 
= devm_request_irq(&pdev->dev, num, handler, IRQF_TRIGGER_HIGH, + name, data); + + if (ret) { + dev_err(&pdev->dev, "Unable to get interrupt %s: %d\n", + name, ret); + return ret; + } + + disable_irq(num); + return num; +} + +int kgsl_of_property_read_ddrtype(struct device_node *node, const char *base, + u32 *ptr) +{ + char str[32]; + int ddr = of_fdt_get_ddrtype(); + + /* of_fdt_get_ddrtype returns error if the DDR type isn't determined */ + if (ddr >= 0) { + int ret; + + /* Construct expanded string for the DDR type */ + ret = snprintf(str, sizeof(str), "%s-ddr%d", base, ddr); + + /* WARN_ON() if the array size was too small for the string */ + if (WARN_ON(ret > sizeof(str))) + return -ENOMEM; + + /* Read the expanded string */ + if (!of_property_read_u32(node, str, ptr)) + return 0; + } + + /* Read the default string */ + return of_property_read_u32(node, base, ptr); +} + +int kgsl_device_platform_probe(struct kgsl_device *device) +{ + struct platform_device *pdev = device->pdev; + int status = -EINVAL; + + status = _register_device(device); + if (status) + return status; + + /* Can return -EPROBE_DEFER */ + status = kgsl_pwrctrl_init(device); + if (status) + goto error; + + device->events_wq = alloc_workqueue("kgsl-events", + WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS | WQ_HIGHPRI, 0); + + if (!device->events_wq) { + dev_err(device->dev, "Failed to allocate events workqueue\n"); + status = -ENOMEM; + goto error_pwrctrl_close; + } + + /* This can return -EPROBE_DEFER */ + status = kgsl_mmu_probe(device); + if (status != 0) + goto error_pwrctrl_close; + + status = kgsl_reclaim_init(); + if (status) + goto error_pwrctrl_close; + + rwlock_init(&device->context_lock); + spin_lock_init(&device->submit_lock); + + idr_init(&device->timelines); + spin_lock_init(&device->timelines_lock); + + kgsl_device_debugfs_init(device); + + dma_set_coherent_mask(&pdev->dev, KGSL_DMA_BIT_MASK); + + /* Set up the GPU events for the device */ + kgsl_device_events_probe(device); + + /* Initialize common sysfs entries */ + kgsl_pwrctrl_init_sysfs(device); + + return 0; + +error_pwrctrl_close: + if (device->events_wq) { + destroy_workqueue(device->events_wq); + device->events_wq = NULL; + } + + kgsl_pwrctrl_close(device); +error: + _unregister_device(device); + return status; +} + +void kgsl_device_platform_remove(struct kgsl_device *device) +{ + if (device->events_wq) { + destroy_workqueue(device->events_wq); + device->events_wq = NULL; + } + + kgsl_device_snapshot_close(device); + + if (device->gpu_sysfs_kobj.state_initialized) + kobject_del(&device->gpu_sysfs_kobj); + + idr_destroy(&device->context_idr); + idr_destroy(&device->timelines); + + kgsl_device_events_remove(device); + + kgsl_mmu_close(device); + + /* + * This needs to come after the MMU close so we can be sure all the + * pagetables have been freed + */ + kgsl_free_globals(device); + + kgsl_pwrctrl_close(device); + + kgsl_device_debugfs_close(device); + _unregister_device(device); +} + +void kgsl_core_exit(void) +{ + kgsl_exit_page_pools(); + kgsl_eventlog_exit(); + + if (kgsl_driver.workqueue) { + destroy_workqueue(kgsl_driver.workqueue); + kgsl_driver.workqueue = NULL; + } + + if (kgsl_driver.mem_workqueue) { + destroy_workqueue(kgsl_driver.mem_workqueue); + kgsl_driver.mem_workqueue = NULL; + } + + kgsl_events_exit(); + kgsl_core_debugfs_close(); + + kgsl_reclaim_close(); + + /* + * We call device_unregister() + * only if kgsl_driver.virtdev has been populated. 
+ * We check at least one member of kgsl_driver.virtdev to + * see if it is not NULL (and thus, has been populated). + */ + if (kgsl_driver.virtdev.class) + device_unregister(&kgsl_driver.virtdev); + + if (kgsl_driver.class) { + class_destroy(kgsl_driver.class); + kgsl_driver.class = NULL; + } + + kgsl_drawobjs_cache_exit(); + + kfree(memfree.list); + memset(&memfree, 0, sizeof(memfree)); + + unregister_chrdev_region(kgsl_driver.major, + ARRAY_SIZE(kgsl_driver.devp)); +} + +int __init kgsl_core_init(void) +{ + int result = 0; + + /* alloc major and minor device numbers */ + result = alloc_chrdev_region(&kgsl_driver.major, 0, + ARRAY_SIZE(kgsl_driver.devp), "kgsl"); + + if (result < 0) { + + pr_err("kgsl: alloc_chrdev_region failed err = %d\n", result); + goto err; + } + + cdev_init(&kgsl_driver.cdev, &kgsl_fops); + kgsl_driver.cdev.owner = THIS_MODULE; + kgsl_driver.cdev.ops = &kgsl_fops; + result = cdev_add(&kgsl_driver.cdev, MKDEV(MAJOR(kgsl_driver.major), 0), + ARRAY_SIZE(kgsl_driver.devp)); + + if (result) { + pr_err("kgsl: cdev_add() failed, dev_num= %d,result= %d\n", + kgsl_driver.major, result); + goto err; + } + + kgsl_driver.class = class_create(THIS_MODULE, "kgsl"); + + if (IS_ERR(kgsl_driver.class)) { + result = PTR_ERR(kgsl_driver.class); + pr_err("kgsl: failed to create class for kgsl\n"); + goto err; + } + + /* + * Make a virtual device for managing core related things + * in sysfs + */ + kgsl_driver.virtdev.class = kgsl_driver.class; + dev_set_name(&kgsl_driver.virtdev, "kgsl"); + result = device_register(&kgsl_driver.virtdev); + if (result) { + pr_err("kgsl: driver_register failed\n"); + goto err; + } + + /* Make kobjects in the virtual device for storing statistics */ + + kgsl_driver.ptkobj = + kobject_create_and_add("pagetables", + &kgsl_driver.virtdev.kobj); + + kgsl_driver.prockobj = + kobject_create_and_add("proc", + &kgsl_driver.virtdev.kobj); + + kgsl_core_debugfs_init(); + + kgsl_sharedmem_init_sysfs(); + + /* Initialize the memory pools */ + kgsl_probe_page_pools(); + + INIT_LIST_HEAD(&kgsl_driver.process_list); + + INIT_LIST_HEAD(&kgsl_driver.pagetable_list); + + kgsl_driver.workqueue = alloc_workqueue("kgsl-workqueue", + WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0); + + if (!kgsl_driver.workqueue) { + pr_err("kgsl: Failed to allocate kgsl workqueue\n"); + result = -ENOMEM; + goto err; + } + + kgsl_driver.mem_workqueue = alloc_workqueue("kgsl-mementry", + WQ_UNBOUND | WQ_MEM_RECLAIM, 0); + + if (!kgsl_driver.mem_workqueue) { + pr_err("kgsl: Failed to allocate mem workqueue\n"); + result = -ENOMEM; + goto err; + } + + kgsl_eventlog_init(); + + kgsl_events_init(); + + result = kgsl_drawobjs_cache_init(); + if (result) + goto err; + + memfree.list = kcalloc(MEMFREE_ENTRIES, sizeof(struct memfree_entry), + GFP_KERNEL); + + return 0; + +err: + kgsl_core_exit(); + return result; +} diff --git a/kgsl.h b/kgsl.h new file mode 100644 index 0000000000..98b25cb300 --- /dev/null +++ b/kgsl.h @@ -0,0 +1,606 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2008-2021, The Linux Foundation. All rights reserved. + */ +#ifndef __KGSL_H +#define __KGSL_H + +#include +#include +#include +#include +#include +#include +#include + +#include "kgsl_gmu_core.h" +#include "kgsl_pwrscale.h" + +#define KGSL_L3_DEVICE "kgsl-l3" + +/* + * --- kgsl drawobj flags --- + * These flags are same as --- drawobj flags --- + * but renamed to reflect that cmdbatch is renamed to drawobj. 
+ */ +#define KGSL_DRAWOBJ_MEMLIST KGSL_CMDBATCH_MEMLIST +#define KGSL_DRAWOBJ_MARKER KGSL_CMDBATCH_MARKER +#define KGSL_DRAWOBJ_SUBMIT_IB_LIST KGSL_CMDBATCH_SUBMIT_IB_LIST +#define KGSL_DRAWOBJ_CTX_SWITCH KGSL_CMDBATCH_CTX_SWITCH +#define KGSL_DRAWOBJ_PROFILING KGSL_CMDBATCH_PROFILING +#define KGSL_DRAWOBJ_PROFILING_KTIME KGSL_CMDBATCH_PROFILING_KTIME +#define KGSL_DRAWOBJ_END_OF_FRAME KGSL_CMDBATCH_END_OF_FRAME +#define KGSL_DRAWOBJ_SYNC KGSL_CMDBATCH_SYNC +#define KGSL_DRAWOBJ_PWR_CONSTRAINT KGSL_CMDBATCH_PWR_CONSTRAINT + +#define kgsl_drawobj_profiling_buffer kgsl_cmdbatch_profiling_buffer + + +/* The number of memstore arrays limits the number of contexts allowed. + * If more contexts are needed, update multiple for MEMSTORE_SIZE + */ +#define KGSL_MEMSTORE_SIZE ((int)(PAGE_SIZE * 8)) +#define KGSL_MEMSTORE_GLOBAL (0) +#define KGSL_PRIORITY_MAX_RB_LEVELS 4 +#define KGSL_MEMSTORE_MAX (KGSL_MEMSTORE_SIZE / \ + sizeof(struct kgsl_devmemstore) - 1 - KGSL_PRIORITY_MAX_RB_LEVELS) +#define KGSL_MAX_CONTEXTS_PER_PROC 200 + +#define MEMSTORE_RB_OFFSET(rb, field) \ + KGSL_MEMSTORE_OFFSET(((rb)->id + KGSL_MEMSTORE_MAX), field) + +#define MEMSTORE_ID_GPU_ADDR(dev, iter, field) \ + ((dev)->memstore->gpuaddr + KGSL_MEMSTORE_OFFSET(iter, field)) + +#define MEMSTORE_RB_GPU_ADDR(dev, rb, field) \ + ((dev)->memstore->gpuaddr + \ + KGSL_MEMSTORE_OFFSET(((rb)->id + KGSL_MEMSTORE_MAX), field)) + +/* + * SCRATCH MEMORY: The scratch memory is one page worth of data that + * is mapped into the GPU. This allows for some 'shared' data between + * the GPU and CPU. For example, it will be used by the GPU to write + * each updated RPTR for each RB. + * + * Used Data: + * Offset: Length(bytes): What + * 0x0: 4 * KGSL_PRIORITY_MAX_RB_LEVELS: RB0 RPTR + */ + +/* Shadow global helpers */ +#define SCRATCH_RPTR_OFFSET(id) ((id) * sizeof(unsigned int)) +#define SCRATCH_RPTR_GPU_ADDR(dev, id) \ + ((dev)->scratch->gpuaddr + SCRATCH_RPTR_OFFSET(id)) +#define SCRATCH_BV_RPTR_OFFSET(id) (0x40 + (id) * sizeof(unsigned int)) +#define SCRATCH_BV_RPTR_GPU_ADDR(dev, id) \ + ((dev)->scratch->gpuaddr + SCRATCH_BV_RPTR_OFFSET(id)) + +/* Timestamp window used to detect rollovers (half of integer range) */ +#define KGSL_TIMESTAMP_WINDOW 0x80000000 + +/* + * A macro for memory statistics - add the new size to the stat and if + * the statisic is greater then _max, set _max + */ +static inline void KGSL_STATS_ADD(uint64_t size, atomic_long_t *stat, + atomic_long_t *max) +{ + uint64_t ret = atomic_long_add_return(size, stat); + + if (ret > atomic_long_read(max)) + atomic_long_set(max, ret); +} + +#define KGSL_MAX_NUMIBS 100000 +#define KGSL_MAX_SYNCPOINTS 32 + +struct kgsl_device; +struct kgsl_context; + +/** + * struct kgsl_driver - main container for global KGSL things + * @cdev: Character device struct + * @major: Major ID for the KGSL device + * @class: Pointer to the class struct for the core KGSL sysfs entries + * @virtdev: Virtual device for managing the core + * @ptkobj: kobject for storing the pagetable statistics + * @prockobj: kobject for storing the process statistics + * @devp: Array of pointers to the individual KGSL device structs + * @process_list: List of open processes + * @pagetable_list: LIst of open pagetables + * @ptlock: Lock for accessing the pagetable list + * @process_mutex: Mutex for accessing the process list + * @proclist_lock: Lock for accessing the process list + * @devlock: Mutex protecting the device list + * @stats: Struct containing atomic memory statistics + * @full_cache_threshold: the threshold that 
triggers a full cache flush + * @workqueue: Pointer to a single threaded workqueue + * @mem_workqueue: Pointer to a workqueue for deferring memory entries + */ +struct kgsl_driver { + struct cdev cdev; + dev_t major; + struct class *class; + struct device virtdev; + struct kobject *ptkobj; + struct kobject *prockobj; + struct kgsl_device *devp[1]; + struct list_head process_list; + struct list_head pagetable_list; + spinlock_t ptlock; + struct mutex process_mutex; + rwlock_t proclist_lock; + struct mutex devlock; + struct { + atomic_long_t vmalloc; + atomic_long_t vmalloc_max; + atomic_long_t page_alloc; + atomic_long_t page_alloc_max; + atomic_long_t coherent; + atomic_long_t coherent_max; + atomic_long_t secure; + atomic_long_t secure_max; + atomic_long_t mapped; + atomic_long_t mapped_max; + } stats; + unsigned int full_cache_threshold; + struct workqueue_struct *workqueue; + struct workqueue_struct *mem_workqueue; +}; + +extern struct kgsl_driver kgsl_driver; + +struct kgsl_pagetable; +struct kgsl_memdesc; + +struct kgsl_memdesc_ops { + unsigned int vmflags; + vm_fault_t (*vmfault)(struct kgsl_memdesc *memdesc, + struct vm_area_struct *vma, struct vm_fault *vmf); + void (*free)(struct kgsl_memdesc *memdesc); + int (*map_kernel)(struct kgsl_memdesc *memdesc); + void (*unmap_kernel)(struct kgsl_memdesc *memdesc); + /** + * @put_gpuaddr: Put away the GPU address and unmap the memory + * descriptor + */ + void (*put_gpuaddr)(struct kgsl_memdesc *memdesc); +}; + +/* Internal definitions for memdesc->priv */ +#define KGSL_MEMDESC_GUARD_PAGE BIT(0) +/* Set if the memdesc is mapped into all pagetables */ +#define KGSL_MEMDESC_GLOBAL BIT(1) +/* The memdesc is frozen during a snapshot */ +#define KGSL_MEMDESC_FROZEN BIT(2) +/* The memdesc is mapped into a pagetable */ +#define KGSL_MEMDESC_MAPPED BIT(3) +/* The memdesc is secured for content protection */ +#define KGSL_MEMDESC_SECURE BIT(4) +/* Memory is accessible in privileged mode */ +#define KGSL_MEMDESC_PRIVILEGED BIT(6) +/* This is an instruction buffer */ +#define KGSL_MEMDESC_UCODE BIT(7) +/* For global buffers, randomly assign an address from the region */ +#define KGSL_MEMDESC_RANDOM BIT(8) +/* Allocate memory from the system instead of the pools */ +#define KGSL_MEMDESC_SYSMEM BIT(9) +/* The memdesc pages can be reclaimed */ +#define KGSL_MEMDESC_CAN_RECLAIM BIT(10) +/* The memdesc pages were reclaimed */ +#define KGSL_MEMDESC_RECLAIMED BIT(11) +/* Skip reclaim of the memdesc pages */ +#define KGSL_MEMDESC_SKIP_RECLAIM BIT(12) + +/** + * struct kgsl_memdesc - GPU memory object descriptor + * @pagetable: Pointer to the pagetable that the object is mapped in + * @hostptr: Kernel virtual address + * @hostptr_count: Number of threads using hostptr + * @gpuaddr: GPU virtual address + * @physaddr: Physical address of the memory object + * @size: Size of the memory object + * @priv: Internal flags and settings + * @sgt: Scatter gather table for allocated pages + * @ops: Function hooks for the memdesc memory type + * @flags: Flags set from userspace + * @dev: Pointer to the struct device that owns this memory + * @attrs: dma attributes for this memory + * @pages: An array of pointers to allocated pages + * @page_count: Total number of pages allocated + */ +struct kgsl_memdesc { + struct kgsl_pagetable *pagetable; + void *hostptr; + unsigned int hostptr_count; + uint64_t gpuaddr; + phys_addr_t physaddr; + uint64_t size; + unsigned int priv; + struct sg_table *sgt; + const struct kgsl_memdesc_ops *ops; + uint64_t flags; + struct device *dev; + 
unsigned long attrs; + struct page **pages; + unsigned int page_count; + /* + * @lock: Spinlock to protect the gpuaddr from being accessed by + * multiple entities trying to map the same SVM region at once + */ + spinlock_t lock; + /** @shmem_filp: Pointer to the shmem file backing this memdesc */ + struct file *shmem_filp; + /** @ranges: rbtree base for the interval list of vbo ranges */ + struct rb_root_cached ranges; + /** @ranges_lock: Mutex to protect the range database */ + struct mutex ranges_lock; + /** @gmuaddr: GMU VA if this is mapped in GMU */ + u32 gmuaddr; +}; + +/** + * struct kgsl_global_memdesc - wrapper for global memory objects + */ +struct kgsl_global_memdesc { + /** @memdesc: Container for the GPU memory descriptor for the object */ + struct kgsl_memdesc memdesc; + /** @name: Name of the object for the debugfs list */ + const char *name; + /** @node: List node for the list of global objects */ + struct list_head node; +}; + +/* + * List of different memory entry types. The usermem enum + * starts at 0, which we use for allocated memory, so 1 is + * added to the enum values. + */ +#define KGSL_MEM_ENTRY_KERNEL 0 +#define KGSL_MEM_ENTRY_USER (KGSL_USER_MEM_TYPE_ADDR + 1) +#define KGSL_MEM_ENTRY_ION (KGSL_USER_MEM_TYPE_ION + 1) +#define KGSL_MEM_ENTRY_MAX (KGSL_USER_MEM_TYPE_MAX + 1) + +/* symbolic table for trace and debugfs */ +/* + * struct kgsl_mem_entry - a userspace memory allocation + * @refcount: reference count. Currently userspace can only + * hold a single reference count, but the kernel may hold more. + * @memdesc: description of the memory + * @priv_data: type-specific data, such as the dma-buf attachment pointer. + * @node: rb_node for the gpu address lookup rb tree + * @id: idr index for this entry, can be used to find memory that does not have + * a valid GPU address. + * @priv: back pointer to the process that owns this memory + * @pending_free: if !0, userspace requested that his memory be freed, but there + * are still references to it. + * @dev_priv: back pointer to the device file that created this entry. 
+ * @metadata: String containing user specified metadata for the entry + * @work: Work struct used to schedule a kgsl_mem_entry_put in atomic contexts + */ +struct kgsl_mem_entry { + struct kref refcount; + struct kgsl_memdesc memdesc; + void *priv_data; + struct rb_node node; + unsigned int id; + struct kgsl_process_private *priv; + int pending_free; + char metadata[KGSL_GPUOBJ_ALLOC_METADATA_MAX + 1]; + struct work_struct work; + /** + * @map_count: Count how many vmas this object is mapped in - used for + * debugfs accounting + */ + atomic_t map_count; +}; + +struct kgsl_device_private; +struct kgsl_event_group; + +typedef void (*kgsl_event_func)(struct kgsl_device *, struct kgsl_event_group *, + void *, int); + +/** + * struct kgsl_event - KGSL GPU timestamp event + * @device: Pointer to the KGSL device that owns the event + * @context: Pointer to the context that owns the event + * @timestamp: Timestamp for the event to expire + * @func: Callback function for for the event when it expires + * @priv: Private data passed to the callback function + * @node: List node for the kgsl_event_group list + * @created: Jiffies when the event was created + * @work: Work struct for dispatching the callback + * @result: KGSL event result type to pass to the callback + * group: The event group this event belongs to + */ +struct kgsl_event { + struct kgsl_device *device; + struct kgsl_context *context; + unsigned int timestamp; + kgsl_event_func func; + void *priv; + struct list_head node; + unsigned int created; + struct work_struct work; + int result; + struct kgsl_event_group *group; +}; + +typedef int (*readtimestamp_func)(struct kgsl_device *, void *, + enum kgsl_timestamp_type, unsigned int *); + +/** + * struct event_group - A list of GPU events + * @context: Pointer to the active context for the events + * @lock: Spinlock for protecting the list + * @events: List of active GPU events + * @group: Node for the master group list + * @processed: Last processed timestamp + * @name: String name for the group (for the debugfs file) + * @readtimestamp: Function pointer to read a timestamp + * @priv: Priv member to pass to the readtimestamp function + */ +struct kgsl_event_group { + struct kgsl_context *context; + spinlock_t lock; + struct list_head events; + struct list_head group; + unsigned int processed; + char name[64]; + readtimestamp_func readtimestamp; + void *priv; +}; + +/** + * struct submission_info - Container for submission statistics + * @inflight: Number of commands that are inflight + * @rb_id: id of the ringbuffer to which this submission is made + * @rptr: Read pointer of the ringbuffer + * @wptr: Write pointer of the ringbuffer + * @gmu_dispatch_queue: GMU dispach queue to which this submission is made + */ +struct submission_info { + int inflight; + u32 rb_id; + u32 rptr; + u32 wptr; + u32 gmu_dispatch_queue; +}; + +/** + * struct retire_info - Container for retire statistics + * @inflight: NUmber of commands that are inflight + * @rb_id: id of the ringbuffer to which this submission is made + * @rptr: Read pointer of the ringbuffer + * @wptr: Write pointer of the ringbuffer + * @gmu_dispatch_queue: GMU dispach queue to which this submission is made + * @timestamp: Timestamp of submission that retired + * @submitted_to_rb: AO ticks when GMU put this submission on ringbuffer + * @sop: AO ticks when GPU started procssing this submission + * @eop: AO ticks when GPU finished this submission + * @retired_on_gmu: AO ticks when GMU retired this submission + */ +struct retire_info { + int 
inflight; + int rb_id; + u32 rptr; + u32 wptr; + u32 gmu_dispatch_queue; + u32 timestamp; + u64 submitted_to_rb; + u64 sop; + u64 eop; + u64 retired_on_gmu; +}; + +long kgsl_ioctl_device_getproperty(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_device_setproperty(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_device_waittimestamp_ctxtid(struct kgsl_device_private + *dev_priv, unsigned int cmd, void *data); +long kgsl_ioctl_rb_issueibcmds(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_submit_commands(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_cmdstream_readtimestamp_ctxtid(struct kgsl_device_private + *dev_priv, unsigned int cmd, + void *data); +long kgsl_ioctl_cmdstream_freememontimestamp_ctxtid( + struct kgsl_device_private + *dev_priv, unsigned int cmd, + void *data); +long kgsl_ioctl_drawctxt_create(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_drawctxt_destroy(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_sharedmem_free(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpumem_free_id(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_map_user_mem(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpumem_sync_cache(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpumem_sync_cache_bulk(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_sharedmem_flush_cache(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpumem_alloc(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpumem_alloc_id(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpumem_get_info(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_timestamp_event(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpuobj_alloc(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpuobj_free(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpuobj_info(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpuobj_import(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpuobj_sync(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpu_command(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpuobj_set_info(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpumem_bind_ranges(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpu_aux_command(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_timeline_create(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_timeline_wait(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_timeline_query(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_timeline_fence_get(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long 
kgsl_ioctl_timeline_signal(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_timeline_destroy(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); + +void kgsl_mem_entry_destroy(struct kref *kref); + +void kgsl_get_egl_counts(struct kgsl_mem_entry *entry, + int *egl_surface_count, int *egl_image_count); + +unsigned long kgsl_get_dmabuf_inode_number(struct kgsl_mem_entry *entry); + +struct kgsl_mem_entry * __must_check +kgsl_sharedmem_find(struct kgsl_process_private *private, uint64_t gpuaddr); + +struct kgsl_mem_entry * __must_check +kgsl_sharedmem_find_id(struct kgsl_process_private *process, unsigned int id); + +struct kgsl_mem_entry *gpumem_alloc_entry(struct kgsl_device_private *dev_priv, + uint64_t size, uint64_t flags); +long gpumem_free_entry(struct kgsl_mem_entry *entry); + +enum kgsl_mmutype kgsl_mmu_get_mmutype(struct kgsl_device *device); +void kgsl_mmu_add_global(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, const char *name); +void kgsl_mmu_remove_global(struct kgsl_device *device, + struct kgsl_memdesc *memdesc); + +/* Helper functions */ +int kgsl_request_irq(struct platform_device *pdev, const char *name, + irq_handler_t handler, void *data); + +int __init kgsl_core_init(void); +void kgsl_core_exit(void); + +static inline bool kgsl_gpuaddr_in_memdesc(const struct kgsl_memdesc *memdesc, + uint64_t gpuaddr, uint64_t size) +{ + if (!memdesc) + return false; + + /* set a minimum size to search for */ + if (!size) + size = 1; + + /* don't overflow */ + if (size > U64_MAX - gpuaddr) + return false; + + return (gpuaddr >= memdesc->gpuaddr && + ((gpuaddr + size) <= (memdesc->gpuaddr + memdesc->size))); +} + +static inline void *kgsl_memdesc_map(struct kgsl_memdesc *memdesc) +{ + if (memdesc->ops && memdesc->ops->map_kernel) + memdesc->ops->map_kernel(memdesc); + + return memdesc->hostptr; +} + +static inline void kgsl_memdesc_unmap(struct kgsl_memdesc *memdesc) +{ + if (memdesc->ops && memdesc->ops->unmap_kernel) + memdesc->ops->unmap_kernel(memdesc); +} + +static inline void *kgsl_gpuaddr_to_vaddr(struct kgsl_memdesc *memdesc, + uint64_t gpuaddr) +{ + void *hostptr = NULL; + + if ((gpuaddr >= memdesc->gpuaddr) && + (gpuaddr < (memdesc->gpuaddr + memdesc->size))) + hostptr = kgsl_memdesc_map(memdesc); + + return hostptr != NULL ? hostptr + (gpuaddr - memdesc->gpuaddr) : NULL; +} + +static inline int timestamp_cmp(unsigned int a, unsigned int b) +{ + /* check for equal */ + if (a == b) + return 0; + + /* check for greater-than for non-rollover case */ + if ((a > b) && (a - b < KGSL_TIMESTAMP_WINDOW)) + return 1; + + /* check for greater-than for rollover case + * note that <= is required to ensure that consistent + * results are returned for values whose difference is + * equal to the window size + */ + a += KGSL_TIMESTAMP_WINDOW; + b += KGSL_TIMESTAMP_WINDOW; + return ((a > b) && (a - b <= KGSL_TIMESTAMP_WINDOW)) ? 
1 : -1; +} + +/** + * kgsl_schedule_work() - Schedule a work item on the KGSL workqueue + * @work: work item to schedule + */ +static inline void kgsl_schedule_work(struct work_struct *work) +{ + queue_work(kgsl_driver.workqueue, work); +} + +static inline struct kgsl_mem_entry * +kgsl_mem_entry_get(struct kgsl_mem_entry *entry) +{ + if (!IS_ERR_OR_NULL(entry) && kref_get_unless_zero(&entry->refcount)) + return entry; + + return NULL; +} + +static inline void +kgsl_mem_entry_put(struct kgsl_mem_entry *entry) +{ + if (!IS_ERR_OR_NULL(entry)) + kref_put(&entry->refcount, kgsl_mem_entry_destroy); +} + +/* + * kgsl_addr_range_overlap() - Checks if 2 ranges overlap + * @gpuaddr1: Start of first address range + * @size1: Size of first address range + * @gpuaddr2: Start of second address range + * @size2: Size of second address range + * + * Function returns true if the 2 given address ranges overlap + * else false + */ +static inline bool kgsl_addr_range_overlap(uint64_t gpuaddr1, + uint64_t size1, uint64_t gpuaddr2, uint64_t size2) +{ + if ((size1 > (U64_MAX - gpuaddr1)) || (size2 > (U64_MAX - gpuaddr2))) + return false; + return !(((gpuaddr1 + size1) <= gpuaddr2) || + (gpuaddr1 >= (gpuaddr2 + size2))); +} +#endif /* __KGSL_H */ diff --git a/kgsl_bus.c b/kgsl_bus.c new file mode 100644 index 0000000000..2279ce5d2e --- /dev/null +++ b/kgsl_bus.c @@ -0,0 +1,179 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2019-2021, The Linux Foundation. All rights reserved. + */ + +#include +#include +#include + +#include "kgsl_bus.h" +#include "kgsl_device.h" +#include "kgsl_trace.h" + + +static u32 _ab_buslevel_update(struct kgsl_pwrctrl *pwr, + u32 ib) +{ + if (!ib) + return 0; + + /* + * In the absence of any other settings, make ab 25% of ib + * where the ib vote is in kbps + */ + if ((!pwr->bus_percent_ab) && (!pwr->bus_ab_mbytes)) + return 25 * ib / 100000; + + if (pwr->bus_width) + return pwr->bus_ab_mbytes; + + return (pwr->bus_percent_ab * pwr->bus_max) / 100; +} + + +int kgsl_bus_update(struct kgsl_device *device, + enum kgsl_bus_vote vote_state) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + /* FIXME: this might be wrong? */ + int cur = pwr->pwrlevels[pwr->active_pwrlevel].bus_freq; + int buslevel = 0; + u32 ab; + + /* the bus should be ON to update the active frequency */ + if ((vote_state != KGSL_BUS_VOTE_OFF) && + !(test_bit(KGSL_PWRFLAGS_AXI_ON, &pwr->power_flags))) + return 0; + /* + * If the bus should remain on calculate our request and submit it, + * otherwise request bus level 0, off. 
+ */ + if (vote_state == KGSL_BUS_VOTE_ON) { + buslevel = min_t(int, pwr->pwrlevels[0].bus_max, + cur + pwr->bus_mod); + buslevel = max_t(int, buslevel, 1); + } else if (vote_state == KGSL_BUS_VOTE_MINIMUM) { + /* Request bus level 1, minimum non-zero value */ + buslevel = 1; + pwr->bus_mod = 0; + pwr->bus_percent_ab = 0; + pwr->bus_ab_mbytes = 0; + } else if (vote_state == KGSL_BUS_VOTE_OFF) { + /* If the bus is being turned off, reset to default level */ + pwr->bus_mod = 0; + pwr->bus_percent_ab = 0; + pwr->bus_ab_mbytes = 0; + } + + /* buslevel is the IB vote, update the AB */ + ab = _ab_buslevel_update(pwr, pwr->ddr_table[buslevel]); + + return device->ftbl->gpu_bus_set(device, buslevel, ab); +} + +static void validate_pwrlevels(struct kgsl_device *device, u32 *ibs, + int count) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int i; + + for (i = 0; i < pwr->num_pwrlevels - 1; i++) { + struct kgsl_pwrlevel *pwrlevel = &pwr->pwrlevels[i]; + + if (pwrlevel->bus_freq >= count) { + dev_err(device->dev, "Bus setting for GPU freq %d is out of bounds\n", + pwrlevel->gpu_freq); + pwrlevel->bus_freq = count - 1; + } + + if (pwrlevel->bus_max >= count) { + dev_err(device->dev, "Bus max for GPU freq %d is out of bounds\n", + pwrlevel->gpu_freq); + pwrlevel->bus_max = count - 1; + } + + if (pwrlevel->bus_min >= count) { + dev_err(device->dev, "Bus min for GPU freq %d is out of bounds\n", + pwrlevel->gpu_freq); + pwrlevel->bus_min = count - 1; + } + + if (pwrlevel->bus_min > pwrlevel->bus_max) { + dev_err(device->dev, "Bus min is bigger than bus max for GPU freq %d\n", + pwrlevel->gpu_freq); + pwrlevel->bus_min = pwrlevel->bus_max; + } + } +} + +u32 *kgsl_bus_get_table(struct platform_device *pdev, + const char *name, int *count) +{ + u32 *levels; + int i, num = of_property_count_elems_of_size(pdev->dev.of_node, + name, sizeof(u32)); + + /* If the bus wasn't specified, then build a static table */ + if (num <= 0) + return ERR_PTR(-EINVAL); + + levels = kcalloc(num, sizeof(*levels), GFP_KERNEL); + if (!levels) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < num; i++) + of_property_read_u32_index(pdev->dev.of_node, + name, i, &levels[i]); + + *count = num; + return levels; +} + +int kgsl_bus_init(struct kgsl_device *device, struct platform_device *pdev) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int count; + int ddr = of_fdt_get_ddrtype(); + + if (ddr >= 0) { + char str[32]; + + snprintf(str, sizeof(str), "qcom,bus-table-ddr%d", ddr); + + pwr->ddr_table = kgsl_bus_get_table(pdev, str, &count); + if (!IS_ERR(pwr->ddr_table)) + goto done; + } + + /* Look if a generic table is present */ + pwr->ddr_table = kgsl_bus_get_table(pdev, "qcom,bus-table-ddr", &count); + if (IS_ERR(pwr->ddr_table)) { + int ret = PTR_ERR(pwr->ddr_table); + + pwr->ddr_table = NULL; + return ret; + } +done: + pwr->ddr_table_count = count; + + validate_pwrlevels(device, pwr->ddr_table, pwr->ddr_table_count); + + pwr->icc_path = of_icc_get(&pdev->dev, "gpu_icc_path"); + if (IS_ERR(pwr->icc_path) && !gmu_core_scales_bandwidth(device)) { + WARN(1, "The CPU has no way to set the GPU bus levels\n"); + + kfree(pwr->ddr_table); + pwr->ddr_table = NULL; + return PTR_ERR(pwr->icc_path); + } + + return 0; +} + +void kgsl_bus_close(struct kgsl_device *device) +{ + kfree(device->pwrctrl.ddr_table); + device->pwrctrl.ddr_table = NULL; + icc_put(device->pwrctrl.icc_path); +} diff --git a/kgsl_bus.h b/kgsl_bus.h new file mode 100644 index 0000000000..1814233658 --- /dev/null +++ b/kgsl_bus.h @@ -0,0 +1,25 @@ +/* 
SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2019-2020, The Linux Foundation. All rights reserved. + */ + +#ifndef _KGSL_BUS_H +#define _KGSL_BUS_H + +enum kgsl_bus_vote { + KGSL_BUS_VOTE_OFF = 0, + KGSL_BUS_VOTE_ON, + KGSL_BUS_VOTE_MINIMUM, +}; + +struct kgsl_device; +struct platform_device; + +int kgsl_bus_init(struct kgsl_device *device, struct platform_device *pdev); +void kgsl_bus_close(struct kgsl_device *device); +int kgsl_bus_update(struct kgsl_device *device, enum kgsl_bus_vote vote_state); + +u32 *kgsl_bus_get_table(struct platform_device *pdev, + const char *name, int *count); + +#endif diff --git a/kgsl_compat.c b/kgsl_compat.c new file mode 100644 index 0000000000..a5e8deaf78 --- /dev/null +++ b/kgsl_compat.c @@ -0,0 +1,392 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2013-2021, The Linux Foundation. All rights reserved. + */ + +#include "kgsl_device.h" +#include "kgsl_compat.h" +#include "kgsl_sync.h" + +static long +kgsl_ioctl_device_getproperty_compat(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_device_getproperty_compat *param32 = data; + struct kgsl_device_getproperty param; + + param.type = param32->type; + param.value = compat_ptr(param32->value); + param.sizebytes = (size_t)param32->sizebytes; + + return kgsl_ioctl_device_getproperty(dev_priv, cmd, ¶m); +} + +static long +kgsl_ioctl_device_setproperty_compat(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_device_getproperty_compat *param32 = data; + struct kgsl_device_getproperty param; + + param.type = param32->type; + param.value = compat_ptr(param32->value); + param.sizebytes = (size_t)param32->sizebytes; + + return kgsl_ioctl_device_setproperty(dev_priv, cmd, ¶m); +} + +static long +kgsl_ioctl_submit_commands_compat(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + int result; + struct kgsl_submit_commands_compat *param32 = data; + struct kgsl_submit_commands param; + + param.context_id = param32->context_id; + param.flags = param32->flags; + param.cmdlist = compat_ptr(param32->cmdlist); + param.numcmds = param32->numcmds; + param.synclist = compat_ptr(param32->synclist); + param.numsyncs = param32->numsyncs; + param.timestamp = param32->timestamp; + + result = kgsl_ioctl_submit_commands(dev_priv, cmd, ¶m); + + param32->timestamp = param.timestamp; + + return result; +} + +static long +kgsl_ioctl_rb_issueibcmds_compat(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + int result; + struct kgsl_ringbuffer_issueibcmds_compat *param32 = data; + struct kgsl_ringbuffer_issueibcmds param; + + param.drawctxt_id = param32->drawctxt_id; + param.flags = param32->flags; + param.ibdesc_addr = (unsigned long)param32->ibdesc_addr; + param.numibs = param32->numibs; + param.timestamp = param32->timestamp; + + result = kgsl_ioctl_rb_issueibcmds(dev_priv, cmd, ¶m); + + param32->timestamp = param.timestamp; + + return result; +} + +static long kgsl_ioctl_cmdstream_freememontimestamp_ctxtid_compat( + struct kgsl_device_private + *dev_priv, unsigned int cmd, + void *data) +{ + struct kgsl_cmdstream_freememontimestamp_ctxtid_compat *param32 = data; + struct kgsl_cmdstream_freememontimestamp_ctxtid param; + + param.context_id = param32->context_id; + param.gpuaddr = (unsigned long)param32->gpuaddr; + param.type = param32->type; + param.timestamp = param32->timestamp; + + return kgsl_ioctl_cmdstream_freememontimestamp_ctxtid(dev_priv, cmd, + ¶m); +} + +static long 
kgsl_ioctl_sharedmem_free_compat(struct kgsl_device_private + *dev_priv, unsigned int cmd, + void *data) +{ + struct kgsl_sharedmem_free_compat *param32 = data; + struct kgsl_sharedmem_free param; + + param.gpuaddr = (unsigned long)param32->gpuaddr; + + return kgsl_ioctl_sharedmem_free(dev_priv, cmd, ¶m); +} + +static long kgsl_ioctl_map_user_mem_compat(struct kgsl_device_private + *dev_priv, unsigned int cmd, + void *data) +{ + int result = 0; + struct kgsl_map_user_mem_compat *param32 = data; + struct kgsl_map_user_mem param; + + param.fd = param32->fd; + param.gpuaddr = (unsigned long)param32->gpuaddr; + param.len = (size_t)param32->len; + param.offset = (size_t)param32->offset; + param.hostptr = (unsigned long)param32->hostptr; + param.memtype = param32->memtype; + param.flags = param32->flags; + + result = kgsl_ioctl_map_user_mem(dev_priv, cmd, ¶m); + + param32->gpuaddr = gpuaddr_to_compat(param.gpuaddr); + param32->flags = param.flags; + return result; +} + +static long +kgsl_ioctl_gpumem_sync_cache_compat(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_gpumem_sync_cache_compat *param32 = data; + struct kgsl_gpumem_sync_cache param; + + param.gpuaddr = (unsigned long)param32->gpuaddr; + param.id = param32->id; + param.op = param32->op; + param.offset = (size_t)param32->offset; + param.length = (size_t)param32->length; + + return kgsl_ioctl_gpumem_sync_cache(dev_priv, cmd, ¶m); +} + +static long +kgsl_ioctl_gpumem_sync_cache_bulk_compat(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_gpumem_sync_cache_bulk_compat *param32 = data; + struct kgsl_gpumem_sync_cache_bulk param; + + param.id_list = compat_ptr(param32->id_list); + param.count = param32->count; + param.op = param32->op; + + return kgsl_ioctl_gpumem_sync_cache_bulk(dev_priv, cmd, ¶m); +} + +static long +kgsl_ioctl_sharedmem_flush_cache_compat(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_sharedmem_free_compat *param32 = data; + struct kgsl_sharedmem_free param; + + param.gpuaddr = (unsigned long)param32->gpuaddr; + + return kgsl_ioctl_sharedmem_flush_cache(dev_priv, cmd, ¶m); +} + +static long +kgsl_ioctl_gpumem_alloc_compat(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + int result = 0; + struct kgsl_gpumem_alloc_compat *param32 = data; + struct kgsl_gpumem_alloc param; + + param.gpuaddr = (unsigned long)param32->gpuaddr; + param.size = (size_t)param32->size; + param.flags = param32->flags; + + /* + * Since this is a 32 bit application the page aligned size is expected + * to fit inside of 32 bits - check for overflow and return error if so + */ + if (PAGE_ALIGN(param.size) >= UINT_MAX) + return -EINVAL; + + result = kgsl_ioctl_gpumem_alloc(dev_priv, cmd, ¶m); + + param32->gpuaddr = gpuaddr_to_compat(param.gpuaddr); + param32->size = sizet_to_compat(param.size); + param32->flags = param.flags; + + return result; +} + +static long +kgsl_ioctl_gpumem_alloc_id_compat(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + int result = 0; + struct kgsl_gpumem_alloc_id_compat *param32 = data; + struct kgsl_gpumem_alloc_id param; + + param.id = param32->id; + param.flags = param32->flags; + param.size = (size_t)param32->size; + param.mmapsize = (size_t)param32->mmapsize; + param.gpuaddr = (unsigned long)param32->gpuaddr; + + /* + * Since this is a 32 bit application the page aligned size is expected + * to fit inside of 32 bits - check for overflow and return error if 
so + */ + if (PAGE_ALIGN(param.size) >= UINT_MAX) + return -EINVAL; + + result = kgsl_ioctl_gpumem_alloc_id(dev_priv, cmd, ¶m); + + param32->id = param.id; + param32->flags = param.flags; + param32->size = sizet_to_compat(param.size); + param32->mmapsize = sizet_to_compat(param.mmapsize); + param32->gpuaddr = gpuaddr_to_compat(param.gpuaddr); + + return result; +} + +static long +kgsl_ioctl_gpumem_get_info_compat(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + int result = 0; + struct kgsl_gpumem_get_info_compat *param32 = data; + struct kgsl_gpumem_get_info param; + + param.gpuaddr = (unsigned long)param32->gpuaddr; + param.id = param32->id; + param.flags = param32->flags; + param.size = (size_t)param32->size; + param.mmapsize = (size_t)param32->mmapsize; + param.useraddr = (unsigned long)param32->useraddr; + + result = kgsl_ioctl_gpumem_get_info(dev_priv, cmd, ¶m); + + param32->gpuaddr = gpuaddr_to_compat(param.gpuaddr); + param32->id = param.id; + param32->flags = param.flags; + param32->size = sizet_to_compat(param.size); + param32->mmapsize = sizet_to_compat(param.mmapsize); + param32->useraddr = (compat_ulong_t)param.useraddr; + + return result; +} + +static long kgsl_ioctl_timestamp_event_compat(struct kgsl_device_private + *dev_priv, unsigned int cmd, void *data) +{ + struct kgsl_timestamp_event_compat *param32 = data; + struct kgsl_timestamp_event param; + + param.type = param32->type; + param.timestamp = param32->timestamp; + param.context_id = param32->context_id; + param.priv = compat_ptr(param32->priv); + param.len = (size_t)param32->len; + + return kgsl_ioctl_timestamp_event(dev_priv, cmd, ¶m); +} + + +static const struct kgsl_ioctl kgsl_compat_ioctl_funcs[] = { + KGSL_IOCTL_FUNC(IOCTL_KGSL_DEVICE_GETPROPERTY_COMPAT, + kgsl_ioctl_device_getproperty_compat), + /* IOCTL_KGSL_DEVICE_WAITTIMESTAMP is no longer supported */ + KGSL_IOCTL_FUNC(IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID, + kgsl_ioctl_device_waittimestamp_ctxtid), + KGSL_IOCTL_FUNC(IOCTL_KGSL_RINGBUFFER_ISSUEIBCMDS_COMPAT, + kgsl_ioctl_rb_issueibcmds_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SUBMIT_COMMANDS_COMPAT, + kgsl_ioctl_submit_commands_compat), + /* IOCTL_KGSL_CMDSTREAM_READTIMESTAMP is no longer supported */ + KGSL_IOCTL_FUNC(IOCTL_KGSL_CMDSTREAM_READTIMESTAMP_CTXTID, + kgsl_ioctl_cmdstream_readtimestamp_ctxtid), + /* IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP is no longer supported */ + KGSL_IOCTL_FUNC(IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP_CTXTID_COMPAT, + kgsl_ioctl_cmdstream_freememontimestamp_ctxtid_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_DRAWCTXT_CREATE, + kgsl_ioctl_drawctxt_create), + KGSL_IOCTL_FUNC(IOCTL_KGSL_DRAWCTXT_DESTROY, + kgsl_ioctl_drawctxt_destroy), + KGSL_IOCTL_FUNC(IOCTL_KGSL_MAP_USER_MEM_COMPAT, + kgsl_ioctl_map_user_mem_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SHAREDMEM_FREE_COMPAT, + kgsl_ioctl_sharedmem_free_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SHAREDMEM_FLUSH_CACHE_COMPAT, + kgsl_ioctl_sharedmem_flush_cache_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_ALLOC_COMPAT, + kgsl_ioctl_gpumem_alloc_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_TIMESTAMP_EVENT_COMPAT, + kgsl_ioctl_timestamp_event_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SETPROPERTY_COMPAT, + kgsl_ioctl_device_setproperty_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_ALLOC_ID_COMPAT, + kgsl_ioctl_gpumem_alloc_id_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_FREE_ID, + kgsl_ioctl_gpumem_free_id), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_GET_INFO_COMPAT, + kgsl_ioctl_gpumem_get_info_compat), + 
KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_SYNC_CACHE_COMPAT, + kgsl_ioctl_gpumem_sync_cache_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_SYNC_CACHE_BULK_COMPAT, + kgsl_ioctl_gpumem_sync_cache_bulk_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SYNCSOURCE_CREATE, + kgsl_ioctl_syncsource_create), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SYNCSOURCE_DESTROY, + kgsl_ioctl_syncsource_destroy), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SYNCSOURCE_CREATE_FENCE, + kgsl_ioctl_syncsource_create_fence), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SYNCSOURCE_SIGNAL_FENCE, + kgsl_ioctl_syncsource_signal_fence), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_ALLOC, + kgsl_ioctl_gpuobj_alloc), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_FREE, + kgsl_ioctl_gpuobj_free), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_INFO, + kgsl_ioctl_gpuobj_info), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_IMPORT, + kgsl_ioctl_gpuobj_import), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_SYNC, + kgsl_ioctl_gpuobj_sync), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPU_COMMAND, + kgsl_ioctl_gpu_command), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_SET_INFO, + NULL), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SPARSE_PHYS_ALLOC, + NULL), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SPARSE_PHYS_FREE, + NULL), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SPARSE_VIRT_ALLOC, + NULL), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SPARSE_VIRT_FREE, + NULL), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SPARSE_BIND, + NULL), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPU_SPARSE_COMMAND, + NULL), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_BIND_RANGES, + kgsl_ioctl_gpumem_bind_ranges), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPU_AUX_COMMAND, + kgsl_ioctl_gpu_aux_command), + KGSL_IOCTL_FUNC(IOCTL_KGSL_TIMELINE_CREATE, + kgsl_ioctl_timeline_create), + KGSL_IOCTL_FUNC(IOCTL_KGSL_TIMELINE_WAIT, + kgsl_ioctl_timeline_wait), + KGSL_IOCTL_FUNC(IOCTL_KGSL_TIMELINE_FENCE_GET, + kgsl_ioctl_timeline_fence_get), + KGSL_IOCTL_FUNC(IOCTL_KGSL_TIMELINE_QUERY, + kgsl_ioctl_timeline_query), + KGSL_IOCTL_FUNC(IOCTL_KGSL_TIMELINE_SIGNAL, + kgsl_ioctl_timeline_signal), + KGSL_IOCTL_FUNC(IOCTL_KGSL_TIMELINE_DESTROY, + kgsl_ioctl_timeline_destroy), +}; + +long kgsl_compat_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) +{ + struct kgsl_device_private *dev_priv = filep->private_data; + struct kgsl_device *device = dev_priv->device; + + long ret = kgsl_ioctl_helper(filep, cmd, arg, kgsl_compat_ioctl_funcs, + ARRAY_SIZE(kgsl_compat_ioctl_funcs)); + + /* + * If the command was unrecognized in the generic core, try the device + * specific function + */ + + if (ret == -ENOIOCTLCMD) { + if (device->ftbl->compat_ioctl != NULL) + return device->ftbl->compat_ioctl(dev_priv, cmd, arg); + } + + return ret; +} diff --git a/kgsl_compat.h b/kgsl_compat.h new file mode 100644 index 0000000000..a8081dd08f --- /dev/null +++ b/kgsl_compat.h @@ -0,0 +1,243 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2013-2017,2019,2021 The Linux Foundation. All rights reserved. 
+ */ +#ifndef __KGSL_COMPAT_H +#define __KGSL_COMPAT_H + +#include +#include + +#ifdef CONFIG_COMPAT + +struct kgsl_ibdesc_compat { + compat_ulong_t gpuaddr; + unsigned int __pad; + compat_size_t sizedwords; + unsigned int ctrl; +}; + +struct kgsl_cmd_syncpoint_compat { + int type; + compat_uptr_t priv; + compat_size_t size; +}; + +struct kgsl_devinfo_compat { + unsigned int device_id; + unsigned int chip_id; + unsigned int mmu_enabled; + compat_ulong_t gmem_gpubaseaddr; + unsigned int gpu_id; + compat_size_t gmem_sizebytes; +}; + +struct kgsl_shadowprop_compat { + compat_ulong_t gpuaddr; + compat_size_t size; + unsigned int flags; +}; + +struct kgsl_device_constraint_compat { + unsigned int type; + unsigned int context_id; + compat_uptr_t data; + compat_size_t size; +}; + +struct kgsl_device_getproperty_compat { + unsigned int type; + compat_uptr_t value; + compat_size_t sizebytes; +}; + +#define IOCTL_KGSL_DEVICE_GETPROPERTY_COMPAT \ + _IOWR(KGSL_IOC_TYPE, 0x2, struct kgsl_device_getproperty_compat) + +#define IOCTL_KGSL_SETPROPERTY_COMPAT \ + _IOW(KGSL_IOC_TYPE, 0x32, struct kgsl_device_getproperty_compat) + + +struct kgsl_submit_commands_compat { + unsigned int context_id; + unsigned int flags; + compat_uptr_t cmdlist; + unsigned int numcmds; + compat_uptr_t synclist; + unsigned int numsyncs; + unsigned int timestamp; +/* private: reserved for future use */ + unsigned int __pad[4]; +}; + +#define IOCTL_KGSL_SUBMIT_COMMANDS_COMPAT \ + _IOWR(KGSL_IOC_TYPE, 0x3D, struct kgsl_submit_commands_compat) + +struct kgsl_ringbuffer_issueibcmds_compat { + unsigned int drawctxt_id; + compat_ulong_t ibdesc_addr; + unsigned int numibs; + unsigned int timestamp; /* output param */ + unsigned int flags; +}; + +#define IOCTL_KGSL_RINGBUFFER_ISSUEIBCMDS_COMPAT \ + _IOWR(KGSL_IOC_TYPE, 0x10, struct kgsl_ringbuffer_issueibcmds_compat) + +struct kgsl_cmdstream_freememontimestamp_ctxtid_compat { + unsigned int context_id; + compat_ulong_t gpuaddr; + unsigned int type; + unsigned int timestamp; +}; + +#define IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP_CTXTID_COMPAT \ + _IOW(KGSL_IOC_TYPE, 0x17, \ + struct kgsl_cmdstream_freememontimestamp_ctxtid_compat) + +struct kgsl_map_user_mem_compat { + int fd; + compat_ulong_t gpuaddr; + compat_size_t len; + compat_size_t offset; + compat_ulong_t hostptr; + enum kgsl_user_mem_type memtype; + unsigned int flags; +}; + +#define IOCTL_KGSL_MAP_USER_MEM_COMPAT \ + _IOWR(KGSL_IOC_TYPE, 0x15, struct kgsl_map_user_mem_compat) + +struct kgsl_sharedmem_free_compat { + compat_ulong_t gpuaddr; +}; + +#define IOCTL_KGSL_SHAREDMEM_FLUSH_CACHE_COMPAT \ + _IOW(KGSL_IOC_TYPE, 0x24, struct kgsl_sharedmem_free_compat) + +#define IOCTL_KGSL_SHAREDMEM_FREE_COMPAT \ + _IOW(KGSL_IOC_TYPE, 0x21, struct kgsl_sharedmem_free_compat) + +struct kgsl_gpumem_alloc_compat { + compat_ulong_t gpuaddr; /* output param */ + compat_size_t size; + unsigned int flags; +}; + +#define IOCTL_KGSL_GPUMEM_ALLOC_COMPAT \ + _IOWR(KGSL_IOC_TYPE, 0x2f, struct kgsl_gpumem_alloc_compat) + +struct kgsl_cff_syncmem_compat { + compat_ulong_t gpuaddr; + compat_size_t len; + unsigned int __pad[2]; /* For future binary compatibility */ +}; + +#define IOCTL_KGSL_CFF_SYNCMEM_COMPAT \ + _IOW(KGSL_IOC_TYPE, 0x30, struct kgsl_cff_syncmem_compat) + +struct kgsl_timestamp_event_compat { + int type; /* Type of event (see list below) */ + unsigned int timestamp; /* Timestamp to trigger event on */ + unsigned int context_id; /* Context for the timestamp */ + compat_uptr_t priv; /* Pointer to the event specific blob */ + compat_size_t 
len; /* Size of the event specific blob */ +}; + +#define IOCTL_KGSL_TIMESTAMP_EVENT_COMPAT \ + _IOWR(KGSL_IOC_TYPE, 0x33, struct kgsl_timestamp_event_compat) + +struct kgsl_gpumem_alloc_id_compat { + unsigned int id; + unsigned int flags; + compat_size_t size; + compat_size_t mmapsize; + compat_ulong_t gpuaddr; +/* private: reserved for future use*/ + unsigned int __pad[2]; +}; + +#define IOCTL_KGSL_GPUMEM_ALLOC_ID_COMPAT \ + _IOWR(KGSL_IOC_TYPE, 0x34, struct kgsl_gpumem_alloc_id_compat) + +struct kgsl_gpumem_get_info_compat { + compat_ulong_t gpuaddr; + unsigned int id; + unsigned int flags; + compat_size_t size; + compat_size_t mmapsize; + compat_ulong_t useraddr; +/* private: reserved for future use*/ + unsigned int __pad[4]; +}; + +#define IOCTL_KGSL_GPUMEM_GET_INFO_COMPAT \ + _IOWR(KGSL_IOC_TYPE, 0x36, struct kgsl_gpumem_get_info_compat) + +struct kgsl_gpumem_sync_cache_compat { + compat_ulong_t gpuaddr; + unsigned int id; + unsigned int op; + compat_size_t offset; + compat_size_t length; +}; + +#define IOCTL_KGSL_GPUMEM_SYNC_CACHE_COMPAT \ + _IOW(KGSL_IOC_TYPE, 0x37, struct kgsl_gpumem_sync_cache_compat) + +struct kgsl_gpumem_sync_cache_bulk_compat { + compat_uptr_t id_list; + unsigned int count; + unsigned int op; +/* private: reserved for future use */ + unsigned int __pad[2]; /* For future binary compatibility */ +}; + +#define IOCTL_KGSL_GPUMEM_SYNC_CACHE_BULK_COMPAT \ + _IOWR(KGSL_IOC_TYPE, 0x3C, struct kgsl_gpumem_sync_cache_bulk_compat) + +struct kgsl_perfcounter_query_compat { + unsigned int groupid; + compat_uptr_t countables; + unsigned int count; + unsigned int max_counters; + unsigned int __pad[2]; +}; + +#define IOCTL_KGSL_PERFCOUNTER_QUERY_COMPAT \ + _IOWR(KGSL_IOC_TYPE, 0x3A, struct kgsl_perfcounter_query_compat) + +struct kgsl_perfcounter_read_compat { + compat_uptr_t reads; + unsigned int count; + unsigned int __pad[2]; +}; + +#define IOCTL_KGSL_PERFCOUNTER_READ_COMPAT \ + _IOWR(KGSL_IOC_TYPE, 0x3B, struct kgsl_perfcounter_read_compat) + +static inline compat_ulong_t gpuaddr_to_compat(unsigned long gpuaddr) +{ + WARN(gpuaddr >> 32, "Top 32 bits of gpuaddr have been set\n"); + return (compat_ulong_t)gpuaddr; +} + +static inline compat_size_t sizet_to_compat(size_t size) +{ + WARN(size >> 32, "Size greater than 4G\n"); + return (compat_size_t)size; +} + +long kgsl_compat_ioctl(struct file *filep, unsigned int cmd, + unsigned long arg); + +#else + +static inline long kgsl_compat_ioctl(struct file *filep, unsigned int cmd, + unsigned long arg) +{ + return -EINVAL; +} + +#endif /* CONFIG_COMPAT */ +#endif /* __KGSL_COMPAT_H */ diff --git a/kgsl_debugfs.c b/kgsl_debugfs.c new file mode 100644 index 0000000000..0a9356010f --- /dev/null +++ b/kgsl_debugfs.c @@ -0,0 +1,406 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2002,2008-2021, The Linux Foundation. All rights reserved. 
+ */ + +#include +#include + +#include "kgsl_debugfs.h" +#include "kgsl_device.h" +#include "kgsl_sharedmem.h" + +struct dentry *kgsl_debugfs_dir; +static struct dentry *proc_d_debugfs; + +static void kgsl_qdss_gfx_register_probe(struct kgsl_device *device) +{ + struct resource *res; + + res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM, + "qdss_gfx"); + + if (res == NULL) + return; + + device->qdss_gfx_virt = devm_ioremap(&device->pdev->dev, res->start, + resource_size(res)); + + if (device->qdss_gfx_virt == NULL) + dev_warn(device->dev, "qdss_gfx ioremap failed\n"); +} + +static int _isdb_set(void *data, u64 val) +{ + struct kgsl_device *device = data; + + if (device->qdss_gfx_virt == NULL) + kgsl_qdss_gfx_register_probe(device); + + device->set_isdb_breakpoint = val ? true : false; + return 0; +} + +static int _isdb_get(void *data, u64 *val) +{ + struct kgsl_device *device = data; + + *val = device->set_isdb_breakpoint ? 1 : 0; + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(_isdb_fops, _isdb_get, _isdb_set, "%llu\n"); + +static int globals_show(struct seq_file *s, void *unused) +{ + struct kgsl_device *device = s->private; + struct kgsl_global_memdesc *md; + + list_for_each_entry(md, &device->globals, node) { + struct kgsl_memdesc *memdesc = &md->memdesc; + char flags[6]; + + flags[0] = memdesc->priv & KGSL_MEMDESC_PRIVILEGED ? 'p' : '-'; + flags[1] = !(memdesc->flags & KGSL_MEMFLAGS_GPUREADONLY) ? 'w' : '-'; + flags[2] = kgsl_memdesc_is_secured(memdesc) ? 's' : '-'; + flags[3] = memdesc->priv & KGSL_MEMDESC_RANDOM ? 'r' : '-'; + flags[4] = memdesc->priv & KGSL_MEMDESC_UCODE ? 'u' : '-'; + flags[5] = '\0'; + + seq_printf(s, "0x%pK-0x%pK %16llu %5s %s\n", + (u64 *)(uintptr_t) memdesc->gpuaddr, + (u64 *)(uintptr_t) (memdesc->gpuaddr + + memdesc->size - 1), memdesc->size, flags, + md->name); + } + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(globals); + +void kgsl_device_debugfs_init(struct kgsl_device *device) +{ + struct dentry *snapshot_dir; + + if (IS_ERR_OR_NULL(kgsl_debugfs_dir)) + return; + + device->d_debugfs = debugfs_create_dir(device->name, + kgsl_debugfs_dir); + + debugfs_create_file("globals", 0444, device->d_debugfs, device, + &globals_fops); + + snapshot_dir = debugfs_create_dir("snapshot", kgsl_debugfs_dir); + debugfs_create_file("break_isdb", 0644, snapshot_dir, device, + &_isdb_fops); +} + +void kgsl_device_debugfs_close(struct kgsl_device *device) +{ + debugfs_remove_recursive(device->d_debugfs); +} + +static const char *memtype_str(int memtype) +{ + if (memtype == KGSL_MEM_ENTRY_KERNEL) + return "gpumem"; + else if (memtype == KGSL_MEM_ENTRY_USER) + return "usermem"; + else if (memtype == KGSL_MEM_ENTRY_ION) + return "ion"; + + return "unknown"; +} + +static char get_alignflag(const struct kgsl_memdesc *m) +{ + int align = kgsl_memdesc_get_align(m); + + if (align >= ilog2(SZ_1M)) + return 'L'; + else if (align >= ilog2(SZ_64K)) + return 'l'; + return '-'; +} + +static char get_cacheflag(const struct kgsl_memdesc *m) +{ + static const char table[] = { + [KGSL_CACHEMODE_WRITECOMBINE] = '-', + [KGSL_CACHEMODE_UNCACHED] = 'u', + [KGSL_CACHEMODE_WRITEBACK] = 'b', + [KGSL_CACHEMODE_WRITETHROUGH] = 't', + }; + + return table[kgsl_memdesc_get_cachemode(m)]; +} + + +static int print_mem_entry(void *data, void *ptr) +{ + struct seq_file *s = data; + struct kgsl_mem_entry *entry = ptr; + char flags[11]; + char usage[16]; + struct kgsl_memdesc *m = &entry->memdesc; + unsigned int usermem_type = kgsl_memdesc_usermem_type(m); + int egl_surface_count = 0, egl_image_count = 0; + 
unsigned long inode_number = 0; + u32 map_count = atomic_read(&entry->map_count); + + flags[0] = kgsl_memdesc_is_global(m) ? 'g' : '-'; + flags[1] = '-'; + flags[2] = !(m->flags & KGSL_MEMFLAGS_GPUREADONLY) ? 'w' : '-'; + flags[3] = get_alignflag(m); + flags[4] = get_cacheflag(m); + flags[5] = kgsl_memdesc_use_cpu_map(m) ? 'p' : '-'; + /* Show Y if at least one vma has this entry mapped (could be multiple) */ + flags[6] = map_count ? 'Y' : 'N'; + flags[7] = kgsl_memdesc_is_secured(m) ? 's' : '-'; + flags[8] = '-'; + flags[9] = m->flags & KGSL_MEMFLAGS_VBO ? 'v' : '-'; + flags[10] = '\0'; + + kgsl_get_memory_usage(usage, sizeof(usage), m->flags); + + if (usermem_type == KGSL_MEM_ENTRY_ION) { + kgsl_get_egl_counts(entry, &egl_surface_count, + &egl_image_count); + inode_number = kgsl_get_dmabuf_inode_number(entry); + } + + seq_printf(s, "%pK %pK %16llu %5d %10s %10s %16s %5d %10d %6d %6d %10lu", + (uint64_t *)(uintptr_t) m->gpuaddr, + /* + * Show zero for the useraddr - we can't reliably track + * that value for multiple vmas anyway + */ + NULL, m->size, entry->id, flags, + memtype_str(usermem_type), + usage, (m->sgt ? m->sgt->nents : 0), map_count, + egl_surface_count, egl_image_count, inode_number); + + if (entry->metadata[0] != 0) + seq_printf(s, " %s", entry->metadata); + + seq_putc(s, '\n'); + + return 0; +} + +static struct kgsl_mem_entry *process_mem_seq_find(struct seq_file *s, + void *ptr, loff_t pos) +{ + struct kgsl_mem_entry *entry = ptr; + struct kgsl_process_private *private = s->private; + int id = 0; + + loff_t temp_pos = 1; + + if (entry != SEQ_START_TOKEN) + id = entry->id + 1; + + spin_lock(&private->mem_lock); + for (entry = idr_get_next(&private->mem_idr, &id); entry; + id++, entry = idr_get_next(&private->mem_idr, &id), + temp_pos++) { + if (temp_pos == pos && kgsl_mem_entry_get(entry)) { + spin_unlock(&private->mem_lock); + goto found; + } + } + spin_unlock(&private->mem_lock); + + entry = NULL; +found: + if (ptr != SEQ_START_TOKEN) + kgsl_mem_entry_put(ptr); + + return entry; +} + +static void *process_mem_seq_start(struct seq_file *s, loff_t *pos) +{ + loff_t seq_file_offset = *pos; + + if (seq_file_offset == 0) + return SEQ_START_TOKEN; + else + return process_mem_seq_find(s, SEQ_START_TOKEN, + seq_file_offset); +} + +static void process_mem_seq_stop(struct seq_file *s, void *ptr) +{ + if (ptr && ptr != SEQ_START_TOKEN) + kgsl_mem_entry_put(ptr); +} + +static void *process_mem_seq_next(struct seq_file *s, void *ptr, + loff_t *pos) +{ + ++*pos; + return process_mem_seq_find(s, ptr, 1); +} + +static int process_mem_seq_show(struct seq_file *s, void *ptr) +{ + if (ptr == SEQ_START_TOKEN) { + seq_printf(s, "%16s %16s %16s %5s %10s %10s %16s %5s %10s %6s %6s %10s\n", + "gpuaddr", "useraddr", "size", "id", "flags", "type", + "usage", "sglen", "mapcnt", "eglsrf", "eglimg", "inode"); + return 0; + } else + return print_mem_entry(s, ptr); +} + +static const struct seq_operations process_mem_seq_fops = { + .start = process_mem_seq_start, + .stop = process_mem_seq_stop, + .next = process_mem_seq_next, + .show = process_mem_seq_show, +}; + +static int process_mem_open(struct inode *inode, struct file *file) +{ + int ret; + pid_t pid = (pid_t) (unsigned long) inode->i_private; + struct seq_file *s = NULL; + struct kgsl_process_private *private = NULL; + + private = kgsl_process_private_find(pid); + + if (!private) + return -ENODEV; + + ret = seq_open(file, &process_mem_seq_fops); + if (ret) + kgsl_process_private_put(private); + else { + s = file->private_data; + s->private = 
private; + } + + return ret; +} + +static int process_mem_release(struct inode *inode, struct file *file) +{ + struct kgsl_process_private *private = + ((struct seq_file *)file->private_data)->private; + + if (private) + kgsl_process_private_put(private); + + return seq_release(inode, file); +} + +static const struct file_operations process_mem_fops = { + .open = process_mem_open, + .read = seq_read, + .llseek = seq_lseek, + .release = process_mem_release, +}; + + +static int print_vbo_ranges(int id, void *ptr, void *data) +{ + kgsl_memdesc_print_vbo_ranges(ptr, data); + return 0; +} + +static int vbo_print(struct seq_file *s, void *unused) +{ + struct kgsl_process_private *private = s->private; + + seq_puts(s, "id child range\n"); + + spin_lock(&private->mem_lock); + idr_for_each(&private->mem_idr, print_vbo_ranges, s); + spin_unlock(&private->mem_lock); + + return 0; +} + +static int vbo_open(struct inode *inode, struct file *file) +{ + pid_t pid = (pid_t) (unsigned long) inode->i_private; + struct kgsl_process_private *private; + int ret; + + private = kgsl_process_private_find(pid); + + if (!private) + return -ENODEV; + + ret = single_open(file, vbo_print, private); + if (ret) + kgsl_process_private_put(private); + + return ret; +} + +static const struct file_operations vbo_fops = { + .open = vbo_open, + .read = seq_read, + .llseek = seq_lseek, + /* Reuse the same release function */ + .release = process_mem_release, +}; + +/** + * kgsl_process_init_debugfs() - Initialize debugfs for a process + * @private: Pointer to process private structure created for the process + * + * kgsl_process_init_debugfs() is called at the time of creating the + * process struct when a process opens kgsl device for the first time. + * This function is not fatal - all we do is print a warning message if + * the files can't be created + */ +void kgsl_process_init_debugfs(struct kgsl_process_private *private) +{ + unsigned char name[16]; + struct dentry *dentry; + + snprintf(name, sizeof(name), "%d", pid_nr(private->pid)); + + private->debug_root = debugfs_create_dir(name, proc_d_debugfs); + + if (IS_ERR(private->debug_root)) { + WARN_ONCE("Unable to create debugfs dir for %s\n", name); + private->debug_root = NULL; + return; + } + + dentry = debugfs_create_file("mem", 0444, private->debug_root, + (void *) ((unsigned long) pid_nr(private->pid)), &process_mem_fops); + + if (IS_ERR(dentry)) + WARN_ONCE("Unable to create 'mem' file for %s\n", name); + + debugfs_create_file("vbos", 0444, private->debug_root, + (void *) ((unsigned long) pid_nr(private->pid)), &vbo_fops); +} + +void kgsl_core_debugfs_init(void) +{ + struct dentry *debug_dir; + + kgsl_debugfs_dir = debugfs_create_dir("kgsl", NULL); + if (IS_ERR_OR_NULL(kgsl_debugfs_dir)) + return; + + debug_dir = debugfs_create_dir("debug", kgsl_debugfs_dir); + + proc_d_debugfs = debugfs_create_dir("proc", kgsl_debugfs_dir); + + debugfs_create_bool("strict_memory", 0644, debug_dir, + &kgsl_sharedmem_noretry_flag); +} + +void kgsl_core_debugfs_close(void) +{ + debugfs_remove_recursive(kgsl_debugfs_dir); +} diff --git a/kgsl_debugfs.h b/kgsl_debugfs.h new file mode 100644 index 0000000000..16799f2900 --- /dev/null +++ b/kgsl_debugfs.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2002,2008-2011,2013,2015,2017,2019, The Linux Foundation. All rights reserved. 
+ */ +#ifndef _KGSL_DEBUGFS_H +#define _KGSL_DEBUGFS_H + +struct kgsl_device; +struct kgsl_process_private; + +#ifdef CONFIG_DEBUG_FS +void kgsl_core_debugfs_init(void); +void kgsl_core_debugfs_close(void); + +void kgsl_device_debugfs_init(struct kgsl_device *device); +void kgsl_device_debugfs_close(struct kgsl_device *device); + +extern struct dentry *kgsl_debugfs_dir; +static inline struct dentry *kgsl_get_debugfs_dir(void) +{ + return kgsl_debugfs_dir; +} + +void kgsl_process_init_debugfs(struct kgsl_process_private *priv); +#else +static inline void kgsl_core_debugfs_init(void) { } +static inline void kgsl_device_debugfs_init(struct kgsl_device *device) { } +static inline void kgsl_device_debugfs_close(struct kgsl_device *device) { } +static inline void kgsl_core_debugfs_close(void) { } +static inline struct dentry *kgsl_get_debugfs_dir(void) { return NULL; } +static inline void kgsl_process_init_debugfs(struct kgsl_process_private *priv) +{ +} +#endif + +#endif diff --git a/kgsl_device.h b/kgsl_device.h new file mode 100644 index 0000000000..ba704d3cb1 --- /dev/null +++ b/kgsl_device.h @@ -0,0 +1,962 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. + */ +#ifndef __KGSL_DEVICE_H +#define __KGSL_DEVICE_H + +#include +#include +#include + +#include "kgsl.h" +#include "kgsl_drawobj.h" +#include "kgsl_mmu.h" +#include "kgsl_regmap.h" + +#define KGSL_IOCTL_FUNC(_cmd, _func) \ + [_IOC_NR((_cmd))] = \ + { .cmd = (_cmd), .func = (_func) } + +/* + * KGSL device state is initialized to INIT when platform_probe * + * successfully initialized the device. Once a device has been opened * + * (started) it becomes active. NAP implies that only low latency * + * resources (for now clocks on some platforms) are off. SLEEP implies * + * that the KGSL module believes a device is idle (has been inactive * + * past its timer) and all system resources are released. SUSPEND is * + * requested by the kernel and will be enforced upon all open devices. * + * RESET indicates that GPU or GMU hang happens. KGSL is handling * + * snapshot or recover GPU from hang. MINBW implies that DDR BW vote is * + * set to non-zero minimum value. 
+ */
+
+#define KGSL_STATE_NONE 0x00000000
+#define KGSL_STATE_INIT 0x00000001
+#define KGSL_STATE_ACTIVE 0x00000002
+#define KGSL_STATE_NAP 0x00000004
+#define KGSL_STATE_SUSPEND 0x00000010
+#define KGSL_STATE_AWARE 0x00000020
+#define KGSL_STATE_SLUMBER 0x00000080
+#define KGSL_STATE_MINBW 0x00000100
+
+/**
+ * enum kgsl_event_results - result codes passed to an event callback when the
+ * event is retired or cancelled
+ * @KGSL_EVENT_RETIRED: The timestamp associated with the event retired
+ * successfully
+ * @KGSL_EVENT_CANCELLED: The event was cancelled before the event was fired
+ */
+enum kgsl_event_results {
+ KGSL_EVENT_RETIRED = 1,
+ KGSL_EVENT_CANCELLED = 2,
+};
+
+/*
+ * "list" of event types for ftrace symbolic magic
+ */
+
+#define KGSL_CONTEXT_FLAGS \
+ { KGSL_CONTEXT_NO_GMEM_ALLOC, "NO_GMEM_ALLOC" }, \
+ { KGSL_CONTEXT_PREAMBLE, "PREAMBLE" }, \
+ { KGSL_CONTEXT_TRASH_STATE, "TRASH_STATE" }, \
+ { KGSL_CONTEXT_CTX_SWITCH, "CTX_SWITCH" }, \
+ { KGSL_CONTEXT_PER_CONTEXT_TS, "PER_CONTEXT_TS" }, \
+ { KGSL_CONTEXT_USER_GENERATED_TS, "USER_TS" }, \
+ { KGSL_CONTEXT_NO_FAULT_TOLERANCE, "NO_FT" }, \
+ { KGSL_CONTEXT_INVALIDATE_ON_FAULT, "INVALIDATE_ON_FAULT" }, \
+ { KGSL_CONTEXT_PWR_CONSTRAINT, "PWR" }, \
+ { KGSL_CONTEXT_SAVE_GMEM, "SAVE_GMEM" }, \
+ { KGSL_CONTEXT_IFH_NOP, "IFH_NOP" }, \
+ { KGSL_CONTEXT_SECURE, "SECURE" }, \
+ { KGSL_CONTEXT_NO_SNAPSHOT, "NO_SNAPSHOT" }
+
+#define KGSL_CONTEXT_ID(_context) \
+ ((_context != NULL) ? (_context)->id : KGSL_MEMSTORE_GLOBAL)
+
+struct kgsl_device;
+struct platform_device;
+struct kgsl_device_private;
+struct kgsl_context;
+struct kgsl_power_stats;
+struct kgsl_event;
+struct kgsl_snapshot;
+
+struct kgsl_functable {
+ /* Mandatory functions - these functions must be implemented
+ * by the client device. The driver will not check for a NULL
+ * pointer before calling the hook.
+ */
+ int (*suspend_context)(struct kgsl_device *device);
+ int (*first_open)(struct kgsl_device *device);
+ int (*last_close)(struct kgsl_device *device);
+ int (*start)(struct kgsl_device *device, int priority);
+ int (*stop)(struct kgsl_device *device);
+ int (*getproperty)(struct kgsl_device *device,
+ struct kgsl_device_getproperty *param);
+ int (*getproperty_compat)(struct kgsl_device *device,
+ struct kgsl_device_getproperty *param);
+ int (*waittimestamp)(struct kgsl_device *device,
+ struct kgsl_context *context, unsigned int timestamp,
+ unsigned int msecs);
+ int (*readtimestamp)(struct kgsl_device *device, void *priv,
+ enum kgsl_timestamp_type type, unsigned int *timestamp);
+ int (*queue_cmds)(struct kgsl_device_private *dev_priv,
+ struct kgsl_context *context, struct kgsl_drawobj *drawobj[],
+ uint32_t count, uint32_t *timestamp);
+ void (*power_stats)(struct kgsl_device *device,
+ struct kgsl_power_stats *stats);
+ void (*snapshot)(struct kgsl_device *device,
+ struct kgsl_snapshot *snapshot, struct kgsl_context *context);
+ /** @drain_and_idle: Drain the GPU and wait for it to idle */
+ int (*drain_and_idle)(struct kgsl_device *device);
+ struct kgsl_device_private * (*device_private_create)(void);
+ void (*device_private_destroy)(struct kgsl_device_private *dev_priv);
+ /*
+ * Optional functions - these functions are not mandatory.
The + * driver will check that the function pointer is not NULL before + * calling the hook + */ + struct kgsl_context *(*drawctxt_create) + (struct kgsl_device_private *dev_priv, + uint32_t *flags); + void (*drawctxt_detach)(struct kgsl_context *context); + void (*drawctxt_destroy)(struct kgsl_context *context); + void (*drawctxt_dump)(struct kgsl_device *device, + struct kgsl_context *context); + long (*ioctl)(struct kgsl_device_private *dev_priv, + unsigned int cmd, unsigned long arg); + long (*compat_ioctl)(struct kgsl_device_private *dev_priv, + unsigned int cmd, unsigned long arg); + int (*setproperty)(struct kgsl_device_private *dev_priv, + unsigned int type, void __user *value, + unsigned int sizebytes); + int (*setproperty_compat)(struct kgsl_device_private *dev_priv, + unsigned int type, void __user *value, + unsigned int sizebytes); + void (*drawctxt_sched)(struct kgsl_device *device, + struct kgsl_context *context); + void (*resume)(struct kgsl_device *device); + int (*regulator_enable)(struct kgsl_device *device); + bool (*is_hw_collapsible)(struct kgsl_device *device); + void (*regulator_disable)(struct kgsl_device *device); + void (*pwrlevel_change_settings)(struct kgsl_device *device, + unsigned int prelevel, unsigned int postlevel, bool post); + void (*clk_set_options)(struct kgsl_device *device, + const char *name, struct clk *clk, bool on); + /** + * @query_property_list: query the list of properties + * supported by the device. If 'list' is NULL just return the total + * number of properties available otherwise copy up to 'count' items + * into the list and return the total number of items copied. + */ + int (*query_property_list)(struct kgsl_device *device, u32 *list, + u32 count); + bool (*is_hwcg_on)(struct kgsl_device *device); + /** @gpu_clock_set: Target specific function to set gpu frequency */ + int (*gpu_clock_set)(struct kgsl_device *device, u32 pwrlevel); + /** @gpu_bus_set: Target specific function to set gpu bandwidth */ + int (*gpu_bus_set)(struct kgsl_device *device, int bus_level, u32 ab); + void (*deassert_gbif_halt)(struct kgsl_device *device); +}; + +struct kgsl_ioctl { + unsigned int cmd; + long (*func)(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +}; + +long kgsl_ioctl_helper(struct file *filep, unsigned int cmd, unsigned long arg, + const struct kgsl_ioctl *cmds, int len); + +/* Flag to mark that the memobj_node should not go to the hadrware */ +#define MEMOBJ_SKIP BIT(1) + +/** + * struct kgsl_memobj_node - Memory object descriptor + * @node: Local list node for the object + * @id: GPU memory ID for the object + * offset: Offset within the object + * @gpuaddr: GPU address for the object + * @flags: External flags passed by the user + * @priv: Internal flags set by the driver + */ +struct kgsl_memobj_node { + struct list_head node; + unsigned int id; + uint64_t offset; + uint64_t gpuaddr; + uint64_t size; + unsigned long flags; + unsigned long priv; +}; + +struct kgsl_device { + struct device *dev; + const char *name; + u32 id; + + /* Kernel virtual address for GPU shader memory */ + void __iomem *shader_mem_virt; + + /* Starting kernel virtual address for QDSS GFX DBG register block */ + void __iomem *qdss_gfx_virt; + + struct kgsl_memdesc *memstore; + struct kgsl_memdesc *scratch; + + struct kgsl_mmu mmu; + struct gmu_core_device gmu_core; + struct completion hwaccess_gate; + struct completion halt_gate; + const struct kgsl_functable *ftbl; + struct work_struct idle_check_ws; + struct timer_list idle_timer; + struct 
kgsl_pwrctrl pwrctrl; + int open_count; + + /* For GPU inline submission */ + uint32_t submit_now; + spinlock_t submit_lock; + bool slumber; + + struct mutex mutex; + uint32_t state; + uint32_t requested_state; + + atomic_t active_cnt; + /** @total_mapped: To trace overall gpu memory usage */ + atomic64_t total_mapped; + + wait_queue_head_t active_cnt_wq; + struct platform_device *pdev; + struct dentry *d_debugfs; + struct idr context_idr; + rwlock_t context_lock; + + struct { + void *ptr; + dma_addr_t dma_handle; + u32 size; + } snapshot_memory; + + struct kgsl_snapshot *snapshot; + /** @panic_nb: notifier block to capture GPU snapshot on kernel panic */ + struct notifier_block panic_nb; + struct { + void *ptr; + u32 size; + } snapshot_memory_atomic; + + u32 snapshot_faultcount; /* Total number of faults since boot */ + bool force_panic; /* Force panic after snapshot dump */ + bool skip_ib_capture; /* Skip IB capture after snapshot */ + bool prioritize_unrecoverable; /* Overwrite with new GMU snapshots */ + bool set_isdb_breakpoint; /* Set isdb registers before snapshot */ + bool snapshot_atomic; /* To capture snapshot in atomic context*/ + /* Use CP Crash dumper to get GPU snapshot*/ + bool snapshot_crashdumper; + /* Use HOST side register reads to get GPU snapshot*/ + bool snapshot_legacy; + /* Use to dump the context record in bytes */ + u64 snapshot_ctxt_record_size; + + struct kobject snapshot_kobj; + + struct kgsl_pwrscale pwrscale; + + int reset_counter; /* Track how many GPU core resets have occurred */ + struct workqueue_struct *events_wq; + + /* Number of active contexts seen globally for this device */ + int active_context_count; + struct kobject gpu_sysfs_kobj; + unsigned int l3_freq[3]; + unsigned int num_l3_pwrlevels; + /* store current L3 vote to determine if we should change our vote */ + unsigned int cur_l3_pwrlevel; + /** @globals: List of global memory objects */ + struct list_head globals; + /** @globlal_map: bitmap for global memory allocations */ + unsigned long *global_map; + /* @qdss_desc: Memory descriptor for the QDSS region if applicable */ + struct kgsl_memdesc *qdss_desc; + /* @qtimer_desc: Memory descriptor for the QDSS region if applicable */ + struct kgsl_memdesc *qtimer_desc; + /** @event_groups: List of event groups for this device */ + struct list_head event_groups; + /** @event_groups_lock: A R/W lock for the events group list */ + rwlock_t event_groups_lock; + /** @speed_bin: Speed bin for the GPU device if applicable */ + u32 speed_bin; + /** @gmu_fault: Set when a gmu or rgmu fault is encountered */ + bool gmu_fault; + /** @regmap: GPU register map */ + struct kgsl_regmap regmap; + /** @timelines: Iterator for assigning IDs to timelines */ + struct idr timelines; + /** @timelines_lock: Spinlock to protect the timelines idr */ + spinlock_t timelines_lock; + /** @fence_trace_array: A local trace array for fence debugging */ + struct trace_array *fence_trace_array; + /** @l3_vote: Enable/Disable l3 voting */ + bool l3_vote; + /** @pdev_loaded: Flag to test if platform driver is probed */ + bool pdev_loaded; +}; + +#define KGSL_MMU_DEVICE(_mmu) \ + container_of((_mmu), struct kgsl_device, mmu) + +/** + * enum bits for struct kgsl_context.priv + * @KGSL_CONTEXT_PRIV_SUBMITTED - The context has submitted commands to gpu. + * @KGSL_CONTEXT_PRIV_DETACHED - The context has been destroyed by userspace + * and is no longer using the gpu. + * @KGSL_CONTEXT_PRIV_INVALID - The context has been destroyed by the kernel + * because it caused a GPU fault. 
+ * @KGSL_CONTEXT_PRIV_PAGEFAULT - The context has caused a page fault. + * @KGSL_CONTEXT_PRIV_DEVICE_SPECIFIC - this value and higher values are + * reserved for devices specific use. + */ +enum kgsl_context_priv { + KGSL_CONTEXT_PRIV_SUBMITTED = 0, + KGSL_CONTEXT_PRIV_DETACHED, + KGSL_CONTEXT_PRIV_INVALID, + KGSL_CONTEXT_PRIV_PAGEFAULT, + KGSL_CONTEXT_PRIV_DEVICE_SPECIFIC = 16, +}; + +struct kgsl_process_private; + +/** + * struct kgsl_context - The context fields that are valid for a user defined + * context + * @refcount: kref object for reference counting the context + * @id: integer identifier for the context + * @priority; The context's priority to submit commands to GPU + * @tid: task that created this context. + * @dev_priv: pointer to the owning device instance + * @proc_priv: pointer to process private, the process that allocated the + * context + * @priv: in-kernel context flags, use KGSL_CONTEXT_* values + * @reset_status: status indication whether a gpu reset occurred and whether + * this context was responsible for causing it + * @timeline: sync timeline used to create fences that can be signaled when a + * sync_pt timestamp expires + * @events: A kgsl_event_group for this context - contains the list of GPU + * events + * @flags: flags from userspace controlling the behavior of this context + * @pwr_constraint: power constraint from userspace for this context + * @fault_count: number of times gpu hanged in last _context_throttle_time ms + * @fault_time: time of the first gpu hang in last _context_throttle_time ms + * @user_ctxt_record: memory descriptor used by CP to save/restore VPC data + * across preemption + * @total_fault_count: number of times gpu faulted in this context + * @last_faulted_cmd_ts: last faulted command batch timestamp + * @gmu_registered: whether context is registered with gmu or not + */ +struct kgsl_context { + struct kref refcount; + uint32_t id; + uint32_t priority; + pid_t tid; + struct kgsl_device_private *dev_priv; + struct kgsl_process_private *proc_priv; + unsigned long priv; + struct kgsl_device *device; + unsigned int reset_status; + struct kgsl_sync_timeline *ktimeline; + struct kgsl_event_group events; + unsigned int flags; + struct kgsl_pwr_constraint pwr_constraint; + struct kgsl_pwr_constraint l3_pwr_constraint; + unsigned int fault_count; + ktime_t fault_time; + struct kgsl_mem_entry *user_ctxt_record; + unsigned int total_fault_count; + unsigned int last_faulted_cmd_ts; + bool gmu_registered; + /** + * @gmu_dispatch_queue: dispatch queue id to which this context will be + * submitted + */ + u32 gmu_dispatch_queue; +}; + +#define _context_comm(_c) \ + (((_c) && (_c)->proc_priv) ? (_c)->proc_priv->comm : "unknown") + +/* + * Print log messages with the context process name/pid: + * [...] kgsl kgsl-3d0: kgsl-api-test[22182]: + */ + +#define pr_context(_d, _c, fmt, args...) 
\
+ dev_err((_d)->dev, "%s[%d]: " fmt, \
+ _context_comm((_c)), \
+ pid_nr((_c)->proc_priv->pid), ##args)
+
+/**
+ * struct kgsl_process_private - Private structure for a KGSL process (across
+ * all devices)
+ * @priv: Internal flags, use KGSL_PROCESS_* values
+ * @pid: Identification structure for the task owner of the process
+ * @comm: task name of the process
+ * @mem_lock: Spinlock to protect the process memory lists
+ * @refcount: kref object for reference counting the process
+ * @mem_idr: Iterator for assigning IDs to memory allocations
+ * @pagetable: Pointer to the pagetable owned by this process
+ * @kobj: Pointer to a kobj for the sysfs directory for this process
+ * @debug_root: Pointer to the debugfs root for this process
+ * @stats: Memory allocation statistics for this process
+ * @gpumem_mapped: KGSL memory mapped in the process address space
+ * @syncsource_idr: sync sources created by this process
+ * @syncsource_lock: Spinlock to protect the syncsource idr
+ * @fd_count: Counter for the number of FDs for this process
+ * @ctxt_count: Count for the number of contexts for this process
+ * @ctxt_count_lock: Spinlock to protect ctxt_count
+ * @frame_count: Count for the number of frames processed
+ */
+struct kgsl_process_private {
+ unsigned long priv;
+ struct pid *pid;
+ char comm[TASK_COMM_LEN];
+ spinlock_t mem_lock;
+ struct kref refcount;
+ struct idr mem_idr;
+ struct kgsl_pagetable *pagetable;
+ struct list_head list;
+ struct list_head reclaim_list;
+ struct kobject kobj;
+ struct dentry *debug_root;
+ struct {
+ atomic64_t cur;
+ uint64_t max;
+ } stats[KGSL_MEM_ENTRY_MAX];
+ atomic64_t gpumem_mapped;
+ struct idr syncsource_idr;
+ spinlock_t syncsource_lock;
+ int fd_count;
+ atomic_t ctxt_count;
+ spinlock_t ctxt_count_lock;
+ atomic64_t frame_count;
+ /**
+ * @state: state consisting of KGSL_PROC_STATE and KGSL_PROC_PINNED_STATE
+ */
+ unsigned long state;
+ /**
+ * @unpinned_page_count: The number of pages unpinned for reclaim
+ */
+ atomic_t unpinned_page_count;
+ /**
+ * @fg_work: Work struct to schedule foreground work
+ */
+ struct work_struct fg_work;
+ /**
+ * @reclaim_lock: Mutex lock to protect KGSL_PROC_PINNED_STATE
+ */
+ struct mutex reclaim_lock;
+ /**
+ * @cmd_count: The number of cmds that are active for the process
+ */
+ atomic_t cmd_count;
+};
+
+struct kgsl_device_private {
+ struct kgsl_device *device;
+ struct kgsl_process_private *process_priv;
+};
+
+/**
+ * struct kgsl_snapshot - details for a specific snapshot instance
+ * @ib1base: Active IB1 base address at the time of fault
+ * @ib2base: Active IB2 base address at the time of fault
+ * @ib1size: Number of DWORDS pending in IB1 at the time of fault
+ * @ib2size: Number of DWORDS pending in IB2 at the time of fault
+ * @ib1dumped: Active IB1 dump status to snapshot binary
+ * @ib2dumped: Active IB2 dump status to snapshot binary
+ * @start: Pointer to the start of the static snapshot region
+ * @size: Size of the current snapshot instance
+ * @ptr: Pointer to the next block of memory to write to during snapshotting
+ * @remain: Bytes left in the snapshot region
+ * @timestamp: Timestamp of the snapshot instance (in seconds since boot)
+ * @mempool: Pointer to the memory pool for storing memory objects
+ * @mempool_size: Size of the memory pool
+ * @obj_list: List of frozen GPU buffers that are waiting to be dumped.
+ * @cp_list: List of IBs to be dumped.
+ * @work: worker to dump the frozen memory
+ * @dump_gate: completion gate signaled by worker when it is finished.
+ * @process: the process that caused the hang, if known. + * @sysfs_read: Count of current reads via sysfs + * @first_read: True until the snapshot read is started + * @recovered: True if GPU was recovered after previous snapshot + */ +struct kgsl_snapshot { + uint64_t ib1base; + uint64_t ib2base; + unsigned int ib1size; + unsigned int ib2size; + bool ib1dumped; + bool ib2dumped; + u8 *start; + size_t size; + u8 *ptr; + size_t remain; + unsigned long timestamp; + u8 *mempool; + size_t mempool_size; + struct list_head obj_list; + struct list_head cp_list; + struct work_struct work; + struct completion dump_gate; + struct kgsl_process_private *process; + unsigned int sysfs_read; + bool first_read; + bool recovered; + struct kgsl_device *device; +}; + +/** + * struct kgsl_snapshot_object - GPU memory in the snapshot + * @gpuaddr: The GPU address identified during snapshot + * @size: The buffer size identified during snapshot + * @offset: offset from start of the allocated kgsl_mem_entry + * @type: SNAPSHOT_OBJ_TYPE_* identifier. + * @entry: the reference counted memory entry for this buffer + * @node: node for kgsl_snapshot.obj_list + */ +struct kgsl_snapshot_object { + uint64_t gpuaddr; + uint64_t size; + uint64_t offset; + int type; + struct kgsl_mem_entry *entry; + struct list_head node; +}; + +struct kgsl_device *kgsl_get_device(int dev_idx); + +static inline void kgsl_regread(struct kgsl_device *device, + unsigned int offsetwords, + unsigned int *value) +{ + *value = kgsl_regmap_read(&device->regmap, offsetwords); +} + +static inline void kgsl_regwrite(struct kgsl_device *device, + unsigned int offsetwords, + unsigned int value) +{ + kgsl_regmap_write(&device->regmap, value, offsetwords); +} + +static inline void kgsl_regrmw(struct kgsl_device *device, + unsigned int offsetwords, + unsigned int mask, unsigned int bits) +{ + kgsl_regmap_rmw(&device->regmap, offsetwords, mask, bits); +} + +static inline bool kgsl_state_is_awake(struct kgsl_device *device) +{ + return (device->state == KGSL_STATE_ACTIVE || + device->state == KGSL_STATE_AWARE); +} + +static inline bool kgsl_state_is_nap_or_minbw(struct kgsl_device *device) +{ + return (device->state == KGSL_STATE_NAP || + device->state == KGSL_STATE_MINBW); +} + +/** + * kgsl_start_idle_timer - Start the idle timer + * @device: A KGSL device handle + * + * Start the idle timer to expire in 'interval_timeout' milliseconds + */ +static inline void kgsl_start_idle_timer(struct kgsl_device *device) +{ + mod_timer(&device->idle_timer, + jiffies + msecs_to_jiffies(device->pwrctrl.interval_timeout)); +} + +int kgsl_readtimestamp(struct kgsl_device *device, void *priv, + enum kgsl_timestamp_type type, unsigned int *timestamp); + +bool kgsl_check_timestamp(struct kgsl_device *device, + struct kgsl_context *context, unsigned int timestamp); + +int kgsl_device_platform_probe(struct kgsl_device *device); + +void kgsl_device_platform_remove(struct kgsl_device *device); + +const char *kgsl_pwrstate_to_str(unsigned int state); + +/** + * kgsl_device_snapshot_probe - add resources for the device GPU snapshot + * @device: The device to initialize + * @size: The size of the static region to allocate + * + * Allocate memory for a GPU snapshot for the specified device, + * and create the sysfs files to manage it + */ +void kgsl_device_snapshot_probe(struct kgsl_device *device, u32 size); + +void kgsl_device_snapshot(struct kgsl_device *device, + struct kgsl_context *context, bool gmu_fault); +void kgsl_device_snapshot_close(struct kgsl_device *device); + +void 
kgsl_events_init(void); +void kgsl_events_exit(void); + +/** + * kgsl_device_events_probe - Set up events for the KGSL device + * @device: A KGSL GPU device handle + * + * Set up the list and lock for GPU events for this device + */ +void kgsl_device_events_probe(struct kgsl_device *device); + +/** + * kgsl_device_events_remove - Remove all event groups from the KGSL device + * @device: A KGSL GPU device handle + * + * Remove all of the GPU event groups from the device and warn if any of them + * still have events pending + */ +void kgsl_device_events_remove(struct kgsl_device *device); + +void kgsl_context_detach(struct kgsl_context *context); + +/** + * kgsl_del_event_group - Remove a GPU event group from a device + * @device: A KGSL GPU device handle + * @group: Event group to be removed + * + * Remove the specified group from the list of event groups on @device. + */ +void kgsl_del_event_group(struct kgsl_device *device, + struct kgsl_event_group *group); + +/** + * kgsl_add_event_group - Add a new GPU event group + * @device: A KGSL GPU device handle + * @group: Pointer to the new group to add to the list + * @context: Context that owns the group (or NULL for global) + * @readtimestamp: Function pointer to the readtimestamp function to call when + * processing events + * @priv: Priv member to pass to the readtimestamp function + * @fmt: The format string to use to build the event name + * @...: Arguments for the format string + */ +void kgsl_add_event_group(struct kgsl_device *device, + struct kgsl_event_group *group, + struct kgsl_context *context, readtimestamp_func readtimestamp, + void *priv, const char *fmt, ...); + +void kgsl_cancel_events_timestamp(struct kgsl_device *device, + struct kgsl_event_group *group, unsigned int timestamp); +void kgsl_cancel_events(struct kgsl_device *device, + struct kgsl_event_group *group); +void kgsl_cancel_event(struct kgsl_device *device, + struct kgsl_event_group *group, unsigned int timestamp, + kgsl_event_func func, void *priv); +bool kgsl_event_pending(struct kgsl_device *device, + struct kgsl_event_group *group, unsigned int timestamp, + kgsl_event_func func, void *priv); +int kgsl_add_event(struct kgsl_device *device, struct kgsl_event_group *group, + unsigned int timestamp, kgsl_event_func func, void *priv); +void kgsl_process_event_group(struct kgsl_device *device, + struct kgsl_event_group *group); +void kgsl_flush_event_group(struct kgsl_device *device, + struct kgsl_event_group *group); +void kgsl_process_event_groups(struct kgsl_device *device); + +void kgsl_context_destroy(struct kref *kref); + +int kgsl_context_init(struct kgsl_device_private *dev_priv, + struct kgsl_context *context); + +void kgsl_context_dump(struct kgsl_context *context); + +int kgsl_memfree_find_entry(pid_t ptname, uint64_t *gpuaddr, + uint64_t *size, uint64_t *flags, pid_t *pid); + +long kgsl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); + +long kgsl_ioctl_copy_in(unsigned int kernel_cmd, unsigned int user_cmd, + unsigned long arg, unsigned char *ptr); + +long kgsl_ioctl_copy_out(unsigned int kernel_cmd, unsigned int user_cmd, + unsigned long arg, unsigned char *ptr); + +/** + * kgsl_context_type - Return a symbolic string for the context type + * @type: Context type + * + * Return: Symbolic string representing the context type + */ +const char *kgsl_context_type(int type); + +/** + * kgsl_context_put() - Release context reference count + * @context: Pointer to the KGSL context to be released + * + * Reduce the reference count on a KGSL 
context and destroy it if it is no + * longer needed + */ +static inline void +kgsl_context_put(struct kgsl_context *context) +{ + if (context) + kref_put(&context->refcount, kgsl_context_destroy); +} + +/** + * kgsl_context_detached() - check if a context is detached + * @context: the context + * + * Check if a context has been destroyed by userspace and is only waiting + * for reference counts to go away. This check is used to weed out + * contexts that shouldn't use the gpu so NULL is considered detached. + */ +static inline bool kgsl_context_detached(struct kgsl_context *context) +{ + return (context == NULL || test_bit(KGSL_CONTEXT_PRIV_DETACHED, + &context->priv)); +} + +/** + * kgsl_context_invalid() - check if a context is invalid + * @context: the context + * + * Check if a context has been invalidated by the kernel and may no + * longer use the GPU. + */ +static inline bool kgsl_context_invalid(struct kgsl_context *context) +{ + return (context == NULL || test_bit(KGSL_CONTEXT_PRIV_INVALID, + &context->priv)); +} + +/** kgsl_context_is_bad - Check if a context is detached or invalid + * @context: Pointer to a KGSL context handle + * + * Return: True if the context has been detached or is invalid + */ +static inline bool kgsl_context_is_bad(struct kgsl_context *context) +{ + return (kgsl_context_detached(context) || + kgsl_context_invalid(context)); +} + +/** + * kgsl_context_get() - get a pointer to a KGSL context + * @device: Pointer to the KGSL device that owns the context + * @id: Context ID + * + * Find the context associated with the given ID number, increase the reference + * count on it and return it. The caller must make sure that this call is + * paired with a kgsl_context_put. This function is for internal use because it + * doesn't validate the ownership of the context with the calling process - use + * kgsl_context_get_owner for that + */ +static inline struct kgsl_context *kgsl_context_get(struct kgsl_device *device, + uint32_t id) +{ + int result = 0; + struct kgsl_context *context = NULL; + + read_lock(&device->context_lock); + + context = idr_find(&device->context_idr, id); + + /* Don't return a context that has been detached */ + if (kgsl_context_detached(context)) + context = NULL; + else + result = kref_get_unless_zero(&context->refcount); + + read_unlock(&device->context_lock); + + if (!result) + return NULL; + return context; +} + +/** + * _kgsl_context_get() - lightweight function to just increment the ref count + * @context: Pointer to the KGSL context + * + * Get a reference to the specified KGSL context structure. This is a + * lightweight way to just increase the refcount on a known context rather than + * walking through kgsl_context_get and searching the iterator + */ +static inline int _kgsl_context_get(struct kgsl_context *context) +{ + int ret = 0; + + if (context) + ret = kref_get_unless_zero(&context->refcount); + + return ret; +} + +/** + * kgsl_context_get_owner() - get a pointer to a KGSL context in a specific + * process + * @dev_priv: Pointer to the process struct + * @id: Context ID to return + * + * Find the context associated with the given ID number, increase the reference + * count on it and return it. The caller must make sure that this call is + * paired with a kgsl_context_put. This function validates that the context id + * given is owned by the dev_priv instancet that is passed in. 
See + * kgsl_context_get for the internal version that doesn't do the check + */ +static inline struct kgsl_context *kgsl_context_get_owner( + struct kgsl_device_private *dev_priv, uint32_t id) +{ + struct kgsl_context *context; + + context = kgsl_context_get(dev_priv->device, id); + + /* Verify that the context belongs to current calling fd. */ + if (context != NULL && context->dev_priv != dev_priv) { + kgsl_context_put(context); + return NULL; + } + + return context; +} + +/** + * kgsl_process_private_get() - increment the refcount on a + * kgsl_process_private struct + * @process: Pointer to the KGSL process_private + * + * Returns 0 if the structure is invalid and a reference count could not be + * obtained, nonzero otherwise. + */ +static inline int kgsl_process_private_get(struct kgsl_process_private *process) +{ + if (process != NULL) + return kref_get_unless_zero(&process->refcount); + return 0; +} + +void kgsl_process_private_put(struct kgsl_process_private *private); + + +struct kgsl_process_private *kgsl_process_private_find(pid_t pid); + +/* + * A helper macro to print out "not enough memory functions" - this + * makes it easy to standardize the messages as well as cut down on + * the number of strings in the binary + */ +#define SNAPSHOT_ERR_NOMEM(_d, _s) \ + dev_err((_d)->dev, \ + "snapshot: not enough snapshot memory for section %s\n", (_s)) + +/** + * struct kgsl_snapshot_registers - list of registers to snapshot + * @regs: Pointer to an array of register ranges + * @count: Number of entries in the array + */ +struct kgsl_snapshot_registers { + const unsigned int *regs; + unsigned int count; +}; + +size_t kgsl_snapshot_dump_registers(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv); + +void kgsl_snapshot_indexed_registers(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, unsigned int index, + unsigned int data, unsigned int start, unsigned int count); + +int kgsl_snapshot_get_object(struct kgsl_snapshot *snapshot, + struct kgsl_process_private *process, uint64_t gpuaddr, + uint64_t size, unsigned int type); + +int kgsl_snapshot_have_object(struct kgsl_snapshot *snapshot, + struct kgsl_process_private *process, + uint64_t gpuaddr, uint64_t size); + +struct adreno_ib_object_list; + +int kgsl_snapshot_add_ib_obj_list(struct kgsl_snapshot *snapshot, + struct adreno_ib_object_list *ib_obj_list); + +void kgsl_snapshot_add_section(struct kgsl_device *device, u16 id, + struct kgsl_snapshot *snapshot, + size_t (*func)(struct kgsl_device *, u8 *, size_t, void *), + void *priv); + +/** + * kgsl_of_property_read_ddrtype - Get property from devicetree based on + * the type of DDR. + * @node: Devicetree node + * @base: prefix string of the property + * @ptr: Pointer to store the value of the property + * + * First look up the devicetree property based on the prefix string and DDR + * type. If property is not specified per DDR type, then look for the property + * based on prefix string only. + * + * Return: 0 on success or error code on failure. + */ +int kgsl_of_property_read_ddrtype(struct device_node *node, const char *base, + u32 *ptr); + +/** + * kgsl_query_property_list - Get a list of valid properties + * @device: A KGSL device handle + * @list: Pointer to a list of u32s + * @count: Number of items in @list + * + * Populate a list with the IDs for supported properties. If @list is NULL, + * just return the number of properties available, otherwise fill up to @count + * items in the list with property identifiers. 
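/*
 * Illustrative sketch, not part of this commit: the two-step call pattern
 * that the kgsl_query_property_list() kerneldoc describes - query the count
 * with a NULL list, then allocate and fill. The function name below is
 * hypothetical.
 */
static int example_count_properties(struct kgsl_device *device)
{
	u32 *list;
	int count = kgsl_query_property_list(device, NULL, 0);

	if (count <= 0)
		return count;

	list = kcalloc(count, sizeof(*list), GFP_KERNEL);
	if (!list)
		return -ENOMEM;

	/* Second call copies up to @count property identifiers */
	count = kgsl_query_property_list(device, list, count);

	/* ... inspect the identifiers ... */

	kfree(list);
	return count;
}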
+ * + * Returns the number of total properties if @list is NULL or the number of + * properties copied to @list. + */ +int kgsl_query_property_list(struct kgsl_device *device, u32 *list, u32 count); + +static inline bool kgsl_mmu_has_feature(struct kgsl_device *device, + enum kgsl_mmu_feature feature) +{ + return test_bit(feature, &device->mmu.features); +} + +static inline void kgsl_mmu_set_feature(struct kgsl_device *device, + enum kgsl_mmu_feature feature) +{ + set_bit(feature, &device->mmu.features); +} + +/** + * kgsl_trace_gpu_mem_total - Overall gpu memory usage tracking which includes + * process allocations, imported dmabufs and kgsl globals + * @device: A KGSL device handle + * @delta: delta of total mapped memory size + */ +#ifdef CONFIG_TRACE_GPU_MEM +static inline void kgsl_trace_gpu_mem_total(struct kgsl_device *device, + s64 delta) +{ + u64 total_size; + + total_size = atomic64_add_return(delta, &device->total_mapped); + trace_gpu_mem_total(0, 0, total_size); +} +#else +static inline void kgsl_trace_gpu_mem_total(struct kgsl_device *device, + s64 delta) {} +#endif + +#endif /* __KGSL_DEVICE_H */ diff --git a/kgsl_drawobj.c b/kgsl_drawobj.c new file mode 100644 index 0000000000..75807d6350 --- /dev/null +++ b/kgsl_drawobj.c @@ -0,0 +1,1489 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2016-2021, The Linux Foundation. All rights reserved. + */ + +/* + * KGSL drawobj management + * A drawobj is a single submission from userland. The drawobj + * encapsulates everything about the submission: command buffers, flags and + * sync points. + * + * Sync points are events that need to expire before the + * drawobj can be queued to the hardware. All syncpoints are contained in an + * array of kgsl_drawobj_sync_event structs in the drawobj. There can be + * multiple types of events, both internal ones (GPU events) and external + * triggers. As the events expire, bits are cleared in a pending bitmap stored + * in the drawobj. The GPU will submit the command as soon as the bitmap + * goes to zero, indicating no more pending events.
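/*
 * Illustrative sketch, not part of this commit: a condensed view of the
 * pending-bitmap lifecycle described above, mirroring the code later in
 * this file. A bit is set when a syncpoint is added, atomically claimed
 * when the event expires, and the drawobj becomes runnable once the bitmap
 * is empty. The function name is hypothetical.
 */
static void example_pending_bitmap(struct kgsl_drawobj_sync *syncobj,
		unsigned int id)
{
	/* Adding syncpoint @id marks it pending */
	set_bit(id, &syncobj->pending);

	/* On expiry, exactly one racing path claims the bit */
	if (!test_and_clear_bit(id, &syncobj->pending))
		return;

	/* Once no bits are left the drawobj can be queued to the hardware */
	if (bitmap_empty(&syncobj->pending, KGSL_MAX_SYNCPOINTS))
		pr_debug("drawobj %u is ready for dispatch\n",
				DRAWOBJ(syncobj)->timestamp);
}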
+ */ + +#include +#include + +#include "adreno_drawctxt.h" +#include "kgsl_compat.h" +#include "kgsl_device.h" +#include "kgsl_drawobj.h" +#include "kgsl_eventlog.h" +#include "kgsl_sync.h" +#include "kgsl_timeline.h" +#include "kgsl_trace.h" + +/* + * Define a kmem cache for the memobj structures since we + * allocate and free them so frequently + */ +static struct kmem_cache *memobjs_cache; + +static void syncobj_destroy_object(struct kgsl_drawobj *drawobj) +{ + struct kgsl_drawobj_sync *syncobj = SYNCOBJ(drawobj); + int i; + + for (i = 0; i < syncobj->numsyncs; i++) { + struct kgsl_drawobj_sync_event *event = &syncobj->synclist[i]; + + if (event->type == KGSL_CMD_SYNCPOINT_TYPE_FENCE) { + struct event_fence_info *priv = event->priv; + + if (priv) { + kfree(priv->fences); + kfree(priv); + } + } else if (event->type == KGSL_CMD_SYNCPOINT_TYPE_TIMELINE) { + kfree(event->priv); + } + } + + kfree(syncobj->synclist); + kfree(syncobj); +} + +static void cmdobj_destroy_object(struct kgsl_drawobj *drawobj) +{ + kfree(CMDOBJ(drawobj)); +} + +static void bindobj_destroy_object(struct kgsl_drawobj *drawobj) +{ + kfree(BINDOBJ(drawobj)); +} + +static void timelineobj_destroy_object(struct kgsl_drawobj *drawobj) +{ + kfree(TIMELINEOBJ(drawobj)); +} + +void kgsl_drawobj_destroy_object(struct kref *kref) +{ + struct kgsl_drawobj *drawobj = container_of(kref, + struct kgsl_drawobj, refcount); + + kgsl_context_put(drawobj->context); + drawobj->destroy_object(drawobj); +} + +void kgsl_dump_syncpoints(struct kgsl_device *device, + struct kgsl_drawobj_sync *syncobj) +{ + struct kgsl_drawobj_sync_event *event; + unsigned int i; + + for (i = 0; i < syncobj->numsyncs; i++) { + event = &syncobj->synclist[i]; + + if (!kgsl_drawobj_event_pending(syncobj, i)) + continue; + + switch (event->type) { + case KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP: { + unsigned int retired; + + kgsl_readtimestamp(event->device, + event->context, KGSL_TIMESTAMP_RETIRED, + &retired); + + dev_err(device->dev, + " [timestamp] context %u timestamp %u (retired %u)\n", + event->context->id, event->timestamp, + retired); + break; + } + case KGSL_CMD_SYNCPOINT_TYPE_FENCE: { + int j; + struct event_fence_info *info = event->priv; + + for (j = 0; info && j < info->num_fences; j++) + dev_err(device->dev, "[%d] fence: %s\n", + i, info->fences[j].name); + break; + } + case KGSL_CMD_SYNCPOINT_TYPE_TIMELINE: { + int j; + struct event_timeline_info *info = event->priv; + + for (j = 0; info && info[j].timeline; j++) + dev_err(device->dev, "[%d] timeline: %d seqno %lld\n", + i, info[j].timeline, info[j].seqno); + break; + } + } + } +} + +static void syncobj_timer(struct timer_list *t) +{ + struct kgsl_device *device; + struct kgsl_drawobj_sync *syncobj = from_timer(syncobj, t, timer); + struct kgsl_drawobj *drawobj; + struct kgsl_drawobj_sync_event *event; + unsigned int i; + + if (syncobj == NULL) + return; + + drawobj = DRAWOBJ(syncobj); + + if (!kref_get_unless_zero(&drawobj->refcount)) + return; + + if (drawobj->context == NULL) { + kgsl_drawobj_put(drawobj); + return; + } + + device = drawobj->context->device; + + dev_err(device->dev, + "kgsl: possible gpu syncpoint deadlock for context %u timestamp %u\n", + drawobj->context->id, drawobj->timestamp); + + set_bit(ADRENO_CONTEXT_FENCE_LOG, &drawobj->context->priv); + kgsl_context_dump(drawobj->context); + clear_bit(ADRENO_CONTEXT_FENCE_LOG, &drawobj->context->priv); + + dev_err(device->dev, " pending events:\n"); + + for (i = 0; i < syncobj->numsyncs; i++) { + event = &syncobj->synclist[i]; + + if 
(!kgsl_drawobj_event_pending(syncobj, i)) + continue; + + switch (event->type) { + case KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP: + dev_err(device->dev, " [%u] TIMESTAMP %u:%u\n", + i, event->context->id, event->timestamp); + break; + case KGSL_CMD_SYNCPOINT_TYPE_FENCE: { + int j; + struct event_fence_info *info = event->priv; + + for (j = 0; info && j < info->num_fences; j++) + dev_err(device->dev, " [%u] FENCE %s\n", + i, info->fences[j].name); + break; + } + case KGSL_CMD_SYNCPOINT_TYPE_TIMELINE: { + int j; + struct event_timeline_info *info = event->priv; + + dev_err(device->dev, " [%u] FENCE %s\n", + i, dma_fence_is_signaled(event->fence) ? + "signaled" : "not signaled"); + + for (j = 0; info && info[j].timeline; j++) + dev_err(device->dev, " TIMELINE %d SEQNO %lld\n", + info[j].timeline, info[j].seqno); + break; + } + } + } + + kgsl_drawobj_put(drawobj); + dev_err(device->dev, "--gpu syncpoint deadlock print end--\n"); +} + +/* + * a generic function to retire a pending sync event and (possibly) kick the + * dispatcher. + * Returns false if the event was already marked for cancellation in another + * thread. This function should return true if this thread is responsible for + * freeing up the memory, and the event will not be cancelled. + */ +static bool drawobj_sync_expire(struct kgsl_device *device, + struct kgsl_drawobj_sync_event *event) +{ + struct kgsl_drawobj_sync *syncobj = event->syncobj; + /* + * Clear the event from the pending mask - if it is already clear, then + * leave without doing anything useful + */ + if (!test_and_clear_bit(event->id, &syncobj->pending)) + return false; + + /* + * If no more pending events, delete the timer and schedule the command + * for dispatch + */ + if (!kgsl_drawobj_events_pending(event->syncobj)) { + del_timer(&syncobj->timer); + + if (device->ftbl->drawctxt_sched) + device->ftbl->drawctxt_sched(device, + event->syncobj->base.context); + } + return true; +} + +/* + * This function is called by the GPU event when the sync event timestamp + * expires + */ +static void drawobj_sync_func(struct kgsl_device *device, + struct kgsl_event_group *group, void *priv, int result) +{ + struct kgsl_drawobj_sync_event *event = priv; + + trace_syncpoint_timestamp_expire(event->syncobj, + event->context, event->timestamp); + + /* + * Put down the context ref count only if + * this thread successfully clears the pending bit mask. 
+ */ + if (drawobj_sync_expire(device, event)) + kgsl_context_put(event->context); + + kgsl_drawobj_put(&event->syncobj->base); +} + +static void drawobj_sync_timeline_fence_work(struct irq_work *work) +{ + struct kgsl_drawobj_sync_event *event = container_of(work, + struct kgsl_drawobj_sync_event, work); + + dma_fence_put(event->fence); + kgsl_drawobj_put(&event->syncobj->base); +} + +static void trace_syncpoint_timeline_fence(struct kgsl_drawobj_sync *syncobj, + struct dma_fence *f, bool expire) +{ + struct dma_fence_array *array = to_dma_fence_array(f); + struct dma_fence **fences = &f; + u32 num_fences = 1; + int i; + + if (array) { + num_fences = array->num_fences; + fences = array->fences; + } + + for (i = 0; i < num_fences; i++) { + char fence_name[KGSL_FENCE_NAME_LEN]; + + snprintf(fence_name, sizeof(fence_name), "%s:%llu", + fences[i]->ops->get_timeline_name(fences[i]), + fences[i]->seqno); + if (expire) { + trace_syncpoint_fence_expire(syncobj, fence_name); + log_kgsl_syncpoint_fence_expire_event( + syncobj->base.context->id, fence_name); + } else { + trace_syncpoint_fence(syncobj, fence_name); + log_kgsl_syncpoint_fence_event( + syncobj->base.context->id, fence_name); + } + } +} + +static void drawobj_sync_timeline_fence_callback(struct dma_fence *f, + struct dma_fence_cb *cb) +{ + struct kgsl_drawobj_sync_event *event = container_of(cb, + struct kgsl_drawobj_sync_event, cb); + + trace_syncpoint_timeline_fence(event->syncobj, f, true); + + /* + * Mark the event as synced and then fire off a worker to handle + * removing the fence + */ + if (drawobj_sync_expire(event->device, event)) + irq_work_queue(&event->work); +} + +static void syncobj_destroy(struct kgsl_drawobj *drawobj) +{ + struct kgsl_drawobj_sync *syncobj = SYNCOBJ(drawobj); + unsigned int i; + + /* Zap the canary timer */ + del_timer_sync(&syncobj->timer); + + /* + * Clear all pending events - this will render any subsequent async + * callbacks harmless + */ + for (i = 0; i < syncobj->numsyncs; i++) { + struct kgsl_drawobj_sync_event *event = &syncobj->synclist[i]; + + /* + * Don't do anything if the event has already expired. + * If this thread clears the pending bit mask then it is + * responsible for doing context put. + */ + if (!test_and_clear_bit(i, &syncobj->pending)) + continue; + + switch (event->type) { + case KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP: + kgsl_cancel_event(drawobj->device, + &event->context->events, event->timestamp, + drawobj_sync_func, event); + /* + * Do context put here to make sure the context is alive + * till this thread cancels kgsl event. + */ + kgsl_context_put(event->context); + break; + case KGSL_CMD_SYNCPOINT_TYPE_FENCE: + kgsl_sync_fence_async_cancel(event->handle); + kgsl_drawobj_put(drawobj); + break; + case KGSL_CMD_SYNCPOINT_TYPE_TIMELINE: + dma_fence_remove_callback(event->fence, &event->cb); + dma_fence_put(event->fence); + kgsl_drawobj_put(drawobj); + break; + } + } + + /* + * If we cancelled an event, there's a good chance that the context is + * on a dispatcher queue, so schedule to get it removed. 
+ */ + if (!bitmap_empty(&syncobj->pending, KGSL_MAX_SYNCPOINTS) && + drawobj->device->ftbl->drawctxt_sched) + drawobj->device->ftbl->drawctxt_sched(drawobj->device, + drawobj->context); + +} + +static void timelineobj_destroy(struct kgsl_drawobj *drawobj) +{ + struct kgsl_drawobj_timeline *timelineobj = TIMELINEOBJ(drawobj); + int i; + + for (i = 0; i < timelineobj->count; i++) + kgsl_timeline_put(timelineobj->timelines[i].timeline); + + kvfree(timelineobj->timelines); + timelineobj->timelines = NULL; + timelineobj->count = 0; +} + +static void bindobj_destroy(struct kgsl_drawobj *drawobj) +{ + struct kgsl_drawobj_bind *bindobj = BINDOBJ(drawobj); + + kgsl_sharedmem_put_bind_op(bindobj->bind); +} + +static void cmdobj_destroy(struct kgsl_drawobj *drawobj) +{ + struct kgsl_drawobj_cmd *cmdobj = CMDOBJ(drawobj); + struct kgsl_memobj_node *mem, *tmpmem; + + /* + * Release the refcount on the mem entry associated with the + * ib profiling buffer + */ + if (cmdobj->base.flags & KGSL_DRAWOBJ_PROFILING) + kgsl_mem_entry_put(cmdobj->profiling_buf_entry); + + /* Destroy the command list */ + list_for_each_entry_safe(mem, tmpmem, &cmdobj->cmdlist, node) { + list_del_init(&mem->node); + kmem_cache_free(memobjs_cache, mem); + } + + /* Destroy the memory list */ + list_for_each_entry_safe(mem, tmpmem, &cmdobj->memlist, node) { + list_del_init(&mem->node); + kmem_cache_free(memobjs_cache, mem); + } + + if (drawobj->type & CMDOBJ_TYPE) + atomic_dec(&drawobj->context->proc_priv->cmd_count); +} + +/** + * kgsl_drawobj_destroy() - Destroy a kgsl object structure + * @obj: Pointer to the kgsl object to destroy + * + * Start the process of destroying a command batch. Cancel any pending events + * and decrement the refcount. Asynchronous events can still signal after + * kgsl_drawobj_destroy has returned. + */ +void kgsl_drawobj_destroy(struct kgsl_drawobj *drawobj) +{ + if (IS_ERR_OR_NULL(drawobj)) + return; + + drawobj->destroy(drawobj); + + kgsl_drawobj_put(drawobj); +} + +static bool drawobj_sync_fence_func(void *priv) +{ + struct kgsl_drawobj_sync_event *event = priv; + struct event_fence_info *info = event->priv; + int i; + + for (i = 0; info && i < info->num_fences; i++) { + trace_syncpoint_fence_expire(event->syncobj, + info->fences[i].name); + log_kgsl_syncpoint_fence_expire_event( + event->syncobj->base.context->id, info->fences[i].name); + } + + /* + * Only call kgsl_drawobj_put() if it's not marked for cancellation + * in another thread. 
+ */ + if (drawobj_sync_expire(event->device, event)) { + kgsl_drawobj_put(&event->syncobj->base); + return true; + } + return false; +} + +static struct event_timeline_info * +drawobj_get_sync_timeline_priv(void __user *uptr, u64 usize, u32 count) +{ + int i; + struct event_timeline_info *priv; + + /* Make sure we don't accidentally overflow count */ + if (count == UINT_MAX) + return NULL; + + priv = kcalloc(count + 1, sizeof(*priv), GFP_KERNEL); + if (!priv) + return NULL; + + for (i = 0; i < count; i++, uptr += usize) { + struct kgsl_timeline_val val; + + if (copy_struct_from_user(&val, sizeof(val), uptr, usize)) + continue; + + priv[i].timeline = val.timeline; + priv[i].seqno = val.seqno; + } + + priv[i].timeline = 0; + return priv; +} + +static int drawobj_add_sync_timeline(struct kgsl_device *device, + + struct kgsl_drawobj_sync *syncobj, void __user *uptr, + u64 usize) +{ + struct kgsl_drawobj *drawobj = DRAWOBJ(syncobj); + struct kgsl_cmd_syncpoint_timeline sync; + struct kgsl_drawobj_sync_event *event; + struct dma_fence *fence; + unsigned int id; + int ret; + + if (copy_struct_from_user(&sync, sizeof(sync), uptr, usize)) + return -EFAULT; + + fence = kgsl_timelines_to_fence_array(device, sync.timelines, + sync.count, sync.timelines_size, false); + if (IS_ERR(fence)) + return PTR_ERR(fence); + + kref_get(&drawobj->refcount); + + id = syncobj->numsyncs++; + + event = &syncobj->synclist[id]; + + event->id = id; + event->type = KGSL_CMD_SYNCPOINT_TYPE_TIMELINE; + event->syncobj = syncobj; + event->device = device; + event->context = NULL; + event->fence = fence; + init_irq_work(&event->work, drawobj_sync_timeline_fence_work); + + INIT_LIST_HEAD(&event->cb.node); + + event->priv = + drawobj_get_sync_timeline_priv(u64_to_user_ptr(sync.timelines), + sync.timelines_size, sync.count); + + ret = dma_fence_add_callback(event->fence, + &event->cb, drawobj_sync_timeline_fence_callback); + + set_bit(event->id, &syncobj->pending); + + if (ret) { + clear_bit(event->id, &syncobj->pending); + + if (dma_fence_is_signaled(event->fence)) { + trace_syncpoint_fence_expire(syncobj, "signaled"); + log_kgsl_syncpoint_fence_expire_event( + syncobj->base.context->id, "signaled"); + dma_fence_put(event->fence); + ret = 0; + } + + kgsl_drawobj_put(drawobj); + return ret; + } + + trace_syncpoint_timeline_fence(event->syncobj, event->fence, false); + return 0; +} + +static int drawobj_add_sync_fence(struct kgsl_device *device, + struct kgsl_drawobj_sync *syncobj, void __user *data, + u64 datasize) +{ + struct kgsl_cmd_syncpoint_fence sync; + struct kgsl_drawobj *drawobj = DRAWOBJ(syncobj); + struct kgsl_drawobj_sync_event *event; + struct event_fence_info *priv; + unsigned int id, i; + + if (copy_struct_from_user(&sync, sizeof(sync), data, datasize)) + return -EFAULT; + + kref_get(&drawobj->refcount); + + id = syncobj->numsyncs++; + + event = &syncobj->synclist[id]; + + event->id = id; + event->type = KGSL_CMD_SYNCPOINT_TYPE_FENCE; + event->syncobj = syncobj; + event->device = device; + event->context = NULL; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + + set_bit(event->id, &syncobj->pending); + + event->handle = kgsl_sync_fence_async_wait(sync.fd, + drawobj_sync_fence_func, event, priv); + + event->priv = priv; + + if (IS_ERR_OR_NULL(event->handle)) { + int ret = PTR_ERR(event->handle); + + clear_bit(event->id, &syncobj->pending); + event->handle = NULL; + + kgsl_drawobj_put(drawobj); + + /* + * If ret == 0 the fence was already signaled - print a trace + * message so we can track that + */ + if (ret == 0) { 
+ trace_syncpoint_fence_expire(syncobj, "signaled"); + log_kgsl_syncpoint_fence_expire_event( + syncobj->base.context->id, "signaled"); + } + + return ret; + } + + for (i = 0; priv && i < priv->num_fences; i++) { + trace_syncpoint_fence(syncobj, priv->fences[i].name); + log_kgsl_syncpoint_fence_event(syncobj->base.context->id, + priv->fences[i].name); + } + + return 0; +} + +/* drawobj_add_sync_timestamp() - Add a new sync point for a sync obj + * @device: KGSL device + * @syncobj: KGSL sync obj to add the sync point to + * @priv: Private structure passed by the user + * + * Add a new sync point timestamp event to the sync obj. + */ +static int drawobj_add_sync_timestamp(struct kgsl_device *device, + struct kgsl_drawobj_sync *syncobj, + struct kgsl_cmd_syncpoint_timestamp *timestamp) + +{ + struct kgsl_drawobj *drawobj = DRAWOBJ(syncobj); + struct kgsl_context *context = kgsl_context_get(device, + timestamp->context_id); + struct kgsl_drawobj_sync_event *event; + int ret = -EINVAL; + unsigned int id; + + if (context == NULL) + return -EINVAL; + + /* + * We allow somebody to create a sync point on their own context. + * This has the effect of delaying a command from submitting until the + * dependent command has cleared. That said, we obviously can't let them + * create a sync point on a future timestamp. + */ + + if (context == drawobj->context) { + unsigned int queued; + + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_QUEUED, + &queued); + + if (timestamp_cmp(timestamp->timestamp, queued) > 0) { + dev_err(device->dev, + "Cannot create syncpoint for future timestamp %d (current %d)\n", + timestamp->timestamp, queued); + goto done; + } + } + + kref_get(&drawobj->refcount); + + id = syncobj->numsyncs++; + + event = &syncobj->synclist[id]; + event->id = id; + + event->type = KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP; + event->syncobj = syncobj; + event->context = context; + event->timestamp = timestamp->timestamp; + event->device = device; + + set_bit(event->id, &syncobj->pending); + + ret = kgsl_add_event(device, &context->events, timestamp->timestamp, + drawobj_sync_func, event); + + if (ret) { + clear_bit(event->id, &syncobj->pending); + kgsl_drawobj_put(drawobj); + } else { + trace_syncpoint_timestamp(syncobj, context, + timestamp->timestamp); + } + +done: + if (ret) + kgsl_context_put(context); + + return ret; +} + +static int drawobj_add_sync_timestamp_from_user(struct kgsl_device *device, + struct kgsl_drawobj_sync *syncobj, void __user *data, + u64 datasize) +{ + struct kgsl_cmd_syncpoint_timestamp timestamp; + + if (copy_struct_from_user(&timestamp, sizeof(timestamp), + data, datasize)) + return -EFAULT; + + return drawobj_add_sync_timestamp(device, syncobj, &timestamp); +} + +/** + * kgsl_drawobj_sync_add_sync() - Add a sync point to a command + * batch + * @device: Pointer to the KGSL device struct for the GPU + * @syncobj: Pointer to the sync obj + * @sync: Pointer to the user-specified struct defining the syncpoint + * + * Create a new sync point in the sync obj based on the + * user specified parameters + */ +int kgsl_drawobj_sync_add_sync(struct kgsl_device *device, + struct kgsl_drawobj_sync *syncobj, + struct kgsl_cmd_syncpoint *sync) +{ + struct kgsl_drawobj *drawobj = DRAWOBJ(syncobj); + + if (sync->type == KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP) + return drawobj_add_sync_timestamp_from_user(device, + syncobj, sync->priv, sync->size); + else if (sync->type == KGSL_CMD_SYNCPOINT_TYPE_FENCE) + return drawobj_add_sync_fence(device, + syncobj, sync->priv, sync->size); + else if (sync->type == 
KGSL_CMD_SYNCPOINT_TYPE_TIMELINE) + return drawobj_add_sync_timeline(device, + syncobj, sync->priv, sync->size); + + dev_err(device->dev, "bad syncpoint type %d for ctxt %d\n", + sync->type, drawobj->context->id); + + return -EINVAL; +} + +static void add_profiling_buffer(struct kgsl_device *device, + struct kgsl_drawobj_cmd *cmdobj, + uint64_t gpuaddr, uint64_t size, + unsigned int id, uint64_t offset) +{ + struct kgsl_mem_entry *entry; + struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj); + + if (!(drawobj->flags & KGSL_DRAWOBJ_PROFILING)) + return; + + /* Only the first buffer entry counts - ignore the rest */ + if (cmdobj->profiling_buf_entry != NULL) + return; + + if (id != 0) + entry = kgsl_sharedmem_find_id(drawobj->context->proc_priv, + id); + else + entry = kgsl_sharedmem_find(drawobj->context->proc_priv, + gpuaddr); + + if (entry != NULL) { + if (!kgsl_gpuaddr_in_memdesc(&entry->memdesc, gpuaddr, size)) { + kgsl_mem_entry_put(entry); + entry = NULL; + } + } + + if (entry == NULL) { + dev_err(device->dev, + "ignore bad profile buffer ctxt %d id %d offset %lld gpuaddr %llx size %lld\n", + drawobj->context->id, id, offset, gpuaddr, size); + return; + } + + + if (!id) { + cmdobj->profiling_buffer_gpuaddr = gpuaddr; + } else { + u64 off = offset + sizeof(struct kgsl_drawobj_profiling_buffer); + + /* + * Make sure there is enough room in the object to store the + * entire profiling buffer object + */ + if (off < offset || off >= entry->memdesc.size) { + dev_err(device->dev, + "ignore invalid profile offset ctxt %d id %d offset %lld gpuaddr %llx size %lld\n", + drawobj->context->id, id, offset, gpuaddr, size); + kgsl_mem_entry_put(entry); + return; + } + + cmdobj->profiling_buffer_gpuaddr = + entry->memdesc.gpuaddr + offset; + } + + cmdobj->profiling_buf_entry = entry; +} + +/** + * kgsl_drawobj_cmd_add_ibdesc() - Add a legacy ibdesc to a command + * batch + * @cmdobj: Pointer to the ib + * @ibdesc: Pointer to the user-specified struct defining the memory or IB + * + * Create a new memory entry in the ib based on the + * user specified parameters + */ +int kgsl_drawobj_cmd_add_ibdesc(struct kgsl_device *device, + struct kgsl_drawobj_cmd *cmdobj, struct kgsl_ibdesc *ibdesc) +{ + uint64_t gpuaddr = (uint64_t) ibdesc->gpuaddr; + uint64_t size = (uint64_t) ibdesc->sizedwords << 2; + struct kgsl_memobj_node *mem; + struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj); + + /* sanitize the ibdesc ctrl flags */ + ibdesc->ctrl &= KGSL_IBDESC_MEMLIST | KGSL_IBDESC_PROFILING_BUFFER; + + if (drawobj->flags & KGSL_DRAWOBJ_MEMLIST && + ibdesc->ctrl & KGSL_IBDESC_MEMLIST) { + if (ibdesc->ctrl & KGSL_IBDESC_PROFILING_BUFFER) { + add_profiling_buffer(device, cmdobj, + gpuaddr, size, 0, 0); + return 0; + } + } + + /* Ignore if SYNC or MARKER is specified */ + if (drawobj->type & (SYNCOBJ_TYPE | MARKEROBJ_TYPE)) + return 0; + + mem = kmem_cache_alloc(memobjs_cache, GFP_KERNEL); + if (mem == NULL) + return -ENOMEM; + + mem->gpuaddr = gpuaddr; + mem->size = size; + mem->priv = 0; + mem->id = 0; + mem->offset = 0; + mem->flags = 0; + + if (drawobj->flags & KGSL_DRAWOBJ_MEMLIST && + ibdesc->ctrl & KGSL_IBDESC_MEMLIST) + /* add to the memlist */ + list_add_tail(&mem->node, &cmdobj->memlist); + else { + /* set the preamble flag if directed to */ + if (drawobj->context->flags & KGSL_CONTEXT_PREAMBLE && + list_empty(&cmdobj->cmdlist)) + mem->flags = KGSL_CMDLIST_CTXTSWITCH_PREAMBLE; + + /* add to the cmd list */ + list_add_tail(&mem->node, &cmdobj->cmdlist); + } + + return 0; +} + +static int drawobj_init(struct 
kgsl_device *device, + struct kgsl_context *context, struct kgsl_drawobj *drawobj, + int type) +{ + /* + * Increase the reference count on the context so it doesn't disappear + * during the lifetime of this object + */ + if (!_kgsl_context_get(context)) + return -ENOENT; + + kref_init(&drawobj->refcount); + + drawobj->device = device; + drawobj->context = context; + drawobj->type = type; + + return 0; +} + +static int get_aux_command(void __user *ptr, u64 generic_size, + int type, void *auxcmd, size_t auxcmd_size) +{ + struct kgsl_gpu_aux_command_generic generic; + u64 size; + + if (copy_struct_from_user(&generic, sizeof(generic), ptr, generic_size)) + return -EFAULT; + + if (generic.type != type) + return -EINVAL; + + size = min_t(u64, auxcmd_size, generic.size); + if (copy_from_user(auxcmd, u64_to_user_ptr(generic.priv), size)) + return -EFAULT; + + return 0; +} + +struct kgsl_drawobj_timeline * +kgsl_drawobj_timeline_create(struct kgsl_device *device, + struct kgsl_context *context) +{ + int ret; + struct kgsl_drawobj_timeline *timelineobj = + kzalloc(sizeof(*timelineobj), GFP_KERNEL); + + if (!timelineobj) + return ERR_PTR(-ENOMEM); + + ret = drawobj_init(device, context, &timelineobj->base, + TIMELINEOBJ_TYPE); + if (ret) { + kfree(timelineobj); + return ERR_PTR(ret); + } + + timelineobj->base.destroy = timelineobj_destroy; + timelineobj->base.destroy_object = timelineobj_destroy_object; + + return timelineobj; +} + +int kgsl_drawobj_add_timeline(struct kgsl_device_private *dev_priv, + struct kgsl_drawobj_timeline *timelineobj, + void __user *src, u64 cmdsize) +{ + struct kgsl_gpu_aux_command_timeline cmd; + int i, ret; + + memset(&cmd, 0, sizeof(cmd)); + + ret = get_aux_command(src, cmdsize, + KGSL_GPU_AUX_COMMAND_TIMELINE, &cmd, sizeof(cmd)); + if (ret) + return ret; + + if (!cmd.count) + return -EINVAL; + + timelineobj->timelines = kvcalloc(cmd.count, + sizeof(*timelineobj->timelines), + GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); + if (!timelineobj->timelines) + return -ENOMEM; + + src = u64_to_user_ptr(cmd.timelines); + + for (i = 0; i < cmd.count; i++) { + struct kgsl_timeline_val val; + + if (copy_struct_from_user(&val, sizeof(val), src, + cmd.timelines_size)) { + ret = -EFAULT; + goto err; + } + + if (val.padding) { + ret = -EINVAL; + goto err; + } + + timelineobj->timelines[i].timeline = + kgsl_timeline_by_id(dev_priv->device, + val.timeline); + + if (!timelineobj->timelines[i].timeline) { + ret = -ENODEV; + goto err; + } + + trace_kgsl_drawobj_timeline(val.timeline, val.seqno); + timelineobj->timelines[i].seqno = val.seqno; + + src += cmd.timelines_size; + } + + timelineobj->count = cmd.count; + return 0; +err: + for (i = 0; i < cmd.count; i++) + kgsl_timeline_put(timelineobj->timelines[i].timeline); + + kvfree(timelineobj->timelines); + timelineobj->timelines = NULL; + return ret; +} + +static void kgsl_drawobj_bind_callback(struct kgsl_sharedmem_bind_op *op) +{ + struct kgsl_drawobj_bind *bindobj = op->data; + struct kgsl_drawobj *drawobj = DRAWOBJ(bindobj); + struct kgsl_device *device = drawobj->device; + + set_bit(KGSL_BINDOBJ_STATE_DONE, &bindobj->state); + + /* Re-schedule the context */ + if (device->ftbl->drawctxt_sched) + device->ftbl->drawctxt_sched(device, + drawobj->context); + + /* Put back the reference we took when we started the operation */ + kgsl_context_put(drawobj->context); + kgsl_drawobj_put(drawobj); +} + +int kgsl_drawobj_add_bind(struct kgsl_device_private *dev_priv, + struct kgsl_drawobj_bind *bindobj, + void __user *src, u64 cmdsize) +{ + struct 
kgsl_gpu_aux_command_bind cmd; + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_sharedmem_bind_op *op; + int ret; + + ret = get_aux_command(src, cmdsize, + KGSL_GPU_AUX_COMMAND_BIND, &cmd, sizeof(cmd)); + if (ret) + return ret; + + op = kgsl_sharedmem_create_bind_op(private, cmd.target, + u64_to_user_ptr(cmd.rangeslist), cmd.numranges, + cmd.rangesize); + + if (IS_ERR(op)) + return PTR_ERR(op); + + op->callback = kgsl_drawobj_bind_callback; + op->data = bindobj; + + bindobj->bind = op; + return 0; +} + +struct kgsl_drawobj_bind *kgsl_drawobj_bind_create(struct kgsl_device *device, + struct kgsl_context *context) +{ + int ret; + struct kgsl_drawobj_bind *bindobj = + kzalloc(sizeof(*bindobj), GFP_KERNEL); + + if (!bindobj) + return ERR_PTR(-ENOMEM); + + ret = drawobj_init(device, context, &bindobj->base, BINDOBJ_TYPE); + if (ret) { + kfree(bindobj); + return ERR_PTR(ret); + } + + bindobj->base.destroy = bindobj_destroy; + bindobj->base.destroy_object = bindobj_destroy_object; + + return bindobj; +} + +/** + * kgsl_drawobj_sync_create() - Create a new sync obj + * structure + * @device: Pointer to a KGSL device struct + * @context: Pointer to a KGSL context struct + * + * Allocate a new kgsl_drawobj_sync structure + */ +struct kgsl_drawobj_sync *kgsl_drawobj_sync_create(struct kgsl_device *device, + struct kgsl_context *context) +{ + struct kgsl_drawobj_sync *syncobj = + kzalloc(sizeof(*syncobj), GFP_KERNEL); + int ret; + + if (!syncobj) + return ERR_PTR(-ENOMEM); + + ret = drawobj_init(device, context, &syncobj->base, SYNCOBJ_TYPE); + if (ret) { + kfree(syncobj); + return ERR_PTR(ret); + } + + syncobj->base.destroy = syncobj_destroy; + syncobj->base.destroy_object = syncobj_destroy_object; + + timer_setup(&syncobj->timer, syncobj_timer, 0); + + return syncobj; +} + +/** + * kgsl_drawobj_cmd_create() - Create a new command obj + * structure + * @device: Pointer to a KGSL device struct + * @context: Pointer to a KGSL context struct + * @flags: Flags for the command obj + * @type: type of cmdobj MARKER/CMD + * + * Allocate a new kgsl_drawobj_cmd structure + */ +struct kgsl_drawobj_cmd *kgsl_drawobj_cmd_create(struct kgsl_device *device, + struct kgsl_context *context, unsigned int flags, + unsigned int type) +{ + struct kgsl_drawobj_cmd *cmdobj = kzalloc(sizeof(*cmdobj), GFP_KERNEL); + int ret; + + if (!cmdobj) + return ERR_PTR(-ENOMEM); + + ret = drawobj_init(device, context, &cmdobj->base, + (type & (CMDOBJ_TYPE | MARKEROBJ_TYPE))); + if (ret) { + kfree(cmdobj); + return ERR_PTR(ret); + } + + cmdobj->base.destroy = cmdobj_destroy; + cmdobj->base.destroy_object = cmdobj_destroy_object; + + /* sanitize our flags for drawobjs */ + cmdobj->base.flags = flags & (KGSL_DRAWOBJ_CTX_SWITCH + | KGSL_DRAWOBJ_MARKER + | KGSL_DRAWOBJ_END_OF_FRAME + | KGSL_DRAWOBJ_PWR_CONSTRAINT + | KGSL_DRAWOBJ_MEMLIST + | KGSL_DRAWOBJ_PROFILING + | KGSL_DRAWOBJ_PROFILING_KTIME); + + INIT_LIST_HEAD(&cmdobj->cmdlist); + INIT_LIST_HEAD(&cmdobj->memlist); + + if (type & CMDOBJ_TYPE) + atomic_inc(&context->proc_priv->cmd_count); + + return cmdobj; +} + +#ifdef CONFIG_COMPAT +static int add_ibdesc_list_compat(struct kgsl_device *device, + struct kgsl_drawobj_cmd *cmdobj, void __user *ptr, int count) +{ + int i, ret = 0; + struct kgsl_ibdesc_compat ibdesc32; + struct kgsl_ibdesc ibdesc; + + for (i = 0; i < count; i++) { + memset(&ibdesc32, 0, sizeof(ibdesc32)); + + if (copy_from_user(&ibdesc32, ptr, sizeof(ibdesc32))) { + ret = -EFAULT; + break; + } + + ibdesc.gpuaddr = (unsigned long) 
ibdesc32.gpuaddr; + ibdesc.sizedwords = (size_t) ibdesc32.sizedwords; + ibdesc.ctrl = (unsigned int) ibdesc32.ctrl; + + ret = kgsl_drawobj_cmd_add_ibdesc(device, cmdobj, &ibdesc); + if (ret) + break; + + ptr += sizeof(ibdesc32); + } + + return ret; +} + +static int add_syncpoints_compat(struct kgsl_device *device, + struct kgsl_drawobj_sync *syncobj, void __user *ptr, int count) +{ + struct kgsl_cmd_syncpoint_compat sync32; + struct kgsl_cmd_syncpoint sync; + int i, ret = 0; + + for (i = 0; i < count; i++) { + memset(&sync32, 0, sizeof(sync32)); + + if (copy_from_user(&sync32, ptr, sizeof(sync32))) { + ret = -EFAULT; + break; + } + + sync.type = sync32.type; + sync.priv = compat_ptr(sync32.priv); + sync.size = (size_t) sync32.size; + + ret = kgsl_drawobj_sync_add_sync(device, syncobj, &sync); + if (ret) + break; + + ptr += sizeof(sync32); + } + + return ret; +} +#else +static int add_ibdesc_list_compat(struct kgsl_device *device, + struct kgsl_drawobj_cmd *cmdobj, void __user *ptr, int count) +{ + return -EINVAL; +} + +static int add_syncpoints_compat(struct kgsl_device *device, + struct kgsl_drawobj_sync *syncobj, void __user *ptr, int count) +{ + return -EINVAL; +} +#endif + +/* Returns: + * -EINVAL: Bad data + * 0: All data fields are empty (nothing to do) + * 1: All list information is valid + */ +static int _verify_input_list(unsigned int count, void __user *ptr, + unsigned int size) +{ + /* Return early if nothing going on */ + if (count == 0 && ptr == NULL && size == 0) + return 0; + + /* Sanity check inputs */ + if (count == 0 || ptr == NULL || size == 0) + return -EINVAL; + + return 1; +} + +int kgsl_drawobj_cmd_add_ibdesc_list(struct kgsl_device *device, + struct kgsl_drawobj_cmd *cmdobj, void __user *ptr, int count) +{ + struct kgsl_ibdesc ibdesc; + struct kgsl_drawobj *baseobj = DRAWOBJ(cmdobj); + int i, ret; + + /* Ignore everything if this is a MARKER */ + if (baseobj->type & MARKEROBJ_TYPE) + return 0; + + ret = _verify_input_list(count, ptr, sizeof(ibdesc)); + if (ret <= 0) + return -EINVAL; + + if (is_compat_task()) + return add_ibdesc_list_compat(device, cmdobj, ptr, count); + + for (i = 0; i < count; i++) { + memset(&ibdesc, 0, sizeof(ibdesc)); + + if (copy_from_user(&ibdesc, ptr, sizeof(ibdesc))) + return -EFAULT; + + ret = kgsl_drawobj_cmd_add_ibdesc(device, cmdobj, &ibdesc); + if (ret) + return ret; + + ptr += sizeof(ibdesc); + } + + return 0; +} + +int kgsl_drawobj_sync_add_syncpoints(struct kgsl_device *device, + struct kgsl_drawobj_sync *syncobj, void __user *ptr, int count) +{ + struct kgsl_cmd_syncpoint sync; + int i, ret; + + if (count == 0) + return 0; + + syncobj->synclist = kcalloc(count, + sizeof(struct kgsl_drawobj_sync_event), GFP_KERNEL); + + if (syncobj->synclist == NULL) + return -ENOMEM; + + if (is_compat_task()) + return add_syncpoints_compat(device, syncobj, ptr, count); + + for (i = 0; i < count; i++) { + memset(&sync, 0, sizeof(sync)); + + if (copy_from_user(&sync, ptr, sizeof(sync))) + return -EFAULT; + + ret = kgsl_drawobj_sync_add_sync(device, syncobj, &sync); + if (ret) + return ret; + + ptr += sizeof(sync); + } + + return 0; +} + +static int kgsl_drawobj_add_memobject(struct list_head *head, + struct kgsl_command_object *obj) +{ + struct kgsl_memobj_node *mem; + + mem = kmem_cache_alloc(memobjs_cache, GFP_KERNEL); + if (mem == NULL) + return -ENOMEM; + + mem->gpuaddr = obj->gpuaddr; + mem->size = obj->size; + mem->id = obj->id; + mem->offset = obj->offset; + mem->flags = obj->flags; + mem->priv = 0; + + list_add_tail(&mem->node, head); + 
return 0; +} + +#define CMDLIST_FLAGS \ + (KGSL_CMDLIST_IB | \ + KGSL_CMDLIST_CTXTSWITCH_PREAMBLE | \ + KGSL_CMDLIST_IB_PREAMBLE) + +/* This can only accept MARKEROBJ_TYPE and CMDOBJ_TYPE */ +int kgsl_drawobj_cmd_add_cmdlist(struct kgsl_device *device, + struct kgsl_drawobj_cmd *cmdobj, void __user *ptr, + unsigned int size, unsigned int count) +{ + struct kgsl_command_object obj; + struct kgsl_drawobj *baseobj = DRAWOBJ(cmdobj); + int i, ret; + + /* Ignore everything if this is a MARKER */ + if (baseobj->type & MARKEROBJ_TYPE) + return 0; + + ret = _verify_input_list(count, ptr, size); + if (ret <= 0) + return ret; + + for (i = 0; i < count; i++) { + if (copy_struct_from_user(&obj, sizeof(obj), ptr, size)) + return -EFAULT; + + /* Sanity check the flags */ + if (!(obj.flags & CMDLIST_FLAGS)) { + dev_err(device->dev, + "invalid cmdobj ctxt %d flags %d id %d offset %llu addr %llx size %llu\n", + baseobj->context->id, obj.flags, obj.id, + obj.offset, obj.gpuaddr, obj.size); + return -EINVAL; + } + + ret = kgsl_drawobj_add_memobject(&cmdobj->cmdlist, &obj); + if (ret) + return ret; + + ptr += sizeof(obj); + } + + return 0; +} + +int kgsl_drawobj_cmd_add_memlist(struct kgsl_device *device, + struct kgsl_drawobj_cmd *cmdobj, void __user *ptr, + unsigned int size, unsigned int count) +{ + struct kgsl_command_object obj; + struct kgsl_drawobj *baseobj = DRAWOBJ(cmdobj); + int i, ret; + + /* Ignore everything if this is a MARKER */ + if (baseobj->type & MARKEROBJ_TYPE) + return 0; + + ret = _verify_input_list(count, ptr, size); + if (ret <= 0) + return ret; + + for (i = 0; i < count; i++) { + if (copy_struct_from_user(&obj, sizeof(obj), ptr, size)) + return -EFAULT; + + if (!(obj.flags & KGSL_OBJLIST_MEMOBJ)) { + dev_err(device->dev, + "invalid memobj ctxt %d flags %d id %d offset %lld addr %lld size %lld\n", + DRAWOBJ(cmdobj)->context->id, obj.flags, + obj.id, obj.offset, obj.gpuaddr, + obj.size); + return -EINVAL; + } + + if (obj.flags & KGSL_OBJLIST_PROFILE) + add_profiling_buffer(device, cmdobj, obj.gpuaddr, + obj.size, obj.id, obj.offset); + else { + ret = kgsl_drawobj_add_memobject(&cmdobj->memlist, + &obj); + if (ret) + return ret; + } + + ptr += sizeof(obj); + } + + return 0; +} + +struct kgsl_drawobj_sync * +kgsl_drawobj_create_timestamp_syncobj(struct kgsl_device *device, + struct kgsl_context *context, unsigned int timestamp) +{ + struct kgsl_drawobj_sync *syncobj; + struct kgsl_cmd_syncpoint_timestamp priv; + int ret; + + syncobj = kgsl_drawobj_sync_create(device, context); + if (IS_ERR(syncobj)) + return syncobj; + + syncobj->synclist = kzalloc(sizeof(*syncobj->synclist), GFP_KERNEL); + if (!syncobj->synclist) { + kgsl_drawobj_destroy(DRAWOBJ(syncobj)); + return ERR_PTR(-ENOMEM); + } + + priv.timestamp = timestamp; + priv.context_id = context->id; + + ret = drawobj_add_sync_timestamp(device, syncobj, &priv); + if (ret) { + kgsl_drawobj_destroy(DRAWOBJ(syncobj)); + return ERR_PTR(ret); + } + + return syncobj; +} + +int kgsl_drawobj_sync_add_synclist(struct kgsl_device *device, + struct kgsl_drawobj_sync *syncobj, void __user *ptr, + unsigned int size, unsigned int count) +{ + struct kgsl_command_syncpoint syncpoint; + struct kgsl_cmd_syncpoint sync; + int i, ret; + + /* If creating a sync and the data is not there or wrong then error */ + ret = _verify_input_list(count, ptr, size); + if (ret <= 0) + return -EINVAL; + + syncobj->synclist = kcalloc(count, + sizeof(struct kgsl_drawobj_sync_event), GFP_KERNEL); + + if (syncobj->synclist == NULL) + return -ENOMEM; + + for (i = 0; i < count; 
i++) { + if (copy_struct_from_user(&syncpoint, sizeof(syncpoint), ptr, size)) + return -EFAULT; + + sync.type = syncpoint.type; + sync.priv = u64_to_user_ptr(syncpoint.priv); + sync.size = syncpoint.size; + + ret = kgsl_drawobj_sync_add_sync(device, syncobj, &sync); + if (ret) + return ret; + + ptr += sizeof(syncpoint); + } + + return 0; +} + +void kgsl_drawobjs_cache_exit(void) +{ + kmem_cache_destroy(memobjs_cache); +} + +int kgsl_drawobjs_cache_init(void) +{ + memobjs_cache = KMEM_CACHE(kgsl_memobj_node, 0); + + if (!memobjs_cache) + return -ENOMEM; + + return 0; +} diff --git a/kgsl_drawobj.h b/kgsl_drawobj.h new file mode 100644 index 0000000000..faf396ba74 --- /dev/null +++ b/kgsl_drawobj.h @@ -0,0 +1,332 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2016-2021, The Linux Foundation. All rights reserved. + */ + +#ifndef __KGSL_DRAWOBJ_H +#define __KGSL_DRAWOBJ_H + +#include +#include +#include + +#define DRAWOBJ(obj) (&obj->base) +#define SYNCOBJ(obj) \ + container_of(obj, struct kgsl_drawobj_sync, base) +#define CMDOBJ(obj) \ + container_of(obj, struct kgsl_drawobj_cmd, base) + +#define CMDOBJ_TYPE BIT(0) +#define MARKEROBJ_TYPE BIT(1) +#define SYNCOBJ_TYPE BIT(2) +#define BINDOBJ_TYPE BIT(3) +#define TIMELINEOBJ_TYPE BIT(4) + +/** + * struct kgsl_drawobj - KGSL drawobj descriptor + * @device: KGSL GPU device that the command was created for + * @context: KGSL context that created the command + * @type: Object type + * @timestamp: Timestamp assigned to the command + * @flags: flags + * @refcount: kref structure to maintain the reference count + */ +struct kgsl_drawobj { + struct kgsl_device *device; + struct kgsl_context *context; + uint32_t type; + uint32_t timestamp; + unsigned long flags; + struct kref refcount; + /** @destroy: Callback function to take down the object */ + void (*destroy)(struct kgsl_drawobj *drawobj); + /** @destroy_object: Callback function to free the object memory */ + void (*destroy_object)(struct kgsl_drawobj *drawobj); +}; + +/** + * struct kgsl_drawobj_cmd - KGSL command obj. This also covers marker + * cmds, since markers are a special form of cmds that do not + * need their cmds to be executed. + * @base: Base kgsl_drawobj, this needs to be the first entry + * @priv: Internal flags + * @global_ts: The ringbuffer timestamp corresponding to this + * command obj + * @fault_policy: Internal policy describing how to handle this command in case + * of a fault + * @fault_recovery: Recovery actions actually tried for this batch + * @refcount: kref structure to maintain the reference count + * @cmdlist: List of IBs to issue + * @memlist: List of all memory used in this command batch + * @marker_timestamp: For markers, the timestamp of the last "real" command that + * was queued + * @profiling_buf_entry: Mem entry containing the profiling buffer + * @profiling_buffer_gpuaddr: GPU virt address of the profile buffer added here + * for easy access + * @profile_index: Index to store the start/stop ticks in the kernel profiling + * buffer + * @submit_ticks: Variable to hold ticks at the time of + * command obj submit.
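/*
 * Illustrative sketch, not part of this commit: the usual lifecycle of a
 * command obj built through the API declared in this header - create it,
 * attach the user's command list, and destroy it on failure. The wrapper
 * function and its arguments are hypothetical.
 */
static long example_submit(struct kgsl_device *device,
		struct kgsl_context *context, void __user *cmdlist,
		unsigned int numcmds, unsigned int size)
{
	long ret;
	struct kgsl_drawobj_cmd *cmdobj;

	cmdobj = kgsl_drawobj_cmd_create(device, context, 0, CMDOBJ_TYPE);
	if (IS_ERR(cmdobj))
		return PTR_ERR(cmdobj);

	ret = kgsl_drawobj_cmd_add_cmdlist(device, cmdobj, cmdlist, size,
			numcmds);
	if (ret) {
		/* Destroy drops the reference taken at create time */
		kgsl_drawobj_destroy(DRAWOBJ(cmdobj));
		return ret;
	}

	/* ... hand DRAWOBJ(cmdobj) off to the dispatcher ... */
	return 0;
}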
+ + */ +struct kgsl_drawobj_cmd { + struct kgsl_drawobj base; + unsigned long priv; + unsigned int global_ts; + unsigned long fault_policy; + unsigned long fault_recovery; + struct list_head cmdlist; + struct list_head memlist; + unsigned int marker_timestamp; + struct kgsl_mem_entry *profiling_buf_entry; + uint64_t profiling_buffer_gpuaddr; + unsigned int profile_index; + uint64_t submit_ticks; + /* @numibs: Number of ibs in this cmdobj */ + u32 numibs; +}; + +/** + * struct kgsl_drawobj_sync - KGSL sync object + * @base: Base kgsl_drawobj, this needs to be the first entry + * @synclist: Array of context/timestamp tuples to wait for before issuing + * @numsyncs: Number of sync entries in the array + * @pending: Bitmask of sync events that are active + * @timer: a timer used to track possible sync timeouts for this + * sync obj + * @timeout_jiffies: For a sync obj the jiffies at + * which the timer will expire + */ +struct kgsl_drawobj_sync { + struct kgsl_drawobj base; + struct kgsl_drawobj_sync_event *synclist; + unsigned int numsyncs; + unsigned long pending; + struct timer_list timer; + unsigned long timeout_jiffies; +}; + +#define KGSL_BINDOBJ_STATE_START 0 +#define KGSL_BINDOBJ_STATE_DONE 1 + +/** + * struct kgsl_drawobj_bind - KGSL virtual buffer object bind operation + * @base: &struct kgsl_drawobj container + * @state: Current state of the draw operation + * @bind: Pointer to the VBO bind operation struct + */ +struct kgsl_drawobj_bind { + struct kgsl_drawobj base; + unsigned long state; + struct kgsl_sharedmem_bind_op *bind; +}; + +static inline struct kgsl_drawobj_bind *BINDOBJ(struct kgsl_drawobj *obj) +{ + return container_of(obj, struct kgsl_drawobj_bind, base); +} + +/** + * struct kgsl_drawobj_timeline - KGSL timeline signal operation + */ +struct kgsl_drawobj_timeline { + /** @base: &struct kgsl_drawobj container */ + struct kgsl_drawobj base; + struct { + /** @timeline: Pointer to a &struct kgsl_timeline */ + struct kgsl_timeline *timeline; + /** @seqno: Sequence number to signal */ + u64 seqno; + } *timelines; + /** @count: Number of items in timelines */ + int count; +}; + +static inline struct kgsl_drawobj_timeline * +TIMELINEOBJ(struct kgsl_drawobj *obj) +{ + return container_of(obj, struct kgsl_drawobj_timeline, base); +} + +#define KGSL_FENCE_NAME_LEN 74 + +struct fence_info { + char name[KGSL_FENCE_NAME_LEN]; +}; + +struct event_fence_info { + struct fence_info *fences; + int num_fences; +}; + +struct event_timeline_info { + u64 seqno; + u32 timeline; +}; + +/** + * struct kgsl_drawobj_sync_event + * @id: identifier (position within the pending bitmap) + * @type: Syncpoint type + * @syncobj: Pointer to the syncobj that owns the sync event + * @context: KGSL context for whose timestamp we want to + * register this event + * @timestamp: Pending timestamp for the event + * @handle: Pointer to a sync fence handle + * @device: Pointer to the KGSL device + */ +struct kgsl_drawobj_sync_event { + unsigned int id; + int type; + struct kgsl_drawobj_sync *syncobj; + struct kgsl_context *context; + unsigned int timestamp; + struct kgsl_sync_fence_cb *handle; + struct kgsl_device *device; + /** @priv: Type specific private information */ + void *priv; + /** + * @fence: Pointer to a dma fence for KGSL_CMD_SYNCPOINT_TYPE_TIMELINE + * events + */ + struct dma_fence *fence; + /** @cb: Callback struct for KGSL_CMD_SYNCPOINT_TYPE_TIMELINE */ + struct dma_fence_cb cb; + /** @work: irq worker for KGSL_CMD_SYNCPOINT_TYPE_TIMELINE */ + struct irq_work work; +}; + +#define 
KGSL_DRAWOBJ_FLAGS \ + { KGSL_DRAWOBJ_MARKER, "MARKER" }, \ + { KGSL_DRAWOBJ_CTX_SWITCH, "CTX_SWITCH" }, \ + { KGSL_DRAWOBJ_SYNC, "SYNC" }, \ + { KGSL_DRAWOBJ_END_OF_FRAME, "EOF" }, \ + { KGSL_DRAWOBJ_PWR_CONSTRAINT, "PWR_CONSTRAINT" }, \ + { KGSL_DRAWOBJ_SUBMIT_IB_LIST, "IB_LIST" } + +/** + * enum kgsl_drawobj_cmd_priv - Internal command obj flags + * @CMDOBJ_SKIP - skip the entire command obj + * @CMDOBJ_FORCE_PREAMBLE - Force the preamble on for + * command obj + * @CMDOBJ_WFI - Force wait-for-idle for the submission + * @CMDOBJ_PROFILE - store the start / retire ticks for + * the command obj in the profiling buffer + * @CMDOBJ_FAULT - Mark the command object as faulted + */ +enum kgsl_drawobj_cmd_priv { + CMDOBJ_SKIP = 0, + CMDOBJ_FORCE_PREAMBLE, + CMDOBJ_WFI, + CMDOBJ_PROFILE, + CMDOBJ_FAULT, +}; + +struct kgsl_ibdesc; +struct kgsl_cmd_syncpoint; + +struct kgsl_drawobj_cmd *kgsl_drawobj_cmd_create(struct kgsl_device *device, + struct kgsl_context *context, unsigned int flags, + unsigned int type); +int kgsl_drawobj_cmd_add_ibdesc(struct kgsl_device *device, + struct kgsl_drawobj_cmd *cmdobj, struct kgsl_ibdesc *ibdesc); +int kgsl_drawobj_cmd_add_ibdesc_list(struct kgsl_device *device, + struct kgsl_drawobj_cmd *cmdobj, void __user *ptr, int count); +int kgsl_drawobj_cmd_add_cmdlist(struct kgsl_device *device, + struct kgsl_drawobj_cmd *cmdobj, void __user *ptr, + unsigned int size, unsigned int count); +int kgsl_drawobj_cmd_add_memlist(struct kgsl_device *device, + struct kgsl_drawobj_cmd *cmdobj, void __user *ptr, + unsigned int size, unsigned int count); + +struct kgsl_drawobj_sync *kgsl_drawobj_sync_create(struct kgsl_device *device, + struct kgsl_context *context); +int kgsl_drawobj_sync_add_syncpoints(struct kgsl_device *device, + struct kgsl_drawobj_sync *syncobj, void __user *ptr, + int count); +int kgsl_drawobj_sync_add_synclist(struct kgsl_device *device, + struct kgsl_drawobj_sync *syncobj, void __user *ptr, + unsigned int size, unsigned int count); +int kgsl_drawobj_sync_add_sync(struct kgsl_device *device, + struct kgsl_drawobj_sync *syncobj, + struct kgsl_cmd_syncpoint *sync); + +int kgsl_drawobjs_cache_init(void); +void kgsl_drawobjs_cache_exit(void); + +void kgsl_dump_syncpoints(struct kgsl_device *device, + struct kgsl_drawobj_sync *syncobj); + +void kgsl_drawobj_destroy(struct kgsl_drawobj *drawobj); + +void kgsl_drawobj_destroy_object(struct kref *kref); + +static inline bool kgsl_drawobj_events_pending( + struct kgsl_drawobj_sync *syncobj) +{ + return !bitmap_empty(&syncobj->pending, KGSL_MAX_SYNCPOINTS); +} + +static inline bool kgsl_drawobj_event_pending( + struct kgsl_drawobj_sync *syncobj, unsigned int bit) +{ + if (bit >= KGSL_MAX_SYNCPOINTS) + return false; + + return test_bit(bit, &syncobj->pending); +} + +static inline void kgsl_drawobj_put(struct kgsl_drawobj *drawobj) +{ + if (drawobj) + kref_put(&drawobj->refcount, kgsl_drawobj_destroy_object); +} + +/** + * kgsl_drawobj_create_timestamp_syncobj - Create a syncobj for a timestamp + * @device: A GPU device handle + * @context: Draw context for the syncobj + * @timestamp: Timestamp to sync on + * + * Create a sync object for @timestamp on @context. 
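/*
 * Illustrative sketch, not part of this commit: typical use of the helper
 * documented above. Callers check the ERR_PTR-style return and tear the
 * object down with kgsl_drawobj_destroy(). The wrapper name is hypothetical.
 */
static int example_internal_wait(struct kgsl_device *device,
		struct kgsl_context *context, unsigned int timestamp)
{
	struct kgsl_drawobj_sync *syncobj;

	syncobj = kgsl_drawobj_create_timestamp_syncobj(device, context,
			timestamp);
	if (IS_ERR(syncobj))
		return PTR_ERR(syncobj);

	/* ... queue DRAWOBJ(syncobj) to the dispatcher, or destroy it ... */

	return 0;
}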
+ * Return: A pointer to the sync object + */ +struct kgsl_drawobj_sync * +kgsl_drawobj_create_timestamp_syncobj(struct kgsl_device *device, + struct kgsl_context *context, unsigned int timestamp); + +struct kgsl_drawobj_bind *kgsl_drawobj_bind_create(struct kgsl_device *device, + struct kgsl_context *context); + +int kgsl_drawobj_add_bind(struct kgsl_device_private *dev_priv, + struct kgsl_drawobj_bind *bindobj, + void __user *src, u64 cmdsize); + +/** + * kgsl_drawobj_timeline_create - Create a timeline draw object + * @device: A GPU device handle + * @context: Draw context for the drawobj + * + * Create a timeline draw object on @context. + * Return: A pointer to the draw object + */ +struct kgsl_drawobj_timeline * +kgsl_drawobj_timeline_create(struct kgsl_device *device, + struct kgsl_context *context); + +/** + * kgsl_drawobj_add_timeline - Add a timeline to a timeline drawobj + * @dev_priv: Pointer to the process private data + * @timelineobj: Pointer to a timeline drawobject + * @src: Pointer to the &struct kgsl_timeline_val from userspace + * @cmdsize: size of the object in @src + * + * Add a timeline to a draw object. + * Return: 0 on success or negative on failure + */ +int kgsl_drawobj_add_timeline(struct kgsl_device_private *dev_priv, + struct kgsl_drawobj_timeline *timelineobj, + void __user *src, u64 cmdsize); + +#endif /* __KGSL_DRAWOBJ_H */ diff --git a/kgsl_eventlog.c b/kgsl_eventlog.c new file mode 100644 index 0000000000..71987cea1f --- /dev/null +++ b/kgsl_eventlog.c @@ -0,0 +1,232 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + */ + +#include +#include +#include +#include + +#include "kgsl_device.h" +#include "kgsl_eventlog.h" +#include "kgsl_util.h" + +#define EVENTLOG_SIZE SZ_8K +#define MAGIC 0xabbaabba +#define LOG_FENCE_NAME_LEN 74 + +/* + * This is an internal event used to skip empty space at the bottom of the + * ringbuffer + */ + +#define LOG_SKIP 1 +#define LOG_FIRE_EVENT 2 +#define LOG_CMDBATCH_SUBMITTED_EVENT 3 +#define LOG_CMDBATCH_RETIRED_EVENT 4 +#define LOG_SYNCPOINT_FENCE_EVENT 5 +#define LOG_SYNCPOINT_FENCE_EXPIRE_EVENT 6 +#define LOG_TIMELINE_FENCE_ALLOC_EVENT 7 +#define LOG_TIMELINE_FENCE_RELEASE_EVENT 8 + +static spinlock_t lock; +static void *kgsl_eventlog; +static int eventlog_wptr; + +struct kgsl_log_header { + u32 magic; + int pid; + u64 time; + u32 eventid; + u32 size; +}; + +/* Add a marker to skip the rest of the eventlog and start over fresh */ +static void add_skip_header(u32 offset) +{ + struct kgsl_log_header *header = kgsl_eventlog + offset; + + header->magic = MAGIC; + header->time = local_clock(); + header->pid = 0; + header->eventid = LOG_SKIP; + header->size = EVENTLOG_SIZE - sizeof(*header) - offset; +} + +static void *kgsl_eventlog_alloc(u32 eventid, u32 size) +{ + struct kgsl_log_header *header; + u32 datasize = size + sizeof(*header); + unsigned long flags; + void *data; + + if (!kgsl_eventlog) + return NULL; + + spin_lock_irqsave(&lock, flags); + if (eventlog_wptr + datasize > (EVENTLOG_SIZE - sizeof(*header))) { + add_skip_header(eventlog_wptr); + eventlog_wptr = datasize; + data = kgsl_eventlog; + } else { + data = kgsl_eventlog + eventlog_wptr; + eventlog_wptr += datasize; + } + spin_unlock_irqrestore(&lock, flags); + + header = data; + + header->magic = MAGIC; + header->time = local_clock(); + header->pid = current->pid; + header->eventid = eventid; + header->size = size; + + return data + sizeof(*header); +} + +void kgsl_eventlog_init(void) +{ + 
kgsl_eventlog = kzalloc(EVENTLOG_SIZE, GFP_KERNEL); + eventlog_wptr = 0; + + spin_lock_init(&lock); + + kgsl_add_to_minidump("KGSL_EVENTLOG", (u64) kgsl_eventlog, + __pa(kgsl_eventlog), EVENTLOG_SIZE); +} + +void kgsl_eventlog_exit(void) +{ + kgsl_remove_from_minidump("KGSL_EVENTLOG", (u64) kgsl_eventlog, + __pa(kgsl_eventlog), EVENTLOG_SIZE); + + kfree(kgsl_eventlog); + kgsl_eventlog = NULL; + eventlog_wptr = 0; +} + +void log_kgsl_fire_event(u32 id, u32 ts, u32 type, u32 age) +{ + struct { + u32 id; + u32 ts; + u32 type; + u32 age; + } *entry; + + entry = kgsl_eventlog_alloc(LOG_FIRE_EVENT, sizeof(*entry)); + if (!entry) + return; + + entry->id = id; + entry->ts = ts; + entry->type = type; + entry->age = age; +} + +void log_kgsl_cmdbatch_submitted_event(u32 id, u32 ts, u32 prio, u64 flags) +{ + struct { + u32 id; + u32 ts; + u32 prio; + u64 flags; + } *entry; + + entry = kgsl_eventlog_alloc(LOG_CMDBATCH_SUBMITTED_EVENT, sizeof(*entry)); + if (!entry) + return; + + entry->id = id; + entry->ts = ts; + entry->prio = prio; + entry->flags = flags; +} + +void log_kgsl_cmdbatch_retired_event(u32 id, u32 ts, u32 prio, u64 flags, + u64 start, u64 retire) +{ + struct { + u32 id; + u32 ts; + u32 prio; + u64 flags; + u64 start; + u64 retire; + } *entry; + + entry = kgsl_eventlog_alloc(LOG_CMDBATCH_RETIRED_EVENT, sizeof(*entry)); + if (!entry) + return; + + entry->id = id; + entry->ts = ts; + entry->prio = prio; + entry->flags = flags; + entry->start = start; + entry->retire = retire; +} + +void log_kgsl_syncpoint_fence_event(u32 id, char *fence_name) +{ + struct { + u32 id; + char name[LOG_FENCE_NAME_LEN]; + } *entry; + + entry = kgsl_eventlog_alloc(LOG_SYNCPOINT_FENCE_EVENT, sizeof(*entry)); + if (!entry) + return; + + entry->id = id; + memset(entry->name, 0, sizeof(entry->name)); + strlcpy(entry->name, fence_name, sizeof(entry->name)); +} + +void log_kgsl_syncpoint_fence_expire_event(u32 id, char *fence_name) +{ + struct { + u32 id; + char name[LOG_FENCE_NAME_LEN]; + } *entry; + + entry = kgsl_eventlog_alloc(LOG_SYNCPOINT_FENCE_EXPIRE_EVENT, sizeof(*entry)); + if (!entry) + return; + + entry->id = id; + memset(entry->name, 0, sizeof(entry->name)); + strlcpy(entry->name, fence_name, sizeof(entry->name)); +} + +void log_kgsl_timeline_fence_alloc_event(u32 id, u64 seqno) +{ + struct { + u32 id; + u64 seqno; + } *entry; + + entry = kgsl_eventlog_alloc(LOG_TIMELINE_FENCE_ALLOC_EVENT, sizeof(*entry)); + if (!entry) + return; + + entry->id = id; + entry->seqno = seqno; +} + +void log_kgsl_timeline_fence_release_event(u32 id, u64 seqno) +{ + struct { + u32 id; + u64 seqno; + } *entry; + + entry = kgsl_eventlog_alloc(LOG_TIMELINE_FENCE_RELEASE_EVENT, sizeof(*entry)); + if (!entry) + return; + + entry->id = id; + entry->seqno = seqno; +} diff --git a/kgsl_eventlog.h b/kgsl_eventlog.h new file mode 100644 index 0000000000..5e6e0176be --- /dev/null +++ b/kgsl_eventlog.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. 
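/*
 * Illustrative sketch, not part of this commit: every log_kgsl_*() helper in
 * kgsl_eventlog.c above follows the same recipe, so a new record type would
 * only need a new id and a payload struct alongside them. LOG_EXAMPLE_EVENT,
 * its value and its payload are hypothetical.
 */
#define LOG_EXAMPLE_EVENT 9	/* hypothetical, not a real event id */

static void log_kgsl_example_event(u32 id, u32 value)
{
	struct {
		u32 id;
		u32 value;
	} *entry;

	/* Reserve payload space; the common header is filled in for us */
	entry = kgsl_eventlog_alloc(LOG_EXAMPLE_EVENT, sizeof(*entry));
	if (!entry)
		return;

	entry->id = id;
	entry->value = value;
}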
+ */ + +#ifndef _KGSL_EVENTLOG_H +#define _KGSL_EVENTLOG_H + +void kgsl_eventlog_init(void); +void kgsl_eventlog_exit(void); + +void log_kgsl_fire_event(u32 id, u32 ts, u32 type, u32 age); +void log_kgsl_cmdbatch_submitted_event(u32 id, u32 ts, u32 prio, u64 flags); +void log_kgsl_cmdbatch_retired_event(u32 id, u32 ts, u32 prio, u64 flags, + u64 start, u64 retire); +void log_kgsl_syncpoint_fence_event(u32 id, char *fence_name); +void log_kgsl_syncpoint_fence_expire_event(u32 id, char *fence_name); +void log_kgsl_timeline_fence_alloc_event(u32 id, u64 seqno); +void log_kgsl_timeline_fence_release_event(u32 id, u64 seqno); +#endif diff --git a/kgsl_events.c b/kgsl_events.c new file mode 100644 index 0000000000..644ddfd183 --- /dev/null +++ b/kgsl_events.c @@ -0,0 +1,434 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2011-2021, The Linux Foundation. All rights reserved. + */ + +#include +#include + +#include "kgsl_debugfs.h" +#include "kgsl_device.h" +#include "kgsl_eventlog.h" +#include "kgsl_trace.h" + +/* + * Define an kmem cache for the event structures since we allocate and free them + * so frequently + */ +static struct kmem_cache *events_cache; + +static inline void signal_event(struct kgsl_device *device, + struct kgsl_event *event, int result) +{ + list_del(&event->node); + event->result = result; + queue_work(device->events_wq, &event->work); +} + +/** + * _kgsl_event_worker() - Work handler for processing GPU event callbacks + * @work: Pointer to the work_struct for the event + * + * Each event callback has its own work struct and is run on a event specific + * workqeuue. This is the worker that queues up the event callback function. + */ +static void _kgsl_event_worker(struct work_struct *work) +{ + struct kgsl_event *event = container_of(work, struct kgsl_event, work); + int id = KGSL_CONTEXT_ID(event->context); + + trace_kgsl_fire_event(id, event->timestamp, event->result, + jiffies - event->created, event->func); + + log_kgsl_fire_event(id, event->timestamp, event->result, + jiffies - event->created); + + event->func(event->device, event->group, event->priv, event->result); + + kgsl_context_put(event->context); + kmem_cache_free(events_cache, event); +} + +/* return true if the group needs to be processed */ +static bool _do_process_group(unsigned int processed, unsigned int cur) +{ + if (processed == cur) + return false; + + /* + * This ensures that the timestamp didn't slip back accidently, maybe + * due to a memory barrier issue. This is highly unlikely but we've + * been burned here in the past. 
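+ * For example, if the group has already processed timestamp 100 and the
+ * hardware suddenly reports 95, the difference (5) is inside
+ * KGSL_TIMESTAMP_WINDOW so the group is left alone rather than being
+ * reprocessed with a stale value; a legitimate 32-bit wrap produces a
+ * difference far larger than the window and is still processed.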
+ */ + if ((cur < processed) && ((processed - cur) < KGSL_TIMESTAMP_WINDOW)) + return false; + + return true; +} + +static void _process_event_group(struct kgsl_device *device, + struct kgsl_event_group *group, bool flush) +{ + struct kgsl_event *event, *tmp; + unsigned int timestamp; + struct kgsl_context *context; + + if (group == NULL) + return; + + context = group->context; + + /* + * Sanity check to be sure that we we aren't racing with the context + * getting destroyed + */ + if (WARN_ON(context != NULL && !_kgsl_context_get(context))) + return; + + spin_lock(&group->lock); + + group->readtimestamp(device, group->priv, KGSL_TIMESTAMP_RETIRED, + ×tamp); + + if (!flush && !_do_process_group(group->processed, timestamp)) + goto out; + + list_for_each_entry_safe(event, tmp, &group->events, node) { + if (timestamp_cmp(event->timestamp, timestamp) <= 0) + signal_event(device, event, KGSL_EVENT_RETIRED); + else if (flush) + signal_event(device, event, KGSL_EVENT_CANCELLED); + + } + + group->processed = timestamp; + +out: + spin_unlock(&group->lock); + kgsl_context_put(context); +} + +/** + * kgsl_process_event_group() - Handle all the retired events in a group + * @device: Pointer to a KGSL device + * @group: Pointer to a GPU events group to process + */ + +void kgsl_process_event_group(struct kgsl_device *device, + struct kgsl_event_group *group) +{ + _process_event_group(device, group, false); +} + +/** + * kgsl_flush_event_group() - flush all the events in a group by retiring the + * ones can be retired and cancelling the ones that are pending + * @device: Pointer to a KGSL device + * @group: Pointer to a GPU events group to process + */ +void kgsl_flush_event_group(struct kgsl_device *device, + struct kgsl_event_group *group) +{ + _process_event_group(device, group, true); +} + +/** + * kgsl_cancel_events_timestamp() - Cancel pending events for a given timestamp + * @device: Pointer to a KGSL device + * @group: Ponter to the GPU event group that owns the event + * @timestamp: Registered expiry timestamp for the event + */ +void kgsl_cancel_events_timestamp(struct kgsl_device *device, + struct kgsl_event_group *group, unsigned int timestamp) +{ + struct kgsl_event *event, *tmp; + + spin_lock(&group->lock); + + list_for_each_entry_safe(event, tmp, &group->events, node) { + if (timestamp_cmp(timestamp, event->timestamp) == 0) + signal_event(device, event, KGSL_EVENT_CANCELLED); + } + + spin_unlock(&group->lock); +} + +/** + * kgsl_cancel_events() - Cancel all pending events in the group + * @device: Pointer to a KGSL device + * @group: Pointer to a kgsl_events_group + */ +void kgsl_cancel_events(struct kgsl_device *device, + struct kgsl_event_group *group) +{ + struct kgsl_event *event, *tmp; + + spin_lock(&group->lock); + + list_for_each_entry_safe(event, tmp, &group->events, node) + signal_event(device, event, KGSL_EVENT_CANCELLED); + + spin_unlock(&group->lock); +} + +/** + * kgsl_cancel_event() - Cancel a specific event from a group + * @device: Pointer to a KGSL device + * @group: Pointer to the group that contains the events + * @timestamp: Registered expiry timestamp for the event + * @func: Registered callback for the function + * @priv: Registered priv data for the function + */ +void kgsl_cancel_event(struct kgsl_device *device, + struct kgsl_event_group *group, unsigned int timestamp, + kgsl_event_func func, void *priv) +{ + struct kgsl_event *event, *tmp; + + spin_lock(&group->lock); + + list_for_each_entry_safe(event, tmp, &group->events, node) { + if (timestamp == 
event->timestamp && func == event->func && + event->priv == priv) + signal_event(device, event, KGSL_EVENT_CANCELLED); + } + + spin_unlock(&group->lock); +} + +/** + * kgsl_event_pending() - Searches for an event in an event group + * @device: Pointer to a KGSL device + * @group: Pointer to the group that contains the events + * @timestamp: Registered expiry timestamp for the event + * @func: Registered callback for the function + * @priv: Registered priv data for the function + */ +bool kgsl_event_pending(struct kgsl_device *device, + struct kgsl_event_group *group, + unsigned int timestamp, kgsl_event_func func, void *priv) +{ + struct kgsl_event *event; + bool result = false; + + spin_lock(&group->lock); + list_for_each_entry(event, &group->events, node) { + if (timestamp == event->timestamp && func == event->func && + event->priv == priv) { + result = true; + break; + } + } + spin_unlock(&group->lock); + return result; +} +/** + * kgsl_add_event() - Add a new GPU event to a group + * @device: Pointer to a KGSL device + * @group: Pointer to the group to add the event to + * @timestamp: Timestamp that the event will expire on + * @func: Callback function for the event + * @priv: Private data to send to the callback function + */ +int kgsl_add_event(struct kgsl_device *device, struct kgsl_event_group *group, + unsigned int timestamp, kgsl_event_func func, void *priv) +{ + unsigned int queued; + struct kgsl_context *context = group->context; + struct kgsl_event *event; + unsigned int retired; + + if (!func) + return -EINVAL; + + /* + * If the caller is creating their own timestamps, let them schedule + * events in the future. Otherwise only allow timestamps that have been + * queued. + */ + if (!context || !(context->flags & KGSL_CONTEXT_USER_GENERATED_TS)) { + group->readtimestamp(device, group->priv, KGSL_TIMESTAMP_QUEUED, + &queued); + + if (timestamp_cmp(timestamp, queued) > 0) + return -EINVAL; + } + + event = kmem_cache_alloc(events_cache, GFP_KERNEL); + if (event == NULL) + return -ENOMEM; + + /* Get a reference to the context while the event is active */ + if (context != NULL && !_kgsl_context_get(context)) { + kmem_cache_free(events_cache, event); + return -ENOENT; + } + + event->device = device; + event->context = context; + event->timestamp = timestamp; + event->priv = priv; + event->func = func; + event->created = jiffies; + event->group = group; + + INIT_WORK(&event->work, _kgsl_event_worker); + + trace_kgsl_register_event(KGSL_CONTEXT_ID(context), timestamp, func); + + spin_lock(&group->lock); + + /* + * Check to see if the requested timestamp has already retired. 
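+ * (done under group->lock, so retirement processing in
+ * _process_event_group() cannot race in between the readtimestamp()
+ * call and the list_add_tail() below).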
If so, + * schedule the callback right away + */ + group->readtimestamp(device, group->priv, KGSL_TIMESTAMP_RETIRED, + &retired); + + if (timestamp_cmp(retired, timestamp) >= 0) { + event->result = KGSL_EVENT_RETIRED; + queue_work(device->events_wq, &event->work); + spin_unlock(&group->lock); + return 0; + } + + /* Add the event to the group list */ + list_add_tail(&event->node, &group->events); + + spin_unlock(&group->lock); + + return 0; +} + +void kgsl_process_event_groups(struct kgsl_device *device) +{ + struct kgsl_event_group *group; + + read_lock(&device->event_groups_lock); + list_for_each_entry(group, &device->event_groups, group) + _process_event_group(device, group, false); + read_unlock(&device->event_groups_lock); +} + +void kgsl_del_event_group(struct kgsl_device *device, + struct kgsl_event_group *group) +{ + /* Check if the group is uninintalized */ + if (!group->context) + return; + + /* Make sure that all the events have been deleted from the list */ + WARN_ON(!list_empty(&group->events)); + + write_lock(&device->event_groups_lock); + list_del(&group->group); + write_unlock(&device->event_groups_lock); +} + +void kgsl_add_event_group(struct kgsl_device *device, + struct kgsl_event_group *group, struct kgsl_context *context, + readtimestamp_func readtimestamp, + void *priv, const char *fmt, ...) +{ + va_list args; + + WARN_ON(readtimestamp == NULL); + + spin_lock_init(&group->lock); + INIT_LIST_HEAD(&group->events); + + group->context = context; + group->readtimestamp = readtimestamp; + group->priv = priv; + + if (fmt) { + va_start(args, fmt); + vsnprintf(group->name, sizeof(group->name), fmt, args); + va_end(args); + } + + write_lock(&device->event_groups_lock); + list_add_tail(&group->group, &device->event_groups); + write_unlock(&device->event_groups_lock); +} + +static void events_debugfs_print_group(struct seq_file *s, + struct kgsl_event_group *group) +{ + struct kgsl_event *event; + unsigned int retired; + + spin_lock(&group->lock); + + seq_printf(s, "%s: last=%d\n", group->name, group->processed); + + list_for_each_entry(event, &group->events, node) { + + group->readtimestamp(event->device, group->priv, + KGSL_TIMESTAMP_RETIRED, &retired); + + seq_printf(s, "\t%u:%u age=%lu func=%ps [retired=%u]\n", + group->context ? 
group->context->id : + KGSL_MEMSTORE_GLOBAL, + event->timestamp, jiffies - event->created, + event->func, retired); + } + spin_unlock(&group->lock); +} + +static int events_show(struct seq_file *s, void *unused) +{ + struct kgsl_device *device = s->private; + struct kgsl_event_group *group; + + seq_puts(s, "event groups:\n"); + seq_puts(s, "--------------\n"); + + read_lock(&device->event_groups_lock); + list_for_each_entry(group, &device->event_groups, group) { + events_debugfs_print_group(s, group); + seq_puts(s, "\n"); + } + read_unlock(&device->event_groups_lock); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(events); + +void kgsl_device_events_remove(struct kgsl_device *device) +{ + struct kgsl_event_group *group, *tmp; + + write_lock(&device->event_groups_lock); + list_for_each_entry_safe(group, tmp, &device->event_groups, group) { + WARN_ON(!list_empty(&group->events)); + list_del(&group->group); + } + write_unlock(&device->event_groups_lock); +} + +void kgsl_device_events_probe(struct kgsl_device *device) +{ + INIT_LIST_HEAD(&device->event_groups); + rwlock_init(&device->event_groups_lock); + + debugfs_create_file("events", 0444, device->d_debugfs, device, + &events_fops); +} + +/** + * kgsl_events_exit() - Destroy the event kmem cache on module exit + */ +void kgsl_events_exit(void) +{ + kmem_cache_destroy(events_cache); +} + +/** + * kgsl_events_init() - Create the event kmem cache on module start + */ +void __init kgsl_events_init(void) +{ + events_cache = KMEM_CACHE(kgsl_event, 0); +} diff --git a/kgsl_gmu_core.c b/kgsl_gmu_core.c new file mode 100644 index 0000000000..83c3d53936 --- /dev/null +++ b/kgsl_gmu_core.c @@ -0,0 +1,213 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2018-2021, The Linux Foundation. All rights reserved. 
+ */ + +#include +#include +#include + +#include "adreno.h" +#include "kgsl_device.h" +#include "kgsl_gmu_core.h" +#include "kgsl_trace.h" + +static const struct of_device_id gmu_match_table[] = { + { .compatible = "qcom,gpu-gmu", .data = &a6xx_gmu_driver }, + { .compatible = "qcom,gpu-rgmu", .data = &a6xx_rgmu_driver }, + { .compatible = "qcom,gen7-gmu", .data = &gen7_gmu_driver }, + {}, +}; + +void __init gmu_core_register(void) +{ + const struct of_device_id *match; + struct device_node *node; + + node = of_find_matching_node_and_match(NULL, gmu_match_table, + &match); + if (!node) + return; + + platform_driver_register((struct platform_driver *) match->data); + of_node_put(node); +} + +void __exit gmu_core_unregister(void) +{ + const struct of_device_id *match; + struct device_node *node; + + node = of_find_matching_node_and_match(NULL, gmu_match_table, + &match); + if (!node) + return; + + platform_driver_unregister((struct platform_driver *) match->data); + of_node_put(node); +} + +bool gmu_core_isenabled(struct kgsl_device *device) +{ + return test_bit(GMU_ENABLED, &device->gmu_core.flags); +} + +bool gmu_core_gpmu_isenabled(struct kgsl_device *device) +{ + return (device->gmu_core.dev_ops != NULL); +} + +bool gmu_core_scales_bandwidth(struct kgsl_device *device) +{ + const struct gmu_dev_ops *ops = GMU_DEVICE_OPS(device); + + if (ops && ops->scales_bandwidth) + return ops->scales_bandwidth(device); + + return false; +} + +int gmu_core_dev_acd_set(struct kgsl_device *device, bool val) +{ + const struct gmu_dev_ops *ops = GMU_DEVICE_OPS(device); + + if (ops && ops->acd_set) + return ops->acd_set(device, val); + + return -EINVAL; +} + +void gmu_core_regread(struct kgsl_device *device, unsigned int offsetwords, + unsigned int *value) +{ + u32 val = kgsl_regmap_read(&device->regmap, offsetwords); + *value = val; +} + +void gmu_core_regwrite(struct kgsl_device *device, unsigned int offsetwords, + unsigned int value) +{ + kgsl_regmap_write(&device->regmap, value, offsetwords); +} + +void gmu_core_blkwrite(struct kgsl_device *device, unsigned int offsetwords, + const void *buffer, size_t size) +{ + kgsl_regmap_bulk_write(&device->regmap, offsetwords, + buffer, size >> 2); +} + +void gmu_core_regrmw(struct kgsl_device *device, + unsigned int offsetwords, + unsigned int mask, unsigned int bits) +{ + kgsl_regmap_rmw(&device->regmap, offsetwords, mask, bits); +} + +int gmu_core_dev_oob_set(struct kgsl_device *device, enum oob_request req) +{ + const struct gmu_dev_ops *ops = GMU_DEVICE_OPS(device); + + if (ops && ops->oob_set) + return ops->oob_set(device, req); + + return 0; +} + +void gmu_core_dev_oob_clear(struct kgsl_device *device, enum oob_request req) +{ + const struct gmu_dev_ops *ops = GMU_DEVICE_OPS(device); + + if (ops && ops->oob_clear) + ops->oob_clear(device, req); +} + +void gmu_core_dev_cooperative_reset(struct kgsl_device *device) +{ + + const struct gmu_dev_ops *ops = GMU_DEVICE_OPS(device); + + if (ops && ops->cooperative_reset) + ops->cooperative_reset(device); +} + +bool gmu_core_dev_gx_is_on(struct kgsl_device *device) +{ + const struct gmu_dev_ops *ops = GMU_DEVICE_OPS(device); + + if (ops && ops->gx_is_on) + return ops->gx_is_on(device); + + return true; +} + +int gmu_core_dev_ifpc_show(struct kgsl_device *device) +{ + const struct gmu_dev_ops *ops = GMU_DEVICE_OPS(device); + + if (ops && ops->ifpc_show) + return ops->ifpc_show(device); + + return 0; +} + +int gmu_core_dev_ifpc_store(struct kgsl_device *device, unsigned int val) +{ + const struct gmu_dev_ops *ops = 
GMU_DEVICE_OPS(device); + + if (ops && ops->ifpc_store) + return ops->ifpc_store(device, val); + + return -EINVAL; +} + +int gmu_core_dev_wait_for_active_transition(struct kgsl_device *device) +{ + const struct gmu_dev_ops *ops = GMU_DEVICE_OPS(device); + + if (ops && ops->wait_for_active_transition) + return ops->wait_for_active_transition(device); + + return 0; +} + +void gmu_core_fault_snapshot(struct kgsl_device *device) +{ + device->gmu_fault = true; + kgsl_device_snapshot(device, NULL, true); +} + +int gmu_core_timed_poll_check(struct kgsl_device *device, + unsigned int offset, unsigned int expected_ret, + unsigned int timeout_ms, unsigned int mask) +{ + u32 val; + + return kgsl_regmap_read_poll_timeout(&device->regmap, offset, + val, (val & mask) == expected_ret, 100, timeout_ms * 1000); +} + +int gmu_core_map_memdesc(struct iommu_domain *domain, struct kgsl_memdesc *memdesc, + u64 gmuaddr, int attrs) +{ + size_t mapped; + + if (!memdesc->pages) { + mapped = iommu_map_sg(domain, gmuaddr, memdesc->sgt->sgl, + memdesc->sgt->nents, attrs); + } else { + struct sg_table sgt = { 0 }; + int ret; + + ret = sg_alloc_table_from_pages(&sgt, memdesc->pages, + memdesc->page_count, 0, memdesc->size, GFP_KERNEL); + + if (ret) + return ret; + + mapped = iommu_map_sg(domain, gmuaddr, sgt.sgl, sgt.nents, attrs); + sg_free_table(&sgt); + } + + return mapped == 0 ? -ENOMEM : 0; +} diff --git a/kgsl_gmu_core.h b/kgsl_gmu_core.h new file mode 100644 index 0000000000..0ae12a8e04 --- /dev/null +++ b/kgsl_gmu_core.h @@ -0,0 +1,321 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2018-2021, The Linux Foundation. All rights reserved. + */ +#ifndef __KGSL_GMU_CORE_H +#define __KGSL_GMU_CORE_H + +#include + +/* GMU_DEVICE - Given an KGSL device return the GMU specific struct */ +#define GMU_DEVICE_OPS(_a) ((_a)->gmu_core.dev_ops) + +#define MAX_GX_LEVELS 16 +#define MAX_CX_LEVELS 4 +#define MAX_CNOC_LEVELS 2 +#define MAX_CNOC_CMDS 6 +#define MAX_BW_CMDS 8 +#define INVALID_DCVS_IDX 0xFF + +#if MAX_CNOC_LEVELS > MAX_GX_LEVELS +#error "CNOC levels cannot exceed GX levels" +#endif + +/* + * These are the different ways the GMU can boot. GMU_WARM_BOOT is waking up + * from slumber. GMU_COLD_BOOT is booting for the first time. GMU_RESET + * is a soft reset of the GMU. + */ +enum gmu_core_boot { + GMU_WARM_BOOT = 0, + GMU_COLD_BOOT = 1, + GMU_RESET = 2 +}; + +/* Bits for the flags field in the gmu structure */ +enum gmu_core_flags { + GMU_BOOT_INIT_DONE = 0, + GMU_HFI_ON, + GMU_FAULT, + GMU_DCVS_REPLAY, + GMU_ENABLED, + GMU_RSCC_SLEEP_SEQ_DONE, + GMU_DISABLE_SLUMBER, +}; + +/* + * OOB requests values. These range from 0 to 7 and then + * the BIT() offset into the actual value is calculated + * later based on the request. This keeps the math clean + * and easy to ensure not reaching over/under the range + * of 8 bits. + */ +enum oob_request { + oob_gpu = 0, + oob_perfcntr = 1, + oob_boot_slumber = 6, /* reserved special case */ + oob_dcvs = 7, /* reserved special case */ + oob_max, +}; + +enum gmu_pwrctrl_mode { + GMU_FW_START, + GMU_FW_STOP, + GMU_SUSPEND, + GMU_DCVS_NOHFI, + GMU_NOTIFY_SLUMBER, + INVALID_POWER_CTRL +}; + +#define GPU_HW_ACTIVE 0x00 +#define GPU_HW_IFPC 0x03 +#define GPU_HW_SLUMBER 0x0f + +/* + * Wait time before trying to write the register again. + * Hopefully the GMU has finished waking up during this delay. + * This delay must be less than the IFPC main hysteresis or + * the GMU will start shutting down before we try again. 
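+ * With the 10 us delay and the retry limits defined below, the short
+ * limit corresponds to roughly 1 ms of waiting and the long limit to
+ * roughly 2 ms.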
+ */ +#define GMU_CORE_WAKEUP_DELAY_US 10 + +/* Max amount of tries to wake up the GMU. The short retry + * limit is half of the long retry limit. After the short + * number of retries, we print an informational message to say + * exiting IFPC is taking longer than expected. We continue + * to retry after this until the long retry limit. + */ +#define GMU_CORE_SHORT_WAKEUP_RETRY_LIMIT 100 +#define GMU_CORE_LONG_WAKEUP_RETRY_LIMIT 200 + +#define FENCE_STATUS_WRITEDROPPED0_MASK 0x1 +#define FENCE_STATUS_WRITEDROPPED1_MASK 0x2 + +#define GMU_FREQ_MIN 200000000 +#define GMU_FREQ_MAX 500000000 + +#define GMU_VER_MAJOR(ver) (((ver) >> 28) & 0xF) +#define GMU_VER_MINOR(ver) (((ver) >> 16) & 0xFFF) +#define GMU_VER_STEP(ver) ((ver) & 0xFFFF) +#define GMU_VERSION(major, minor) \ + ((((major) & 0xF) << 28) | (((minor) & 0xFFF) << 16)) + +#define GMU_INT_WDOG_BITE BIT(0) +#define GMU_INT_RSCC_COMP BIT(1) +#define GMU_INT_FENCE_ERR BIT(3) +#define GMU_INT_DBD_WAKEUP BIT(4) +#define GMU_INT_HOST_AHB_BUS_ERR BIT(5) +#define GMU_AO_INT_MASK \ + (GMU_INT_WDOG_BITE | \ + GMU_INT_FENCE_ERR | \ + GMU_INT_HOST_AHB_BUS_ERR) + +/* Bitmask for GPU low power mode enabling and hysterisis*/ +#define SPTP_ENABLE_MASK (BIT(2) | BIT(0)) +#define IFPC_ENABLE_MASK (BIT(1) | BIT(0)) + +/* Bitmask for RPMH capability enabling */ +#define RPMH_INTERFACE_ENABLE BIT(0) +#define LLC_VOTE_ENABLE BIT(4) +#define DDR_VOTE_ENABLE BIT(8) +#define MX_VOTE_ENABLE BIT(9) +#define CX_VOTE_ENABLE BIT(10) +#define GFX_VOTE_ENABLE BIT(11) +#define RPMH_ENABLE_MASK (RPMH_INTERFACE_ENABLE | \ + LLC_VOTE_ENABLE | \ + DDR_VOTE_ENABLE | \ + MX_VOTE_ENABLE | \ + CX_VOTE_ENABLE | \ + GFX_VOTE_ENABLE) + +/* Constants for GMU OOBs */ +#define OOB_BOOT_OPTION 0 +#define OOB_SLUMBER_OPTION 1 + +/* Gmu FW block header format */ +struct gmu_block_header { + u32 addr; + u32 size; + u32 type; + u32 value; +}; + +/* GMU Block types */ +#define GMU_BLK_TYPE_DATA 0 +#define GMU_BLK_TYPE_PREALLOC_REQ 1 +#define GMU_BLK_TYPE_CORE_VER 2 +#define GMU_BLK_TYPE_CORE_DEV_VER 3 +#define GMU_BLK_TYPE_PWR_VER 4 +#define GMU_BLK_TYPE_PWR_DEV_VER 5 +#define GMU_BLK_TYPE_HFI_VER 6 +#define GMU_BLK_TYPE_PREALLOC_PERSIST_REQ 7 + +/* For GMU Logs*/ +#define GMU_LOG_SIZE SZ_16K + +/* GMU memdesc entries */ +#define GMU_KERNEL_ENTRIES 16 + +enum gmu_mem_type { + GMU_ITCM = 0, + GMU_ICACHE, + GMU_CACHE = GMU_ICACHE, + GMU_DTCM, + GMU_DCACHE, + GMU_NONCACHED_KERNEL, + GMU_NONCACHED_USER, + GMU_MEM_TYPE_MAX, +}; + +/** + * struct gmu_memdesc - Gmu shared memory object descriptor + * @hostptr: Kernel virtual address + * @gmuaddr: GPU virtual address + * @physaddr: Physical address of the memory object + * @size: Size of the memory object + */ +struct gmu_memdesc { + void *hostptr; + u32 gmuaddr; + phys_addr_t physaddr; + u32 size; +}; + +struct kgsl_mailbox { + struct mbox_client client; + struct mbox_chan *channel; +}; + +struct icc_path; + +struct gmu_vma_entry { + /** @start: Starting virtual address of the vma */ + u32 start; + /** @size: Size of this vma */ + u32 size; + /** @next_va: Next available virtual address in this vma */ + u32 next_va; +}; + +enum { + GMU_PRIV_FIRST_BOOT_DONE = 0, + GMU_PRIV_GPU_STARTED, + GMU_PRIV_HFI_STARTED, + GMU_PRIV_RSCC_SLEEP_DONE, + GMU_PRIV_PM_SUSPEND, + GMU_PRIV_PDC_RSC_LOADED, +}; + +struct device_node; +struct kgsl_device; +struct kgsl_snapshot; + +struct gmu_dev_ops { + int (*oob_set)(struct kgsl_device *device, enum oob_request req); + void (*oob_clear)(struct kgsl_device *device, enum oob_request req); + bool (*gx_is_on)(struct 
kgsl_device *device); + int (*ifpc_store)(struct kgsl_device *device, unsigned int val); + unsigned int (*ifpc_show)(struct kgsl_device *device); + void (*cooperative_reset)(struct kgsl_device *device); + void (*halt_execution)(struct kgsl_device *device); + int (*wait_for_active_transition)(struct kgsl_device *device); + bool (*scales_bandwidth)(struct kgsl_device *device); + int (*acd_set)(struct kgsl_device *device, bool val); +}; + +/** + * struct gmu_core_device - GMU Core device structure + * @ptr: Pointer to GMU device structure + * @dev_ops: Pointer to gmu device operations + * @flags: GMU flags + */ +struct gmu_core_device { + void *ptr; + const struct gmu_dev_ops *dev_ops; + unsigned long flags; +}; + +extern struct platform_driver a6xx_gmu_driver; +extern struct platform_driver a6xx_rgmu_driver; +extern struct platform_driver a6xx_hwsched_driver; +extern struct platform_driver gen7_gmu_driver; +extern struct platform_driver gen7_hwsched_driver; + +/* GMU core functions */ + +void __init gmu_core_register(void); +void __exit gmu_core_unregister(void); + +bool gmu_core_gpmu_isenabled(struct kgsl_device *device); +bool gmu_core_scales_bandwidth(struct kgsl_device *device); +bool gmu_core_isenabled(struct kgsl_device *device); +int gmu_core_dev_acd_set(struct kgsl_device *device, bool val); +void gmu_core_regread(struct kgsl_device *device, unsigned int offsetwords, + unsigned int *value); +void gmu_core_regwrite(struct kgsl_device *device, unsigned int offsetwords, + unsigned int value); + +/** + * gmu_core_blkwrite - Do a bulk I/O write to GMU + * @device: Pointer to the kgsl device + * @offsetwords: Destination dword offset + * @buffer: Pointer to the source buffer + * @size: Number of bytes to copy + * + * Write a series of GMU registers quickly without bothering to spend time + * logging the register writes. The logging of these writes causes extra + * delays that could allow IRQs arrive and be serviced before finishing + * all the writes. + */ +void gmu_core_blkwrite(struct kgsl_device *device, unsigned int offsetwords, + const void *buffer, size_t size); +void gmu_core_regrmw(struct kgsl_device *device, unsigned int offsetwords, + unsigned int mask, unsigned int bits); +int gmu_core_dev_oob_set(struct kgsl_device *device, enum oob_request req); +void gmu_core_dev_oob_clear(struct kgsl_device *device, enum oob_request req); +bool gmu_core_dev_gx_is_on(struct kgsl_device *device); +int gmu_core_dev_ifpc_show(struct kgsl_device *device); +int gmu_core_dev_ifpc_store(struct kgsl_device *device, unsigned int val); +int gmu_core_dev_wait_for_active_transition(struct kgsl_device *device); +void gmu_core_dev_cooperative_reset(struct kgsl_device *device); + +/** + * gmu_core_fault_snapshot - Set gmu fault and trigger snapshot + * @device: Pointer to the kgsl device + * + * Set the gmu fault and take snapshot when we hit a gmu fault + */ +void gmu_core_fault_snapshot(struct kgsl_device *device); + +/** + * gmu_core_timed_poll_check() - polling *gmu* register at given offset until + * its value changed to match expected value. The function times + * out and returns after given duration if register is not updated + * as expected. 
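+ * Polling is implemented with kgsl_regmap_read_poll_timeout() using a
+ * 100 us interval between reads.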
+ * + * @device: Pointer to KGSL device + * @offset: Register offset in dwords + * @expected_ret: expected register value that stops polling + * @timeout_ms: time in milliseconds to poll the register + * @mask: bitmask to filter register value to match expected_ret + */ +int gmu_core_timed_poll_check(struct kgsl_device *device, + unsigned int offset, unsigned int expected_ret, + unsigned int timeout_ms, unsigned int mask); + +struct kgsl_memdesc; +struct iommu_domain; + +/** + * gmu_core_map_memdesc - Map the memdesc into the GMU IOMMU domain + * @domain: Domain to map the memory into + * @memdesc: Memory descriptor to map + * @gmuaddr: Virtual GMU address to map the memory into + * @attrs: Attributes for the mapping + * + * Return: 0 on success or -ENOMEM on failure + */ +int gmu_core_map_memdesc(struct iommu_domain *domain, struct kgsl_memdesc *memdesc, + u64 gmuaddr, int attrs); + +#endif /* __KGSL_GMU_CORE_H */ diff --git a/kgsl_ioctl.c b/kgsl_ioctl.c new file mode 100644 index 0000000000..c6b55641a8 --- /dev/null +++ b/kgsl_ioctl.c @@ -0,0 +1,189 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2008-2021, The Linux Foundation. All rights reserved. + */ + +#include "kgsl_device.h" +#include "kgsl_sync.h" + +static const struct kgsl_ioctl kgsl_ioctl_funcs[] = { + KGSL_IOCTL_FUNC(IOCTL_KGSL_DEVICE_GETPROPERTY, + kgsl_ioctl_device_getproperty), + /* IOCTL_KGSL_DEVICE_WAITTIMESTAMP is no longer supported */ + KGSL_IOCTL_FUNC(IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID, + kgsl_ioctl_device_waittimestamp_ctxtid), + KGSL_IOCTL_FUNC(IOCTL_KGSL_RINGBUFFER_ISSUEIBCMDS, + kgsl_ioctl_rb_issueibcmds), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SUBMIT_COMMANDS, + kgsl_ioctl_submit_commands), + /* IOCTL_KGSL_CMDSTREAM_READTIMESTAMP is no longer supported */ + KGSL_IOCTL_FUNC(IOCTL_KGSL_CMDSTREAM_READTIMESTAMP_CTXTID, + kgsl_ioctl_cmdstream_readtimestamp_ctxtid), + /* IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP is no longer supported */ + KGSL_IOCTL_FUNC(IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP_CTXTID, + kgsl_ioctl_cmdstream_freememontimestamp_ctxtid), + KGSL_IOCTL_FUNC(IOCTL_KGSL_DRAWCTXT_CREATE, + kgsl_ioctl_drawctxt_create), + KGSL_IOCTL_FUNC(IOCTL_KGSL_DRAWCTXT_DESTROY, + kgsl_ioctl_drawctxt_destroy), + KGSL_IOCTL_FUNC(IOCTL_KGSL_MAP_USER_MEM, + kgsl_ioctl_map_user_mem), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SHAREDMEM_FROM_PMEM, + kgsl_ioctl_map_user_mem), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SHAREDMEM_FREE, + kgsl_ioctl_sharedmem_free), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SHAREDMEM_FLUSH_CACHE, + kgsl_ioctl_sharedmem_flush_cache), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_ALLOC, + kgsl_ioctl_gpumem_alloc), + KGSL_IOCTL_FUNC(IOCTL_KGSL_TIMESTAMP_EVENT, + kgsl_ioctl_timestamp_event), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SETPROPERTY, + kgsl_ioctl_device_setproperty), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_ALLOC_ID, + kgsl_ioctl_gpumem_alloc_id), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_FREE_ID, + kgsl_ioctl_gpumem_free_id), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_GET_INFO, + kgsl_ioctl_gpumem_get_info), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_SYNC_CACHE, + kgsl_ioctl_gpumem_sync_cache), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_SYNC_CACHE_BULK, + kgsl_ioctl_gpumem_sync_cache_bulk), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SYNCSOURCE_CREATE, + kgsl_ioctl_syncsource_create), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SYNCSOURCE_DESTROY, + kgsl_ioctl_syncsource_destroy), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SYNCSOURCE_CREATE_FENCE, + kgsl_ioctl_syncsource_create_fence), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SYNCSOURCE_SIGNAL_FENCE, + kgsl_ioctl_syncsource_signal_fence), + 
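+	/*
+	 * Entries below that have a NULL handler (the sparse ioctls) are
+	 * rejected by kgsl_ioctl_helper() with -ENOIOCTLCMD.
+	 */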
KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_ALLOC, + kgsl_ioctl_gpuobj_alloc), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_FREE, + kgsl_ioctl_gpuobj_free), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_INFO, + kgsl_ioctl_gpuobj_info), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_IMPORT, + kgsl_ioctl_gpuobj_import), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_SYNC, + kgsl_ioctl_gpuobj_sync), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPU_COMMAND, + kgsl_ioctl_gpu_command), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_SET_INFO, + kgsl_ioctl_gpuobj_set_info), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SPARSE_PHYS_ALLOC, + NULL), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SPARSE_PHYS_FREE, + NULL), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SPARSE_VIRT_ALLOC, + NULL), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SPARSE_VIRT_FREE, + NULL), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SPARSE_BIND, + NULL), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPU_SPARSE_COMMAND, + NULL), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_BIND_RANGES, + kgsl_ioctl_gpumem_bind_ranges), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPU_AUX_COMMAND, + kgsl_ioctl_gpu_aux_command), + KGSL_IOCTL_FUNC(IOCTL_KGSL_TIMELINE_CREATE, + kgsl_ioctl_timeline_create), + KGSL_IOCTL_FUNC(IOCTL_KGSL_TIMELINE_WAIT, + kgsl_ioctl_timeline_wait), + KGSL_IOCTL_FUNC(IOCTL_KGSL_TIMELINE_FENCE_GET, + kgsl_ioctl_timeline_fence_get), + KGSL_IOCTL_FUNC(IOCTL_KGSL_TIMELINE_QUERY, + kgsl_ioctl_timeline_query), + KGSL_IOCTL_FUNC(IOCTL_KGSL_TIMELINE_SIGNAL, + kgsl_ioctl_timeline_signal), + KGSL_IOCTL_FUNC(IOCTL_KGSL_TIMELINE_DESTROY, + kgsl_ioctl_timeline_destroy), +}; + +long kgsl_ioctl_copy_in(unsigned int kernel_cmd, unsigned int user_cmd, + unsigned long arg, unsigned char *ptr) +{ + unsigned int usize = _IOC_SIZE(user_cmd); + unsigned int ksize = _IOC_SIZE(kernel_cmd); + unsigned int copy = ksize < usize ? ksize : usize; + + if ((kernel_cmd & IOC_IN) && (user_cmd & IOC_IN)) { + if (copy > 0 && copy_from_user(ptr, (void __user *) arg, copy)) + return -EFAULT; + } + + return 0; +} + +long kgsl_ioctl_copy_out(unsigned int kernel_cmd, unsigned int user_cmd, + unsigned long arg, unsigned char *ptr) +{ + unsigned int usize = _IOC_SIZE(user_cmd); + unsigned int ksize = _IOC_SIZE(kernel_cmd); + unsigned int copy = ksize < usize ? 
ksize : usize; + + if ((kernel_cmd & IOC_OUT) && (user_cmd & IOC_OUT)) { + if (copy > 0 && copy_to_user((void __user *) arg, ptr, copy)) + return -EFAULT; + } + + return 0; +} + +long kgsl_ioctl_helper(struct file *filep, unsigned int cmd, unsigned long arg, + const struct kgsl_ioctl *cmds, int len) +{ + struct kgsl_device_private *dev_priv = filep->private_data; + unsigned char data[128] = { 0 }; + unsigned int nr = _IOC_NR(cmd); + long ret; + + if (nr >= len || cmds[nr].func == NULL) + return -ENOIOCTLCMD; + + if (_IOC_SIZE(cmds[nr].cmd) > sizeof(data)) { + dev_err_ratelimited(dev_priv->device->dev, + "data too big for ioctl 0x%08x: %d/%zu\n", + cmd, _IOC_SIZE(cmds[nr].cmd), sizeof(data)); + return -EINVAL; + } + + if (_IOC_SIZE(cmds[nr].cmd)) { + ret = kgsl_ioctl_copy_in(cmds[nr].cmd, cmd, arg, data); + if (ret) + return ret; + } + + ret = cmds[nr].func(dev_priv, cmd, data); + + if (ret == 0 && _IOC_SIZE(cmds[nr].cmd)) + ret = kgsl_ioctl_copy_out(cmds[nr].cmd, cmd, arg, data); + + return ret; +} + +long kgsl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) +{ + struct kgsl_device_private *dev_priv = filep->private_data; + struct kgsl_device *device = dev_priv->device; + long ret; + + ret = kgsl_ioctl_helper(filep, cmd, arg, kgsl_ioctl_funcs, + ARRAY_SIZE(kgsl_ioctl_funcs)); + + /* + * If the command was unrecognized in the generic core, try the device + * specific function + */ + + if (ret == -ENOIOCTLCMD) { + if (is_compat_task()) + return device->ftbl->compat_ioctl(dev_priv, cmd, arg); + + return device->ftbl->ioctl(dev_priv, cmd, arg); + } + + return ret; +} diff --git a/kgsl_iommu.c b/kgsl_iommu.c new file mode 100644 index 0000000000..282a231b90 --- /dev/null +++ b/kgsl_iommu.c @@ -0,0 +1,2419 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2011-2021, The Linux Foundation. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "adreno.h" +#include "kgsl_device.h" +#include "kgsl_iommu.h" +#include "kgsl_mmu.h" +#include "kgsl_pwrctrl.h" +#include "kgsl_sharedmem.h" +#include "kgsl_trace.h" + +#define KGSL_IOMMU_SPLIT_TABLE_BASE 0x0001ff8000000000ULL + +#define KGSL_IOMMU_IDR1_OFFSET 0x24 +#define IDR1_NUMPAGENDXB GENMASK(30, 28) +#define IDR1_PAGESIZE BIT(31) + +static const struct kgsl_mmu_pt_ops secure_pt_ops; +static const struct kgsl_mmu_pt_ops default_pt_ops; +static const struct kgsl_mmu_pt_ops iopgtbl_pt_ops; + +/* Zero page for non-secure VBOs */ +static struct page *kgsl_vbo_zero_page; + +/* + * struct kgsl_iommu_addr_entry - entry in the kgsl_pagetable rbtree. 
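+ * Each entry describes one range of GPU virtual address space reserved
+ * in the owning pagetable.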
+ * @base: starting virtual address of the entry + * @size: size of the entry + * @node: the rbtree node + */ +struct kgsl_iommu_addr_entry { + uint64_t base; + uint64_t size; + struct rb_node node; +}; + +static struct kmem_cache *addr_entry_cache; + +/* These are dummy TLB ops for the io-pgtable instances */ + +static void _tlb_flush_all(void *cookie) +{ +} + +static void _tlb_flush_walk(unsigned long iova, size_t size, + size_t granule, void *cookie) +{ +} + +static void _tlb_add_page(struct iommu_iotlb_gather *gather, + unsigned long iova, size_t granule, void *cookie) +{ +} + +static const struct iommu_flush_ops kgsl_iopgtbl_tlb_ops = { + .tlb_flush_all = _tlb_flush_all, + .tlb_flush_walk = _tlb_flush_walk, + .tlb_add_page = _tlb_add_page, +}; + +static bool _iommu_domain_check_bool(struct iommu_domain *domain, int attr) +{ + u32 val; + int ret = iommu_domain_get_attr(domain, attr, &val); + + return (!ret && val); +} + +static int _iommu_domain_context_bank(struct iommu_domain *domain) +{ + int val, ret; + + ret = iommu_domain_get_attr(domain, DOMAIN_ATTR_CONTEXT_BANK, &val); + + return ret ? ret : val; +} + +static struct kgsl_iommu_pt *to_iommu_pt(struct kgsl_pagetable *pagetable) +{ + return container_of(pagetable, struct kgsl_iommu_pt, base); +} + +static u32 get_llcc_flags(struct iommu_domain *domain) +{ + if (_iommu_domain_check_bool(domain, DOMAIN_ATTR_USE_LLC_NWA)) + return IOMMU_USE_LLC_NWA; + + if (_iommu_domain_check_bool(domain, DOMAIN_ATTR_USE_UPSTREAM_HINT)) + return IOMMU_USE_UPSTREAM_HINT; + + return 0; +} + + +static int _iommu_get_protection_flags(struct iommu_domain *domain, + struct kgsl_memdesc *memdesc) +{ + int flags = IOMMU_READ | IOMMU_WRITE | IOMMU_NOEXEC; + + flags |= get_llcc_flags(domain); + + if (memdesc->flags & KGSL_MEMFLAGS_GPUREADONLY) + flags &= ~IOMMU_WRITE; + + if (memdesc->priv & KGSL_MEMDESC_PRIVILEGED) + flags |= IOMMU_PRIV; + + if (memdesc->flags & KGSL_MEMFLAGS_IOCOHERENT) + flags |= IOMMU_CACHE; + + if (memdesc->priv & KGSL_MEMDESC_UCODE) + flags &= ~IOMMU_NOEXEC; + + return flags; +} + +/* Get a scattterlist for the subrange in the child memdesc */ +static int get_sg_from_child(struct sg_table *sgt, struct kgsl_memdesc *child, + u64 offset, u64 length) +{ + int npages = (length >> PAGE_SHIFT); + int pgoffset = (offset >> PAGE_SHIFT); + struct scatterlist *target_sg; + struct sg_page_iter iter; + int ret; + + if (child->pages) + return sg_alloc_table_from_pages(sgt, + child->pages + pgoffset, npages, 0, + length, GFP_KERNEL); + + ret = sg_alloc_table(sgt, npages, GFP_KERNEL); + if (ret) + return ret; + + target_sg = sgt->sgl; + + for_each_sg_page(child->sgt->sgl, &iter, npages, pgoffset) { + sg_set_page(target_sg, sg_page_iter_page(&iter), PAGE_SIZE, 0); + target_sg = sg_next(target_sg); + } + + return 0; +} + +static struct iommu_domain *to_iommu_domain(struct kgsl_iommu_context *context) +{ + return context->domain; +} + +static struct kgsl_iommu *to_kgsl_iommu(struct kgsl_pagetable *pt) +{ + return &pt->mmu->iommu; +} + +/* + * One page allocation for a guard region to protect against over-zealous + * GPU pre-fetch + */ +static struct page *kgsl_guard_page; +static struct page *kgsl_secure_guard_page; + +static struct page *iommu_get_guard_page(struct kgsl_memdesc *memdesc) +{ + if (kgsl_memdesc_is_secured(memdesc)) { + if (!kgsl_secure_guard_page) + kgsl_secure_guard_page = kgsl_alloc_secure_page(); + + return kgsl_secure_guard_page; + } + + if (!kgsl_guard_page) + kgsl_guard_page = alloc_page(GFP_KERNEL | __GFP_ZERO | + __GFP_NORETRY | 
__GFP_HIGHMEM); + + return kgsl_guard_page; +} + +static int _iopgtbl_unmap(struct kgsl_iommu_pt *pt, u64 gpuaddr, size_t size) +{ + struct kgsl_iommu *iommu = &pt->base.mmu->iommu; + struct io_pgtable_ops *ops = pt->pgtbl_ops; + + while (size) { + if ((ops->unmap(ops, gpuaddr, PAGE_SIZE, NULL)) != PAGE_SIZE) + return -EINVAL; + + gpuaddr += PAGE_SIZE; + size -= PAGE_SIZE; + } + + iommu_flush_iotlb_all(to_iommu_domain(&iommu->user_context)); + iommu_flush_iotlb_all(to_iommu_domain(&iommu->lpac_context)); + + return 0; +} + +static size_t _iopgtbl_map_pages(struct kgsl_iommu_pt *pt, u64 gpuaddr, + struct page **pages, int npages, int prot) +{ + struct io_pgtable_ops *ops = pt->pgtbl_ops; + size_t mapped = 0; + u64 addr = gpuaddr; + int ret, i; + + for (i = 0; i < npages; i++) { + ret = ops->map(ops, addr, page_to_phys(pages[i]), PAGE_SIZE, + prot, GFP_KERNEL); + if (ret) { + _iopgtbl_unmap(pt, gpuaddr, mapped); + return 0; + } + + mapped += PAGE_SIZE; + addr += PAGE_SIZE; + } + + return mapped; +} + +static int _iopgtbl_map_sg(struct kgsl_iommu_pt *pt, u64 gpuaddr, + struct sg_table *sgt, int prot) +{ + struct io_pgtable_ops *ops = pt->pgtbl_ops; + struct scatterlist *sg; + size_t mapped = 0; + u64 addr = gpuaddr; + int ret, i; + + for_each_sg(sgt->sgl, sg, sgt->nents, i) { + size_t size = sg->length; + phys_addr_t phys = sg_phys(sg); + + while (size) { + ret = ops->map(ops, addr, phys, PAGE_SIZE, prot, GFP_KERNEL); + + if (ret) { + _iopgtbl_unmap(pt, gpuaddr, mapped); + return 0; + } + + phys += PAGE_SIZE; + mapped += PAGE_SIZE; + addr += PAGE_SIZE; + size -= PAGE_SIZE; + } + } + + return mapped; +} + + +static int +kgsl_iopgtbl_map_child(struct kgsl_pagetable *pt, struct kgsl_memdesc *memdesc, + u64 offset, struct kgsl_memdesc *child, u64 child_offset, u64 length) +{ + struct kgsl_iommu *iommu = &pt->mmu->iommu; + struct iommu_domain *domain = to_iommu_domain(&iommu->user_context); + struct kgsl_iommu_pt *iommu_pt = to_iommu_pt(pt); + struct sg_table sgt; + u32 flags; + int ret; + + ret = get_sg_from_child(&sgt, child, child_offset, length); + if (ret) + return ret; + + /* Inherit the flags from the child for this mapping */ + flags = _iommu_get_protection_flags(domain, child); + + ret = _iopgtbl_map_sg(iommu_pt, memdesc->gpuaddr + offset, &sgt, flags); + + sg_free_table(&sgt); + + return ret ? 
0 : -ENOMEM; +} + + +static int +kgsl_iopgtbl_unmap_range(struct kgsl_pagetable *pt, struct kgsl_memdesc *memdesc, + u64 offset, u64 length) +{ + if (WARN_ON(offset >= memdesc->size || + (offset + length) > memdesc->size)) + return -ERANGE; + + return _iopgtbl_unmap(to_iommu_pt(pt), memdesc->gpuaddr + offset, + length); +} + +static size_t _iopgtbl_map_page_to_range(struct kgsl_iommu_pt *pt, + struct page *page, u64 gpuaddr, size_t range, int prot) +{ + struct io_pgtable_ops *ops = pt->pgtbl_ops; + size_t mapped = 0; + u64 addr = gpuaddr; + int ret; + + while (range) { + ret = ops->map(ops, addr, page_to_phys(page), PAGE_SIZE, + prot, GFP_KERNEL); + if (ret) { + _iopgtbl_unmap(pt, gpuaddr, mapped); + return 0; + } + + mapped += PAGE_SIZE; + addr += PAGE_SIZE; + range -= PAGE_SIZE; + } + + return mapped; +} + +static int kgsl_iopgtbl_map_zero_page_to_range(struct kgsl_pagetable *pt, + struct kgsl_memdesc *memdesc, u64 offset, u64 length) +{ + struct kgsl_iommu *iommu = &pt->mmu->iommu; + struct iommu_domain *domain = to_iommu_domain(&iommu->user_context); + /* + * The SMMU only does the PRT compare at the bottom level of the page table, because + * there is not an easy way for the hardware to perform this check at earlier levels. + * Mark this page writable to avoid page faults while writing to it. Since the address + * of this zero page is programmed in PRR register, MMU will intercept any accesses to + * the page before they go to DDR and will terminate the transaction. + */ + u32 flags = IOMMU_READ | IOMMU_WRITE | IOMMU_NOEXEC | get_llcc_flags(domain); + struct kgsl_iommu_pt *iommu_pt = to_iommu_pt(pt); + struct page *page = kgsl_vbo_zero_page; + + if (WARN_ON(!page)) + return -ENODEV; + + if (WARN_ON((offset >= memdesc->size) || + (offset + length) > memdesc->size)) + return -ERANGE; + + if (!_iopgtbl_map_page_to_range(iommu_pt, page, memdesc->gpuaddr + offset, + length, flags)) + return -ENOMEM; + + return 0; +} + +static int kgsl_iopgtbl_map(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc) +{ + struct kgsl_iommu_pt *pt = to_iommu_pt(pagetable); + struct kgsl_iommu *iommu = &pagetable->mmu->iommu; + struct iommu_domain *domain = to_iommu_domain(&iommu->user_context); + size_t mapped, padding; + int prot; + + /* Get the protection flags for the user context */ + prot = _iommu_get_protection_flags(domain, memdesc); + + if (memdesc->sgt) + mapped = _iopgtbl_map_sg(pt, memdesc->gpuaddr, + memdesc->sgt, prot); + else + mapped = _iopgtbl_map_pages(pt, memdesc->gpuaddr, + memdesc->pages, memdesc->page_count, prot); + + if (mapped == 0) + return -ENOMEM; + + padding = kgsl_memdesc_footprint(memdesc) - mapped; + + if (padding) { + struct page *page = iommu_get_guard_page(memdesc); + size_t ret; + + if (page) + ret = _iopgtbl_map_page_to_range(pt, page, + memdesc->gpuaddr + mapped, padding, + prot & ~IOMMU_WRITE); + + if (!page || !ret) { + _iopgtbl_unmap(pt, memdesc->gpuaddr, mapped); + return -ENOMEM; + } + } + + return 0; +} + +static int kgsl_iopgtbl_unmap(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc) +{ + return _iopgtbl_unmap(to_iommu_pt(pagetable), memdesc->gpuaddr, + kgsl_memdesc_footprint(memdesc)); +} + +static int _iommu_unmap(struct iommu_domain *domain, u64 addr, size_t size) +{ + size_t unmapped = 0; + + if (!domain) + return 0; + + /* Sign extend TTBR1 addresses all the way to avoid warning */ + if (addr & (1ULL << 48)) + addr |= 0xffff000000000000; + + unmapped = iommu_unmap(domain, addr, size); + + return (unmapped == size) ? 
0 : -ENOMEM; +} + + +static size_t _iommu_map_page_to_range(struct iommu_domain *domain, + struct page *page, u64 gpuaddr, size_t range, int prot) +{ + size_t mapped = 0; + u64 addr = gpuaddr; + + if (!page) + return 0; + + /* Sign extend TTBR1 addresses all the way to avoid warning */ + if (gpuaddr & (1ULL << 48)) + gpuaddr |= 0xffff000000000000; + + + while (range) { + int ret = iommu_map(domain, addr, page_to_phys(page), + PAGE_SIZE, prot); + if (ret) { + iommu_unmap(domain, gpuaddr, mapped); + return 0; + } + + addr += PAGE_SIZE; + mapped += PAGE_SIZE; + range -= PAGE_SIZE; + } + + return mapped; +} + +static size_t _iommu_map_sg(struct iommu_domain *domain, u64 gpuaddr, + struct sg_table *sgt, int prot) +{ + /* Sign extend TTBR1 addresses all the way to avoid warning */ + if (gpuaddr & (1ULL << 48)) + gpuaddr |= 0xffff000000000000; + + return iommu_map_sg(domain, gpuaddr, sgt->sgl, sgt->orig_nents, prot); +} + +static int +_kgsl_iommu_map(struct iommu_domain *domain, struct kgsl_memdesc *memdesc) +{ + int prot = _iommu_get_protection_flags(domain, memdesc); + size_t mapped, padding; + int ret = 0; + + /* + * For paged memory allocated through kgsl, memdesc->pages is not NULL. + * Allocate sgt here just for its map operation. Contiguous memory + * already has its sgt, so no need to allocate it here. + */ + if (!memdesc->pages) { + mapped = _iommu_map_sg(domain, memdesc->gpuaddr, + memdesc->sgt, prot); + } else { + struct sg_table sgt; + + ret = sg_alloc_table_from_pages(&sgt, memdesc->pages, + memdesc->page_count, 0, memdesc->size, GFP_KERNEL); + if (ret) + return ret; + + mapped = _iommu_map_sg(domain, memdesc->gpuaddr, &sgt, prot); + sg_free_table(&sgt); + } + + if (!mapped) + return -ENOMEM; + + padding = kgsl_memdesc_footprint(memdesc) - mapped; + + if (padding) { + struct page *page = iommu_get_guard_page(memdesc); + size_t guard_mapped; + + if (page) + guard_mapped = _iommu_map_page_to_range(domain, page, + memdesc->gpuaddr + mapped, padding, prot & ~IOMMU_WRITE); + + if (!page || !guard_mapped) { + _iommu_unmap(domain, memdesc->gpuaddr, mapped); + ret = -ENOMEM; + } + } + + return ret; +} + +static int kgsl_iommu_secure_map(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc) +{ + struct kgsl_iommu *iommu = &pagetable->mmu->iommu; + struct iommu_domain *domain = to_iommu_domain(&iommu->secure_context); + + return _kgsl_iommu_map(domain, memdesc); +} + +/* + * Return true if the address is in the TTBR0 region. 
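+ * (addresses below KGSL_IOMMU_SPLIT_TABLE_BASE when the
+ * KGSL_MMU_IOPGTABLE feature is set).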
This is used for cases + * when the "default" pagetable is used for both TTBR0 and TTBR1 + */ +static bool is_lower_address(struct kgsl_mmu *mmu, u64 addr) +{ + return (test_bit(KGSL_MMU_IOPGTABLE, &mmu->features) && + addr < KGSL_IOMMU_SPLIT_TABLE_BASE); +} + +static int _kgsl_iommu_unmap(struct iommu_domain *domain, + struct kgsl_memdesc *memdesc) +{ + if (memdesc->size == 0 || memdesc->gpuaddr == 0) + return -EINVAL; + + return _iommu_unmap(domain, memdesc->gpuaddr, + kgsl_memdesc_footprint(memdesc)); +} + +/* Map on the default pagetable and the LPAC pagetable if it exists */ +static int kgsl_iommu_default_map(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc) +{ + struct kgsl_mmu *mmu = pagetable->mmu; + struct kgsl_iommu *iommu = &mmu->iommu; + struct iommu_domain *domain, *lpac; + int ret; + + if (is_lower_address(mmu, memdesc->gpuaddr)) + return kgsl_iopgtbl_map(pagetable, memdesc); + + domain = to_iommu_domain(&iommu->user_context); + + /* Map the object to the default GPU domain */ + ret = _kgsl_iommu_map(domain, memdesc); + + /* Also map the object to the LPAC domain if it exists */ + lpac = to_iommu_domain(&iommu->lpac_context); + + if (!ret && lpac) { + ret = _kgsl_iommu_map(lpac, memdesc); + + /* On failure, also unmap from the default domain */ + if (ret) + _kgsl_iommu_unmap(domain, memdesc); + + } + + return ret; +} + +static int kgsl_iommu_secure_unmap(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc) +{ + struct kgsl_iommu *iommu = &pagetable->mmu->iommu; + + if (memdesc->size == 0 || memdesc->gpuaddr == 0) + return -EINVAL; + + return _kgsl_iommu_unmap(to_iommu_domain(&iommu->secure_context), + memdesc); +} + +static int kgsl_iommu_default_unmap(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc) +{ + struct kgsl_mmu *mmu = pagetable->mmu; + struct kgsl_iommu *iommu = &mmu->iommu; + int ret; + + if (memdesc->size == 0 || memdesc->gpuaddr == 0) + return -EINVAL; + + if (is_lower_address(mmu, memdesc->gpuaddr)) + return kgsl_iopgtbl_unmap(pagetable, memdesc); + + /* Unmap from the default domain */ + ret = _kgsl_iommu_unmap(to_iommu_domain(&iommu->user_context), memdesc); + + /* Unmap from the LPAC domain if it exists */ + ret |= _kgsl_iommu_unmap(to_iommu_domain(&iommu->lpac_context), memdesc); + return ret; +} + +static bool kgsl_iommu_addr_is_global(struct kgsl_mmu *mmu, u64 addr) +{ + if (test_bit(KGSL_MMU_IOPGTABLE, &mmu->features)) + return (addr >= KGSL_IOMMU_SPLIT_TABLE_BASE); + + return ((addr >= KGSL_IOMMU_GLOBAL_MEM_BASE(mmu)) && + (addr < KGSL_IOMMU_GLOBAL_MEM_BASE(mmu) + + KGSL_IOMMU_GLOBAL_MEM_SIZE)); +} + +static void __iomem *kgsl_iommu_reg(struct kgsl_iommu_context *ctx, + u32 offset) +{ + struct kgsl_iommu *iommu = KGSL_IOMMU(ctx->kgsldev); + + if (!iommu->cb0_offset) { + u32 reg = + readl_relaxed(iommu->regbase + KGSL_IOMMU_IDR1_OFFSET); + + iommu->pagesize = + FIELD_GET(IDR1_PAGESIZE, reg) ? SZ_64K : SZ_4K; + + /* + * The number of pages in the global address space or + * translation bank address space is 2^(NUMPAGENDXB + 1). 
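+ * For example, with a 4K SMMU page size and NUMPAGENDXB = 2 the global
+ * space is 2^3 = 8 pages, so cb0_offset works out to 32K and context
+ * bank N's registers start at regbase + 32K + N * 4K.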
+ */ + iommu->cb0_offset = iommu->pagesize * + (1 << (FIELD_GET(IDR1_NUMPAGENDXB, reg) + 1)); + } + + return (void __iomem *) (iommu->regbase + iommu->cb0_offset + + (ctx->cb_num * iommu->pagesize) + offset); +} + +static u64 KGSL_IOMMU_GET_CTX_REG_Q(struct kgsl_iommu_context *ctx, u32 offset) +{ + void __iomem *addr = kgsl_iommu_reg(ctx, offset); + + return readq_relaxed(addr); +} + +static void KGSL_IOMMU_SET_CTX_REG(struct kgsl_iommu_context *ctx, u32 offset, + u32 val) +{ + void __iomem *addr = kgsl_iommu_reg(ctx, offset); + + writel_relaxed(val, addr); +} + +static u32 KGSL_IOMMU_GET_CTX_REG(struct kgsl_iommu_context *ctx, u32 offset) +{ + void __iomem *addr = kgsl_iommu_reg(ctx, offset); + + return readl_relaxed(addr); +} + +static int kgsl_iommu_get_gpuaddr(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc); + +static void kgsl_iommu_map_secure_global(struct kgsl_mmu *mmu, + struct kgsl_memdesc *memdesc) +{ + if (IS_ERR_OR_NULL(mmu->securepagetable)) + return; + + if (!memdesc->gpuaddr) { + int ret = kgsl_iommu_get_gpuaddr(mmu->securepagetable, + memdesc); + + if (WARN_ON(ret)) + return; + } + + kgsl_iommu_secure_map(mmu->securepagetable, memdesc); +} + +#define KGSL_GLOBAL_MEM_PAGES (KGSL_IOMMU_GLOBAL_MEM_SIZE >> PAGE_SHIFT) + +static u64 global_get_offset(struct kgsl_device *device, u64 size, + unsigned long priv) +{ + int start = 0, bit; + + if (!device->global_map) { + device->global_map = + kcalloc(BITS_TO_LONGS(KGSL_GLOBAL_MEM_PAGES), + sizeof(unsigned long), GFP_KERNEL); + if (!device->global_map) + return (unsigned long) -ENOMEM; + } + + if (priv & KGSL_MEMDESC_RANDOM) { + u32 offset = KGSL_GLOBAL_MEM_PAGES - (size >> PAGE_SHIFT); + + start = get_random_int() % offset; + } + + while (start >= 0) { + bit = bitmap_find_next_zero_area(device->global_map, + KGSL_GLOBAL_MEM_PAGES, start, size >> PAGE_SHIFT, 0); + + if (bit < KGSL_GLOBAL_MEM_PAGES) + break; + + /* + * Later implementations might want to randomize this to reduce + * predictability + */ + start--; + } + + if (WARN_ON(start < 0)) + return (unsigned long) -ENOMEM; + + bitmap_set(device->global_map, bit, size >> PAGE_SHIFT); + + return bit << PAGE_SHIFT; +} + +static void kgsl_iommu_map_global(struct kgsl_mmu *mmu, + struct kgsl_memdesc *memdesc, u32 padding) +{ + struct kgsl_device *device = KGSL_MMU_DEVICE(mmu); + + if (memdesc->flags & KGSL_MEMFLAGS_SECURE) { + kgsl_iommu_map_secure_global(mmu, memdesc); + return; + } + + if (!memdesc->gpuaddr) { + u64 offset; + + offset = global_get_offset(device, memdesc->size + padding, + memdesc->priv); + + if (IS_ERR_VALUE(offset)) + return; + + memdesc->gpuaddr = mmu->defaultpagetable->global_base + offset; + } + + kgsl_iommu_default_map(mmu->defaultpagetable, memdesc); +} + +/* Print the mem entry for the pagefault debugging */ +static void print_entry(struct device *dev, struct kgsl_mem_entry *entry, + pid_t pid) +{ + char name[32]; + + if (!entry) { + dev_crit(dev, "**EMPTY**\n"); + return; + } + + kgsl_get_memory_usage(name, sizeof(name), entry->memdesc.flags); + + dev_err(dev, "[%016llX - %016llX] %s %s (pid = %d) (%s)\n", + entry->memdesc.gpuaddr, + entry->memdesc.gpuaddr + entry->memdesc.size - 1, + entry->memdesc.priv & KGSL_MEMDESC_GUARD_PAGE ? "(+guard)" : "", + entry->pending_free ? 
"(pending free)" : "", + pid, name); +} + +/* Check if the address in the list of recently freed memory */ +static void kgsl_iommu_check_if_freed(struct device *dev, + struct kgsl_iommu_context *context, u64 addr, u32 ptname) +{ + uint64_t gpuaddr = addr; + uint64_t size = 0; + uint64_t flags = 0; + char name[32]; + pid_t pid; + + if (!kgsl_memfree_find_entry(ptname, &gpuaddr, &size, &flags, &pid)) + return; + + kgsl_get_memory_usage(name, sizeof(name), flags); + + dev_err(dev, "---- premature free ----\n"); + dev_err(dev, "[%8.8llX-%8.8llX] (%s) was already freed by pid %d\n", + gpuaddr, gpuaddr + size, name, pid); +} + +static struct kgsl_process_private *kgsl_iommu_get_process(u64 ptbase) +{ + struct kgsl_process_private *p; + struct kgsl_iommu_pt *iommu_pt; + + read_lock(&kgsl_driver.proclist_lock); + + list_for_each_entry(p, &kgsl_driver.process_list, list) { + iommu_pt = to_iommu_pt(p->pagetable); + if (iommu_pt->ttbr0 == ptbase) { + if (!kgsl_process_private_get(p)) + p = NULL; + + read_unlock(&kgsl_driver.proclist_lock); + return p; + } + } + + read_unlock(&kgsl_driver.proclist_lock); + + return NULL; +} + +static void kgsl_iommu_print_fault(struct kgsl_mmu *mmu, + struct kgsl_iommu_context *ctxt, unsigned long addr, + u64 ptbase, u32 contextid, + int flags, struct kgsl_process_private *private, + struct kgsl_context *context) +{ + struct kgsl_device *device = KGSL_MMU_DEVICE(mmu); + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + struct kgsl_mem_entry *prev = NULL, *next = NULL, *entry; + const char *fault_type; + const char *comm = NULL; + u32 ptname = KGSL_MMU_GLOBAL_PT; + int id; + + if (private) { + comm = private->comm; + ptname = pid_nr(private->pid); + } + + trace_kgsl_mmu_pagefault(device, addr, + ptname, comm, + (flags & IOMMU_FAULT_WRITE) ? "write" : "read"); + + if (flags & IOMMU_FAULT_TRANSLATION) + fault_type = "translation"; + else if (flags & IOMMU_FAULT_PERMISSION) + fault_type = "permission"; + else if (flags & IOMMU_FAULT_EXTERNAL) + fault_type = "external"; + else if (flags & IOMMU_FAULT_TRANSACTION_STALLED) + fault_type = "transaction stalled"; + + /* FIXME: This seems buggy */ + if (test_bit(KGSL_FT_PAGEFAULT_LOG_ONE_PER_PAGE, &mmu->pfpolicy)) + if (!kgsl_mmu_log_fault_addr(mmu, ptbase, addr)) + return; + + if (!__ratelimit(&ctxt->ratelimit)) + return; + + dev_crit(device->dev, + "GPU PAGE FAULT: addr = %lX pid= %d name=%s drawctxt=%d context pid = %d\n", addr, + ptname, comm, contextid, context ? context->tid : 0); + + dev_crit(device->dev, + "context=%s TTBR0=0x%llx (%s %s fault)\n", + ctxt->name, ptbase, + (flags & IOMMU_FAULT_WRITE) ? "write" : "read", fault_type); + + if (gpudev->iommu_fault_block) { + u32 fsynr1 = KGSL_IOMMU_GET_CTX_REG(ctxt, + KGSL_IOMMU_CTX_FSYNR1); + + dev_crit(device->dev, + "FAULTING BLOCK: %s\n", + gpudev->iommu_fault_block(device, fsynr1)); + } + + /* Don't print the debug if this is a permissions fault */ + if ((flags & IOMMU_FAULT_PERMISSION)) + return; + + kgsl_iommu_check_if_freed(device->dev, ctxt, addr, ptname); + + /* + * Don't print any debug information if the address is + * in the global region. 
These are rare and nobody needs + * to know the addresses that are in here + */ + if (kgsl_iommu_addr_is_global(mmu, addr)) { + dev_crit(device->dev, "Fault in global memory\n"); + return; + } + + if (!private) + return; + + dev_crit(device->dev, "---- nearby memory ----\n"); + + spin_lock(&private->mem_lock); + idr_for_each_entry(&private->mem_idr, entry, id) { + u64 cur = entry->memdesc.gpuaddr; + + if (cur < addr) { + if (!prev || prev->memdesc.gpuaddr < cur) + prev = entry; + } + + if (cur > addr) { + if (!next || next->memdesc.gpuaddr > cur) + next = entry; + } + } + + print_entry(device->dev, prev, pid_nr(private->pid)); + dev_crit(device->dev, "<- fault @ %16.16lx\n", addr); + print_entry(device->dev, next, pid_nr(private->pid)); + + spin_unlock(&private->mem_lock); +} + +/* + * Return true if the IOMMU should stall and trigger a snasphot on a pagefault + */ +static bool kgsl_iommu_check_stall_on_fault(struct kgsl_iommu_context *ctx, + struct kgsl_mmu *mmu, int flags) +{ + struct kgsl_device *device = KGSL_MMU_DEVICE(mmu); + + if (!(flags & IOMMU_FAULT_TRANSACTION_STALLED)) + return false; + + if (!test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, &mmu->pfpolicy)) + return false; + + /* + * Sometimes, there can be multiple invocations of the fault handler. + * Make sure we trigger reset/recovery only once. + */ + if (ctx->stalled_on_fault) + return false; + + if (!mutex_trylock(&device->mutex)) + return true; + + /* + * Turn off GPU IRQ so we don't get faults from it too. + * The device mutex must be held to change power state + */ + if (gmu_core_isenabled(device)) + kgsl_pwrctrl_irq(device, false); + else + kgsl_pwrctrl_change_state(device, KGSL_STATE_AWARE); + + mutex_unlock(&device->mutex); + return true; +} + +static int kgsl_iommu_fault_handler(struct kgsl_mmu *mmu, + struct kgsl_iommu_context *ctx, unsigned long addr, int flags) +{ + struct kgsl_device *device = KGSL_MMU_DEVICE(mmu); + u64 ptbase; + u32 contextidr; + bool stall; + struct kgsl_process_private *private; + struct kgsl_context *context; + + ptbase = KGSL_IOMMU_GET_CTX_REG_Q(ctx, KGSL_IOMMU_CTX_TTBR0); + contextidr = KGSL_IOMMU_GET_CTX_REG(ctx, KGSL_IOMMU_CTX_CONTEXTIDR); + + private = kgsl_iommu_get_process(ptbase); + context = kgsl_context_get(device, contextidr); + + stall = kgsl_iommu_check_stall_on_fault(ctx, mmu, flags); + + kgsl_iommu_print_fault(mmu, ctx, addr, ptbase, contextidr, flags, private, + context); + + if (stall) { + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + u32 sctlr; + + /* + * Disable context fault interrupts as we do not clear FSR in + * the ISR. Will be re-enabled after FSR is cleared. + */ + sctlr = KGSL_IOMMU_GET_CTX_REG(ctx, KGSL_IOMMU_CTX_SCTLR); + sctlr &= ~(0x1 << KGSL_IOMMU_SCTLR_CFIE_SHIFT); + KGSL_IOMMU_SET_CTX_REG(ctx, KGSL_IOMMU_CTX_SCTLR, sctlr); + + /* This is used by reset/recovery path */ + ctx->stalled_on_fault = true; + + /* Go ahead with recovery*/ + if (adreno_dev->dispatch_ops && adreno_dev->dispatch_ops->fault) + adreno_dev->dispatch_ops->fault(adreno_dev, + ADRENO_IOMMU_PAGE_FAULT); + } + + kgsl_context_put(context); + kgsl_process_private_put(private); + + /* Return -EBUSY to keep the IOMMU driver from resuming on a stall */ + return stall ? 
-EBUSY : 0; +} + +static int kgsl_iommu_default_fault_handler(struct iommu_domain *domain, + struct device *dev, unsigned long addr, int flags, void *token) +{ + struct kgsl_mmu *mmu = token; + struct kgsl_iommu *iommu = &mmu->iommu; + + return kgsl_iommu_fault_handler(mmu, &iommu->user_context, + addr, flags); +} + +static int kgsl_iommu_lpac_fault_handler(struct iommu_domain *domain, + struct device *dev, unsigned long addr, int flags, void *token) +{ + struct kgsl_mmu *mmu = token; + struct kgsl_iommu *iommu = &mmu->iommu; + struct kgsl_iommu_context *ctx = &iommu->lpac_context; + u32 fsynr0, fsynr1; + + fsynr0 = KGSL_IOMMU_GET_CTX_REG(ctx, KGSL_IOMMU_CTX_FSYNR0); + fsynr1 = KGSL_IOMMU_GET_CTX_REG(ctx, KGSL_IOMMU_CTX_FSYNR1); + + dev_crit(KGSL_MMU_DEVICE(mmu)->dev, + "LPAC PAGE FAULT iova=0x%16lx, fsynr0=0x%x, fsynr1=0x%x\n", + addr, fsynr0, fsynr1); + + return 0; +} + +static int kgsl_iommu_secure_fault_handler(struct iommu_domain *domain, + struct device *dev, unsigned long addr, int flags, void *token) +{ + struct kgsl_mmu *mmu = token; + struct kgsl_iommu *iommu = &mmu->iommu; + + return kgsl_iommu_fault_handler(mmu, &iommu->secure_context, + addr, flags); +} + +/* + * kgsl_iommu_disable_clk() - Disable iommu clocks + * Disable IOMMU clocks + */ +static void kgsl_iommu_disable_clk(struct kgsl_mmu *mmu) +{ + struct kgsl_iommu *iommu = &mmu->iommu; + + atomic_dec(&iommu->clk_enable_count); + + /* + * Make sure the clk refcounts are good. An unbalance may + * cause the clocks to be off when we need them on. + */ + WARN_ON(atomic_read(&iommu->clk_enable_count) < 0); + + clk_bulk_disable_unprepare(iommu->num_clks, iommu->clks); + + if (!IS_ERR_OR_NULL(iommu->cx_gdsc)) + regulator_disable(iommu->cx_gdsc); +} + +/* + * kgsl_iommu_enable_clk - Enable iommu clocks + * Enable all the IOMMU clocks + */ +static void kgsl_iommu_enable_clk(struct kgsl_mmu *mmu) +{ + struct kgsl_iommu *iommu = &mmu->iommu; + + if (!IS_ERR_OR_NULL(iommu->cx_gdsc)) + WARN_ON(regulator_enable(iommu->cx_gdsc)); + + clk_bulk_prepare_enable(iommu->num_clks, iommu->clks); + + atomic_inc(&iommu->clk_enable_count); +} + +/* kgsl_iommu_get_ttbr0 - Get TTBR0 setting for a pagetable */ +static u64 kgsl_iommu_get_ttbr0(struct kgsl_pagetable *pagetable) +{ + struct kgsl_iommu_pt *pt = to_iommu_pt(pagetable); + + /* This will be zero if KGSL_MMU_IOPGTABLE is not enabled */ + return pt->ttbr0; +} + +/* FIXME: This is broken for LPAC. 
For now return the default context bank */ +static int kgsl_iommu_get_context_bank(struct kgsl_pagetable *pt) +{ + struct kgsl_iommu *iommu = to_kgsl_iommu(pt); + struct iommu_domain *domain = to_iommu_domain(&iommu->user_context); + + return _iommu_domain_context_bank(domain); +} + +static void kgsl_iommu_destroy_default_pagetable(struct kgsl_pagetable *pagetable) +{ + struct kgsl_device *device = KGSL_MMU_DEVICE(pagetable->mmu); + struct kgsl_iommu *iommu = to_kgsl_iommu(pagetable); + struct kgsl_iommu_context *context = &iommu->user_context; + struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(&context->pdev->dev); + struct kgsl_iommu_pt *pt = to_iommu_pt(pagetable); + struct kgsl_global_memdesc *md; + + list_for_each_entry(md, &device->globals, node) { + if (md->memdesc.flags & KGSL_MEMFLAGS_SECURE) + continue; + + kgsl_iommu_default_unmap(pagetable, &md->memdesc); + } + + adreno_smmu->set_ttbr0_cfg(adreno_smmu->cookie, NULL); + + kfree(pt); +} + +static void kgsl_iommu_destroy_pagetable(struct kgsl_pagetable *pagetable) +{ + struct kgsl_iommu_pt *pt = to_iommu_pt(pagetable); + + kfree(pt); +} + +static void _enable_gpuhtw_llc(struct kgsl_mmu *mmu, struct iommu_domain *domain) +{ + int val = 1; + + if (!test_bit(KGSL_MMU_LLCC_ENABLE, &mmu->features)) + return; + + if (mmu->subtype == KGSL_IOMMU_SMMU_V500) + iommu_domain_set_attr(domain, DOMAIN_ATTR_USE_LLC_NWA, &val); + else + iommu_domain_set_attr(domain, DOMAIN_ATTR_USE_UPSTREAM_HINT, &val); +} + +static int set_smmu_aperture(struct kgsl_device *device, + struct kgsl_iommu_context *context) +{ + int ret; + + if (!test_bit(KGSL_MMU_SMMU_APERTURE, &device->mmu.features)) + return 0; + + ret = qcom_scm_kgsl_set_smmu_aperture(context->cb_num); + if (ret == -EBUSY) + ret = qcom_scm_kgsl_set_smmu_aperture(context->cb_num); + + if (ret) + dev_err(device->dev, "Unable to set the SMMU aperture: %d. 
The aperture needs to be set to use per-process pagetables\n", + ret); + + return ret; +} + +/* FIXME: better name feor this function */ +static int kgsl_iopgtbl_alloc(struct kgsl_iommu_context *ctx, struct kgsl_iommu_pt *pt) +{ + struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(&ctx->pdev->dev); + const struct io_pgtable_cfg *cfg = NULL; + + if (adreno_smmu->cookie) + cfg = adreno_smmu->get_ttbr1_cfg(adreno_smmu->cookie); + if (!cfg) + return -ENODEV; + + pt->cfg = *cfg; + pt->cfg.quirks &= ~IO_PGTABLE_QUIRK_ARM_TTBR1; + pt->cfg.tlb = &kgsl_iopgtbl_tlb_ops; + + pt->pgtbl_ops = alloc_io_pgtable_ops(ARM_64_LPAE_S1, &pt->cfg, NULL); + + if (!pt->pgtbl_ops) + return -ENOMEM; + + pt->ttbr0 = pt->cfg.arm_lpae_s1_cfg.ttbr; + + return 0; +} + +/* Enable TTBR0 for the given context with the specific configuration */ +static void kgsl_iommu_enable_ttbr0(struct kgsl_iommu_context *context, + struct kgsl_iommu_pt *pt) +{ + struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(&context->pdev->dev); + struct kgsl_mmu *mmu = pt->base.mmu; + + /* Quietly return if the context doesn't have a domain */ + if (!context->domain) + return; + + /* Enable CX and clocks before we call into SMMU to setup registers */ + kgsl_iommu_enable_clk(mmu); + adreno_smmu->set_ttbr0_cfg(adreno_smmu->cookie, &pt->cfg); + kgsl_iommu_disable_clk(mmu); +} + +static struct kgsl_pagetable *kgsl_iommu_default_pagetable(struct kgsl_mmu *mmu) +{ + struct kgsl_iommu *iommu = &mmu->iommu; + struct kgsl_iommu_pt *iommu_pt; + int ret; + + iommu_pt = kzalloc(sizeof(*iommu_pt), GFP_KERNEL); + if (!iommu_pt) + return ERR_PTR(-ENOMEM); + + kgsl_mmu_pagetable_init(mmu, &iommu_pt->base, KGSL_MMU_GLOBAL_PT); + + iommu_pt->base.fault_addr = U64_MAX; + iommu_pt->base.rbtree = RB_ROOT; + iommu_pt->base.pt_ops = &default_pt_ops; + + if (test_bit(KGSL_MMU_64BIT, &mmu->features)) { + iommu_pt->base.compat_va_start = KGSL_IOMMU_SVM_BASE32; + iommu_pt->base.compat_va_end = KGSL_IOMMU_GLOBAL_MEM_BASE(mmu); + iommu_pt->base.va_start = KGSL_IOMMU_VA_BASE64; + iommu_pt->base.va_end = KGSL_IOMMU_VA_END64; + + } else { + iommu_pt->base.va_start = KGSL_IOMMU_SVM_BASE32; + + if (mmu->secured) + iommu_pt->base.va_end = KGSL_IOMMU_SECURE_BASE(mmu); + else + iommu_pt->base.va_end = KGSL_IOMMU_GLOBAL_MEM_BASE(mmu); + + iommu_pt->base.compat_va_start = iommu_pt->base.va_start; + iommu_pt->base.compat_va_end = iommu_pt->base.va_end; + } + + if (!test_bit(KGSL_MMU_IOPGTABLE, &mmu->features)) { + iommu_pt->base.global_base = KGSL_IOMMU_GLOBAL_MEM_BASE(mmu); + + kgsl_mmu_pagetable_add(mmu, &iommu_pt->base); + return &iommu_pt->base; + } + + iommu_pt->base.global_base = KGSL_IOMMU_SPLIT_TABLE_BASE; + + /* + * Set up a "default' TTBR0 for the pagetable - this would only be used + * in cases when the per-process pagetable allocation failed for some + * reason + */ + ret = kgsl_iopgtbl_alloc(&iommu->user_context, iommu_pt); + if (ret) { + kfree(iommu_pt); + return ERR_PTR(ret); + } + + kgsl_mmu_pagetable_add(mmu, &iommu_pt->base); + return &iommu_pt->base; + +} + +#if IS_ENABLED(CONFIG_QCOM_SECURE_BUFFER) +static struct kgsl_pagetable *kgsl_iommu_secure_pagetable(struct kgsl_mmu *mmu) +{ + struct kgsl_iommu_pt *iommu_pt; + + if (!mmu->secured) + return ERR_PTR(-EPERM); + + iommu_pt = kzalloc(sizeof(*iommu_pt), GFP_KERNEL); + if (!iommu_pt) + return ERR_PTR(-ENOMEM); + + kgsl_mmu_pagetable_init(mmu, &iommu_pt->base, KGSL_MMU_SECURE_PT); + iommu_pt->base.fault_addr = U64_MAX; + iommu_pt->base.rbtree = RB_ROOT; + iommu_pt->base.pt_ops = &secure_pt_ops; + + 
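+ /*
+ * Both the compat and full VA ranges collapse to the secure carveout
+ * (KGSL_IOMMU_SECURE_BASE .. KGSL_IOMMU_SECURE_END); on a 64-bit
+ * target that is 0x0100000000 - 0x01FFFFF000 per the defines in
+ * kgsl_iommu.h.
+ */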
iommu_pt->base.compat_va_start = KGSL_IOMMU_SECURE_BASE(mmu); + iommu_pt->base.compat_va_end = KGSL_IOMMU_SECURE_END(mmu); + iommu_pt->base.va_start = KGSL_IOMMU_SECURE_BASE(mmu); + iommu_pt->base.va_end = KGSL_IOMMU_SECURE_END(mmu); + + kgsl_mmu_pagetable_add(mmu, &iommu_pt->base); + return &iommu_pt->base; +} +#else +static struct kgsl_pagetable *kgsl_iommu_secure_pagetable(struct kgsl_mmu *mmu) +{ + return ERR_PTR(-EPERM); +} +#endif + +static struct kgsl_pagetable *kgsl_iopgtbl_pagetable(struct kgsl_mmu *mmu, u32 name) +{ + struct kgsl_iommu *iommu = &mmu->iommu; + struct kgsl_iommu_pt *pt; + int ret; + + pt = kzalloc(sizeof(*pt), GFP_KERNEL); + if (!pt) + return ERR_PTR(-ENOMEM); + + kgsl_mmu_pagetable_init(mmu, &pt->base, name); + + pt->base.fault_addr = U64_MAX; + pt->base.rbtree = RB_ROOT; + pt->base.pt_ops = &iopgtbl_pt_ops; + + if (test_bit(KGSL_MMU_64BIT, &mmu->features)) { + pt->base.compat_va_start = KGSL_IOMMU_SVM_BASE32; + pt->base.compat_va_end = KGSL_IOMMU_GLOBAL_MEM_BASE(mmu); + pt->base.va_start = KGSL_IOMMU_VA_BASE64; + pt->base.va_end = KGSL_IOMMU_VA_END64; + + if (is_compat_task()) { + pt->base.svm_start = KGSL_IOMMU_SVM_BASE32; + pt->base.svm_end = KGSL_IOMMU_GLOBAL_MEM_BASE(mmu); + } else { + pt->base.svm_start = KGSL_IOMMU_SVM_BASE64; + pt->base.svm_end = KGSL_IOMMU_SVM_END64; + } + + } else { + pt->base.va_start = KGSL_IOMMU_SVM_BASE32; + + if (mmu->secured) + pt->base.va_end = KGSL_IOMMU_SECURE_BASE(mmu); + else + pt->base.va_end = KGSL_IOMMU_GLOBAL_MEM_BASE(mmu); + + pt->base.compat_va_start = pt->base.va_start; + pt->base.compat_va_end = pt->base.va_end; + pt->base.svm_start = KGSL_IOMMU_SVM_BASE32; + pt->base.svm_end = KGSL_IOMMU_SVM_END32; + } + + ret = kgsl_iopgtbl_alloc(&iommu->user_context, pt); + if (ret) { + kfree(pt); + return ERR_PTR(ret); + } + + kgsl_mmu_pagetable_add(mmu, &pt->base); + return &pt->base; +} + +static struct kgsl_pagetable *kgsl_iommu_getpagetable(struct kgsl_mmu *mmu, + unsigned long name) +{ + struct kgsl_pagetable *pt; + + /* If we already know the pagetable, return it */ + pt = kgsl_get_pagetable(name); + if (pt) + return pt; + + /* If io-pgtables are not in effect, just use the default pagetable */ + if (!test_bit(KGSL_MMU_IOPGTABLE, &mmu->features)) + return mmu->defaultpagetable; + + pt = kgsl_iopgtbl_pagetable(mmu, name); + + /* + * If the io-pgtable allocation didn't work then fall back to the + * default pagetable for this cycle + */ + if (!pt) + return mmu->defaultpagetable; + + return pt; +} + +static void kgsl_iommu_detach_context(struct kgsl_iommu_context *context) +{ + if (!context->domain) + return; + + iommu_detach_device(context->domain, &context->pdev->dev); + iommu_domain_free(context->domain); + + context->domain = NULL; + + platform_device_put(context->pdev); + + context->pdev = NULL; +} + +static void kgsl_iommu_close(struct kgsl_mmu *mmu) +{ + struct kgsl_iommu *iommu = &mmu->iommu; + + /* First put away the default pagetables */ + kgsl_mmu_putpagetable(mmu->defaultpagetable); + mmu->defaultpagetable = NULL; + + kgsl_mmu_putpagetable(mmu->securepagetable); + mmu->securepagetable = NULL; + + /* Next, detach the context banks */ + kgsl_iommu_detach_context(&iommu->user_context); + kgsl_iommu_detach_context(&iommu->lpac_context); + kgsl_iommu_detach_context(&iommu->secure_context); + + kgsl_free_secure_page(kgsl_secure_guard_page); + kgsl_secure_guard_page = NULL; + + if (kgsl_guard_page != NULL) { + __free_page(kgsl_guard_page); + kgsl_guard_page = NULL; + } + + of_platform_depopulate(&iommu->pdev->dev); + 
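+ /* Drop the reference taken by of_find_device_by_node() at probe time */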
platform_device_put(iommu->pdev); + + kmem_cache_destroy(addr_entry_cache); + addr_entry_cache = NULL; +} + +/* Program the PRR marker and enable it in the ACTLR register */ +static void _iommu_context_set_prr(struct kgsl_mmu *mmu, + struct kgsl_iommu_context *ctx) +{ + struct kgsl_iommu *iommu = &mmu->iommu; + struct page *page = kgsl_vbo_zero_page; + u32 val; + + if (ctx->cb_num < 0) + return; + + if (!page) + return; + + writel_relaxed(lower_32_bits(page_to_phys(page)), + iommu->regbase + KGSL_IOMMU_PRR_CFG_LADDR); + + writel_relaxed(upper_32_bits(page_to_phys(page)), + iommu->regbase + KGSL_IOMMU_PRR_CFG_UADDR); + + val = KGSL_IOMMU_GET_CTX_REG(ctx, KGSL_IOMMU_CTX_ACTLR); + val |= FIELD_PREP(KGSL_IOMMU_ACTLR_PRR_ENABLE, 1); + KGSL_IOMMU_SET_CTX_REG(ctx, KGSL_IOMMU_CTX_ACTLR, val); + + /* Make sure all of the preceding writes have posted */ + wmb(); +} + +static void _setup_user_context(struct kgsl_mmu *mmu) +{ + unsigned int sctlr_val; + struct kgsl_iommu_context *ctx = &mmu->iommu.user_context; + + sctlr_val = KGSL_IOMMU_GET_CTX_REG(ctx, KGSL_IOMMU_CTX_SCTLR); + + /* + * If pagefault policy is GPUHALT_ENABLE, + * 1) Program CFCFG to 1 to enable STALL mode + * 2) Program HUPCF to 0 (Stall or terminate subsequent + * transactions in the presence of an outstanding fault) + * else + * 1) Program CFCFG to 0 to disable STALL mode (0=Terminate) + * 2) Program HUPCF to 1 (Process subsequent transactions + * independently of any outstanding fault) + */ + + if (test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, &mmu->pfpolicy)) { + sctlr_val |= (0x1 << KGSL_IOMMU_SCTLR_CFCFG_SHIFT); + sctlr_val &= ~(0x1 << KGSL_IOMMU_SCTLR_HUPCF_SHIFT); + } else { + sctlr_val &= ~(0x1 << KGSL_IOMMU_SCTLR_CFCFG_SHIFT); + sctlr_val |= (0x1 << KGSL_IOMMU_SCTLR_HUPCF_SHIFT); + } + KGSL_IOMMU_SET_CTX_REG(ctx, KGSL_IOMMU_CTX_SCTLR, sctlr_val); +} + +static int kgsl_iommu_start(struct kgsl_mmu *mmu) +{ + struct kgsl_iommu *iommu = &mmu->iommu; + + kgsl_iommu_enable_clk(mmu); + + /* Set the following registers only when the MMU type is QSMMU */ + if (mmu->subtype != KGSL_IOMMU_SMMU_V500) { + /* Enable hazard check from GPU_SMMU_HUM_CFG */ + writel_relaxed(0x02, iommu->regbase + 0x6800); + + /* Write to GPU_SMMU_DORA_ORDERING to disable reordering */ + writel_relaxed(0x01, iommu->regbase + 0x64a0); + + /* make sure register write committed */ + wmb(); + } + + /* FIXME: We would need to program stall on fault for LPAC too */ + _setup_user_context(mmu); + + _iommu_context_set_prr(mmu, &iommu->user_context); + if (mmu->secured) + _iommu_context_set_prr(mmu, &iommu->secure_context); + _iommu_context_set_prr(mmu, &iommu->lpac_context); + + kgsl_iommu_disable_clk(mmu); + return 0; +} + +static void kgsl_iommu_clear_fsr(struct kgsl_mmu *mmu) +{ + struct kgsl_iommu *iommu = &mmu->iommu; + struct kgsl_iommu_context *ctx = &iommu->user_context; + unsigned int sctlr_val; + + if (ctx->stalled_on_fault) { + kgsl_iommu_enable_clk(mmu); + KGSL_IOMMU_SET_CTX_REG(ctx, KGSL_IOMMU_CTX_FSR, 0xffffffff); + /* + * Re-enable context fault interrupts after clearing + * FSR to prevent the interrupt from firing repeatedly + */ + sctlr_val = KGSL_IOMMU_GET_CTX_REG(ctx, KGSL_IOMMU_CTX_SCTLR); + sctlr_val |= (0x1 << KGSL_IOMMU_SCTLR_CFIE_SHIFT); + KGSL_IOMMU_SET_CTX_REG(ctx, KGSL_IOMMU_CTX_SCTLR, sctlr_val); + /* + * Make sure the above register writes + * are not reordered across the barrier + * as we use writel_relaxed to write them + */ + wmb(); + kgsl_iommu_disable_clk(mmu); + ctx->stalled_on_fault = false; + } +} + +static void 
kgsl_iommu_pagefault_resume(struct kgsl_mmu *mmu, bool terminate) +{ + struct kgsl_iommu *iommu = &mmu->iommu; + struct kgsl_iommu_context *ctx = &iommu->user_context; + u32 sctlr_val = 0; + + if (!ctx->stalled_on_fault) + return; + + if (!terminate) + goto clear_fsr; + + sctlr_val = KGSL_IOMMU_GET_CTX_REG(ctx, KGSL_IOMMU_CTX_SCTLR); + /* + * As part of recovery, GBIF halt sequence should be performed. + * In a worst case scenario, if any GPU block is generating a + * stream of un-ending faulting transactions, SMMU would enter + * stall-on-fault mode again after resuming and not let GBIF + * halt succeed. In order to avoid that situation and terminate + * those faulty transactions, set CFCFG and HUPCF to 0. + */ + sctlr_val &= ~(0x1 << KGSL_IOMMU_SCTLR_CFCFG_SHIFT); + sctlr_val &= ~(0x1 << KGSL_IOMMU_SCTLR_HUPCF_SHIFT); + KGSL_IOMMU_SET_CTX_REG(ctx, KGSL_IOMMU_CTX_SCTLR, sctlr_val); + /* + * Make sure the above register write is not reordered across + * the barrier as we use writel_relaxed to write it. + */ + wmb(); + +clear_fsr: + /* + * This will only clear fault bits in FSR. FSR.SS will still + * be set. Writing to RESUME (below) is the only way to clear + * FSR.SS bit. + */ + KGSL_IOMMU_SET_CTX_REG(ctx, KGSL_IOMMU_CTX_FSR, 0xffffffff); + /* + * Make sure the above register write is not reordered across + * the barrier as we use writel_relaxed to write it. + */ + wmb(); + + /* + * Write 1 to RESUME.TnR to terminate the stalled transaction. + * This will also allow the SMMU to process new transactions. + */ + KGSL_IOMMU_SET_CTX_REG(ctx, KGSL_IOMMU_CTX_RESUME, 1); + /* + * Make sure the above register writes are not reordered across + * the barrier as we use writel_relaxed to write them. + */ + wmb(); +} + +static u64 +kgsl_iommu_get_current_ttbr0(struct kgsl_mmu *mmu) +{ + u64 val; + struct kgsl_iommu *iommu = &mmu->iommu; + struct kgsl_iommu_context *ctx = &iommu->user_context; + + /* + * We cannot enable or disable the clocks in interrupt context, this + * function is called from interrupt context if there is an axi error + */ + if (in_interrupt()) + return 0; + + if (ctx->cb_num < 0) + return 0; + + kgsl_iommu_enable_clk(mmu); + val = KGSL_IOMMU_GET_CTX_REG_Q(ctx, KGSL_IOMMU_CTX_TTBR0); + kgsl_iommu_disable_clk(mmu); + return val; +} + +/* + * kgsl_iommu_set_pf_policy() - Set the pagefault policy for IOMMU + * @mmu: Pointer to mmu structure + * @pf_policy: The pagefault polict to set + * + * Check if the new policy indicated by pf_policy is same as current + * policy, if same then return else set the policy + */ +static int kgsl_iommu_set_pf_policy(struct kgsl_mmu *mmu, + unsigned long pf_policy) +{ + struct kgsl_iommu *iommu = &mmu->iommu; + struct kgsl_iommu_context *ctx = &iommu->user_context; + unsigned int sctlr_val; + int cur, new; + + cur = test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, &mmu->pfpolicy); + new = test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, &pf_policy); + + if (cur == new) + return 0; + + kgsl_iommu_enable_clk(mmu); + + sctlr_val = KGSL_IOMMU_GET_CTX_REG(ctx, KGSL_IOMMU_CTX_SCTLR); + + if (test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, &pf_policy)) { + sctlr_val |= (0x1 << KGSL_IOMMU_SCTLR_CFCFG_SHIFT); + sctlr_val &= ~(0x1 << KGSL_IOMMU_SCTLR_HUPCF_SHIFT); + } else { + sctlr_val &= ~(0x1 << KGSL_IOMMU_SCTLR_CFCFG_SHIFT); + sctlr_val |= (0x1 << KGSL_IOMMU_SCTLR_HUPCF_SHIFT); + } + + KGSL_IOMMU_SET_CTX_REG(ctx, KGSL_IOMMU_CTX_SCTLR, sctlr_val); + + kgsl_iommu_disable_clk(mmu); + return 0; +} + +static struct kgsl_iommu_addr_entry *_find_gpuaddr( + struct kgsl_pagetable 
*pagetable, uint64_t gpuaddr) +{ + struct rb_node *node = pagetable->rbtree.rb_node; + + while (node != NULL) { + struct kgsl_iommu_addr_entry *entry = rb_entry(node, + struct kgsl_iommu_addr_entry, node); + + if (gpuaddr < entry->base) + node = node->rb_left; + else if (gpuaddr > entry->base) + node = node->rb_right; + else + return entry; + } + + return NULL; +} + +static int _remove_gpuaddr(struct kgsl_pagetable *pagetable, + uint64_t gpuaddr) +{ + struct kgsl_iommu_addr_entry *entry; + + entry = _find_gpuaddr(pagetable, gpuaddr); + + if (WARN(!entry, "GPU address %llx doesn't exist\n", gpuaddr)) + return -ENOMEM; + + rb_erase(&entry->node, &pagetable->rbtree); + kmem_cache_free(addr_entry_cache, entry); + return 0; +} + +static int _insert_gpuaddr(struct kgsl_pagetable *pagetable, + uint64_t gpuaddr, uint64_t size) +{ + struct rb_node **node, *parent = NULL; + struct kgsl_iommu_addr_entry *new = + kmem_cache_alloc(addr_entry_cache, GFP_ATOMIC); + + if (new == NULL) + return -ENOMEM; + + new->base = gpuaddr; + new->size = size; + + node = &pagetable->rbtree.rb_node; + + while (*node != NULL) { + struct kgsl_iommu_addr_entry *this; + + parent = *node; + this = rb_entry(parent, struct kgsl_iommu_addr_entry, node); + + if (new->base < this->base) + node = &parent->rb_left; + else if (new->base > this->base) + node = &parent->rb_right; + else { + /* Duplicate entry */ + WARN(1, "duplicate gpuaddr: 0x%llx\n", gpuaddr); + kmem_cache_free(addr_entry_cache, new); + return -EEXIST; + } + } + + rb_link_node(&new->node, parent, node); + rb_insert_color(&new->node, &pagetable->rbtree); + + return 0; +} + +static uint64_t _get_unmapped_area(struct kgsl_pagetable *pagetable, + uint64_t bottom, uint64_t top, uint64_t size, + uint64_t align) +{ + struct rb_node *node = rb_first(&pagetable->rbtree); + uint64_t start; + + bottom = ALIGN(bottom, align); + start = bottom; + + while (node != NULL) { + uint64_t gap; + struct kgsl_iommu_addr_entry *entry = rb_entry(node, + struct kgsl_iommu_addr_entry, node); + + /* + * Skip any entries that are outside of the range, but make sure + * to account for some that might straddle the lower bound + */ + if (entry->base < bottom) { + if (entry->base + entry->size > bottom) + start = ALIGN(entry->base + entry->size, align); + node = rb_next(node); + continue; + } + + /* Stop if we went over the top */ + if (entry->base >= top) + break; + + /* Make sure there is a gap to consider */ + if (start < entry->base) { + gap = entry->base - start; + + if (gap >= size) + return start; + } + + /* Stop if there is no more room in the region */ + if (entry->base + entry->size >= top) + return (uint64_t) -ENOMEM; + + /* Start the next cycle at the end of the current entry */ + start = ALIGN(entry->base + entry->size, align); + node = rb_next(node); + } + + if (start + size <= top) + return start; + + return (uint64_t) -ENOMEM; +} + +static uint64_t _get_unmapped_area_topdown(struct kgsl_pagetable *pagetable, + uint64_t bottom, uint64_t top, uint64_t size, + uint64_t align) +{ + struct rb_node *node = rb_last(&pagetable->rbtree); + uint64_t end = top; + uint64_t mask = ~(align - 1); + struct kgsl_iommu_addr_entry *entry; + + /* Make sure that the bottom is correctly aligned */ + bottom = ALIGN(bottom, align); + + /* Make sure the requested size will fit in the range */ + if (size > (top - bottom)) + return -ENOMEM; + + /* Walk back through the list to find the highest entry in the range */ + for (node = rb_last(&pagetable->rbtree); node != NULL; node = rb_prev(node)) { + entry = 
rb_entry(node, struct kgsl_iommu_addr_entry, node); + if (entry->base < top) + break; + } + + while (node != NULL) { + uint64_t offset; + + entry = rb_entry(node, struct kgsl_iommu_addr_entry, node); + + /* If the entire entry is below the range the search is over */ + if ((entry->base + entry->size) < bottom) + break; + + /* Get the top of the entry properly aligned */ + offset = ALIGN(entry->base + entry->size, align); + + /* + * Try to allocate the memory from the top of the gap, + * making sure that it fits between the top of this entry and + * the bottom of the previous one + */ + + if ((end > size) && (offset < end)) { + uint64_t chunk = (end - size) & mask; + + if (chunk >= offset) + return chunk; + } + + /* + * If we get here and the current entry is outside of the range + * then we are officially out of room + */ + + if (entry->base < bottom) + return (uint64_t) -ENOMEM; + + /* Set the top of the gap to the current entry->base */ + end = entry->base; + + /* And move on to the next lower entry */ + node = rb_prev(node); + } + + /* If we get here then there are no more entries in the region */ + if ((end > size) && (((end - size) & mask) >= bottom)) + return (end - size) & mask; + + return (uint64_t) -ENOMEM; +} + +static uint64_t kgsl_iommu_find_svm_region(struct kgsl_pagetable *pagetable, + uint64_t start, uint64_t end, uint64_t size, + uint64_t alignment) +{ + uint64_t addr; + + /* Avoid black holes */ + if (WARN(end <= start, "Bad search range: 0x%llx-0x%llx", start, end)) + return (uint64_t) -EINVAL; + + spin_lock(&pagetable->lock); + addr = _get_unmapped_area_topdown(pagetable, + start, end, size, alignment); + spin_unlock(&pagetable->lock); + return addr; +} + +static bool iommu_addr_in_svm_ranges(struct kgsl_pagetable *pagetable, + u64 gpuaddr, u64 size) +{ + if ((gpuaddr >= pagetable->compat_va_start && gpuaddr < pagetable->compat_va_end) && + ((gpuaddr + size) > pagetable->compat_va_start && + (gpuaddr + size) <= pagetable->compat_va_end)) + return true; + + if ((gpuaddr >= pagetable->svm_start && gpuaddr < pagetable->svm_end) && + ((gpuaddr + size) > pagetable->svm_start && + (gpuaddr + size) <= pagetable->svm_end)) + return true; + + return false; +} + +static int kgsl_iommu_set_svm_region(struct kgsl_pagetable *pagetable, + uint64_t gpuaddr, uint64_t size) +{ + int ret = -ENOMEM; + struct rb_node *node; + + /* Make sure the requested address doesn't fall out of SVM range */ + if (!iommu_addr_in_svm_ranges(pagetable, gpuaddr, size)) + return -ENOMEM; + + spin_lock(&pagetable->lock); + node = pagetable->rbtree.rb_node; + + while (node != NULL) { + uint64_t start, end; + struct kgsl_iommu_addr_entry *entry = rb_entry(node, + struct kgsl_iommu_addr_entry, node); + + start = entry->base; + end = entry->base + entry->size; + + if (gpuaddr + size <= start) + node = node->rb_left; + else if (end <= gpuaddr) + node = node->rb_right; + else + goto out; + } + + ret = _insert_gpuaddr(pagetable, gpuaddr, size); +out: + spin_unlock(&pagetable->lock); + return ret; +} + + +static int kgsl_iommu_get_gpuaddr(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc) +{ + int ret = 0; + uint64_t addr, start, end, size; + unsigned int align; + + if (WARN_ON(kgsl_memdesc_use_cpu_map(memdesc))) + return -EINVAL; + + if (memdesc->flags & KGSL_MEMFLAGS_SECURE && + pagetable->name != KGSL_MMU_SECURE_PT) + return -EINVAL; + + size = kgsl_memdesc_footprint(memdesc); + + align = max_t(uint64_t, 1 << kgsl_memdesc_get_align(memdesc), + PAGE_SIZE); + + if (memdesc->flags & 
KGSL_MEMFLAGS_FORCE_32BIT) { + start = pagetable->compat_va_start; + end = pagetable->compat_va_end; + } else { + start = pagetable->va_start; + end = pagetable->va_end; + } + + spin_lock(&pagetable->lock); + + addr = _get_unmapped_area(pagetable, start, end, size, align); + + if (addr == (uint64_t) -ENOMEM) { + ret = -ENOMEM; + goto out; + } + + /* + * This path is only called in a non-SVM path with locks so we can be + * sure we aren't racing with anybody so we don't need to worry about + * taking the lock + */ + ret = _insert_gpuaddr(pagetable, addr, size); + if (ret == 0) { + memdesc->gpuaddr = addr; + memdesc->pagetable = pagetable; + } + +out: + spin_unlock(&pagetable->lock); + return ret; +} + +static void kgsl_iommu_put_gpuaddr(struct kgsl_memdesc *memdesc) +{ + if (memdesc->pagetable == NULL) + return; + + spin_lock(&memdesc->pagetable->lock); + + _remove_gpuaddr(memdesc->pagetable, memdesc->gpuaddr); + + spin_unlock(&memdesc->pagetable->lock); +} + +static int kgsl_iommu_svm_range(struct kgsl_pagetable *pagetable, + uint64_t *lo, uint64_t *hi, uint64_t memflags) +{ + bool gpu_compat = (memflags & KGSL_MEMFLAGS_FORCE_32BIT) != 0; + + if (lo != NULL) + *lo = gpu_compat ? pagetable->compat_va_start : pagetable->svm_start; + if (hi != NULL) + *hi = gpu_compat ? pagetable->compat_va_end : pagetable->svm_end; + + return 0; +} + +static bool kgsl_iommu_addr_in_range(struct kgsl_pagetable *pagetable, + uint64_t gpuaddr) +{ + if (gpuaddr == 0) + return false; + + if (gpuaddr >= pagetable->va_start && gpuaddr < pagetable->va_end) + return true; + + if (gpuaddr >= pagetable->compat_va_start && gpuaddr < pagetable->compat_va_end) + return true; + + if (gpuaddr >= pagetable->svm_start && gpuaddr < pagetable->svm_end) + return true; + + return false; +} + +static int kgsl_iommu_setup_context(struct kgsl_mmu *mmu, + struct device_node *parent, + struct kgsl_iommu_context *context, const char *name, + iommu_fault_handler_t handler) +{ + struct device_node *node = of_find_node_by_name(parent, name); + struct platform_device *pdev; + int ret; + + if (!node) + return -ENOENT; + + pdev = of_find_device_by_node(node); + ret = of_dma_configure(&pdev->dev, node, true); + of_node_put(node); + + if (ret) + return ret; + + context->cb_num = -1; + context->name = name; + context->kgsldev = KGSL_MMU_DEVICE(mmu); + context->pdev = pdev; + ratelimit_default_init(&context->ratelimit); + + /* Set the adreno_smmu priv data for the device */ + dev_set_drvdata(&pdev->dev, &context->adreno_smmu); + + /* Create a new context */ + context->domain = iommu_domain_alloc(&platform_bus_type); + if (!context->domain) { + /*FIXME: Put back the pdev here? */ + return -ENODEV; + } + + _enable_gpuhtw_llc(mmu, context->domain); + + ret = iommu_attach_device(context->domain, &context->pdev->dev); + if (ret) { + /* FIXME: put back the device here? */ + iommu_domain_free(context->domain); + context->domain = NULL; + return ret; + } + + iommu_set_fault_handler(context->domain, handler, mmu); + + context->cb_num = _iommu_domain_context_bank(context->domain); + + if (context->cb_num >= 0) + return 0; + + dev_err(KGSL_MMU_DEVICE(mmu)->dev, "Couldn't get the context bank for %s: %d\n", + context->name, context->cb_num); + + iommu_detach_device(context->domain, &context->pdev->dev); + iommu_domain_free(context->domain); + + /* FIXME: put back the device here? 
*/ + context->domain = NULL; + + return context->cb_num; +} + +static int iommu_probe_user_context(struct kgsl_device *device, + struct device_node *node) +{ + struct kgsl_iommu *iommu = KGSL_IOMMU(device); + struct kgsl_mmu *mmu = &device->mmu; + int ret; + + ret = kgsl_iommu_setup_context(mmu, node, &iommu->user_context, + "gfx3d_user", kgsl_iommu_default_fault_handler); + if (ret) + return ret; + + /* LPAC is optional so don't worry if it returns error */ + kgsl_iommu_setup_context(mmu, node, &iommu->lpac_context, + "gfx3d_lpac", kgsl_iommu_lpac_fault_handler); + + /* + * FIXME: If adreno_smmu->cookie wasn't initialized then we can't do + * IOPGTABLE + */ + + /* Make the default pagetable */ + mmu->defaultpagetable = kgsl_iommu_default_pagetable(mmu); + if (IS_ERR(mmu->defaultpagetable)) + return PTR_ERR(mmu->defaultpagetable); + + /* If IOPGTABLE isn't enabled then we are done */ + if (!test_bit(KGSL_MMU_IOPGTABLE, &mmu->features)) + return 0; + + /* Enable TTBR0 on the default and LPAC contexts */ + kgsl_iommu_enable_ttbr0(&iommu->user_context, + to_iommu_pt(mmu->defaultpagetable)); + + set_smmu_aperture(device, &iommu->user_context); + + kgsl_iommu_enable_ttbr0(&iommu->lpac_context, + to_iommu_pt(mmu->defaultpagetable)); + + /* FIXME: set LPAC SMMU aperture */ + return 0; +} + +static int iommu_probe_secure_context(struct kgsl_device *device, + struct device_node *parent) +{ + struct device_node *node; + struct platform_device *pdev; + int ret; + struct kgsl_iommu *iommu = KGSL_IOMMU(device); + struct kgsl_mmu *mmu = &device->mmu; + struct kgsl_iommu_context *context = &iommu->secure_context; + int secure_vmid = VMID_CP_PIXEL; + + if (!mmu->secured) + return -EPERM; + + node = of_find_node_by_name(parent, "gfx3d_secure"); + if (!node) + return -ENOENT; + + pdev = of_find_device_by_node(node); + ret = of_dma_configure(&pdev->dev, node, true); + of_node_put(node); + + if (ret) + return ret; + + context->cb_num = -1; + context->name = "gfx3d_secure"; + context->kgsldev = device; + context->pdev = pdev; + ratelimit_default_init(&context->ratelimit); + + context->domain = iommu_domain_alloc(&platform_bus_type); + if (!context->domain) { + /* FIXME: put away the device */ + return -ENODEV; + } + + ret = iommu_domain_set_attr(context->domain, DOMAIN_ATTR_SECURE_VMID, + &secure_vmid); + if (ret) { + dev_err(device->dev, "Unable to set the secure VMID: %d\n", ret); + iommu_domain_free(context->domain); + context->domain = NULL; + + /* FIXME: put away the device */ + return ret; + } + + _enable_gpuhtw_llc(mmu, context->domain); + + ret = iommu_attach_device(context->domain, &context->pdev->dev); + if (ret) { + iommu_domain_free(context->domain); + /* FIXME: Put way the device */ + context->domain = NULL; + return ret; + } + + iommu_set_fault_handler(context->domain, + kgsl_iommu_secure_fault_handler, mmu); + + context->cb_num = _iommu_domain_context_bank(context->domain); + + if (context->cb_num < 0) { + iommu_detach_device(context->domain, &context->pdev->dev); + iommu_domain_free(context->domain); + context->domain = NULL; + return context->cb_num; + } + + mmu->securepagetable = kgsl_iommu_secure_pagetable(mmu); + + if (IS_ERR(mmu->securepagetable)) + mmu->secured = false; + + return 0; +} + +static const char * const kgsl_iommu_clocks[] = { + "gcc_gpu_memnoc_gfx", + "gcc_gpu_snoc_dvm_gfx", + "gpu_cc_ahb", + "gpu_cc_cx_gmu", + "gpu_cc_hlos1_vote_gpu_smmu", + "gpu_cc_hub_aon", + "gpu_cc_hub_cx_int", + "gcc_bimc_gpu_axi", + "gcc_gpu_ahb", + "gcc_gpu_axi_clk", +}; + +static const struct 
kgsl_mmu_ops kgsl_iommu_ops; + +static void kgsl_iommu_check_config(struct kgsl_mmu *mmu, + struct device_node *parent) +{ + struct device_node *node = of_find_node_by_name(parent, "gfx3d_user"); + struct device_node *phandle; + + if (!node) + return; + + phandle = of_parse_phandle(node, "iommus", 0); + + if (phandle) { + if (of_device_is_compatible(phandle, "qcom,qsmmu-v500")) + mmu->subtype = KGSL_IOMMU_SMMU_V500; + if (of_device_is_compatible(phandle, "qcom,adreno-smmu")) + set_bit(KGSL_MMU_IOPGTABLE, &mmu->features); + + of_node_put(phandle); + } + + of_node_put(node); +} + +int kgsl_iommu_probe(struct kgsl_device *device) +{ + u32 val[2]; + int ret, i; + struct kgsl_iommu *iommu = KGSL_IOMMU(device); + struct platform_device *pdev; + struct kgsl_mmu *mmu = &device->mmu; + struct device_node *node; + struct kgsl_global_memdesc *md; + + node = of_find_compatible_node(NULL, NULL, "qcom,kgsl-smmu-v2"); + if (!node) + return -ENODEV; + + /* Create a kmem cache for the pagetable address objects */ + if (!addr_entry_cache) { + addr_entry_cache = KMEM_CACHE(kgsl_iommu_addr_entry, 0); + if (!addr_entry_cache) { + ret = -ENOMEM; + goto err; + } + } + + ret = of_property_read_u32_array(node, "reg", val, 2); + if (ret) { + dev_err(device->dev, + "%pOF: Unable to read KGSL IOMMU register range\n", + node); + goto err; + } + + iommu->regbase = devm_ioremap(&device->pdev->dev, val[0], val[1]); + if (!iommu->regbase) { + dev_err(&device->pdev->dev, "Couldn't map IOMMU registers\n"); + ret = -ENOMEM; + goto err; + } + + pdev = of_find_device_by_node(node); + iommu->pdev = pdev; + iommu->num_clks = 0; + + iommu->clks = devm_kcalloc(&pdev->dev, ARRAY_SIZE(kgsl_iommu_clocks), + sizeof(*iommu->clks), GFP_KERNEL); + if (!iommu->clks) { + platform_device_put(pdev); + ret = -ENOMEM; + goto err; + } + + for (i = 0; i < ARRAY_SIZE(kgsl_iommu_clocks); i++) { + struct clk *c; + + c = devm_clk_get(&device->pdev->dev, kgsl_iommu_clocks[i]); + if (IS_ERR(c)) + continue; + + iommu->clks[iommu->num_clks].id = kgsl_iommu_clocks[i]; + iommu->clks[iommu->num_clks++].clk = c; + } + + /* Get the CX regulator if it is available */ + iommu->cx_gdsc = devm_regulator_get(&pdev->dev, "vddcx"); + + set_bit(KGSL_MMU_PAGED, &mmu->features); + + mmu->type = KGSL_MMU_TYPE_IOMMU; + mmu->mmu_ops = &kgsl_iommu_ops; + + /* Fill out the rest of the devices in the node */ + of_platform_populate(node, NULL, NULL, &pdev->dev); + + /* Peek at the phandle to set up configuration */ + kgsl_iommu_check_config(mmu, node); + + /* Probe the default pagetable */ + ret = iommu_probe_user_context(device, node); + if (ret) { + of_platform_depopulate(&pdev->dev); + platform_device_put(pdev); + goto err; + } + + /* Probe the secure pagetable (this is optional) */ + iommu_probe_secure_context(device, node); + of_node_put(node); + + /* Map any globals that might have been created early */ + list_for_each_entry(md, &device->globals, node) { + + if (md->memdesc.flags & KGSL_MEMFLAGS_SECURE) { + if (IS_ERR_OR_NULL(mmu->securepagetable)) + continue; + + kgsl_iommu_secure_map(mmu->securepagetable, + &md->memdesc); + } else + kgsl_iommu_default_map(mmu->defaultpagetable, + &md->memdesc); + } + + /* QDSS is supported only when QCOM_KGSL_QDSS_STM is enabled */ + if (IS_ENABLED(CONFIG_QCOM_KGSL_QDSS_STM)) + device->qdss_desc = kgsl_allocate_global_fixed(device, + "qcom,gpu-qdss-stm", "gpu-qdss"); + + device->qtimer_desc = kgsl_allocate_global_fixed(device, + "qcom,gpu-timer", "gpu-qtimer"); + + /* + * Only support VBOs on MMU500 hardware that supports the PRR + * 
marker register to ignore writes to the zero page + */ + if (mmu->subtype == KGSL_IOMMU_SMMU_V500) { + /* + * We need to allocate a page because we need a known physical + * address to program in the PRR register but the hardware + * should intercept accesses to the page before they go to DDR + * so this should be mostly just a placeholder + */ + kgsl_vbo_zero_page = alloc_page(GFP_KERNEL | __GFP_ZERO | + __GFP_NORETRY | __GFP_HIGHMEM); + if (kgsl_vbo_zero_page) + set_bit(KGSL_MMU_SUPPORT_VBO, &mmu->features); + } + + return 0; + +err: + kmem_cache_destroy(addr_entry_cache); + addr_entry_cache = NULL; + + of_node_put(node); + return ret; +} + +static const struct kgsl_mmu_ops kgsl_iommu_ops = { + .mmu_close = kgsl_iommu_close, + .mmu_start = kgsl_iommu_start, + .mmu_clear_fsr = kgsl_iommu_clear_fsr, + .mmu_get_current_ttbr0 = kgsl_iommu_get_current_ttbr0, + .mmu_enable_clk = kgsl_iommu_enable_clk, + .mmu_disable_clk = kgsl_iommu_disable_clk, + .mmu_set_pf_policy = kgsl_iommu_set_pf_policy, + .mmu_pagefault_resume = kgsl_iommu_pagefault_resume, + .mmu_getpagetable = kgsl_iommu_getpagetable, + .mmu_map_global = kgsl_iommu_map_global, +}; + +static const struct kgsl_mmu_pt_ops iopgtbl_pt_ops = { + .mmu_map = kgsl_iopgtbl_map, + .mmu_map_child = kgsl_iopgtbl_map_child, + .mmu_map_zero_page_to_range = kgsl_iopgtbl_map_zero_page_to_range, + .mmu_unmap = kgsl_iopgtbl_unmap, + .mmu_unmap_range = kgsl_iopgtbl_unmap_range, + .mmu_destroy_pagetable = kgsl_iommu_destroy_pagetable, + .get_ttbr0 = kgsl_iommu_get_ttbr0, + .get_context_bank = kgsl_iommu_get_context_bank, + .get_gpuaddr = kgsl_iommu_get_gpuaddr, + .put_gpuaddr = kgsl_iommu_put_gpuaddr, + .set_svm_region = kgsl_iommu_set_svm_region, + .find_svm_region = kgsl_iommu_find_svm_region, + .svm_range = kgsl_iommu_svm_range, + .addr_in_range = kgsl_iommu_addr_in_range, +}; + +static const struct kgsl_mmu_pt_ops secure_pt_ops = { + .mmu_map = kgsl_iommu_secure_map, + .mmu_unmap = kgsl_iommu_secure_unmap, + .mmu_destroy_pagetable = kgsl_iommu_destroy_pagetable, + .get_context_bank = kgsl_iommu_get_context_bank, + .get_gpuaddr = kgsl_iommu_get_gpuaddr, + .put_gpuaddr = kgsl_iommu_put_gpuaddr, + .addr_in_range = kgsl_iommu_addr_in_range, +}; + +static const struct kgsl_mmu_pt_ops default_pt_ops = { + .mmu_map = kgsl_iommu_default_map, + .mmu_unmap = kgsl_iommu_default_unmap, + .mmu_destroy_pagetable = kgsl_iommu_destroy_default_pagetable, + .get_ttbr0 = kgsl_iommu_get_ttbr0, + .get_context_bank = kgsl_iommu_get_context_bank, + .get_gpuaddr = kgsl_iommu_get_gpuaddr, + .put_gpuaddr = kgsl_iommu_put_gpuaddr, + .addr_in_range = kgsl_iommu_addr_in_range, +}; diff --git a/kgsl_iommu.h b/kgsl_iommu.h new file mode 100644 index 0000000000..4632992831 --- /dev/null +++ b/kgsl_iommu.h @@ -0,0 +1,180 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2012-2021, The Linux Foundation. All rights reserved. + */ +#ifndef __KGSL_IOMMU_H +#define __KGSL_IOMMU_H + +#include +#include +/* + * These defines control the address range for allocations that + * are mapped into all pagetables. + */ +#define KGSL_IOMMU_GLOBAL_MEM_SIZE (20 * SZ_1M) +#define KGSL_IOMMU_GLOBAL_MEM_BASE32 0xf8000000 +#define KGSL_IOMMU_GLOBAL_MEM_BASE64 0xfc000000 + +/* + * This is a dummy token address that we use to identify memstore when the user + * wants to map it. mmap() uses a unsigned long for the offset so we need a 32 + * bit value that works with all sized apps. 
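+ * (0xfff00000 sits above both global windows: 0xf8000000 + 20MB =
+ * 0xf9400000 and 0xfc000000 + 20MB = 0xfd400000.)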
We chose a value that was purposely + * unmapped so if you increase the global memory size make sure it doesn't + * conflict + */ + +#define KGSL_MEMSTORE_TOKEN_ADDRESS 0xfff00000 + +#define KGSL_IOMMU_GLOBAL_MEM_BASE(__mmu) \ + (test_bit(KGSL_MMU_64BIT, &(__mmu)->features) ? \ + KGSL_IOMMU_GLOBAL_MEM_BASE64 : KGSL_IOMMU_GLOBAL_MEM_BASE32) + +#define KGSL_IOMMU_SVM_BASE32 0x300000 +#define KGSL_IOMMU_SVM_END32 (0xC0000000 - SZ_16M) + +/* + * Limit secure size to 256MB for 32bit kernels. + */ +#define KGSL_IOMMU_SECURE_SIZE32 SZ_256M +#define KGSL_IOMMU_SECURE_END32(_mmu) KGSL_IOMMU_GLOBAL_MEM_BASE(_mmu) +#define KGSL_IOMMU_SECURE_BASE32(_mmu) \ + (KGSL_IOMMU_GLOBAL_MEM_BASE(_mmu) - KGSL_IOMMU_SECURE_SIZE32) + +/* + * Try to use maximum allowed secure size i.e 0xFFFFF000 + * for both 32bit and 64bit secure apps when using 64bit kernel. + */ +#define KGSL_IOMMU_SECURE_BASE64 0x0100000000ULL +#define KGSL_IOMMU_SECURE_END64 0x01FFFFF000ULL +#define KGSL_IOMMU_SECURE_SIZE64 \ + (KGSL_IOMMU_SECURE_END64 - KGSL_IOMMU_SECURE_BASE64) + +#define KGSL_IOMMU_SECURE_BASE(_mmu) (test_bit(KGSL_MMU_64BIT, \ + &(_mmu)->features) ? KGSL_IOMMU_SECURE_BASE64 : \ + KGSL_IOMMU_SECURE_BASE32(_mmu)) +#define KGSL_IOMMU_SECURE_END(_mmu) (test_bit(KGSL_MMU_64BIT, \ + &(_mmu)->features) ? KGSL_IOMMU_SECURE_END64 : \ + KGSL_IOMMU_SECURE_END32(_mmu)) +#define KGSL_IOMMU_SECURE_SIZE(_mmu) (test_bit(KGSL_MMU_64BIT, \ + &(_mmu)->features) ? KGSL_IOMMU_SECURE_SIZE64 : \ + KGSL_IOMMU_SECURE_SIZE32) + +/* The CPU supports 39 bit addresses */ +#define KGSL_IOMMU_SVM_BASE64 0x1000000000ULL +#define KGSL_IOMMU_SVM_END64 0x4000000000ULL +#define KGSL_IOMMU_VA_BASE64 0x4000000000ULL +#define KGSL_IOMMU_VA_END64 0x8000000000ULL + +#define CP_APERTURE_REG 0 +#define CP_SMMU_APERTURE_ID 0x1B + +/* Global SMMU register offsets */ +#define KGSL_IOMMU_PRR_CFG_LADDR 0x6008 +#define KGSL_IOMMU_PRR_CFG_UADDR 0x600c + +/* Register offsets */ +#define KGSL_IOMMU_CTX_SCTLR 0x0000 +#define KGSL_IOMMU_CTX_ACTLR 0x0004 +#define KGSL_IOMMU_CTX_TTBR0 0x0020 +#define KGSL_IOMMU_CTX_CONTEXTIDR 0x0034 +#define KGSL_IOMMU_CTX_FSR 0x0058 +#define KGSL_IOMMU_CTX_TLBIALL 0x0618 +#define KGSL_IOMMU_CTX_RESUME 0x0008 +#define KGSL_IOMMU_CTX_FSYNR0 0x0068 +#define KGSL_IOMMU_CTX_FSYNR1 0x006c +#define KGSL_IOMMU_CTX_TLBSYNC 0x07f0 +#define KGSL_IOMMU_CTX_TLBSTATUS 0x07f4 + +/* TLBSTATUS register fields */ +#define KGSL_IOMMU_CTX_TLBSTATUS_SACTIVE BIT(0) + +/* SCTLR fields */ +#define KGSL_IOMMU_SCTLR_HUPCF_SHIFT 8 +#define KGSL_IOMMU_SCTLR_CFCFG_SHIFT 7 +#define KGSL_IOMMU_SCTLR_CFIE_SHIFT 6 + +#define KGSL_IOMMU_ACTLR_PRR_ENABLE BIT(5) + +/* FSR fields */ +#define KGSL_IOMMU_FSR_SS_SHIFT 30 + +/* offset at which a nop command is placed in setstate */ +#define KGSL_IOMMU_SETSTATE_NOP_OFFSET 1024 + +/* + * struct kgsl_iommu_context - Structure holding data about an iommu context + * bank + * @pdev: pointer to the iommu context's platform device + * @name: context name + * @id: The id of the context, used for deciding how it is used. + * @cb_num: The hardware context bank number, used for calculating register + * offsets. + * @kgsldev: The kgsl device that uses this context. 
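+ * @domain: IOMMU domain attached to this context bank's device
+ * @adreno_smmu: adreno_smmu_priv interface used to fetch the TTBR1
+ * config and program TTBR0 for per-process pagetables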
+ * @stalled_on_fault: Flag when set indicates that this iommu device is stalled + * on a page fault + */ +struct kgsl_iommu_context { + struct platform_device *pdev; + const char *name; + int cb_num; + struct kgsl_device *kgsldev; + bool stalled_on_fault; + /** ratelimit: Ratelimit state for the context */ + struct ratelimit_state ratelimit; + struct iommu_domain *domain; + struct adreno_smmu_priv adreno_smmu; +}; + +/* + * struct kgsl_iommu - Structure holding iommu data for kgsl driver + * @regbase: Virtual address of the IOMMU register base + * @regstart: Physical address of the iommu registers + * @regsize: Length of the iommu register region. + * @setstate: Scratch GPU memory for IOMMU operations + * @clk_enable_count: The ref count of clock enable calls + * @clks: Array of pointers to IOMMU clocks + * @smmu_info: smmu info used in a5xx preemption + */ +struct kgsl_iommu { + /** @user_context: Container for the user iommu context */ + struct kgsl_iommu_context user_context; + /** @secure_context: Container for the secure iommu context */ + struct kgsl_iommu_context secure_context; + /** @lpac_context: Container for the LPAC iommu context */ + struct kgsl_iommu_context lpac_context; + void __iomem *regbase; + struct kgsl_memdesc *setstate; + atomic_t clk_enable_count; + struct clk_bulk_data *clks; + int num_clks; + struct kgsl_memdesc *smmu_info; + /** @pdev: Pointer to the platform device for the IOMMU device */ + struct platform_device *pdev; + /** + * @ppt_active: Set when the first per process pagetable is created. + * This is used to warn when global buffers are created that might not + * be mapped in all contexts + */ + bool ppt_active; + /** @cb0_offset: Offset of context bank 0 from iommu register base */ + u32 cb0_offset; + /** @pagesize: Size of each context bank register space */ + u32 pagesize; + /** @cx_gdsc: CX GDSC handle in case the IOMMU needs it */ + struct regulator *cx_gdsc; +}; + +/* + * struct kgsl_iommu_pt - Iommu pagetable structure private to kgsl driver + * @domain: Pointer to the iommu domain that contains the iommu pagetable + * @ttbr0: register value to set when using this pagetable + */ +struct kgsl_iommu_pt { + struct kgsl_pagetable base; + u64 ttbr0; + + struct io_pgtable_ops *pgtbl_ops; + struct io_pgtable_cfg cfg; +}; + +#endif diff --git a/kgsl_mmu.c b/kgsl_mmu.c new file mode 100644 index 0000000000..c0cc54f202 --- /dev/null +++ b/kgsl_mmu.c @@ -0,0 +1,618 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. 
+ */ + +#include + +#include "kgsl_device.h" +#include "kgsl_mmu.h" +#include "kgsl_sharedmem.h" + +static void pagetable_remove_sysfs_objects(struct kgsl_pagetable *pagetable); + +static void _deferred_destroy(struct work_struct *ws) +{ + struct kgsl_pagetable *pagetable = container_of(ws, + struct kgsl_pagetable, destroy_ws); + + WARN_ON(!list_empty(&pagetable->list)); + + pagetable->pt_ops->mmu_destroy_pagetable(pagetable); +} + +static void kgsl_destroy_pagetable(struct kref *kref) +{ + struct kgsl_pagetable *pagetable = container_of(kref, + struct kgsl_pagetable, refcount); + + kgsl_mmu_detach_pagetable(pagetable); + + kgsl_schedule_work(&pagetable->destroy_ws); +} + +struct kgsl_pagetable * +kgsl_get_pagetable(unsigned long name) +{ + struct kgsl_pagetable *pt, *ret = NULL; + unsigned long flags; + + spin_lock_irqsave(&kgsl_driver.ptlock, flags); + list_for_each_entry(pt, &kgsl_driver.pagetable_list, list) { + if (name == pt->name && kref_get_unless_zero(&pt->refcount)) { + ret = pt; + break; + } + } + + spin_unlock_irqrestore(&kgsl_driver.ptlock, flags); + return ret; +} + +static struct kgsl_pagetable * +_get_pt_from_kobj(struct kobject *kobj) +{ + unsigned int ptname; + + if (!kobj) + return NULL; + + if (kstrtou32(kobj->name, 0, &ptname)) + return NULL; + + return kgsl_get_pagetable(ptname); +} + +static ssize_t +sysfs_show_entries(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct kgsl_pagetable *pt; + int ret = 0; + + pt = _get_pt_from_kobj(kobj); + + if (pt) { + unsigned int val = atomic_read(&pt->stats.entries); + + ret += scnprintf(buf, PAGE_SIZE, "%d\n", val); + } + + kref_put(&pt->refcount, kgsl_destroy_pagetable); + return ret; +} + +static ssize_t +sysfs_show_mapped(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct kgsl_pagetable *pt; + int ret = 0; + + pt = _get_pt_from_kobj(kobj); + + if (pt) { + uint64_t val = atomic_long_read(&pt->stats.mapped); + + ret += scnprintf(buf, PAGE_SIZE, "%llu\n", val); + } + + kref_put(&pt->refcount, kgsl_destroy_pagetable); + return ret; +} + +static ssize_t +sysfs_show_max_mapped(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct kgsl_pagetable *pt; + int ret = 0; + + pt = _get_pt_from_kobj(kobj); + + if (pt) { + uint64_t val = atomic_long_read(&pt->stats.max_mapped); + + ret += scnprintf(buf, PAGE_SIZE, "%llu\n", val); + } + + kref_put(&pt->refcount, kgsl_destroy_pagetable); + return ret; +} + +static struct kobj_attribute attr_entries = { + .attr = { .name = "entries", .mode = 0444 }, + .show = sysfs_show_entries, + .store = NULL, +}; + +static struct kobj_attribute attr_mapped = { + .attr = { .name = "mapped", .mode = 0444 }, + .show = sysfs_show_mapped, + .store = NULL, +}; + +static struct kobj_attribute attr_max_mapped = { + .attr = { .name = "max_mapped", .mode = 0444 }, + .show = sysfs_show_max_mapped, + .store = NULL, +}; + +static struct attribute *pagetable_attrs[] = { + &attr_entries.attr, + &attr_mapped.attr, + &attr_max_mapped.attr, + NULL, +}; + +static struct attribute_group pagetable_attr_group = { + .attrs = pagetable_attrs, +}; + +static void +pagetable_remove_sysfs_objects(struct kgsl_pagetable *pagetable) +{ + if (pagetable->kobj) + sysfs_remove_group(pagetable->kobj, + &pagetable_attr_group); + + kobject_put(pagetable->kobj); + pagetable->kobj = NULL; +} + +static int +pagetable_add_sysfs_objects(struct kgsl_pagetable *pagetable) +{ + char ptname[16]; + int ret = -ENOMEM; + + snprintf(ptname, sizeof(ptname), "%d", pagetable->name); + 
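+ /*
+ * Each pagetable gets a kobject named after its id under
+ * kgsl_driver.ptkobj, exposing the read-only entries, mapped and
+ * max_mapped counters defined above.
+ */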
pagetable->kobj = kobject_create_and_add(ptname, + kgsl_driver.ptkobj); + if (pagetable->kobj == NULL) + goto err; + + ret = sysfs_create_group(pagetable->kobj, &pagetable_attr_group); + +err: + if (ret) { + if (pagetable->kobj) + kobject_put(pagetable->kobj); + + pagetable->kobj = NULL; + } + + return ret; +} + +#ifdef CONFIG_TRACE_GPU_MEM +static void kgsl_mmu_trace_gpu_mem_pagetable(struct kgsl_pagetable *pagetable) +{ + if (pagetable->name == KGSL_MMU_GLOBAL_PT || + pagetable->name == KGSL_MMU_SECURE_PT) + return; + + trace_gpu_mem_total(0, pagetable->name, + (u64)atomic_long_read(&pagetable->stats.mapped)); +} +#else +static void kgsl_mmu_trace_gpu_mem_pagetable(struct kgsl_pagetable *pagetable) +{ +} +#endif + +void +kgsl_mmu_detach_pagetable(struct kgsl_pagetable *pagetable) +{ + unsigned long flags; + + spin_lock_irqsave(&kgsl_driver.ptlock, flags); + + if (!list_empty(&pagetable->list)) + list_del_init(&pagetable->list); + + spin_unlock_irqrestore(&kgsl_driver.ptlock, flags); + + pagetable_remove_sysfs_objects(pagetable); +} + +unsigned int +kgsl_mmu_log_fault_addr(struct kgsl_mmu *mmu, u64 pt_base, + uint64_t addr) +{ + struct kgsl_pagetable *pt; + unsigned int ret = 0; + + spin_lock(&kgsl_driver.ptlock); + list_for_each_entry(pt, &kgsl_driver.pagetable_list, list) { + if (kgsl_mmu_pagetable_get_ttbr0(pt) == pt_base) { + if ((addr & ~(PAGE_SIZE-1)) == pt->fault_addr) { + ret = 1; + break; + } + pt->fault_addr = (addr & ~(PAGE_SIZE-1)); + ret = 0; + break; + } + } + spin_unlock(&kgsl_driver.ptlock); + + return ret; +} + +int kgsl_mmu_start(struct kgsl_device *device) +{ + struct kgsl_mmu *mmu = &device->mmu; + + if (MMU_OP_VALID(mmu, mmu_start)) + return mmu->mmu_ops->mmu_start(mmu); + + return 0; +} + +void kgsl_mmu_pagetable_init(struct kgsl_mmu *mmu, + struct kgsl_pagetable *pagetable, u32 name) +{ + kref_init(&pagetable->refcount); + + spin_lock_init(&pagetable->lock); + INIT_WORK(&pagetable->destroy_ws, _deferred_destroy); + + pagetable->mmu = mmu; + pagetable->name = name; + + atomic_set(&pagetable->stats.entries, 0); + atomic_long_set(&pagetable->stats.mapped, 0); + atomic_long_set(&pagetable->stats.max_mapped, 0); +} + +void kgsl_mmu_pagetable_add(struct kgsl_mmu *mmu, struct kgsl_pagetable *pagetable) +{ + unsigned long flags; + + spin_lock_irqsave(&kgsl_driver.ptlock, flags); + list_add(&pagetable->list, &kgsl_driver.pagetable_list); + spin_unlock_irqrestore(&kgsl_driver.ptlock, flags); + + /* Create the sysfs entries */ + pagetable_add_sysfs_objects(pagetable); +} + +void kgsl_mmu_putpagetable(struct kgsl_pagetable *pagetable) +{ + if (!IS_ERR_OR_NULL(pagetable)) + kref_put(&pagetable->refcount, kgsl_destroy_pagetable); +} + +/** + * kgsl_mmu_find_svm_region() - Find a empty spot in the SVM region + * @pagetable: KGSL pagetable to search + * @start: start of search range, must be within kgsl_mmu_svm_range() + * @end: end of search range, must be within kgsl_mmu_svm_range() + * @size: Size of the region to find + * @align: Desired alignment of the address + */ +uint64_t kgsl_mmu_find_svm_region(struct kgsl_pagetable *pagetable, + uint64_t start, uint64_t end, uint64_t size, + uint64_t align) +{ + if (PT_OP_VALID(pagetable, find_svm_region)) + return pagetable->pt_ops->find_svm_region(pagetable, start, + end, size, align); + return -ENOMEM; +} + +/** + * kgsl_mmu_set_svm_region() - Check if a region is empty and reserve it if so + * @pagetable: KGSL pagetable to search + * @gpuaddr: GPU address to check/reserve + * @size: Size of the region to check/reserve + */ +int 
kgsl_mmu_set_svm_region(struct kgsl_pagetable *pagetable, uint64_t gpuaddr, + uint64_t size) +{ + if (PT_OP_VALID(pagetable, set_svm_region)) + return pagetable->pt_ops->set_svm_region(pagetable, gpuaddr, + size); + return -ENOMEM; +} + +int +kgsl_mmu_map(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc) +{ + int size; + struct kgsl_device *device = KGSL_MMU_DEVICE(pagetable->mmu); + + if (!memdesc->gpuaddr) + return -EINVAL; + /* Only global mappings should be mapped multiple times */ + if (!kgsl_memdesc_is_global(memdesc) && + (KGSL_MEMDESC_MAPPED & memdesc->priv)) + return -EINVAL; + + if (memdesc->flags & KGSL_MEMFLAGS_VBO) + return -EINVAL; + + size = kgsl_memdesc_footprint(memdesc); + + if (PT_OP_VALID(pagetable, mmu_map)) { + int ret; + + ret = pagetable->pt_ops->mmu_map(pagetable, memdesc); + if (ret) + return ret; + + atomic_inc(&pagetable->stats.entries); + KGSL_STATS_ADD(size, &pagetable->stats.mapped, + &pagetable->stats.max_mapped); + kgsl_mmu_trace_gpu_mem_pagetable(pagetable); + + if (!kgsl_memdesc_is_global(memdesc) + && !(memdesc->flags & KGSL_MEMFLAGS_USERMEM_ION)) { + kgsl_trace_gpu_mem_total(device, size); + } + + memdesc->priv |= KGSL_MEMDESC_MAPPED; + } + + return 0; +} + +int kgsl_mmu_map_child(struct kgsl_pagetable *pt, + struct kgsl_memdesc *memdesc, u64 offset, + struct kgsl_memdesc *child, u64 child_offset, + u64 length) +{ + /* This only makes sense for virtual buffer objects */ + if (!(memdesc->flags & KGSL_MEMFLAGS_VBO)) + return -EINVAL; + + if (!memdesc->gpuaddr) + return -EINVAL; + + if (PT_OP_VALID(pt, mmu_map_child)) { + int ret; + + ret = pt->pt_ops->mmu_map_child(pt, memdesc, + offset, child, child_offset, length); + if (ret) + return ret; + + KGSL_STATS_ADD(length, &pt->stats.mapped, + &pt->stats.max_mapped); + } + + return 0; +} + +int kgsl_mmu_map_zero_page_to_range(struct kgsl_pagetable *pt, + struct kgsl_memdesc *memdesc, u64 start, u64 length) +{ + int ret = -EINVAL; + + /* This only makes sense for virtual buffer objects */ + if (!(memdesc->flags & KGSL_MEMFLAGS_VBO)) + return -EINVAL; + + if (!memdesc->gpuaddr) + return -EINVAL; + + if (PT_OP_VALID(pt, mmu_map_zero_page_to_range)) { + ret = pt->pt_ops->mmu_map_zero_page_to_range(pt, + memdesc, start, length); + if (ret) + return ret; + + KGSL_STATS_ADD(length, &pt->stats.mapped, + &pt->stats.max_mapped); + } + + return 0; +} + +/** + * kgsl_mmu_svm_range() - Return the range for SVM (if applicable) + * @pagetable: Pagetable to query the range from + * @lo: Pointer to store the start of the SVM range + * @hi: Pointer to store the end of the SVM range + * @memflags: Flags from the buffer we are mapping + */ +int kgsl_mmu_svm_range(struct kgsl_pagetable *pagetable, + uint64_t *lo, uint64_t *hi, uint64_t memflags) +{ + if (PT_OP_VALID(pagetable, svm_range)) + return pagetable->pt_ops->svm_range(pagetable, lo, hi, + memflags); + + return -ENODEV; +} + +int +kgsl_mmu_unmap(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc) +{ + int ret = 0; + struct kgsl_device *device = KGSL_MMU_DEVICE(pagetable->mmu); + + if (memdesc->size == 0) + return -EINVAL; + + if ((memdesc->flags & KGSL_MEMFLAGS_VBO)) + return -EINVAL; + + /* Only global mappings should be mapped multiple times */ + if (!(KGSL_MEMDESC_MAPPED & memdesc->priv)) + return -EINVAL; + + if (PT_OP_VALID(pagetable, mmu_unmap)) { + uint64_t size; + + size = kgsl_memdesc_footprint(memdesc); + + ret = pagetable->pt_ops->mmu_unmap(pagetable, memdesc); + + atomic_dec(&pagetable->stats.entries); + atomic_long_sub(size, 
&pagetable->stats.mapped); + kgsl_mmu_trace_gpu_mem_pagetable(pagetable); + + if (!kgsl_memdesc_is_global(memdesc)) { + memdesc->priv &= ~KGSL_MEMDESC_MAPPED; + if (!(memdesc->flags & KGSL_MEMFLAGS_USERMEM_ION)) + kgsl_trace_gpu_mem_total(device, -(size)); + } + } + + return ret; +} + +int +kgsl_mmu_unmap_range(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc, u64 offset, u64 length) +{ + int ret = 0; + + /* Only allow virtual buffer objects to use this function */ + if (!(memdesc->flags & KGSL_MEMFLAGS_VBO)) + return -EINVAL; + + if (PT_OP_VALID(pagetable, mmu_unmap_range)) { + ret = pagetable->pt_ops->mmu_unmap_range(pagetable, memdesc, + offset, length); + + atomic_long_sub(length, &pagetable->stats.mapped); + } + + return ret; +} + +void kgsl_mmu_map_global(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, u32 padding) +{ + struct kgsl_mmu *mmu = &(device->mmu); + + if (MMU_OP_VALID(mmu, mmu_map_global)) + mmu->mmu_ops->mmu_map_global(mmu, memdesc, padding); +} + +void kgsl_mmu_close(struct kgsl_device *device) +{ + struct kgsl_mmu *mmu = &(device->mmu); + + if (MMU_OP_VALID(mmu, mmu_close)) + mmu->mmu_ops->mmu_close(mmu); +} + +int kgsl_mmu_pagetable_get_context_bank(struct kgsl_pagetable *pagetable) +{ + if (PT_OP_VALID(pagetable, get_context_bank)) + return pagetable->pt_ops->get_context_bank(pagetable); + + return -ENOENT; +} + +enum kgsl_mmutype kgsl_mmu_get_mmutype(struct kgsl_device *device) +{ + return device ? device->mmu.type : KGSL_MMU_TYPE_NONE; +} + +bool kgsl_mmu_gpuaddr_in_range(struct kgsl_pagetable *pagetable, + uint64_t gpuaddr) +{ + if (PT_OP_VALID(pagetable, addr_in_range)) + return pagetable->pt_ops->addr_in_range(pagetable, gpuaddr); + + return false; +} + +/* + * NOMMU definitions - NOMMU really just means that the MMU is kept in pass + * through and the GPU directly accesses physical memory. Used in debug mode + * and when a real MMU isn't up and running yet. + */ + +static bool nommu_gpuaddr_in_range(struct kgsl_pagetable *pagetable, + uint64_t gpuaddr) +{ + return (gpuaddr != 0) ? 
true : false; +} + +static int nommu_get_gpuaddr(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc) +{ + if (WARN_ONCE(memdesc->sgt->nents > 1, + "Attempt to map non-contiguous memory with NOMMU\n")) + return -EINVAL; + + memdesc->gpuaddr = (uint64_t) sg_phys(memdesc->sgt->sgl); + + if (memdesc->gpuaddr) { + memdesc->pagetable = pagetable; + return 0; + } + + return -ENOMEM; +} + +static void nommu_destroy_pagetable(struct kgsl_pagetable *pt) +{ + kfree(pt); +} + +static const struct kgsl_mmu_pt_ops nommu_pt_ops = { + .get_gpuaddr = nommu_get_gpuaddr, + .addr_in_range = nommu_gpuaddr_in_range, + .mmu_destroy_pagetable = nommu_destroy_pagetable, +}; + +static struct kgsl_pagetable *nommu_getpagetable(struct kgsl_mmu *mmu, + unsigned long name) +{ + struct kgsl_device *device = KGSL_MMU_DEVICE(mmu); + struct kgsl_pagetable *pagetable; + struct kgsl_global_memdesc *md; + + pagetable = kgsl_get_pagetable(KGSL_MMU_GLOBAL_PT); + + if (pagetable == NULL) { + pagetable = kzalloc(sizeof(*pagetable), GFP_KERNEL); + if (!pagetable) + return ERR_PTR(-ENOMEM); + + kgsl_mmu_pagetable_init(mmu, pagetable, KGSL_MMU_GLOBAL_PT); + pagetable->pt_ops = &nommu_pt_ops; + + list_for_each_entry(md, &device->globals, node) + md->memdesc.gpuaddr = + (uint64_t) sg_phys(md->memdesc.sgt->sgl); + + kgsl_mmu_pagetable_add(mmu, pagetable); + } + + return pagetable; +} + +static struct kgsl_mmu_ops kgsl_nommu_ops = { + .mmu_getpagetable = nommu_getpagetable, +}; + +int kgsl_mmu_probe(struct kgsl_device *device) +{ + struct kgsl_mmu *mmu = &device->mmu; + int ret; + + /* + * Try to probe for the IOMMU and if it doesn't exist for some reason + * go for the NOMMU option instead + */ + ret = kgsl_iommu_probe(device); + if (!ret || ret == -EPROBE_DEFER) + return ret; + + mmu->mmu_ops = &kgsl_nommu_ops; + mmu->type = KGSL_MMU_TYPE_NONE; + return 0; +} diff --git a/kgsl_mmu.h b/kgsl_mmu.h new file mode 100644 index 0000000000..0852ca7097 --- /dev/null +++ b/kgsl_mmu.h @@ -0,0 +1,393 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. 
+ */ +#ifndef __KGSL_MMU_H +#define __KGSL_MMU_H + +#include + +/* Identifier for the global page table */ +/* + * Per process page tables will probably pass in the thread group + * as an identifier + */ +#define KGSL_MMU_GLOBAL_PT 0 +#define KGSL_MMU_SECURE_PT 1 + +#define MMU_DEFAULT_TTBR0(_d) \ + (kgsl_mmu_pagetable_get_ttbr0((_d)->mmu.defaultpagetable)) + +#define KGSL_MMU_DEVICE(_mmu) \ + container_of((_mmu), struct kgsl_device, mmu) + +/** + * enum kgsl_ft_pagefault_policy_bits - KGSL pagefault policy bits + * @KGSL_FT_PAGEFAULT_INT_ENABLE: No longer used, but retained for compatibility + * @KGSL_FT_PAGEFAULT_GPUHALT_ENABLE: enable GPU halt on pagefaults + * @KGSL_FT_PAGEFAULT_LOG_ONE_PER_PAGE: log one pagefault per page + * @KGSL_FT_PAGEFAULT_LOG_ONE_PER_INT: log one pagefault per interrupt + */ +enum { + KGSL_FT_PAGEFAULT_INT_ENABLE = 0, + KGSL_FT_PAGEFAULT_GPUHALT_ENABLE = 1, + KGSL_FT_PAGEFAULT_LOG_ONE_PER_PAGE = 2, + KGSL_FT_PAGEFAULT_LOG_ONE_PER_INT = 3, + /* KGSL_FT_PAGEFAULT_MAX_BITS is used to calculate the mask */ + KGSL_FT_PAGEFAULT_MAX_BITS, +}; + +#define KGSL_FT_PAGEFAULT_MASK GENMASK(KGSL_FT_PAGEFAULT_MAX_BITS - 1, 0) + +#define KGSL_FT_PAGEFAULT_DEFAULT_POLICY 0 + +struct kgsl_device; + +enum kgsl_mmutype { + KGSL_MMU_TYPE_IOMMU = 0, + KGSL_MMU_TYPE_NONE +}; + +#define KGSL_IOMMU_SMMU_V500 1 + +struct kgsl_pagetable { + spinlock_t lock; + struct kref refcount; + struct list_head list; + unsigned int name; + struct kobject *kobj; + struct work_struct destroy_ws; + + struct { + atomic_t entries; + atomic_long_t mapped; + atomic_long_t max_mapped; + } stats; + const struct kgsl_mmu_pt_ops *pt_ops; + uint64_t fault_addr; + struct kgsl_mmu *mmu; + /** @rbtree: all buffers mapped into the pagetable, indexed by gpuaddr */ + struct rb_root rbtree; + /** @va_start: Start of virtual range used in this pagetable */ + u64 va_start; + /** @va_end: End of virtual range */ + u64 va_end; + /** + * @svm_start: Start of shared virtual memory range. Addresses in this + * range are also valid in the process's CPU address space. 
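+ * Buffers flagged for SVM are placed between @svm_start and @svm_end via
+ * the set_svm_region()/find_svm_region() pagetable ops, so the GPU mapping
+ * can reuse the address already chosen on the CPU side.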
+ */ + u64 svm_start; + /** @svm_end: end of 32 bit compatible range */ + u64 svm_end; + /** + * @compat_va_start - Start of the "compat" virtual address range for + * forced 32 bit allocations + */ + u64 compat_va_start; + /** + * @compat_va_end - End of the "compat" virtual address range for + * forced 32 bit allocations + */ + u64 compat_va_end; + u64 global_base; +}; + +struct kgsl_mmu; + +struct kgsl_mmu_ops { + void (*mmu_close)(struct kgsl_mmu *mmu); + int (*mmu_start)(struct kgsl_mmu *mmu); + uint64_t (*mmu_get_current_ttbr0)(struct kgsl_mmu *mmu); + void (*mmu_pagefault_resume)(struct kgsl_mmu *mmu, bool terminate); + void (*mmu_clear_fsr)(struct kgsl_mmu *mmu); + void (*mmu_enable_clk)(struct kgsl_mmu *mmu); + void (*mmu_disable_clk)(struct kgsl_mmu *mmu); + int (*mmu_set_pf_policy)(struct kgsl_mmu *mmu, unsigned long pf_policy); + int (*mmu_init_pt)(struct kgsl_mmu *mmu, struct kgsl_pagetable *pt); + struct kgsl_pagetable * (*mmu_getpagetable)(struct kgsl_mmu *mmu, + unsigned long name); + void (*mmu_map_global)(struct kgsl_mmu *mmu, + struct kgsl_memdesc *memdesc, u32 padding); +}; + +struct kgsl_mmu_pt_ops { + int (*mmu_map)(struct kgsl_pagetable *pt, + struct kgsl_memdesc *memdesc); + int (*mmu_map_child)(struct kgsl_pagetable *pt, + struct kgsl_memdesc *memdesc, u64 offset, + struct kgsl_memdesc *child, u64 child_offset, + u64 length); + int (*mmu_map_zero_page_to_range)(struct kgsl_pagetable *pt, + struct kgsl_memdesc *memdesc, u64 start, u64 length); + int (*mmu_unmap)(struct kgsl_pagetable *pt, + struct kgsl_memdesc *memdesc); + int (*mmu_unmap_range)(struct kgsl_pagetable *pt, + struct kgsl_memdesc *memdesc, u64 offset, u64 length); + void (*mmu_destroy_pagetable)(struct kgsl_pagetable *pt); + u64 (*get_ttbr0)(struct kgsl_pagetable *pt); + int (*get_context_bank)(struct kgsl_pagetable *pt); + int (*get_gpuaddr)(struct kgsl_pagetable *pt, + struct kgsl_memdesc *memdesc); + void (*put_gpuaddr)(struct kgsl_memdesc *memdesc); + uint64_t (*find_svm_region)(struct kgsl_pagetable *pt, uint64_t start, + uint64_t end, uint64_t size, uint64_t align); + int (*set_svm_region)(struct kgsl_pagetable *pt, + uint64_t gpuaddr, uint64_t size); + int (*svm_range)(struct kgsl_pagetable *pt, uint64_t *lo, uint64_t *hi, + uint64_t memflags); + bool (*addr_in_range)(struct kgsl_pagetable *pagetable, + uint64_t gpuaddr); +}; + +enum kgsl_mmu_feature { + /* @KGSL_MMU_64BIT: Use 64 bit virtual address space */ + KGSL_MMU_64BIT, + /* @KGSL_MMU_PAGED: Support paged memory */ + KGSL_MMU_PAGED, + /* + * @KGSL_MMU_NEED_GUARD_PAGE: Set if a guard page is needed for each + * mapped region + */ + KGSL_MMU_NEED_GUARD_PAGE, + /** @KGSL_MMU_IO_COHERENT: Set if a device supports I/O coherency */ + KGSL_MMU_IO_COHERENT, + /** @KGSL_MMU_LLC_ENABLE: Set if LLC is activated for the target */ + KGSL_MMU_LLCC_ENABLE, + /** @KGSL_MMU_SMMU_APERTURE: Set the SMMU aperture */ + KGSL_MMU_SMMU_APERTURE, + /** + * @KGSL_MMU_IOPGTABLE: Set if the qcom,adreno-smmu implementation is + * available. 
Implies split address space and per-process pagetables + */ + KGSL_MMU_IOPGTABLE, + /** @KGSL_MMU_SUPPORT_VBO: Non-secure VBOs are supported */ + KGSL_MMU_SUPPORT_VBO, +}; + +#include "kgsl_iommu.h" + +/** + * struct kgsl_mmu - Master definition for KGSL MMU devices + * @flags: MMU device flags + * @type: Type of MMU that is attached + * @subtype: Sub Type of MMU that is attached + * @defaultpagetable: Default pagetable object for the MMU + * @securepagetable: Default secure pagetable object for the MMU + * @mmu_ops: Function pointers for the MMU sub-type + * @secured: True if the MMU needs to be secured + * @feature: Static list of MMU features + */ +struct kgsl_mmu { + unsigned long flags; + enum kgsl_mmutype type; + u32 subtype; + struct kgsl_pagetable *defaultpagetable; + struct kgsl_pagetable *securepagetable; + const struct kgsl_mmu_ops *mmu_ops; + bool secured; + unsigned long features; + /** @pfpolicy: The current pagefault policy for the device */ + unsigned long pfpolicy; + /** mmu: Pointer to the IOMMU sub-device */ + struct kgsl_iommu iommu; +}; + +#define KGSL_IOMMU(d) (&((d)->mmu.iommu)) + +int kgsl_mmu_probe(struct kgsl_device *device); +int kgsl_mmu_start(struct kgsl_device *device); + +void kgsl_print_global_pt_entries(struct seq_file *s); +void kgsl_mmu_putpagetable(struct kgsl_pagetable *pagetable); + +int kgsl_mmu_map(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc); +int kgsl_mmu_map_child(struct kgsl_pagetable *pt, + struct kgsl_memdesc *memdesc, u64 offset, + struct kgsl_memdesc *child, u64 child_offset, + u64 length); +int kgsl_mmu_map_zero_page_to_range(struct kgsl_pagetable *pt, + struct kgsl_memdesc *memdesc, u64 start, u64 length); +int kgsl_mmu_unmap(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc); +int kgsl_mmu_unmap_range(struct kgsl_pagetable *pt, + struct kgsl_memdesc *memdesc, u64 offset, u64 length); +unsigned int kgsl_mmu_log_fault_addr(struct kgsl_mmu *mmu, + u64 ttbr0, uint64_t addr); +bool kgsl_mmu_gpuaddr_in_range(struct kgsl_pagetable *pt, uint64_t gpuaddr); + +int kgsl_mmu_get_region(struct kgsl_pagetable *pagetable, + uint64_t gpuaddr, uint64_t size); + +int kgsl_mmu_find_region(struct kgsl_pagetable *pagetable, + uint64_t region_start, uint64_t region_end, + uint64_t *gpuaddr, uint64_t size, unsigned int align); + +void kgsl_mmu_close(struct kgsl_device *device); + +uint64_t kgsl_mmu_find_svm_region(struct kgsl_pagetable *pagetable, + uint64_t start, uint64_t end, uint64_t size, + uint64_t alignment); + +int kgsl_mmu_set_svm_region(struct kgsl_pagetable *pagetable, uint64_t gpuaddr, + uint64_t size); + +void kgsl_mmu_detach_pagetable(struct kgsl_pagetable *pagetable); + +int kgsl_mmu_svm_range(struct kgsl_pagetable *pagetable, + uint64_t *lo, uint64_t *hi, uint64_t memflags); + +struct kgsl_pagetable *kgsl_get_pagetable(unsigned long name); + +/* + * Static inline functions of MMU that simply call the SMMU specific + * function using a function pointer. 
These functions can be thought + * of as wrappers around the actual function + */ + +#define MMU_OP_VALID(_mmu, _field) \ + (((_mmu) != NULL) && \ + ((_mmu)->mmu_ops != NULL) && \ + ((_mmu)->mmu_ops->_field != NULL)) + +#define PT_OP_VALID(_pt, _field) \ + (((_pt) != NULL) && \ + ((_pt)->pt_ops != NULL) && \ + ((_pt)->pt_ops->_field != NULL)) + +/** + * kgsl_mmu_get_gpuaddr - Assign a GPU address to the memdesc + * @pagetable: GPU pagetable to assign the address in + * @memdesc: mem descriptor to assign the memory to + * + * Return: 0 on success or negative on failure + */ +static inline int kgsl_mmu_get_gpuaddr(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc) +{ + if (PT_OP_VALID(pagetable, get_gpuaddr)) + return pagetable->pt_ops->get_gpuaddr(pagetable, memdesc); + + return -ENOMEM; +} + +/** + * kgsl_mmu_put_gpuaddr - Remove a GPU address from a pagetable + * @pagetable: Pagetable to release the memory from + * @memdesc: Memory descriptor containing the GPU address to free + * + * Release a GPU address in the MMU virtual address space. + */ +static inline void kgsl_mmu_put_gpuaddr(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc) +{ + if (PT_OP_VALID(pagetable, put_gpuaddr)) + pagetable->pt_ops->put_gpuaddr(memdesc); +} + +static inline u64 kgsl_mmu_get_current_ttbr0(struct kgsl_mmu *mmu) +{ + if (MMU_OP_VALID(mmu, mmu_get_current_ttbr0)) + return mmu->mmu_ops->mmu_get_current_ttbr0(mmu); + + return 0; +} + +static inline struct kgsl_pagetable *kgsl_mmu_getpagetable(struct kgsl_mmu *mmu, + unsigned long name) +{ + if (MMU_OP_VALID(mmu, mmu_getpagetable)) + return mmu->mmu_ops->mmu_getpagetable(mmu, name); + + return NULL; +} + +static inline void kgsl_mmu_enable_clk(struct kgsl_mmu *mmu) +{ + if (MMU_OP_VALID(mmu, mmu_enable_clk)) + mmu->mmu_ops->mmu_enable_clk(mmu); +} + +static inline void kgsl_mmu_disable_clk(struct kgsl_mmu *mmu) +{ + if (MMU_OP_VALID(mmu, mmu_disable_clk)) + mmu->mmu_ops->mmu_disable_clk(mmu); +} + +static inline int kgsl_mmu_set_pagefault_policy(struct kgsl_mmu *mmu, + unsigned long pf_policy) +{ + if (MMU_OP_VALID(mmu, mmu_set_pf_policy)) + return mmu->mmu_ops->mmu_set_pf_policy(mmu, pf_policy); + + return 0; +} + +static inline void kgsl_mmu_pagefault_resume(struct kgsl_mmu *mmu, + bool terminate) +{ + if (MMU_OP_VALID(mmu, mmu_pagefault_resume)) + return mmu->mmu_ops->mmu_pagefault_resume(mmu, terminate); +} + +static inline void kgsl_mmu_clear_fsr(struct kgsl_mmu *mmu) +{ + if (MMU_OP_VALID(mmu, mmu_clear_fsr)) + return mmu->mmu_ops->mmu_clear_fsr(mmu); +} + +static inline bool kgsl_mmu_is_perprocess(struct kgsl_mmu *mmu) +{ + return test_bit(KGSL_MMU_IOPGTABLE, &mmu->features); +} + +static inline bool kgsl_mmu_is_secured(struct kgsl_mmu *mmu) +{ + return mmu && (mmu->secured) && (!IS_ERR_OR_NULL(mmu->securepagetable)); +} + +static inline u64 +kgsl_mmu_pagetable_get_ttbr0(struct kgsl_pagetable *pagetable) +{ + if (PT_OP_VALID(pagetable, get_ttbr0)) + return pagetable->pt_ops->get_ttbr0(pagetable); + + return 0; +} + +/** + * kgsl_mmu_map_global - Map a memdesc as a global buffer + * @device: A KGSL GPU device handle + * @memdesc: Pointer to a GPU memory descriptor + * @padding: Any padding to add to the end of the VA allotment (in bytes) + * + * Map a buffer as globally accessible in all pagetable contexts + */ +void kgsl_mmu_map_global(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, u32 padding); + +/** + * kgsl_mmu_pagetable_get_context_bank - Return the context bank number + * @pagetable: A handle to a given 
pagetable + * + * This function will find the context number of the given pagetable + + * Return: The context bank number the pagetable is attached to or + * negative error on failure. + */ +int kgsl_mmu_pagetable_get_context_bank(struct kgsl_pagetable *pagetable); + +void kgsl_mmu_pagetable_init(struct kgsl_mmu *mmu, + struct kgsl_pagetable *pagetable, u32 name); + +void kgsl_mmu_pagetable_add(struct kgsl_mmu *mmu, struct kgsl_pagetable *pagetable); + +#if IS_ENABLED(CONFIG_ARM_SMMU) +int kgsl_iommu_probe(struct kgsl_device *device); +#else +static inline int kgsl_iommu_probe(struct kgsl_device *device) +{ + return -ENODEV; +} +#endif +#endif /* __KGSL_MMU_H */ diff --git a/kgsl_pool.c b/kgsl_pool.c new file mode 100644 index 0000000000..18f6a8e28d --- /dev/null +++ b/kgsl_pool.c @@ -0,0 +1,641 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2016-2021, The Linux Foundation. All rights reserved. + */ + +#include +#include +#include +#include +#include + +#include "kgsl_device.h" +#include "kgsl_pool.h" +#include "kgsl_sharedmem.h" +#include "kgsl_trace.h" + +#ifdef CONFIG_QCOM_KGSL_SORT_POOL + +struct kgsl_pool_page_entry { + phys_addr_t physaddr; + struct page *page; + struct rb_node node; +}; + +static struct kmem_cache *addr_page_cache; + +/** + * struct kgsl_page_pool - Structure to hold information for the pool + * @pool_order: Page order describing the size of the page + * @page_count: Number of pages currently present in the pool + * @reserved_pages: Number of pages reserved at init for the pool + * @list_lock: Spinlock for page list in the pool + * @pool_rbtree: RB tree with all pages held/reserved in this pool + */ +struct kgsl_page_pool { + unsigned int pool_order; + unsigned int page_count; + unsigned int reserved_pages; + spinlock_t list_lock; + struct rb_root pool_rbtree; +}; + +static int +__kgsl_pool_add_page(struct kgsl_page_pool *pool, struct page *p) +{ + struct rb_node **node, *parent; + struct kgsl_pool_page_entry *new_page, *entry; + + new_page = kmem_cache_alloc(addr_page_cache, GFP_KERNEL); + if (new_page == NULL) + return -ENOMEM; + + spin_lock(&pool->list_lock); + node = &pool->pool_rbtree.rb_node; + new_page->physaddr = page_to_phys(p); + new_page->page = p; + + while (*node != NULL) { + parent = *node; + entry = rb_entry(parent, struct kgsl_pool_page_entry, node); + + if (new_page->physaddr < entry->physaddr) + node = &parent->rb_left; + else + node = &parent->rb_right; + } + + rb_link_node(&new_page->node, parent, node); + rb_insert_color(&new_page->node, &pool->pool_rbtree); + pool->page_count++; + spin_unlock(&pool->list_lock); + + return 0; +} + +static struct page * +__kgsl_pool_get_page(struct kgsl_page_pool *pool) +{ + struct rb_node *node; + struct kgsl_pool_page_entry *entry; + struct page *p; + + node = rb_first(&pool->pool_rbtree); + if (!node) + return NULL; + + entry = rb_entry(node, struct kgsl_pool_page_entry, node); + p = entry->page; + rb_erase(&entry->node, &pool->pool_rbtree); + kmem_cache_free(addr_page_cache, entry); + pool->page_count--; + return p; +} + +static void kgsl_pool_list_init(struct kgsl_page_pool *pool) +{ + pool->pool_rbtree = RB_ROOT; +} + +static void kgsl_pool_cache_init(void) +{ + addr_page_cache = KMEM_CACHE(kgsl_pool_page_entry, 0); +} +#else +/** + * struct kgsl_page_pool - Structure to hold information for the pool + * @pool_order: Page order describing the size of the page + * @page_count: Number of pages currently present in the pool + * @reserved_pages: Number of pages reserved at init for the pool 
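+ *   (these pages are skipped by the pool shrinker and only released when
+ *   the pools are torn down at exit)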
+ * @list_lock: Spinlock for page list in the pool + * @page_list: List of pages held/reserved in this pool + */ +struct kgsl_page_pool { + unsigned int pool_order; + unsigned int page_count; + unsigned int reserved_pages; + spinlock_t list_lock; + struct list_head page_list; +}; + +static int +__kgsl_pool_add_page(struct kgsl_page_pool *pool, struct page *p) +{ + spin_lock(&pool->list_lock); + list_add_tail(&p->lru, &pool->page_list); + pool->page_count++; + spin_unlock(&pool->list_lock); + + return 0; +} + +static struct page * +__kgsl_pool_get_page(struct kgsl_page_pool *pool) +{ + struct page *p; + + p = list_first_entry_or_null(&pool->page_list, struct page, lru); + if (p) { + pool->page_count--; + list_del(&p->lru); + } + + return p; +} + +static void kgsl_pool_list_init(struct kgsl_page_pool *pool) +{ + INIT_LIST_HEAD(&pool->page_list); +} + +static void kgsl_pool_cache_init(void) +{ +} +#endif + +static struct kgsl_page_pool kgsl_pools[6]; +static int kgsl_num_pools; +static int kgsl_pool_max_pages; + +/* Return the index of the pool for the specified order */ +static int kgsl_get_pool_index(int order) +{ + int i; + + for (i = 0; i < kgsl_num_pools; i++) { + if (kgsl_pools[i].pool_order == order) + return i; + } + + return -EINVAL; +} + +/* Returns KGSL pool corresponding to input page order*/ +static struct kgsl_page_pool * +_kgsl_get_pool_from_order(int order) +{ + int index = kgsl_get_pool_index(order); + + return index >= 0 ? &kgsl_pools[index] : NULL; +} + +/* Add a page to specified pool */ +static void +_kgsl_pool_add_page(struct kgsl_page_pool *pool, struct page *p) +{ + if (!p) + return; + + /* + * Sanity check to make sure we don't re-pool a page that + * somebody else has a reference to. + */ + if (WARN_ON(unlikely(page_count(p) > 1))) { + __free_pages(p, pool->pool_order); + return; + } + + if (__kgsl_pool_add_page(pool, p)) { + __free_pages(p, pool->pool_order); + trace_kgsl_pool_free_page(pool->pool_order); + return; + } + + trace_kgsl_pool_add_page(pool->pool_order, pool->page_count); + mod_node_page_state(page_pgdat(p), NR_KERNEL_MISC_RECLAIMABLE, + (1 << pool->pool_order)); +} + +/* Returns a page from specified pool */ +static struct page * +_kgsl_pool_get_page(struct kgsl_page_pool *pool) +{ + struct page *p = NULL; + + spin_lock(&pool->list_lock); + p = __kgsl_pool_get_page(pool); + spin_unlock(&pool->list_lock); + if (p != NULL) { + trace_kgsl_pool_get_page(pool->pool_order, pool->page_count); + mod_node_page_state(page_pgdat(p), NR_KERNEL_MISC_RECLAIMABLE, + -(1 << pool->pool_order)); + } + return p; +} + +/* Returns the number of pages in all kgsl page pools */ +static int kgsl_pool_size_total(void) +{ + int i; + int total = 0; + + for (i = 0; i < kgsl_num_pools; i++) { + struct kgsl_page_pool *kgsl_pool = &kgsl_pools[i]; + + spin_lock(&kgsl_pool->list_lock); + total += kgsl_pool->page_count * (1 << kgsl_pool->pool_order); + spin_unlock(&kgsl_pool->list_lock); + } + + return total; +} + +/* Returns the total number of pages in all pools excluding reserved pages */ +static unsigned long kgsl_pool_size_nonreserved(void) +{ + int i; + unsigned long total = 0; + + for (i = 0; i < kgsl_num_pools; i++) { + struct kgsl_page_pool *pool = &kgsl_pools[i]; + + spin_lock(&pool->list_lock); + if (pool->page_count > pool->reserved_pages) + total += (pool->page_count - pool->reserved_pages) * + (1 << pool->pool_order); + spin_unlock(&pool->list_lock); + } + + return total; +} + +/* + * Returns a page from specified pool only if pool + * currently holds more number of pages 
than reserved + * pages. + */ +static struct page * +_kgsl_pool_get_nonreserved_page(struct kgsl_page_pool *pool) +{ + struct page *p = NULL; + + spin_lock(&pool->list_lock); + if (pool->page_count <= pool->reserved_pages) { + spin_unlock(&pool->list_lock); + return NULL; + } + + p = __kgsl_pool_get_page(pool); + spin_unlock(&pool->list_lock); + if (p != NULL) { + trace_kgsl_pool_get_page(pool->pool_order, pool->page_count); + mod_node_page_state(page_pgdat(p), NR_KERNEL_MISC_RECLAIMABLE, + -(1 << pool->pool_order)); + } + return p; +} + +/* + * This will shrink the specified pool by num_pages or by + * (page_count - reserved_pages), whichever is smaller. + */ +static unsigned int +_kgsl_pool_shrink(struct kgsl_page_pool *pool, + unsigned int num_pages, bool exit) +{ + int j; + unsigned int pcount = 0; + struct page *(*get_page)(struct kgsl_page_pool *) = + _kgsl_pool_get_nonreserved_page; + + if (pool == NULL || num_pages == 0) + return pcount; + + num_pages = (num_pages + (1 << pool->pool_order) - 1) >> + pool->pool_order; + + /* This is to ensure that we free reserved pages */ + if (exit) + get_page = _kgsl_pool_get_page; + + for (j = 0; j < num_pages; j++) { + struct page *page = get_page(pool); + + if (!page) + break; + + __free_pages(page, pool->pool_order); + pcount += (1 << pool->pool_order); + trace_kgsl_pool_free_page(pool->pool_order); + } + + return pcount; +} + +/* + * This function removes number of pages specified by + * target_pages from the total pool size. + * + * Remove target_pages from the pool, starting from higher order pool. + */ +static unsigned long +kgsl_pool_reduce(int target_pages, bool exit) +{ + int i, ret; + unsigned long pcount = 0; + + for (i = (kgsl_num_pools - 1); i >= 0; i--) { + if (target_pages <= 0) + return pcount; + + /* Remove target_pages pages from this pool */ + ret = _kgsl_pool_shrink(&kgsl_pools[i], target_pages, exit); + target_pages -= ret; + pcount += ret; + } + + return pcount; +} + +void kgsl_pool_free_pages(struct page **pages, unsigned int pcount) +{ + int i; + + if (!pages) + return; + + for (i = 0; i < pcount;) { + /* + * Free each page or compound page group individually. + */ + struct page *p = pages[i]; + + i += 1 << compound_order(p); + kgsl_pool_free_page(p); + } +} + +static int kgsl_pool_get_retry_order(unsigned int order) +{ + int i; + + for (i = kgsl_num_pools-1; i > 0; i--) + if (order >= kgsl_pools[i].pool_order) + return kgsl_pools[i].pool_order; + + return 0; +} + +/* + * Return true if the pool of specified page size is supported + * or no pools are supported otherwise return false. 
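+ * For example, assuming only 4K and 64K pools are configured, a 16K request
+ * is reported as unavailable and kgsl_get_page_size() keeps halving the
+ * candidate size until it reaches a supported pool size or PAGE_SIZE.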
+ */ +static bool kgsl_pool_available(unsigned int page_size) +{ + int order = get_order(page_size); + + if (!kgsl_num_pools) + return true; + + return (kgsl_get_pool_index(order) >= 0); +} + +int kgsl_get_page_size(size_t size, unsigned int align) +{ + size_t pool; + + for (pool = SZ_1M; pool > PAGE_SIZE; pool >>= 1) + if ((align >= ilog2(pool)) && (size >= pool) && + kgsl_pool_available(pool)) + return pool; + + return PAGE_SIZE; +} + +int kgsl_pool_alloc_page(int *page_size, struct page **pages, + unsigned int pages_len, unsigned int *align, + struct device *dev) +{ + int j; + int pcount = 0; + struct kgsl_page_pool *pool; + struct page *page = NULL; + struct page *p = NULL; + int order = get_order(*page_size); + int pool_idx; + size_t size = 0; + + if ((pages == NULL) || pages_len < (*page_size >> PAGE_SHIFT)) + return -EINVAL; + + /* If the pool is not configured get pages from the system */ + if (!kgsl_num_pools) { + gfp_t gfp_mask = kgsl_gfp_mask(order); + + page = alloc_pages(gfp_mask, order); + if (page == NULL) { + /* Retry with lower order pages */ + if (order > 0) { + size = PAGE_SIZE << --order; + goto eagain; + + } else + return -ENOMEM; + } + trace_kgsl_pool_alloc_page_system(order); + goto done; + } + + pool = _kgsl_get_pool_from_order(order); + if (pool == NULL) { + /* Retry with lower order pages */ + if (order > 0) { + size = PAGE_SIZE << kgsl_pool_get_retry_order(order); + goto eagain; + } else { + /* + * Fall back to direct allocation in case + * pool with zero order is not present + */ + gfp_t gfp_mask = kgsl_gfp_mask(order); + + page = alloc_pages(gfp_mask, order); + if (page == NULL) + return -ENOMEM; + trace_kgsl_pool_alloc_page_system(order); + goto done; + } + } + + pool_idx = kgsl_get_pool_index(order); + page = _kgsl_pool_get_page(pool); + + /* Allocate a new page if not allocated from pool */ + if (page == NULL) { + gfp_t gfp_mask = kgsl_gfp_mask(order); + + page = alloc_pages(gfp_mask, order); + + if (!page) { + if (pool_idx > 0) { + /* Retry with lower order pages */ + size = PAGE_SIZE << + kgsl_pools[pool_idx-1].pool_order; + goto eagain; + } else + return -ENOMEM; + } + trace_kgsl_pool_alloc_page_system(order); + } + +done: + kgsl_zero_page(page, order, dev); + + for (j = 0; j < (*page_size >> PAGE_SHIFT); j++) { + p = nth_page(page, j); + pages[pcount] = p; + pcount++; + } + + return pcount; + +eagain: + trace_kgsl_pool_try_page_lower(get_order(*page_size)); + *page_size = kgsl_get_page_size(size, ilog2(size)); + *align = ilog2(*page_size); + return -EAGAIN; +} + +void kgsl_pool_free_page(struct page *page) +{ + struct kgsl_page_pool *pool; + int page_order; + + if (page == NULL) + return; + + page_order = compound_order(page); + + if (!kgsl_pool_max_pages || + (kgsl_pool_size_total() < kgsl_pool_max_pages)) { + pool = _kgsl_get_pool_from_order(page_order); + if (pool != NULL) { + _kgsl_pool_add_page(pool, page); + return; + } + } + + /* Give back to system as not added to pool */ + __free_pages(page, page_order); + trace_kgsl_pool_free_page(page_order); +} + +/* Functions for the shrinker */ + +static unsigned long +kgsl_pool_shrink_scan_objects(struct shrinker *shrinker, + struct shrink_control *sc) +{ + /* sc->nr_to_scan represents number of pages to be removed*/ + unsigned long pcount = kgsl_pool_reduce(sc->nr_to_scan, false); + + /* If pools are exhausted return SHRINK_STOP */ + return pcount ? 
pcount : SHRINK_STOP; +} + +static unsigned long +kgsl_pool_shrink_count_objects(struct shrinker *shrinker, + struct shrink_control *sc) +{ + /* + * Return non-reserved pool size as we don't + * want shrinker to free reserved pages. + */ + return kgsl_pool_size_nonreserved(); +} + +/* Shrinker callback data*/ +static struct shrinker kgsl_pool_shrinker = { + .count_objects = kgsl_pool_shrink_count_objects, + .scan_objects = kgsl_pool_shrink_scan_objects, + .seeks = DEFAULT_SEEKS, + .batch = 0, +}; + +static void kgsl_pool_reserve_pages(struct kgsl_page_pool *pool, + struct device_node *node) +{ + u32 reserved = 0; + int i; + + of_property_read_u32(node, "qcom,mempool-reserved", &reserved); + + /* Limit the total number of reserved pages to 4096 */ + pool->reserved_pages = min_t(u32, reserved, 4096); + + for (i = 0; i < pool->reserved_pages; i++) { + gfp_t gfp_mask = kgsl_gfp_mask(pool->pool_order); + struct page *page; + + page = alloc_pages(gfp_mask, pool->pool_order); + _kgsl_pool_add_page(pool, page); + } +} + +static int kgsl_of_parse_mempool(struct kgsl_page_pool *pool, + struct device_node *node) +{ + u32 size; + int order; + + if (of_property_read_u32(node, "qcom,mempool-page-size", &size)) + return -EINVAL; + + order = get_order(size); + + if (order > 8) { + pr_err("kgsl: %pOF: pool order %d is too big\n", node, order); + return -EINVAL; + } + + pool->pool_order = order; + + spin_lock_init(&pool->list_lock); + kgsl_pool_list_init(pool); + + kgsl_pool_reserve_pages(pool, node); + + return 0; +} + +void kgsl_probe_page_pools(void) +{ + struct device_node *node, *child; + int index = 0; + + node = of_find_compatible_node(NULL, NULL, "qcom,gpu-mempools"); + if (!node) + return; + + /* Get Max pages limit for mempool */ + of_property_read_u32(node, "qcom,mempool-max-pages", + &kgsl_pool_max_pages); + + kgsl_pool_cache_init(); + + for_each_child_of_node(node, child) { + if (!kgsl_of_parse_mempool(&kgsl_pools[index], child)) + index++; + + if (index == ARRAY_SIZE(kgsl_pools)) { + of_node_put(child); + break; + } + } + + kgsl_num_pools = index; + of_node_put(node); + + /* Initialize shrinker */ + register_shrinker(&kgsl_pool_shrinker); +} + +void kgsl_exit_page_pools(void) +{ + /* Release all pages in pools, if any.*/ + kgsl_pool_reduce(INT_MAX, true); + + /* Unregister shrinker */ + unregister_shrinker(&kgsl_pool_shrinker); +} + diff --git a/kgsl_pool.h b/kgsl_pool.h new file mode 100644 index 0000000000..c375c31ae8 --- /dev/null +++ b/kgsl_pool.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2016-2017,2019,2021 The Linux Foundation. All rights reserved. 
+ */ +#ifndef __KGSL_POOL_H +#define __KGSL_POOL_H + +#ifdef CONFIG_QCOM_KGSL_USE_SHMEM +static inline void kgsl_probe_page_pools(void) { } +static inline void kgsl_exit_page_pools(void) { } +static inline int kgsl_get_page_size(size_t size, unsigned int align) +{ + return PAGE_SIZE; +} +#else +/** + * kgsl_pool_free_page - Frees the page and adds it back to pool/system memory + * @page: Pointer to page struct that needs to be freed + */ +void kgsl_pool_free_page(struct page *page); + +/** + * kgsl_get_page_size - Get supported pagesize + * @size: Size of the page + * @align: Desired alignment of the size + * + * Return largest available page size from pools that can be used to meet + * given size and alignment requirements + */ +int kgsl_get_page_size(size_t size, unsigned int align); + +/** + * kgsl_pool_alloc_page - Allocate a page of requested size + * @page_size: Size of the page to be allocated + * @pages: pointer to hold list of pages, should be big enough to hold + * requested page + * @len: Length of array pages + * + * Return total page count on success and negative value on failure + */ +int kgsl_pool_alloc_page(int *page_size, struct page **pages, + unsigned int pages_len, unsigned int *align, + struct device *dev); + +/** + * kgsl_pool_free_pages - Free pages in an pages array + * @pages: pointer to an array of page structs + * @page_count: Number of entries in @pages + * + * Free the pages by collapsing any physical adjacent pages. + * Pages are added back to the pool, if pool has sufficient space + * otherwise they are given back to system. + */ +void kgsl_pool_free_pages(struct page **pages, unsigned int page_count); + +/** + * kgsl_probe_page_pools - Initialize the memory pools pools + */ +void kgsl_probe_page_pools(void); + +/** + * kgsl_exit_page_pools - Free outstanding pooled memory + */ +void kgsl_exit_page_pools(void); + +#endif +#endif /* __KGSL_POOL_H */ + diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c new file mode 100644 index 0000000000..5a3e52c3a7 --- /dev/null +++ b/kgsl_pwrctrl.c @@ -0,0 +1,2329 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2010-2021, The Linux Foundation. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kgsl_device.h" +#include "kgsl_bus.h" +#include "kgsl_pwrscale.h" +#include "kgsl_sysfs.h" +#include "kgsl_trace.h" +#include "kgsl_util.h" + +#define UPDATE_BUSY_VAL 1000000 + +#define KGSL_MAX_BUSLEVELS 20 + +/* Order deeply matters here because reasons. 
New entries go on the end */ +static const char * const clocks[] = { + "src_clk", + "core_clk", + "iface_clk", + "mem_clk", + "mem_iface_clk", + "alt_mem_iface_clk", + "rbbmtimer_clk", + "gtcu_clk", + "gtbu_clk", + "gtcu_iface_clk", + "alwayson_clk", + "isense_clk", + "rbcpr_clk", + "iref_clk", + "gmu_clk", + "ahb_clk", + "smmu_vote", + "apb_pclk", +}; + +static void kgsl_pwrctrl_clk(struct kgsl_device *device, bool state, + int requested_state); +static int kgsl_pwrctrl_pwrrail(struct kgsl_device *device, bool state); +static void kgsl_pwrctrl_set_state(struct kgsl_device *device, + unsigned int state); +static int _isense_clk_set_rate(struct kgsl_pwrctrl *pwr, int level); +static int kgsl_pwrctrl_clk_set_rate(struct clk *grp_clk, unsigned int freq, + const char *name); +static void _gpu_clk_prepare_enable(struct kgsl_device *device, + struct clk *clk, const char *name); +static void _bimc_clk_prepare_enable(struct kgsl_device *device, + struct clk *clk, const char *name); + +/** + * _adjust_pwrlevel() - Given a requested power level do bounds checking on the + * constraints and return the nearest possible level + * @device: Pointer to the kgsl_device struct + * @level: Requested level + * @pwrc: Pointer to the power constraint to be applied + * + * Apply thermal and max/min limits first. Then force the level with a + * constraint if one exists. + */ +static unsigned int _adjust_pwrlevel(struct kgsl_pwrctrl *pwr, int level, + struct kgsl_pwr_constraint *pwrc) +{ + unsigned int max_pwrlevel = max_t(unsigned int, pwr->thermal_pwrlevel, + pwr->max_pwrlevel); + unsigned int min_pwrlevel = min_t(unsigned int, + pwr->thermal_pwrlevel_floor, + pwr->min_pwrlevel); + + /* Ensure that max/min pwrlevels are within thermal max/min limits */ + max_pwrlevel = min_t(unsigned int, max_pwrlevel, + pwr->thermal_pwrlevel_floor); + min_pwrlevel = max_t(unsigned int, min_pwrlevel, + pwr->thermal_pwrlevel); + + switch (pwrc->type) { + case KGSL_CONSTRAINT_PWRLEVEL: { + switch (pwrc->sub_type) { + case KGSL_CONSTRAINT_PWR_MAX: + return max_pwrlevel; + case KGSL_CONSTRAINT_PWR_MIN: + return min_pwrlevel; + default: + break; + } + } + break; + } + + if (level < max_pwrlevel) + return max_pwrlevel; + if (level > min_pwrlevel) + return min_pwrlevel; + + return level; +} + +/** + * kgsl_pwrctrl_pwrlevel_change_settings() - Program h/w during powerlevel + * transitions + * @device: Pointer to the kgsl_device struct + * @post: flag to check if the call is before/after the clk_rate change + * @wake_up: flag to check if device is active or waking up + */ +static void kgsl_pwrctrl_pwrlevel_change_settings(struct kgsl_device *device, + bool post) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + unsigned int old = pwr->previous_pwrlevel; + unsigned int new = pwr->active_pwrlevel; + + if (device->state != KGSL_STATE_ACTIVE) + return; + if (old == new) + return; + + device->ftbl->pwrlevel_change_settings(device, old, new, post); +} + +/** + * kgsl_pwrctrl_adjust_pwrlevel() - Adjust the power level if + * required by thermal, max/min, constraints, etc + * @device: Pointer to the kgsl_device struct + * @new_level: Requested powerlevel, an index into the pwrlevel array + */ +unsigned int kgsl_pwrctrl_adjust_pwrlevel(struct kgsl_device *device, + unsigned int new_level) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + unsigned int old_level = pwr->active_pwrlevel; + bool reset = false; + + /* If a pwr constraint is expired, remove it */ + if ((pwr->constraint.type != KGSL_CONSTRAINT_NONE) && + (time_after(jiffies, 
pwr->constraint.expires))) { + + struct kgsl_context *context = kgsl_context_get(device, + pwr->constraint.owner_id); + + /* We couldn't get a reference, clear the constraint */ + if (!context) { + reset = true; + goto done; + } + + /* + * If the last timestamp that set the constraint has retired, + * clear the constraint + */ + if (kgsl_check_timestamp(device, context, + pwr->constraint.owner_timestamp)) { + reset = true; + kgsl_context_put(context); + goto done; + } + + /* + * Increase the timeout to keep the constraint at least till + * the timestamp retires + */ + pwr->constraint.expires = jiffies + + msecs_to_jiffies(device->pwrctrl.interval_timeout); + + kgsl_context_put(context); + } + +done: + if (reset) { + /* Trace the constraint being un-set by the driver */ + trace_kgsl_constraint(device, pwr->constraint.type, + old_level, 0); + /*Invalidate the constraint set */ + pwr->constraint.expires = 0; + pwr->constraint.type = KGSL_CONSTRAINT_NONE; + } + + /* + * Adjust the power level if required by thermal, max/min, + * constraints, etc + */ + return _adjust_pwrlevel(pwr, new_level, &pwr->constraint); +} + +/** + * kgsl_pwrctrl_pwrlevel_change() - Validate and change power levels + * @device: Pointer to the kgsl_device struct + * @new_level: Requested powerlevel, an index into the pwrlevel array + * + * Check that any power level constraints are still valid. Update the + * requested level according to any thermal, max/min, or power constraints. + * If a new GPU level is going to be set, update the bus to that level's + * default value. Do not change the bus if a constraint keeps the new + * level at the current level. Set the new GPU frequency. + */ +void kgsl_pwrctrl_pwrlevel_change(struct kgsl_device *device, + unsigned int new_level) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct kgsl_pwrlevel *pwrlevel; + unsigned int old_level = pwr->active_pwrlevel; + + new_level = kgsl_pwrctrl_adjust_pwrlevel(device, new_level); + + if (new_level == old_level) + return; + + kgsl_pwrscale_update_stats(device); + + /* + * Set the active and previous powerlevel first in case the clocks are + * off - if we don't do this then the pwrlevel change won't take effect + * when the clocks come back + */ + pwr->active_pwrlevel = new_level; + pwr->previous_pwrlevel = old_level; + + /* + * If the bus is running faster than its default level and the GPU + * frequency is moving down keep the DDR at a relatively high level. + */ + if (pwr->bus_mod < 0 || new_level < old_level) { + pwr->bus_mod = 0; + pwr->bus_percent_ab = 0; + } + /* + * Update the bus before the GPU clock to prevent underrun during + * frequency increases. + */ + kgsl_bus_update(device, KGSL_BUS_VOTE_ON); + + pwrlevel = &pwr->pwrlevels[pwr->active_pwrlevel]; + /* Change register settings if any BEFORE pwrlevel change*/ + kgsl_pwrctrl_pwrlevel_change_settings(device, 0); + device->ftbl->gpu_clock_set(device, pwr->active_pwrlevel); + _isense_clk_set_rate(pwr, pwr->active_pwrlevel); + + trace_kgsl_pwrlevel(device, + pwr->active_pwrlevel, pwrlevel->gpu_freq, + pwr->previous_pwrlevel, + pwr->pwrlevels[old_level].gpu_freq); + + trace_gpu_frequency(pwrlevel->gpu_freq/1000, 0); + + /* + * Some targets do not support the bandwidth requirement of + * GPU at TURBO, for such targets we need to set GPU-BIMC + * interface clocks to TURBO directly whenever GPU runs at + * TURBO. The TURBO frequency of gfx-bimc need to be defined + * in target device tree. 
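+ * Note that pwrlevel index 0 is the fastest (TURBO) level, which is why the
+ * checks below key off active_pwrlevel/previous_pwrlevel being 0.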
+ */ + if (pwr->gpu_bimc_int_clk) { + if (pwr->active_pwrlevel == 0 && + !pwr->gpu_bimc_interface_enabled) { + kgsl_pwrctrl_clk_set_rate(pwr->gpu_bimc_int_clk, + pwr->gpu_bimc_int_clk_freq, + "bimc_gpu_clk"); + _bimc_clk_prepare_enable(device, + pwr->gpu_bimc_int_clk, + "bimc_gpu_clk"); + pwr->gpu_bimc_interface_enabled = true; + } else if (pwr->previous_pwrlevel == 0 + && pwr->gpu_bimc_interface_enabled) { + clk_disable_unprepare(pwr->gpu_bimc_int_clk); + pwr->gpu_bimc_interface_enabled = false; + } + } + + /* Change register settings if any AFTER pwrlevel change*/ + kgsl_pwrctrl_pwrlevel_change_settings(device, 1); +} + +void kgsl_pwrctrl_set_constraint(struct kgsl_device *device, + struct kgsl_pwr_constraint *pwrc, uint32_t id, u32 ts) +{ + unsigned int constraint; + struct kgsl_pwr_constraint *pwrc_old; + + if (device == NULL || pwrc == NULL) + return; + constraint = _adjust_pwrlevel(&device->pwrctrl, + device->pwrctrl.active_pwrlevel, pwrc); + pwrc_old = &device->pwrctrl.constraint; + + /* + * If a constraint is already set, set a new constraint only + * if it is faster. If the requested constraint is the same + * as the current one, update ownership and timestamp. + */ + if ((pwrc_old->type == KGSL_CONSTRAINT_NONE) || + (constraint < pwrc_old->hint.pwrlevel.level)) { + pwrc_old->type = pwrc->type; + pwrc_old->sub_type = pwrc->sub_type; + pwrc_old->hint.pwrlevel.level = constraint; + pwrc_old->owner_id = id; + pwrc_old->expires = jiffies + + msecs_to_jiffies(device->pwrctrl.interval_timeout); + pwrc_old->owner_timestamp = ts; + kgsl_pwrctrl_pwrlevel_change(device, constraint); + /* Trace the constraint being set by the driver */ + trace_kgsl_constraint(device, pwrc_old->type, constraint, 1); + } else if ((pwrc_old->type == pwrc->type) && + (pwrc_old->hint.pwrlevel.level == constraint)) { + pwrc_old->owner_id = id; + pwrc_old->owner_timestamp = ts; + pwrc_old->expires = jiffies + + msecs_to_jiffies(device->pwrctrl.interval_timeout); + } +} + +static int kgsl_pwrctrl_set_thermal_limit(struct kgsl_device *device, + u32 level) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int ret = -EINVAL; + + if (level >= pwr->num_pwrlevels) + level = pwr->num_pwrlevels - 1; + + if (dev_pm_qos_request_active(&pwr->sysfs_thermal_req)) + ret = dev_pm_qos_update_request(&pwr->sysfs_thermal_req, + (pwr->pwrlevels[level].gpu_freq / 1000)); + + return (ret < 0) ? 
ret : 0; +} + +static ssize_t thermal_pwrlevel_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + int ret; + u32 level; + + ret = kstrtou32(buf, 0, &level); + if (ret) + return ret; + + ret = kgsl_pwrctrl_set_thermal_limit(device, level); + if (ret) + return ret; + + return count; +} + +static ssize_t thermal_pwrlevel_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + + struct kgsl_device *device = dev_get_drvdata(dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + + return scnprintf(buf, PAGE_SIZE, "%d\n", pwr->thermal_pwrlevel); +} + +static ssize_t max_pwrlevel_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int ret; + unsigned int level = 0; + + ret = kstrtou32(buf, 0, &level); + if (ret) + return ret; + + mutex_lock(&device->mutex); + + /* You can't set a maximum power level lower than the minimum */ + if (level > pwr->min_pwrlevel) + level = pwr->min_pwrlevel; + + pwr->max_pwrlevel = level; + + /* Update the current level using the new limit */ + kgsl_pwrctrl_pwrlevel_change(device, pwr->active_pwrlevel); + mutex_unlock(&device->mutex); + + return count; +} + +static ssize_t max_pwrlevel_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + + struct kgsl_device *device = dev_get_drvdata(dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + + return scnprintf(buf, PAGE_SIZE, "%u\n", pwr->max_pwrlevel); +} + +static void kgsl_pwrctrl_min_pwrlevel_set(struct kgsl_device *device, + int level) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + + mutex_lock(&device->mutex); + if (level >= pwr->num_pwrlevels) + level = pwr->num_pwrlevels - 1; + + /* You can't set a minimum power level lower than the maximum */ + if (level < pwr->max_pwrlevel) + level = pwr->max_pwrlevel; + + pwr->min_pwrlevel = level; + + /* Update the current level using the new limit */ + kgsl_pwrctrl_pwrlevel_change(device, pwr->active_pwrlevel); + + mutex_unlock(&device->mutex); +} + +static ssize_t min_pwrlevel_store(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t count) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + int ret; + unsigned int level = 0; + + ret = kstrtou32(buf, 0, &level); + if (ret) + return ret; + + kgsl_pwrctrl_min_pwrlevel_set(device, level); + + return count; +} + +static ssize_t min_pwrlevel_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + + return scnprintf(buf, PAGE_SIZE, "%u\n", pwr->min_pwrlevel); +} + +static ssize_t num_pwrlevels_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + + struct kgsl_device *device = dev_get_drvdata(dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + + return scnprintf(buf, PAGE_SIZE, "%d\n", pwr->num_pwrlevels); +} + +/* Given a GPU clock value, return the lowest matching powerlevel */ + +static int _get_nearest_pwrlevel(struct kgsl_pwrctrl *pwr, unsigned int clock) +{ + int i; + + for (i = pwr->num_pwrlevels - 1; i >= 0; i--) { + if (abs(pwr->pwrlevels[i].gpu_freq - clock) < 5000000) + return i; + } + + return -ERANGE; +} + +static ssize_t max_gpuclk_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct kgsl_device *device = 
dev_get_drvdata(dev); + u32 freq; + int ret, level; + + ret = kstrtou32(buf, 0, &freq); + if (ret) + return ret; + + level = _get_nearest_pwrlevel(&device->pwrctrl, freq); + if (level < 0) + return level; + + /* + * You would think this would set max_pwrlevel but the legacy behavior + * is that it set thermal_pwrlevel instead so we don't want to mess with + * that. + */ + ret = kgsl_pwrctrl_set_thermal_limit(device, level); + if (ret) + return ret; + + return count; +} + +static ssize_t max_gpuclk_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + + return scnprintf(buf, PAGE_SIZE, "%d\n", + device->pwrctrl.pwrlevels[pwr->thermal_pwrlevel].gpu_freq); +} + +static ssize_t gpuclk_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + unsigned int val = 0; + int ret, level; + + ret = kstrtou32(buf, 0, &val); + if (ret) + return ret; + + mutex_lock(&device->mutex); + level = _get_nearest_pwrlevel(pwr, val); + if (level >= 0) + kgsl_pwrctrl_pwrlevel_change(device, (unsigned int) level); + + mutex_unlock(&device->mutex); + return count; +} + +static ssize_t gpuclk_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%ld\n", + kgsl_pwrctrl_active_freq(&device->pwrctrl)); +} + +static ssize_t idle_timer_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + unsigned int val = 0; + struct kgsl_device *device = dev_get_drvdata(dev); + int ret; + + ret = kstrtou32(buf, 0, &val); + if (ret) + return ret; + + /* + * We don't quite accept a maximum of 0xFFFFFFFF due to internal jiffy + * math, so make sure the value falls within the largest offset we can + * deal with + */ + + if (val > jiffies_to_usecs(MAX_JIFFY_OFFSET)) + return -EINVAL; + + mutex_lock(&device->mutex); + device->pwrctrl.interval_timeout = val; + mutex_unlock(&device->mutex); + + return count; +} + +static ssize_t idle_timer_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%u\n", device->pwrctrl.interval_timeout); +} + +static ssize_t minbw_timer_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + u32 val; + int ret; + + if (device->pwrctrl.ctrl_flags & BIT(KGSL_PWRFLAGS_NAP_OFF)) + return -EINVAL; + + ret = kstrtou32(buf, 0, &val); + if (ret) + return ret; + + device->pwrctrl.minbw_timeout = val; + return count; +} + +static ssize_t minbw_timer_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%u\n", + device->pwrctrl.minbw_timeout); +} + +static ssize_t gpubusy_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int ret; + struct kgsl_device *device = dev_get_drvdata(dev); + struct kgsl_clk_stats *stats = &device->pwrctrl.clk_stats; + + ret = scnprintf(buf, PAGE_SIZE, "%7d %7d\n", + stats->busy_old, stats->total_old); + if (!test_bit(KGSL_PWRFLAGS_AXI_ON, &device->pwrctrl.power_flags)) { + stats->busy_old = 0; + stats->total_old = 0; + } + return ret; +} + +static ssize_t 
gpu_available_frequencies_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int index, num_chars = 0; + + for (index = 0; index < pwr->num_pwrlevels; index++) { + num_chars += scnprintf(buf + num_chars, + PAGE_SIZE - num_chars - 1, + "%d ", pwr->pwrlevels[index].gpu_freq); + /* One space for trailing null and another for the newline */ + if (num_chars >= PAGE_SIZE - 2) + break; + } + buf[num_chars++] = '\n'; + return num_chars; +} + +static ssize_t gpu_clock_stats_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int index, num_chars = 0; + + mutex_lock(&device->mutex); + kgsl_pwrscale_update_stats(device); + mutex_unlock(&device->mutex); + for (index = 0; index < pwr->num_pwrlevels; index++) + num_chars += scnprintf(buf + num_chars, PAGE_SIZE - num_chars, + "%llu ", pwr->clock_times[index]); + + if (num_chars < PAGE_SIZE) + buf[num_chars++] = '\n'; + + return num_chars; +} + +static ssize_t reset_count_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%d\n", device->reset_counter); +} + +static void __force_on(struct kgsl_device *device, int flag, int on) +{ + if (on) { + switch (flag) { + case KGSL_PWRFLAGS_CLK_ON: + /* make sure pwrrail is ON before enabling clocks */ + kgsl_pwrctrl_pwrrail(device, true); + kgsl_pwrctrl_clk(device, true, + KGSL_STATE_ACTIVE); + break; + case KGSL_PWRFLAGS_AXI_ON: + kgsl_pwrctrl_axi(device, true); + break; + case KGSL_PWRFLAGS_POWER_ON: + kgsl_pwrctrl_pwrrail(device, true); + break; + } + set_bit(flag, &device->pwrctrl.ctrl_flags); + } else { + clear_bit(flag, &device->pwrctrl.ctrl_flags); + } +} + +static ssize_t __force_on_show(struct device *dev, + struct device_attribute *attr, + char *buf, int flag) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%d\n", + test_bit(flag, &device->pwrctrl.ctrl_flags)); +} + +static ssize_t __force_on_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count, + int flag) +{ + unsigned int val = 0; + struct kgsl_device *device = dev_get_drvdata(dev); + int ret; + + ret = kstrtou32(buf, 0, &val); + if (ret) + return ret; + + mutex_lock(&device->mutex); + __force_on(device, flag, val); + mutex_unlock(&device->mutex); + + return count; +} + +static ssize_t force_clk_on_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return __force_on_show(dev, attr, buf, KGSL_PWRFLAGS_CLK_ON); +} + +static ssize_t force_clk_on_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return __force_on_store(dev, attr, buf, count, KGSL_PWRFLAGS_CLK_ON); +} + +static ssize_t force_bus_on_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return __force_on_show(dev, attr, buf, KGSL_PWRFLAGS_AXI_ON); +} + +static ssize_t force_bus_on_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return __force_on_store(dev, attr, buf, count, KGSL_PWRFLAGS_AXI_ON); +} + +static ssize_t force_rail_on_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return __force_on_show(dev, attr, buf, KGSL_PWRFLAGS_POWER_ON); +} + +static ssize_t force_rail_on_store(struct device 
*dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return __force_on_store(dev, attr, buf, count, KGSL_PWRFLAGS_POWER_ON); +} + +static ssize_t force_no_nap_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return __force_on_show(dev, attr, buf, KGSL_PWRFLAGS_NAP_OFF); +} + +static ssize_t force_no_nap_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return __force_on_store(dev, attr, buf, count, + KGSL_PWRFLAGS_NAP_OFF); +} + +static ssize_t bus_split_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%d\n", + device->pwrctrl.bus_control); +} + +static ssize_t bus_split_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + unsigned int val = 0; + struct kgsl_device *device = dev_get_drvdata(dev); + int ret; + + ret = kstrtou32(buf, 0, &val); + if (ret) + return ret; + + mutex_lock(&device->mutex); + device->pwrctrl.bus_control = val ? true : false; + mutex_unlock(&device->mutex); + + return count; +} + +static ssize_t default_pwrlevel_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%d\n", + device->pwrctrl.default_pwrlevel); +} + +static ssize_t default_pwrlevel_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct kgsl_pwrscale *pwrscale = &device->pwrscale; + int ret; + unsigned int level = 0; + + ret = kstrtou32(buf, 0, &level); + if (ret) + return ret; + + if (level >= pwr->num_pwrlevels) + return count; + + mutex_lock(&device->mutex); + pwr->default_pwrlevel = level; + pwrscale->gpu_profile.profile.initial_freq + = pwr->pwrlevels[level].gpu_freq; + + mutex_unlock(&device->mutex); + return count; +} + +static ssize_t popp_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + /* POPP is deprecated, so return it as always disabled */ + return scnprintf(buf, PAGE_SIZE, "0\n"); +} + +static ssize_t _gpu_busy_show(struct kgsl_device *device, + char *buf) +{ + int ret; + struct kgsl_clk_stats *stats = &device->pwrctrl.clk_stats; + unsigned int busy_percent = 0; + + if (stats->total_old != 0) + busy_percent = (stats->busy_old * 100) / stats->total_old; + + ret = scnprintf(buf, PAGE_SIZE, "%d %%\n", busy_percent); + + /* Reset the stats if GPU is OFF */ + if (!test_bit(KGSL_PWRFLAGS_AXI_ON, &device->pwrctrl.power_flags)) { + stats->busy_old = 0; + stats->total_old = 0; + } + return ret; +} + +static ssize_t gpu_busy_percentage_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + + return _gpu_busy_show(device, buf); +} + +static ssize_t _min_clock_mhz_show(struct kgsl_device *device, + char *buf) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + + return scnprintf(buf, PAGE_SIZE, "%d\n", + pwr->pwrlevels[pwr->min_pwrlevel].gpu_freq / 1000000); +} + + +static ssize_t min_clock_mhz_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + + return _min_clock_mhz_show(device, buf); +} + +static ssize_t _min_clock_mhz_store(struct kgsl_device *device, + const char *buf, size_t count) +{ + int level, ret; + unsigned int 
freq; + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + + ret = kstrtou32(buf, 0, &freq); + if (ret) + return ret; + + freq *= 1000000; + level = _get_nearest_pwrlevel(pwr, freq); + + if (level >= 0) + kgsl_pwrctrl_min_pwrlevel_set(device, level); + + return count; +} + +static ssize_t min_clock_mhz_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + + return _min_clock_mhz_store(device, buf, count); +} + +static ssize_t _max_clock_mhz_show(struct kgsl_device *device, char *buf) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + + return scnprintf(buf, PAGE_SIZE, "%d\n", + pwr->pwrlevels[pwr->thermal_pwrlevel].gpu_freq / 1000000); +} + +static ssize_t max_clock_mhz_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + + return _max_clock_mhz_show(device, buf); +} + +static ssize_t _max_clock_mhz_store(struct kgsl_device *device, + const char *buf, size_t count) +{ + u32 freq; + int ret, level; + + ret = kstrtou32(buf, 0, &freq); + if (ret) + return ret; + + level = _get_nearest_pwrlevel(&device->pwrctrl, freq * 1000000); + if (level < 0) + return level; + + /* + * You would think this would set max_pwrlevel but the legacy behavior + * is that it set thermal_pwrlevel instead so we don't want to mess with + * that. + */ + ret = kgsl_pwrctrl_set_thermal_limit(device, level); + if (ret) + return ret; + + return count; +} + +static ssize_t max_clock_mhz_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + + return _max_clock_mhz_store(device, buf, count); +} + +static ssize_t _clock_mhz_show(struct kgsl_device *device, char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%ld\n", + kgsl_pwrctrl_active_freq(&device->pwrctrl) / 1000000); +} + +static ssize_t clock_mhz_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + + return _clock_mhz_show(device, buf); +} + +static ssize_t _freq_table_mhz_show(struct kgsl_device *device, + char *buf) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int index, num_chars = 0; + + for (index = 0; index < pwr->num_pwrlevels; index++) { + num_chars += scnprintf(buf + num_chars, + PAGE_SIZE - num_chars - 1, + "%d ", pwr->pwrlevels[index].gpu_freq / 1000000); + /* One space for trailing null and another for the newline */ + if (num_chars >= PAGE_SIZE - 2) + break; + } + + buf[num_chars++] = '\n'; + + return num_chars; +} + +static ssize_t freq_table_mhz_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + + return _freq_table_mhz_show(device, buf); +} + +static ssize_t _gpu_tmu_show(struct kgsl_device *device, + char *buf) +{ + struct device *dev; + struct thermal_zone_device *thermal_dev; + int temperature = 0, max_temp = 0; + const char *name; + struct property *prop; + + dev = &device->pdev->dev; + + of_property_for_each_string(dev->of_node, "qcom,tzone-names", prop, name) { + thermal_dev = thermal_zone_get_zone_by_name(name); + if (IS_ERR(thermal_dev)) + continue; + + if (thermal_zone_get_temp(thermal_dev, &temperature)) + continue; + + max_temp = max(temperature, max_temp); + } + + return scnprintf(buf, PAGE_SIZE, "%d\n", + max_temp); +} + +static ssize_t temp_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct 
kgsl_device *device = dev_get_drvdata(dev); + + return _gpu_tmu_show(device, buf); +} + +static ssize_t pwrscale_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + int ret; + unsigned int enable = 0; + + ret = kstrtou32(buf, 0, &enable); + if (ret) + return ret; + + mutex_lock(&device->mutex); + + if (enable) + kgsl_pwrscale_enable(device); + else + kgsl_pwrscale_disable(device, false); + + mutex_unlock(&device->mutex); + + return count; +} + +static ssize_t pwrscale_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + struct kgsl_pwrscale *psc = &device->pwrscale; + + return scnprintf(buf, PAGE_SIZE, "%u\n", psc->enabled); +} + +static DEVICE_ATTR_RO(temp); +static DEVICE_ATTR_RW(gpuclk); +static DEVICE_ATTR_RW(max_gpuclk); +static DEVICE_ATTR_RW(idle_timer); +static DEVICE_ATTR_RW(minbw_timer); +static DEVICE_ATTR_RO(gpubusy); +static DEVICE_ATTR_RO(gpu_available_frequencies); +static DEVICE_ATTR_RO(gpu_clock_stats); +static DEVICE_ATTR_RW(max_pwrlevel); +static DEVICE_ATTR_RW(min_pwrlevel); +static DEVICE_ATTR_RW(thermal_pwrlevel); +static DEVICE_ATTR_RO(num_pwrlevels); +static DEVICE_ATTR_RO(reset_count); +static DEVICE_ATTR_RW(force_clk_on); +static DEVICE_ATTR_RW(force_bus_on); +static DEVICE_ATTR_RW(force_rail_on); +static DEVICE_ATTR_RW(bus_split); +static DEVICE_ATTR_RW(default_pwrlevel); +static DEVICE_ATTR_RO(popp); +static DEVICE_ATTR_RW(force_no_nap); +static DEVICE_ATTR_RO(gpu_busy_percentage); +static DEVICE_ATTR_RW(min_clock_mhz); +static DEVICE_ATTR_RW(max_clock_mhz); +static DEVICE_ATTR_RO(clock_mhz); +static DEVICE_ATTR_RO(freq_table_mhz); +static DEVICE_ATTR_RW(pwrscale); + +static const struct attribute *pwrctrl_attr_list[] = { + &dev_attr_gpuclk.attr, + &dev_attr_max_gpuclk.attr, + &dev_attr_idle_timer.attr, + &dev_attr_minbw_timer.attr, + &dev_attr_gpubusy.attr, + &dev_attr_gpu_available_frequencies.attr, + &dev_attr_gpu_clock_stats.attr, + &dev_attr_max_pwrlevel.attr, + &dev_attr_min_pwrlevel.attr, + &dev_attr_thermal_pwrlevel.attr, + &dev_attr_num_pwrlevels.attr, + &dev_attr_reset_count.attr, + &dev_attr_force_clk_on.attr, + &dev_attr_force_bus_on.attr, + &dev_attr_force_rail_on.attr, + &dev_attr_force_no_nap.attr, + &dev_attr_bus_split.attr, + &dev_attr_default_pwrlevel.attr, + &dev_attr_popp.attr, + &dev_attr_gpu_busy_percentage.attr, + &dev_attr_min_clock_mhz.attr, + &dev_attr_max_clock_mhz.attr, + &dev_attr_clock_mhz.attr, + &dev_attr_freq_table_mhz.attr, + &dev_attr_temp.attr, + &dev_attr_pwrscale.attr, + NULL, +}; + +static GPU_SYSFS_ATTR(gpu_busy, 0444, _gpu_busy_show, NULL); +static GPU_SYSFS_ATTR(gpu_min_clock, 0644, _min_clock_mhz_show, + _min_clock_mhz_store); +static GPU_SYSFS_ATTR(gpu_max_clock, 0644, _max_clock_mhz_show, + _max_clock_mhz_store); +static GPU_SYSFS_ATTR(gpu_clock, 0444, _clock_mhz_show, NULL); +static GPU_SYSFS_ATTR(gpu_freq_table, 0444, _freq_table_mhz_show, NULL); +static GPU_SYSFS_ATTR(gpu_tmu, 0444, _gpu_tmu_show, NULL); + +static const struct attribute *gpu_sysfs_attr_list[] = { + &gpu_sysfs_attr_gpu_busy.attr, + &gpu_sysfs_attr_gpu_min_clock.attr, + &gpu_sysfs_attr_gpu_max_clock.attr, + &gpu_sysfs_attr_gpu_clock.attr, + &gpu_sysfs_attr_gpu_freq_table.attr, + &gpu_sysfs_attr_gpu_tmu.attr, + NULL, +}; + +int kgsl_pwrctrl_init_sysfs(struct kgsl_device *device) +{ + int ret; + + ret = sysfs_create_files(&device->dev->kobj, pwrctrl_attr_list); + if (ret) + return 
ret; + + if (!device->gpu_sysfs_kobj.state_in_sysfs) + return 0; + + return sysfs_create_files(&device->gpu_sysfs_kobj, gpu_sysfs_attr_list); +} + +/* + * Track the amount of time the gpu is on vs the total system time. + * Regularly update the percentage of busy time displayed by sysfs. + */ +void kgsl_pwrctrl_busy_time(struct kgsl_device *device, u64 time, u64 busy) +{ + struct kgsl_clk_stats *stats = &device->pwrctrl.clk_stats; + + stats->total += time; + stats->busy += busy; + + if (stats->total < UPDATE_BUSY_VAL) + return; + + /* Update the output regularly and reset the counters. */ + stats->total_old = stats->total; + stats->busy_old = stats->busy; + stats->total = 0; + stats->busy = 0; + + trace_kgsl_gpubusy(device, stats->busy_old, stats->total_old); +} + +static void kgsl_pwrctrl_clk(struct kgsl_device *device, bool state, + int requested_state) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int i = 0; + + if (gmu_core_gpmu_isenabled(device)) + return; + if (test_bit(KGSL_PWRFLAGS_CLK_ON, &pwr->ctrl_flags)) + return; + + if (!state) { + if (test_and_clear_bit(KGSL_PWRFLAGS_CLK_ON, + &pwr->power_flags)) { + trace_kgsl_clk(device, state, + kgsl_pwrctrl_active_freq(pwr)); + /* Disable gpu-bimc-interface clocks */ + if (pwr->gpu_bimc_int_clk && + pwr->gpu_bimc_interface_enabled) { + clk_disable_unprepare(pwr->gpu_bimc_int_clk); + pwr->gpu_bimc_interface_enabled = false; + } + + for (i = KGSL_MAX_CLKS - 1; i > 0; i--) + clk_disable(pwr->grp_clks[i]); + /* High latency clock maintenance. */ + if ((pwr->pwrlevels[0].gpu_freq > 0) && + (requested_state != KGSL_STATE_NAP) && + (requested_state != KGSL_STATE_MINBW)) { + for (i = KGSL_MAX_CLKS - 1; i > 0; i--) + clk_unprepare(pwr->grp_clks[i]); + device->ftbl->gpu_clock_set(device, + pwr->num_pwrlevels - 1); + _isense_clk_set_rate(pwr, + pwr->num_pwrlevels - 1); + } + + /* Turn off the IOMMU clocks */ + kgsl_mmu_disable_clk(&device->mmu); + } else if (requested_state == KGSL_STATE_SLUMBER) { + /* High latency clock maintenance. */ + for (i = KGSL_MAX_CLKS - 1; i > 0; i--) + clk_unprepare(pwr->grp_clks[i]); + if ((pwr->pwrlevels[0].gpu_freq > 0)) { + device->ftbl->gpu_clock_set(device, + pwr->num_pwrlevels - 1); + _isense_clk_set_rate(pwr, + pwr->num_pwrlevels - 1); + } + } + } else { + if (!test_and_set_bit(KGSL_PWRFLAGS_CLK_ON, + &pwr->power_flags)) { + trace_kgsl_clk(device, state, + kgsl_pwrctrl_active_freq(pwr)); + /* High latency clock maintenance. 
*/ + if ((device->state != KGSL_STATE_NAP) && + (device->state != KGSL_STATE_MINBW)) { + if (pwr->pwrlevels[0].gpu_freq > 0) { + device->ftbl->gpu_clock_set(device, + pwr->active_pwrlevel); + _isense_clk_set_rate(pwr, + pwr->active_pwrlevel); + } + } + + for (i = KGSL_MAX_CLKS - 1; i > 0; i--) + _gpu_clk_prepare_enable(device, + pwr->grp_clks[i], clocks[i]); + + /* Enable the gpu-bimc-interface clocks */ + if (pwr->gpu_bimc_int_clk) { + if (pwr->active_pwrlevel == 0 && + !pwr->gpu_bimc_interface_enabled) { + kgsl_pwrctrl_clk_set_rate( + pwr->gpu_bimc_int_clk, + pwr->gpu_bimc_int_clk_freq, + "bimc_gpu_clk"); + _bimc_clk_prepare_enable(device, + pwr->gpu_bimc_int_clk, + "bimc_gpu_clk"); + pwr->gpu_bimc_interface_enabled = true; + } + } + + /* Turn on the IOMMU clocks */ + kgsl_mmu_enable_clk(&device->mmu); + } + + } +} + +int kgsl_pwrctrl_axi(struct kgsl_device *device, bool state) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + + if (test_bit(KGSL_PWRFLAGS_AXI_ON, &pwr->ctrl_flags)) + return 0; + + if (!state) { + if (test_and_clear_bit(KGSL_PWRFLAGS_AXI_ON, + &pwr->power_flags)) { + trace_kgsl_bus(device, state); + return kgsl_bus_update(device, KGSL_BUS_VOTE_OFF); + } + } else { + if (!test_and_set_bit(KGSL_PWRFLAGS_AXI_ON, + &pwr->power_flags)) { + trace_kgsl_bus(device, state); + return kgsl_bus_update(device, KGSL_BUS_VOTE_ON); + } + } + + return 0; +} + +static int enable_regulator(struct device *dev, struct regulator *regulator, + const char *name) +{ + int ret; + + if (IS_ERR_OR_NULL(regulator)) + return 0; + + ret = regulator_enable(regulator); + if (ret) + dev_err(dev, "Unable to enable regulator %s: %d\n", name, ret); + return ret; +} + +static int enable_regulators(struct kgsl_device *device) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int ret; + + if (test_and_set_bit(KGSL_PWRFLAGS_POWER_ON, &pwr->power_flags)) + return 0; + + ret = enable_regulator(&device->pdev->dev, pwr->cx_gdsc, "vddcx"); + if (!ret) { + /* Set parent in retention voltage to power up vdd supply */ + ret = kgsl_regulator_set_voltage(device->dev, + pwr->gx_gdsc_parent, + pwr->gx_gdsc_parent_min_corner); + if (!ret) + ret = enable_regulator(&device->pdev->dev, + pwr->gx_gdsc, "vdd"); + } + + if (ret) { + clear_bit(KGSL_PWRFLAGS_POWER_ON, &pwr->power_flags); + return ret; + } + + trace_kgsl_rail(device, KGSL_PWRFLAGS_POWER_ON); + return 0; +} + +static int kgsl_pwrctrl_pwrrail(struct kgsl_device *device, bool state) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int status = 0; + + if (gmu_core_gpmu_isenabled(device)) + return 0; + /* + * Disabling the regulator means also disabling dependent clocks. + * Hence don't disable it if force clock ON is set. 
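+	 * As an illustration (the sysfs usage here is an example, not a
+	 * guarantee of the interface): writing 1 to the force_rail_on or
+	 * force_clk_on nodes created by kgsl_pwrctrl_init_sysfs() is
+	 * expected to set the corresponding KGSL_PWRFLAGS_* bit in
+	 * ctrl_flags, which the check below honors by leaving the rail up.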
+ */ + if (test_bit(KGSL_PWRFLAGS_POWER_ON, &pwr->ctrl_flags) || + test_bit(KGSL_PWRFLAGS_CLK_ON, &pwr->ctrl_flags)) + return 0; + + if (!state) { + if (test_and_clear_bit(KGSL_PWRFLAGS_POWER_ON, + &pwr->power_flags)) { + trace_kgsl_rail(device, state); + if (!kgsl_regulator_disable_wait(pwr->gx_gdsc, 200)) + dev_err(device->dev, "Regulator vdd is stuck on\n"); + if (!kgsl_regulator_disable_wait(pwr->cx_gdsc, 200)) + dev_err(device->dev, "Regulator vddcx is stuck on\n"); + } + } else + status = enable_regulators(device); + + return status; +} + +void kgsl_pwrctrl_irq(struct kgsl_device *device, bool state) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + + if (state) { + if (!test_and_set_bit(KGSL_PWRFLAGS_IRQ_ON, + &pwr->power_flags)) { + trace_kgsl_irq(device, state); + enable_irq(pwr->interrupt_num); + } + } else { + if (test_and_clear_bit(KGSL_PWRFLAGS_IRQ_ON, + &pwr->power_flags)) { + trace_kgsl_irq(device, state); + if (in_interrupt()) + disable_irq_nosync(pwr->interrupt_num); + else + disable_irq(pwr->interrupt_num); + } + } +} + +static void kgsl_minbw_timer(struct timer_list *t) +{ + struct kgsl_pwrctrl *pwr = from_timer(pwr, t, minbw_timer); + struct kgsl_device *device = container_of(pwr, + struct kgsl_device, pwrctrl); + + if (device->state == KGSL_STATE_NAP) { + kgsl_pwrctrl_request_state(device, KGSL_STATE_MINBW); + kgsl_schedule_work(&device->idle_check_ws); + } +} + +static int _get_clocks(struct kgsl_device *device) +{ + struct device *dev = &device->pdev->dev; + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + const char *name; + struct property *prop; + + pwr->isense_clk_indx = 0; + of_property_for_each_string(dev->of_node, "clock-names", prop, name) { + int i; + + for (i = 0; i < KGSL_MAX_CLKS; i++) { + if (pwr->grp_clks[i] || strcmp(clocks[i], name)) + continue; + /* apb_pclk should only be enabled if QCOM_KGSL_QDSS_STM is enabled */ + if (!strcmp(name, "apb_pclk") && !IS_ENABLED(CONFIG_QCOM_KGSL_QDSS_STM)) + continue; + + pwr->grp_clks[i] = devm_clk_get(dev, name); + + if (IS_ERR(pwr->grp_clks[i])) { + int ret = PTR_ERR(pwr->grp_clks[i]); + + dev_err(dev, "Couldn't get clock: %s (%d)\n", + name, ret); + pwr->grp_clks[i] = NULL; + return ret; + } + + if (!strcmp(name, "isense_clk")) + pwr->isense_clk_indx = i; + break; + } + } + + if (pwr->isense_clk_indx && of_property_read_u32(dev->of_node, + "qcom,isense-clk-on-level", &pwr->isense_clk_on_level)) { + dev_err(dev, "Couldn't get isense clock on level\n"); + return -ENXIO; + } + return 0; +} + +static int _isense_clk_set_rate(struct kgsl_pwrctrl *pwr, int level) +{ + int rate; + + if (!pwr->isense_clk_indx) + return -EINVAL; + + rate = clk_round_rate(pwr->grp_clks[pwr->isense_clk_indx], + level > pwr->isense_clk_on_level ? 
+			KGSL_XO_CLK_FREQ : KGSL_ISENSE_CLK_FREQ);
+	return kgsl_pwrctrl_clk_set_rate(pwr->grp_clks[pwr->isense_clk_indx],
+			rate, clocks[pwr->isense_clk_indx]);
+}
+
+/*
+ * _gpu_clk_prepare_enable - Enable the specified GPU clock
+ * Try once to enable it and log an error on failure to aid debug
+ */
+static void _gpu_clk_prepare_enable(struct kgsl_device *device,
+		struct clk *clk, const char *name)
+{
+	int ret;
+
+	if (kgsl_state_is_nap_or_minbw(device)) {
+		ret = clk_enable(clk);
+		if (ret)
+			goto err;
+		return;
+	}
+
+	ret = clk_prepare_enable(clk);
+	if (!ret)
+		return;
+err:
+	/* Failure is serious, so log it loudly to facilitate debug */
+	dev_err(device->dev, "GPU Clock %s enable error:%d\n", name, ret);
+}
+
+/*
+ * _bimc_clk_prepare_enable - Enable the GPU-BIMC interface clock
+ * Try once to enable it and log an error on failure to aid debug
+ */
+static void _bimc_clk_prepare_enable(struct kgsl_device *device,
+		struct clk *clk, const char *name)
+{
+	int ret = clk_prepare_enable(clk);
+	/* Failure is serious, so log it loudly to facilitate debug */
+	if (ret)
+		dev_err(device->dev, "GPU clock %s enable error:%d\n",
+				name, ret);
+}
+
+static int kgsl_pwrctrl_clk_set_rate(struct clk *grp_clk, unsigned int freq,
+		const char *name)
+{
+	int ret = clk_set_rate(grp_clk, freq);
+
+	WARN(ret, "%s set freq %d failed:%d\n", name, freq, ret);
+	return ret;
+}
+
+int kgsl_pwrctrl_init(struct kgsl_device *device)
+{
+	int i, result, freq;
+	struct platform_device *pdev = device->pdev;
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+
+	result = _get_clocks(device);
+	if (result)
+		return result;
+
+	/* Make sure we have a source clk for freq setting */
+	if (pwr->grp_clks[0] == NULL)
+		pwr->grp_clks[0] = pwr->grp_clks[1];
+
+	/* Get the gpu-bimc-interface-clk frequency */
+	if (!of_property_read_u32(pdev->dev.of_node,
+			"qcom,gpu-bimc-interface-clk-freq",
+			&pwr->gpu_bimc_int_clk_freq))
+		pwr->gpu_bimc_int_clk = devm_clk_get(&pdev->dev,
+			"bimc_gpu_clk");
+
+	if (of_property_read_bool(pdev->dev.of_node, "qcom,no-nap"))
+		device->pwrctrl.ctrl_flags |= BIT(KGSL_PWRFLAGS_NAP_OFF);
+	else if (!IS_ENABLED(CONFIG_COMMON_CLK_QCOM)) {
+		dev_warn(device->dev, "KGSL nap state is not supported\n");
+		device->pwrctrl.ctrl_flags |= BIT(KGSL_PWRFLAGS_NAP_OFF);
+	}
+
+	if (pwr->num_pwrlevels == 0) {
+		dev_err(device->dev, "No power levels are defined\n");
+		return -EINVAL;
+	}
+
+	init_waitqueue_head(&device->active_cnt_wq);
+
+	/* Initialize the user and thermal clock constraints */
+
+	pwr->max_pwrlevel = 0;
+	pwr->min_pwrlevel = pwr->num_pwrlevels - 1;
+	pwr->thermal_pwrlevel = 0;
+	pwr->thermal_pwrlevel_floor = pwr->min_pwrlevel;
+
+	pwr->wakeup_maxpwrlevel = 0;
+
+	result = dev_pm_qos_add_request(&pdev->dev, &pwr->sysfs_thermal_req,
+			DEV_PM_QOS_MAX_FREQUENCY,
+			PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE);
+	if (result < 0)
+		dev_err(device->dev, "PM QoS thermal request failed: %d\n",
+			result);
+
+	for (i = 0; i < pwr->num_pwrlevels; i++) {
+		freq = pwr->pwrlevels[i].gpu_freq;
+
+		if (freq > 0)
+			freq = clk_round_rate(pwr->grp_clks[0], freq);
+
+		if (freq >= pwr->pwrlevels[i].gpu_freq)
+			pwr->pwrlevels[i].gpu_freq = freq;
+	}
+
+	clk_set_rate(pwr->grp_clks[0],
+		pwr->pwrlevels[pwr->num_pwrlevels - 1].gpu_freq);
+
+	freq = clk_round_rate(pwr->grp_clks[6], KGSL_XO_CLK_FREQ);
+	if (freq > 0)
+		kgsl_pwrctrl_clk_set_rate(pwr->grp_clks[6],
+			freq, clocks[6]);
+
+	_isense_clk_set_rate(pwr, pwr->num_pwrlevels - 1);
+
+	if (of_property_read_bool(pdev->dev.of_node, "vddcx-supply"))
+		pwr->cx_gdsc = devm_regulator_get(&pdev->dev, "vddcx");
+
+	if
(of_property_read_bool(pdev->dev.of_node, "vdd-supply")) + pwr->gx_gdsc = devm_regulator_get(&pdev->dev, "vdd"); + + if (of_property_read_bool(pdev->dev.of_node, "vdd-parent-supply")) { + pwr->gx_gdsc_parent = devm_regulator_get(&pdev->dev, + "vdd-parent"); + if (IS_ERR(pwr->gx_gdsc_parent)) { + dev_err(device->dev, + "Failed to get vdd-parent regulator:%ld\n", + PTR_ERR(pwr->gx_gdsc_parent)); + return -ENODEV; + } + if (of_property_read_u32(pdev->dev.of_node, + "vdd-parent-min-corner", + &pwr->gx_gdsc_parent_min_corner)) { + dev_err(device->dev, + "vdd-parent-min-corner not found\n"); + return -ENODEV; + } + } + + pwr->power_flags = 0; + + pm_runtime_enable(&pdev->dev); + + timer_setup(&pwr->minbw_timer, kgsl_minbw_timer, 0); + + return 0; +} + +void kgsl_pwrctrl_close(struct kgsl_device *device) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + + pwr->power_flags = 0; + + kgsl_bus_close(device); + + if (dev_pm_qos_request_active(&pwr->sysfs_thermal_req)) + dev_pm_qos_remove_request(&pwr->sysfs_thermal_req); + + pm_runtime_disable(&device->pdev->dev); +} + +void kgsl_idle_check(struct work_struct *work) +{ + struct kgsl_device *device = container_of(work, struct kgsl_device, + idle_check_ws); + int ret = 0; + unsigned int requested_state; + + mutex_lock(&device->mutex); + + /* + * After scheduling idle work for transitioning to either NAP or + * SLUMBER, it's possible that requested state can change to NONE + * if any new workload comes before kgsl_idle_check is executed or + * it gets the device mutex. In such case, no need to change state + * to NONE. + */ + if (device->requested_state == KGSL_STATE_NONE) { + mutex_unlock(&device->mutex); + return; + } + + requested_state = device->requested_state; + + if (device->state == KGSL_STATE_ACTIVE + || kgsl_state_is_nap_or_minbw(device)) { + + if (!atomic_read(&device->active_cnt)) { + spin_lock(&device->submit_lock); + if (device->submit_now) { + spin_unlock(&device->submit_lock); + goto done; + } + /* Don't allow GPU inline submission in SLUMBER */ + if (requested_state == KGSL_STATE_SLUMBER) + device->slumber = true; + spin_unlock(&device->submit_lock); + + ret = kgsl_pwrctrl_change_state(device, + device->requested_state); + if (ret == -EBUSY) { + if (requested_state == KGSL_STATE_SLUMBER) { + spin_lock(&device->submit_lock); + device->slumber = false; + spin_unlock(&device->submit_lock); + } + /* + * If the GPU is currently busy, restore + * the requested state and reschedule + * idle work. + */ + kgsl_pwrctrl_request_state(device, + requested_state); + kgsl_schedule_work(&device->idle_check_ws); + } + } +done: + if (!ret) + kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); + + if (device->state == KGSL_STATE_ACTIVE) + kgsl_start_idle_timer(device); + } + + if (device->state != KGSL_STATE_MINBW) + kgsl_pwrscale_update(device); + mutex_unlock(&device->mutex); +} + +void kgsl_timer(struct timer_list *t) +{ + struct kgsl_device *device = from_timer(device, t, idle_timer); + + if (device->requested_state != KGSL_STATE_SUSPEND) { + kgsl_pwrctrl_request_state(device, KGSL_STATE_SLUMBER); + /* Have work run in a non-interrupt context. 
*/ + kgsl_schedule_work(&device->idle_check_ws); + } +} + +static bool kgsl_pwrctrl_isenabled(struct kgsl_device *device) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + + return ((test_bit(KGSL_PWRFLAGS_CLK_ON, &pwr->power_flags) != 0) && + (test_bit(KGSL_PWRFLAGS_AXI_ON, &pwr->power_flags) != 0)); +} + +/** + * kgsl_pre_hwaccess - Enforce preconditions for touching registers + * @device: The device + * + * This function ensures that the correct lock is held and that the GPU + * clock is on immediately before a register is read or written. Note + * that this function does not check active_cnt because the registers + * must be accessed during device start and stop, when the active_cnt + * may legitimately be 0. + */ +void kgsl_pre_hwaccess(struct kgsl_device *device) +{ + /* In order to touch a register you must hold the device mutex */ + WARN_ON(!mutex_is_locked(&device->mutex)); + + /* + * A register access without device power will cause a fatal timeout. + * This is not valid for targets with a GMU. + */ + if (!gmu_core_gpmu_isenabled(device)) + WARN_ON(!kgsl_pwrctrl_isenabled(device)); +} + +static int kgsl_pwrctrl_enable(struct kgsl_device *device) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int level, status; + + if (pwr->wakeup_maxpwrlevel) { + level = pwr->max_pwrlevel; + pwr->wakeup_maxpwrlevel = 0; + } else { + level = pwr->default_pwrlevel; + } + + kgsl_pwrctrl_pwrlevel_change(device, level); + + /* Order pwrrail/clk sequence based upon platform */ + status = kgsl_pwrctrl_pwrrail(device, true); + if (status) + return status; + kgsl_pwrctrl_clk(device, true, KGSL_STATE_ACTIVE); + kgsl_pwrctrl_axi(device, true); + + return device->ftbl->regulator_enable(device); +} + +void kgsl_pwrctrl_clear_l3_vote(struct kgsl_device *device) +{ + int status; + struct dcvs_freq freq = {0}; + + if (!device->num_l3_pwrlevels) + return; + + freq.hw_type = DCVS_L3; + + status = qcom_dcvs_update_votes(KGSL_L3_DEVICE, &freq, 1, + DCVS_SLOW_PATH); + if (!status) + device->cur_l3_pwrlevel = 0; + else + dev_err(device->dev, "Could not clear l3_vote: %d\n", + status); +} + +static void kgsl_pwrctrl_disable(struct kgsl_device *device) +{ + kgsl_pwrctrl_clear_l3_vote(device); + + /* Order pwrrail/clk sequence based upon platform */ + device->ftbl->regulator_disable(device); + kgsl_pwrctrl_axi(device, false); + kgsl_pwrctrl_clk(device, false, KGSL_STATE_SLUMBER); + kgsl_pwrctrl_pwrrail(device, false); +} + +static void +kgsl_pwrctrl_clk_set_options(struct kgsl_device *device, bool on) +{ + int i; + + for (i = 0; i < KGSL_MAX_CLKS; i++) + device->ftbl->clk_set_options(device, clocks[i], + device->pwrctrl.grp_clks[i], on); +} + +/** + * _init() - Get the GPU ready to start, but don't turn anything on + * @device - Pointer to the kgsl_device struct + */ +static int _init(struct kgsl_device *device) +{ + int status = 0; + + switch (device->state) { + case KGSL_STATE_MINBW: + fallthrough; + case KGSL_STATE_NAP: + del_timer_sync(&device->pwrctrl.minbw_timer); + /* Force power on to do the stop */ + status = kgsl_pwrctrl_enable(device); + fallthrough; + case KGSL_STATE_ACTIVE: + kgsl_pwrctrl_irq(device, false); + del_timer_sync(&device->idle_timer); + device->ftbl->stop(device); + fallthrough; + case KGSL_STATE_AWARE: + kgsl_pwrctrl_disable(device); + fallthrough; + case KGSL_STATE_SLUMBER: + fallthrough; + case KGSL_STATE_NONE: + kgsl_pwrctrl_set_state(device, KGSL_STATE_INIT); + } + + return status; +} + +/** + * _wake() - Power up the GPU from a slumber state + * @device - Pointer to the kgsl_device 
struct + * + * Resume the GPU from a lower power state to ACTIVE. + */ +static int _wake(struct kgsl_device *device) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int status = 0; + + switch (device->state) { + case KGSL_STATE_SUSPEND: + complete_all(&device->hwaccess_gate); + /* Call the GPU specific resume function */ + device->ftbl->resume(device); + fallthrough; + case KGSL_STATE_SLUMBER: + kgsl_pwrctrl_clk_set_options(device, true); + status = device->ftbl->start(device, + device->pwrctrl.superfast); + device->pwrctrl.superfast = false; + + if (status) { + kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); + dev_err(device->dev, "start failed %d\n", status); + break; + } + kgsl_pwrctrl_axi(device, true); + kgsl_pwrscale_wake(device); + kgsl_pwrctrl_irq(device, true); + trace_gpu_frequency( + pwr->pwrlevels[pwr->active_pwrlevel].gpu_freq/1000, 0); + fallthrough; + case KGSL_STATE_MINBW: + kgsl_bus_update(device, KGSL_BUS_VOTE_ON); + fallthrough; + case KGSL_STATE_NAP: + /* Turn on the core clocks */ + kgsl_pwrctrl_clk(device, true, KGSL_STATE_ACTIVE); + + device->ftbl->deassert_gbif_halt(device); + pwr->last_stat_updated = ktime_get(); + /* + * No need to turn on/off irq here as it no longer affects + * power collapse + */ + kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE); + + /* + * Change register settings if any after pwrlevel change. + * If there was dcvs level change during nap - call + * pre and post in the row after clock is enabled. + */ + kgsl_pwrctrl_pwrlevel_change_settings(device, 0); + kgsl_pwrctrl_pwrlevel_change_settings(device, 1); + /* All settings for power level transitions are complete*/ + pwr->previous_pwrlevel = pwr->active_pwrlevel; + kgsl_start_idle_timer(device); + del_timer_sync(&device->pwrctrl.minbw_timer); + break; + case KGSL_STATE_AWARE: + kgsl_pwrctrl_clk_set_options(device, true); + /* Enable state before turning on irq */ + kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE); + kgsl_pwrctrl_irq(device, true); + kgsl_start_idle_timer(device); + del_timer_sync(&device->pwrctrl.minbw_timer); + break; + default: + dev_warn(device->dev, "unhandled state %s\n", + kgsl_pwrstate_to_str(device->state)); + kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); + status = -EINVAL; + break; + } + return status; +} + +/* + * _aware() - Put device into AWARE + * @device: Device pointer + * + * The GPU should be available for register reads/writes and able + * to communicate with the rest of the system. However disable all + * paths that allow a switch to an interrupt context (interrupts & + * timers). + * Return 0 on success else error code + */ +static int +_aware(struct kgsl_device *device) +{ + int status = 0; + + switch (device->state) { + case KGSL_STATE_INIT: + status = kgsl_pwrctrl_enable(device); + break; + /* The following 4 cases shouldn't occur, but don't panic. 
*/ + case KGSL_STATE_MINBW: + fallthrough; + case KGSL_STATE_NAP: + status = _wake(device); + fallthrough; + case KGSL_STATE_ACTIVE: + kgsl_pwrctrl_irq(device, false); + del_timer_sync(&device->idle_timer); + break; + case KGSL_STATE_SLUMBER: + status = kgsl_pwrctrl_enable(device); + break; + default: + status = -EINVAL; + } + + if (!status) + kgsl_pwrctrl_set_state(device, KGSL_STATE_AWARE); + + return status; +} + +static int +_nap(struct kgsl_device *device) +{ + switch (device->state) { + case KGSL_STATE_ACTIVE: + if (!device->ftbl->is_hw_collapsible(device)) { + kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); + return -EBUSY; + } + + + /* + * Read HW busy counters before going to NAP state. + * The data might be used by power scale governors + * independently of the HW activity. For example + * the simple-on-demand governor will get the latest + * busy_time data even if the gpu isn't active. + */ + kgsl_pwrscale_update_stats(device); + + mod_timer(&device->pwrctrl.minbw_timer, jiffies + + msecs_to_jiffies(device->pwrctrl.minbw_timeout)); + + kgsl_pwrctrl_clk(device, false, KGSL_STATE_NAP); + kgsl_pwrctrl_set_state(device, KGSL_STATE_NAP); + fallthrough; + case KGSL_STATE_SLUMBER: + break; + case KGSL_STATE_AWARE: + dev_warn(device->dev, + "transition AWARE -> NAP is not permitted\n"); + fallthrough; + default: + kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); + break; + } + return 0; +} + +static int +_minbw(struct kgsl_device *device) +{ + switch (device->state) { + /* + * Device is expected to be clock gated to move to + * a deeper low power state. No other transition is + * permitted + */ + case KGSL_STATE_NAP: + kgsl_bus_update(device, KGSL_BUS_VOTE_MINIMUM); + kgsl_pwrctrl_set_state(device, KGSL_STATE_MINBW); + break; + default: + kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); + break; + } + return 0; +} + +static int +_slumber(struct kgsl_device *device) +{ + int status = 0; + + switch (device->state) { + case KGSL_STATE_ACTIVE: + if (!device->ftbl->is_hw_collapsible(device)) { + kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); + return -EBUSY; + } + fallthrough; + case KGSL_STATE_NAP: + fallthrough; + case KGSL_STATE_MINBW: + del_timer_sync(&device->pwrctrl.minbw_timer); + del_timer_sync(&device->idle_timer); + kgsl_pwrctrl_irq(device, false); + /* make sure power is on to stop the device*/ + status = kgsl_pwrctrl_enable(device); + device->ftbl->suspend_context(device); + device->ftbl->stop(device); + kgsl_pwrctrl_clk_set_options(device, false); + kgsl_pwrctrl_disable(device); + kgsl_pwrscale_sleep(device); + trace_gpu_frequency(0, 0); + kgsl_pwrctrl_set_state(device, KGSL_STATE_SLUMBER); + break; + case KGSL_STATE_SUSPEND: + complete_all(&device->hwaccess_gate); + device->ftbl->resume(device); + kgsl_pwrctrl_set_state(device, KGSL_STATE_SLUMBER); + break; + case KGSL_STATE_AWARE: + kgsl_pwrctrl_disable(device); + trace_gpu_frequency(0, 0); + kgsl_pwrctrl_set_state(device, KGSL_STATE_SLUMBER); + break; + default: + kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); + break; + + } + return status; +} + +/* + * _suspend() - Put device into suspend + * @device: Device pointer + * + * Return 0 on success else error code + */ +static int _suspend(struct kgsl_device *device) +{ + int ret = 0; + + if ((device->state == KGSL_STATE_NONE) || + (device->state == KGSL_STATE_INIT) || + (device->state == KGSL_STATE_SUSPEND)) + return ret; + + /* + * drain to prevent from more commands being submitted + * and wait for it to go idle + */ + ret = 
device->ftbl->drain_and_idle(device); + if (ret) + goto err; + + ret = _slumber(device); + if (ret) + goto err; + + kgsl_pwrctrl_set_state(device, KGSL_STATE_SUSPEND); + return ret; + +err: + device->ftbl->resume(device); + dev_err(device->dev, "device failed to SUSPEND %d\n", ret); + return ret; +} + +/* + * kgsl_pwrctrl_change_state() changes the GPU state to the input + * @device: Pointer to a KGSL device + * @state: desired KGSL state + * + * Caller must hold the device mutex. If the requested state change + * is valid, execute it. Otherwise return an error code explaining + * why the change has not taken place. Also print an error if an + * unexpected state change failure occurs. For example, a change to + * NAP may be rejected because the GPU is busy, this is not an error. + * A change to SUSPEND should go through no matter what, so if it + * fails an additional error message will be printed to dmesg. + */ +int kgsl_pwrctrl_change_state(struct kgsl_device *device, int state) +{ + int status = 0; + + if (device->state == state) + return status; + kgsl_pwrctrl_request_state(device, state); + + /* Work through the legal state transitions */ + switch (state) { + case KGSL_STATE_INIT: + status = _init(device); + break; + case KGSL_STATE_AWARE: + status = _aware(device); + break; + case KGSL_STATE_ACTIVE: + status = _wake(device); + break; + case KGSL_STATE_NAP: + status = _nap(device); + break; + case KGSL_STATE_MINBW: + status = _minbw(device); + break; + case KGSL_STATE_SLUMBER: + status = _slumber(device); + break; + case KGSL_STATE_SUSPEND: + status = _suspend(device); + break; + default: + dev_err(device->dev, "bad state request 0x%x\n", state); + kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); + status = -EINVAL; + break; + } + + return status; +} + +static void kgsl_pwrctrl_set_state(struct kgsl_device *device, + unsigned int state) +{ + trace_kgsl_pwr_set_state(device, state); + device->state = state; + device->requested_state = KGSL_STATE_NONE; + + spin_lock(&device->submit_lock); + if (state == KGSL_STATE_SLUMBER || state == KGSL_STATE_SUSPEND) + device->slumber = true; + else + device->slumber = false; + spin_unlock(&device->submit_lock); +} + +void kgsl_pwrctrl_request_state(struct kgsl_device *device, + unsigned int state) +{ + if (state != KGSL_STATE_NONE && state != device->requested_state) + trace_kgsl_pwr_request_state(device, state); + device->requested_state = state; +} + +const char *kgsl_pwrstate_to_str(unsigned int state) +{ + switch (state) { + case KGSL_STATE_NONE: + return "NONE"; + case KGSL_STATE_INIT: + return "INIT"; + case KGSL_STATE_AWARE: + return "AWARE"; + case KGSL_STATE_ACTIVE: + return "ACTIVE"; + case KGSL_STATE_NAP: + return "NAP"; + case KGSL_STATE_MINBW: + return "MINBW"; + case KGSL_STATE_SUSPEND: + return "SUSPEND"; + case KGSL_STATE_SLUMBER: + return "SLUMBER"; + default: + break; + } + return "UNKNOWN"; +} + +static int _check_active_count(struct kgsl_device *device, int count) +{ + /* Return 0 if the active count is greater than the desired value */ + return atomic_read(&device->active_cnt) > count ? 
0 : 1; +} + +int kgsl_active_count_wait(struct kgsl_device *device, int count, + unsigned long wait_jiffies) +{ + int result = 0; + + if (WARN_ON(!mutex_is_locked(&device->mutex))) + return -EINVAL; + + while (atomic_read(&device->active_cnt) > count) { + long ret; + + mutex_unlock(&device->mutex); + ret = wait_event_timeout(device->active_cnt_wq, + _check_active_count(device, count), wait_jiffies); + mutex_lock(&device->mutex); + result = ret == 0 ? -ETIMEDOUT : 0; + if (!result) + wait_jiffies = ret; + else + break; + } + + return result; +} + +/** + * kgsl_pwrctrl_set_default_gpu_pwrlevel() - Set GPU to default power level + * @device: Pointer to the kgsl_device struct + */ +int kgsl_pwrctrl_set_default_gpu_pwrlevel(struct kgsl_device *device) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + unsigned int new_level = pwr->default_pwrlevel; + unsigned int old_level = pwr->active_pwrlevel; + + /* + * Update the level according to any thermal, + * max/min, or power constraints. + */ + new_level = kgsl_pwrctrl_adjust_pwrlevel(device, new_level); + + pwr->active_pwrlevel = new_level; + pwr->previous_pwrlevel = old_level; + + /* Request adjusted DCVS level */ + return device->ftbl->gpu_clock_set(device, pwr->active_pwrlevel); +} + +/** + * kgsl_pwrctrl_update_thermal_pwrlevel() - Update GPU thermal power level + * @device: Pointer to the kgsl_device struct + */ +void kgsl_pwrctrl_update_thermal_pwrlevel(struct kgsl_device *device) +{ + s32 qos_max_freq = dev_pm_qos_read_value(&device->pdev->dev, + DEV_PM_QOS_MAX_FREQUENCY); + int level = 0; + + if (qos_max_freq != PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE) { + level = _get_nearest_pwrlevel(&device->pwrctrl, + qos_max_freq * 1000); + if (level < 0) + return; + } + + if (level != device->pwrctrl.thermal_pwrlevel) { + trace_kgsl_thermal_constraint( + device->pwrctrl.pwrlevels[level].gpu_freq); + + device->pwrctrl.thermal_pwrlevel = level; + } +} + +int kgsl_gpu_num_freqs(void) +{ + struct kgsl_device *device = kgsl_get_device(0); + + if (!device) + return -ENODEV; + + return device->pwrctrl.num_pwrlevels; +} +EXPORT_SYMBOL(kgsl_gpu_num_freqs); + +int kgsl_gpu_stat(struct kgsl_gpu_freq_stat *stats, u32 numfreq) +{ + struct kgsl_device *device = kgsl_get_device(0); + struct kgsl_pwrctrl *pwr; + int i; + + if (!device) + return -ENODEV; + + pwr = &device->pwrctrl; + + if (!stats || (numfreq < pwr->num_pwrlevels)) + return -EINVAL; + + mutex_lock(&device->mutex); + kgsl_pwrscale_update_stats(device); + + for (i = 0; i < pwr->num_pwrlevels; i++) { + stats[i].freq = pwr->pwrlevels[i].gpu_freq; + stats[i].active_time = pwr->clock_times[i]; + stats[i].idle_time = pwr->time_in_pwrlevel[i] - pwr->clock_times[i]; + } + mutex_unlock(&device->mutex); + + return 0; +} +EXPORT_SYMBOL(kgsl_gpu_stat); diff --git a/kgsl_pwrctrl.h b/kgsl_pwrctrl.h new file mode 100644 index 0000000000..925aceeef5 --- /dev/null +++ b/kgsl_pwrctrl.h @@ -0,0 +1,265 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2010-2021, The Linux Foundation. All rights reserved. 
+ */ +#ifndef __KGSL_PWRCTRL_H +#define __KGSL_PWRCTRL_H + +#include +#include + +/***************************************************************************** + * power flags + ****************************************************************************/ +#define KGSL_MAX_CLKS 18 + +#define KGSL_MAX_PWRLEVELS 16 + +#define KGSL_PWRFLAGS_POWER_ON 0 +#define KGSL_PWRFLAGS_CLK_ON 1 +#define KGSL_PWRFLAGS_AXI_ON 2 +#define KGSL_PWRFLAGS_IRQ_ON 3 +#define KGSL_PWRFLAGS_NAP_OFF 5 + +/* Use to enable all the force power on states at once */ +#define KGSL_PWR_ON GENMASK(5, 0) + +/* Only two supported levels, min & max */ +#define KGSL_CONSTRAINT_PWR_MAXLEVELS 2 + +#define KGSL_XO_CLK_FREQ 19200000 +#define KGSL_ISENSE_CLK_FREQ 200000000 + +struct platform_device; +struct icc_path; + +struct kgsl_clk_stats { + unsigned int busy; + unsigned int total; + unsigned int busy_old; + unsigned int total_old; +}; + +struct kgsl_pwr_constraint { + unsigned int type; + unsigned int sub_type; + union { + struct { + unsigned int level; + } pwrlevel; + } hint; + unsigned long expires; + uint32_t owner_id; + u32 owner_timestamp; +}; + +/** + * struct kgsl_pwrlevel - Struct holding different pwrlevel info obtained from + * from dtsi file + * @gpu_freq: GPU frequency vote in Hz + * @bus_freq: Bus bandwidth vote index + * @bus_min: Min bus index @gpu_freq + * @bus_max: Max bus index @gpu_freq + */ +struct kgsl_pwrlevel { + unsigned int gpu_freq; + unsigned int bus_freq; + unsigned int bus_min; + unsigned int bus_max; + unsigned int acd_level; + /** @voltage_level: Voltage level used by the GMU to vote RPMh */ + u32 voltage_level; +}; + +/** + * struct kgsl_pwrctrl - Power control settings for a KGSL device + * @interrupt_num - The interrupt number for the device + * @grp_clks - Array of clocks structures that we control + * @power_flags - Control flags for power + * @pwrlevels - List of supported power levels + * @active_pwrlevel - The currently active power level + * @previous_pwrlevel - The power level before transition + * @thermal_pwrlevel - maximum powerlevel constraint from thermal + * @thermal_pwrlevel_floor - minimum powerlevel constraint from thermal + * @default_pwrlevel - device wake up power level + * @max_pwrlevel - maximum allowable powerlevel per the user + * @min_pwrlevel - minimum allowable powerlevel per the user + * @num_pwrlevels - number of available power levels + * @throttle_mask - LM throttle mask + * @interval_timeout - timeout to be idle before a power event + * @clock_times - Each GPU frequency's accumulated active time in us + * @clk_stats - structure of clock statistics + * @input_disable - To disable GPU wakeup on touch input event + * @bus_control - true if the bus calculation is independent + * @bus_mod - modifier from the current power level for the bus vote + * @bus_percent_ab - current percent of total possible bus usage + * @bus_width - target specific bus width in number of bytes + * @bus_ab_mbytes - AB vote in Mbytes for current bus usage + * @constraint - currently active power constraint + * @superfast - Boolean flag to indicate that the GPU start should be run in the + * higher priority thread + * isense_clk_indx - index of isense clock, 0 if no isense + * isense_clk_on_level - isense clock rate is XO rate below this level. 
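+ *
+ * Note on indexing (frequencies below are illustrative only): pwrlevel 0 is
+ * the fastest level and num_pwrlevels - 1 the slowest, so a table such as
+ * {820 MHz, 650 MHz, 500 MHz, 300 MHz} gives max_pwrlevel = 0 and
+ * min_pwrlevel = 3, with active_pwrlevel kept between the two.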
+ */
+
+struct kgsl_pwrctrl {
+	int interrupt_num;
+	struct clk *grp_clks[KGSL_MAX_CLKS];
+	struct clk *gpu_bimc_int_clk;
+	/** @cx_gdsc: Pointer to the CX domain regulator if applicable */
+	struct regulator *cx_gdsc;
+	/** @gx_gdsc: Pointer to the GX domain regulator if applicable */
+	struct regulator *gx_gdsc;
+	/** @gx_gdsc_parent: Pointer to the GX domain parent supply */
+	struct regulator *gx_gdsc_parent;
+	/** @gx_gdsc_parent_min_corner: Minimum supply voltage for GX parent */
+	u32 gx_gdsc_parent_min_corner;
+	int isense_clk_indx;
+	int isense_clk_on_level;
+	unsigned long power_flags;
+	unsigned long ctrl_flags;
+	struct kgsl_pwrlevel pwrlevels[KGSL_MAX_PWRLEVELS];
+	unsigned int active_pwrlevel;
+	unsigned int previous_pwrlevel;
+	unsigned int thermal_pwrlevel;
+	unsigned int thermal_pwrlevel_floor;
+	unsigned int default_pwrlevel;
+	unsigned int wakeup_maxpwrlevel;
+	unsigned int max_pwrlevel;
+	unsigned int min_pwrlevel;
+	unsigned int num_pwrlevels;
+	unsigned int throttle_mask;
+	u32 interval_timeout;
+	u64 clock_times[KGSL_MAX_PWRLEVELS];
+	struct kgsl_clk_stats clk_stats;
+	bool bus_control;
+	int bus_mod;
+	unsigned int bus_percent_ab;
+	unsigned int bus_width;
+	unsigned long bus_ab_mbytes;
+	/** @ddr_table: List of the DDR bandwidths in KBps for the target */
+	u32 *ddr_table;
+	/** @ddr_table_count: Number of objects in @ddr_table */
+	int ddr_table_count;
+	/** @cur_buslevel: The last buslevel voted by the driver */
+	int cur_buslevel;
+	/** @bus_max: The maximum bandwidth available to the device */
+	unsigned long bus_max;
+	struct kgsl_pwr_constraint constraint;
+	bool superfast;
+	unsigned int gpu_bimc_int_clk_freq;
+	bool gpu_bimc_interface_enabled;
+	/** @icc_path: Interconnect path for the GPU (if applicable) */
+	struct icc_path *icc_path;
+	/** @cur_ab: The last ab voted by the driver */
+	u32 cur_ab;
+	/** @minbw_timer: Timer struct for entering minimum bandwidth state */
+	struct timer_list minbw_timer;
+	/** @minbw_timeout: Timeout for entering minimum bandwidth state */
+	u32 minbw_timeout;
+	/** @sysfs_thermal_req: PM QoS maximum frequency request from user (via sysfs) */
+	struct dev_pm_qos_request sysfs_thermal_req;
+	/** @time_in_pwrlevel: Each pwrlevel active duration in usec */
+	u64 time_in_pwrlevel[KGSL_MAX_PWRLEVELS];
+	/** @last_stat_updated: The last time stats were updated */
+	ktime_t last_stat_updated;
+};
+
+int kgsl_pwrctrl_init(struct kgsl_device *device);
+void kgsl_pwrctrl_close(struct kgsl_device *device);
+void kgsl_timer(struct timer_list *t);
+void kgsl_pre_hwaccess(struct kgsl_device *device);
+void kgsl_pwrctrl_pwrlevel_change(struct kgsl_device *device,
+	unsigned int level);
+int kgsl_pwrctrl_init_sysfs(struct kgsl_device *device);
+int kgsl_pwrctrl_change_state(struct kgsl_device *device, int state);
+
+unsigned int kgsl_pwrctrl_adjust_pwrlevel(struct kgsl_device *device,
+	unsigned int new_level);
+
+/*
+ * kgsl_pwrctrl_active_freq - get currently configured frequency
+ * @pwr: kgsl_pwrctrl structure for the device
+ *
+ * Returns the currently configured frequency for the device.
+ */
+static inline unsigned long
+kgsl_pwrctrl_active_freq(struct kgsl_pwrctrl *pwr)
+{
+	return pwr->pwrlevels[pwr->active_pwrlevel].gpu_freq;
+}
+
+/**
+ * kgsl_active_count_wait() - Wait for activity to finish.
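+ *
+ * Illustrative usage (the 100 ms timeout is an arbitrary example):
+ *
+ *	int ret;
+ *
+ *	mutex_lock(&device->mutex);
+ *	ret = kgsl_active_count_wait(device, 0, msecs_to_jiffies(100));
+ *	if (ret == -ETIMEDOUT)
+ *		dev_err(device->dev, "GPU still busy\n");
+ *	mutex_unlock(&device->mutex);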
+ * @device: Pointer to a KGSL device + * @count: Active count value to wait for + * @wait_jiffies: Jiffies to wait + * + * Block until the active_cnt value hits the desired value + */ +int kgsl_active_count_wait(struct kgsl_device *device, int count, + unsigned long wait_jiffies); +void kgsl_pwrctrl_busy_time(struct kgsl_device *device, u64 time, u64 busy); + +/** + * kgsl_pwrctrl_set_constraint() - Validate and change enforced constraint + * @device: Pointer to the kgsl_device struct + * @pwrc: Pointer to requested constraint + * @id: Context id which owns the constraint + * @ts: The timestamp for which this constraint is enforced + * + * Accept the new constraint if no previous constraint existed or if the + * new constraint is faster than the previous one. If the new and previous + * constraints are equal, update the timestamp and ownership to make sure + * the constraint expires at the correct time. + */ +void kgsl_pwrctrl_set_constraint(struct kgsl_device *device, + struct kgsl_pwr_constraint *pwrc, u32 id, u32 ts); +int kgsl_pwrctrl_set_default_gpu_pwrlevel(struct kgsl_device *device); +void kgsl_pwrctrl_update_thermal_pwrlevel(struct kgsl_device *device); + +/** + * kgsl_pwrctrl_request_state - Request a specific power state + * @device: Pointer to the kgsl device + * @state: Power state requested + */ +void kgsl_pwrctrl_request_state(struct kgsl_device *device, u32 state); + +/** + * kgsl_pwrctrl_axi - Propagate bus votes during slumber entry and exit + * @device: Pointer to the kgsl device + * @state: Whether we are going to slumber or coming out of slumber + * + * This function will propagate the default bus vote when coming out of + * slumber and set bus bandwidth to 0 when going into slumber + * + * Return: 0 on success or negative error on failure + */ +int kgsl_pwrctrl_axi(struct kgsl_device *device, bool state); + +/** + * kgsl_idle_check - kgsl idle function + * @work: work item being run by the function + * + * This function is called for work that is queued by the interrupt + * handler or the idle timer. It attempts to transition to a clocks + * off state if the active_cnt is 0 and the hardware is idle. + */ +void kgsl_idle_check(struct work_struct *work); + +/** + * kgsl_pwrctrl_irq - Enable or disable gpu interrupts + * @device: Handle to the kgsl device + * @state: Variable to decide whether interrupts need to be enabled or disabled + * + */ +void kgsl_pwrctrl_irq(struct kgsl_device *device, bool state); + +/** + * kgsl_pwrctrl_clear_l3_vote - Relinquish l3 vote + * @device: Handle to the kgsl device + * + * Clear the l3 vote when going into slumber + */ +void kgsl_pwrctrl_clear_l3_vote(struct kgsl_device *device); +#endif /* __KGSL_PWRCTRL_H */ diff --git a/kgsl_pwrscale.c b/kgsl_pwrscale.c new file mode 100644 index 0000000000..c7d0ff0d66 --- /dev/null +++ b/kgsl_pwrscale.c @@ -0,0 +1,805 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2010-2021, The Linux Foundation. All rights reserved. 
+ */ + +#include +#include + +#include "kgsl_bus.h" +#include "kgsl_device.h" +#include "kgsl_pwrscale.h" +#include "kgsl_trace.h" + +static struct devfreq_msm_adreno_tz_data adreno_tz_data = { + .bus = { + .max = 350, + .floating = true, + }, + .mod_percent = 100, +}; + +static void do_devfreq_suspend(struct work_struct *work); +static void do_devfreq_resume(struct work_struct *work); +static void do_devfreq_notify(struct work_struct *work); + +/* + * These variables are used to keep the latest data + * returned by kgsl_devfreq_get_dev_status + */ +static struct xstats last_xstats; +static struct devfreq_dev_status last_status = { .private_data = &last_xstats }; + +/* + * kgsl_pwrscale_sleep - notify governor that device is going off + * @device: The device + * + * Called shortly after all pending work is completed. + */ +void kgsl_pwrscale_sleep(struct kgsl_device *device) +{ + if (!device->pwrscale.enabled) + return; + device->pwrscale.on_time = 0; + + /* to call devfreq_suspend_device() from a kernel thread */ + queue_work(device->pwrscale.devfreq_wq, + &device->pwrscale.devfreq_suspend_ws); +} + +/* + * kgsl_pwrscale_wake - notify governor that device is going on + * @device: The device + * + * Called when the device is returning to an active state. + */ +void kgsl_pwrscale_wake(struct kgsl_device *device) +{ + struct kgsl_power_stats stats; + struct kgsl_pwrscale *psc = &device->pwrscale; + + if (!device->pwrscale.enabled) + return; + /* clear old stats before waking */ + memset(&psc->accum_stats, 0, sizeof(psc->accum_stats)); + memset(&last_xstats, 0, sizeof(last_xstats)); + + /* and any hw activity from waking up*/ + device->ftbl->power_stats(device, &stats); + + psc->time = ktime_get(); + + psc->next_governor_call = ktime_add_us(psc->time, + KGSL_GOVERNOR_CALL_INTERVAL); + + /* to call devfreq_resume_device() from a kernel thread */ + queue_work(psc->devfreq_wq, &psc->devfreq_resume_ws); +} + +/* + * kgsl_pwrscale_busy - update pwrscale state for new work + * @device: The device + * + * Called when new work is submitted to the device. + * This function must be called with the device mutex locked. + */ +void kgsl_pwrscale_busy(struct kgsl_device *device) +{ + if (!device->pwrscale.enabled) + return; + if (device->pwrscale.on_time == 0) + device->pwrscale.on_time = ktime_to_us(ktime_get()); +} + +/** + * kgsl_pwrscale_update_stats() - update device busy statistics + * @device: The device + * + * Read hardware busy counters and accumulate the results. + */ +void kgsl_pwrscale_update_stats(struct kgsl_device *device) +{ + struct kgsl_pwrctrl *pwrctrl = &device->pwrctrl; + struct kgsl_pwrscale *psc = &device->pwrscale; + + if (WARN_ON(!mutex_is_locked(&device->mutex))) + return; + + if (!psc->enabled) + return; + + if (device->state == KGSL_STATE_ACTIVE) { + struct kgsl_power_stats stats; + ktime_t cur_time = ktime_get(); + + device->ftbl->power_stats(device, &stats); + device->pwrscale.accum_stats.busy_time += stats.busy_time; + device->pwrscale.accum_stats.ram_time += stats.ram_time; + device->pwrscale.accum_stats.ram_wait += stats.ram_wait; + pwrctrl->clock_times[pwrctrl->active_pwrlevel] += + stats.busy_time; + pwrctrl->time_in_pwrlevel[pwrctrl->active_pwrlevel] += + ktime_us_delta(cur_time, pwrctrl->last_stat_updated); + pwrctrl->last_stat_updated = cur_time; + } +} + +/** + * kgsl_pwrscale_update() - update device busy statistics + * @device: The device + * + * If enough time has passed schedule the next call to devfreq + * get_dev_status. 
+ */ +void kgsl_pwrscale_update(struct kgsl_device *device) +{ + ktime_t t; + + if (WARN_ON(!mutex_is_locked(&device->mutex))) + return; + + if (!device->pwrscale.enabled) + return; + + t = ktime_get(); + if (ktime_compare(t, device->pwrscale.next_governor_call) < 0) + return; + + device->pwrscale.next_governor_call = ktime_add_us(t, + KGSL_GOVERNOR_CALL_INTERVAL); + + /* to call update_devfreq() from a kernel thread */ + if (device->state != KGSL_STATE_SLUMBER) + queue_work(device->pwrscale.devfreq_wq, + &device->pwrscale.devfreq_notify_ws); +} + +/* + * kgsl_pwrscale_disable - temporarily disable the governor + * @device: The device + * @turbo: Indicates if pwrlevel should be forced to turbo + * + * Temporarily disable the governor, to prevent interference + * with profiling tools that expect a fixed clock frequency. + * This function must be called with the device mutex locked. + */ +void kgsl_pwrscale_disable(struct kgsl_device *device, bool turbo) +{ + if (WARN_ON(!mutex_is_locked(&device->mutex))) + return; + + if (device->pwrscale.devfreqptr) + queue_work(device->pwrscale.devfreq_wq, + &device->pwrscale.devfreq_suspend_ws); + device->pwrscale.enabled = false; + if (turbo) + kgsl_pwrctrl_pwrlevel_change(device, 0); +} + +/* + * kgsl_pwrscale_enable - re-enable the governor + * @device: The device + * + * Reenable the governor after a kgsl_pwrscale_disable() call. + * This function must be called with the device mutex locked. + */ +void kgsl_pwrscale_enable(struct kgsl_device *device) +{ + if (WARN_ON(!mutex_is_locked(&device->mutex))) + return; + + if (device->pwrscale.devfreqptr) { + queue_work(device->pwrscale.devfreq_wq, + &device->pwrscale.devfreq_resume_ws); + device->pwrscale.enabled = true; + } else { + /* + * Don't enable it if devfreq is not set and let the device + * run at default level; + */ + kgsl_pwrctrl_pwrlevel_change(device, + device->pwrctrl.default_pwrlevel); + device->pwrscale.enabled = false; + } +} + +#ifdef DEVFREQ_FLAG_WAKEUP_MAXFREQ +static inline bool _check_maxfreq(u32 flags) +{ + return (flags & DEVFREQ_FLAG_WAKEUP_MAXFREQ); +} +#else +static inline bool _check_maxfreq(u32 flags) +{ + return false; +} +#endif + +/* + * kgsl_devfreq_target - devfreq_dev_profile.target callback + * @dev: see devfreq.h + * @freq: see devfreq.h + * @flags: see devfreq.h + * + * This is a devfreq callback function for dcvs recommendations and + * thermal constraints. If any thermal constraints are present, + * devfreq adjusts the gpu frequency range to cap the max frequency + * thereby not recommending anything above the constraint. + * This function expects the device mutex to be unlocked. 
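+ * For example (frequencies are illustrative): with a pwrlevels[] table of
+ * {820 MHz, 650 MHz, 500 MHz, 300 MHz}, a governor recommendation of
+ * 500000000 Hz selects level 2, while a recommendation that matches no
+ * table entry leaves the active level unchanged.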
+ */ +int kgsl_devfreq_target(struct device *dev, unsigned long *freq, u32 flags) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + struct kgsl_pwrctrl *pwr; + int level; + unsigned int i; + unsigned long cur_freq, rec_freq; + + if (device == NULL) + return -ENODEV; + if (freq == NULL) + return -EINVAL; + if (!device->pwrscale.devfreq_enabled) + return -EPROTO; + + pwr = &device->pwrctrl; + + if (_check_maxfreq(flags)) { + /* + * The GPU is about to get suspended, + * but it needs to be at the max power level when waking up + */ + pwr->wakeup_maxpwrlevel = 1; + return 0; + } + + rec_freq = *freq; + + mutex_lock(&device->mutex); + cur_freq = kgsl_pwrctrl_active_freq(pwr); + level = pwr->active_pwrlevel; + + kgsl_pwrctrl_update_thermal_pwrlevel(device); + + /* If the governor recommends a new frequency, update it here */ + if (rec_freq != cur_freq) { + for (i = 0; i < pwr->num_pwrlevels; i++) + if (rec_freq == pwr->pwrlevels[i].gpu_freq) { + level = i; + break; + } + if (level != pwr->active_pwrlevel) + kgsl_pwrctrl_pwrlevel_change(device, level); + } + + *freq = kgsl_pwrctrl_active_freq(pwr); + + mutex_unlock(&device->mutex); + return 0; +} + +/* + * kgsl_devfreq_get_dev_status - devfreq_dev_profile.get_dev_status callback + * @dev: see devfreq.h + * @freq: see devfreq.h + * @flags: see devfreq.h + * + * This function expects the device mutex to be unlocked. + */ +int kgsl_devfreq_get_dev_status(struct device *dev, + struct devfreq_dev_status *stat) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + struct kgsl_pwrctrl *pwrctrl; + struct kgsl_pwrscale *pwrscale; + ktime_t tmp1, tmp2; + + if (device == NULL) + return -ENODEV; + if (stat == NULL) + return -EINVAL; + if (!device->pwrscale.devfreq_enabled) + return -EPROTO; + + pwrscale = &device->pwrscale; + pwrctrl = &device->pwrctrl; + + mutex_lock(&device->mutex); + + tmp1 = ktime_get(); + /* + * If the GPU clock is on grab the latest power counter + * values. Otherwise the most recent ACTIVE values will + * already be stored in accum_stats. + */ + kgsl_pwrscale_update_stats(device); + + tmp2 = ktime_get(); + stat->total_time = ktime_us_delta(tmp2, pwrscale->time); + pwrscale->time = tmp1; + + stat->busy_time = pwrscale->accum_stats.busy_time; + + stat->current_frequency = kgsl_pwrctrl_active_freq(&device->pwrctrl); + + stat->private_data = &device->active_context_count; + + /* + * keep the latest devfreq_dev_status values + * and vbif counters data + * to be (re)used by kgsl_busmon_get_dev_status() + */ + if (pwrctrl->bus_control) { + struct xstats *last_b = + (struct xstats *)last_status.private_data; + + last_status.total_time = stat->total_time; + last_status.busy_time = stat->busy_time; + last_status.current_frequency = stat->current_frequency; + + last_b->ram_time = device->pwrscale.accum_stats.ram_time; + last_b->ram_wait = device->pwrscale.accum_stats.ram_wait; + last_b->buslevel = device->pwrctrl.cur_buslevel; + } + + kgsl_pwrctrl_busy_time(device, stat->total_time, stat->busy_time); + trace_kgsl_pwrstats(device, stat->total_time, + &pwrscale->accum_stats, device->active_context_count); + memset(&pwrscale->accum_stats, 0, sizeof(pwrscale->accum_stats)); + + mutex_unlock(&device->mutex); + + return 0; +} + +/* + * kgsl_devfreq_get_cur_freq - devfreq_dev_profile.get_cur_freq callback + * @dev: see devfreq.h + * @freq: see devfreq.h + * @flags: see devfreq.h + * + * This function expects the device mutex to be unlocked. 
+ */ +int kgsl_devfreq_get_cur_freq(struct device *dev, unsigned long *freq) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + + if (device == NULL) + return -ENODEV; + if (freq == NULL) + return -EINVAL; + if (!device->pwrscale.devfreq_enabled) + return -EPROTO; + + mutex_lock(&device->mutex); + *freq = kgsl_pwrctrl_active_freq(&device->pwrctrl); + mutex_unlock(&device->mutex); + + return 0; +} + +/* + * kgsl_busmon_get_dev_status - devfreq_dev_profile.get_dev_status callback + * @dev: see devfreq.h + * @freq: see devfreq.h + * @flags: see devfreq.h + * + * This function expects the device mutex to be unlocked. + */ +int kgsl_busmon_get_dev_status(struct device *dev, + struct devfreq_dev_status *stat) +{ + struct xstats *b; + struct kgsl_device *device = dev_get_drvdata(dev); + + if (!device->pwrscale.devfreq_enabled) + return -EPROTO; + + stat->total_time = last_status.total_time; + stat->busy_time = last_status.busy_time; + stat->current_frequency = last_status.current_frequency; + + if (stat->private_data) { + struct xstats *last_b = + (struct xstats *)last_status.private_data; + b = (struct xstats *)stat->private_data; + b->ram_time = last_b->ram_time; + b->ram_wait = last_b->ram_wait; + b->buslevel = last_b->buslevel; + } + return 0; +} + +#ifdef DEVFREQ_FLAG_FAST_HINT +static inline bool _check_fast_hint(u32 flags) +{ + return (flags & DEVFREQ_FLAG_FAST_HINT); +} +#else +static inline bool _check_fast_hint(u32 flags) +{ + return false; +} +#endif + +#ifdef DEVFREQ_FLAG_SLOW_HINT +static inline bool _check_slow_hint(u32 flags) +{ + return (flags & DEVFREQ_FLAG_SLOW_HINT); +} +#else +static inline bool _check_slow_hint(u32 flags) +{ + return false; +} +#endif + +/* + * kgsl_busmon_target - devfreq_dev_profile.target callback + * @dev: see devfreq.h + * @freq: see devfreq.h + * @flags: see devfreq.h + * + * This function expects the device mutex to be unlocked. + */ +int kgsl_busmon_target(struct device *dev, unsigned long *freq, u32 flags) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + struct kgsl_pwrctrl *pwr; + struct kgsl_pwrlevel *pwr_level; + int level, b; + u32 bus_flag; + unsigned long ab_mbytes; + + if (device == NULL) + return -ENODEV; + if (freq == NULL) + return -EINVAL; + if (!device->pwrscale.enabled) + return 0; + if (!device->pwrscale.devfreq_enabled) + return -EPROTO; + + pwr = &device->pwrctrl; + + if (!pwr->bus_control) + return 0; + + mutex_lock(&device->mutex); + level = pwr->active_pwrlevel; + pwr_level = &pwr->pwrlevels[level]; + bus_flag = device->pwrscale.bus_profile.flag; + device->pwrscale.bus_profile.flag = 0; + ab_mbytes = device->pwrscale.bus_profile.ab_mbytes; + + /* + * Bus devfreq governor has calculated its recomendations + * when gpu was running with *freq frequency. 
+ * If the gpu frequency is different now it's better to + * ignore the call + */ + if (pwr_level->gpu_freq != *freq) { + mutex_unlock(&device->mutex); + return 0; + } + + b = pwr->bus_mod; + if (_check_fast_hint(bus_flag)) + pwr->bus_mod++; + else if (_check_slow_hint(bus_flag)) + pwr->bus_mod--; + + /* trim calculated change to fit range */ + if (pwr_level->bus_freq + pwr->bus_mod < pwr_level->bus_min) + pwr->bus_mod = -(pwr_level->bus_freq - pwr_level->bus_min); + else if (pwr_level->bus_freq + pwr->bus_mod > pwr_level->bus_max) + pwr->bus_mod = pwr_level->bus_max - pwr_level->bus_freq; + + /* Update bus vote if AB or IB is modified */ + if ((pwr->bus_mod != b) || (pwr->bus_ab_mbytes != ab_mbytes)) { + pwr->bus_percent_ab = device->pwrscale.bus_profile.percent_ab; + pwr->bus_ab_mbytes = ab_mbytes; + kgsl_bus_update(device, KGSL_BUS_VOTE_ON); + } + + mutex_unlock(&device->mutex); + return 0; +} + +int kgsl_busmon_get_cur_freq(struct device *dev, unsigned long *freq) +{ + return 0; +} + +static void pwrscale_busmon_create(struct kgsl_device *device, + struct platform_device *pdev, unsigned long *table) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct kgsl_pwrscale *pwrscale = &device->pwrscale; + struct device *dev = &pwrscale->busmondev; + struct msm_busmon_extended_profile *bus_profile; + struct devfreq *bus_devfreq; + int i, ret; + + bus_profile = &pwrscale->bus_profile; + bus_profile->private_data = &adreno_tz_data; + + bus_profile->profile.target = kgsl_busmon_target; + bus_profile->profile.get_dev_status = kgsl_busmon_get_dev_status; + bus_profile->profile.get_cur_freq = kgsl_busmon_get_cur_freq; + + bus_profile->profile.max_state = pwr->num_pwrlevels; + bus_profile->profile.freq_table = table; + + dev->parent = &pdev->dev; + + dev_set_name(dev, "kgsl-busmon"); + dev_set_drvdata(dev, device); + if (device_register(dev)) + return; + + /* Build out the OPP table for the busmon device */ + for (i = 0; i < pwr->num_pwrlevels; i++) { + if (!pwr->pwrlevels[i].gpu_freq) + continue; + + dev_pm_opp_add(dev, pwr->pwrlevels[i].gpu_freq, 0); + } + + ret = devfreq_gpubw_init(); + if (ret) { + dev_err(&pdev->dev, "Failed to add busmon governor: %d\n", ret); + put_device(dev); + return; + } + + bus_devfreq = devfreq_add_device(dev, &pwrscale->bus_profile.profile, + "gpubw_mon", NULL); + + if (IS_ERR_OR_NULL(bus_devfreq)) { + dev_err(&pdev->dev, "Bus scaling not enabled\n"); + devfreq_gpubw_exit(); + put_device(dev); + return; + } + + pwrscale->bus_devfreq = bus_devfreq; +} + +static void pwrscale_of_get_ca_target_pwrlevel(struct kgsl_device *device, + struct device_node *node) +{ + u32 pwrlevel = 1; + + of_property_read_u32(node, "qcom,ca-target-pwrlevel", &pwrlevel); + + if (pwrlevel >= device->pwrctrl.num_pwrlevels) + pwrlevel = 1; + + device->pwrscale.ctxt_aware_target_pwrlevel = pwrlevel; +} + +/* Get context aware properties */ +static void pwrscale_of_ca_aware(struct kgsl_device *device) +{ + struct kgsl_pwrscale *pwrscale = &device->pwrscale; + struct device_node *parent = device->pdev->dev.of_node; + struct device_node *node, *child; + + pwrscale->ctxt_aware_enable = + of_property_read_bool(parent, "qcom,enable-ca-jump"); + + if (!pwrscale->ctxt_aware_enable) + return; + + pwrscale->ctxt_aware_busy_penalty = 12000; + of_property_read_u32(parent, "qcom,ca-busy-penalty", + &pwrscale->ctxt_aware_busy_penalty); + + + pwrscale->ctxt_aware_target_pwrlevel = 1; + + node = of_find_node_by_name(parent, "qcom,gpu-pwrlevel-bins"); + if (node == NULL) { + 
pwrscale_of_get_ca_target_pwrlevel(device, parent); + return; + } + + for_each_child_of_node(node, child) { + u32 bin; + + if (of_property_read_u32(child, "qcom,speed-bin", &bin)) + continue; + + if (bin == device->speed_bin) { + pwrscale_of_get_ca_target_pwrlevel(device, child); + of_node_put(child); + break; + } + } + + of_node_put(node); +} + +int kgsl_pwrscale_init(struct kgsl_device *device, struct platform_device *pdev, + const char *governor) +{ + struct kgsl_pwrscale *pwrscale = &device->pwrscale; + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct devfreq *devfreq; + struct msm_adreno_extended_profile *gpu_profile; + int i, ret; + + pwrscale->enabled = true; + + gpu_profile = &pwrscale->gpu_profile; + gpu_profile->private_data = &adreno_tz_data; + + gpu_profile->profile.target = kgsl_devfreq_target; + gpu_profile->profile.get_dev_status = kgsl_devfreq_get_dev_status; + gpu_profile->profile.get_cur_freq = kgsl_devfreq_get_cur_freq; + + gpu_profile->profile.initial_freq = + pwr->pwrlevels[pwr->default_pwrlevel].gpu_freq; + + gpu_profile->profile.polling_ms = 10; + + pwrscale_of_ca_aware(device); + + for (i = 0; i < pwr->num_pwrlevels; i++) + pwrscale->freq_table[i] = pwr->pwrlevels[i].gpu_freq; + + /* + * Max_state is the number of valid power levels. + * The valid power levels range from 0 - (max_state - 1) + */ + gpu_profile->profile.max_state = pwr->num_pwrlevels; + /* link storage array to the devfreq profile pointer */ + gpu_profile->profile.freq_table = pwrscale->freq_table; + + /* if there is only 1 freq, no point in running a governor */ + if (gpu_profile->profile.max_state == 1) + governor = "performance"; + + /* initialize msm-adreno-tz governor specific data here */ + adreno_tz_data.disable_busy_time_burst = + of_property_read_bool(pdev->dev.of_node, + "qcom,disable-busy-time-burst"); + + if (pwrscale->ctxt_aware_enable) { + adreno_tz_data.ctxt_aware_enable = pwrscale->ctxt_aware_enable; + adreno_tz_data.bin.ctxt_aware_target_pwrlevel = + pwrscale->ctxt_aware_target_pwrlevel; + adreno_tz_data.bin.ctxt_aware_busy_penalty = + pwrscale->ctxt_aware_busy_penalty; + } + + /* + * If there is a separate GX power rail, allow + * independent modification to its voltage through + * the bus bandwidth vote. 
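+	 * When bus_control is set, the DDR bandwidth table, the bus width and
+	 * (optionally) the "qcom,bus-accesses" property read below are handed
+	 * to the msm-adreno-tz governor data so that the busmon devfreq
+	 * device created later by pwrscale_busmon_create() can adjust the bus
+	 * vote independently of the GPU clock.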
+ */ + if (pwr->bus_control) { + adreno_tz_data.bus.num = pwr->ddr_table_count; + adreno_tz_data.bus.ib_kbps = pwr->ddr_table; + adreno_tz_data.bus.width = pwr->bus_width; + + if (!kgsl_of_property_read_ddrtype(device->pdev->dev.of_node, + "qcom,bus-accesses", &adreno_tz_data.bus.max)) + adreno_tz_data.bus.floating = false; + } + + pwrscale->devfreq_wq = create_freezable_workqueue("kgsl_devfreq_wq"); + if (!pwrscale->devfreq_wq) { + dev_err(device->dev, "Failed to allocate kgsl devfreq workqueue\n"); + device->pwrscale.enabled = false; + return -ENOMEM; + } + + ret = msm_adreno_tz_init(); + if (ret) { + dev_err(device->dev, "Failed to add adreno tz governor: %d\n", ret); + device->pwrscale.enabled = false; + return ret; + } + + devfreq = devfreq_add_device(&pdev->dev, &gpu_profile->profile, + governor, &adreno_tz_data); + if (IS_ERR(devfreq)) { + device->pwrscale.enabled = false; + msm_adreno_tz_exit(); + return PTR_ERR(devfreq); + } + + pwrscale->devfreqptr = devfreq; + pwrscale->cooling_dev = of_devfreq_cooling_register(pdev->dev.of_node, + devfreq); + if (IS_ERR(pwrscale->cooling_dev)) + pwrscale->cooling_dev = NULL; + + if (adreno_tz_data.bus.num) + pwrscale_busmon_create(device, pdev, pwrscale->freq_table); + + WARN_ON(sysfs_create_link(&device->dev->kobj, + &devfreq->dev.kobj, "devfreq")); + + INIT_WORK(&pwrscale->devfreq_suspend_ws, do_devfreq_suspend); + INIT_WORK(&pwrscale->devfreq_resume_ws, do_devfreq_resume); + INIT_WORK(&pwrscale->devfreq_notify_ws, do_devfreq_notify); + + pwrscale->next_governor_call = ktime_add_us(ktime_get(), + KGSL_GOVERNOR_CALL_INTERVAL); + + return 0; +} + +/* + * kgsl_pwrscale_close - clean up pwrscale + * @device: the device + * + * This function should be called with the device mutex locked. + */ +void kgsl_pwrscale_close(struct kgsl_device *device) +{ + struct kgsl_pwrscale *pwrscale; + struct kgsl_pwrctrl *pwr; + + pwr = &device->pwrctrl; + pwrscale = &device->pwrscale; + + if (pwrscale->bus_devfreq) { + devfreq_remove_device(pwrscale->bus_devfreq); + pwrscale->bus_devfreq = NULL; + put_device(&pwrscale->busmondev); + devfreq_gpubw_exit(); + } + + if (!pwrscale->devfreqptr) + return; + if (pwrscale->cooling_dev) + devfreq_cooling_unregister(pwrscale->cooling_dev); + + if (pwrscale->devfreq_wq) { + flush_workqueue(pwrscale->devfreq_wq); + destroy_workqueue(pwrscale->devfreq_wq); + pwrscale->devfreq_wq = NULL; + } + + devfreq_remove_device(device->pwrscale.devfreqptr); + device->pwrscale.devfreqptr = NULL; + msm_adreno_tz_exit(); +} + +static void do_devfreq_suspend(struct work_struct *work) +{ + struct kgsl_pwrscale *pwrscale = container_of(work, + struct kgsl_pwrscale, devfreq_suspend_ws); + + devfreq_suspend_device(pwrscale->devfreqptr); + devfreq_suspend_device(pwrscale->bus_devfreq); +} + +static void do_devfreq_resume(struct work_struct *work) +{ + struct kgsl_pwrscale *pwrscale = container_of(work, + struct kgsl_pwrscale, devfreq_resume_ws); + + devfreq_resume_device(pwrscale->devfreqptr); + devfreq_resume_device(pwrscale->bus_devfreq); +} + +static void do_devfreq_notify(struct work_struct *work) +{ + struct kgsl_pwrscale *pwrscale = container_of(work, + struct kgsl_pwrscale, devfreq_notify_ws); + + mutex_lock(&pwrscale->devfreqptr->lock); + update_devfreq(pwrscale->devfreqptr); + mutex_unlock(&pwrscale->devfreqptr->lock); + + if (pwrscale->bus_devfreq) { + mutex_lock(&pwrscale->bus_devfreq->lock); + update_devfreq(pwrscale->bus_devfreq); + mutex_unlock(&pwrscale->bus_devfreq->lock); + } +} diff --git a/kgsl_pwrscale.h b/kgsl_pwrscale.h new 
file mode 100644 index 0000000000..2bdc9db0d9 --- /dev/null +++ b/kgsl_pwrscale.h @@ -0,0 +1,110 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2010-2021, The Linux Foundation. All rights reserved. + */ + +#ifndef __KGSL_PWRSCALE_H +#define __KGSL_PWRSCALE_H + +#include "kgsl_pwrctrl.h" +#include "msm_adreno_devfreq.h" + +/* devfreq governor call window in usec */ +#define KGSL_GOVERNOR_CALL_INTERVAL 10000 + +struct kgsl_power_stats { + u64 busy_time; + u64 ram_time; + u64 ram_wait; +}; + +/** + * struct kgsl_pwrscale - Power scaling settings for a KGSL device + * @devfreqptr - Pointer to the devfreq device + * @gpu_profile - GPU profile data for the devfreq device + * @bus_profile - Bus specific data for the bus devfreq device + * @freq_table - GPU frequencies for the DCVS algorithm + * @last_governor - Prior devfreq governor + * @accum_stats - Accumulated statistics for various frequency calculations + * @enabled - Whether or not power scaling is enabled + * @time - Last submitted sample timestamp + * @on_time - Timestamp when gpu busy begins + * @devfreq_wq - Main devfreq workqueue + * @devfreq_suspend_ws - Pass device suspension to devfreq + * @devfreq_resume_ws - Pass device resume to devfreq + * @devfreq_notify_ws - Notify devfreq to update sampling + * @next_governor_call - Timestamp after which the governor may be notified of + * a new sample + * @cooling_dev - Thermal cooling device handle + * @ctxt_aware_enable - Whether or not ctxt aware DCVS feature is enabled + * @ctxt_aware_busy_penalty - The time in microseconds required to trigger + * ctxt aware power level jump + * @ctxt_aware_target_pwrlevel - pwrlevel to jump on in case of ctxt aware + * power level jump + */ +struct kgsl_pwrscale { + struct devfreq *devfreqptr; + struct msm_adreno_extended_profile gpu_profile; + struct msm_busmon_extended_profile bus_profile; + unsigned long freq_table[KGSL_MAX_PWRLEVELS]; + char last_governor[DEVFREQ_NAME_LEN]; + struct kgsl_power_stats accum_stats; + bool enabled; + ktime_t time; + s64 on_time; + struct workqueue_struct *devfreq_wq; + struct work_struct devfreq_suspend_ws; + struct work_struct devfreq_resume_ws; + struct work_struct devfreq_notify_ws; + ktime_t next_governor_call; + struct thermal_cooling_device *cooling_dev; + bool ctxt_aware_enable; + unsigned int ctxt_aware_target_pwrlevel; + unsigned int ctxt_aware_busy_penalty; + /** @busmondev: A child device for the busmon governor */ + struct device busmondev; + /** @bus_devfreq: Pointer to the bus devfreq device */ + struct devfreq *bus_devfreq; + /** @devfreq_enabled: Whether or not devfreq is enabled */ + bool devfreq_enabled; +}; + +/** + * kgsl_pwrscale_init - Initialize the pwrscale subsystem + * @device: A GPU device handle + * @pdev: A pointer to the GPU platform device + * @governor: default devfreq governor to use for GPU frequency scaling + * + * Return: 0 on success or negative on failure + */ +int kgsl_pwrscale_init(struct kgsl_device *device, struct platform_device *pdev, + const char *governor); +void kgsl_pwrscale_close(struct kgsl_device *device); + +void kgsl_pwrscale_update(struct kgsl_device *device); +void kgsl_pwrscale_update_stats(struct kgsl_device *device); +void kgsl_pwrscale_busy(struct kgsl_device *device); +void kgsl_pwrscale_sleep(struct kgsl_device *device); +void kgsl_pwrscale_wake(struct kgsl_device *device); + +void kgsl_pwrscale_enable(struct kgsl_device *device); +void kgsl_pwrscale_disable(struct kgsl_device *device, bool turbo); + +int kgsl_devfreq_target(struct 
device *dev, unsigned long *freq, u32 flags); +int kgsl_devfreq_get_dev_status(struct device *dev, + struct devfreq_dev_status *stat); +int kgsl_devfreq_get_cur_freq(struct device *dev, unsigned long *freq); + +int kgsl_busmon_target(struct device *dev, unsigned long *freq, u32 flags); +int kgsl_busmon_get_dev_status(struct device *dev, + struct devfreq_dev_status *stat); +int kgsl_busmon_get_cur_freq(struct device *dev, unsigned long *freq); + +int msm_adreno_tz_init(void); + +void msm_adreno_tz_exit(void); + +int devfreq_gpubw_init(void); + +void devfreq_gpubw_exit(void); +#endif diff --git a/kgsl_reclaim.c b/kgsl_reclaim.c new file mode 100644 index 0000000000..b0d6804456 --- /dev/null +++ b/kgsl_reclaim.c @@ -0,0 +1,422 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + */ + +#include +#include +#include + +#include "kgsl_reclaim.h" +#include "kgsl_sharedmem.h" + +/* + * Reclaiming excessive number of pages from a process will impact launch + * latency for the subsequent launch of the process. After measuring the + * launch latencies by having various maximum limits, it has been decided + * that allowing 30MB (7680 pages) of relcaim per process will have little + * impact and the latency will be within acceptable limit. + */ +static u32 kgsl_reclaim_max_page_limit = 7680; + +/* Setting this to 0 means we reclaim pages as specified in shrinker call */ +static u32 kgsl_nr_to_scan; +static atomic_t kgsl_shrinker_active = ATOMIC_INIT(0); + +static unsigned long shmem_swap_pages(struct address_space *mapping) +{ + struct inode *inode = mapping->host; + struct shmem_inode_info *info = SHMEM_I(inode); + unsigned long swapped; + + swapped = READ_ONCE(info->swapped); + return swapped; +} + +static unsigned long kgsl_process_get_reclaim_count( + struct kgsl_process_private *process) +{ + struct kgsl_mem_entry *entry; + struct kgsl_memdesc *memdesc; + unsigned long reclaim_count = 0; + int id; + + spin_lock(&process->mem_lock); + idr_for_each_entry(&process->mem_idr, entry, id) { + memdesc = &entry->memdesc; + if (memdesc->shmem_filp) + reclaim_count += shmem_swap_pages( + memdesc->shmem_filp->f_mapping); + } + spin_unlock(&process->mem_lock); + + return reclaim_count; +} + +static int kgsl_memdesc_get_reclaimed_pages(struct kgsl_mem_entry *entry) +{ + struct kgsl_memdesc *memdesc = &entry->memdesc; + int i, ret; + struct page *page; + + for (i = 0; i < memdesc->page_count; i++) { + if (memdesc->pages[i]) + continue; + + page = shmem_read_mapping_page_gfp( + memdesc->shmem_filp->f_mapping, i, kgsl_gfp_mask(0)); + + if (IS_ERR(page)) + return PTR_ERR(page); + + kgsl_page_sync_for_device(memdesc->dev, page, PAGE_SIZE); + + /* + * Update the pages array only if vmfault has not + * updated it meanwhile + */ + spin_lock(&memdesc->lock); + if (!memdesc->pages[i]) { + memdesc->pages[i] = page; + atomic_dec(&entry->priv->unpinned_page_count); + } else + put_page(page); + spin_unlock(&memdesc->lock); + } + + ret = kgsl_mmu_map(memdesc->pagetable, memdesc); + if (ret) + return ret; + + memdesc->priv &= ~KGSL_MEMDESC_RECLAIMED; + memdesc->priv &= ~KGSL_MEMDESC_SKIP_RECLAIM; + + return 0; +} + +int kgsl_reclaim_to_pinned_state( + struct kgsl_process_private *process) +{ + struct kgsl_mem_entry *entry, *valid_entry; + int next = 0, ret = 0; + + mutex_lock(&process->reclaim_lock); + + if (test_bit(KGSL_PROC_PINNED_STATE, &process->state)) + goto done; + + for ( ; ; ) { + valid_entry = NULL; + spin_lock(&process->mem_lock); + entry = 
idr_get_next(&process->mem_idr, &next); + if (entry == NULL) { + spin_unlock(&process->mem_lock); + break; + } + + if (!entry->pending_free && + (entry->memdesc.priv & KGSL_MEMDESC_RECLAIMED)) + valid_entry = kgsl_mem_entry_get(entry); + spin_unlock(&process->mem_lock); + + if (valid_entry) { + ret = kgsl_memdesc_get_reclaimed_pages(entry); + kgsl_mem_entry_put(entry); + if (ret) + goto done; + } + + next++; + } + + set_bit(KGSL_PROC_PINNED_STATE, &process->state); +done: + mutex_unlock(&process->reclaim_lock); + return ret; +} + +static void kgsl_reclaim_foreground_work(struct work_struct *work) +{ + struct kgsl_process_private *process = + container_of(work, struct kgsl_process_private, fg_work); + + if (test_bit(KGSL_PROC_STATE, &process->state)) + kgsl_reclaim_to_pinned_state(process); + kgsl_process_private_put(process); +} + +static ssize_t kgsl_proc_state_show(struct kobject *kobj, + struct kgsl_process_attribute *attr, char *buf) +{ + struct kgsl_process_private *process = + container_of(kobj, struct kgsl_process_private, kobj); + + if (test_bit(KGSL_PROC_STATE, &process->state)) + return scnprintf(buf, PAGE_SIZE, "foreground\n"); + else + return scnprintf(buf, PAGE_SIZE, "background\n"); +} + +static ssize_t kgsl_proc_state_store(struct kobject *kobj, + struct kgsl_process_attribute *attr, const char *buf, ssize_t count) +{ + struct kgsl_process_private *process = + container_of(kobj, struct kgsl_process_private, kobj); + + if (sysfs_streq(buf, "foreground")) { + if (!test_and_set_bit(KGSL_PROC_STATE, &process->state) && + kgsl_process_private_get(process)) + kgsl_schedule_work(&process->fg_work); + } else if (sysfs_streq(buf, "background")) { + clear_bit(KGSL_PROC_STATE, &process->state); + } else + return -EINVAL; + + return count; +} + +static ssize_t gpumem_reclaimed_show(struct kobject *kobj, + struct kgsl_process_attribute *attr, char *buf) +{ + struct kgsl_process_private *process = + container_of(kobj, struct kgsl_process_private, kobj); + + return scnprintf(buf, PAGE_SIZE, "%d\n", + kgsl_process_get_reclaim_count(process) << PAGE_SHIFT); +} + +PROCESS_ATTR(state, 0644, kgsl_proc_state_show, kgsl_proc_state_store); +PROCESS_ATTR(gpumem_reclaimed, 0444, gpumem_reclaimed_show, NULL); + +static const struct attribute *proc_reclaim_attrs[] = { + &attr_state.attr, + &attr_gpumem_reclaimed.attr, + NULL, +}; + +void kgsl_reclaim_proc_sysfs_init(struct kgsl_process_private *process) +{ + WARN_ON(sysfs_create_files(&process->kobj, proc_reclaim_attrs)); +} + +ssize_t kgsl_proc_max_reclaim_limit_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + int ret; + + ret = kstrtou32(buf, 0, &kgsl_reclaim_max_page_limit); + return ret ? ret : count; +} + +ssize_t kgsl_proc_max_reclaim_limit_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%d\n", kgsl_reclaim_max_page_limit); +} + +ssize_t kgsl_nr_to_scan_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + int ret; + + ret = kstrtou32(buf, 0, &kgsl_nr_to_scan); + return ret ? 
ret : count; +} + +ssize_t kgsl_nr_to_scan_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%d\n", kgsl_nr_to_scan); +} + +static u32 kgsl_reclaim_process(struct kgsl_process_private *process, + u32 pages_to_reclaim) +{ + struct kgsl_memdesc *memdesc; + struct kgsl_mem_entry *entry, *valid_entry; + u32 next = 0, remaining = pages_to_reclaim; + + /* + * If we do not get the lock here, it means that the buffers are + * being pinned back. So do not keep waiting here as we would anyway + * return empty handed once the lock is acquired. + */ + if (!mutex_trylock(&process->reclaim_lock)) + return 0; + + while (remaining) { + + if (atomic_read(&process->unpinned_page_count) >= + kgsl_reclaim_max_page_limit) + break; + + /* Abort reclaim if process submitted work. */ + if (atomic_read(&process->cmd_count)) + break; + + /* Abort reclaim if process foreground hint is received. */ + if (test_bit(KGSL_PROC_STATE, &process->state)) + break; + + valid_entry = NULL; + spin_lock(&process->mem_lock); + entry = idr_get_next(&process->mem_idr, &next); + if (entry == NULL) { + spin_unlock(&process->mem_lock); + break; + } + + memdesc = &entry->memdesc; + if (!entry->pending_free && + (memdesc->priv & KGSL_MEMDESC_CAN_RECLAIM) && + !(memdesc->priv & KGSL_MEMDESC_RECLAIMED) && + !(memdesc->priv & KGSL_MEMDESC_SKIP_RECLAIM)) + valid_entry = kgsl_mem_entry_get(entry); + spin_unlock(&process->mem_lock); + + if (!valid_entry) { + next++; + continue; + } + + if ((atomic_read(&process->unpinned_page_count) + + memdesc->page_count) > kgsl_reclaim_max_page_limit) { + kgsl_mem_entry_put(entry); + next++; + continue; + } + + if (memdesc->page_count > remaining) { + kgsl_mem_entry_put(entry); + next++; + continue; + } + + if (!kgsl_mmu_unmap(memdesc->pagetable, memdesc)) { + int i; + + for (i = 0; i < memdesc->page_count; i++) { + set_page_dirty_lock(memdesc->pages[i]); + shmem_mark_page_lazyfree(memdesc->pages[i]); + spin_lock(&memdesc->lock); + put_page(memdesc->pages[i]); + memdesc->pages[i] = NULL; + atomic_inc(&process->unpinned_page_count); + spin_unlock(&memdesc->lock); + remaining--; + } + + memdesc->priv |= KGSL_MEMDESC_RECLAIMED; + } + + kgsl_mem_entry_put(entry); + next++; + } + if (next) + clear_bit(KGSL_PROC_PINNED_STATE, &process->state); + mutex_unlock(&process->reclaim_lock); + return (pages_to_reclaim - remaining); +} + +/* Functions for the shrinker */ + +static unsigned long +kgsl_reclaim_shrink_scan_objects(struct shrinker *shrinker, + struct shrink_control *sc) +{ + /* nr_pages represents number of pages to be reclaimed*/ + u32 nr_pages = kgsl_nr_to_scan ? 
kgsl_nr_to_scan : sc->nr_to_scan; + u32 bg_proc = 0; + u64 pp_nr_pages; + struct list_head kgsl_reclaim_process_list; + struct kgsl_process_private *process, *next; + + if (atomic_inc_return(&kgsl_shrinker_active) > 1) { + atomic_dec(&kgsl_shrinker_active); + return 0; + } + + INIT_LIST_HEAD(&kgsl_reclaim_process_list); + read_lock(&kgsl_driver.proclist_lock); + list_for_each_entry(process, &kgsl_driver.process_list, list) { + if (test_bit(KGSL_PROC_STATE, &process->state) || + !kgsl_process_private_get(process)) + continue; + + bg_proc++; + list_add(&process->reclaim_list, &kgsl_reclaim_process_list); + } + read_unlock(&kgsl_driver.proclist_lock); + + list_for_each_entry(process, &kgsl_reclaim_process_list, reclaim_list) { + if (!nr_pages) + break; + + pp_nr_pages = nr_pages; + do_div(pp_nr_pages, bg_proc--); + nr_pages -= kgsl_reclaim_process(process, pp_nr_pages); + } + + list_for_each_entry_safe(process, next, + &kgsl_reclaim_process_list, reclaim_list) { + list_del(&process->reclaim_list); + kgsl_process_private_put(process); + } + + atomic_dec(&kgsl_shrinker_active); + return ((kgsl_nr_to_scan ? + kgsl_nr_to_scan : sc->nr_to_scan) - nr_pages); +} + +static unsigned long +kgsl_reclaim_shrink_count_objects(struct shrinker *shrinker, + struct shrink_control *sc) +{ + struct kgsl_process_private *process; + unsigned long count_reclaimable = 0; + + read_lock(&kgsl_driver.proclist_lock); + list_for_each_entry(process, &kgsl_driver.process_list, list) { + if (!test_bit(KGSL_PROC_STATE, &process->state)) + count_reclaimable += kgsl_reclaim_max_page_limit - + atomic_read(&process->unpinned_page_count); + } + read_unlock(&kgsl_driver.proclist_lock); + + return (count_reclaimable << PAGE_SHIFT); +} + +/* Shrinker callback data*/ +static struct shrinker kgsl_reclaim_shrinker = { + .count_objects = kgsl_reclaim_shrink_count_objects, + .scan_objects = kgsl_reclaim_shrink_scan_objects, + .seeks = DEFAULT_SEEKS, + .batch = 0, +}; + +void kgsl_reclaim_proc_private_init(struct kgsl_process_private *process) +{ + mutex_init(&process->reclaim_lock); + INIT_WORK(&process->fg_work, kgsl_reclaim_foreground_work); + set_bit(KGSL_PROC_PINNED_STATE, &process->state); + set_bit(KGSL_PROC_STATE, &process->state); + atomic_set(&process->unpinned_page_count, 0); +} + +int kgsl_reclaim_init(void) +{ + int ret; + + /* Initialize shrinker */ + ret = register_shrinker(&kgsl_reclaim_shrinker); + if (ret) + pr_err("kgsl: reclaim: Failed to register shrinker\n"); + + return ret; +} + +void kgsl_reclaim_close(void) +{ + /* Unregister shrinker */ + unregister_shrinker(&kgsl_reclaim_shrinker); +} diff --git a/kgsl_reclaim.h b/kgsl_reclaim.h new file mode 100644 index 0000000000..bb2c01861c --- /dev/null +++ b/kgsl_reclaim.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + */ +#ifndef __KGSL_RECLAIM_H +#define __KGSL_RECLAIM_H + + +#include "kgsl_device.h" + +#ifdef CONFIG_QCOM_KGSL_PROCESS_RECLAIM + +/* Set if all the memdescs of this process are pinned */ +#define KGSL_PROC_PINNED_STATE 0 +/* Process foreground/background state. 
Set if process is in foreground */ +#define KGSL_PROC_STATE 1 + +int kgsl_reclaim_init(void); +void kgsl_reclaim_close(void); +int kgsl_reclaim_to_pinned_state(struct kgsl_process_private *priv); +void kgsl_reclaim_proc_sysfs_init(struct kgsl_process_private *process); +void kgsl_reclaim_proc_private_init(struct kgsl_process_private *process); +ssize_t kgsl_proc_max_reclaim_limit_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count); +ssize_t kgsl_proc_max_reclaim_limit_show(struct device *dev, + struct device_attribute *attr, char *buf); +ssize_t kgsl_nr_to_scan_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count); +ssize_t kgsl_nr_to_scan_show(struct device *dev, + struct device_attribute *attr, char *buf); +#else +static inline int kgsl_reclaim_init(void) +{ + return 0; +} + +static inline void kgsl_reclaim_close(void) { } + +static inline int kgsl_reclaim_to_pinned_state( + struct kgsl_process_private *priv) +{ + return 0; +} + +static inline void kgsl_reclaim_proc_sysfs_init + (struct kgsl_process_private *process) { } + +static inline void kgsl_reclaim_proc_private_init + (struct kgsl_process_private *process) { } + +#endif +#endif /* __KGSL_RECLAIM_H */ diff --git a/kgsl_regmap.c b/kgsl_regmap.c new file mode 100644 index 0000000000..d20608b711 --- /dev/null +++ b/kgsl_regmap.c @@ -0,0 +1,328 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. + */ + +#include +#include + +#include "kgsl_regmap.h" +#include "kgsl_trace.h" + +#define region_addr(region, _offset) \ + ((region)->virt + (((_offset) - (region)->offset) << 2)) + +static int kgsl_regmap_init_region(struct kgsl_regmap *regmap, + struct platform_device *pdev, + struct kgsl_regmap_region *region, + struct resource *res, const struct kgsl_regmap_ops *ops, + void *priv) +{ + void __iomem *ptr; + + ptr = devm_ioremap(&pdev->dev, res->start, resource_size(res)); + if (!ptr) + return -ENOMEM; + + region->virt = ptr; + region->offset = (res->start - regmap->base->start) >> 2; + region->size = resource_size(res) >> 2; + region->ops = ops; + region->priv = priv; + + return 0; +} + +/* Initialize the regmap with the base region. 
All added regions will be offset + * from this base + */ +int kgsl_regmap_init(struct platform_device *pdev, struct kgsl_regmap *regmap, + const char *name, const struct kgsl_regmap_ops *ops, + void *priv) +{ + struct kgsl_regmap_region *region; + struct resource *res; + int ret; + + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, name); + if (!res) + return -ENODEV; + + regmap->base = res; + + region = ®map->region[0]; + ret = kgsl_regmap_init_region(regmap, pdev, region, res, ops, priv); + + if (!ret) + regmap->count = 1; + + return ret; +} + +/* Add a new region to the regmap */ +int kgsl_regmap_add_region(struct kgsl_regmap *regmap, struct platform_device *pdev, + const char *name, const struct kgsl_regmap_ops *ops, void *priv) +{ + struct kgsl_regmap_region *region; + struct resource *res; + int ret; + + if (WARN_ON(regmap->count >= ARRAY_SIZE(regmap->region))) + return -ENODEV; + + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, name); + if (!res) + return -ENODEV; + + region = ®map->region[regmap->count]; + + ret = kgsl_regmap_init_region(regmap, pdev, region, res, ops, priv); + if (!ret) + regmap->count++; + + return ret; +} + +#define in_range(a, base, len) \ + (((a) >= (base)) && ((a) < ((base) + (len)))) + +struct kgsl_regmap_region *kgsl_regmap_get_region(struct kgsl_regmap *regmap, + u32 offset) +{ + int i; + + for (i = 0; i < regmap->count; i++) { + struct kgsl_regmap_region *region = ®map->region[i]; + + if (in_range(offset, region->offset, region->size)) + return region; + } + + return NULL; +} + +u32 kgsl_regmap_read(struct kgsl_regmap *regmap, u32 offset) +{ + struct kgsl_regmap_region *region = kgsl_regmap_get_region(regmap, offset); + u32 val; + + if (WARN(!region, "Out of bounds register read offset: 0x%x\n", offset)) + return 0; + + if (region->ops && region->ops->preaccess) + region->ops->preaccess(region); + + val = readl_relaxed(region_addr(region, offset)); + /* Allow previous read to post before returning the value */ + rmb(); + + return val; +} + +void kgsl_regmap_write(struct kgsl_regmap *regmap, u32 value, u32 offset) +{ + struct kgsl_regmap_region *region = kgsl_regmap_get_region(regmap, offset); + + if (WARN(!region, "Out of bounds register write offset: 0x%x\n", offset)) + return; + + if (region->ops && region->ops->preaccess) + region->ops->preaccess(region); + + /* Make sure all pending writes have posted first */ + wmb(); + writel_relaxed(value, region_addr(region, offset)); + + trace_kgsl_regwrite(offset, value); +} + +void kgsl_regmap_multi_write(struct kgsl_regmap *regmap, + const struct kgsl_regmap_list *list, int count) +{ + struct kgsl_regmap_region *region, *prev = NULL; + int i; + + /* + * do one write barrier to ensure all previous writes are done before + * starting the list + */ + wmb(); + + for (i = 0; i < count; i++) { + region = kgsl_regmap_get_region(regmap, list[i].offset); + + if (WARN(!region, "Out of bounds register write offset: 0x%x\n", + list[i].offset)) + continue; + + /* + * The registers might be in different regions. If a region has + * a preaccess function we need to call it at least once before + * writing registers but we don't want to call it every time if + * we can avoid it. "cache" the current region and don't call + * pre-access if it is the same region from the previous access. 
+ * This isn't perfect but it should cut down on some unneeded + * cpu cycles + */ + + if (region != prev && region->ops && region->ops->preaccess) + region->ops->preaccess(region); + + prev = region; + + writel_relaxed(list[i].val, region_addr(region, list[i].offset)); + trace_kgsl_regwrite(list[i].val, list[i].offset); + } +} + +void kgsl_regmap_rmw(struct kgsl_regmap *regmap, u32 offset, u32 mask, + u32 or) +{ + struct kgsl_regmap_region *region = kgsl_regmap_get_region(regmap, offset); + u32 val; + + if (WARN(!region, "Out of bounds register read-modify-write offset: 0x%x\n", + offset)) + return; + + if (region->ops && region->ops->preaccess) + region->ops->preaccess(region); + + val = readl_relaxed(region_addr(region, offset)); + /* Make sure the read posted and all pending writes are done */ + mb(); + writel_relaxed((val & ~mask) | or, region_addr(region, offset)); + + trace_kgsl_regwrite(offset, (val & ~mask) | or); +} + +void kgsl_regmap_bulk_write(struct kgsl_regmap *regmap, u32 offset, + const void *data, int dwords) +{ + struct kgsl_regmap_region *region = kgsl_regmap_get_region(regmap, offset); + + if (WARN(!region, "Out of bounds register bulk write offset: 0x%x\n", offset)) + return; + + if (region->ops && region->ops->preaccess) + region->ops->preaccess(region); + + /* + * A bulk write operation can only be in one region - it cannot + * cross boundaries + */ + if (WARN((offset - region->offset) + dwords > region->size, + "OUt of bounds bulk write size: 0x%x\n", offset + dwords)) + return; + + /* Make sure all pending write are done first */ + wmb(); + memcpy_toio(region_addr(region, offset), data, dwords << 2); +} + +void kgsl_regmap_bulk_read(struct kgsl_regmap *regmap, u32 offset, + const void *data, int dwords) +{ + struct kgsl_regmap_region *region = kgsl_regmap_get_region(regmap, offset); + + if (WARN(!region, "Out of bounds register bulk read offset: 0x%x\n", offset)) + return; + + if (region->ops && region->ops->preaccess) + region->ops->preaccess(region); + + /* + * A bulk read operation can only be in one region - it cannot + * cross boundaries + */ + if (WARN((offset - region->offset) + dwords > region->size, + "Out of bounds bulk read size: 0x%x\n", offset + dwords)) + return; + + memcpy_fromio(region_addr(region, offset), data, dwords << 2); + + /* Make sure the copy is finished before moving on */ + rmb(); +} + +void __iomem *kgsl_regmap_virt(struct kgsl_regmap *regmap, u32 offset) +{ + struct kgsl_regmap_region *region = kgsl_regmap_get_region(regmap, offset); + + if (region) + return region_addr(region, offset); + + return NULL; +} + +void kgsl_regmap_read_indexed(struct kgsl_regmap *regmap, u32 addr, + u32 data, u32 *dest, int count) +{ + struct kgsl_regmap_region *region = kgsl_regmap_get_region(regmap, addr); + int i; + + if (!region) + return; + + /* Make sure the offset is in the same region */ + if (kgsl_regmap_get_region(regmap, data) != region) + return; + + if (region->ops && region->ops->preaccess) + region->ops->preaccess(region); + + /* Write the address register */ + writel_relaxed(0, region_addr(region, addr)); + + /* Make sure the write finishes */ + wmb(); + + for (i = 0; i < count; i++) + dest[i] = readl_relaxed(region_addr(region, data)); + + /* Do one barrier at the end to make sure all the data is posted */ + rmb(); +} + +void kgsl_regmap_read_indexed_interleaved(struct kgsl_regmap *regmap, u32 addr, + u32 data, u32 *dest, u32 start, int count) +{ + struct kgsl_regmap_region *region = kgsl_regmap_get_region(regmap, addr); + int i; + + if 
(!region)
+		return;
+
+	/* Make sure the offset is in the same region */
+	if (kgsl_regmap_get_region(regmap, data) != region)
+		return;
+
+	if (region->ops && region->ops->preaccess)
+		region->ops->preaccess(region);
+
+	for (i = 0; i < count; i++) {
+		/* Write the address register */
+		writel_relaxed(start + i, region_addr(region, addr));
+		/* Make sure the write finishes */
+		wmb();
+
+		dest[i] = readl_relaxed(region_addr(region, data));
+		/* Make sure the read finishes */
+		rmb();
+	}
+}
+
+/* A special helper function to work with read_poll_timeout */
+int kgsl_regmap_poll_read(struct kgsl_regmap_region *region, u32 offset,
+	u32 *val)
+{
+	/* FIXME: WARN on !region? */
+	if (WARN(!region, "Out of bounds poll read: 0x%x\n", offset))
+		return -ENODEV;
+
+	*val = readl_relaxed(region_addr(region, offset));
+	/* Make sure the read is finished before moving on */
+	rmb();
+
+	return 0;
+}
diff --git a/kgsl_regmap.h b/kgsl_regmap.h
new file mode 100644
index 0000000000..0e7ed20689
--- /dev/null
+++ b/kgsl_regmap.h
@@ -0,0 +1,265 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
+ */
+
+#ifndef KGSL_REGMAP_H
+#define KGSL_REGMAP_H
+
+struct kgsl_regmap;
+struct kgsl_regmap_region;
+
+/**
+ * @kgsl_regmap_ops - Helper functions to access registers in a regmap region
+ */
+struct kgsl_regmap_ops {
+	/**
+	 * @preaccess: called before accesses to the register. This is used by
+	 * adreno to call kgsl_pre_hwaccess()
+	 */
+	void (*preaccess)(struct kgsl_regmap_region *region);
+};
+
+/**
+ * struct kgsl_regmap_region - Defines a region of registers in a kgsl_regmap
+ */
+struct kgsl_regmap_region {
+	/** @virt: Kernel address for the re-mapped region */
+	void __iomem *virt;
+	/** @offset: Dword offset of the region from the regmap base */
+	u32 offset;
+	/** @size: Size of the region in dwords */
+	u32 size;
+	/** @ops: Helper functions to access registers in the region */
+	const struct kgsl_regmap_ops *ops;
+	/** @priv: Private data to send to the ops */
+	void *priv;
+};
+
+/**
+ * struct kgsl_regmap - Define a set of individual regions that are all indexed
+ * from a common base. This is used to access GPU and GMU registers in
+ * separate io-remapped regions from a single set of function calls.
+ */
+struct kgsl_regmap {
+	/**
+	 * @base: Resource pointer for the "base" region (the region that all
+	 * other regions are indexed from)
+	 */
+	struct resource *base;
+	/** @region: Array of regions for this regmap */
+	struct kgsl_regmap_region region[3];
+	/** @count: Number of active regions in @region */
+	int count;
+};
+
+/**
+ * struct kgsl_regmap_list
+ */
+struct kgsl_regmap_list {
+	/** offset: Dword offset of the register to write */
+	u32 offset;
+	/** val: Value to write */
+	u32 val;
+};
+
+/**
+ * kgsl_regmap_init - Initialize a regmap
+ * @pdev: Pointer to the platform device that owns @name
+ * @regmap: Pointer to the regmap to initialize
+ * @name: Name of the resource to map
+ * @ops: Pointer to the regmap ops for this region
+ * @priv: Private data to pass to the regmap ops
+ *
+ * Initialize a regmap and set the resource @name as the base region in the
+ * regmap. All other regions will be indexed from the start of this region.
+ * This will nominally be the start of the GPU register region.
+ *
+ * Return: 0 on success or negative error on failure.
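+ *
+ * A minimal usage sketch (the resource names, ops pointer and private data
+ * below are illustrative assumptions):
+ *
+ *	struct kgsl_regmap regmap;
+ *	int ret;
+ *
+ *	ret = kgsl_regmap_init(pdev, &regmap, "kgsl_3d0_reg_memory",
+ *		&my_regmap_ops, device);
+ *	if (!ret)
+ *		ret = kgsl_regmap_add_region(&regmap, pdev, "gmu",
+ *			&my_regmap_ops, device);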
+ */
+int kgsl_regmap_init(struct platform_device *pdev, struct kgsl_regmap *regmap,
+	const char *name, const struct kgsl_regmap_ops *ops,
+	void *priv);
+
+/**
+ * kgsl_regmap_add_region - Add a region to an existing regmap
+ * @regmap: The regmap to add the region to
+ * @pdev: Pointer to the platform device that owns @name
+ * @name: Name of the resource to map
+ * @ops: Pointer to the regmap ops for this region
+ * @priv: Private data to pass to the regmap ops
+ *
+ * Add a new region to the regmap. It will be indexed against the base
+ * address already defined when the regmap was initialized. For example,
+ * if the base GPU address is at physical address 0x3d000000 and the new
+ * region is at physical address 0x3d010000 this region will be added at
+ * (0x3d010000 - 0x3d000000) or dword offset 0x4000.
+ *
+ * Return: 0 on success or negative error on failure.
+ */
+int kgsl_regmap_add_region(struct kgsl_regmap *regmap, struct platform_device *pdev,
+	const char *name, const struct kgsl_regmap_ops *ops, void *priv);
+
+/**
+ * kgsl_regmap_read - Read a register from the regmap
+ * @regmap: The regmap to read from
+ * @offset: The dword offset to read
+ *
+ * Read the register at the specified offset indexed against the base address in
+ * the regmap. An offset that falls out of mapped regions will WARN and return
+ * 0.
+ *
+ * Return: The value of the register at @offset
+ */
+u32 kgsl_regmap_read(struct kgsl_regmap *regmap, u32 offset);
+
+/**
+ * kgsl_regmap_write - Write a register to the regmap
+ * @regmap: The regmap to write to
+ * @value: The value to write to @offset
+ * @offset: The dword offset to write
+ *
+ * Write @value to the register at the specified offset indexed against the base
+ * address in the regmap. An offset that falls out of mapped regions will WARN
+ * and skip the write.
+ */
+void kgsl_regmap_write(struct kgsl_regmap *regmap, u32 value, u32 offset);
+
+/**
+ * kgsl_regmap_multi_write - Write a list of registers
+ * @regmap: The regmap to write to
+ * @list: A pointer to an array of &struct kgsl_regmap_list items
+ * @count: Number of items in @list
+ *
+ * Write all the registers in @list to the regmap.
+ */
+
+void kgsl_regmap_multi_write(struct kgsl_regmap *regmap,
+	const struct kgsl_regmap_list *list, int count);
+
+/**
+ * kgsl_regmap_rmw - read-modify-write a register in the regmap
+ * @regmap: The regmap to write to
+ * @offset: The dword offset to write
+ * @mask: Mask the register contents against this mask
+ * @or: OR these bits into the register before writing it back again
+ *
+ * Read the register at @offset, mask it against @mask, OR the bits in @or and
+ * write it back to @offset. @offset will be indexed against the base
+ * address in the regmap. An offset that falls out of mapped regions will WARN
+ * and skip the operation.
+ */
+void kgsl_regmap_rmw(struct kgsl_regmap *regmap, u32 offset, u32 mask,
+	u32 or);
+
+/**
+ * kgsl_regmap_bulk_write - Write an array of values to an I/O region
+ * @regmap: The regmap to write to
+ * @offset: The dword offset to start writing to
+ * @data: The data to write
+ * @dwords: Number of dwords to write
+ *
+ * Bulk write @data to the I/O region starting at @offset for @dwords.
+ * The write operation must fit fully inside a single region (no crossing the
+ * boundaries). @offset will be indexed against the base
+ * address in the regmap. An offset that falls out of mapped regions will WARN
+ * and skip the operation.
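+ *
+ * Example (the offset and payload below are illustrative):
+ *
+ *	u32 payload[4] = { 0x1, 0x2, 0x3, 0x4 };
+ *
+ *	kgsl_regmap_bulk_write(&regmap, 0x8000, payload, ARRAY_SIZE(payload));
+ *
+ * This writes four consecutive dwords starting at dword offset 0x8000,
+ * provided the whole range falls inside one mapped region.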
+ */
+void kgsl_regmap_bulk_write(struct kgsl_regmap *regmap, u32 offset,
+	const void *data, int dwords);
+
+/**
+ * kgsl_regmap_bulk_read - Read an array of values from an I/O region
+ * @regmap: The regmap to read from
+ * @offset: The dword offset to start reading from
+ * @data: The data pointer to read into
+ * @dwords: Number of dwords to read
+ *
+ * Bulk read into @data the I/O region starting at @offset for @dwords.
+ * The read operation must fit fully inside a single region (no crossing the
+ * boundaries). @offset will be indexed against the base
+ * address in the regmap. An offset that falls out of mapped regions will WARN
+ * and skip the operation.
+ */
+void kgsl_regmap_bulk_read(struct kgsl_regmap *regmap, u32 offset,
+	const void *data, int dwords);
+
+/**
+ * kgsl_regmap_virt - Return the kernel address for an offset
+ * @regmap: The regmap to query
+ * @offset: The dword offset to map to a kernel address
+ *
+ * Return: The kernel address for @offset or NULL if out of range.
+ */
+void __iomem *kgsl_regmap_virt(struct kgsl_regmap *regmap, u32 offset);
+
+/**
+ * kgsl_regmap_read_indexed - Read an indexed pair of registers
+ * @regmap: The regmap to read from
+ * @addr: The offset of the address register for the index pair
+ * @data: The offset of the data register for the index pair
+ * @dest: An array to put the values
+ * @count: Number of dwords to read from @data
+ *
+ * This function configures the address register once and then
+ * reads from the data register in a loop.
+ */
+void kgsl_regmap_read_indexed(struct kgsl_regmap *regmap, u32 addr,
+	u32 data, u32 *dest, int count);
+
+/**
+ * kgsl_regmap_read_indexed_interleaved - Dump an indexed pair of registers
+ * @regmap: The regmap to read from
+ * @addr: The offset of the address register for the index pair
+ * @data: The offset of the data register for the index pair
+ * @dest: An array to put the values
+ * @start: Starting value to be programmed in the address register
+ * @count: Number of dwords to read from @data
+ *
+ * This function is slightly different from kgsl_regmap_read_indexed()
+ * in that it takes as argument a start value that is to be programmed
+ * in the address register and secondly, the address register is to be
+ * configured before every read of the data register.
+ */
+void kgsl_regmap_read_indexed_interleaved(struct kgsl_regmap *regmap, u32 addr,
+	u32 data, u32 *dest, u32 start, int count);
+
+/**
+ * kgsl_regmap_get_region - Return the region for the given offset
+ * @regmap: The regmap to query
+ * @offset: The offset to query
+ *
+ * Return: The &struct kgsl_regmap_region that owns the offset or NULL
+ */
+struct kgsl_regmap_region *kgsl_regmap_get_region(struct kgsl_regmap *regmap,
+	u32 offset);
+
+/**
+ * kgsl_regmap_poll_read - A helper function for kgsl_regmap_read_poll_timeout
+ * @region: Pointer to a &struct kgsl_regmap_region
+ * @offset: Offset to read
+ * @val: Pointer for the result
+ *
+ * This is a special helper function to be called only from
+ * kgsl_regmap_read_poll_timeout.
+ *
+ * Return: 0 on success or -ENODEV if the region is NULL.
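+ *
+ * A minimal sketch of the intended call site (the offset, bit and timeout
+ * values are illustrative):
+ *
+ *	u32 status;
+ *	int ret;
+ *
+ *	ret = kgsl_regmap_read_poll_timeout(&regmap, 0x23f0, status,
+ *		status & BIT(0), 100, 100 * 1000);
+ *
+ * This polls dword offset 0x23f0 every 100 usec until bit 0 is set or
+ * 100 msec elapse, calling kgsl_regmap_poll_read() for each sample.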
+ */ +int kgsl_regmap_poll_read(struct kgsl_regmap_region *region, u32 offset, + u32 *val); + +#define kgsl_regmap_read_poll_timeout(regmap, offset, val, cond, \ + sleep_us, timeout_us) \ +({ \ + int __ret, __tmp; \ + struct kgsl_regmap_region *region = \ + kgsl_regmap_get_region(regmap, offset); \ + \ + if (region && region->ops && region->ops->preaccess) \ + region->ops->preaccess(region); \ + __tmp = read_poll_timeout(kgsl_regmap_poll_read, __ret, __ret || (cond),\ + sleep_us, timeout_us, false, region, offset, &(val)); \ + __ret ?: __tmp; \ +}) + +#endif diff --git a/kgsl_sharedmem.c b/kgsl_sharedmem.c new file mode 100644 index 0000000000..42311fd494 --- /dev/null +++ b/kgsl_sharedmem.c @@ -0,0 +1,1605 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include + +#include "kgsl_device.h" +#include "kgsl_pool.h" +#include "kgsl_reclaim.h" +#include "kgsl_sharedmem.h" + +/* + * The user can set this from debugfs to force failed memory allocations to + * fail without trying OOM first. This is a debug setting useful for + * stress applications that want to test failure cases without pushing the + * system into unrecoverable OOM panics + */ + +bool kgsl_sharedmem_noretry_flag; + +static DEFINE_MUTEX(kernel_map_global_lock); + +/* An attribute for showing per-process memory statistics */ +struct kgsl_mem_entry_attribute { + struct kgsl_process_attribute attr; + int memtype; + ssize_t (*show)(struct kgsl_process_private *priv, + int type, char *buf); +}; + +static inline struct kgsl_process_attribute *to_process_attr( + struct attribute *attr) +{ + return container_of(attr, struct kgsl_process_attribute, attr); +} + +#define to_mem_entry_attr(a) \ +container_of(a, struct kgsl_mem_entry_attribute, attr) + +#define __MEM_ENTRY_ATTR(_type, _name, _show) \ +{ \ + .attr = __ATTR(_name, 0444, mem_entry_sysfs_show, NULL), \ + .memtype = _type, \ + .show = _show, \ +} + +#define MEM_ENTRY_ATTR(_type, _name, _show) \ + static struct kgsl_mem_entry_attribute mem_entry_##_name = \ + __MEM_ENTRY_ATTR(_type, _name, _show) + +static ssize_t mem_entry_sysfs_show(struct kobject *kobj, + struct kgsl_process_attribute *attr, char *buf) +{ + struct kgsl_mem_entry_attribute *pattr = to_mem_entry_attr(attr); + struct kgsl_process_private *priv = + container_of(kobj, struct kgsl_process_private, kobj); + + return pattr->show(priv, pattr->memtype, buf); +} + +static ssize_t +imported_mem_show(struct kgsl_process_private *priv, + int type, char *buf) +{ + struct kgsl_mem_entry *entry; + uint64_t imported_mem = 0; + int id = 0; + + spin_lock(&priv->mem_lock); + for (entry = idr_get_next(&priv->mem_idr, &id); entry; + id++, entry = idr_get_next(&priv->mem_idr, &id)) { + + int egl_surface_count = 0, egl_image_count = 0; + struct kgsl_memdesc *m; + + if (!kgsl_mem_entry_get(entry)) + continue; + spin_unlock(&priv->mem_lock); + + m = &entry->memdesc; + if (kgsl_memdesc_usermem_type(m) == KGSL_MEM_ENTRY_ION) { + kgsl_get_egl_counts(entry, &egl_surface_count, + &egl_image_count); + + if (kgsl_memdesc_get_memtype(m) == + KGSL_MEMTYPE_EGL_SURFACE) + imported_mem += m->size; + else if (egl_surface_count == 0) { + uint64_t size = m->size; + + do_div(size, (egl_image_count ? 
+ egl_image_count : 1)); + imported_mem += size; + } + } + + kgsl_mem_entry_put(entry); + spin_lock(&priv->mem_lock); + } + spin_unlock(&priv->mem_lock); + + return scnprintf(buf, PAGE_SIZE, "%llu\n", imported_mem); +} + +static ssize_t +gpumem_mapped_show(struct kgsl_process_private *priv, + int type, char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%lld\n", + atomic64_read(&priv->gpumem_mapped)); +} + +static ssize_t +gpumem_unmapped_show(struct kgsl_process_private *priv, int type, char *buf) +{ + u64 gpumem_total = atomic64_read(&priv->stats[type].cur); + u64 gpumem_mapped = atomic64_read(&priv->gpumem_mapped); + + if (gpumem_mapped > gpumem_total) + return -EIO; + + return scnprintf(buf, PAGE_SIZE, "%llu\n", + gpumem_total - gpumem_mapped); +} + +/** + * Show the current amount of memory allocated for the given memtype + */ + +static ssize_t +mem_entry_show(struct kgsl_process_private *priv, int type, char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%lld\n", + atomic64_read(&priv->stats[type].cur)); +} + +/** + * Show the maximum memory allocated for the given memtype through the life of + * the process + */ + +static ssize_t +mem_entry_max_show(struct kgsl_process_private *priv, int type, char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%llu\n", priv->stats[type].max); +} + +static ssize_t process_sysfs_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct kgsl_process_attribute *pattr = to_process_attr(attr); + + return pattr->show(kobj, pattr, buf); +} + +static ssize_t process_sysfs_store(struct kobject *kobj, + struct attribute *attr, const char *buf, size_t count) +{ + struct kgsl_process_attribute *pattr = to_process_attr(attr); + + if (pattr->store) + return pattr->store(kobj, pattr, buf, count); + return -EIO; +} + +/* Dummy release function - we have nothing to do here */ +static void process_sysfs_release(struct kobject *kobj) +{ +} + +static const struct sysfs_ops process_sysfs_ops = { + .show = process_sysfs_show, + .store = process_sysfs_store, +}; + +MEM_ENTRY_ATTR(KGSL_MEM_ENTRY_KERNEL, kernel, mem_entry_show); +MEM_ENTRY_ATTR(KGSL_MEM_ENTRY_KERNEL, kernel_max, mem_entry_max_show); +MEM_ENTRY_ATTR(KGSL_MEM_ENTRY_USER, user, mem_entry_show); +MEM_ENTRY_ATTR(KGSL_MEM_ENTRY_USER, user_max, mem_entry_max_show); +#ifdef CONFIG_ION +MEM_ENTRY_ATTR(KGSL_MEM_ENTRY_USER, ion, mem_entry_show); +MEM_ENTRY_ATTR(KGSL_MEM_ENTRY_USER, ion_max, mem_entry_max_show); +#endif +MEM_ENTRY_ATTR(0, imported_mem, imported_mem_show); +MEM_ENTRY_ATTR(0, gpumem_mapped, gpumem_mapped_show); +MEM_ENTRY_ATTR(KGSL_MEM_ENTRY_KERNEL, gpumem_unmapped, gpumem_unmapped_show); + +static struct attribute *mem_entry_attrs[] = { + &mem_entry_kernel.attr.attr, + &mem_entry_kernel_max.attr.attr, + &mem_entry_user.attr.attr, + &mem_entry_user_max.attr.attr, +#ifdef CONFIG_ION + &mem_entry_ion.attr.attr, + &mem_entry_ion_max.attr.attr, +#endif + &mem_entry_imported_mem.attr.attr, + &mem_entry_gpumem_mapped.attr.attr, + &mem_entry_gpumem_unmapped.attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(mem_entry); + +static struct kobj_type process_ktype = { + .sysfs_ops = &process_sysfs_ops, + .release = &process_sysfs_release, + .default_groups = mem_entry_groups, +}; +#ifdef CONFIG_QCOM_KGSL_PROCESS_RECLAIM +static struct device_attribute dev_attr_max_reclaim_limit = { + .attr = { .name = "max_reclaim_limit", .mode = 0644 }, + .show = kgsl_proc_max_reclaim_limit_show, + .store = kgsl_proc_max_reclaim_limit_store, +}; + +static struct device_attribute dev_attr_page_reclaim_per_call = { + .attr = { 
.name = "page_reclaim_per_call", .mode = 0644 }, + .show = kgsl_nr_to_scan_show, + .store = kgsl_nr_to_scan_store, +}; +#endif + +/** + * kgsl_process_init_sysfs() - Initialize and create sysfs files for a process + * + * @device: Pointer to kgsl device struct + * @private: Pointer to the structure for the process + * + * kgsl_process_init_sysfs() is called at the time of creating the + * process struct when a process opens the kgsl device for the first time. + * This function creates the sysfs files for the process. + */ +void kgsl_process_init_sysfs(struct kgsl_device *device, + struct kgsl_process_private *private) +{ + if (kobject_init_and_add(&private->kobj, &process_ktype, + kgsl_driver.prockobj, "%d", pid_nr(private->pid))) { + dev_err(device->dev, "Unable to add sysfs for process %d\n", + pid_nr(private->pid)); + kgsl_process_private_put(private); + } + + kgsl_reclaim_proc_sysfs_init(private); +} + +static ssize_t memstat_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + uint64_t val = 0; + + if (!strcmp(attr->attr.name, "vmalloc")) + val = atomic_long_read(&kgsl_driver.stats.vmalloc); + else if (!strcmp(attr->attr.name, "vmalloc_max")) + val = atomic_long_read(&kgsl_driver.stats.vmalloc_max); + else if (!strcmp(attr->attr.name, "page_alloc")) + val = atomic_long_read(&kgsl_driver.stats.page_alloc); + else if (!strcmp(attr->attr.name, "page_alloc_max")) + val = atomic_long_read(&kgsl_driver.stats.page_alloc_max); + else if (!strcmp(attr->attr.name, "coherent")) + val = atomic_long_read(&kgsl_driver.stats.coherent); + else if (!strcmp(attr->attr.name, "coherent_max")) + val = atomic_long_read(&kgsl_driver.stats.coherent_max); + else if (!strcmp(attr->attr.name, "secure")) + val = atomic_long_read(&kgsl_driver.stats.secure); + else if (!strcmp(attr->attr.name, "secure_max")) + val = atomic_long_read(&kgsl_driver.stats.secure_max); + else if (!strcmp(attr->attr.name, "mapped")) + val = atomic_long_read(&kgsl_driver.stats.mapped); + else if (!strcmp(attr->attr.name, "mapped_max")) + val = atomic_long_read(&kgsl_driver.stats.mapped_max); + + return scnprintf(buf, PAGE_SIZE, "%llu\n", val); +} + +static ssize_t full_cache_threshold_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int ret; + unsigned int thresh = 0; + + ret = kstrtou32(buf, 0, &thresh); + if (ret) + return ret; + + kgsl_driver.full_cache_threshold = thresh; + return count; +} + +static ssize_t full_cache_threshold_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%d\n", + kgsl_driver.full_cache_threshold); +} + +static DEVICE_ATTR(vmalloc, 0444, memstat_show, NULL); +static DEVICE_ATTR(vmalloc_max, 0444, memstat_show, NULL); +static DEVICE_ATTR(page_alloc, 0444, memstat_show, NULL); +static DEVICE_ATTR(page_alloc_max, 0444, memstat_show, NULL); +static DEVICE_ATTR(coherent, 0444, memstat_show, NULL); +static DEVICE_ATTR(coherent_max, 0444, memstat_show, NULL); +static DEVICE_ATTR(secure, 0444, memstat_show, NULL); +static DEVICE_ATTR(secure_max, 0444, memstat_show, NULL); +static DEVICE_ATTR(mapped, 0444, memstat_show, NULL); +static DEVICE_ATTR(mapped_max, 0444, memstat_show, NULL); +static DEVICE_ATTR_RW(full_cache_threshold); + +static const struct attribute *drv_attr_list[] = { + &dev_attr_vmalloc.attr, + &dev_attr_vmalloc_max.attr, + &dev_attr_page_alloc.attr, + &dev_attr_page_alloc_max.attr, + &dev_attr_coherent.attr, + &dev_attr_coherent_max.attr, + &dev_attr_secure.attr, + 
&dev_attr_secure_max.attr, + &dev_attr_mapped.attr, + &dev_attr_mapped_max.attr, + &dev_attr_full_cache_threshold.attr, +#ifdef CONFIG_QCOM_KGSL_PROCESS_RECLAIM + &dev_attr_max_reclaim_limit.attr, + &dev_attr_page_reclaim_per_call.attr, +#endif + NULL, +}; + +int +kgsl_sharedmem_init_sysfs(void) +{ + return sysfs_create_files(&kgsl_driver.virtdev.kobj, drv_attr_list); +} + +static vm_fault_t kgsl_paged_vmfault(struct kgsl_memdesc *memdesc, + struct vm_area_struct *vma, + struct vm_fault *vmf) +{ + int pgoff, ret; + struct page *page; + unsigned int offset = vmf->address - vma->vm_start; + + if (offset >= memdesc->size) + return VM_FAULT_SIGBUS; + + pgoff = offset >> PAGE_SHIFT; + + spin_lock(&memdesc->lock); + if (memdesc->pages[pgoff]) { + page = memdesc->pages[pgoff]; + get_page(page); + } else { + struct kgsl_process_private *priv = + ((struct kgsl_mem_entry *)vma->vm_private_data)->priv; + + /* We are here because page was reclaimed */ + memdesc->priv |= KGSL_MEMDESC_SKIP_RECLAIM; + spin_unlock(&memdesc->lock); + + page = shmem_read_mapping_page_gfp( + memdesc->shmem_filp->f_mapping, pgoff, + kgsl_gfp_mask(0)); + if (IS_ERR(page)) + return VM_FAULT_SIGBUS; + kgsl_page_sync_for_device(memdesc->dev, page, PAGE_SIZE); + + spin_lock(&memdesc->lock); + /* + * Update the pages array only if the page was + * not already brought back. + */ + if (!memdesc->pages[pgoff]) { + memdesc->pages[pgoff] = page; + atomic_dec(&priv->unpinned_page_count); + get_page(page); + } + } + spin_unlock(&memdesc->lock); + + ret = vmf_insert_page(vma, vmf->address, page); + put_page(page); + return ret; +} + +static void kgsl_paged_unmap_kernel(struct kgsl_memdesc *memdesc) +{ + mutex_lock(&kernel_map_global_lock); + if (!memdesc->hostptr) { + /* If already unmapped the refcount should be 0 */ + WARN_ON(memdesc->hostptr_count); + goto done; + } + memdesc->hostptr_count--; + if (memdesc->hostptr_count) + goto done; + vunmap(memdesc->hostptr); + + atomic_long_sub(memdesc->size, &kgsl_driver.stats.vmalloc); + memdesc->hostptr = NULL; +done: + mutex_unlock(&kernel_map_global_lock); +} + +#if IS_ENABLED(CONFIG_QCOM_SECURE_BUFFER) + +#include + +static int lock_sgt(struct sg_table *sgt, u64 size) +{ + int dest_perms = PERM_READ | PERM_WRITE; + int source_vm = VMID_HLOS; + int dest_vm = VMID_CP_PIXEL; + int ret; + + do { + ret = hyp_assign_table(sgt, &source_vm, 1, &dest_vm, + &dest_perms, 1); + } while (ret == -EAGAIN); + + if (ret) { + /* + * If returned error code is EADDRNOTAVAIL, then this + * memory may no longer be in a usable state as security + * state of the pages is unknown after this failure. This + * memory can neither be added back to the pool nor buddy + * system. 
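+		 * (kgsl_alloc_secure_page() below, for example, returns NULL
+		 * without calling __free_page() when lock_sgt() fails with
+		 * -EADDRNOTAVAIL, deliberately leaking the page rather than
+		 * recycling it.)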
+ */ + if (ret == -EADDRNOTAVAIL) + pr_err("Failure to lock secure GPU memory 0x%llx bytes will not be recoverable\n", + size); + + return ret; + } + + return 0; +} + +static int unlock_sgt(struct sg_table *sgt) +{ + int dest_perms = PERM_READ | PERM_WRITE | PERM_EXEC; + int source_vm = VMID_CP_PIXEL; + int dest_vm = VMID_HLOS; + int ret; + + do { + ret = hyp_assign_table(sgt, &source_vm, 1, &dest_vm, + &dest_perms, 1); + } while (ret == -EAGAIN); + + if (ret) + return ret; + + return 0; +} +#endif + +static int kgsl_paged_map_kernel(struct kgsl_memdesc *memdesc) +{ + int ret = 0; + + /* Sanity check - don't map more than we could possibly chew */ + if (memdesc->size > ULONG_MAX) + return -ENOMEM; + + mutex_lock(&kernel_map_global_lock); + if ((!memdesc->hostptr) && (memdesc->pages != NULL)) { + pgprot_t page_prot = pgprot_writecombine(PAGE_KERNEL); + + memdesc->hostptr = vmap(memdesc->pages, memdesc->page_count, + VM_IOREMAP, page_prot); + if (memdesc->hostptr) + KGSL_STATS_ADD(memdesc->size, + &kgsl_driver.stats.vmalloc, + &kgsl_driver.stats.vmalloc_max); + else + ret = -ENOMEM; + } + if (memdesc->hostptr) + memdesc->hostptr_count++; + + mutex_unlock(&kernel_map_global_lock); + + return ret; +} + +static vm_fault_t kgsl_contiguous_vmfault(struct kgsl_memdesc *memdesc, + struct vm_area_struct *vma, + struct vm_fault *vmf) +{ + unsigned long offset, pfn; + + offset = ((unsigned long) vmf->address - vma->vm_start) >> + PAGE_SHIFT; + + pfn = (memdesc->physaddr >> PAGE_SHIFT) + offset; + return vmf_insert_pfn(vma, vmf->address, pfn); +} + +static void _dma_cache_op(struct device *dev, struct page *page, + unsigned int op) +{ + struct scatterlist sgl; + + sg_init_table(&sgl, 1); + sg_set_page(&sgl, page, PAGE_SIZE, 0); + sg_dma_address(&sgl) = page_to_phys(page); + + switch (op) { + case KGSL_CACHE_OP_FLUSH: + dma_sync_sg_for_device(dev, &sgl, 1, DMA_TO_DEVICE); + dma_sync_sg_for_device(dev, &sgl, 1, DMA_FROM_DEVICE); + break; + case KGSL_CACHE_OP_CLEAN: + dma_sync_sg_for_device(dev, &sgl, 1, DMA_TO_DEVICE); + break; + case KGSL_CACHE_OP_INV: + dma_sync_sg_for_device(dev, &sgl, 1, DMA_FROM_DEVICE); + break; + } +} + +int kgsl_cache_range_op(struct kgsl_memdesc *memdesc, uint64_t offset, + uint64_t size, unsigned int op) +{ + int i; + + if (memdesc->flags & KGSL_MEMFLAGS_IOCOHERENT) + return 0; + + if (size == 0 || size > UINT_MAX) + return -EINVAL; + + /* Make sure that the offset + size does not overflow */ + if ((offset + size < offset) || (offset + size < size)) + return -ERANGE; + + /* Check that offset+length does not exceed memdesc->size */ + if (offset + size > memdesc->size) + return -ERANGE; + + size += offset & PAGE_MASK; + offset &= ~PAGE_MASK; + + /* If there is a sgt, use for_each_sg_page to walk it */ + if (memdesc->sgt) { + struct sg_page_iter sg_iter; + + for_each_sg_page(memdesc->sgt->sgl, &sg_iter, + PAGE_ALIGN(size) >> PAGE_SHIFT, offset >> PAGE_SHIFT) + _dma_cache_op(memdesc->dev, sg_page_iter_page(&sg_iter), op); + return 0; + } + + /* Otherwise just walk through the list of pages */ + for (i = 0; i < memdesc->page_count; i++) { + u64 cur = (i << PAGE_SHIFT); + + if ((cur < offset) || (cur >= (offset + size))) + continue; + + _dma_cache_op(memdesc->dev, memdesc->pages[i], op); + } + + return 0; +} + +void kgsl_memdesc_init(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, uint64_t flags) +{ + struct kgsl_mmu *mmu = &device->mmu; + unsigned int align; + + memset(memdesc, 0, sizeof(*memdesc)); + /* Turn off SVM if the system doesn't support it */ + if 
(!kgsl_mmu_is_perprocess(mmu)) + flags &= ~((uint64_t) KGSL_MEMFLAGS_USE_CPU_MAP); + + /* Secure memory disables advanced addressing modes */ + if (flags & KGSL_MEMFLAGS_SECURE) + flags &= ~((uint64_t) KGSL_MEMFLAGS_USE_CPU_MAP); + + /* Disable IO coherence if it is not supported on the chip */ + if (!kgsl_mmu_has_feature(device, KGSL_MMU_IO_COHERENT)) { + flags &= ~((uint64_t) KGSL_MEMFLAGS_IOCOHERENT); + + WARN_ONCE(IS_ENABLED(CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT), + "I/O coherency is not supported on this target\n"); + } else if (IS_ENABLED(CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT)) + flags |= KGSL_MEMFLAGS_IOCOHERENT; + + /* + * We can't enable I/O coherency on uncached surfaces because of + * situations where hardware might snoop the cpu caches which can + * have stale data. This happens primarily due to the limitations + * of dma caching APIs available on arm64 + */ + if (!kgsl_cachemode_is_cached(flags)) + flags &= ~((u64) KGSL_MEMFLAGS_IOCOHERENT); + + if (kgsl_mmu_has_feature(device, KGSL_MMU_NEED_GUARD_PAGE) || + (flags & KGSL_MEMFLAGS_GUARD_PAGE)) + memdesc->priv |= KGSL_MEMDESC_GUARD_PAGE; + + if (flags & KGSL_MEMFLAGS_SECURE) + memdesc->priv |= KGSL_MEMDESC_SECURE; + + memdesc->flags = flags; + memdesc->dev = &device->pdev->dev; + + align = max_t(unsigned int, + kgsl_memdesc_get_align(memdesc), ilog2(PAGE_SIZE)); + kgsl_memdesc_set_align(memdesc, align); + + spin_lock_init(&memdesc->lock); +} + +void kgsl_sharedmem_free(struct kgsl_memdesc *memdesc) +{ + if (!memdesc || !memdesc->size) + return; + + /* Assume if no operations were specified something went bad early */ + if (!memdesc->ops) + return; + + if (memdesc->ops->put_gpuaddr) + memdesc->ops->put_gpuaddr(memdesc); + + if (memdesc->ops->free) + memdesc->ops->free(memdesc); +} + +#if IS_ENABLED(CONFIG_QCOM_SECURE_BUFFER) +void kgsl_free_secure_page(struct page *page) +{ + struct sg_table sgt; + struct scatterlist sgl; + + if (!page) + return; + + sgt.sgl = &sgl; + sgt.nents = 1; + sgt.orig_nents = 1; + sg_init_table(&sgl, 1); + sg_set_page(&sgl, page, PAGE_SIZE, 0); + + unlock_sgt(&sgt); + __free_page(page); +} + +struct page *kgsl_alloc_secure_page(void) +{ + struct page *page; + struct sg_table sgt; + struct scatterlist sgl; + int status; + + page = alloc_page(GFP_KERNEL | __GFP_ZERO | + __GFP_NORETRY | __GFP_HIGHMEM); + if (!page) + return NULL; + + sgt.sgl = &sgl; + sgt.nents = 1; + sgt.orig_nents = 1; + sg_init_table(&sgl, 1); + sg_set_page(&sgl, page, PAGE_SIZE, 0); + + status = lock_sgt(&sgt, PAGE_SIZE); + if (status) { + if (status == -EADDRNOTAVAIL) + return NULL; + + __free_page(page); + return NULL; + } + return page; +} +#else +void kgsl_free_secure_page(struct page *page) +{ +} + +struct page *kgsl_alloc_secure_page(void) +{ + return NULL; +} +#endif + +int +kgsl_sharedmem_readl(const struct kgsl_memdesc *memdesc, + uint32_t *dst, + uint64_t offsetbytes) +{ + uint32_t *src; + + if (WARN_ON(memdesc == NULL || memdesc->hostptr == NULL || + dst == NULL)) + return -EINVAL; + + WARN_ON(offsetbytes % sizeof(uint32_t) != 0); + if (offsetbytes % sizeof(uint32_t) != 0) + return -EINVAL; + + WARN_ON(offsetbytes > (memdesc->size - sizeof(uint32_t))); + if (offsetbytes > (memdesc->size - sizeof(uint32_t))) + return -ERANGE; + + /* + * We are reading shared memory between CPU and GPU. 
+ * Make sure reads before this are complete + */ + rmb(); + src = (uint32_t *)(memdesc->hostptr + offsetbytes); + *dst = *src; + return 0; +} + +void +kgsl_sharedmem_writel(const struct kgsl_memdesc *memdesc, + uint64_t offsetbytes, + uint32_t src) +{ + /* Quietly return if the memdesc isn't valid */ + if (IS_ERR_OR_NULL(memdesc) || WARN_ON(!memdesc->hostptr)) + return; + + if (WARN_ON(!IS_ALIGNED(offsetbytes, sizeof(u32)))) + return; + + if (WARN_ON(offsetbytes > (memdesc->size - sizeof(u32)))) + return; + + *((u32 *) (memdesc->hostptr + offsetbytes)) = src; + + /* Make sure the write is posted before continuing */ + wmb(); +} + +int +kgsl_sharedmem_readq(const struct kgsl_memdesc *memdesc, + uint64_t *dst, + uint64_t offsetbytes) +{ + uint64_t *src; + + if (WARN_ON(memdesc == NULL || memdesc->hostptr == NULL || + dst == NULL)) + return -EINVAL; + + WARN_ON(offsetbytes % sizeof(uint32_t) != 0); + if (offsetbytes % sizeof(uint32_t) != 0) + return -EINVAL; + + WARN_ON(offsetbytes > (memdesc->size - sizeof(uint32_t))); + if (offsetbytes > (memdesc->size - sizeof(uint32_t))) + return -ERANGE; + + /* + * We are reading shared memory between CPU and GPU. + * Make sure reads before this are complete + */ + rmb(); + src = (uint64_t *)(memdesc->hostptr + offsetbytes); + *dst = *src; + return 0; +} + +void +kgsl_sharedmem_writeq(const struct kgsl_memdesc *memdesc, + uint64_t offsetbytes, + uint64_t src) +{ + /* Quietly return if the memdesc isn't valid */ + if (IS_ERR_OR_NULL(memdesc) || WARN_ON(!memdesc->hostptr)) + return; + + if (WARN_ON(!IS_ALIGNED(offsetbytes, sizeof(u64)))) + return; + + if (WARN_ON(offsetbytes > (memdesc->size - sizeof(u64)))) + return; + + *((u64 *) (memdesc->hostptr + offsetbytes)) = src; + + /* Make sure the write is posted before continuing */ + wmb(); +} + +static const char * const memtype_str[] = { + [KGSL_MEMTYPE_OBJECTANY] = "any(0)", + [KGSL_MEMTYPE_FRAMEBUFFER] = "framebuffer", + [KGSL_MEMTYPE_RENDERBUFFER] = "renderbuffer", + [KGSL_MEMTYPE_ARRAYBUFFER] = "arraybuffer", + [KGSL_MEMTYPE_ELEMENTARRAYBUFFER] = "elementarraybuffer", + [KGSL_MEMTYPE_VERTEXARRAYBUFFER] = "vertexarraybuffer", + [KGSL_MEMTYPE_TEXTURE] = "texture", + [KGSL_MEMTYPE_SURFACE] = "surface", + [KGSL_MEMTYPE_EGL_SURFACE] = "egl_surface", + [KGSL_MEMTYPE_GL] = "gl", + [KGSL_MEMTYPE_CL] = "cl", + [KGSL_MEMTYPE_CL_BUFFER_MAP] = "cl_buffer_map", + [KGSL_MEMTYPE_CL_BUFFER_NOMAP] = "cl_buffer_nomap", + [KGSL_MEMTYPE_CL_IMAGE_MAP] = "cl_image_map", + [KGSL_MEMTYPE_CL_IMAGE_NOMAP] = "cl_image_nomap", + [KGSL_MEMTYPE_CL_KERNEL_STACK] = "cl_kernel_stack", + [KGSL_MEMTYPE_COMMAND] = "command", + [KGSL_MEMTYPE_2D] = "2d", + [KGSL_MEMTYPE_EGL_IMAGE] = "egl_image", + [KGSL_MEMTYPE_EGL_SHADOW] = "egl_shadow", + [KGSL_MEMTYPE_MULTISAMPLE] = "egl_multisample", + /* KGSL_MEMTYPE_KERNEL handled below, to avoid huge array */ +}; + +void kgsl_get_memory_usage(char *name, size_t name_size, uint64_t memflags) +{ + unsigned int type = FIELD_GET(KGSL_MEMTYPE_MASK, memflags); + + if (type == KGSL_MEMTYPE_KERNEL) + strlcpy(name, "kernel", name_size); + else if (type < ARRAY_SIZE(memtype_str) && memtype_str[type] != NULL) + strlcpy(name, memtype_str[type], name_size); + else + snprintf(name, name_size, "VK/others(%3d)", type); +} + +int kgsl_memdesc_sg_dma(struct kgsl_memdesc *memdesc, + phys_addr_t addr, u64 size) +{ + int ret; + struct page *page = phys_to_page(addr); + + memdesc->sgt = kmalloc(sizeof(*memdesc->sgt), GFP_KERNEL); + if (memdesc->sgt == NULL) + return -ENOMEM; + + ret = sg_alloc_table(memdesc->sgt, 1, 
GFP_KERNEL); + if (ret) { + kfree(memdesc->sgt); + memdesc->sgt = NULL; + return ret; + } + + sg_set_page(memdesc->sgt->sgl, page, (size_t) size, 0); + return 0; +} + +static void _kgsl_contiguous_free(struct kgsl_memdesc *memdesc) +{ + dma_free_attrs(memdesc->dev, memdesc->size, + memdesc->hostptr, memdesc->physaddr, + memdesc->attrs); + + sg_free_table(memdesc->sgt); + kfree(memdesc->sgt); + + memdesc->sgt = NULL; +} + +static void kgsl_contiguous_free(struct kgsl_memdesc *memdesc) +{ + if (!memdesc->hostptr) + return; + + atomic_long_sub(memdesc->size, &kgsl_driver.stats.coherent); + + _kgsl_contiguous_free(memdesc); +} + +#ifdef CONFIG_QCOM_KGSL_USE_SHMEM +static int kgsl_alloc_page(int *page_size, struct page **pages, + unsigned int pages_len, unsigned int *align, + unsigned int page_off, struct file *shmem_filp, + struct device *dev) +{ + struct page *page; + + if (pages == NULL) + return -EINVAL; + + page = shmem_read_mapping_page_gfp(shmem_filp->f_mapping, page_off, + kgsl_gfp_mask(0)); + if (IS_ERR(page)) + return PTR_ERR(page); + + kgsl_zero_page(page, 0, dev); + *pages = page; + + return 1; +} + +static int kgsl_memdesc_file_setup(struct kgsl_memdesc *memdesc, uint64_t size) +{ + int ret; + + memdesc->shmem_filp = shmem_file_setup("kgsl-3d0", size, + VM_NORESERVE); + if (IS_ERR(memdesc->shmem_filp)) { + ret = PTR_ERR(memdesc->shmem_filp); + pr_err("kgsl: unable to setup shmem file err %d\n", + ret); + memdesc->shmem_filp = NULL; + return ret; + } + + return 0; +} + +static void kgsl_free_page(struct page *p) +{ + put_page(p); +} + +static void _kgsl_free_pages(struct kgsl_memdesc *memdesc, unsigned int pcount) +{ + int i; + + for (i = 0; i < memdesc->page_count; i++) + if (memdesc->pages[i]) + put_page(memdesc->pages[i]); + + fput(memdesc->shmem_filp); +} +#else +static int kgsl_alloc_page(int *page_size, struct page **pages, + unsigned int pages_len, unsigned int *align, + unsigned int page_off, struct file *shmem_filp, + struct device *dev) +{ + return kgsl_pool_alloc_page(page_size, pages, + pages_len, align, dev); +} + +static int kgsl_memdesc_file_setup(struct kgsl_memdesc *memdesc, uint64_t size) +{ + return 0; +} + +static void kgsl_free_page(struct page *p) +{ + kgsl_pool_free_page(p); +} + +static void _kgsl_free_pages(struct kgsl_memdesc *memdesc, unsigned int pcount) +{ + kgsl_pool_free_pages(memdesc->pages, pcount); +} +#endif + +static void kgsl_free_pages_from_sgt(struct kgsl_memdesc *memdesc) +{ + int i; + struct scatterlist *sg; + + for_each_sg(memdesc->sgt->sgl, sg, memdesc->sgt->nents, i) { + /* + * sg_alloc_table_from_pages() will collapse any physically + * adjacent pages into a single scatterlist entry. We cannot + * just call __free_pages() on the entire set since we cannot + * ensure that the size is a whole order. Instead, free each + * page or compound page group individually. 
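+ *
+ * As a rough illustration (sizes are made up, assuming 4 KB pages): a
+ * 64 KB scatterlist entry backed by order-2 compound pages is walked
+ * in steps of 1 << compound_order(p) = 4 pages, so j advances
+ * 0 -> 4 -> 8 -> 12 and the loop ends at sg->length/PAGE_SIZE = 16.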
+ */ + struct page *p = sg_page(sg), *next; + unsigned int count; + unsigned int j = 0; + + while (j < (sg->length/PAGE_SIZE)) { + count = 1 << compound_order(p); + next = nth_page(p, count); + kgsl_free_page(p); + + p = next; + j += count; + } + } + + if (memdesc->shmem_filp) + fput(memdesc->shmem_filp); +} + +void kgsl_page_sync_for_device(struct device *dev, struct page *page, + size_t size) +{ + struct scatterlist sg; + + /* The caller may choose not to specify a device on purpose */ + if (!dev) + return; + + sg_init_table(&sg, 1); + sg_set_page(&sg, page, size, 0); + sg_dma_address(&sg) = page_to_phys(page); + + dma_sync_sg_for_device(dev, &sg, 1, DMA_BIDIRECTIONAL); +} + +void kgsl_zero_page(struct page *p, unsigned int order, + struct device *dev) +{ + int i; + + for (i = 0; i < (1 << order); i++) { + struct page *page = nth_page(p, i); + + clear_highpage(page); + } + + kgsl_page_sync_for_device(dev, p, PAGE_SIZE << order); +} + +gfp_t kgsl_gfp_mask(int page_order) +{ + gfp_t gfp_mask = __GFP_HIGHMEM; + + if (page_order > 0) { + gfp_mask |= __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN; + gfp_mask &= ~__GFP_RECLAIM; + } else + gfp_mask |= GFP_KERNEL; + + if (kgsl_sharedmem_noretry_flag) + gfp_mask |= __GFP_NORETRY | __GFP_NOWARN; + + return gfp_mask; +} + +static int _kgsl_alloc_pages(struct kgsl_memdesc *memdesc, + u64 size, struct page ***pages, struct device *dev) +{ + int count = 0; + int npages = size >> PAGE_SHIFT; + struct page **local = kvcalloc(npages, sizeof(*local), GFP_KERNEL); + u32 page_size, align; + u64 len = size; + + if (!local) + return -ENOMEM; + + count = kgsl_memdesc_file_setup(memdesc, size); + if (count) { + kvfree(local); + return count; + } + + /* Start with 1MB alignment to get the biggest page we can */ + align = ilog2(SZ_1M); + + page_size = kgsl_get_page_size(len, align); + + while (len) { + int ret = kgsl_alloc_page(&page_size, &local[count], + npages, &align, count, memdesc->shmem_filp, dev); + + if (ret == -EAGAIN) + continue; + else if (ret <= 0) { + int i; + + for (i = 0; i < count; ) { + int n = 1 << compound_order(local[i]); + + kgsl_free_page(local[i]); + i += n; + } + kvfree(local); + + if (!kgsl_sharedmem_noretry_flag) + pr_err_ratelimited("kgsl: out of memory: only allocated %lldKb of %lldKb requested\n", + (size - len) >> 10, size >> 10); + + if (memdesc->shmem_filp) + fput(memdesc->shmem_filp); + + return -ENOMEM; + } + + count += ret; + npages -= ret; + len -= page_size; + + page_size = kgsl_get_page_size(len, align); + } + + *pages = local; + + return count; +} + +#if IS_ENABLED(CONFIG_QCOM_SECURE_BUFFER) +static void kgsl_free_secure_system_pages(struct kgsl_memdesc *memdesc) +{ + int i; + struct scatterlist *sg; + int ret = unlock_sgt(memdesc->sgt); + + if (ret) { + /* + * Unlock of the secure buffer failed. This buffer will + * be stuck in secure side forever and is unrecoverable. + * Give up on the buffer and don't return it to the + * pool. + */ + pr_err("kgsl: secure buf unlock failed: gpuaddr: %llx size: %llx ret: %d\n", + memdesc->gpuaddr, memdesc->size, ret); + return; + } + + atomic_long_sub(memdesc->size, &kgsl_driver.stats.secure); + + for_each_sg(memdesc->sgt->sgl, sg, memdesc->sgt->nents, i) { + struct page *page = sg_page(sg); + + __free_pages(page, get_order(PAGE_SIZE)); + } + + sg_free_table(memdesc->sgt); + kfree(memdesc->sgt); + + memdesc->sgt = NULL; +} + +static void kgsl_free_secure_pages(struct kgsl_memdesc *memdesc) +{ + int ret = unlock_sgt(memdesc->sgt); + + if (ret) { + /* + * Unlock of the secure buffer failed. 
This buffer will + * be stuck in secure side forever and is unrecoverable. + * Give up on the buffer and don't return it to the + * pool. + */ + pr_err("kgsl: secure buf unlock failed: gpuaddr: %llx size: %llx ret: %d\n", + memdesc->gpuaddr, memdesc->size, ret); + return; + } + + atomic_long_sub(memdesc->size, &kgsl_driver.stats.secure); + + kgsl_free_pages_from_sgt(memdesc); + + sg_free_table(memdesc->sgt); + kfree(memdesc->sgt); + + memdesc->sgt = NULL; +} +#endif + +static void kgsl_free_pages(struct kgsl_memdesc *memdesc) +{ + kgsl_paged_unmap_kernel(memdesc); + WARN_ON(memdesc->hostptr); + + atomic_long_sub(memdesc->size, &kgsl_driver.stats.page_alloc); + + _kgsl_free_pages(memdesc, memdesc->page_count); + + memdesc->page_count = 0; + kvfree(memdesc->pages); + + memdesc->pages = NULL; +} + + +static void kgsl_free_system_pages(struct kgsl_memdesc *memdesc) +{ + int i; + + kgsl_paged_unmap_kernel(memdesc); + WARN_ON(memdesc->hostptr); + + atomic_long_sub(memdesc->size, &kgsl_driver.stats.page_alloc); + + for (i = 0; i < memdesc->page_count; i++) + __free_pages(memdesc->pages[i], get_order(PAGE_SIZE)); + + memdesc->page_count = 0; + kvfree(memdesc->pages); + memdesc->pages = NULL; +} + +void kgsl_unmap_and_put_gpuaddr(struct kgsl_memdesc *memdesc) +{ + if (!memdesc->size || !memdesc->gpuaddr) + return; + + if (WARN_ON(kgsl_memdesc_is_global(memdesc))) + return; + + /* + * Don't release the GPU address if the memory fails to unmap because + * the IOMMU driver will BUG later if we reallocated the address and + * tried to map it + */ + if (!kgsl_memdesc_is_reclaimed(memdesc) && + kgsl_mmu_unmap(memdesc->pagetable, memdesc)) + return; + + kgsl_mmu_put_gpuaddr(memdesc->pagetable, memdesc); + + memdesc->gpuaddr = 0; + memdesc->pagetable = NULL; +} + +static const struct kgsl_memdesc_ops kgsl_contiguous_ops = { + .free = kgsl_contiguous_free, + .vmflags = VM_DONTDUMP | VM_PFNMAP | VM_DONTEXPAND | VM_DONTCOPY, + .vmfault = kgsl_contiguous_vmfault, + .put_gpuaddr = kgsl_unmap_and_put_gpuaddr, +}; + +#if IS_ENABLED(CONFIG_QCOM_SECURE_BUFFER) +static const struct kgsl_memdesc_ops kgsl_secure_system_ops = { + .free = kgsl_free_secure_system_pages, + /* FIXME: Make sure vmflags / vmfault does the right thing here */ +}; + +static const struct kgsl_memdesc_ops kgsl_secure_page_ops = { + .free = kgsl_free_secure_pages, + /* FIXME: Make sure vmflags / vmfault does the right thing here */ + .put_gpuaddr = kgsl_unmap_and_put_gpuaddr, +}; +#endif + +static const struct kgsl_memdesc_ops kgsl_page_ops = { + .free = kgsl_free_pages, + .vmflags = VM_DONTDUMP | VM_DONTEXPAND | VM_DONTCOPY | VM_MIXEDMAP, + .vmfault = kgsl_paged_vmfault, + .map_kernel = kgsl_paged_map_kernel, + .unmap_kernel = kgsl_paged_unmap_kernel, + .put_gpuaddr = kgsl_unmap_and_put_gpuaddr, +}; + +static const struct kgsl_memdesc_ops kgsl_system_ops = { + .free = kgsl_free_system_pages, + .vmflags = VM_DONTDUMP | VM_DONTEXPAND | VM_DONTCOPY | VM_MIXEDMAP, + .vmfault = kgsl_paged_vmfault, + .map_kernel = kgsl_paged_map_kernel, + .unmap_kernel = kgsl_paged_unmap_kernel, +}; + +static int kgsl_system_alloc_pages(u64 size, struct page ***pages, + struct device *dev) +{ + struct scatterlist sg; + struct page **local; + int i, npages = size >> PAGE_SHIFT; + + local = kvcalloc(npages, sizeof(*pages), GFP_KERNEL | __GFP_NORETRY); + if (!local) + return -ENOMEM; + + for (i = 0; i < npages; i++) { + gfp_t gfp = __GFP_ZERO | __GFP_HIGHMEM | + GFP_KERNEL | __GFP_NORETRY; + + local[i] = alloc_pages(gfp, get_order(PAGE_SIZE)); + if (!local[i]) { + for (i 
= i - 1; i >= 0; i--) + __free_pages(local[i], get_order(PAGE_SIZE)); + kvfree(local); + return -ENOMEM; + } + + /* Make sure the cache is clean */ + sg_init_table(&sg, 1); + sg_set_page(&sg, local[i], PAGE_SIZE, 0); + sg_dma_address(&sg) = page_to_phys(local[i]); + + dma_sync_sg_for_device(dev, &sg, 1, DMA_BIDIRECTIONAL); + } + + *pages = local; + return npages; +} + +#if IS_ENABLED(CONFIG_QCOM_SECURE_BUFFER) +static int kgsl_alloc_secure_pages(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, u64 size, u64 flags, u32 priv) +{ + struct page **pages; + int count; + struct sg_table *sgt; + int ret; + + size = PAGE_ALIGN(size); + + if (!size || size > UINT_MAX) + return -EINVAL; + + kgsl_memdesc_init(device, memdesc, flags); + memdesc->priv |= priv; + + if (priv & KGSL_MEMDESC_SYSMEM) { + memdesc->ops = &kgsl_secure_system_ops; + count = kgsl_system_alloc_pages(size, &pages, device->dev); + } else { + memdesc->ops = &kgsl_secure_page_ops; + count = _kgsl_alloc_pages(memdesc, size, &pages, device->dev); + } + + if (count < 0) + return count; + + sgt = kzalloc(sizeof(*sgt), GFP_KERNEL); + if (!sgt) { + _kgsl_free_pages(memdesc, count); + kvfree(pages); + return -ENOMEM; + } + + ret = sg_alloc_table_from_pages(sgt, pages, count, 0, size, GFP_KERNEL); + if (ret) { + kfree(sgt); + _kgsl_free_pages(memdesc, count); + kvfree(pages); + return ret; + } + + /* Now that we've moved to a sg table don't need the pages anymore */ + kvfree(pages); + + ret = lock_sgt(sgt, size); + if (ret) { + if (ret != -EADDRNOTAVAIL) + kgsl_free_pages_from_sgt(memdesc); + sg_free_table(sgt); + kfree(sgt); + return ret; + } + + memdesc->sgt = sgt; + memdesc->size = size; + + KGSL_STATS_ADD(size, &kgsl_driver.stats.secure, + &kgsl_driver.stats.secure_max); + + return 0; +} +#endif + +static int kgsl_alloc_pages(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, u64 size, u64 flags, u32 priv) +{ + struct page **pages; + int count; + + size = PAGE_ALIGN(size); + + if (!size || size > UINT_MAX) + return -EINVAL; + + kgsl_memdesc_init(device, memdesc, flags); + memdesc->priv |= priv; + + if (priv & KGSL_MEMDESC_SYSMEM) { + memdesc->ops = &kgsl_system_ops; + count = kgsl_system_alloc_pages(size, &pages, device->dev); + } else { + memdesc->ops = &kgsl_page_ops; + count = _kgsl_alloc_pages(memdesc, size, &pages, device->dev); + } + + if (count < 0) + return count; + + memdesc->pages = pages; + memdesc->size = size; + memdesc->page_count = count; + + KGSL_STATS_ADD(size, &kgsl_driver.stats.page_alloc, + &kgsl_driver.stats.page_alloc_max); + + return 0; +} + +static int _kgsl_alloc_contiguous(struct device *dev, + struct kgsl_memdesc *memdesc, u64 size, unsigned long attrs) +{ + int ret; + phys_addr_t phys; + void *ptr; + + ptr = dma_alloc_attrs(dev, (size_t) size, &phys, + GFP_KERNEL, attrs); + if (!ptr) + return -ENOMEM; + + memdesc->size = size; + memdesc->dev = dev; + memdesc->hostptr = ptr; + memdesc->physaddr = phys; + memdesc->gpuaddr = phys; + memdesc->attrs = attrs; + + ret = kgsl_memdesc_sg_dma(memdesc, phys, size); + if (ret) + dma_free_attrs(dev, (size_t) size, ptr, phys, attrs); + + return ret; +} + +static int kgsl_alloc_contiguous(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, u64 size, u64 flags, u32 priv) +{ + int ret; + + size = PAGE_ALIGN(size); + + if (!size || size > UINT_MAX) + return -EINVAL; + + kgsl_memdesc_init(device, memdesc, flags); + memdesc->priv |= priv; + + memdesc->ops = &kgsl_contiguous_ops; + ret = _kgsl_alloc_contiguous(&device->pdev->dev, memdesc, size, 0); + + 
if (!ret) + KGSL_STATS_ADD(size, &kgsl_driver.stats.coherent, + &kgsl_driver.stats.coherent_max); + + return ret; +} + +#if IS_ENABLED(CONFIG_QCOM_SECURE_BUFFER) +static int kgsl_allocate_secure(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, u64 size, u64 flags, u32 priv) +{ + return kgsl_alloc_secure_pages(device, memdesc, size, flags, priv); +} +#else +static int kgsl_allocate_secure(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, u64 size, u64 flags, u32 priv) +{ + return -ENODEV; +} +#endif + +int kgsl_allocate_user(struct kgsl_device *device, struct kgsl_memdesc *memdesc, + u64 size, u64 flags, u32 priv) +{ + if (device->mmu.type == KGSL_MMU_TYPE_NONE) + return kgsl_alloc_contiguous(device, memdesc, size, flags, + priv); + else if (flags & KGSL_MEMFLAGS_SECURE) + return kgsl_allocate_secure(device, memdesc, size, flags, priv); + + return kgsl_alloc_pages(device, memdesc, size, flags, priv); +} + +int kgsl_allocate_kernel(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, u64 size, u64 flags, u32 priv) +{ + int ret; + + ret = kgsl_allocate_user(device, memdesc, size, flags, priv); + if (ret) + return ret; + + if (memdesc->ops->map_kernel) { + ret = memdesc->ops->map_kernel(memdesc); + if (ret) { + kgsl_sharedmem_free(memdesc); + return ret; + } + } + + return 0; +} + +struct kgsl_memdesc *kgsl_allocate_global_fixed(struct kgsl_device *device, + const char *resource, const char *name) +{ + struct kgsl_global_memdesc *md; + u32 entry[2]; + int ret; + + if (of_property_read_u32_array(device->pdev->dev.of_node, + resource, entry, 2)) + return ERR_PTR(-ENODEV); + + md = kzalloc(sizeof(*md), GFP_KERNEL); + if (!md) + return ERR_PTR(-ENOMEM); + + kgsl_memdesc_init(device, &md->memdesc, 0); + md->memdesc.priv = KGSL_MEMDESC_GLOBAL; + md->memdesc.physaddr = entry[0]; + md->memdesc.size = entry[1]; + + ret = kgsl_memdesc_sg_dma(&md->memdesc, entry[0], entry[1]); + if (ret) { + kfree(md); + return ERR_PTR(ret); + } + + md->name = name; + + /* + * No lock here, because this function is only called during probe/init + * while the caller is holding the mutex + */ + list_add_tail(&md->node, &device->globals); + + kgsl_mmu_map_global(device, &md->memdesc, 0); + + return &md->memdesc; +} + +static struct kgsl_memdesc * +kgsl_allocate_secure_global(struct kgsl_device *device, + u64 size, u64 flags, u32 priv, const char *name) +{ + struct kgsl_global_memdesc *md; + int ret; + + md = kzalloc(sizeof(*md), GFP_KERNEL); + if (!md) + return ERR_PTR(-ENOMEM); + + /* Make sure that we get global memory from system memory */ + priv |= KGSL_MEMDESC_GLOBAL | KGSL_MEMDESC_SYSMEM; + + ret = kgsl_allocate_secure(device, &md->memdesc, size, flags, priv); + if (ret) { + kfree(md); + return ERR_PTR(ret); + } + + md->name = name; + + /* + * No lock here, because this function is only called during probe/init + * while the caller is holding the mutex + */ + list_add_tail(&md->node, &device->globals); + + /* + * No offset needed, we'll get an address inside of the pagetable + * normally + */ + kgsl_mmu_map_global(device, &md->memdesc, 0); + kgsl_trace_gpu_mem_total(device, md->memdesc.size); + + return &md->memdesc; +} + +struct kgsl_memdesc *kgsl_allocate_global(struct kgsl_device *device, + u64 size, u32 padding, u64 flags, u32 priv, const char *name) +{ + int ret; + struct kgsl_global_memdesc *md; + + if (flags & KGSL_MEMFLAGS_SECURE) + return kgsl_allocate_secure_global(device, size, flags, priv, + name); + + md = kzalloc(sizeof(*md), GFP_KERNEL); + if (!md) + return ERR_PTR(-ENOMEM); 
+ + /* + * Make sure that we get global memory from system memory to keep from + * taking up pool memory for the life of the driver + */ + priv |= KGSL_MEMDESC_GLOBAL | KGSL_MEMDESC_SYSMEM; + + ret = kgsl_allocate_kernel(device, &md->memdesc, size, flags, priv); + if (ret) { + kfree(md); + return ERR_PTR(ret); + } + + md->name = name; + + /* + * No lock here, because this function is only called during probe/init + * while the caller is holding the mute + */ + list_add_tail(&md->node, &device->globals); + + kgsl_mmu_map_global(device, &md->memdesc, padding); + kgsl_trace_gpu_mem_total(device, md->memdesc.size); + + return &md->memdesc; +} + +void kgsl_free_globals(struct kgsl_device *device) +{ + struct kgsl_global_memdesc *md, *tmp; + + list_for_each_entry_safe(md, tmp, &device->globals, node) { + kgsl_sharedmem_free(&md->memdesc); + list_del(&md->node); + kfree(md); + } +} diff --git a/kgsl_sharedmem.h b/kgsl_sharedmem.h new file mode 100644 index 0000000000..8183283f0f --- /dev/null +++ b/kgsl_sharedmem.h @@ -0,0 +1,463 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. + */ +#ifndef __KGSL_SHAREDMEM_H +#define __KGSL_SHAREDMEM_H + +#include +#include +#include +#include + +#include "kgsl.h" +#include "kgsl_mmu.h" + +struct kgsl_device; +struct kgsl_process_private; + +extern bool kgsl_sharedmem_noretry_flag; + +#define KGSL_CACHE_OP_INV 0x01 +#define KGSL_CACHE_OP_FLUSH 0x02 +#define KGSL_CACHE_OP_CLEAN 0x03 + +void kgsl_sharedmem_free(struct kgsl_memdesc *memdesc); + +int kgsl_sharedmem_readl(const struct kgsl_memdesc *memdesc, + uint32_t *dst, + uint64_t offsetbytes); + +/** + * kgsl_sharedmem_writel - write a 32 bit value to a shared memory object + * @memdesc: Pointer to a GPU memory object + * @offsetbytes: Offset inside of @memdesc to write to + * @src: Value to write + * + * Write @src to @offsetbytes from the start of @memdesc + */ +void kgsl_sharedmem_writel(const struct kgsl_memdesc *memdesc, + uint64_t offsetbytes, + uint32_t src); + +int kgsl_sharedmem_readq(const struct kgsl_memdesc *memdesc, + uint64_t *dst, + uint64_t offsetbytes); + +/** + * kgsl_sharedmem_writeq - write a 64 bit value to a shared memory object + * @memdesc: Pointer to a GPU memory object + * @offsetbytes: Offset inside of @memdesc to write to + * @src: Value to write + * + * Write @src to @offsetbytes from the start of @memdesc + */ +void kgsl_sharedmem_writeq(const struct kgsl_memdesc *memdesc, + uint64_t offsetbytes, + uint64_t src); + +int kgsl_cache_range_op(struct kgsl_memdesc *memdesc, + uint64_t offset, uint64_t size, + unsigned int op); + +void kgsl_memdesc_init(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, uint64_t flags); + +void kgsl_process_init_sysfs(struct kgsl_device *device, + struct kgsl_process_private *private); + +int kgsl_sharedmem_init_sysfs(void); + +void kgsl_get_memory_usage(char *str, size_t len, uint64_t memflags); + +void kgsl_free_secure_page(struct page *page); + +struct page *kgsl_alloc_secure_page(void); + +/** + * kgsl_zero_page() - zero out a page + * @p: pointer to the struct page + * @order: order of the page + * @dev: A &struct device pointer + * + * Map a page into kernel and zero it out + */ +void kgsl_zero_page(struct page *p, unsigned int order, + struct device *dev); + +/** + * kgsl_gfp_mask() - get gfp_mask to be used + * @page_order: order of the page + * + * Get the gfp_mask to be used for page allocation + * based on the order of the page + * + * Return appropriate 
gfp_mask + */ +gfp_t kgsl_gfp_mask(int page_order); + +/** + * kgsl_allocate_user - Allocate user visible GPU memory + * @device: A GPU device handle + * @memdesc: Memory descriptor for the object + * @size: Size of the allocation in bytes + * @flags: Control flags for the allocation + * @priv: Internal flags for the allocation + * + * Allocate GPU memory on behalf of the user. + * Return: 0 on success or negative on failure. + */ +int kgsl_allocate_user(struct kgsl_device *device, struct kgsl_memdesc *memdesc, + u64 size, u64 flags, u32 priv); + +/** + * kgsl_allocate_kernel - Allocate kernel visible GPU memory + * @device: A GPU device handle + * @memdesc: Memory descriptor for the object + * @size: Size of the allocation in bytes + * @flags: Control flags for the allocation + * @priv: Internal flags for the allocation + * + * Allocate GPU memory on for use by the kernel. Kernel objects are + * automatically mapped into the kernel address space (except for secure). + * Return: 0 on success or negative on failure. + */ +int kgsl_allocate_kernel(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, u64 size, u64 flags, u32 priv); + +/** + * kgsl_allocate_global - Allocate a global GPU memory object + * @device: A GPU device handle + * @size: Size of the allocation in bytes + * @padding: Amount of extra adding to add to the VA allocation + * @flags: Control flags for the allocation + * @priv: Internal flags for the allocation + * @name: Name of the allocation (for the debugfs file) + * + * Allocate a global GPU object for use by all processes. The buffer is + * automatically mapped into the kernel address space and added to the list of + * global buffers that get mapped into each newly created pagetable. + * Return: The memory descriptor on success or a ERR_PTR encoded error on + * failure. + */ +struct kgsl_memdesc *kgsl_allocate_global(struct kgsl_device *device, + u64 size, u32 padding, u64 flags, u32 priv, const char *name); + +/** + * kgsl_allocate_global_fixed - Allocate a global GPU memory object from a fixed + * region defined in the device tree + * @device: A GPU device handle + * @size: Size of the allocation in bytes + * @flags: Control flags for the allocation + * @priv: Internal flags for the allocation + * + * Allocate a global GPU object for use by all processes. The buffer is + * added to the list of global buffers that get mapped into each newly created + * pagetable. + * + * Return: The memory descriptor on success or a ERR_PTR encoded error on + * failure. + */ +struct kgsl_memdesc *kgsl_allocate_global_fixed(struct kgsl_device *device, + const char *resource, const char *name); + +/** + * kgsl_free_globals - Free all global objects + * @device: A GPU device handle + * + * Free all the global buffer objects. Should only be called during shutdown + * after the pagetables have been freed + */ +void kgsl_free_globals(struct kgsl_device *device); + +/** + * kgsl_page_sync_for_device - Initialize SG table with page & sync it for device + * @dev: A GPU device handle + * @page: Pointer to the struct page + * @size: Size of the page + */ +void kgsl_page_sync_for_device(struct device *dev, struct page *page, + size_t size); + +/* + * kgsl_memdesc_get_align - Get alignment flags from a memdesc + * @memdesc - the memdesc + * + * Returns the alignment requested, as power of 2 exponent. 
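+ *
+ * For example (illustrative value): a KGSL_MEMALIGN field of 16 means
+ * the caller requested 1 << 16 = 64 KB alignment.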
+ */ +static inline int +kgsl_memdesc_get_align(const struct kgsl_memdesc *memdesc) +{ + return FIELD_GET(KGSL_MEMALIGN_MASK, memdesc->flags); +} + +/* + * kgsl_memdesc_get_cachemode - Get cache mode of a memdesc + * @memdesc: the memdesc + * + * Returns a KGSL_CACHEMODE* value. + */ +static inline int +kgsl_memdesc_get_cachemode(const struct kgsl_memdesc *memdesc) +{ + return FIELD_GET(KGSL_CACHEMODE_MASK, memdesc->flags); +} + +static inline unsigned int +kgsl_memdesc_get_memtype(const struct kgsl_memdesc *memdesc) +{ + return FIELD_GET(KGSL_MEMTYPE_MASK, memdesc->flags); +} +/* + * kgsl_memdesc_set_align - Set alignment flags of a memdesc + * @memdesc - the memdesc + * @align - alignment requested, as a power of 2 exponent. + */ +static inline int +kgsl_memdesc_set_align(struct kgsl_memdesc *memdesc, unsigned int align) +{ + if (align > 32) + align = 32; + + memdesc->flags &= ~(uint64_t)KGSL_MEMALIGN_MASK; + memdesc->flags |= FIELD_PREP(KGSL_MEMALIGN_MASK, align); + return 0; +} + +/** + * kgsl_memdesc_usermem_type - return buffer type + * @memdesc - the memdesc + * + * Returns a KGSL_MEM_ENTRY_* value for this buffer, which + * identifies if was allocated by us, or imported from + * another allocator. + */ +static inline unsigned int +kgsl_memdesc_usermem_type(const struct kgsl_memdesc *memdesc) +{ + return FIELD_GET(KGSL_MEMFLAGS_USERMEM_MASK, memdesc->flags); +} + +/** + * kgsl_memdesc_sg_dma - Turn a dma_addr (from CMA) into a sg table + * @memdesc: Pointer to a memory descriptor + * @addr: Physical address from the dma_alloc function + * @size: Size of the chunk + * + * Create a sg table for the contiguous chunk specified by addr and size. + * + * Return: 0 on success or negative on failure. + */ +int kgsl_memdesc_sg_dma(struct kgsl_memdesc *memdesc, + phys_addr_t addr, u64 size); + +/* + * kgsl_memdesc_is_global - is this a globally mapped buffer? + * @memdesc: the memdesc + * + * Return: True if this is a global mapping + */ +static inline bool kgsl_memdesc_is_global(const struct kgsl_memdesc *memdesc) +{ + return memdesc && (memdesc->priv & KGSL_MEMDESC_GLOBAL); +} + +/* + * kgsl_memdesc_is_secured - is this a secure buffer? + * @memdesc: the memdesc + * + * Returns true if this is a secure mapping, false otherwise + */ +static inline bool kgsl_memdesc_is_secured(const struct kgsl_memdesc *memdesc) +{ + return memdesc && (memdesc->priv & KGSL_MEMDESC_SECURE); +} + +/* + * kgsl_memdesc_is_reclaimed - check if a buffer is reclaimed + * @memdesc: the memdesc + * + * Return: true if the memdesc pages were reclaimed, false otherwise + */ +static inline bool kgsl_memdesc_is_reclaimed(const struct kgsl_memdesc *memdesc) +{ + return memdesc && (memdesc->priv & KGSL_MEMDESC_RECLAIMED); +} + +/* + * kgsl_memdesc_use_cpu_map - use the same virtual mapping on CPU and GPU? + * @memdesc: the memdesc + * + * Return: true if the memdesc is using SVM mapping + */ +static inline bool +kgsl_memdesc_use_cpu_map(const struct kgsl_memdesc *memdesc) +{ + return memdesc && (memdesc->flags & KGSL_MEMFLAGS_USE_CPU_MAP); +} + +/* + * kgsl_memdesc_footprint - get the size of the mmap region + * @memdesc - the memdesc + * + * The entire memdesc must be mapped. Additionally if the + * CPU mapping is going to be mirrored, there must be room + * for the guard page to be mapped so that the address spaces + * match up. 
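+ *
+ * As a worked example (assuming 4 KB pages): a 12 KB allocation with
+ * KGSL_MEMDESC_GUARD_PAGE set has a footprint of
+ * PAGE_ALIGN(12 KB + 4 KB) = 16 KB, while the same allocation without
+ * the guard page keeps its 12 KB footprint.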
+ */ +static inline uint64_t +kgsl_memdesc_footprint(const struct kgsl_memdesc *memdesc) +{ + if (!(memdesc->priv & KGSL_MEMDESC_GUARD_PAGE)) + return memdesc->size; + + return PAGE_ALIGN(memdesc->size + PAGE_SIZE); +} + +/** + * kgsl_memdesc_put_gpuaddr - Release the gpuaddr assigned to a memdesc + * @memdesc: Pointer to a GPU memory object + * + * Call the memdesc specific function to release the GPU address assigned to the + * memdesc and unmap the memory + */ +static inline void kgsl_sharedmem_put_gpuaddr(struct kgsl_memdesc *memdesc) +{ + if (memdesc && memdesc->ops->put_gpuaddr) + memdesc->ops->put_gpuaddr(memdesc); +} + +/** + * kgsl_cachemode_is_cached - Return true if the passed flags indicate a cached + * buffer + * @flags: A bitmask of KGSL_MEMDESC_ flags + * + * Return: true if the flags indicate a cached buffer + */ +static inline bool kgsl_cachemode_is_cached(u64 flags) +{ + u64 mode = FIELD_GET(KGSL_CACHEMODE_MASK, flags); + + return (mode != KGSL_CACHEMODE_UNCACHED && + mode != KGSL_CACHEMODE_WRITECOMBINE); +} + +/** + * kgsl_unmap_and_put_gpuaddr - Unmap the memory and release the gpuaddr + * assigned to a memdesc + * @memdesc: Pointer to a GPU memory object + * + * Remove the mapping from pagetable and release the GPU address assigned + * to the memdesc + */ +void kgsl_unmap_and_put_gpuaddr(struct kgsl_memdesc *memdesc); + +/** + * struct kgsl_process_attribute - basic attribute for a process + * @attr: Underlying struct attribute + * @show: Attribute show function + * @store: Attribute store function + */ +struct kgsl_process_attribute { + struct attribute attr; + ssize_t (*show)(struct kobject *kobj, + struct kgsl_process_attribute *attr, char *buf); + ssize_t (*store)(struct kobject *kobj, + struct kgsl_process_attribute *attr, const char *buf, + ssize_t count); +}; + +#define PROCESS_ATTR(_name, _mode, _show, _store) \ + static struct kgsl_process_attribute attr_##_name = \ + __ATTR(_name, _mode, _show, _store) + +struct kgsl_sharedmem_bind_op_range { + u64 start; + u64 last; + u32 child_offset; + u32 op; + struct kgsl_mem_entry *entry; +}; + +struct kgsl_sharedmem_bind_op { + struct kgsl_mem_entry *target; + struct kgsl_sharedmem_bind_op_range *ops; + int nr_ops; + void (*callback)(struct kgsl_sharedmem_bind_op *op); + void *data; + struct work_struct work; + struct kref ref; +}; + +/** + * kgsl_sharedmem_allocate_vbo - Allocate a new virtual buffer object + * @device: A KGSL GPU handle + * @memdesc: Memory descriptor container to initialize + * @size: Size of the VBO + * @flags: Bitmask of KGSL_MEMFLAGS_* + * + * Initialize a new virtual buffer object memory descriptor + * + * Return: 0 on success or negative on failure. 
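+ *
+ * One possible caller-side sketch based on the declarations in this
+ * header (not part of this patch: "device", "flags", "priv",
+ * "target_id", "uranges", "nents" and "entry_size" are placeholder
+ * names and error handling is trimmed):
+ *
+ *	struct kgsl_memdesc vbo;
+ *	struct kgsl_sharedmem_bind_op *op;
+ *
+ *	if (kgsl_sharedmem_allocate_vbo(device, &vbo, SZ_2M, flags))
+ *		return;
+ *
+ *	(target_id identifies the target VBO entry owned by priv)
+ *	op = kgsl_sharedmem_create_bind_op(priv, target_id, uranges,
+ *					   nents, entry_size);
+ *	if (!IS_ERR(op)) {
+ *		kgsl_sharedmem_bind_ranges(op);
+ *		kgsl_sharedmem_put_bind_op(op);
+ *	}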
+ */ +int kgsl_sharedmem_allocate_vbo(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, u64 size, u64 flags); + +/** + * kgsl_memdesc_print_vbo_ranges - Print a new virtual buffer object + * @entry: A KGSL memory entry + * @s: seq_file pointer + * + * Print virtual buffer object memory ranges + */ +void kgsl_memdesc_print_vbo_ranges(struct kgsl_mem_entry *entry, + struct seq_file *s); + +/** + * kgsl_sharedmem_create_bind_op - Create a new bind op + * @private: A KGSL process private + * @target_id: Target virtual buffer object id + * @ranges: User memory pointer to an array of range operations of type &struct + * kgsl_gpumem_bind_range + * @ranges_nents: Number of entries in @ranges + * @ranges_size: Size of each entry in @ranges in bytes + * + * Create a new bind op to be used to map ranges + * + * Return: On success return kgsl_sharedmem_bind_op pointer or negative + * on failure + * + */ +struct kgsl_sharedmem_bind_op * +kgsl_sharedmem_create_bind_op(struct kgsl_process_private *private, + u32 target_id, void __user *ranges, u32 ranges_nents, + u64 ranges_size); + +/** + * kgsl_sharedmem_bind_ranges - Bind ranges to virtual buffer object + * @op: One of KGSL_GPUMEM_RANGE_OP_BIND or KGSL_GPUMEM_RANGE_OP_UNBIND + * + * Add or remove a range from kgsl memory descriptor + */ +void kgsl_sharedmem_bind_ranges(struct kgsl_sharedmem_bind_op *op); + +/** + * kgsl_sharedmem_bind_range_destroy - Bind ranges to virtual buffer object + * @kref: kref to bind kgsl_sharedmem_bind_op + * + * Destroy bind ranges object + */ +void kgsl_sharedmem_bind_range_destroy(struct kref *kref); + +/** + * kgsl_sharedmem_put_bind_op - Bind ranges to virtual buffer object + * @op: One of KGSL_GPUMEM_RANGE_OP_BIND or KGSL_GPUMEM_RANGE_OP_UNBIND + * + * Put kgsl_sharedmem_bind_range_destroy to free resources + */ +static inline void kgsl_sharedmem_put_bind_op(struct kgsl_sharedmem_bind_op *op) +{ + if (!IS_ERR_OR_NULL(op)) + kref_put(&op->ref, kgsl_sharedmem_bind_range_destroy); +} +#endif /* __KGSL_SHAREDMEM_H */ diff --git a/kgsl_snapshot.c b/kgsl_snapshot.c new file mode 100644 index 0000000000..c2cac0c881 --- /dev/null +++ b/kgsl_snapshot.c @@ -0,0 +1,1273 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2012-2021, The Linux Foundation. All rights reserved. + */ + +#include +#include +#include +#include + +#include "adreno_cp_parser.h" +#include "kgsl_device.h" +#include "kgsl_sharedmem.h" +#include "kgsl_snapshot.h" +#include "kgsl_util.h" + +static void kgsl_snapshot_save_frozen_objs(struct work_struct *work); + +/* Placeholder for list of ib objects that contain all objects in that IB */ + +struct kgsl_snapshot_cp_obj { + struct adreno_ib_object_list *ib_obj_list; + struct list_head node; +}; + +struct snapshot_obj_itr { + u8 *buf; /* Buffer pointer to write to */ + int pos; /* Current position in the sequence */ + loff_t offset; /* file offset to start writing from */ + size_t remain; /* Bytes remaining in buffer */ + size_t write; /* Bytes written so far */ +}; + +static inline u64 snapshot_phy_addr(struct kgsl_device *device) +{ + return device->snapshot_memory.dma_handle ? + device->snapshot_memory.dma_handle : __pa(device->snapshot_memory.ptr); +} + +static inline u64 atomic_snapshot_phy_addr(struct kgsl_device *device) +{ + return device->snapshot_memory_atomic.ptr == device->snapshot_memory.ptr ? 
+ snapshot_phy_addr(device) : __pa(device->snapshot_memory_atomic.ptr); +} + +static void obj_itr_init(struct snapshot_obj_itr *itr, u8 *buf, + loff_t offset, size_t remain) +{ + itr->buf = buf; + itr->offset = offset; + itr->remain = remain; + itr->pos = 0; + itr->write = 0; +} + +static int obj_itr_out(struct snapshot_obj_itr *itr, void *src, int size) +{ + if (itr->remain == 0) + return 0; + + if ((itr->pos + size) <= itr->offset) + goto done; + + /* Handle the case that offset is in the middle of the buffer */ + + if (itr->offset > itr->pos) { + src += (itr->offset - itr->pos); + size -= (itr->offset - itr->pos); + + /* Advance pos to the offset start */ + itr->pos = itr->offset; + } + + if (size > itr->remain) + size = itr->remain; + + memcpy(itr->buf, src, size); + + itr->buf += size; + itr->write += size; + itr->remain -= size; + +done: + itr->pos += size; + return size; +} + +static void kgsl_snapshot_put_object(struct kgsl_snapshot_object *obj) +{ + list_del(&obj->node); + + obj->entry->memdesc.priv &= ~KGSL_MEMDESC_FROZEN; + obj->entry->memdesc.priv &= ~KGSL_MEMDESC_SKIP_RECLAIM; + kgsl_mem_entry_put(obj->entry); + + kfree(obj); +} + +/** + * kgsl_snapshot_have_object() - return 1 if the object has been processed + * @snapshot: the snapshot data + * @process: The process that owns the the object to freeze + * @gpuaddr: The gpu address of the object to freeze + * @size: the size of the object (may not always be the size of the region) + * + * Return 1 if the object is already in the list - this can save us from + * having to parse the same thing over again. There are 2 lists that are + * tracking objects so check for the object in both lists + */ +int kgsl_snapshot_have_object(struct kgsl_snapshot *snapshot, + struct kgsl_process_private *process, + uint64_t gpuaddr, uint64_t size) +{ + struct kgsl_snapshot_object *obj; + struct kgsl_snapshot_cp_obj *obj_cp; + struct adreno_ib_object *ib_obj; + int i; + + /* Check whether the object is tracked already in ib list */ + list_for_each_entry(obj_cp, &snapshot->cp_list, node) { + if (obj_cp->ib_obj_list == NULL + || obj_cp->ib_obj_list->num_objs == 0) + continue; + + ib_obj = &(obj_cp->ib_obj_list->obj_list[0]); + if (ib_obj->entry == NULL || ib_obj->entry->priv != process) + continue; + + for (i = 0; i < obj_cp->ib_obj_list->num_objs; i++) { + ib_obj = &(obj_cp->ib_obj_list->obj_list[i]); + if ((gpuaddr >= ib_obj->gpuaddr) && + ((gpuaddr + size) <= + (ib_obj->gpuaddr + ib_obj->size))) + return 1; + } + } + + list_for_each_entry(obj, &snapshot->obj_list, node) { + if (obj->entry == NULL || obj->entry->priv != process) + continue; + + if ((gpuaddr >= obj->gpuaddr) && + ((gpuaddr + size) <= (obj->gpuaddr + obj->size))) + return 1; + } + + return 0; +} + +/** + * kgsl_snapshot_get_object() - Mark a GPU buffer to be frozen + * @snapshot: The snapshot data + * @process: The process that owns the object we want to freeze + * @gpuaddr: The gpu address of the object to freeze + * @size: the size of the object (may not always be the size of the region) + * @type: the type of object being saved (shader, vbo, etc) + * + * Mark and freeze a GPU buffer object. This will prevent it from being + * freed until it can be copied out as part of the snapshot dump. 
Returns the + * size of the object being frozen + */ +int kgsl_snapshot_get_object(struct kgsl_snapshot *snapshot, + struct kgsl_process_private *process, uint64_t gpuaddr, + uint64_t size, unsigned int type) +{ + struct kgsl_mem_entry *entry; + struct kgsl_snapshot_object *obj; + uint64_t offset; + int ret = -EINVAL; + unsigned int mem_type; + + if (!gpuaddr) + return 0; + + entry = kgsl_sharedmem_find(process, gpuaddr); + + if (entry == NULL) + return -EINVAL; + + /* We can't freeze external memory, because we don't own it */ + if (entry->memdesc.flags & KGSL_MEMFLAGS_USERMEM_MASK) + goto err_put; + /* + * Do not save texture and render targets in snapshot, + * they can be just too big + */ + + mem_type = kgsl_memdesc_get_memtype(&entry->memdesc); + if (mem_type == KGSL_MEMTYPE_TEXTURE || + mem_type == KGSL_MEMTYPE_EGL_SURFACE || + mem_type == KGSL_MEMTYPE_EGL_IMAGE) { + ret = 0; + goto err_put; + } + + /* + * size indicates the number of bytes in the region to save. This might + * not always be the entire size of the region because some buffers are + * sub-allocated from a larger region. However, if size 0 was passed + * thats a flag that the caller wants to capture the entire buffer + */ + + if (size == 0) { + size = entry->memdesc.size; + offset = 0; + + /* Adjust the gpuaddr to the start of the object */ + gpuaddr = entry->memdesc.gpuaddr; + } else { + offset = gpuaddr - entry->memdesc.gpuaddr; + } + + if (size + offset > entry->memdesc.size) { + dev_err(snapshot->device->dev, + "snapshot: invalid size for GPU buffer 0x%016llx\n", + gpuaddr); + goto err_put; + } + + /* If the buffer is already on the list, skip it */ + list_for_each_entry(obj, &snapshot->obj_list, node) { + /* combine the range with existing object if they overlap */ + if (obj->entry->priv == process && obj->type == type && + kgsl_addr_range_overlap(obj->gpuaddr, obj->size, + gpuaddr, size)) { + uint64_t end1 = obj->gpuaddr + obj->size; + uint64_t end2 = gpuaddr + size; + + if (obj->gpuaddr > gpuaddr) + obj->gpuaddr = gpuaddr; + if (end1 > end2) + obj->size = end1 - obj->gpuaddr; + else + obj->size = end2 - obj->gpuaddr; + obj->offset = obj->gpuaddr - entry->memdesc.gpuaddr; + ret = 0; + goto err_put; + } + } + + obj = kzalloc(sizeof(*obj), GFP_KERNEL); + + if (obj == NULL) + goto err_put; + + obj->type = type; + obj->entry = entry; + obj->gpuaddr = gpuaddr; + obj->size = size; + obj->offset = offset; + + list_add(&obj->node, &snapshot->obj_list); + + /* + * Return the size of the entire mem entry that was frozen - this gets + * used for tracking how much memory is frozen for a hang. Also, mark + * the memory entry as frozen. If the entry was already marked as + * frozen, then another buffer already got to it. In that case, return + * 0 so it doesn't get counted twice + */ + + ret = (entry->memdesc.priv & KGSL_MEMDESC_FROZEN) ? 0 + : entry->memdesc.size; + + entry->memdesc.priv |= KGSL_MEMDESC_FROZEN; + + return ret; +err_put: + entry->memdesc.priv &= ~KGSL_MEMDESC_SKIP_RECLAIM; + kgsl_mem_entry_put(entry); + return ret; +} + +/** + * kgsl_snapshot_dump_registers - helper function to dump device registers + * @device - the device to dump registers from + * @snapshot - pointer to the start of the region of memory for the snapshot + * @remain - a pointer to the number of bytes remaining in the snapshot + * @priv - A pointer to the kgsl_snapshot_registers data + * + * Given an array of register ranges pairs (start,end [inclusive]), dump the + * registers into a snapshot register section. 
The snapshot region stores a + * part of dwords for each register - the word address of the register, and + * the value. + */ +size_t kgsl_snapshot_dump_registers(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct kgsl_snapshot_regs *header = (struct kgsl_snapshot_regs *)buf; + struct kgsl_snapshot_registers *regs = priv; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + int count = 0, j, k; + + /* Figure out how many registers we are going to dump */ + + for (j = 0; j < regs->count; j++) { + int start = regs->regs[j * 2]; + int end = regs->regs[j * 2 + 1]; + + count += (end - start + 1); + } + + if (remain < (count * 8) + sizeof(*header)) { + SNAPSHOT_ERR_NOMEM(device, "REGISTERS"); + return 0; + } + + for (j = 0; j < regs->count; j++) { + unsigned int start = regs->regs[j * 2]; + unsigned int end = regs->regs[j * 2 + 1]; + + for (k = start; k <= end; k++) { + unsigned int val; + + kgsl_regread(device, k, &val); + *data++ = k; + *data++ = val; + } + } + + header->count = count; + + /* Return the size of the section */ + return (count * 8) + sizeof(*header); +} + +struct kgsl_snapshot_indexed_registers { + unsigned int index; + unsigned int data; + unsigned int start; + unsigned int count; +}; + +static size_t kgsl_snapshot_dump_indexed_regs(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_indexed_registers *iregs = priv; + struct kgsl_snapshot_indexed_regs *header = + (struct kgsl_snapshot_indexed_regs *)buf; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + + if (remain < (iregs->count * 4) + sizeof(*header)) { + SNAPSHOT_ERR_NOMEM(device, "INDEXED REGS"); + return 0; + } + + header->index_reg = iregs->index; + header->data_reg = iregs->data; + header->count = iregs->count; + header->start = iregs->start; + + kgsl_regmap_read_indexed_interleaved(&device->regmap, iregs->index, + iregs->data, data, iregs->start, iregs->count); + + return (iregs->count * 4) + sizeof(*header); +} + +/** + * kgsl_snapshot_indexed_registers - Add a set of indexed registers to the + * snapshot + * @device: Pointer to the KGSL device being snapshotted + * @snapshot: Snapshot instance + * @index: Offset for the index register + * @data: Offset for the data register + * @start: Index to start reading + * @count: Number of entries to read + * + * Dump the values from an indexed register group into the snapshot + */ +void kgsl_snapshot_indexed_registers(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, + unsigned int index, unsigned int data, + unsigned int start, + unsigned int count) +{ + struct kgsl_snapshot_indexed_registers iregs; + + iregs.index = index; + iregs.data = data; + iregs.start = start; + iregs.count = count; + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_INDEXED_REGS, + snapshot, kgsl_snapshot_dump_indexed_regs, &iregs); +} + +/** + * kgsl_snapshot_add_section() - Add a new section to the GPU snapshot + * @device: the KGSL device being snapshotted + * @id: the section id + * @snapshot: pointer to the snapshot instance + * @func: Function pointer to fill the section + * @priv: Private pointer to pass to the function + * + * Set up a KGSL snapshot header by filling the memory with the callback + * function and adding the standard section header + */ +void kgsl_snapshot_add_section(struct kgsl_device *device, u16 id, + struct kgsl_snapshot *snapshot, + size_t (*func)(struct kgsl_device *, u8 *, size_t, void *), + void *priv) +{ + struct kgsl_snapshot_section_header *header = + 
(struct kgsl_snapshot_section_header *)snapshot->ptr; + u8 *data = snapshot->ptr + sizeof(*header); + size_t ret = 0; + + /* + * Sanity check to make sure there is enough for the header. The + * callback will check to make sure there is enough for the rest + * of the data. If there isn't enough room then don't advance the + * pointer. + */ + + if (snapshot->remain < sizeof(*header)) + return; + + /* It is legal to have no function (i.e. - make an empty section) */ + if (func) { + ret = func(device, data, snapshot->remain - sizeof(*header), + priv); + + /* + * If there wasn't enough room for the data then don't bother + * setting up the header. + */ + + if (ret == 0) + return; + } + + header->magic = SNAPSHOT_SECTION_MAGIC; + header->id = id; + header->size = ret + sizeof(*header); + + snapshot->ptr += header->size; + snapshot->remain -= header->size; + snapshot->size += header->size; +} + +static void kgsl_free_snapshot(struct kgsl_snapshot *snapshot) +{ + struct kgsl_snapshot_object *obj, *tmp; + struct kgsl_device *device = snapshot->device; + + wait_for_completion(&snapshot->dump_gate); + + list_for_each_entry_safe(obj, tmp, + &snapshot->obj_list, node) + kgsl_snapshot_put_object(obj); + + if (snapshot->mempool) + vfree(snapshot->mempool); + + kfree(snapshot); + dev_err(device->dev, "snapshot: objects released\n"); +} + +#define SP0_ISDB_ISDB_BRKPT_CFG 0x40014 +#define SP0_ISDB_ISDB_EN 0x40004 +#define SP0_ISDB_ISDB_CMD 0x4000C + +static void isdb_write(void __iomem *base, u32 offset) +{ + /* To set the SCHBREAKTYPE bit */ + __raw_writel(0x801, base + SP0_ISDB_ISDB_BRKPT_CFG + offset); + + /* + * ensure the configurations are set before + * enabling ISDB + */ + wmb(); + /* To set the ISDBCLKON and ISDB_EN bits*/ + __raw_writel(0x03, base + SP0_ISDB_ISDB_EN + offset); + + /* + * ensure previous write to enable isdb posts + * before issuing the break command + */ + wmb(); + /*To issue ISDB_0_ISDB_CMD_BREAK*/ + __raw_writel(0x1, base + SP0_ISDB_ISDB_CMD + offset); +} + +static void set_isdb_breakpoint_registers(struct kgsl_device *device) +{ + struct clk *clk; + int ret; + + if (!device->set_isdb_breakpoint || device->ftbl->is_hwcg_on(device) + || device->qdss_gfx_virt == NULL) + return; + + clk = clk_get(&device->pdev->dev, "apb_pclk"); + + if (IS_ERR(clk)) { + dev_err(device->dev, "Unable to get QDSS clock\n"); + goto err; + } + + ret = clk_prepare_enable(clk); + + if (ret) { + dev_err(device->dev, "QDSS Clock enable error: %d\n", ret); + clk_put(clk); + goto err; + } + + /* Issue break command for all eight SPs */ + isdb_write(device->qdss_gfx_virt, 0x0000); + isdb_write(device->qdss_gfx_virt, 0x1000); + isdb_write(device->qdss_gfx_virt, 0x2000); + isdb_write(device->qdss_gfx_virt, 0x3000); + isdb_write(device->qdss_gfx_virt, 0x4000); + isdb_write(device->qdss_gfx_virt, 0x5000); + isdb_write(device->qdss_gfx_virt, 0x6000); + isdb_write(device->qdss_gfx_virt, 0x7000); + + clk_disable_unprepare(clk); + clk_put(clk); + + return; + +err: + /* Do not force kernel panic if isdb writes did not go through */ + device->force_panic = false; +} + +static void kgsl_device_snapshot_atomic(struct kgsl_device *device) +{ + struct kgsl_snapshot *snapshot; + struct timespec64 boot; + + if (device->snapshot && device->force_panic) + return; + + if (!atomic_read(&device->active_cnt)) { + dev_err(device->dev, "snapshot: device is powered off\n"); + return; + } + + device->snapshot_memory_atomic.size = device->snapshot_memory.size; + if (!device->snapshot_faultcount) { + /* Use non-atomic snapshot memory if 
it is unused */ + device->snapshot_memory_atomic.ptr = device->snapshot_memory.ptr; + } else { + /* Limit size to 3MB to avoid failure for atomic snapshot memory */ + if (device->snapshot_memory_atomic.size > (SZ_2M + SZ_1M)) + device->snapshot_memory_atomic.size = (SZ_2M + SZ_1M); + + device->snapshot_memory_atomic.ptr = devm_kzalloc(&device->pdev->dev, + device->snapshot_memory_atomic.size, GFP_ATOMIC); + + /* If we fail to allocate more than 1MB fall back to 1MB */ + if (WARN_ON((!device->snapshot_memory_atomic.ptr) && + device->snapshot_memory_atomic.size > SZ_1M)) { + device->snapshot_memory_atomic.size = SZ_1M; + device->snapshot_memory_atomic.ptr = devm_kzalloc(&device->pdev->dev, + device->snapshot_memory_atomic.size, GFP_ATOMIC); + } + + if (!device->snapshot_memory_atomic.ptr) { + dev_err(device->dev, + "Failed to allocate memory for atomic snapshot\n"); + return; + } + } + + /* Allocate memory for the snapshot instance */ + snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC); + if (snapshot == NULL) + return; + + device->snapshot_atomic = true; + INIT_LIST_HEAD(&snapshot->obj_list); + INIT_LIST_HEAD(&snapshot->cp_list); + + snapshot->start = device->snapshot_memory_atomic.ptr; + snapshot->ptr = device->snapshot_memory_atomic.ptr; + snapshot->remain = device->snapshot_memory_atomic.size; + + /* + * Trigger both GPU and GMU snapshot. GPU specific code + * will take care of whether to dumps full state or only + * GMU state based on current GPU power state. + */ + if (device->ftbl->snapshot) + device->ftbl->snapshot(device, snapshot, NULL); + + /* + * The timestamp is the seconds since boot so it is easier to match to + * the kernel log + */ + getboottime64(&boot); + snapshot->timestamp = get_seconds() - boot.tv_sec; + + kgsl_add_to_minidump("ATOMIC_GPU_SNAPSHOT", (u64) device->snapshot_memory_atomic.ptr, + atomic_snapshot_phy_addr(device), device->snapshot_memory_atomic.size); + + /* log buffer info to aid in ramdump fault tolerance */ + dev_err(device->dev, "Atomic GPU snapshot created at pa %llx++0x%zx\n", + atomic_snapshot_phy_addr(device), snapshot->size); +} + +/** + * kgsl_snapshot() - construct a device snapshot + * @device: device to snapshot + * @context: the context that is hung, might be NULL if unknown. + * @gmu_fault: whether this snapshot is triggered by a GMU fault. + * + * Given a device, construct a binary snapshot dump of the current device state + * and store it in the device snapshot memory. + */ +void kgsl_device_snapshot(struct kgsl_device *device, + struct kgsl_context *context, bool gmu_fault) +{ + struct kgsl_snapshot *snapshot; + struct timespec64 boot; + + set_isdb_breakpoint_registers(device); + + if (device->snapshot_memory.ptr == NULL) { + dev_err(device->dev, + "snapshot: no snapshot memory available\n"); + return; + } + + if (WARN(!kgsl_state_is_awake(device), + "snapshot: device is powered off\n")) + return; + + /* increment the hang count for good book keeping */ + device->snapshot_faultcount++; + + if (device->snapshot != NULL) { + + /* + * Snapshot over-write policy: + * 1. By default, don't over-write the very first snapshot, + * be it a gmu or gpu fault. + * 2. Never over-write existing snapshot on a gpu fault. + * 3. Never over-write a snapshot that we didn't recover from. + * 4. In order to over-write a new gmu fault snapshot with a + * previously recovered fault, then set the sysfs knob + * prioritize_recoverable to true. 
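+ *
+ * In code terms (see the check below): an existing snapshot is only
+ * replaced when prioritize_unrecoverable is cleared, the existing
+ * snapshot was recovered from, and the new fault is a GMU fault.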
+ */ + if (!device->prioritize_unrecoverable || + !device->snapshot->recovered || !gmu_fault) + return; + + /* + * If another thread is currently reading it, that thread + * will free it, otherwise free it now. + */ + if (!device->snapshot->sysfs_read) + kgsl_free_snapshot(device->snapshot); + device->snapshot = NULL; + } + + /* Allocate memory for the snapshot instance */ + snapshot = kzalloc(sizeof(*snapshot), GFP_KERNEL); + if (snapshot == NULL) + return; + + init_completion(&snapshot->dump_gate); + INIT_LIST_HEAD(&snapshot->obj_list); + INIT_LIST_HEAD(&snapshot->cp_list); + INIT_WORK(&snapshot->work, kgsl_snapshot_save_frozen_objs); + + snapshot->start = device->snapshot_memory.ptr; + snapshot->ptr = device->snapshot_memory.ptr; + snapshot->remain = device->snapshot_memory.size; + snapshot->recovered = false; + snapshot->first_read = true; + snapshot->sysfs_read = 0; + + device->ftbl->snapshot(device, snapshot, context); + + /* + * The timestamp is the seconds since boot so it is easier to match to + * the kernel log + */ + + getboottime64(&boot); + snapshot->timestamp = get_seconds() - boot.tv_sec; + + /* Store the instance in the device until it gets dumped */ + device->snapshot = snapshot; + snapshot->device = device; + + /* log buffer info to aid in ramdump fault tolerance */ + dev_err(device->dev, "%s snapshot created at pa %llx++0x%zx\n", + gmu_fault ? "GMU" : "GPU", snapshot_phy_addr(device), + snapshot->size); + + kgsl_add_to_minidump("GPU_SNAPSHOT", (u64) device->snapshot_memory.ptr, + snapshot_phy_addr(device), device->snapshot_memory.size); + + if (device->skip_ib_capture) + BUG_ON(device->force_panic); + + sysfs_notify(&device->snapshot_kobj, NULL, "timestamp"); + + /* + * Queue a work item that will save the IB data in snapshot into + * static memory to prevent loss of data due to overwriting of + * memory. + * + */ + kgsl_schedule_work(&snapshot->work); +} + +/* An attribute for showing snapshot details */ +struct kgsl_snapshot_attribute { + struct attribute attr; + ssize_t (*show)(struct kgsl_device *device, char *buf); + ssize_t (*store)(struct kgsl_device *device, const char *buf, + size_t count); +}; + +/** + * kgsl_snapshot_process_ib_obj_list() - Go through the list of IB's which need + * to be dumped for snapshot and move them to the global snapshot list so + * they will get dumped when the global list is dumped + * @device: device being snapshotted + */ +static void kgsl_snapshot_process_ib_obj_list(struct kgsl_snapshot *snapshot) +{ + struct kgsl_snapshot_cp_obj *obj, *obj_temp; + struct adreno_ib_object *ib_obj; + int i; + + list_for_each_entry_safe(obj, obj_temp, &snapshot->cp_list, + node) { + for (i = 0; i < obj->ib_obj_list->num_objs; i++) { + ib_obj = &(obj->ib_obj_list->obj_list[i]); + kgsl_snapshot_get_object(snapshot, ib_obj->entry->priv, + ib_obj->gpuaddr, ib_obj->size, + ib_obj->snapshot_obj_type); + } + list_del(&obj->node); + adreno_ib_destroy_obj_list(obj->ib_obj_list); + kfree(obj); + } +} + +#define to_snapshot_attr(a) \ +container_of(a, struct kgsl_snapshot_attribute, attr) + +#define kobj_to_device(a) \ +container_of(a, struct kgsl_device, snapshot_kobj) + +static int snapshot_release(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + bool snapshot_free = false; + int ret = 0; + + mutex_lock(&device->mutex); + snapshot->sysfs_read--; + + /* + * If someone's replaced the snapshot, return an error and free + * the snapshot if this is the last thread to read it. 
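+ * sysfs_read counts the concurrent readers of the sysfs "dump"
+ * file: snapshot_show() increments it and this release path drops
+ * it, so the buffer is only freed once the last reader is done.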
+ */ + if (device->snapshot != snapshot) { + ret = -EIO; + if (!snapshot->sysfs_read) + snapshot_free = true; + } + mutex_unlock(&device->mutex); + if (snapshot_free) + kgsl_free_snapshot(snapshot); + return ret; +} + +/* Dump the sysfs binary data to the user */ +static ssize_t snapshot_show(struct file *filep, struct kobject *kobj, + struct bin_attribute *attr, char *buf, loff_t off, + size_t count) +{ + struct kgsl_device *device = kobj_to_device(kobj); + struct kgsl_snapshot *snapshot; + struct kgsl_snapshot_section_header head; + struct snapshot_obj_itr itr; + int ret = 0; + + mutex_lock(&device->mutex); + snapshot = device->snapshot; + if (snapshot != NULL) { + /* + * If we're reading at a non-zero offset from a new snapshot, + * that means we want to read from the previous snapshot (which + * was overwritten), so return an error + */ + if (snapshot->first_read) { + if (off) + ret = -EIO; + else + snapshot->first_read = false; + } + if (!ret) + snapshot->sysfs_read++; + } + mutex_unlock(&device->mutex); + + if (ret) + return ret; + + /* Return nothing if we haven't taken a snapshot yet */ + if (snapshot == NULL) + return 0; + + /* + * Wait for the dump worker to finish. This is interruptible + * to allow userspace to bail if things go horribly wrong. + */ + ret = wait_for_completion_interruptible(&snapshot->dump_gate); + if (ret) { + snapshot_release(device, snapshot); + return ret; + } + + obj_itr_init(&itr, buf, off, count); + + ret = obj_itr_out(&itr, snapshot->start, snapshot->size); + if (ret == 0) + goto done; + + /* Dump the memory pool if it exists */ + if (snapshot->mempool) { + ret = obj_itr_out(&itr, snapshot->mempool, + snapshot->mempool_size); + if (ret == 0) + goto done; + } + + { + head.magic = SNAPSHOT_SECTION_MAGIC; + head.id = KGSL_SNAPSHOT_SECTION_END; + head.size = sizeof(head); + + obj_itr_out(&itr, &head, sizeof(head)); + } + + /* + * Make sure everything has been written out before destroying things. + * The best way to confirm this is to go all the way through without + * writing any bytes - so only release if we get this far and + * itr->write is 0 and there are no concurrent reads pending + */ + + if (itr.write == 0) { + bool snapshot_free = false; + + mutex_lock(&device->mutex); + if (--snapshot->sysfs_read == 0) { + if (device->snapshot == snapshot) + device->snapshot = NULL; + snapshot_free = true; + } + mutex_unlock(&device->mutex); + + if (snapshot_free) + kgsl_free_snapshot(snapshot); + return 0; + } + +done: + ret = snapshot_release(device, snapshot); + return (ret < 0) ? 
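+ /* a negative release result is an error; otherwise report the
+ * number of bytes copied out for this read
+ */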
ret : itr.write; +} + +/* Show the total number of hangs since device boot */ +static ssize_t faultcount_show(struct kgsl_device *device, char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%d\n", device->snapshot_faultcount); +} + +/* Reset the total number of hangs since device boot */ +static ssize_t faultcount_store(struct kgsl_device *device, const char *buf, + size_t count) +{ + if (count) + device->snapshot_faultcount = 0; + + return count; +} + +/* Show the force_panic request status */ +static ssize_t force_panic_show(struct kgsl_device *device, char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%d\n", device->force_panic); +} + +/* Store the panic request value to force_panic */ +static ssize_t force_panic_store(struct kgsl_device *device, const char *buf, + size_t count) +{ + if (strtobool(buf, &device->force_panic)) + return -EINVAL; + return count; +} + +/* Show the skip_ib_capture request status */ +static ssize_t skip_ib_capture_show(struct kgsl_device *device, char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%d\n", device->skip_ib_capture); +} + +/* Store the value to skip_ib_capture */ +static ssize_t skip_ib_capture_store(struct kgsl_device *device, + const char *buf, size_t count) +{ + int ret; + + ret = kstrtobool(buf, &device->skip_ib_capture); + return ret ? ret : count; +} + +/* Show the prioritize_unrecoverable status */ +static ssize_t prioritize_unrecoverable_show( + struct kgsl_device *device, char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%d\n", + device->prioritize_unrecoverable); +} + +/* Store the priority value to prioritize unrecoverable */ +static ssize_t prioritize_unrecoverable_store( + struct kgsl_device *device, const char *buf, size_t count) +{ + if (strtobool(buf, &device->prioritize_unrecoverable)) + return -EINVAL; + + return count; +} + +/* Show the snapshot_crashdumper request status */ +static ssize_t snapshot_crashdumper_show(struct kgsl_device *device, char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%d\n", device->snapshot_crashdumper); +} + + +/* Store the value to snapshot_crashdumper */ +static ssize_t snapshot_crashdumper_store(struct kgsl_device *device, + const char *buf, size_t count) +{ + if (strtobool(buf, &device->snapshot_crashdumper)) + return -EINVAL; + return count; +} + +/* Show the timestamp of the last collected snapshot */ +static ssize_t timestamp_show(struct kgsl_device *device, char *buf) +{ + unsigned long timestamp; + + mutex_lock(&device->mutex); + timestamp = device->snapshot ?
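+ /* report 0 when no snapshot is currently held */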
device->snapshot->timestamp : 0; + mutex_unlock(&device->mutex); + return scnprintf(buf, PAGE_SIZE, "%lu\n", timestamp); +} + +static ssize_t snapshot_legacy_show(struct kgsl_device *device, char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%d\n", device->snapshot_legacy); +} + +static ssize_t snapshot_legacy_store(struct kgsl_device *device, + const char *buf, size_t count) +{ + if (strtobool(buf, &device->snapshot_legacy)) + return -EINVAL; + + return count; +} + +static struct bin_attribute snapshot_attr = { + .attr.name = "dump", + .attr.mode = 0444, + .size = 0, + .read = snapshot_show +}; + +#define SNAPSHOT_ATTR(_name, _mode, _show, _store) \ +struct kgsl_snapshot_attribute attr_##_name = { \ + .attr = { .name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ +} + +static SNAPSHOT_ATTR(timestamp, 0444, timestamp_show, NULL); +static SNAPSHOT_ATTR(faultcount, 0644, faultcount_show, faultcount_store); +static SNAPSHOT_ATTR(force_panic, 0644, force_panic_show, force_panic_store); +static SNAPSHOT_ATTR(prioritize_unrecoverable, 0644, + prioritize_unrecoverable_show, prioritize_unrecoverable_store); +static SNAPSHOT_ATTR(snapshot_crashdumper, 0644, snapshot_crashdumper_show, + snapshot_crashdumper_store); +static SNAPSHOT_ATTR(snapshot_legacy, 0644, snapshot_legacy_show, + snapshot_legacy_store); +static SNAPSHOT_ATTR(skip_ib_capture, 0644, skip_ib_capture_show, + skip_ib_capture_store); + +static ssize_t snapshot_sysfs_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct kgsl_snapshot_attribute *pattr = to_snapshot_attr(attr); + struct kgsl_device *device = kobj_to_device(kobj); + ssize_t ret; + + if (device && pattr->show) + ret = pattr->show(device, buf); + else + ret = -EIO; + + return ret; +} + +static ssize_t snapshot_sysfs_store(struct kobject *kobj, + struct attribute *attr, const char *buf, size_t count) +{ + struct kgsl_snapshot_attribute *pattr = to_snapshot_attr(attr); + struct kgsl_device *device = kobj_to_device(kobj); + ssize_t ret = -EIO; + + if (pattr->store) + ret = pattr->store(device, buf, count); + + return ret; +} + +static const struct sysfs_ops snapshot_sysfs_ops = { + .show = snapshot_sysfs_show, + .store = snapshot_sysfs_store, +}; + +static struct kobj_type ktype_snapshot = { + .sysfs_ops = &snapshot_sysfs_ops, +}; + +static const struct attribute *snapshot_attrs[] = { + &attr_timestamp.attr, + &attr_faultcount.attr, + &attr_force_panic.attr, + &attr_prioritize_unrecoverable.attr, + &attr_snapshot_crashdumper.attr, + &attr_snapshot_legacy.attr, + &attr_skip_ib_capture.attr, + NULL, +}; + +static int kgsl_panic_notifier_callback(struct notifier_block *nb, + unsigned long action, void *unused) +{ + struct kgsl_device *device = container_of(nb, struct kgsl_device, + panic_nb); + + /* To send NMI to GMU */ + device->gmu_fault = true; + kgsl_device_snapshot_atomic(device); + + return NOTIFY_OK; +} + +void kgsl_device_snapshot_probe(struct kgsl_device *device, u32 size) +{ + device->snapshot_memory.size = size; + + device->snapshot_memory.ptr = dma_alloc_coherent(&device->pdev->dev, + device->snapshot_memory.size, &device->snapshot_memory.dma_handle, + GFP_KERNEL); + /* + * If we fail to allocate more than 1MB for snapshot fall back + * to 1MB + */ + if (WARN_ON((!device->snapshot_memory.ptr) && size > SZ_1M)) { + device->snapshot_memory.size = SZ_1M; + device->snapshot_memory.ptr = devm_kzalloc(&device->pdev->dev, + device->snapshot_memory.size, GFP_KERNEL); + } + + if (!device->snapshot_memory.ptr) { + 
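+ /*
+ * Both the dma_alloc_coherent() attempt above and the 1MB
+ * devm_kzalloc() fallback failed, so snapshot capture stays
+ * disabled for this device.
+ */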
dev_err(device->dev, + "KGSL failed to allocate memory for snapshot\n"); + return; + } + + device->snapshot = NULL; + device->snapshot_faultcount = 0; + device->force_panic = false; + device->snapshot_crashdumper = true; + device->snapshot_legacy = false; + + device->snapshot_atomic = false; + device->panic_nb.notifier_call = kgsl_panic_notifier_callback; + device->panic_nb.priority = 1; + device->snapshot_ctxt_record_size = 64 * 1024; + + /* + * Set this to false so that we only ever keep the first snapshot around + * If we want to over-write with a gmu snapshot, then set it to true + * via sysfs + */ + device->prioritize_unrecoverable = false; + + if (kobject_init_and_add(&device->snapshot_kobj, &ktype_snapshot, + &device->dev->kobj, "snapshot")) + return; + + WARN_ON(sysfs_create_bin_file(&device->snapshot_kobj, &snapshot_attr)); + WARN_ON(sysfs_create_files(&device->snapshot_kobj, snapshot_attrs)); + atomic_notifier_chain_register(&panic_notifier_list, + &device->panic_nb); +} + +/** + * kgsl_device_snapshot_close() - take down snapshot memory for a device + * @device: Pointer to the kgsl_device + * + * Remove the sysfs files and free the memory allocated for the GPU + * snapshot + */ +void kgsl_device_snapshot_close(struct kgsl_device *device) +{ + kgsl_remove_from_minidump("GPU_SNAPSHOT", (u64) device->snapshot_memory.ptr, + snapshot_phy_addr(device), device->snapshot_memory.size); + + sysfs_remove_bin_file(&device->snapshot_kobj, &snapshot_attr); + sysfs_remove_files(&device->snapshot_kobj, snapshot_attrs); + + kobject_put(&device->snapshot_kobj); + + if (device->snapshot_memory.dma_handle) + dma_free_coherent(&device->pdev->dev, device->snapshot_memory.size, + device->snapshot_memory.ptr, device->snapshot_memory.dma_handle); +} + +/** + * kgsl_snapshot_add_ib_obj_list() - Add a IB object list to the snapshot + * object list + * @device: the device that is being snapshotted + * @ib_obj_list: The IB list that has objects required to execute an IB + * @num_objs: Number of IB objects + * @ptbase: The pagetable base in which the IB is mapped + * + * Adds a new IB to the list of IB objects maintained when getting snapshot + * Returns 0 on success else -ENOMEM on error + */ +int kgsl_snapshot_add_ib_obj_list(struct kgsl_snapshot *snapshot, + struct adreno_ib_object_list *ib_obj_list) +{ + struct kgsl_snapshot_cp_obj *obj; + + obj = kzalloc(sizeof(*obj), GFP_KERNEL); + if (!obj) + return -ENOMEM; + obj->ib_obj_list = ib_obj_list; + list_add(&obj->node, &snapshot->cp_list); + return 0; +} + +static size_t _mempool_add_object(struct kgsl_snapshot *snapshot, u8 *data, + struct kgsl_snapshot_object *obj) +{ + struct kgsl_snapshot_section_header *section = + (struct kgsl_snapshot_section_header *)data; + struct kgsl_snapshot_gpu_object_v2 *header = + (struct kgsl_snapshot_gpu_object_v2 *)(data + sizeof(*section)); + u8 *dest = data + sizeof(*section) + sizeof(*header); + uint64_t size; + + size = obj->size; + + if (!kgsl_memdesc_map(&obj->entry->memdesc)) { + dev_err(snapshot->device->dev, + "snapshot: failed to map GPU object\n"); + return 0; + } + + section->magic = SNAPSHOT_SECTION_MAGIC; + section->id = KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2; + section->size = size + sizeof(*header) + sizeof(*section); + + header->size = size >> 2; + header->gpuaddr = obj->gpuaddr; + header->ptbase = + kgsl_mmu_pagetable_get_ttbr0(obj->entry->priv->pagetable); + header->type = obj->type; + + if (kgsl_addr_range_overlap(obj->gpuaddr, obj->size, + snapshot->ib1base, snapshot->ib1size)) + snapshot->ib1dumped = 
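+ /* the active IB1 falls inside this object */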
true; + + if (kgsl_addr_range_overlap(obj->gpuaddr, obj->size, + snapshot->ib2base, snapshot->ib2size)) + snapshot->ib2dumped = true; + + memcpy(dest, obj->entry->memdesc.hostptr + obj->offset, size); + kgsl_memdesc_unmap(&obj->entry->memdesc); + + return section->size; +} + +/** + * kgsl_snapshot_save_frozen_objs() - Save the objects frozen in snapshot into + * memory so that the data reported in these objects is correct when snapshot + * is taken + * @work: The work item that scheduled this work + */ +static void kgsl_snapshot_save_frozen_objs(struct work_struct *work) +{ + struct kgsl_snapshot *snapshot = container_of(work, + struct kgsl_snapshot, work); + struct kgsl_snapshot_object *obj, *tmp; + size_t size = 0; + void *ptr; + + if (snapshot->device->gmu_fault) + goto gmu_only; + + kgsl_snapshot_process_ib_obj_list(snapshot); + + list_for_each_entry(obj, &snapshot->obj_list, node) { + obj->size = ALIGN(obj->size, 4); + + size += ((size_t) obj->size + + sizeof(struct kgsl_snapshot_gpu_object_v2) + + sizeof(struct kgsl_snapshot_section_header)); + } + + if (size == 0) + goto done; + + snapshot->mempool = vmalloc(size); + + ptr = snapshot->mempool; + snapshot->mempool_size = 0; + + /* even if vmalloc fails, make sure we clean up the obj_list */ + list_for_each_entry_safe(obj, tmp, &snapshot->obj_list, node) { + if (snapshot->mempool) { + size_t ret = _mempool_add_object(snapshot, ptr, obj); + + ptr += ret; + snapshot->mempool_size += ret; + } + + kgsl_snapshot_put_object(obj); + } +done: + /* + * Get rid of the process struct here, so that it doesn't sit + * around until someone bothers to read the snapshot file. + */ + kgsl_process_private_put(snapshot->process); + snapshot->process = NULL; + + if (snapshot->ib1base && !snapshot->ib1dumped) + dev_err(snapshot->device->dev, + "snapshot: Active IB1:%016llx not dumped\n", + snapshot->ib1base); + else if (snapshot->ib2base && !snapshot->ib2dumped) + dev_err(snapshot->device->dev, + "snapshot: Active IB2:%016llx not dumped\n", + snapshot->ib2base); + +gmu_only: + BUG_ON(!snapshot->device->skip_ib_capture && + snapshot->device->force_panic); + complete_all(&snapshot->dump_gate); +} diff --git a/kgsl_snapshot.h b/kgsl_snapshot.h new file mode 100644 index 0000000000..a9a94152e2 --- /dev/null +++ b/kgsl_snapshot.h @@ -0,0 +1,314 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2012-2021, The Linux Foundation. All rights reserved. 
+ */ + +#ifndef _KGSL_SNAPSHOT_H_ +#define _KGSL_SNAPSHOT_H_ + +#include + +/* Snapshot header */ + +/* High word is static, low word is snapshot version ID */ +#define SNAPSHOT_MAGIC 0x504D0002 + +/* GPU ID scheme: + * [16:31] - core identifer (0x0002 for 2D or 0x0003 for 3D) + * [00:16] - GPU specific identifier + */ + +struct kgsl_snapshot_header { + __u32 magic; /* Magic identifier */ + __u32 gpuid; /* GPU ID - see above */ + /* Added in snapshot version 2 */ + __u32 chipid; /* Chip ID from the GPU */ +} __packed; + +/* Section header */ +#define SNAPSHOT_SECTION_MAGIC 0xABCD + +struct kgsl_snapshot_section_header { + __u16 magic; /* Magic identifier */ + __u16 id; /* Type of section */ + __u32 size; /* Size of the section including this header */ +} __packed; + +/* Section identifiers */ +#define KGSL_SNAPSHOT_SECTION_OS 0x0101 +#define KGSL_SNAPSHOT_SECTION_REGS 0x0201 +#define KGSL_SNAPSHOT_SECTION_REGS_V2 0x0202 +#define KGSL_SNAPSHOT_SECTION_RB 0x0301 +#define KGSL_SNAPSHOT_SECTION_RB_V2 0x0302 +#define KGSL_SNAPSHOT_SECTION_IB 0x0401 +#define KGSL_SNAPSHOT_SECTION_IB_V2 0x0402 +#define KGSL_SNAPSHOT_SECTION_INDEXED_REGS 0x0501 +#define KGSL_SNAPSHOT_SECTION_ISTORE 0x0801 +#define KGSL_SNAPSHOT_SECTION_DEBUG 0x0901 +#define KGSL_SNAPSHOT_SECTION_DEBUGBUS 0x0A01 +#define KGSL_SNAPSHOT_SECTION_GPU_OBJECT 0x0B01 +#define KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2 0x0B02 +#define KGSL_SNAPSHOT_SECTION_MEMLIST 0x0E01 +#define KGSL_SNAPSHOT_SECTION_MEMLIST_V2 0x0E02 +#define KGSL_SNAPSHOT_SECTION_SHADER 0x1201 +#define KGSL_SNAPSHOT_SECTION_SHADER_V2 0x1202 +#define KGSL_SNAPSHOT_SECTION_MVC 0x1501 +#define KGSL_SNAPSHOT_SECTION_MVC_V2 0x1502 +#define KGSL_SNAPSHOT_SECTION_GMU 0x1601 +#define KGSL_SNAPSHOT_SECTION_GMU_MEMORY 0x1701 +#define KGSL_SNAPSHOT_SECTION_SIDE_DEBUGBUS 0x1801 + +#define KGSL_SNAPSHOT_SECTION_END 0xFFFF + +/* OS sub-section header */ +#define KGSL_SNAPSHOT_OS_LINUX 0x0001 +#define KGSL_SNAPSHOT_OS_LINUX_V3 0x00000202 + +/* Linux OS specific information */ +struct kgsl_snapshot_linux { + int osid; /* subsection OS identifier */ + int state; /* 1 if the thread is running, 0 for hung */ + __u32 seconds; /* Unix timestamp for the snapshot */ + __u32 power_flags; /* Current power flags */ + __u32 power_level; /* Current power level */ + __u32 power_interval_timeout; /* Power interval timeout */ + __u32 grpclk; /* Current GP clock value */ + __u32 busclk; /* Current busclk value */ + __u32 ptbase; /* Current ptbase */ + __u32 pid; /* PID of the process that owns the PT */ + __u32 current_context; /* ID of the current context */ + __u32 ctxtcount; /* Number of contexts appended to section */ + unsigned char release[32]; /* kernel release */ + unsigned char version[32]; /* kernel version */ + unsigned char comm[16]; /* Name of the process that owns the PT */ +} __packed; + +struct kgsl_snapshot_linux_v2 { + int osid; /* subsection OS identifier */ + __u32 seconds; /* Unix timestamp for the snapshot */ + __u32 power_flags; /* Current power flags */ + __u32 power_level; /* Current power level */ + __u32 power_interval_timeout; /* Power interval timeout */ + __u32 grpclk; /* Current GP clock value */ + __u32 busclk; /* Current busclk value */ + __u64 ptbase; /* Current ptbase */ + __u32 pid; /* PID of the process that owns the PT */ + __u32 current_context; /* ID of the current context */ + __u32 ctxtcount; /* Number of contexts appended to section */ + unsigned char release[32]; /* kernel release */ + unsigned char version[32]; /* kernel version */ + unsigned char comm[16]; /* 
Name of the process that owns the PT */ +} __packed; + +/* + * This structure contains a record of an active context. + * These are appended one after another in the OS section below + * the header above + */ + +struct kgsl_snapshot_linux_context { + __u32 id; /* The context ID */ + __u32 timestamp_queued; /* The last queued timestamp */ + __u32 timestamp_retired; /* The last timestamp retired by HW */ +}; + +struct kgsl_snapshot_linux_context_v2 { + __u32 id; /* The context ID */ + __u32 timestamp_queued; /* The last queued timestamp */ + __u32 timestamp_consumed; /* The last timestamp consumed by HW */ + __u32 timestamp_retired; /* The last timestamp retired by HW */ +}; +/* Ringbuffer sub-section header */ +struct kgsl_snapshot_rb { + int start; /* dword at the start of the dump */ + int end; /* dword at the end of the dump */ + int rbsize; /* Size (in dwords) of the ringbuffer */ + int wptr; /* Current index of the CPU write pointer */ + int rptr; /* Current index of the GPU read pointer */ + int count; /* Number of dwords in the dump */ + __u32 timestamp_queued; /* The last queued timestamp */ + __u32 timestamp_retired; /* The last timestamp retired by HW */ +} __packed; + +struct kgsl_snapshot_rb_v2 { + int start; /* dword at the start of the dump */ + int end; /* dword at the end of the dump */ + int rbsize; /* Size (in dwords) of the ringbuffer */ + int wptr; /* Current index of the CPU write pointer */ + int rptr; /* Current index of the GPU read pointer */ + int count; /* Number of dwords in the dump */ + __u32 timestamp_queued; /* The last queued timestamp */ + __u32 timestamp_retired; /* The last timestamp retired by HW */ + __u64 gpuaddr; /* The GPU address of the ringbuffer */ + __u32 id; /* Ringbuffer identifier */ +} __packed; + + +/* Replay or Memory list section, both sections have same header */ +struct kgsl_snapshot_replay_mem_list { + /* + * Number of IBs to replay for replay section or + * number of memory list entries for mem list section + */ + int num_entries; + /* Pagetable base to which the replay IBs or memory entries belong */ + __u32 ptbase; +} __packed; + +/* Replay or Memory list section, both sections have same header */ +struct kgsl_snapshot_mem_list_v2 { + /* + * Number of IBs to replay for replay section or + * number of memory list entries for mem list section + */ + int num_entries; + /* Pagetable base to which the replay IBs or memory entries belong */ + __u64 ptbase; +} __packed; + + +/* Indirect buffer sub-section header */ +struct kgsl_snapshot_ib { + __u32 gpuaddr; /* GPU address of the the IB */ + __u32 ptbase; /* Base for the pagetable the GPU address is valid in */ + int size; /* Size of the IB */ +} __packed; + +/* Indirect buffer sub-section header (v2) */ +struct kgsl_snapshot_ib_v2 { + __u64 gpuaddr; /* GPU address of the the IB */ + __u64 ptbase; /* Base for the pagetable the GPU address is valid in */ + __u64 size; /* Size of the IB */ +} __packed; + +/* GMU memory ID's */ +#define SNAPSHOT_GMU_MEM_UNKNOWN 0x00 +#define SNAPSHOT_GMU_MEM_HFI 0x01 +#define SNAPSHOT_GMU_MEM_LOG 0x02 +#define SNAPSHOT_GMU_MEM_BWTABLE 0x03 +#define SNAPSHOT_GMU_MEM_DEBUG 0x04 +#define SNAPSHOT_GMU_MEM_BIN_BLOCK 0x05 + +/* GMU memory section data */ +struct kgsl_snapshot_gmu_mem { + int type; + uint64_t hostaddr; + uint64_t gmuaddr; + uint64_t gpuaddr; +} __packed; + +/* Register sub-section header */ +struct kgsl_snapshot_regs { + __u32 count; /* Number of register pairs in the section */ +} __packed; + +/* Indexed register sub-section header */ +struct 
kgsl_snapshot_indexed_regs { + __u32 index_reg; /* Offset of the index register for this section */ + __u32 data_reg; /* Offset of the data register for this section */ + int start; /* Starting index */ + int count; /* Number of dwords in the data */ +} __packed; + +/* MVC register sub-section header */ +struct kgsl_snapshot_mvc_regs { + int ctxt_id; + int cluster_id; +} __packed; + +struct kgsl_snapshot_mvc_regs_v2 { + int ctxt_id; + int cluster_id; + int pipe_id; + int location_id; +} __packed; + +/* Istore sub-section header */ +struct kgsl_snapshot_istore { + int count; /* Number of instructions in the istore */ +} __packed; + +/* Debug data sub-section header */ + +/* A2XX debug sections */ +#define SNAPSHOT_DEBUG_SX 1 +#define SNAPSHOT_DEBUG_CP 2 +#define SNAPSHOT_DEBUG_SQ 3 +#define SNAPSHOT_DEBUG_SQTHREAD 4 +#define SNAPSHOT_DEBUG_MIU 5 + +/* A3XX debug sections */ +#define SNAPSHOT_DEBUG_VPC_MEMORY 6 +#define SNAPSHOT_DEBUG_CP_MEQ 7 +#define SNAPSHOT_DEBUG_CP_PM4_RAM 8 +#define SNAPSHOT_DEBUG_CP_PFP_RAM 9 +#define SNAPSHOT_DEBUG_CP_ROQ 10 +#define SNAPSHOT_DEBUG_SHADER_MEMORY 11 +#define SNAPSHOT_DEBUG_CP_MERCIU 12 +#define SNAPSHOT_DEBUG_SQE_VERSION 14 + +/* GMU Version information */ +#define SNAPSHOT_DEBUG_GMU_CORE_VERSION 15 +#define SNAPSHOT_DEBUG_GMU_CORE_DEV_VERSION 16 +#define SNAPSHOT_DEBUG_GMU_PWR_VERSION 17 +#define SNAPSHOT_DEBUG_GMU_PWR_DEV_VERSION 18 +#define SNAPSHOT_DEBUG_GMU_HFI_VERSION 19 + +struct kgsl_snapshot_debug { + int type; /* Type identifier for the attached tata */ + int size; /* Size of the section in dwords */ +} __packed; + +struct kgsl_snapshot_debugbus { + int id; /* Debug bus ID */ + int count; /* Number of dwords in the dump */ +} __packed; + +struct kgsl_snapshot_side_debugbus { + int id; /* Debug bus ID */ + int size; /* Number of dwords in the dump */ + int valid_data; /* Mask of valid bits of the side debugbus */ +} __packed; + +struct kgsl_snapshot_shader { + int type; /* SP/TP statetype */ + int index; /* SP/TP index */ + int size; /* Number of dwords in the dump */ +} __packed; + +struct kgsl_snapshot_shader_v2 { + int type; /* SP/TP statetype */ + int index; /* SP/TP index */ + int usptp; /* USPTP index */ + int pipe_id; /* Pipe id */ + int location; /* Location value */ + u32 size; /* Number of dwords in the dump */ +} __packed; + +#define SNAPSHOT_GPU_OBJECT_SHADER 1 +#define SNAPSHOT_GPU_OBJECT_IB 2 +#define SNAPSHOT_GPU_OBJECT_GENERIC 3 +#define SNAPSHOT_GPU_OBJECT_DRAW 4 +#define SNAPSHOT_GPU_OBJECT_GLOBAL 5 + +struct kgsl_snapshot_gpu_object { + int type; /* Type of GPU object */ + __u32 gpuaddr; /* GPU address of the the object */ + __u32 ptbase; /* Base for the pagetable the GPU address is valid in */ + int size; /* Size of the object (in dwords) */ +}; + +struct kgsl_snapshot_gpu_object_v2 { + int type; /* Type of GPU object */ + __u64 gpuaddr; /* GPU address of the the object */ + __u64 ptbase; /* Base for the pagetable the GPU address is valid in */ + __u64 size; /* Size of the object (in dwords) */ +} __packed; + +struct kgsl_device; +struct kgsl_process_private; + +void kgsl_snapshot_push_object(struct kgsl_device *device, + struct kgsl_process_private *process, + uint64_t gpuaddr, uint64_t dwords); +#endif diff --git a/kgsl_sync.c b/kgsl_sync.c new file mode 100644 index 0000000000..1103b51248 --- /dev/null +++ b/kgsl_sync.c @@ -0,0 +1,884 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2012-2019, 2021, The Linux Foundation. All rights reserved. 
+ */ + +#include +#include +#include + +#include "kgsl_device.h" +#include "kgsl_sync.h" + +static void kgsl_sync_timeline_signal(struct kgsl_sync_timeline *timeline, + unsigned int timestamp); + +static const struct dma_fence_ops kgsl_sync_fence_ops; + +static struct kgsl_sync_fence *kgsl_sync_fence_create( + struct kgsl_context *context, + unsigned int timestamp) +{ + struct kgsl_sync_fence *kfence; + struct kgsl_sync_timeline *ktimeline = context->ktimeline; + unsigned long flags; + + /* Get a refcount to the timeline. Put when released */ + if (!kref_get_unless_zero(&ktimeline->kref)) + return NULL; + + kfence = kzalloc(sizeof(*kfence), GFP_KERNEL); + if (kfence == NULL) { + kgsl_sync_timeline_put(ktimeline); + return NULL; + } + + kfence->parent = ktimeline; + kfence->context_id = context->id; + kfence->timestamp = timestamp; + + dma_fence_init(&kfence->fence, &kgsl_sync_fence_ops, &ktimeline->lock, + ktimeline->fence_context, timestamp); + + /* + * sync_file_create() takes a refcount to the fence. This refcount is + * put when the fence is signaled. + */ + kfence->sync_file = sync_file_create(&kfence->fence); + + if (kfence->sync_file == NULL) { + kgsl_sync_timeline_put(ktimeline); + dev_err(context->device->dev, "Create sync_file failed\n"); + kfree(kfence); + return NULL; + } + + spin_lock_irqsave(&ktimeline->lock, flags); + list_add_tail(&kfence->child_list, &ktimeline->child_list_head); + spin_unlock_irqrestore(&ktimeline->lock, flags); + + return kfence; +} + +static void kgsl_sync_fence_release(struct dma_fence *fence) +{ + struct kgsl_sync_fence *kfence = (struct kgsl_sync_fence *)fence; + + kgsl_sync_timeline_put(kfence->parent); + kfree(kfence); +} + +/* Called with ktimeline->lock held */ +static bool kgsl_sync_fence_has_signaled(struct dma_fence *fence) +{ + struct kgsl_sync_fence *kfence = (struct kgsl_sync_fence *)fence; + struct kgsl_sync_timeline *ktimeline = kfence->parent; + unsigned int ts = kfence->timestamp; + + return (timestamp_cmp(ktimeline->last_timestamp, ts) >= 0); +} + +static bool kgsl_enable_signaling(struct dma_fence *fence) +{ + return !kgsl_sync_fence_has_signaled(fence); +} + +struct kgsl_sync_fence_event_priv { + struct kgsl_context *context; + unsigned int timestamp; +}; + +/** + * kgsl_sync_fence_event_cb - Event callback for a fence timestamp event + * @device - The KGSL device that expired the timestamp + * @context- Pointer to the context that owns the event + * @priv: Private data for the callback + * @result - Result of the event (retired or canceled) + * + * Signal a fence following the expiration of a timestamp + */ + +static void kgsl_sync_fence_event_cb(struct kgsl_device *device, + struct kgsl_event_group *group, void *priv, int result) +{ + struct kgsl_sync_fence_event_priv *ev = priv; + + kgsl_sync_timeline_signal(ev->context->ktimeline, ev->timestamp); + kgsl_context_put(ev->context); + kfree(ev); +} + +static int _add_fence_event(struct kgsl_device *device, + struct kgsl_context *context, unsigned int timestamp) +{ + struct kgsl_sync_fence_event_priv *event; + int ret; + + event = kmalloc(sizeof(*event), GFP_KERNEL); + if (event == NULL) + return -ENOMEM; + + /* + * Increase the refcount for the context to keep it through the + * callback + */ + if (!_kgsl_context_get(context)) { + kfree(event); + return -ENOENT; + } + + event->context = context; + event->timestamp = timestamp; + + ret = kgsl_add_event(device, &context->events, timestamp, + kgsl_sync_fence_event_cb, event); + + if (ret) { + kgsl_context_put(context); + kfree(event); + 
} + + return ret; +} + +/* Only to be used if creating a related event failed */ +static void kgsl_sync_cancel(struct kgsl_sync_fence *kfence) +{ + spin_lock(&kfence->parent->lock); + if (!list_empty(&kfence->child_list)) { + list_del_init(&kfence->child_list); + dma_fence_put(&kfence->fence); + } + spin_unlock(&kfence->parent->lock); +} + +/** + * kgsl_add_fence_event - Create a new fence event + * @device - KGSL device to create the event on + * @timestamp - Timestamp to trigger the event + * @data - Return fence fd stored in struct kgsl_timestamp_event_fence + * @len - length of the fence event + * @owner - driver instance that owns this event + * @returns 0 on success or error code on error + * + * Create a fence and register an event to signal the fence when + * the timestamp expires + */ + +int kgsl_add_fence_event(struct kgsl_device *device, + u32 context_id, u32 timestamp, void __user *data, int len, + struct kgsl_device_private *owner) +{ + struct kgsl_timestamp_event_fence priv; + struct kgsl_context *context; + struct kgsl_sync_fence *kfence = NULL; + int ret = -EINVAL; + unsigned int cur; + + priv.fence_fd = -1; + + if (len != sizeof(priv)) + return -EINVAL; + + context = kgsl_context_get_owner(owner, context_id); + + if (context == NULL) + return -EINVAL; + + if (test_bit(KGSL_CONTEXT_PRIV_INVALID, &context->priv)) + goto out; + + kfence = kgsl_sync_fence_create(context, timestamp); + if (kfence == NULL) { + ret = -ENOMEM; + goto out; + } + + priv.fence_fd = get_unused_fd_flags(0); + if (priv.fence_fd < 0) { + dev_crit_ratelimited(device->dev, + "Unable to get a file descriptor: %d\n", + priv.fence_fd); + ret = priv.fence_fd; + goto out; + } + + /* + * If the timestamp hasn't expired yet create an event to trigger it. + * Otherwise, just signal the fence - there is no reason to go through + * the effort of creating a fence we don't need. + */ + + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED, &cur); + + if (timestamp_cmp(cur, timestamp) >= 0) { + ret = 0; + kgsl_sync_timeline_signal(context->ktimeline, cur); + } else { + ret = _add_fence_event(device, context, timestamp); + if (ret) + goto out; + } + + if (copy_to_user(data, &priv, sizeof(priv))) { + ret = -EFAULT; + goto out; + } + fd_install(priv.fence_fd, kfence->sync_file->file); + +out: + kgsl_context_put(context); + if (ret) { + if (priv.fence_fd >= 0) + put_unused_fd(priv.fence_fd); + + if (kfence) { + kgsl_sync_cancel(kfence); + /* + * Put the refcount of sync file. This will release + * kfence->fence as well. + */ + fput(kfence->sync_file->file); + } + } + return ret; +} + +static void kgsl_sync_timeline_value_str(struct dma_fence *fence, + char *str, int size) +{ + struct kgsl_sync_fence *kfence = (struct kgsl_sync_fence *)fence; + struct kgsl_sync_timeline *ktimeline = kfence->parent; + struct kgsl_context *context = NULL; + unsigned long flags; + int ret = 0; + + unsigned int timestamp_retired; + unsigned int timestamp_queued; + + if (!kref_get_unless_zero(&ktimeline->kref)) + return; + if (!ktimeline->device) + goto put_timeline; + + spin_lock_irqsave(&ktimeline->lock, flags); + ret = _kgsl_context_get(ktimeline->context); + context = ret ? 
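+ /* only keep the context pointer if the reference was taken */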
ktimeline->context : NULL; + spin_unlock_irqrestore(&ktimeline->lock, flags); + + /* Get the last signaled timestamp if the context is not valid */ + timestamp_queued = ktimeline->last_timestamp; + timestamp_retired = timestamp_queued; + if (context) { + kgsl_readtimestamp(ktimeline->device, context, + KGSL_TIMESTAMP_RETIRED, ×tamp_retired); + + kgsl_readtimestamp(ktimeline->device, context, + KGSL_TIMESTAMP_QUEUED, ×tamp_queued); + + kgsl_context_put(context); + } + + snprintf(str, size, "%u queued:%u retired:%u", + ktimeline->last_timestamp, + timestamp_queued, timestamp_retired); + +put_timeline: + kgsl_sync_timeline_put(ktimeline); +} + +static void kgsl_sync_fence_value_str(struct dma_fence *fence, + char *str, int size) +{ + struct kgsl_sync_fence *kfence = (struct kgsl_sync_fence *)fence; + + snprintf(str, size, "%u", kfence->timestamp); +} + +static const char *kgsl_sync_fence_driver_name(struct dma_fence *fence) +{ + return "kgsl-timeline"; +} + +static const char *kgsl_sync_timeline_name(struct dma_fence *fence) +{ + struct kgsl_sync_fence *kfence = (struct kgsl_sync_fence *)fence; + struct kgsl_sync_timeline *ktimeline = kfence->parent; + + return ktimeline->name; +} + +int kgsl_sync_timeline_create(struct kgsl_context *context) +{ + struct kgsl_sync_timeline *ktimeline; + + /* Put context at detach time */ + if (!_kgsl_context_get(context)) + return -ENOENT; + + ktimeline = kzalloc(sizeof(*ktimeline), GFP_KERNEL); + if (ktimeline == NULL) { + kgsl_context_put(context); + return -ENOMEM; + } + + kref_init(&ktimeline->kref); + snprintf(ktimeline->name, sizeof(ktimeline->name), + "%s_%d-%.15s(%d)-%.15s(%d)", + context->device->name, context->id, + current->group_leader->comm, current->group_leader->pid, + current->comm, current->pid); + + ktimeline->fence_context = dma_fence_context_alloc(1); + ktimeline->last_timestamp = 0; + INIT_LIST_HEAD(&ktimeline->child_list_head); + spin_lock_init(&ktimeline->lock); + ktimeline->device = context->device; + + /* + * The context pointer is valid till detach time, where we put the + * refcount on the context + */ + ktimeline->context = context; + + context->ktimeline = ktimeline; + + return 0; +} + +static void kgsl_sync_timeline_signal(struct kgsl_sync_timeline *ktimeline, + unsigned int timestamp) +{ + unsigned long flags; + struct kgsl_sync_fence *kfence, *next; + + if (!kref_get_unless_zero(&ktimeline->kref)) + return; + + spin_lock_irqsave(&ktimeline->lock, flags); + if (timestamp_cmp(timestamp, ktimeline->last_timestamp) > 0) + ktimeline->last_timestamp = timestamp; + + list_for_each_entry_safe(kfence, next, &ktimeline->child_list_head, + child_list) { + if (dma_fence_is_signaled_locked(&kfence->fence)) { + list_del_init(&kfence->child_list); + dma_fence_put(&kfence->fence); + } + } + + spin_unlock_irqrestore(&ktimeline->lock, flags); + kgsl_sync_timeline_put(ktimeline); +} + +void kgsl_sync_timeline_detach(struct kgsl_sync_timeline *ktimeline) +{ + unsigned long flags; + struct kgsl_context *context = ktimeline->context; + + /* Set context pointer to NULL and drop our refcount on the context */ + spin_lock_irqsave(&ktimeline->lock, flags); + ktimeline->context = NULL; + spin_unlock_irqrestore(&ktimeline->lock, flags); + kgsl_context_put(context); +} + +static void kgsl_sync_timeline_destroy(struct kref *kref) +{ + struct kgsl_sync_timeline *ktimeline = + container_of(kref, struct kgsl_sync_timeline, kref); + + kfree(ktimeline); +} + +void kgsl_sync_timeline_put(struct kgsl_sync_timeline *ktimeline) +{ + if (ktimeline) + 
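+ /* the final put frees the timeline via kgsl_sync_timeline_destroy() */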
kref_put(&ktimeline->kref, kgsl_sync_timeline_destroy); +} + +static const struct dma_fence_ops kgsl_sync_fence_ops = { + .get_driver_name = kgsl_sync_fence_driver_name, + .get_timeline_name = kgsl_sync_timeline_name, + .enable_signaling = kgsl_enable_signaling, + .signaled = kgsl_sync_fence_has_signaled, + .wait = dma_fence_default_wait, + .release = kgsl_sync_fence_release, + + .fence_value_str = kgsl_sync_fence_value_str, + .timeline_value_str = kgsl_sync_timeline_value_str, +}; + +static void kgsl_sync_fence_callback(struct dma_fence *fence, + struct dma_fence_cb *cb) +{ + struct kgsl_sync_fence_cb *kcb = (struct kgsl_sync_fence_cb *)cb; + + /* + * If the callback is marked for cancellation in a separate thread, + * let the other thread do the cleanup. + */ + if (kcb->func(kcb->priv)) { + dma_fence_put(kcb->fence); + kfree(kcb); + } +} + +static void kgsl_get_fence_names(struct dma_fence *fence, + struct event_fence_info *info_ptr) +{ + unsigned int num_fences; + struct dma_fence **fences; + struct dma_fence_array *array; + int i; + + if (!info_ptr) + return; + + array = to_dma_fence_array(fence); + + if (array != NULL) { + num_fences = array->num_fences; + fences = array->fences; + } else { + num_fences = 1; + fences = &fence; + } + + info_ptr->fences = kcalloc(num_fences, sizeof(struct fence_info), + GFP_ATOMIC); + if (info_ptr->fences == NULL) + return; + + info_ptr->num_fences = num_fences; + + for (i = 0; i < num_fences; i++) { + struct dma_fence *f = fences[i]; + struct fence_info *fi = &info_ptr->fences[i]; + int len; + + len = scnprintf(fi->name, sizeof(fi->name), "%s %s", + f->ops->get_driver_name(f), + f->ops->get_timeline_name(f)); + + if (f->ops->fence_value_str) { + len += scnprintf(fi->name + len, sizeof(fi->name) - len, + ": "); + f->ops->fence_value_str(f, fi->name + len, + sizeof(fi->name) - len); + } + } +} + +struct kgsl_sync_fence_cb *kgsl_sync_fence_async_wait(int fd, + bool (*func)(void *priv), void *priv, struct event_fence_info *info_ptr) +{ + struct kgsl_sync_fence_cb *kcb; + struct dma_fence *fence; + int status; + + fence = sync_file_get_fence(fd); + if (fence == NULL) + return ERR_PTR(-EINVAL); + + /* create the callback */ + kcb = kzalloc(sizeof(*kcb), GFP_ATOMIC); + if (kcb == NULL) { + dma_fence_put(fence); + return ERR_PTR(-ENOMEM); + } + + kcb->fence = fence; + kcb->priv = priv; + kcb->func = func; + + kgsl_get_fence_names(fence, info_ptr); + + /* if status then error or signaled */ + status = dma_fence_add_callback(fence, &kcb->fence_cb, + kgsl_sync_fence_callback); + + if (status) { + kfree(kcb); + if (!dma_fence_is_signaled(fence)) + kcb = ERR_PTR(status); + else + kcb = NULL; + dma_fence_put(fence); + } + + return kcb; +} + +/* + * Cancel the fence async callback and do the cleanup. The caller must make + * sure that the callback (if run before cancelling) returns false, so that + * no other thread frees the pointer. + */ +void kgsl_sync_fence_async_cancel(struct kgsl_sync_fence_cb *kcb) +{ + if (kcb == NULL) + return; + + /* + * After fence_remove_callback() returns, the fence callback is + * either not called at all, or completed without freeing kcb. + * This thread can then put the fence refcount and free kcb. 
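+ * (dma_fence callbacks are invoked with the fence lock held, and
+ * dma_fence_remove_callback() takes the same lock, so any callback
+ * that was already running has completed by the time it returns.)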
+ */ + dma_fence_remove_callback(kcb->fence, &kcb->fence_cb); + dma_fence_put(kcb->fence); + kfree(kcb); +} + +struct kgsl_syncsource { + struct kref refcount; + char name[32]; + int id; + struct kgsl_process_private *private; + struct list_head child_list_head; + spinlock_t lock; +}; + +struct kgsl_syncsource_fence { + struct dma_fence fence; + struct kgsl_syncsource *parent; + struct list_head child_list; +}; + +static const struct dma_fence_ops kgsl_syncsource_fence_ops; + +long kgsl_ioctl_syncsource_create(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_syncsource *syncsource = NULL; + struct kgsl_syncsource_create *param = data; + int ret = -EINVAL; + int id = 0; + struct kgsl_process_private *private = dev_priv->process_priv; + + if (!kgsl_process_private_get(private)) + return ret; + + syncsource = kzalloc(sizeof(*syncsource), GFP_KERNEL); + if (syncsource == NULL) { + ret = -ENOMEM; + goto out; + } + + kref_init(&syncsource->refcount); + snprintf(syncsource->name, sizeof(syncsource->name), + "kgsl-syncsource-pid-%d", current->group_leader->pid); + syncsource->private = private; + INIT_LIST_HEAD(&syncsource->child_list_head); + spin_lock_init(&syncsource->lock); + + idr_preload(GFP_KERNEL); + spin_lock(&private->syncsource_lock); + id = idr_alloc(&private->syncsource_idr, syncsource, 1, 0, GFP_NOWAIT); + if (id > 0) { + syncsource->id = id; + param->id = id; + ret = 0; + } else { + ret = id; + } + + spin_unlock(&private->syncsource_lock); + idr_preload_end(); + +out: + if (ret) { + kgsl_process_private_put(private); + kfree(syncsource); + } + + return ret; +} + +static struct kgsl_syncsource * +kgsl_syncsource_get(struct kgsl_process_private *private, int id) +{ + int result = 0; + struct kgsl_syncsource *syncsource = NULL; + + spin_lock(&private->syncsource_lock); + + syncsource = idr_find(&private->syncsource_idr, id); + if (syncsource) + result = kref_get_unless_zero(&syncsource->refcount); + + spin_unlock(&private->syncsource_lock); + + return result ? syncsource : NULL; +} + +static void kgsl_syncsource_destroy(struct kref *kref) +{ + struct kgsl_syncsource *syncsource = container_of(kref, + struct kgsl_syncsource, + refcount); + + struct kgsl_process_private *private = syncsource->private; + + /* Done with process private. 
Release the refcount */ + kgsl_process_private_put(private); + + kfree(syncsource); +} + +void kgsl_syncsource_put(struct kgsl_syncsource *syncsource) +{ + if (syncsource) + kref_put(&syncsource->refcount, kgsl_syncsource_destroy); +} + +static void kgsl_syncsource_cleanup(struct kgsl_process_private *private, + struct kgsl_syncsource *syncsource) +{ + struct kgsl_syncsource_fence *sfence, *next; + + /* Signal all fences to release any callbacks */ + spin_lock(&syncsource->lock); + + list_for_each_entry_safe(sfence, next, &syncsource->child_list_head, + child_list) { + dma_fence_signal_locked(&sfence->fence); + list_del_init(&sfence->child_list); + } + + spin_unlock(&syncsource->lock); + + /* put reference from syncsource creation */ + kgsl_syncsource_put(syncsource); +} + +long kgsl_ioctl_syncsource_destroy(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_syncsource_destroy *param = data; + struct kgsl_syncsource *syncsource = NULL; + struct kgsl_process_private *private = dev_priv->process_priv; + + spin_lock(&private->syncsource_lock); + syncsource = idr_find(&private->syncsource_idr, param->id); + + if (syncsource == NULL) { + spin_unlock(&private->syncsource_lock); + return -EINVAL; + } + + if (syncsource->id != 0) { + idr_remove(&private->syncsource_idr, syncsource->id); + syncsource->id = 0; + } + spin_unlock(&private->syncsource_lock); + + kgsl_syncsource_cleanup(private, syncsource); + return 0; +} + +long kgsl_ioctl_syncsource_create_fence(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_syncsource_create_fence *param = data; + struct kgsl_syncsource *syncsource = NULL; + int ret = -EINVAL; + struct kgsl_syncsource_fence *sfence = NULL; + struct sync_file *sync_file = NULL; + int fd = -1; + + /* + * Take a refcount that is released when the fence is released + * (or if fence can't be added to the syncsource). + */ + syncsource = kgsl_syncsource_get(dev_priv->process_priv, + param->id); + if (syncsource == NULL) + goto out; + + sfence = kzalloc(sizeof(*sfence), GFP_KERNEL); + if (sfence == NULL) { + ret = -ENOMEM; + goto out; + } + sfence->parent = syncsource; + + /* Use a new fence context for each fence */ + dma_fence_init(&sfence->fence, &kgsl_syncsource_fence_ops, + &syncsource->lock, dma_fence_context_alloc(1), 1); + + sync_file = sync_file_create(&sfence->fence); + + if (sync_file == NULL) { + dev_err(dev_priv->device->dev, + "Create sync_file failed\n"); + ret = -ENOMEM; + goto out; + } + + fd = get_unused_fd_flags(0); + if (fd < 0) { + ret = -EBADF; + goto out; + } + ret = 0; + + fd_install(fd, sync_file->file); + + param->fence_fd = fd; + + spin_lock(&syncsource->lock); + list_add_tail(&sfence->child_list, &syncsource->child_list_head); + spin_unlock(&syncsource->lock); +out: + /* + * We're transferring ownership of the fence to the sync file. + * The sync file takes an extra refcount when it is created, so put + * our refcount. 
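+ * On the error paths below, either the sync file's fput() drops the
+ * fence, or the fence (or, failing that, the syncsource reference)
+ * is dropped directly.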
+ */ + if (sync_file) + dma_fence_put(&sfence->fence); + + if (ret) { + if (sync_file) + fput(sync_file->file); + else if (sfence) + dma_fence_put(&sfence->fence); + else + kgsl_syncsource_put(syncsource); + } + + return ret; +} + +static int kgsl_syncsource_signal(struct kgsl_syncsource *syncsource, + struct dma_fence *fence) +{ + struct kgsl_syncsource_fence *sfence, *next; + int ret = -EINVAL; + + spin_lock(&syncsource->lock); + + list_for_each_entry_safe(sfence, next, &syncsource->child_list_head, + child_list) { + if (fence == &sfence->fence) { + dma_fence_signal_locked(fence); + list_del_init(&sfence->child_list); + + ret = 0; + break; + } + } + + spin_unlock(&syncsource->lock); + + return ret; +} + +long kgsl_ioctl_syncsource_signal_fence(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + int ret = -EINVAL; + struct kgsl_syncsource_signal_fence *param = data; + struct kgsl_syncsource *syncsource = NULL; + struct dma_fence *fence = NULL; + + syncsource = kgsl_syncsource_get(dev_priv->process_priv, + param->id); + if (syncsource == NULL) + goto out; + + fence = sync_file_get_fence(param->fence_fd); + if (fence == NULL) { + ret = -EBADF; + goto out; + } + + ret = kgsl_syncsource_signal(syncsource, fence); +out: + if (fence) + dma_fence_put(fence); + if (syncsource) + kgsl_syncsource_put(syncsource); + return ret; +} + +static void kgsl_syncsource_fence_release(struct dma_fence *fence) +{ + struct kgsl_syncsource_fence *sfence = + (struct kgsl_syncsource_fence *)fence; + + /* Signal if it's not signaled yet */ + kgsl_syncsource_signal(sfence->parent, fence); + + /* Release the refcount on the syncsource */ + kgsl_syncsource_put(sfence->parent); + + kfree(sfence); +} + +void kgsl_syncsource_process_release_syncsources( + struct kgsl_process_private *private) +{ + struct kgsl_syncsource *syncsource; + int next = 0; + + while (1) { + spin_lock(&private->syncsource_lock); + syncsource = idr_get_next(&private->syncsource_idr, &next); + + if (syncsource == NULL) { + spin_unlock(&private->syncsource_lock); + break; + } + + if (syncsource->id != 0) { + idr_remove(&private->syncsource_idr, syncsource->id); + syncsource->id = 0; + } + spin_unlock(&private->syncsource_lock); + + kgsl_syncsource_cleanup(private, syncsource); + next = next + 1; + } +} + +static const char *kgsl_syncsource_get_timeline_name(struct dma_fence *fence) +{ + struct kgsl_syncsource_fence *sfence = + (struct kgsl_syncsource_fence *)fence; + struct kgsl_syncsource *syncsource = sfence->parent; + + return syncsource->name; +} + +static bool kgsl_syncsource_enable_signaling(struct dma_fence *fence) +{ + return true; +} + +static const char *kgsl_syncsource_driver_name(struct dma_fence *fence) +{ + return "kgsl-syncsource-timeline"; +} + +static void kgsl_syncsource_fence_value_str(struct dma_fence *fence, + char *str, int size) +{ + /* + * Each fence is independent of the others on the same timeline. + * We use a different context for each of them. 
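+ * (Each syncsource fence is initialised with its own
+ * dma_fence_context_alloc(1) context and seqno 1, so the context
+ * number is the only meaningful value to print here.)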
+ */ + snprintf(str, size, "%llu", fence->context); +} + +static const struct dma_fence_ops kgsl_syncsource_fence_ops = { + .get_driver_name = kgsl_syncsource_driver_name, + .get_timeline_name = kgsl_syncsource_get_timeline_name, + .enable_signaling = kgsl_syncsource_enable_signaling, + .wait = dma_fence_default_wait, + .release = kgsl_syncsource_fence_release, + + .fence_value_str = kgsl_syncsource_fence_value_str, +}; + diff --git a/kgsl_sync.h b/kgsl_sync.h new file mode 100644 index 0000000000..b8655ddc9c --- /dev/null +++ b/kgsl_sync.h @@ -0,0 +1,184 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2012-2014,2018-2019, 2021, The Linux Foundation. All rights reserved. + */ +#ifndef __KGSL_SYNC_H +#define __KGSL_SYNC_H + +#include + +/** + * struct kgsl_sync_timeline - A sync timeline associated with a kgsl context + * @kref: Refcount to keep the struct alive until all its fences are signaled, + and as long as the context exists + * @name: String to describe this timeline + * @fence_context: Used by the fence driver to identify fences belonging to + * this context + * @child_list_head: List head for all fences on this timeline + * @lock: Spinlock to protect this timeline + * @last_timestamp: Last timestamp when signaling fences + * @device: kgsl device + * @context: kgsl context + */ +struct kgsl_sync_timeline { + struct kref kref; + char name[32]; + + u64 fence_context; + + struct list_head child_list_head; + + spinlock_t lock; + unsigned int last_timestamp; + struct kgsl_device *device; + struct kgsl_context *context; +}; + +/** + * struct kgsl_sync_fence - A struct containing a fence and other data + * associated with it + * @fence: The fence struct + * @sync_file: Pointer to the sync file + * @parent: Pointer to the kgsl sync timeline this fence is on + * @child_list: List of fences on the same timeline + * @context_id: kgsl context id + * @timestamp: Context timestamp that this fence is associated with + */ +struct kgsl_sync_fence { + struct dma_fence fence; + struct sync_file *sync_file; + struct kgsl_sync_timeline *parent; + struct list_head child_list; + u32 context_id; + unsigned int timestamp; +}; + +/** + * struct kgsl_sync_fence_cb - Used for fence callbacks + * fence_cb: Fence callback struct + * fence: Pointer to the fence for which the callback is done + * priv: Private data for the callback + * func: Pointer to the kgsl function to call. This function should return + * false if the sync callback is marked for cancellation in a separate thread. 
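+ * When it returns true, kgsl_sync_fence_callback() drops the fence
+ * reference and frees the kgsl_sync_fence_cb itself.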
+ */ +struct kgsl_sync_fence_cb { + struct dma_fence_cb fence_cb; + struct dma_fence *fence; + void *priv; + bool (*func)(void *priv); +}; + +struct kgsl_device_private; +struct kgsl_drawobj_sync_event; +struct event_fence_info; +struct kgsl_process_private; +struct kgsl_syncsource; + +#if defined(CONFIG_SYNC_FILE) +int kgsl_add_fence_event(struct kgsl_device *device, + u32 context_id, u32 timestamp, void __user *data, int len, + struct kgsl_device_private *owner); + +int kgsl_sync_timeline_create(struct kgsl_context *context); + +void kgsl_sync_timeline_detach(struct kgsl_sync_timeline *ktimeline); + +void kgsl_sync_timeline_put(struct kgsl_sync_timeline *ktimeline); + +struct kgsl_sync_fence_cb *kgsl_sync_fence_async_wait(int fd, + bool (*func)(void *priv), void *priv, + struct event_fence_info *info_ptr); + +void kgsl_sync_fence_async_cancel(struct kgsl_sync_fence_cb *kcb); + +long kgsl_ioctl_syncsource_create(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_syncsource_destroy(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_syncsource_create_fence(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_syncsource_signal_fence(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); + +void kgsl_syncsource_put(struct kgsl_syncsource *syncsource); + +void kgsl_syncsource_process_release_syncsources( + struct kgsl_process_private *private); + +#else +static inline int kgsl_add_fence_event(struct kgsl_device *device, + u32 context_id, u32 timestamp, void __user *data, int len, + struct kgsl_device_private *owner) +{ + return -EINVAL; +} + +static inline int kgsl_sync_timeline_create(struct kgsl_context *context) +{ + context->ktimeline = NULL; + return 0; +} + +static inline void kgsl_sync_timeline_detach(struct kgsl_sync_timeline *ktimeline) +{ +} + +static inline void kgsl_sync_timeline_put(struct kgsl_sync_timeline *ktimeline) +{ +} + + +static inline struct kgsl_sync_fence_cb *kgsl_sync_fence_async_wait(int fd, + bool (*func)(void *priv), void *priv, + struct event_fence_info *info_ptr) +{ + return NULL; +} + +static inline void +kgsl_sync_fence_async_cancel(struct kgsl_sync_fence_cb *kcb) +{ +} + +static inline long +kgsl_ioctl_syncsource_create(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + return -ENOIOCTLCMD; +} + +static inline long +kgsl_ioctl_syncsource_destroy(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + return -ENOIOCTLCMD; +} + +static inline long +kgsl_ioctl_syncsource_create_fence(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + return -ENOIOCTLCMD; +} + +static inline long +kgsl_ioctl_syncsource_signal_fence(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + return -ENOIOCTLCMD; +} + +static inline void kgsl_syncsource_put(struct kgsl_syncsource *syncsource) +{ + +} + +static inline void kgsl_syncsource_process_release_syncsources( + struct kgsl_process_private *private) +{ + +} + +#endif /* CONFIG_SYNC_FILE */ + +#endif /* __KGSL_SYNC_H */ diff --git a/kgsl_sysfs.h b/kgsl_sysfs.h new file mode 100644 index 0000000000..1afceb3f48 --- /dev/null +++ b/kgsl_sysfs.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2020, The Linux Foundation. All rights reserved. 
+ */ +#ifndef _KGSL_SYSFS_H_ +#define _KGSL_SYSFS_H_ + +struct kgsl_device; + +/** + * struct kgsl_gpu_sysfs_attr - Attribute definition for sysfs objects in the + * /sys/kernel/gpu kobject + */ +struct kgsl_gpu_sysfs_attr { + /** @attr: Attribute for the sysfs node */ + struct attribute attr; + /** @show: Show function for the node */ + ssize_t (*show)(struct kgsl_device *device, char *buf); + /** @store: Store function for the node */ + ssize_t (*store)(struct kgsl_device *device, const char *buf, + size_t count); +}; + +#define GPU_SYSFS_ATTR(_name, _mode, _show, _store) \ +const struct kgsl_gpu_sysfs_attr gpu_sysfs_attr_##_name = { \ + .attr = { .name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ +} + +#endif diff --git a/kgsl_timeline.c b/kgsl_timeline.c new file mode 100644 index 0000000000..d7b64abab4 --- /dev/null +++ b/kgsl_timeline.c @@ -0,0 +1,551 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. + */ + +#include +#include +#include +#include +#include + +#include "kgsl_device.h" +#include "kgsl_eventlog.h" +#include "kgsl_sharedmem.h" +#include "kgsl_timeline.h" +#include "kgsl_trace.h" + +struct kgsl_timeline_fence { + struct dma_fence base; + struct kgsl_timeline *timeline; + struct list_head node; +}; + +struct dma_fence *kgsl_timelines_to_fence_array(struct kgsl_device *device, + u64 timelines, u32 count, u64 usize, bool any) +{ + void __user *uptr = u64_to_user_ptr(timelines); + struct dma_fence_array *array; + struct dma_fence **fences; + int i, ret = 0; + + if (!count || count > INT_MAX) + return ERR_PTR(-EINVAL); + + fences = kcalloc(count, sizeof(*fences), + GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); + + if (!fences) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < count; i++) { + struct kgsl_timeline_val val; + struct kgsl_timeline *timeline; + + if (copy_struct_from_user(&val, sizeof(val), uptr, usize)) { + ret = -EFAULT; + goto err; + } + + if (val.padding) { + ret = -EINVAL; + goto err; + } + + timeline = kgsl_timeline_by_id(device, val.timeline); + if (!timeline) { + ret = -ENOENT; + goto err; + } + + fences[i] = kgsl_timeline_fence_alloc(timeline, val.seqno); + kgsl_timeline_put(timeline); + + if (IS_ERR(fences[i])) { + ret = PTR_ERR(fences[i]); + goto err; + } + + uptr += usize; + } + + /* No need for a fence array for only one fence */ + if (count == 1) { + struct dma_fence *fence = fences[0]; + + kfree(fences); + return fence; + } + + array = dma_fence_array_create(count, fences, + dma_fence_context_alloc(1), 0, any); + + if (array) + return &array->base; + + ret = -ENOMEM; +err: + for (i = 0; i < count; i++) { + if (!IS_ERR_OR_NULL(fences[i])) + dma_fence_put(fences[i]); + } + + kfree(fences); + return ERR_PTR(ret); +} + +void kgsl_timeline_destroy(struct kref *kref) +{ + struct kgsl_timeline *timeline = container_of(kref, + struct kgsl_timeline, ref); + + WARN_ON(!list_empty(&timeline->fences)); + + trace_kgsl_timeline_destroy(timeline->id); + + kfree(timeline); +} + +struct kgsl_timeline *kgsl_timeline_get(struct kgsl_timeline *timeline) +{ + if (timeline) { + if (!kref_get_unless_zero(&timeline->ref)) + return NULL; + } + + return timeline; +} + +static struct kgsl_timeline *kgsl_timeline_alloc(struct kgsl_device_private *dev_priv, + u64 initial) +{ + struct kgsl_device *device = dev_priv->device; + struct kgsl_timeline *timeline; + int id; + + timeline = kzalloc(sizeof(*timeline), GFP_KERNEL); + if (!timeline) + return ERR_PTR(-ENOMEM); + + 
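+ /*
+ * idr_preload() pre-allocates nodes outside the spinlock so that the
+ * GFP_NOWAIT idr_alloc() below is unlikely to fail; the timeline
+ * pointer itself is attached to the ID only after the structure is
+ * fully initialised.
+ */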
idr_preload(GFP_KERNEL); + spin_lock(&device->timelines_lock); + /* Allocate the ID but don't attach the pointer just yet */ + id = idr_alloc(&device->timelines, NULL, 1, 0, GFP_NOWAIT); + spin_unlock(&device->timelines_lock); + idr_preload_end(); + + if (id < 0) { + kfree(timeline); + return ERR_PTR(id); + } + + timeline->context = dma_fence_context_alloc(1); + timeline->id = id; + INIT_LIST_HEAD(&timeline->fences); + timeline->value = initial; + timeline->dev_priv = dev_priv; + + snprintf((char *) timeline->name, sizeof(timeline->name), + "kgsl-sw-timeline-%d", id); + + trace_kgsl_timeline_alloc(id, initial); + + spin_lock_init(&timeline->lock); + spin_lock_init(&timeline->fence_lock); + + kref_init(&timeline->ref); + + return timeline; +} + +static struct kgsl_timeline_fence *to_timeline_fence(struct dma_fence *fence) +{ + return container_of(fence, struct kgsl_timeline_fence, base); +} + +static void timeline_fence_release(struct dma_fence *fence) +{ + struct kgsl_timeline_fence *f = to_timeline_fence(fence); + struct kgsl_timeline *timeline = f->timeline; + struct kgsl_timeline_fence *cur, *temp; + unsigned long flags; + + spin_lock_irqsave(&timeline->fence_lock, flags); + + /* If the fence is still on the active list, remove it */ + list_for_each_entry_safe(cur, temp, &timeline->fences, node) { + if (f != cur) + continue; + + list_del_init(&f->node); + break; + } + spin_unlock_irqrestore(&timeline->fence_lock, flags); + trace_kgsl_timeline_fence_release(f->timeline->id, fence->seqno); + log_kgsl_timeline_fence_release_event(f->timeline->id, fence->seqno); + + kgsl_timeline_put(f->timeline); + dma_fence_free(fence); +} + +static bool timeline_fence_signaled(struct dma_fence *fence) +{ + struct kgsl_timeline_fence *f = to_timeline_fence(fence); + + return !__dma_fence_is_later(fence->seqno, f->timeline->value, + fence->ops); +} + +static const char *timeline_get_driver_name(struct dma_fence *fence) +{ + return "kgsl-sw-timeline"; +} + +static const char *timeline_get_timeline_name(struct dma_fence *fence) +{ + struct kgsl_timeline_fence *f = to_timeline_fence(fence); + + return f->timeline->name; +} + +static void timeline_get_value_str(struct dma_fence *fence, + char *str, int size) +{ + struct kgsl_timeline_fence *f = to_timeline_fence(fence); + + snprintf(str, size, "%lld", f->timeline->value); +} + +static const struct dma_fence_ops timeline_fence_ops = { + .get_driver_name = timeline_get_driver_name, + .get_timeline_name = timeline_get_timeline_name, + .signaled = timeline_fence_signaled, + .release = timeline_fence_release, + .timeline_value_str = timeline_get_value_str, + .use_64bit_seqno = true, +}; + +static void kgsl_timeline_add_fence(struct kgsl_timeline *timeline, + struct kgsl_timeline_fence *fence) +{ + struct kgsl_timeline_fence *entry; + unsigned long flags; + + spin_lock_irqsave(&timeline->fence_lock, flags); + list_for_each_entry(entry, &timeline->fences, node) { + if (fence->base.seqno < entry->base.seqno) { + list_add_tail(&fence->node, &entry->node); + spin_unlock_irqrestore(&timeline->fence_lock, flags); + return; + } + } + + list_add_tail(&fence->node, &timeline->fences); + spin_unlock_irqrestore(&timeline->fence_lock, flags); +} + +void kgsl_timeline_signal(struct kgsl_timeline *timeline, u64 seqno) +{ + struct kgsl_timeline_fence *fence, *tmp; + struct list_head temp; + + INIT_LIST_HEAD(&temp); + + spin_lock_irq(&timeline->lock); + + if (seqno < timeline->value) + goto unlock; + + trace_kgsl_timeline_signal(timeline->id, seqno); + + timeline->value = seqno; + 
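+	/*
+	 * Collect the fences that have now expired on a private list under
+	 * fence_lock, holding a reference on each so they cannot be freed,
+	 * then signal them while still holding timeline->lock, which is the
+	 * lock handed to dma_fence_init() and therefore the one that
+	 * dma_fence_signal_locked() expects to be held.
+	 */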
+ spin_lock(&timeline->fence_lock); + list_for_each_entry_safe(fence, tmp, &timeline->fences, node) { + if (timeline_fence_signaled(&fence->base)) { + dma_fence_get(&fence->base); + list_move(&fence->node, &temp); + } + } + spin_unlock(&timeline->fence_lock); + + list_for_each_entry_safe(fence, tmp, &temp, node) { + dma_fence_signal_locked(&fence->base); + dma_fence_put(&fence->base); + } + +unlock: + spin_unlock_irq(&timeline->lock); +} + +struct dma_fence *kgsl_timeline_fence_alloc(struct kgsl_timeline *timeline, + u64 seqno) +{ + struct kgsl_timeline_fence *fence; + + fence = kzalloc(sizeof(*fence), GFP_KERNEL); + if (!fence) + return ERR_PTR(-ENOMEM); + + fence->timeline = kgsl_timeline_get(timeline); + if (!fence->timeline) { + kfree(fence); + return ERR_PTR(-ENOENT); + } + + dma_fence_init(&fence->base, &timeline_fence_ops, + &timeline->lock, timeline->context, seqno); + + INIT_LIST_HEAD(&fence->node); + + if (!dma_fence_is_signaled(&fence->base)) + kgsl_timeline_add_fence(timeline, fence); + + trace_kgsl_timeline_fence_alloc(timeline->id, seqno); + log_kgsl_timeline_fence_alloc_event(timeline->id, seqno); + + return &fence->base; +} + +long kgsl_ioctl_timeline_create(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_device *device = dev_priv->device; + struct kgsl_timeline_create *param = data; + struct kgsl_timeline *timeline; + + timeline = kgsl_timeline_alloc(dev_priv, param->seqno); + if (IS_ERR(timeline)) + return PTR_ERR(timeline); + + /* Commit the pointer to the timeline in timeline idr */ + spin_lock(&device->timelines_lock); + idr_replace(&device->timelines, timeline, timeline->id); + param->id = timeline->id; + spin_unlock(&device->timelines_lock); + return 0; +} + +struct kgsl_timeline *kgsl_timeline_by_id(struct kgsl_device *device, + u32 id) +{ + struct kgsl_timeline *timeline; + int ret = 0; + + spin_lock(&device->timelines_lock); + timeline = idr_find(&device->timelines, id); + + if (timeline) + ret = kref_get_unless_zero(&timeline->ref); + spin_unlock(&device->timelines_lock); + + return ret ? timeline : NULL; +} + +long kgsl_ioctl_timeline_wait(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_device *device = dev_priv->device; + struct kgsl_timeline_wait *param = data; + struct dma_fence *fence; + unsigned long timeout; + signed long ret; + + if (param->flags != KGSL_TIMELINE_WAIT_ANY && + param->flags != KGSL_TIMELINE_WAIT_ALL) + return -EINVAL; + + if (param->padding) + return -EINVAL; + + fence = kgsl_timelines_to_fence_array(device, param->timelines, + param->count, param->timelines_size, + (param->flags == KGSL_TIMELINE_WAIT_ANY)); + + if (IS_ERR(fence)) + return PTR_ERR(fence); + + if (param->tv_sec >= KTIME_SEC_MAX) + timeout = MAX_SCHEDULE_TIMEOUT; + else { + ktime_t time = ktime_set(param->tv_sec, param->tv_nsec); + + timeout = msecs_to_jiffies(ktime_to_ms(time)); + } + + trace_kgsl_timeline_wait(param->flags, param->tv_sec, param->tv_nsec); + + /* secs.nsecs to jiffies */ + if (!timeout) + ret = dma_fence_is_signaled(fence) ? 
0 : -EBUSY; + else { + ret = dma_fence_wait_timeout(fence, true, timeout); + + if (!ret) + ret = -ETIMEDOUT; + else if (ret > 0) + ret = 0; + } + + dma_fence_put(fence); + + return ret; +} + +long kgsl_ioctl_timeline_query(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_timeline_val *param = data; + struct kgsl_timeline *timeline; + + if (param->padding) + return -EINVAL; + + timeline = kgsl_timeline_by_id(dev_priv->device, param->timeline); + if (!timeline) + return -ENODEV; + + param->seqno = timeline->value; + kgsl_timeline_put(timeline); + + return 0; +} + +long kgsl_ioctl_timeline_fence_get(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_device *device = dev_priv->device; + struct kgsl_timeline_fence_get *param = data; + struct kgsl_timeline *timeline; + struct sync_file *sync_file; + struct dma_fence *fence; + int ret = 0, fd; + + timeline = kgsl_timeline_by_id(device, param->timeline); + if (!timeline) + return -ENODEV; + + fence = kgsl_timeline_fence_alloc(timeline, param->seqno); + + if (IS_ERR(fence)) { + kgsl_timeline_put(timeline); + return PTR_ERR(fence); + } + + fd = get_unused_fd_flags(O_CLOEXEC); + if (fd < 0) { + ret = fd; + goto out; + } + + sync_file = sync_file_create(fence); + if (sync_file) { + fd_install(fd, sync_file->file); + param->handle = fd; + } else { + put_unused_fd(fd); + ret = -ENOMEM; + } + +out: + dma_fence_put(fence); + kgsl_timeline_put(timeline); + + return ret; +} + +long kgsl_ioctl_timeline_signal(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_device *device = dev_priv->device; + struct kgsl_timeline_signal *param = data; + u64 timelines; + int i; + + if (!param->timelines_size) { + param->timelines_size = sizeof(struct kgsl_timeline_val); + return -EAGAIN; + } + + if (!param->count) + return -EINVAL; + + timelines = param->timelines; + + for (i = 0; i < param->count; i++) { + struct kgsl_timeline *timeline; + struct kgsl_timeline_val val; + + if (copy_struct_from_user(&val, sizeof(val), + u64_to_user_ptr(timelines), param->timelines_size)) + return -EFAULT; + + if (val.padding) + return -EINVAL; + + timeline = kgsl_timeline_by_id(device, val.timeline); + if (!timeline) + return -ENODEV; + + kgsl_timeline_signal(timeline, val.seqno); + + kgsl_timeline_put(timeline); + + timelines += param->timelines_size; + } + + return 0; +} + +long kgsl_ioctl_timeline_destroy(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_device *device = dev_priv->device; + struct kgsl_timeline_fence *fence, *tmp; + struct kgsl_timeline *timeline; + struct list_head temp; + u32 *param = data; + + if (*param == 0) + return -ENODEV; + + spin_lock(&device->timelines_lock); + timeline = idr_find(&device->timelines, *param); + + if (timeline == NULL) { + spin_unlock(&device->timelines_lock); + return -ENODEV; + } + + /* + * Validate that the id given is owned by the dev_priv + * instance that is passed in. If not, abort. 
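+	 * Holding timelines_lock across the lookup, this check and the
+	 * idr_remove() below keeps another thread from removing or replacing
+	 * the entry in between.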
+ */ + if (timeline->dev_priv != dev_priv) { + spin_unlock(&device->timelines_lock); + return -EINVAL; + } + + idr_remove(&device->timelines, timeline->id); + spin_unlock(&device->timelines_lock); + + INIT_LIST_HEAD(&temp); + + spin_lock(&timeline->fence_lock); + list_for_each_entry_safe(fence, tmp, &timeline->fences, node) + dma_fence_get(&fence->base); + list_replace_init(&timeline->fences, &temp); + spin_unlock(&timeline->fence_lock); + + spin_lock_irq(&timeline->lock); + list_for_each_entry_safe(fence, tmp, &temp, node) { + dma_fence_set_error(&fence->base, -ENOENT); + dma_fence_signal_locked(&fence->base); + dma_fence_put(&fence->base); + } + spin_unlock_irq(&timeline->lock); + + kgsl_timeline_put(timeline); + + return 0; +} diff --git a/kgsl_timeline.h b/kgsl_timeline.h new file mode 100644 index 0000000000..55223eaeab --- /dev/null +++ b/kgsl_timeline.h @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. + */ + +#ifndef __KGSL_TIMELINE_H +#define __KGSL_TIMELINE_H + +/** + * struct kgsl_timeline - Container for a timeline object + */ +struct kgsl_timeline { + /** @context: dma-fence timeline context */ + u64 context; + /** @id: Timeline identifier */ + int id; + /** @value: Current value of the timeline */ + u64 value; + /** @fence_lock: Lock to protect @fences */ + spinlock_t fence_lock; + /** @lock: Lock to use for locking each fence in @fences */ + spinlock_t lock; + /** @ref: Reference count for the struct */ + struct kref ref; + /** @fences: sorted list of active fences */ + struct list_head fences; + /** @name: Name of the timeline for debugging */ + const char name[32]; + /** @dev_priv: pointer to the owning device instance */ + struct kgsl_device_private *dev_priv; +}; + +/** + * kgsl_timeline_signal - Signal the timeline + * @timeline: Pointer to a timeline container + * @seqno: Seqeuence number to signal + * + * Advance @timeline to sequence number @seqno and signal any fences that might + * have expired. + */ +void kgsl_timeline_signal(struct kgsl_timeline *timeline, u64 seqno); + +/** + * kgsl_timeline_destroy - Timeline destroy callback + * @kref: Refcount pointer for the timeline + * + * Reference count callback for the timeline called when the all the object + * references have been released. + */ +void kgsl_timeline_destroy(struct kref *kref); + +/** + * kgsl_timeline_fence_alloc - Allocate a new fence on a timeline + * @timeline: Pointer to a timeline container + * @seqno: Sequence number for the new fence to wait for + * + * Create and return a new fence on the timeline that will expire when the + * timeline value is greater or equal to @seqno. + * Return: A pointer to the newly created fence + */ +struct dma_fence *kgsl_timeline_fence_alloc(struct kgsl_timeline *timeline, + u64 seqno); + +/** + * kgsl_timeline_by_id - Look up a timeline by an id + * @device: A KGSL device handle + * @id: Lookup identifier + * + * Find and return the timeline associated with identifer @id. + * Return: A pointer to a timeline or PTR_ERR() encoded error on failure. + */ +struct kgsl_timeline *kgsl_timeline_by_id(struct kgsl_device *device, + u32 id); + +/** + * kgsl_timeline_get - Get a reference to an existing timeline + * @timeline: Pointer to a timeline container + * + * Get a new reference to the timeline and return the pointer back to the user. 
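+ * The reference is taken with kref_get_unless_zero(), so NULL is returned
+ * for a timeline whose refcount has already dropped to zero (i.e. one that
+ * is being destroyed).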
+ * Return: The pointer to the timeline or PTR_ERR encoded error on failure + */ +struct kgsl_timeline *kgsl_timeline_get(struct kgsl_timeline *timeline); + +/** + * kgsl_timeline_put - Release a reference to a timeline + * @timeline: Pointer to a timeline container + * + * Release a reference to a timeline and destroy it if there are no other + * references + */ +static inline void kgsl_timeline_put(struct kgsl_timeline *timeline) +{ + if (!IS_ERR_OR_NULL(timeline)) + kref_put(&timeline->ref, kgsl_timeline_destroy); +} + +/** + * kgsl_timelines_to_fence_array - Return a dma-fence array of timeline fences + * @device: A KGSL device handle + * @timelines: Userspace pointer to an array of &struct kgsl_timeline_val + * @count: Number of entries in @timelines + * @usize: Size of each entry in @timelines + * @any: True if the fence should expire on any timeline expiring or false if it + * should wait until all timelines have expired + * + * Give a list of &struct kgsl_timeline_val entries, create a dma-fence-array + * containing fences for each timeline/seqno pair. If @any is set the + * dma-fence-array will be set to expire if any of the encapsulated timeline + * fences expire. If @any is false, then the fence will wait for ALL of the + * encapsulated timeline fences to expire. + */ +struct dma_fence *kgsl_timelines_to_fence_array(struct kgsl_device *device, + u64 timelines, u32 count, u64 usize, bool any); + +#endif diff --git a/kgsl_trace.c b/kgsl_trace.c new file mode 100644 index 0000000000..6fb63d520a --- /dev/null +++ b/kgsl_trace.c @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2011,2013,2015,2019 The Linux Foundation. All rights reserved. + */ + +#include + +#include "kgsl_device.h" + +/* Instantiate tracepoints */ +#define CREATE_TRACE_POINTS +#include "kgsl_trace.h" diff --git a/kgsl_trace.h b/kgsl_trace.h new file mode 100644 index 0000000000..88fa471661 --- /dev/null +++ b/kgsl_trace.h @@ -0,0 +1,1522 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2011-2021, The Linux Foundation. All rights reserved. + */ + +#if !defined(_KGSL_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _KGSL_TRACE_H + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kgsl +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . 
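+/*
+ * TRACE_INCLUDE_PATH and TRACE_INCLUDE_FILE (below) tell the tracepoint
+ * framework to re-include this header as kgsl_trace.h from the local
+ * directory when kgsl_trace.c defines CREATE_TRACE_POINTS.
+ */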
+#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE kgsl_trace + +#include + +#include "kgsl.h" +#include "kgsl_drawobj.h" +#include "kgsl_sharedmem.h" + +#define show_memtype(type) \ + __print_symbolic(type, \ + { KGSL_MEM_ENTRY_KERNEL, "gpumem" }, \ + { KGSL_MEM_ENTRY_USER, "usermem" }, \ + { KGSL_MEM_ENTRY_ION, "ion" }) + +#define show_constraint(type) \ + __print_symbolic(type, \ + { KGSL_CONSTRAINT_NONE, "None" }, \ + { KGSL_CONSTRAINT_PWRLEVEL, "Pwrlevel" }, \ + { KGSL_CONSTRAINT_L3_NONE, "L3_none" }, \ + { KGSL_CONSTRAINT_L3_PWRLEVEL, "L3_pwrlevel" }) + +struct kgsl_ringbuffer_issueibcmds; +struct kgsl_device_waittimestamp; + +/* + * Tracepoint for kgsl issue ib commands + */ +TRACE_EVENT(kgsl_issueibcmds, + + TP_PROTO(struct kgsl_device *device, + int drawctxt_id, + unsigned int numibs, + int timestamp, + int flags, + int result, + unsigned int type), + + TP_ARGS(device, drawctxt_id, numibs, timestamp, + flags, result, type), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, drawctxt_id) + __field(unsigned int, numibs) + __field(unsigned int, timestamp) + __field(unsigned int, flags) + __field(int, result) + __field(unsigned int, drawctxt_type) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->drawctxt_id = drawctxt_id; + __entry->numibs = numibs; + __entry->timestamp = timestamp; + __entry->flags = flags; + __entry->result = result; + __entry->drawctxt_type = type; + ), + + TP_printk( + "d_name=%s ctx=%u ib=0x0 numibs=%u ts=%u flags=%s result=%d type=%s", + __get_str(device_name), + __entry->drawctxt_id, + __entry->numibs, + __entry->timestamp, + __entry->flags ? __print_flags(__entry->flags, "|", + KGSL_DRAWOBJ_FLAGS) : "None", + __entry->result, + kgsl_context_type(__entry->drawctxt_type) + ) +); + +/* + * Tracepoint for kgsl readtimestamp + */ +TRACE_EVENT(kgsl_readtimestamp, + + TP_PROTO(struct kgsl_device *device, + unsigned int context_id, + unsigned int type, + unsigned int timestamp), + + TP_ARGS(device, context_id, type, timestamp), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, context_id) + __field(unsigned int, type) + __field(unsigned int, timestamp) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->context_id = context_id; + __entry->type = type; + __entry->timestamp = timestamp; + ), + + TP_printk( + "d_name=%s context_id=%u type=%u ts=%u", + __get_str(device_name), + __entry->context_id, + __entry->type, + __entry->timestamp + ) +); + +/* + * Tracepoint for kgsl waittimestamp entry + */ +TRACE_EVENT(kgsl_waittimestamp_entry, + + TP_PROTO(struct kgsl_device *device, + unsigned int context_id, + unsigned int curr_ts, + unsigned int wait_ts, + unsigned int timeout), + + TP_ARGS(device, context_id, curr_ts, wait_ts, timeout), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, context_id) + __field(unsigned int, curr_ts) + __field(unsigned int, wait_ts) + __field(unsigned int, timeout) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->context_id = context_id; + __entry->curr_ts = curr_ts; + __entry->wait_ts = wait_ts; + __entry->timeout = timeout; + ), + + TP_printk( + "d_name=%s ctx=%u curr_ts=%u ts=%u timeout=%u", + __get_str(device_name), + __entry->context_id, + __entry->curr_ts, + __entry->wait_ts, + __entry->timeout + ) +); + +/* + * Tracepoint for kgsl waittimestamp exit + */ +TRACE_EVENT(kgsl_waittimestamp_exit, + + TP_PROTO(struct kgsl_device *device, unsigned int 
curr_ts, + int result), + + TP_ARGS(device, curr_ts, result), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, curr_ts) + __field(int, result) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->curr_ts = curr_ts; + __entry->result = result; + ), + + TP_printk( + "d_name=%s curr_ts=%u result=%d", + __get_str(device_name), + __entry->curr_ts, + __entry->result + ) +); + +DECLARE_EVENT_CLASS(kgsl_pwr_template, + TP_PROTO(struct kgsl_device *device, bool on), + + TP_ARGS(device, on), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(bool, on) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->on = on; + ), + + TP_printk( + "d_name=%s flag=%s", + __get_str(device_name), + __entry->on ? "on" : "off" + ) +); + +DEFINE_EVENT(kgsl_pwr_template, kgsl_irq, + TP_PROTO(struct kgsl_device *device, bool on), + TP_ARGS(device, on) +); + +DEFINE_EVENT(kgsl_pwr_template, kgsl_bus, + TP_PROTO(struct kgsl_device *device, bool on), + TP_ARGS(device, on) +); + +DEFINE_EVENT(kgsl_pwr_template, kgsl_rail, + TP_PROTO(struct kgsl_device *device, bool on), + TP_ARGS(device, on) +); + +TRACE_EVENT(kgsl_clk, + + TP_PROTO(struct kgsl_device *device, bool on, + unsigned int freq), + + TP_ARGS(device, on, freq), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(bool, on) + __field(unsigned int, freq) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->on = on; + __entry->freq = freq; + ), + + TP_printk( + "d_name=%s flag=%s active_freq=%d", + __get_str(device_name), + __entry->on ? "on" : "off", + __entry->freq + ) +); + +TRACE_EVENT(kgsl_gmu_pwrlevel, + + TP_PROTO(unsigned long freq, + unsigned long prev_freq), + + TP_ARGS(freq, prev_freq), + + TP_STRUCT__entry( + __field(unsigned long, freq) + __field(unsigned long, prev_freq) + ), + + TP_fast_assign( + __entry->freq = freq; + __entry->prev_freq = prev_freq; + ), + + TP_printk( + "gmu_freq=%ld gmu_prev_freq=%ld", + __entry->freq, + __entry->prev_freq + ) +); + +TRACE_EVENT(kgsl_pwrlevel, + + TP_PROTO(struct kgsl_device *device, + unsigned int pwrlevel, + unsigned int freq, + unsigned int prev_pwrlevel, + unsigned int prev_freq), + + TP_ARGS(device, pwrlevel, freq, prev_pwrlevel, prev_freq), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, pwrlevel) + __field(unsigned int, freq) + __field(unsigned int, prev_pwrlevel) + __field(unsigned int, prev_freq) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->pwrlevel = pwrlevel; + __entry->freq = freq; + __entry->prev_pwrlevel = prev_pwrlevel; + __entry->prev_freq = prev_freq; + ), + + TP_printk( + "d_name=%s pwrlevel=%d freq=%d prev_pwrlevel=%d prev_freq=%d", + __get_str(device_name), + __entry->pwrlevel, + __entry->freq, + __entry->prev_pwrlevel, + __entry->prev_freq + ) +); + +/* + * Tracepoint for kgsl gpu_frequency + */ +TRACE_EVENT(gpu_frequency, + TP_PROTO(unsigned int gpu_freq, unsigned int gpu_id), + TP_ARGS(gpu_freq, gpu_id), + TP_STRUCT__entry( + __field(unsigned int, gpu_freq) + __field(unsigned int, gpu_id) + ), + TP_fast_assign( + __entry->gpu_freq = gpu_freq; + __entry->gpu_id = gpu_id; + ), + + TP_printk("gpu_freq=%luKhz gpu_id=%lu", + (unsigned long)__entry->gpu_freq, + (unsigned long)__entry->gpu_id) +); + +TRACE_EVENT(kgsl_buslevel, + + TP_PROTO(struct kgsl_device *device, unsigned int pwrlevel, + unsigned int bus), + + TP_ARGS(device, pwrlevel, bus), + + TP_STRUCT__entry( + 
__string(device_name, device->name) + __field(unsigned int, pwrlevel) + __field(unsigned int, bus) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->pwrlevel = pwrlevel; + __entry->bus = bus; + ), + + TP_printk( + "d_name=%s pwrlevel=%d bus=%d", + __get_str(device_name), + __entry->pwrlevel, + __entry->bus + ) +); + +TRACE_EVENT(kgsl_gpubusy, + TP_PROTO(struct kgsl_device *device, unsigned int busy, + unsigned int elapsed), + + TP_ARGS(device, busy, elapsed), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, busy) + __field(unsigned int, elapsed) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->busy = busy; + __entry->elapsed = elapsed; + ), + + TP_printk( + "d_name=%s busy=%u elapsed=%d", + __get_str(device_name), + __entry->busy, + __entry->elapsed + ) +); + +TRACE_EVENT(kgsl_pwrstats, + TP_PROTO(struct kgsl_device *device, s64 time, + struct kgsl_power_stats *pstats, u32 ctxt_count), + + TP_ARGS(device, time, pstats, ctxt_count), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(s64, total_time) + __field(u64, busy_time) + __field(u64, ram_time) + __field(u64, ram_wait) + __field(u32, context_count) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->total_time = time; + __entry->busy_time = pstats->busy_time; + __entry->ram_time = pstats->ram_time; + __entry->ram_wait = pstats->ram_wait; + __entry->context_count = ctxt_count; + ), + + TP_printk( + "d_name=%s total=%lld busy=%lld ram_time=%lld ram_wait=%lld context_count=%u", + __get_str(device_name), __entry->total_time, __entry->busy_time, + __entry->ram_time, __entry->ram_wait, __entry->context_count + ) +); + +DECLARE_EVENT_CLASS(kgsl_pwrstate_template, + TP_PROTO(struct kgsl_device *device, unsigned int state), + + TP_ARGS(device, state), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, state) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->state = state; + ), + + TP_printk( + "d_name=%s state=%s", + __get_str(device_name), + kgsl_pwrstate_to_str(__entry->state) + ) +); + +DEFINE_EVENT(kgsl_pwrstate_template, kgsl_pwr_set_state, + TP_PROTO(struct kgsl_device *device, unsigned int state), + TP_ARGS(device, state) +); + +DEFINE_EVENT(kgsl_pwrstate_template, kgsl_pwr_request_state, + TP_PROTO(struct kgsl_device *device, unsigned int state), + TP_ARGS(device, state) +); + +TRACE_EVENT(kgsl_mem_alloc, + + TP_PROTO(struct kgsl_mem_entry *mem_entry), + + TP_ARGS(mem_entry), + + TP_STRUCT__entry( + __field(uint64_t, gpuaddr) + __field(uint64_t, size) + __field(unsigned int, tgid) + __array(char, usage, 16) + __field(unsigned int, id) + __field(uint64_t, flags) + ), + + TP_fast_assign( + __entry->gpuaddr = mem_entry->memdesc.gpuaddr; + __entry->size = mem_entry->memdesc.size; + __entry->tgid = pid_nr(mem_entry->priv->pid); + kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage), + mem_entry->memdesc.flags); + __entry->id = mem_entry->id; + __entry->flags = mem_entry->memdesc.flags; + ), + + TP_printk( + "gpuaddr=0x%llx size=%llu tgid=%u usage=%s id=%u flags=0x%llx", + __entry->gpuaddr, __entry->size, __entry->tgid, + __entry->usage, __entry->id, __entry->flags + ) +); + +TRACE_EVENT(kgsl_mem_mmap, + + TP_PROTO(struct kgsl_mem_entry *mem_entry, unsigned long useraddr), + + TP_ARGS(mem_entry, useraddr), + + TP_STRUCT__entry( + __field(unsigned long, useraddr) + __field(uint64_t, gpuaddr) + __field(uint64_t, size) + __array(char, 
usage, 16) + __field(unsigned int, id) + __field(uint64_t, flags) + ), + + TP_fast_assign( + __entry->useraddr = useraddr; + __entry->gpuaddr = mem_entry->memdesc.gpuaddr; + __entry->size = mem_entry->memdesc.size; + kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage), + mem_entry->memdesc.flags); + __entry->id = mem_entry->id; + __entry->flags = mem_entry->memdesc.flags; + ), + + TP_printk( + "useraddr=0x%lx gpuaddr=0x%llx size=%llu usage=%s id=%u flags=0x%llx", + __entry->useraddr, __entry->gpuaddr, __entry->size, + __entry->usage, __entry->id, __entry->flags + ) +); + +TRACE_EVENT(kgsl_mem_unmapped_area_collision, + + TP_PROTO(struct kgsl_mem_entry *mem_entry, + unsigned long addr, + unsigned long len), + + TP_ARGS(mem_entry, addr, len), + + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned long, addr) + __field(unsigned long, len) + ), + + TP_fast_assign( + __entry->id = mem_entry->id; + __entry->len = len; + __entry->addr = addr; + ), + + TP_printk( + "id=%u len=%lu addr=0x%lx", + __entry->id, __entry->len, __entry->addr + ) +); + +TRACE_EVENT(kgsl_mem_map, + + TP_PROTO(struct kgsl_mem_entry *mem_entry, int fd), + + TP_ARGS(mem_entry, fd), + + TP_STRUCT__entry( + __field(uint64_t, gpuaddr) + __field(uint64_t, size) + __field(int, fd) + __field(int, type) + __field(unsigned int, tgid) + __array(char, usage, 16) + __field(unsigned int, id) + ), + + TP_fast_assign( + __entry->gpuaddr = mem_entry->memdesc.gpuaddr; + __entry->size = mem_entry->memdesc.size; + __entry->fd = fd; + __entry->type = kgsl_memdesc_usermem_type(&mem_entry->memdesc); + __entry->tgid = pid_nr(mem_entry->priv->pid); + kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage), + mem_entry->memdesc.flags); + __entry->id = mem_entry->id; + ), + + TP_printk( + "gpuaddr=0x%llx size=%llu type=%s fd=%d tgid=%u usage=%s id=%u", + __entry->gpuaddr, __entry->size, + show_memtype(__entry->type), + __entry->fd, __entry->tgid, + __entry->usage, __entry->id + ) +); + +TRACE_EVENT(kgsl_mem_free, + + TP_PROTO(struct kgsl_mem_entry *mem_entry), + + TP_ARGS(mem_entry), + + TP_STRUCT__entry( + __field(uint64_t, gpuaddr) + __field(uint64_t, size) + __field(int, type) + __field(int, fd) + __field(unsigned int, tgid) + __array(char, usage, 16) + __field(unsigned int, id) + ), + + TP_fast_assign( + __entry->gpuaddr = mem_entry->memdesc.gpuaddr; + __entry->size = mem_entry->memdesc.size; + __entry->type = kgsl_memdesc_usermem_type(&mem_entry->memdesc); + __entry->tgid = pid_nr(mem_entry->priv->pid); + kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage), + mem_entry->memdesc.flags); + __entry->id = mem_entry->id; + ), + + TP_printk( + "gpuaddr=0x%llx size=%llu type=%s tgid=%u usage=%s id=%u", + __entry->gpuaddr, __entry->size, + show_memtype(__entry->type), + __entry->tgid, __entry->usage, __entry->id + ) +); + +TRACE_EVENT(kgsl_mem_sync_cache, + + TP_PROTO(struct kgsl_mem_entry *mem_entry, uint64_t offset, + uint64_t length, unsigned int op), + + TP_ARGS(mem_entry, offset, length, op), + + TP_STRUCT__entry( + __field(uint64_t, gpuaddr) + __array(char, usage, 16) + __field(unsigned int, tgid) + __field(unsigned int, id) + __field(unsigned int, op) + __field(uint64_t, offset) + __field(uint64_t, length) + ), + + TP_fast_assign( + __entry->gpuaddr = mem_entry->memdesc.gpuaddr; + kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage), + mem_entry->memdesc.flags); + __entry->tgid = pid_nr(mem_entry->priv->pid); + __entry->id = mem_entry->id; + __entry->op = op; + __entry->offset = offset; + 
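+		/* A zero length means the whole allocation is being synced */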
__entry->length = (length == 0) ? + mem_entry->memdesc.size : length; + ), + + TP_printk( + "gpuaddr=0x%llx size=%llu tgid=%u usage=%s id=%u op=%c%c offset=%llu", + __entry->gpuaddr, __entry->length, + __entry->tgid, __entry->usage, __entry->id, + (__entry->op & KGSL_GPUMEM_CACHE_CLEAN) ? 'c' : '.', + (__entry->op & KGSL_GPUMEM_CACHE_INV) ? 'i' : '.', + __entry->offset + ) +); + +TRACE_EVENT(kgsl_mem_add_bind_range, + TP_PROTO(struct kgsl_mem_entry *target, u64 offset, + struct kgsl_mem_entry *child, u64 length), + + TP_ARGS(target, offset, child, length), + + TP_STRUCT__entry( + __field(u64, gpuaddr) + __field(u32, target) + __field(u32, tgid) + __field(u32, child) + __field(u64, length) + ), + + TP_fast_assign( + __entry->gpuaddr = target->memdesc.gpuaddr + offset; + __entry->tgid = pid_nr(target->priv->pid); + __entry->target = target->id; + __entry->child = child->id; + __entry->length = length; + ), + + TP_printk( + "tgid=%u target=%d gpuaddr=%llx length %llu child=%d", + __entry->tgid, __entry->target, __entry->gpuaddr, + __entry->length, __entry->child + ) +); + +TRACE_EVENT(kgsl_mem_remove_bind_range, + TP_PROTO(struct kgsl_mem_entry *target, u64 offset, + struct kgsl_mem_entry *child, u64 length), + + TP_ARGS(target, offset, child, length), + + TP_STRUCT__entry( + __field(u64, gpuaddr) + __field(u32, target) + __field(u32, tgid) + __field(u32, child) + __field(u64, length) + ), + + TP_fast_assign( + __entry->gpuaddr = target->memdesc.gpuaddr + offset; + __entry->tgid = pid_nr(target->priv->pid); + __entry->target = target->id; + __entry->child = child->id; + __entry->length = length; + ), + + TP_printk( + "tgid=%u target=%d gpuaddr=%llx length %llu child=%d", + __entry->tgid, __entry->target, __entry->gpuaddr, + __entry->length, __entry->child + ) +); + +TRACE_EVENT(kgsl_mem_sync_full_cache, + + TP_PROTO(unsigned int num_bufs, uint64_t bulk_size), + TP_ARGS(num_bufs, bulk_size), + + TP_STRUCT__entry( + __field(unsigned int, num_bufs) + __field(uint64_t, bulk_size) + ), + + TP_fast_assign( + __entry->num_bufs = num_bufs; + __entry->bulk_size = bulk_size; + ), + + TP_printk( + "num_bufs=%u bulk_size=%llu op=ci", + __entry->num_bufs, __entry->bulk_size + ) +); + +DECLARE_EVENT_CLASS(kgsl_mem_timestamp_template, + + TP_PROTO(struct kgsl_device *device, struct kgsl_mem_entry *mem_entry, + unsigned int id, unsigned int curr_ts, unsigned int free_ts), + + TP_ARGS(device, mem_entry, id, curr_ts, free_ts), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(uint64_t, gpuaddr) + __field(uint64_t, size) + __field(int, type) + __array(char, usage, 16) + __field(unsigned int, id) + __field(unsigned int, drawctxt_id) + __field(unsigned int, curr_ts) + __field(unsigned int, free_ts) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->gpuaddr = mem_entry->memdesc.gpuaddr; + __entry->size = mem_entry->memdesc.size; + kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage), + mem_entry->memdesc.flags); + __entry->id = mem_entry->id; + __entry->drawctxt_id = id; + __entry->type = kgsl_memdesc_usermem_type(&mem_entry->memdesc); + __entry->curr_ts = curr_ts; + __entry->free_ts = free_ts; + ), + + TP_printk( + "d_name=%s gpuaddr=0x%llx size=%llu type=%s usage=%s id=%u ctx=%u curr_ts=%u free_ts=%u", + __get_str(device_name), + __entry->gpuaddr, + __entry->size, + show_memtype(__entry->type), + __entry->usage, + __entry->id, + __entry->drawctxt_id, + __entry->curr_ts, + __entry->free_ts + ) +); + +DEFINE_EVENT(kgsl_mem_timestamp_template, 
kgsl_mem_timestamp_queue, + TP_PROTO(struct kgsl_device *device, struct kgsl_mem_entry *mem_entry, + unsigned int id, unsigned int curr_ts, unsigned int free_ts), + TP_ARGS(device, mem_entry, id, curr_ts, free_ts) +); + +DEFINE_EVENT(kgsl_mem_timestamp_template, kgsl_mem_timestamp_free, + TP_PROTO(struct kgsl_device *device, struct kgsl_mem_entry *mem_entry, + unsigned int id, unsigned int curr_ts, unsigned int free_ts), + TP_ARGS(device, mem_entry, id, curr_ts, free_ts) +); + +TRACE_EVENT(kgsl_context_create, + + TP_PROTO(struct kgsl_device *device, struct kgsl_context *context, + unsigned int flags), + + TP_ARGS(device, context, flags), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, id) + __field(unsigned int, flags) + __field(unsigned int, priority) + __field(unsigned int, type) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->id = context->id; + __entry->flags = flags & ~(KGSL_CONTEXT_PRIORITY_MASK | + KGSL_CONTEXT_TYPE_MASK); + __entry->priority = + (flags & KGSL_CONTEXT_PRIORITY_MASK) + >> KGSL_CONTEXT_PRIORITY_SHIFT; + __entry->type = + (flags & KGSL_CONTEXT_TYPE_MASK) + >> KGSL_CONTEXT_TYPE_SHIFT; + ), + + TP_printk( + "d_name=%s ctx=%u flags=%s priority=%u type=%s", + __get_str(device_name), __entry->id, + __entry->flags ? __print_flags(__entry->flags, "|", + KGSL_CONTEXT_FLAGS) : "None", + __entry->priority, + kgsl_context_type(__entry->type) + ) +); + +TRACE_EVENT(kgsl_context_detach, + + TP_PROTO(struct kgsl_device *device, struct kgsl_context *context), + + TP_ARGS(device, context), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, id) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->id = context->id; + ), + + TP_printk( + "d_name=%s ctx=%u", + __get_str(device_name), __entry->id + ) +); + +TRACE_EVENT(kgsl_context_destroy, + + TP_PROTO(struct kgsl_device *device, struct kgsl_context *context), + + TP_ARGS(device, context), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, id) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->id = context->id; + ), + + TP_printk( + "d_name=%s ctx=%u", + __get_str(device_name), __entry->id + ) +); + +TRACE_EVENT(kgsl_user_pwrlevel_constraint, + + TP_PROTO(struct kgsl_device *device, unsigned int id, unsigned int type, + unsigned int sub_type), + + TP_ARGS(device, id, type, sub_type), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, id) + __field(unsigned int, type) + __field(unsigned int, sub_type) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->id = id; + __entry->type = type; + __entry->sub_type = sub_type; + ), + + TP_printk( + "d_name=%s ctx=%u constraint_type=%s constraint_subtype=%s", + __get_str(device_name), __entry->id, + show_constraint(__entry->type), + __print_symbolic(__entry->sub_type, + { KGSL_CONSTRAINT_PWR_MIN, "Min" }, + { KGSL_CONSTRAINT_PWR_MAX, "Max" }) + ) +); + +TRACE_EVENT(kgsl_constraint, + + TP_PROTO(struct kgsl_device *device, unsigned int type, + unsigned int value, unsigned int on), + + TP_ARGS(device, type, value, on), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, type) + __field(unsigned int, value) + __field(unsigned int, on) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->type = type; + __entry->value = value; + __entry->on = on; + ), + + TP_printk( + "d_name=%s constraint_type=%s 
constraint_value=%u status=%s", + __get_str(device_name), + show_constraint(__entry->type), + __entry->value, + __entry->on ? "ON" : "OFF" + ) +); + +TRACE_EVENT(kgsl_mmu_pagefault, + + TP_PROTO(struct kgsl_device *device, unsigned long page, + unsigned int pt, const char *name, const char *op), + + TP_ARGS(device, page, pt, name, op), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned long, page) + __field(unsigned int, pt) + __string(name, name) + __string(op, op) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->page = page; + __entry->pt = pt; + __assign_str(name, name); + __assign_str(op, op); + ), + + TP_printk( + "d_name=%s page=0x%lx pt=%u op=%s name=%s", + __get_str(device_name), __entry->page, __entry->pt, + __get_str(op), __get_str(name) + ) +); + +TRACE_EVENT(kgsl_regwrite, + + TP_PROTO(unsigned int offset, unsigned int value), + + TP_ARGS(offset, value), + + TP_STRUCT__entry( + __string(device_name, "kgsl-3d0") + __field(unsigned int, offset) + __field(unsigned int, value) + ), + + TP_fast_assign( + __assign_str(device_name, "kgsl-3d0"); + __entry->offset = offset; + __entry->value = value; + ), + + TP_printk( + "d_name=%s reg=0x%x value=0x%x", + __get_str(device_name), __entry->offset, __entry->value + ) +); + +TRACE_EVENT(kgsl_register_event, + TP_PROTO(unsigned int id, unsigned int timestamp, void *func), + TP_ARGS(id, timestamp, func), + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, timestamp) + __field(void *, func) + ), + TP_fast_assign( + __entry->id = id; + __entry->timestamp = timestamp; + __entry->func = func; + ), + TP_printk( + "ctx=%u ts=%u cb=%pS", + __entry->id, __entry->timestamp, __entry->func) +); + +TRACE_EVENT(kgsl_fire_event, + TP_PROTO(unsigned int id, unsigned int ts, + unsigned int type, unsigned int age, void *func), + TP_ARGS(id, ts, type, age, func), + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, ts) + __field(unsigned int, type) + __field(unsigned int, age) + __field(void *, func) + ), + TP_fast_assign( + __entry->id = id; + __entry->ts = ts; + __entry->type = type; + __entry->age = age; + __entry->func = func; + ), + TP_printk( + "ctx=%u ts=%u type=%s age=%u cb=%pS", + __entry->id, __entry->ts, + __print_symbolic(__entry->type, + { KGSL_EVENT_RETIRED, "retired" }, + { KGSL_EVENT_CANCELLED, "cancelled" }), + __entry->age, __entry->func) +); + +TRACE_EVENT(kgsl_active_count, + + TP_PROTO(struct kgsl_device *device, unsigned long ip), + + TP_ARGS(device, ip), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, count) + __field(unsigned long, ip) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->count = atomic_read(&device->active_cnt); + __entry->ip = ip; + ), + + TP_printk( + "d_name=%s active_cnt=%u func=%ps", + __get_str(device_name), __entry->count, (void *) __entry->ip + ) +); + +TRACE_EVENT(kgsl_pagetable_destroy, + TP_PROTO(u64 ptbase, unsigned int name), + TP_ARGS(ptbase, name), + TP_STRUCT__entry( + __field(u64, ptbase) + __field(unsigned int, name) + ), + TP_fast_assign( + __entry->ptbase = ptbase; + __entry->name = name; + ), + TP_printk("ptbase=%llx name=%u", __entry->ptbase, __entry->name) +); + +DECLARE_EVENT_CLASS(syncpoint_timestamp_template, + TP_PROTO(struct kgsl_drawobj_sync *syncobj, + struct kgsl_context *context, + unsigned int timestamp), + TP_ARGS(syncobj, context, timestamp), + TP_STRUCT__entry( + __field(unsigned int, syncobj_context_id) + __field(unsigned int, 
context_id) + __field(unsigned int, timestamp) + ), + TP_fast_assign( + __entry->syncobj_context_id = syncobj->base.context->id; + __entry->context_id = context->id; + __entry->timestamp = timestamp; + ), + TP_printk("ctx=%d sync ctx=%d ts=%d", + __entry->syncobj_context_id, __entry->context_id, + __entry->timestamp) +); + +DEFINE_EVENT(syncpoint_timestamp_template, syncpoint_timestamp, + TP_PROTO(struct kgsl_drawobj_sync *syncobj, + struct kgsl_context *context, + unsigned int timestamp), + TP_ARGS(syncobj, context, timestamp) +); + +DEFINE_EVENT(syncpoint_timestamp_template, syncpoint_timestamp_expire, + TP_PROTO(struct kgsl_drawobj_sync *syncobj, + struct kgsl_context *context, + unsigned int timestamp), + TP_ARGS(syncobj, context, timestamp) +); + +DECLARE_EVENT_CLASS(syncpoint_fence_template, + TP_PROTO(struct kgsl_drawobj_sync *syncobj, char *name), + TP_ARGS(syncobj, name), + TP_STRUCT__entry( + __string(fence_name, name) + __field(unsigned int, syncobj_context_id) + ), + TP_fast_assign( + __entry->syncobj_context_id = syncobj->base.context->id; + __assign_str(fence_name, name); + ), + TP_printk("ctx=%d fence=%s", + __entry->syncobj_context_id, __get_str(fence_name)) +); + +DEFINE_EVENT(syncpoint_fence_template, syncpoint_fence, + TP_PROTO(struct kgsl_drawobj_sync *syncobj, char *name), + TP_ARGS(syncobj, name) +); + +DEFINE_EVENT(syncpoint_fence_template, syncpoint_fence_expire, + TP_PROTO(struct kgsl_drawobj_sync *syncobj, char *name), + TP_ARGS(syncobj, name) +); + +TRACE_EVENT(kgsl_msg, + TP_PROTO(const char *msg), + TP_ARGS(msg), + TP_STRUCT__entry( + __string(msg, msg) + ), + TP_fast_assign( + __assign_str(msg, msg); + ), + TP_printk( + "%s", __get_str(msg) + ) +); + +TRACE_EVENT(kgsl_clock_throttling, + TP_PROTO( + int idle_10pct, + int crc_50pct, + int crc_more50pct, + int crc_less50pct, + int64_t adj + ), + TP_ARGS( + idle_10pct, + crc_50pct, + crc_more50pct, + crc_less50pct, + adj + ), + TP_STRUCT__entry( + __field(int, idle_10pct) + __field(int, crc_50pct) + __field(int, crc_more50pct) + __field(int, crc_less50pct) + __field(int64_t, adj) + ), + TP_fast_assign( + __entry->idle_10pct = idle_10pct; + __entry->crc_50pct = crc_50pct; + __entry->crc_more50pct = crc_more50pct; + __entry->crc_less50pct = crc_less50pct; + __entry->adj = adj; + ), + TP_printk("idle_10=%d crc_50=%d crc_more50=%d crc_less50=%d adj=%lld", + __entry->idle_10pct, __entry->crc_50pct, __entry->crc_more50pct, + __entry->crc_less50pct, __entry->adj + ) +); + +TRACE_EVENT(kgsl_bcl_clock_throttling, + TP_PROTO( + int crc_25pct, + int crc_58pct, + int crc_75pct + ), + TP_ARGS( + crc_25pct, + crc_58pct, + crc_75pct + ), + TP_STRUCT__entry( + __field(int, crc_25pct) + __field(int, crc_58pct) + __field(int, crc_75pct) + ), + TP_fast_assign( + __entry->crc_25pct = crc_25pct; + __entry->crc_58pct = crc_58pct; + __entry->crc_75pct = crc_75pct; + ), + TP_printk("crc_25=%d crc_58=%d crc_75=%d", + __entry->crc_25pct, __entry->crc_58pct, + __entry->crc_75pct + ) +); + +DECLARE_EVENT_CLASS(gmu_oob_template, + TP_PROTO(unsigned int mask), + TP_ARGS(mask), + TP_STRUCT__entry( + __field(unsigned int, mask) + ), + TP_fast_assign( + __entry->mask = mask; + ), + TP_printk("mask=0x%08x", __entry->mask) +); + +DEFINE_EVENT(gmu_oob_template, kgsl_gmu_oob_set, + TP_PROTO(unsigned int mask), + TP_ARGS(mask) +); + +DEFINE_EVENT(gmu_oob_template, kgsl_gmu_oob_clear, + TP_PROTO(unsigned int mask), + TP_ARGS(mask) +); + +DECLARE_EVENT_CLASS(hfi_msg_template, + TP_PROTO(unsigned int id, unsigned int size, unsigned int seqnum), + 
TP_ARGS(id, size, seqnum), + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, size) + __field(unsigned int, seq) + ), + TP_fast_assign( + __entry->id = id; + __entry->size = size; + __entry->seq = seqnum; + ), + TP_printk("id=0x%x size=0x%x seqnum=0x%x", + __entry->id, __entry->size, __entry->seq) +); + +DEFINE_EVENT(hfi_msg_template, kgsl_hfi_send, + TP_PROTO(unsigned int id, unsigned int size, unsigned int seqnum), + TP_ARGS(id, size, seqnum) +); + +DEFINE_EVENT(hfi_msg_template, kgsl_hfi_receive, + TP_PROTO(unsigned int id, unsigned int size, unsigned int seqnum), + TP_ARGS(id, size, seqnum) +); + +TRACE_EVENT(kgsl_timeline_alloc, + TP_PROTO( + u32 id, + u64 seqno + ), + TP_ARGS( + id, + seqno + ), + TP_STRUCT__entry( + __field(u32, id) + __field(u64, seqno) + ), + TP_fast_assign( + __entry->id = id; + __entry->seqno = seqno; + ), + TP_printk("id=%u initial=%llu", + __entry->id, __entry->seqno + ) +); + +TRACE_EVENT(kgsl_timeline_destroy, + TP_PROTO( + u32 id + ), + TP_ARGS( + id + ), + TP_STRUCT__entry( + __field(u32, id) + ), + TP_fast_assign( + __entry->id = id; + ), + TP_printk("id=%u", + __entry->id + ) +); + + +TRACE_EVENT(kgsl_timeline_signal, + TP_PROTO( + u32 id, + u64 seqno + ), + TP_ARGS( + id, + seqno + ), + TP_STRUCT__entry( + __field(u32, id) + __field(u64, seqno) + ), + TP_fast_assign( + __entry->id = id; + __entry->seqno = seqno; + ), + TP_printk("id=%u seqno=%llu", + __entry->id, __entry->seqno + ) +); + +TRACE_EVENT(kgsl_timeline_fence_alloc, + TP_PROTO( + u32 timeline, + u64 seqno + ), + TP_ARGS( + timeline, + seqno + ), + TP_STRUCT__entry( + __field(u32, timeline) + __field(u64, seqno) + ), + TP_fast_assign( + __entry->timeline = timeline; + __entry->seqno = seqno; + ), + TP_printk("timeline=%u seqno=%llu", + __entry->timeline, __entry->seqno + ) +); + +TRACE_EVENT(kgsl_timeline_fence_release, + TP_PROTO( + u32 timeline, + u64 seqno + ), + TP_ARGS( + timeline, + seqno + ), + TP_STRUCT__entry( + __field(u32, timeline) + __field(u64, seqno) + ), + TP_fast_assign( + __entry->timeline = timeline; + __entry->seqno = seqno; + ), + TP_printk("timeline=%u seqno=%llu", + __entry->timeline, __entry->seqno + ) +); + + +TRACE_EVENT(kgsl_timeline_wait, + TP_PROTO( + u32 flags, + s64 tv_sec, + s64 tv_nsec + ), + TP_ARGS( + flags, + tv_sec, + tv_nsec + ), + TP_STRUCT__entry( + __field(u32, flags) + __field(s64, tv_sec) + __field(s64, tv_nsec) + ), + TP_fast_assign( + __entry->flags = flags; + __entry->tv_sec = tv_sec; + __entry->tv_nsec = tv_nsec; + ), + TP_printk("flags=0x%x tv_sec=%llu tv_nsec=%llu", + __entry->flags, __entry->tv_sec, __entry->tv_nsec + + ) +); + +TRACE_EVENT(kgsl_aux_command, + TP_PROTO(u32 drawctxt_id, u32 numcmds, u32 flags, u32 timestamp + ), + TP_ARGS(drawctxt_id, numcmds, flags, timestamp + ), + TP_STRUCT__entry( + __field(u32, drawctxt_id) + __field(u32, numcmds) + __field(u32, flags) + __field(u32, timestamp) + ), + TP_fast_assign( + __entry->drawctxt_id = drawctxt_id; + __entry->numcmds = numcmds; + __entry->flags = flags; + __entry->timestamp = timestamp; + ), + TP_printk("context=%u numcmds=%u flags=0x%x timestamp=%u", + __entry->drawctxt_id, __entry->numcmds, __entry->flags, + __entry->timestamp + ) +); + +TRACE_EVENT(kgsl_drawobj_timeline, + TP_PROTO(u32 timeline, u64 seqno + ), + TP_ARGS(timeline, seqno + ), + TP_STRUCT__entry( + __field(u32, timeline) + __field(u64, seqno) + ), + TP_fast_assign( + __entry->timeline = timeline; + __entry->seqno = seqno; + ), + TP_printk("timeline=%u seqno=%llu", + __entry->timeline, 
__entry->seqno + ) +); + +TRACE_EVENT(kgsl_thermal_constraint, + TP_PROTO( + s32 max_freq + ), + TP_ARGS( + max_freq + ), + TP_STRUCT__entry( + __field(s32, max_freq) + ), + TP_fast_assign( + __entry->max_freq = max_freq; + ), + TP_printk("Thermal max freq=%d", + __entry->max_freq + ) +); + +TRACE_EVENT(kgsl_pool_add_page, + TP_PROTO(int order, u32 count), + TP_ARGS(order, count), + TP_STRUCT__entry( + __field(int, order) + __field(u32, count) + ), + TP_fast_assign( + __entry->order = order; + __entry->count = count; + ), + TP_printk("order=%d count=%u", + __entry->order, __entry->count + ) +); + +TRACE_EVENT(kgsl_pool_get_page, + TP_PROTO(int order, u32 count), + TP_ARGS(order, count), + TP_STRUCT__entry( + __field(int, order) + __field(u32, count) + ), + TP_fast_assign( + __entry->order = order; + __entry->count = count; + ), + TP_printk("order=%d count=%u", + __entry->order, __entry->count + ) +); + +TRACE_EVENT(kgsl_pool_alloc_page_system, + TP_PROTO(int order), + TP_ARGS(order), + TP_STRUCT__entry( + __field(int, order) + ), + TP_fast_assign( + __entry->order = order; + ), + TP_printk("order=%d", + __entry->order + ) +); + +TRACE_EVENT(kgsl_pool_try_page_lower, + TP_PROTO(int order), + TP_ARGS(order), + TP_STRUCT__entry( + __field(int, order) + ), + TP_fast_assign( + __entry->order = order; + ), + TP_printk("order=%d", + __entry->order + ) +); + +TRACE_EVENT(kgsl_pool_free_page, + TP_PROTO(int order), + TP_ARGS(order), + TP_STRUCT__entry( + __field(int, order) + ), + TP_fast_assign( + __entry->order = order; + ), + TP_printk("order=%d", + __entry->order + ) +); + +#endif /* _KGSL_TRACE_H */ + +/* This part must be outside protection */ +#include diff --git a/kgsl_util.c b/kgsl_util.c new file mode 100644 index 0000000000..8ebbe349d9 --- /dev/null +++ b/kgsl_util.c @@ -0,0 +1,350 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. 
+ */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "adreno.h" +#include "kgsl_util.h" + +bool kgsl_regulator_disable_wait(struct regulator *reg, u32 timeout) +{ + ktime_t tout = ktime_add_us(ktime_get(), timeout * 1000); + + if (IS_ERR_OR_NULL(reg)) + return true; + + regulator_disable(reg); + + for (;;) { + if (!regulator_is_enabled(reg)) + return true; + + if (ktime_compare(ktime_get(), tout) > 0) + return (!regulator_is_enabled(reg)); + + usleep_range((100 >> 2) + 1, 100); + } +} + +struct clk *kgsl_of_clk_by_name(struct clk_bulk_data *clks, int count, + const char *id) +{ + int i; + + for (i = 0; clks && i < count; i++) + if (!strcmp(clks[i].id, id)) + return clks[i].clk; + + return NULL; +} + +int kgsl_regulator_set_voltage(struct device *dev, + struct regulator *reg, u32 voltage) +{ + int ret; + + if (IS_ERR_OR_NULL(reg)) + return 0; + + ret = regulator_set_voltage(reg, voltage, INT_MAX); + if (ret) + dev_err(dev, "Regulator set voltage:%d failed:%d\n", voltage, ret); + + return ret; +} + +int kgsl_clk_set_rate(struct clk_bulk_data *clks, int num_clks, + const char *id, unsigned long rate) +{ + struct clk *clk; + + clk = kgsl_of_clk_by_name(clks, num_clks, id); + if (!clk) + return -ENODEV; + + return clk_set_rate(clk, rate); +} + +/* + * The PASID has stayed consistent across all targets thus far so we are + * cautiously optimistic that we can hard code it + */ +#define GPU_PASID 13 + +int kgsl_zap_shader_load(struct device *dev, const char *name) +{ + struct device_node *np, *mem_np; + const struct firmware *fw; + void *mem_region = NULL; + phys_addr_t mem_phys; + struct resource res; + const char *fwname; + ssize_t mem_size; + int ret; + + np = of_get_child_by_name(dev->of_node, "zap-shader"); + if (!np) { + dev_err(dev, "zap-shader node not found. Please update the device tree\n"); + return -ENODEV; + } + + mem_np = of_parse_phandle(np, "memory-region", 0); + of_node_put(np); + if (!mem_np) { + dev_err(dev, "Couldn't parse the mem-region from the zap-shader node\n"); + return -EINVAL; + } + + ret = of_address_to_resource(mem_np, 0, &res); + of_node_put(mem_np); + if (ret) + return ret; + + /* + * To avoid confusion we will keep the "legacy" naming scheme + * without the .mdt postfix (i.e. 
"a660_zap") outside of this function + * so we have to fix it up here + */ + fwname = kasprintf(GFP_KERNEL, "%s.mdt", name); + if (!fwname) + return -ENOMEM; + + ret = request_firmware(&fw, fwname, dev); + if (ret) { + dev_err(dev, "Couldn't load the firmware %s\n", fwname); + kfree(fwname); + return ret; + } + + mem_size = qcom_mdt_get_size(fw); + if (mem_size < 0) { + ret = mem_size; + goto out; + } + + if (mem_size > resource_size(&res)) { + ret = -E2BIG; + goto out; + } + + mem_phys = res.start; + + mem_region = memremap(mem_phys, mem_size, MEMREMAP_WC); + if (!mem_region) { + ret = -ENOMEM; + goto out; + } + + ret = qcom_mdt_load(dev, fw, fwname, GPU_PASID, mem_region, + mem_phys, mem_size, NULL); + if (ret) { + dev_err(dev, "Error %d while loading the MDT\n", ret); + goto out; + } + + ret = qcom_scm_pas_auth_and_reset(GPU_PASID); + +out: + if (mem_region) + memunmap(mem_region); + + release_firmware(fw); + kfree(fwname); + return ret; +} + +int kgsl_hwlock(struct cpu_gpu_lock *lock) +{ + unsigned long timeout = jiffies + msecs_to_jiffies(1000); + + /* Indicate that the CPU wants the lock */ + lock->cpu_req = 1; + + /* post the request */ + wmb(); + + /* Wait for our turn */ + lock->turn = 0; + + /* Finish all memory transactions before moving on */ + mb(); + + /* + * Spin here while GPU ucode holds the lock, lock->gpu_req will + * be set to 0 after GPU ucode releases the lock. Maximum wait time + * is 1 second and this should be enough for GPU to release the lock. + */ + while (lock->gpu_req && lock->turn == 0) { + cpu_relax(); + /* Get the latest updates from GPU */ + rmb(); + + if (time_after(jiffies, timeout)) + break; + } + + if (lock->gpu_req && lock->turn == 0) + return -EBUSY; + + return 0; +} + +void kgsl_hwunlock(struct cpu_gpu_lock *lock) +{ + /* Make sure all writes are done before releasing the lock */ + wmb(); + lock->cpu_req = 0; +} + +void kgsl_add_to_minidump(char *name, u64 virt_addr, u64 phy_addr, size_t size) +{ + struct md_region md_entry = {0}; + int ret; + + if (!msm_minidump_enabled()) + return; + + scnprintf(md_entry.name, sizeof(md_entry.name), name); + md_entry.virt_addr = virt_addr; + md_entry.phys_addr = phy_addr; + md_entry.size = size; + ret = msm_minidump_add_region(&md_entry); + if (ret < 0 && ret != -EEXIST) + pr_err("kgsl: Failed to register %s with minidump:%d\n", name, ret); + +} + +void kgsl_remove_from_minidump(char *name, u64 virt_addr, u64 phy_addr, size_t size) +{ + struct md_region md_entry = {0}; + int ret; + + if (!msm_minidump_enabled()) + return; + + scnprintf(md_entry.name, sizeof(md_entry.name), name); + md_entry.virt_addr = virt_addr; + md_entry.phys_addr = phy_addr; + md_entry.size = size; + ret = msm_minidump_remove_region(&md_entry); + if (ret < 0 && ret != -ENOENT) + pr_err("kgsl: Failed to remove %s from minidump\n", name); +} + +int kgsl_add_va_to_minidump(struct device *dev, const char *name, void *ptr, + size_t size) +{ + struct va_md_entry entry = {0}; + int ret; + + scnprintf(entry.owner, sizeof(entry.owner), name); + entry.vaddr = (u64)(ptr); + entry.size = size; + ret = qcom_va_md_add_region(&entry); + if (ret < 0) + dev_err(dev, "Failed to register %s with va_minidump: %d\n", name, + ret); + + return ret; +} + +static int kgsl_add_driver_data_to_va_minidump(struct kgsl_device *device) +{ + int ret; + char name[32]; + struct kgsl_pagetable *pt; + struct adreno_context *ctxt; + struct kgsl_process_private *p; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + ret = kgsl_add_va_to_minidump(device->dev, 
KGSL_DRIVER, + (void *)(&kgsl_driver), sizeof(struct kgsl_driver)); + if (ret) + return ret; + + ret = kgsl_add_va_to_minidump(device->dev, KGSL_SCRATCH_ENTRY, + device->scratch->hostptr, device->scratch->size); + if (ret) + return ret; + + ret = kgsl_add_va_to_minidump(device->dev, KGSL_MEMSTORE_ENTRY, + device->memstore->hostptr, device->memstore->size); + if (ret) + return ret; + + spin_lock(&adreno_dev->active_list_lock); + list_for_each_entry(ctxt, &adreno_dev->active_list, active_node) { + snprintf(name, sizeof(name), "kgsl_adreno_ctx_%d", ctxt->base.id); + ret = kgsl_add_va_to_minidump(device->dev, name, + (void *)(ctxt), sizeof(struct adreno_context)); + if (ret) + break; + } + spin_unlock(&adreno_dev->active_list_lock); + + read_lock(&kgsl_driver.proclist_lock); + list_for_each_entry(p, &kgsl_driver.process_list, list) { + snprintf(name, sizeof(name), "kgsl_proc_priv_%d", pid_nr(p->pid)); + ret = kgsl_add_va_to_minidump(device->dev, name, + (void *)(p), sizeof(struct kgsl_process_private)); + if (ret) + break; + } + read_unlock(&kgsl_driver.proclist_lock); + + spin_lock(&kgsl_driver.ptlock); + list_for_each_entry(pt, &kgsl_driver.pagetable_list, list) { + snprintf(name, sizeof(name), "kgsl_pgtable_%d", pt->name); + ret = kgsl_add_va_to_minidump(device->dev, name, + (void *)(pt), sizeof(struct kgsl_pagetable)); + if (ret) + break; + } + spin_unlock(&kgsl_driver.ptlock); + + return ret; +} + +static int kgsl_va_minidump_callback(struct notifier_block *nb, + unsigned long action, void *unused) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(kgsl_driver.devp[0]); + const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + if (kgsl_add_driver_data_to_va_minidump(kgsl_driver.devp[0])) + return NOTIFY_BAD; + + if (gpudev->add_to_va_minidump(adreno_dev)) + return NOTIFY_BAD; + + return NOTIFY_OK; +} + +static struct notifier_block kgsl_va_minidump_nb = { + .priority = INT_MAX, + .notifier_call = kgsl_va_minidump_callback, +}; + +void kgsl_qcom_va_md_register(struct kgsl_device *device) +{ + if (!qcom_va_md_enabled()) + return; + + if (qcom_va_md_register("KGSL", &kgsl_va_minidump_nb)) + dev_err(device->dev, "Failed to register notifier with va_minidump\n"); +} diff --git a/kgsl_util.h b/kgsl_util.h new file mode 100644 index 0000000000..f2da379828 --- /dev/null +++ b/kgsl_util.h @@ -0,0 +1,150 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2019-2021, The Linux Foundation. All rights reserved. 
+ */ + +#ifndef _KGSL_UTIL_H_ +#define _KGSL_UTIL_H_ + +#define KGSL_DRIVER "kgsl_driver" +#define KGSL_ADRENO_DEVICE "kgsl_adreno_device" +#define KGSL_A6XX_DEVICE "kgsl_a6xx_device" +#define KGSL_GEN7_DEVICE "kgsl_gen7_device" +#define KGSL_HWSCHED_DEVICE "kgsl_hwsched_device" + +#define KGSL_SCRATCH_ENTRY "kgsl_scratch" +#define KGSL_MEMSTORE_ENTRY "kgsl_memstore" +#define KGSL_GMU_LOG_ENTRY "kgsl_gmu_log" +#define KGSL_HFIMEM_ENTRY "kgsl_hfi_mem" +#define KGSL_GMU_DUMPMEM_ENTRY "kgsl_gmu_dump_mem" + +struct regulator; +struct clk_bulk_data; + +/** + * struct cpu_gpu_lock - CP spinlock structure for power up list + * @gpu_req: flag value set by CP + * @cpu_req: flag value set by KMD + * @turn: turn variable set by both CP and KMD + * @list_length: this tells CP the last dword in the list: + * 16 + (4 * (List_Length - 1)) + * @list_offset: this tells CP the start of preemption only list: + * 16 + (4 * List_Offset) + */ +struct cpu_gpu_lock { + u32 gpu_req; + u32 cpu_req; + u32 turn; + u16 list_length; + u16 list_offset; +}; + +/** + * kgsl_hwlock - Try to get the spinlock + * @lock: cpu_gpu_lock structure + * + * Spin while the GPU has the lock. + * + * Return: 0 if lock is successful, -EBUSY if timed out waiting for lock + */ +int kgsl_hwlock(struct cpu_gpu_lock *lock); + +/** + * kgsl_hwunlock - Release a previously grabbed lock + * @lock: cpu_gpu_lock structure + */ +void kgsl_hwunlock(struct cpu_gpu_lock *lock); + +/** + * kgsl_regulator_disable_wait - Disable a regulator and wait for it + * @reg: A &struct regulator handle + * @timeout: Time to wait (in milliseconds) + * + * Disable the regulator and wait @timeout milliseconds for it to enter the + * disabled state. + * + * Return: True if the regulator was disabled or false if it timed out + */ +bool kgsl_regulator_disable_wait(struct regulator *reg, u32 timeout); + +/** + * kgsl_of_clk_by_name - Return a clock device for a given name + * @clks: Pointer to an array of bulk clk data + * @count: Number of entries in the array + * @id: Name of the clock to search for + * + * Return: A pointer to the clock device for the given name or NULL if not + * found + */ +struct clk *kgsl_of_clk_by_name(struct clk_bulk_data *clks, int count, + const char *id); +/** + * kgsl_regulator_set_voltage - Set voltage level for regulator + * @dev: A &struct device pointer + * @reg: A &struct regulator handle + * @voltage: Voltage value to set regulator + * + * Return: 0 on success and negative error on failure. + */ +int kgsl_regulator_set_voltage(struct device *dev, + struct regulator *reg, u32 voltage); + +/** + * kgsl_clk_set_rate - Set a clock to a given rate + * @clks: Pointer to an array of bulk clk data + * @num_clks: Number of entries in the array + * @id: Name of the clock to search for + * @rate: Rate to set the clock to + * + * Return: 0 on success or negative error on failure + */ +int kgsl_clk_set_rate(struct clk_bulk_data *clks, int num_clks, + const char *id, unsigned long rate); + +/** + * kgsl_zap_shader_load - Load a zap shader + * @dev: Pointer to the struct device for the GPU platform device + * @name: Basename of the zap shader to load (without the postfix) + * + * Load and install the zap shader named @name. Name should be specified without + * the extension, for example "a660_zap" instead of "a660_zap.mdt". 
+ * + * Return: 0 on success or negative on failure + */ +int kgsl_zap_shader_load(struct device *dev, const char *name); + +/** + * kgsl_add_to_minidump - Add a physically contiguous section to minidump + * @name: Name of the section + * @virt_addr: Virtual address of the section + * @phy_addr: Physical address of the section + * @size: Size of the section + */ +void kgsl_add_to_minidump(char *name, u64 virt_addr, u64 phy_addr, size_t size); + +/** + * kgsl_remove_from_minidump - Remove a contiguous section from minidump + * @name: Name of the section + * @virt_addr: Virtual address of the section + * @phy_addr: Physical address of the section + * @size: Size of the section + */ +void kgsl_remove_from_minidump(char *name, u64 virt_addr, u64 phy_addr, size_t size); + +/** + * kgsl_add_va_to_minidump - Add a physically non-contiguous section to minidump + * @dev: Pointer to the struct device for the GPU platform device + * @name: Name of the section + * @ptr: Virtual address of the section + * @size: Size of the section + */ +int kgsl_add_va_to_minidump(struct device *dev, const char *name, void *ptr, + size_t size); + +/** + * kgsl_qcom_va_md_register - Register driver with va-minidump + * @device: Pointer to kgsl device + */ +void kgsl_qcom_va_md_register(struct kgsl_device *device); + +#endif diff --git a/kgsl_vbo.c b/kgsl_vbo.c new file mode 100644 index 0000000000..75959b1475 --- /dev/null +++ b/kgsl_vbo.c @@ -0,0 +1,627 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. + */ + +#include +#include +#include +#include +#include + +#include "kgsl_device.h" +#include "kgsl_mmu.h" +#include "kgsl_sharedmem.h" +#include "kgsl_trace.h" + +struct kgsl_memdesc_bind_range { + struct kgsl_mem_entry *entry; + struct interval_tree_node range; +}; + +static struct kgsl_memdesc_bind_range *bind_to_range(struct interval_tree_node *node) +{ + return container_of(node, struct kgsl_memdesc_bind_range, range); +} + +static struct kgsl_memdesc_bind_range *bind_range_create(u64 start, u64 last, + struct kgsl_mem_entry *entry) +{ + struct kgsl_memdesc_bind_range *range = + kzalloc(sizeof(*range), GFP_KERNEL); + + if (!range) + return ERR_PTR(-ENOMEM); + + range->range.start = start; + range->range.last = last; + range->entry = kgsl_mem_entry_get(entry); + + if (!range->entry) { + kfree(range); + return ERR_PTR(-EINVAL); + } + + return range; +} + +static u64 bind_range_len(struct kgsl_memdesc_bind_range *range) +{ + return (range->range.last - range->range.start) + 1; +} + +void kgsl_memdesc_print_vbo_ranges(struct kgsl_mem_entry *entry, + struct seq_file *s) +{ + struct interval_tree_node *next; + struct kgsl_memdesc *memdesc = &entry->memdesc; + + if (!(memdesc->flags & KGSL_MEMFLAGS_VBO)) + return; + + /* + * We are called in an atomic context so try to get the mutex but if we + * don't then skip this item + */ + if (!mutex_trylock(&memdesc->ranges_lock)) + return; + + next = interval_tree_iter_first(&memdesc->ranges, 0, ~0UL); + while (next) { + struct kgsl_memdesc_bind_range *range = bind_to_range(next); + + seq_printf(s, "%5d %5d 0x%16.16lx-0x%16.16lx\n", + entry->id, range->entry->id, range->range.start, + range->range.last); + + next = interval_tree_iter_next(next, 0, ~0UL); + } + + mutex_unlock(&memdesc->ranges_lock); +} + +static void kgsl_memdesc_remove_range(struct kgsl_mem_entry *target, + u64 start, u64 last, struct kgsl_mem_entry *entry) +{ + struct interval_tree_node *node, *next; + struct kgsl_memdesc_bind_range *range; + 
struct kgsl_memdesc *memdesc = &target->memdesc; + + mutex_lock(&memdesc->ranges_lock); + + next = interval_tree_iter_first(&memdesc->ranges, start, last); + while (next) { + node = next; + range = bind_to_range(node); + next = interval_tree_iter_next(node, start, last); + + if (range->entry->id == entry->id) { + interval_tree_remove(node, &memdesc->ranges); + trace_kgsl_mem_remove_bind_range(target, + range->range.start, range->entry, + bind_range_len(range)); + + kgsl_mmu_unmap_range(memdesc->pagetable, + memdesc, range->range.start, bind_range_len(range)); + + kgsl_mmu_map_zero_page_to_range(memdesc->pagetable, + memdesc, range->range.start, bind_range_len(range)); + + kfree(range); + } + } + + mutex_unlock(&memdesc->ranges_lock); +} + +static int kgsl_memdesc_add_range(struct kgsl_mem_entry *target, + u64 start, u64 last, struct kgsl_mem_entry *entry, u64 offset) +{ + struct interval_tree_node *node, *next; + struct kgsl_memdesc *memdesc = &target->memdesc; + struct kgsl_memdesc_bind_range *range = + bind_range_create(start, last, entry); + + if (IS_ERR(range)) + return PTR_ERR(range); + + mutex_lock(&memdesc->ranges_lock); + + /* + * Unmap the range first. This increases the potential for a page fault + * but is safer in case something goes bad while updating the interval + * tree + */ + kgsl_mmu_unmap_range(memdesc->pagetable, memdesc, start, + last - start + 1); + + next = interval_tree_iter_first(&memdesc->ranges, start, last); + + while (next) { + struct kgsl_memdesc_bind_range *cur; + + node = next; + cur = bind_to_range(node); + next = interval_tree_iter_next(node, start, last); + + trace_kgsl_mem_remove_bind_range(target, cur->range.start, + cur->entry, bind_range_len(cur)); + + interval_tree_remove(node, &memdesc->ranges); + + if (start <= cur->range.start) { + if (last >= cur->range.last) { + kgsl_mem_entry_put(cur->entry); + kfree(cur); + continue; + } + /* Adjust the start of the mapping */ + cur->range.start = last + 1; + /* And put it back into the tree */ + interval_tree_insert(node, &memdesc->ranges); + + trace_kgsl_mem_add_bind_range(target, + cur->range.start, cur->entry, bind_range_len(cur)); + } else { + if (last < cur->range.last) { + struct kgsl_memdesc_bind_range *temp; + + /* + * The range is split into two so make a new + * entry for the far side + */ + temp = bind_range_create(last + 1, cur->range.last, + cur->entry); + /* FIXME: Uhoh, this would be bad */ + BUG_ON(IS_ERR(temp)); + + interval_tree_insert(&temp->range, + &memdesc->ranges); + + trace_kgsl_mem_add_bind_range(target, + temp->range.start, + temp->entry, bind_range_len(temp)); + } + + cur->range.last = start - 1; + interval_tree_insert(node, &memdesc->ranges); + + trace_kgsl_mem_add_bind_range(target, cur->range.start, + cur->entry, bind_range_len(cur)); + } + } + + /* Add the new range */ + interval_tree_insert(&range->range, &memdesc->ranges); + + trace_kgsl_mem_add_bind_range(target, range->range.start, + range->entry, bind_range_len(range)); + mutex_unlock(&memdesc->ranges_lock); + + return kgsl_mmu_map_child(memdesc->pagetable, memdesc, start, + &entry->memdesc, offset, last - start + 1); +} + +static void kgsl_sharedmem_vbo_put_gpuaddr(struct kgsl_memdesc *memdesc) +{ + struct interval_tree_node *node, *next; + struct kgsl_memdesc_bind_range *range; + + /* Unmap the entire pagetable region */ + kgsl_mmu_unmap_range(memdesc->pagetable, memdesc, + 0, memdesc->size); + + /* Put back the GPU address */ + kgsl_mmu_put_gpuaddr(memdesc->pagetable, memdesc); + + memdesc->gpuaddr = 0; + 
memdesc->pagetable = NULL; + + /* + * FIXME: do we have a use after free potential here? We might need to + * lock this and set a "do not update" bit + */ + + /* Now delete each range and release the mem entries */ + next = interval_tree_iter_first(&memdesc->ranges, 0, ~0UL); + + while (next) { + node = next; + range = bind_to_range(node); + next = interval_tree_iter_next(node, 0, ~0UL); + + interval_tree_remove(node, &memdesc->ranges); + kgsl_mem_entry_put(range->entry); + kfree(range); + } +} + +static struct kgsl_memdesc_ops kgsl_vbo_ops = { + .put_gpuaddr = kgsl_sharedmem_vbo_put_gpuaddr, +}; + +int kgsl_sharedmem_allocate_vbo(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, u64 size, u64 flags) +{ + size = PAGE_ALIGN(size); + + /* Make sure that VBOs are supported by the MMU */ + if (WARN_ON_ONCE(!kgsl_mmu_has_feature(device, + KGSL_MMU_SUPPORT_VBO))) + return -EOPNOTSUPP; + + kgsl_memdesc_init(device, memdesc, flags); + memdesc->priv = 0; + + memdesc->ops = &kgsl_vbo_ops; + memdesc->size = size; + + /* Set up the interval tree and lock */ + memdesc->ranges = RB_ROOT_CACHED; + mutex_init(&memdesc->ranges_lock); + + return 0; +} + +static bool kgsl_memdesc_check_range(struct kgsl_memdesc *memdesc, + u64 offset, u64 length) +{ + return ((offset < memdesc->size) && + (offset + length > offset) && + (offset + length) <= memdesc->size); +} + +static void kgsl_sharedmem_free_bind_op(struct kgsl_sharedmem_bind_op *op) +{ + int i; + + if (IS_ERR_OR_NULL(op)) + return; + + for (i = 0; i < op->nr_ops; i++) + kgsl_mem_entry_put(op->ops[i].entry); + + kgsl_mem_entry_put(op->target); + + kvfree(op->ops); + kfree(op); +} + +struct kgsl_sharedmem_bind_op * +kgsl_sharedmem_create_bind_op(struct kgsl_process_private *private, + u32 target_id, void __user *ranges, u32 ranges_nents, + u64 ranges_size) +{ + struct kgsl_sharedmem_bind_op *op; + struct kgsl_mem_entry *target; + int ret, i; + + /* There must be at least one defined operation */ + if (!ranges_nents) + return ERR_PTR(-EINVAL); + + /* Find the target memory entry */ + target = kgsl_sharedmem_find_id(private, target_id); + if (!target) + return ERR_PTR(-ENOENT); + + if (!(target->memdesc.flags & KGSL_MEMFLAGS_VBO)) { + kgsl_mem_entry_put(target); + return ERR_PTR(-EINVAL); + } + + /* Make a container for the bind operations */ + op = kzalloc(sizeof(*op), GFP_KERNEL); + if (!op) { + kgsl_mem_entry_put(target); + return ERR_PTR(-ENOMEM); + } + + /* + * Make an array for the individual operations. 
Use __GFP_NOWARN and + * __GFP_NORETRY to make sure a very large request quietly fails + */ + op->ops = kvcalloc(ranges_nents, sizeof(*op->ops), + GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); + if (!op->ops) { + kfree(op); + kgsl_mem_entry_put(target); + return ERR_PTR(-ENOMEM); + } + + op->nr_ops = ranges_nents; + op->target = target; + + for (i = 0; i < ranges_nents; i++) { + struct kgsl_gpumem_bind_range range; + struct kgsl_mem_entry *entry; + u32 size; + + size = min_t(u32, sizeof(range), ranges_size); + + ret = -EINVAL; + + if (copy_from_user(&range, ranges, size)) { + ret = -EFAULT; + goto err; + } + + /* The offset must be page aligned */ + if (!PAGE_ALIGNED(range.target_offset)) + goto err; + + /* The length of the operation must be aligned and non-zero */ + if (!range.length || !PAGE_ALIGNED(range.length)) + goto err; + + /* Make sure the range fits in the target */ + if (!kgsl_memdesc_check_range(&target->memdesc, + range.target_offset, range.length)) + goto err; + + /* Get the child object */ + op->ops[i].entry = kgsl_sharedmem_find_id(private, + range.child_id); + entry = op->ops[i].entry; + if (!entry) { + ret = -ENOENT; + goto err; + } + + /* Make sure the child is not a VBO */ + if ((entry->memdesc.flags & KGSL_MEMFLAGS_VBO)) { + ret = -EINVAL; + goto err; + } + + /* + * Make sure that only secure children are mapped in secure VBOs + * and vice versa + */ + if ((target->memdesc.flags & KGSL_MEMFLAGS_SECURE) != + (entry->memdesc.flags & KGSL_MEMFLAGS_SECURE)) { + ret = -EPERM; + goto err; + } + + /* Make sure the range operation is valid */ + if (range.op != KGSL_GPUMEM_RANGE_OP_BIND && + range.op != KGSL_GPUMEM_RANGE_OP_UNBIND) + goto err; + + if (range.op == KGSL_GPUMEM_RANGE_OP_BIND) { + if (!PAGE_ALIGNED(range.child_offset)) + goto err; + + /* Make sure the range fits in the child */ + if (!kgsl_memdesc_check_range(&entry->memdesc, + range.child_offset, range.length)) + goto err; + } else { + /* For unbind operations the child offset must be 0 */ + if (range.child_offset) + goto err; + } + + op->ops[i].entry = entry; + op->ops[i].start = range.target_offset; + op->ops[i].last = range.target_offset + range.length - 1; + op->ops[i].child_offset = range.child_offset; + op->ops[i].op = range.op; + + ranges += ranges_size; + } + + kref_init(&op->ref); + + return op; + +err: + kgsl_sharedmem_free_bind_op(op); + return ERR_PTR(ret); +} + +void kgsl_sharedmem_bind_range_destroy(struct kref *kref) +{ + struct kgsl_sharedmem_bind_op *op = container_of(kref, + struct kgsl_sharedmem_bind_op, ref); + + kgsl_sharedmem_free_bind_op(op); +} + +static void kgsl_sharedmem_bind_worker(struct work_struct *work) +{ + struct kgsl_sharedmem_bind_op *op = container_of(work, + struct kgsl_sharedmem_bind_op, work); + int i; + + for (i = 0; i < op->nr_ops; i++) { + if (op->ops[i].op == KGSL_GPUMEM_RANGE_OP_BIND) + kgsl_memdesc_add_range(op->target, + op->ops[i].start, + op->ops[i].last, + op->ops[i].entry, + op->ops[i].child_offset); + else + kgsl_memdesc_remove_range(op->target, + op->ops[i].start, + op->ops[i].last, + op->ops[i].entry); + + /* Release the reference on the child entry */ + kgsl_mem_entry_put(op->ops[i].entry); + op->ops[i].entry = NULL; + } + + /* Release the reference on the target entry */ + kgsl_mem_entry_put(op->target); + op->target = NULL; + + if (op->callback) + op->callback(op); + + kref_put(&op->ref, kgsl_sharedmem_bind_range_destroy); +} + +void kgsl_sharedmem_bind_ranges(struct kgsl_sharedmem_bind_op *op) +{ + /* Take a reference to the operation while it is scheduled */ 
+ kref_get(&op->ref); + + INIT_WORK(&op->work, kgsl_sharedmem_bind_worker); + schedule_work(&op->work); +} + +struct kgsl_sharedmem_bind_fence { + struct dma_fence base; + spinlock_t lock; + int fd; + struct kgsl_sharedmem_bind_op *op; +}; + +static const char *bind_fence_get_driver_name(struct dma_fence *fence) +{ + return "kgsl_sharedmem_bind"; +} + +static const char *bind_fence_get_timeline_name(struct dma_fence *fence) +{ + return "(unbound)"; +} + +static void bind_fence_release(struct dma_fence *fence) +{ + struct kgsl_sharedmem_bind_fence *bind_fence = container_of(fence, + struct kgsl_sharedmem_bind_fence, base); + + kgsl_sharedmem_put_bind_op(bind_fence->op); + kfree(bind_fence); +} + +static void +kgsl_sharedmem_bind_fence_callback(struct kgsl_sharedmem_bind_op *op) +{ + struct kgsl_sharedmem_bind_fence *bind_fence = op->data; + + dma_fence_signal(&bind_fence->base); + dma_fence_put(&bind_fence->base); +} + +static const struct dma_fence_ops kgsl_sharedmem_bind_fence_ops = { + .get_driver_name = bind_fence_get_driver_name, + .get_timeline_name = bind_fence_get_timeline_name, + .release = bind_fence_release, +}; + +static struct kgsl_sharedmem_bind_fence * +kgsl_sharedmem_bind_fence(struct kgsl_sharedmem_bind_op *op) +{ + struct kgsl_sharedmem_bind_fence *fence; + struct sync_file *sync_file; + int fd; + + fence = kzalloc(sizeof(*fence), GFP_KERNEL); + if (!fence) + return ERR_PTR(-ENOMEM); + + spin_lock_init(&fence->lock); + + dma_fence_init(&fence->base, &kgsl_sharedmem_bind_fence_ops, + &fence->lock, dma_fence_context_alloc(1), 0); + + fd = get_unused_fd_flags(O_CLOEXEC); + if (fd < 0) { + kfree(fence); + return ERR_PTR(fd); + } + + sync_file = sync_file_create(&fence->base); + if (!sync_file) { + put_unused_fd(fd); + kfree(fence); + return ERR_PTR(-ENOMEM); + } + + fd_install(fd, sync_file->file); + + fence->fd = fd; + fence->op = op; + + return fence; +} + +static void +kgsl_sharedmem_bind_async_callback(struct kgsl_sharedmem_bind_op *op) +{ + struct completion *comp = op->data; + + complete(comp); +} + +long kgsl_ioctl_gpumem_bind_ranges(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + DECLARE_COMPLETION_ONSTACK(sync); + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_gpumem_bind_ranges *param = data; + struct kgsl_sharedmem_bind_op *op; + int ret; + + /* If ranges_size isn't set, return the expected size to the user */ + if (!param->ranges_size) { + param->ranges_size = sizeof(struct kgsl_gpumem_bind_range); + return 0; + } + + /* FENCE_OUT only makes sense with ASYNC */ + if ((param->flags & KGSL_GPUMEM_BIND_FENCE_OUT) && + !(param->flags & KGSL_GPUMEM_BIND_ASYNC)) + return -EINVAL; + + op = kgsl_sharedmem_create_bind_op(private, param->id, + u64_to_user_ptr(param->ranges), param->ranges_nents, + param->ranges_size); + if (IS_ERR(op)) + return PTR_ERR(op); + + if (param->flags & KGSL_GPUMEM_BIND_ASYNC) { + struct kgsl_sharedmem_bind_fence *fence; + + if (param->flags & KGSL_GPUMEM_BIND_FENCE_OUT) { + fence = kgsl_sharedmem_bind_fence(op); + + if (IS_ERR(fence)) { + kgsl_sharedmem_put_bind_op(op); + return PTR_ERR(fence); + } + + op->data = fence; + op->callback = kgsl_sharedmem_bind_fence_callback; + param->fence_id = fence->fd; + } + + kgsl_sharedmem_bind_ranges(op); + + if (!(param->flags & KGSL_GPUMEM_BIND_FENCE_OUT)) + kgsl_sharedmem_put_bind_op(op); + + return 0; + } + + /* For synchronous operations add a completion to wait on */ + op->callback = kgsl_sharedmem_bind_async_callback; + op->data = &sync; + + 
init_completion(&sync); + + /* + * Schedule the work. All the resources will be released after + * the bind operation is done + */ + kgsl_sharedmem_bind_ranges(op); + + ret = wait_for_completion_interruptible(&sync); + kgsl_sharedmem_put_bind_op(op); + + return ret; +} diff --git a/msm_adreno_devfreq.h b/msm_adreno_devfreq.h new file mode 100644 index 0000000000..be366cda04 --- /dev/null +++ b/msm_adreno_devfreq.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2016-2021, The Linux Foundation. All rights reserved. + */ + +#ifndef MSM_ADRENO_DEVFREQ_H +#define MSM_ADRENO_DEVFREQ_H + +#include +#include + +#define DEVFREQ_FLAG_WAKEUP_MAXFREQ 0x2 +#define DEVFREQ_FLAG_FAST_HINT 0x4 +#define DEVFREQ_FLAG_SLOW_HINT 0x8 + +struct device; + +/* same as KGSL_MAX_PWRLEVELS */ +#define MSM_ADRENO_MAX_PWRLEVELS 16 + +struct xstats { + u64 ram_time; + u64 ram_wait; + int buslevel; +}; + +struct devfreq_msm_adreno_tz_data { + struct notifier_block nb; + struct { + s64 total_time; + s64 busy_time; + u32 ctxt_aware_target_pwrlevel; + u32 ctxt_aware_busy_penalty; + } bin; + struct { + u64 total_time; + u64 ram_time; + u64 ram_wait; + u64 gpu_time; + u32 num; + u32 max; + u32 width; + u32 *up; + u32 *down; + s32 *p_up; + s32 *p_down; + u32 *ib_kbps; + bool floating; + } bus; + unsigned int device_id; + bool is_64; + bool disable_busy_time_burst; + bool ctxt_aware_enable; + /* Multiplier to change gpu busy status */ + u32 mod_percent; +}; + +struct msm_adreno_extended_profile { + struct devfreq_msm_adreno_tz_data *private_data; + struct devfreq_dev_profile profile; +}; + +struct msm_busmon_extended_profile { + u32 flag; + u32 sampling_ms; + unsigned long percent_ab; + unsigned long ab_mbytes; + struct devfreq_msm_adreno_tz_data *private_data; + struct devfreq_dev_profile profile; +}; + +typedef void(*getbw_func)(unsigned long *, unsigned long *, void *); + +int devfreq_vbif_update_bw(void); +void devfreq_vbif_register_callback(getbw_func func, void *data); + +#endif From 019de8b3ddc4540d451ffda14228a5eca83b7f96 Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Mon, 11 Oct 2021 14:10:22 -0700 Subject: [PATCH 003/750] msm: kgsl: Update build files for Kalama Update the build files for the Kalama target. 
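For reference, the gating these Makefile changes introduce can be summarized as
follows. This is an illustrative sketch only, condensed from the diffs below and
not the literal file contents; all variable names are taken from the Makefiles
touched by this change:

    # Sketch: build and package the KGSL DLKM only for non-QMAA vendor builds
    ifneq ($(TARGET_USES_QMAA),true)
        KGSL_SELECT := CONFIG_QCOM_KGSL=m
        KBUILD_OPTIONS += $(KGSL_SELECT) BOARD_PLATFORM=$(TARGET_BOARD_PLATFORM)
        BOARD_VENDOR_KERNEL_MODULES += $(KERNEL_MODULES_OUT)/msm_kgsl.ko
    endif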
Change-Id: I0b3495715e3affed4e8e0d7ea504bc41973efabf Signed-off-by: Lynus Vaz --- Android.mk | 13 ++++++++++--- Kbuild | 3 +++ gfx_kernel_board.mk | 3 ++- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/Android.mk b/Android.mk index cad91bfab6..63924eac51 100644 --- a/Android.mk +++ b/Android.mk @@ -1,9 +1,14 @@ -# Test dlkm -DLKM_DIR := device/qcom/common/dlkm +ifneq ($(TARGET_USES_QMAA),true) KGSL_SELECT := CONFIG_QCOM_KGSL=m -KERN_SRC := $(ANDROID_TOP)/kernel_platform/msm-kernel LOCAL_PATH := $(call my-dir) +include $(CLEAR_VARS) + +# This makefile is only for DLKM +ifneq ($(findstring vendor,$(LOCAL_PATH)),) + +DLKM_DIR := device/qcom/common/dlkm +KERN_SRC := $(ANDROID_TOP)/kernel_platform/msm-kernel KBUILD_OPTIONS += BOARD_PLATFORM=$(TARGET_BOARD_PLATFORM) KBUILD_OPTIONS += $(KGSL_SELECT) @@ -27,3 +32,5 @@ LOCAL_MODULE_PATH := $(KERNEL_MODULES_OUT) BOARD_VENDOR_KERNEL_MODULES += $(LOCAL_MODULE_PATH)/$(LOCAL_MODULE) include $(DLKM_DIR)/Build_external_kernelmodule.mk +endif # DLKM check +endif # QMAA check diff --git a/Kbuild b/Kbuild index b1212a31a7..b4d7e582ed 100644 --- a/Kbuild +++ b/Kbuild @@ -9,6 +9,9 @@ endif ifeq ($(CONFIG_ARCH_WAIPIO), y) include $(KGSL_PATH)/config/gki_waipiodisp.conf endif +ifeq ($(CONFIG_ARCH_KALAMA), y) + include $(KGSL_PATH)/config/gki_waipiodisp.conf +endif ccflags-y += -I$(KGSL_PATH) -I$(KGSL_PATH)/include/linux -I$(KGSL_PATH)/include -I$(KERN_SRC)/drivers/devfreq diff --git a/gfx_kernel_board.mk b/gfx_kernel_board.mk index 238b8ff9ed..33beab8906 100644 --- a/gfx_kernel_board.mk +++ b/gfx_kernel_board.mk @@ -1,5 +1,6 @@ #SPDX-License-Identifier: GPL-2.0-only +ifneq ($(TARGET_USES_QMAA),true) ifneq ($(TARGET_BOARD_AUTO),true) ifeq ($(call is-board-platform-in-list,$(TARGET_BOARD_PLATFORM)),true) BOARD_VENDOR_KERNEL_MODULES += $(KERNEL_MODULES_OUT)/msm_kgsl.ko @@ -7,4 +8,4 @@ ifneq ($(TARGET_BOARD_AUTO),true) BOARD_VENDOR_RAMDISK_RECOVERY_KERNEL_MODULES_LOAD += $(KERNEL_MODULES_OUT)/msm_kgsl.ko endif endif - +endif From a23c8798bf6d56b4e516141fff9e18cfb3593b97 Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Thu, 11 Nov 2021 19:06:57 -0800 Subject: [PATCH 004/750] msm: kgsl: Remove obsolete IOMMU API calls Convert uses of the obsolete iommu_domain_{get,set}_attr() to the newer specific functions that perform the same purpose. Change-Id: Idb6f27f4cacffca3612e583c54992ae10cb7c55d Signed-off-by: Lynus Vaz --- adreno_a6xx_gmu.c | 4 +-- adreno_gen7_gmu.c | 4 +-- kgsl_iommu.c | 74 +++++++++++++++++------------------------------ 3 files changed, 28 insertions(+), 54 deletions(-) diff --git a/adreno_a6xx_gmu.c b/adreno_a6xx_gmu.c index 2f1f25d194..90fb4b23f8 100644 --- a/adreno_a6xx_gmu.c +++ b/adreno_a6xx_gmu.c @@ -2580,7 +2580,6 @@ static int a6xx_gmu_iommu_fault_handler(struct iommu_domain *domain, static int a6xx_gmu_iommu_init(struct a6xx_gmu_device *gmu) { int ret; - int no_stall = 1; gmu->domain = iommu_domain_alloc(&platform_bus_type); if (gmu->domain == NULL) { @@ -2593,8 +2592,7 @@ static int a6xx_gmu_iommu_init(struct a6xx_gmu_device *gmu) * This sets SCTLR.CFCFG = 0. * Also note that, the smmu driver sets SCTLR.HUPCF = 0 by default. 
*/ - iommu_domain_set_attr(gmu->domain, - DOMAIN_ATTR_FAULT_MODEL_NO_STALL, &no_stall); + qcom_iommu_set_fault_model(gmu->domain, QCOM_IOMMU_FAULT_MODEL_NO_STALL); ret = iommu_attach_device(gmu->domain, &gmu->pdev->dev); if (!ret) { diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index 867d683037..da1a55ea50 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -1917,7 +1917,6 @@ static int gen7_gmu_iommu_fault_handler(struct iommu_domain *domain, static int gen7_gmu_iommu_init(struct gen7_gmu_device *gmu) { int ret; - int no_stall = 1; gmu->domain = iommu_domain_alloc(&platform_bus_type); if (gmu->domain == NULL) { @@ -1930,8 +1929,7 @@ static int gen7_gmu_iommu_init(struct gen7_gmu_device *gmu) * This sets SCTLR.CFCFG = 0. * Also note that, the smmu driver sets SCTLR.HUPCF = 0 by default. */ - iommu_domain_set_attr(gmu->domain, - DOMAIN_ATTR_FAULT_MODEL_NO_STALL, &no_stall); + qcom_iommu_set_fault_model(gmu->domain, QCOM_IOMMU_FAULT_MODEL_NO_STALL); ret = iommu_attach_device(gmu->domain, &gmu->pdev->dev); if (!ret) { diff --git a/kgsl_iommu.c b/kgsl_iommu.c index 82518dcdc0..16a006c82a 100644 --- a/kgsl_iommu.c +++ b/kgsl_iommu.c @@ -74,46 +74,26 @@ static const struct iommu_flush_ops kgsl_iopgtbl_tlb_ops = { .tlb_add_page = _tlb_add_page, }; -static bool _iommu_domain_check_bool(struct iommu_domain *domain, int attr) -{ - u32 val; - int ret = iommu_domain_get_attr(domain, attr, &val); - - return (!ret && val); -} - -static int _iommu_domain_context_bank(struct iommu_domain *domain) -{ - int val, ret; - - ret = iommu_domain_get_attr(domain, DOMAIN_ATTR_CONTEXT_BANK, &val); - - return ret ? ret : val; -} - static struct kgsl_iommu_pt *to_iommu_pt(struct kgsl_pagetable *pagetable) { return container_of(pagetable, struct kgsl_iommu_pt, base); } -static u32 get_llcc_flags(struct iommu_domain *domain) +static u32 get_llcc_flags(struct kgsl_mmu *mmu) { - if (_iommu_domain_check_bool(domain, DOMAIN_ATTR_USE_LLC_NWA)) - return IOMMU_USE_LLC_NWA; - - if (_iommu_domain_check_bool(domain, DOMAIN_ATTR_USE_UPSTREAM_HINT)) + if (mmu->subtype == KGSL_IOMMU_SMMU_V500) + return (test_bit(KGSL_MMU_IO_COHERENT, &mmu->features)) ? + 0 : IOMMU_USE_LLC_NWA; + else return IOMMU_USE_UPSTREAM_HINT; - - return 0; } - -static int _iommu_get_protection_flags(struct iommu_domain *domain, +static int _iommu_get_protection_flags(struct kgsl_mmu *mmu, struct kgsl_memdesc *memdesc) { int flags = IOMMU_READ | IOMMU_WRITE | IOMMU_NOEXEC; - flags |= get_llcc_flags(domain); + flags |= get_llcc_flags(mmu); if (memdesc->flags & KGSL_MEMFLAGS_GPUREADONLY) flags &= ~IOMMU_WRITE; @@ -282,7 +262,7 @@ kgsl_iopgtbl_map_child(struct kgsl_pagetable *pt, struct kgsl_memdesc *memdesc, return ret; /* Inherit the flags from the child for this mapping */ - flags = _iommu_get_protection_flags(domain, child); + flags = _iommu_get_protection_flags(pt->mmu, child); ret = _iopgtbl_map_sg(iommu_pt, memdesc->gpuaddr + offset, &sgt, flags); @@ -332,7 +312,6 @@ static int kgsl_iopgtbl_map_zero_page_to_range(struct kgsl_pagetable *pt, struct kgsl_memdesc *memdesc, u64 offset, u64 length) { struct kgsl_iommu *iommu = &pt->mmu->iommu; - struct iommu_domain *domain = to_iommu_domain(&iommu->user_context); /* * The SMMU only does the PRT compare at the bottom level of the page table, because * there is not an easy way for the hardware to perform this check at earlier levels. 
@@ -340,7 +319,7 @@ static int kgsl_iopgtbl_map_zero_page_to_range(struct kgsl_pagetable *pt, * of this zero page is programmed in PRR register, MMU will intercept any accesses to * the page before they go to DDR and will terminate the transaction. */ - u32 flags = IOMMU_READ | IOMMU_WRITE | IOMMU_NOEXEC | get_llcc_flags(domain); + u32 flags = IOMMU_READ | IOMMU_WRITE | IOMMU_NOEXEC | get_llcc_flags(pt->mmu); struct kgsl_iommu_pt *iommu_pt = to_iommu_pt(pt); struct page *page = kgsl_vbo_zero_page; @@ -363,12 +342,11 @@ static int kgsl_iopgtbl_map(struct kgsl_pagetable *pagetable, { struct kgsl_iommu_pt *pt = to_iommu_pt(pagetable); struct kgsl_iommu *iommu = &pagetable->mmu->iommu; - struct iommu_domain *domain = to_iommu_domain(&iommu->user_context); size_t mapped, padding; int prot; /* Get the protection flags for the user context */ - prot = _iommu_get_protection_flags(domain, memdesc); + prot = _iommu_get_protection_flags(pagetable->mmu, memdesc); if (memdesc->sgt) mapped = _iopgtbl_map_sg(pt, memdesc->gpuaddr, @@ -465,9 +443,10 @@ static size_t _iommu_map_sg(struct iommu_domain *domain, u64 gpuaddr, } static int -_kgsl_iommu_map(struct iommu_domain *domain, struct kgsl_memdesc *memdesc) +_kgsl_iommu_map(struct kgsl_mmu *mmu, struct iommu_domain *domain, + struct kgsl_memdesc *memdesc) { - int prot = _iommu_get_protection_flags(domain, memdesc); + int prot = _iommu_get_protection_flags(mmu, memdesc); size_t mapped, padding; int ret = 0; @@ -519,7 +498,7 @@ static int kgsl_iommu_secure_map(struct kgsl_pagetable *pagetable, struct kgsl_iommu *iommu = &pagetable->mmu->iommu; struct iommu_domain *domain = to_iommu_domain(&iommu->secure_context); - return _kgsl_iommu_map(domain, memdesc); + return _kgsl_iommu_map(pagetable->mmu, domain, memdesc); } /* @@ -557,13 +536,13 @@ static int kgsl_iommu_default_map(struct kgsl_pagetable *pagetable, domain = to_iommu_domain(&iommu->user_context); /* Map the object to the default GPU domain */ - ret = _kgsl_iommu_map(domain, memdesc); + ret = _kgsl_iommu_map(mmu, domain, memdesc); /* Also map the object to the LPAC domain if it exists */ lpac = to_iommu_domain(&iommu->lpac_context); if (!ret && lpac) { - ret = _kgsl_iommu_map(lpac, memdesc); + ret = _kgsl_iommu_map(mmu, lpac, memdesc); /* On failure, also unmap from the default domain */ if (ret) @@ -1097,7 +1076,7 @@ static int kgsl_iommu_get_context_bank(struct kgsl_pagetable *pt) struct kgsl_iommu *iommu = to_kgsl_iommu(pt); struct iommu_domain *domain = to_iommu_domain(&iommu->user_context); - return _iommu_domain_context_bank(domain); + return qcom_iommu_get_context_bank_nr(domain); } static void kgsl_iommu_destroy_default_pagetable(struct kgsl_pagetable *pagetable) @@ -1131,15 +1110,15 @@ static void kgsl_iommu_destroy_pagetable(struct kgsl_pagetable *pagetable) static void _enable_gpuhtw_llc(struct kgsl_mmu *mmu, struct iommu_domain *domain) { - int val = 1; - if (!test_bit(KGSL_MMU_LLCC_ENABLE, &mmu->features)) return; - if (mmu->subtype == KGSL_IOMMU_SMMU_V500) - iommu_domain_set_attr(domain, DOMAIN_ATTR_USE_LLC_NWA, &val); - else - iommu_domain_set_attr(domain, DOMAIN_ATTR_USE_UPSTREAM_HINT, &val); + if (mmu->subtype == KGSL_IOMMU_SMMU_V500) { + if (!test_bit(KGSL_MMU_IO_COHERENT, &mmu->features)) + iommu_set_pgtable_quirks(domain, + IO_PGTABLE_QUIRK_QCOM_USE_LLC_NWA); + } else + iommu_set_pgtable_quirks(domain, IO_PGTABLE_QUIRK_ARM_OUTER_WBWA); } static int set_smmu_aperture(struct kgsl_device *device, @@ -2059,7 +2038,7 @@ static int kgsl_iommu_setup_context(struct kgsl_mmu *mmu, 
iommu_set_fault_handler(context->domain, handler, mmu); - context->cb_num = _iommu_domain_context_bank(context->domain); + context->cb_num = qcom_iommu_get_context_bank_nr(context->domain); if (context->cb_num >= 0) return 0; @@ -2156,8 +2135,7 @@ static int iommu_probe_secure_context(struct kgsl_device *device, return -ENODEV; } - ret = iommu_domain_set_attr(context->domain, DOMAIN_ATTR_SECURE_VMID, - &secure_vmid); + ret = qcom_iommu_set_secure_vmid(context->domain, secure_vmid); if (ret) { dev_err(device->dev, "Unable to set the secure VMID: %d\n", ret); iommu_domain_free(context->domain); @@ -2180,7 +2158,7 @@ static int iommu_probe_secure_context(struct kgsl_device *device, iommu_set_fault_handler(context->domain, kgsl_iommu_secure_fault_handler, mmu); - context->cb_num = _iommu_domain_context_bank(context->domain); + context->cb_num = qcom_iommu_get_context_bank_nr(context->domain); if (context->cb_num < 0) { iommu_detach_device(context->domain, &context->pdev->dev); From 9ba960e696ba1793100dacd8ca8de4ad29ba5e58 Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Thu, 11 Nov 2021 18:57:59 -0800 Subject: [PATCH 005/750] msm: kgsl: Catchup from msm-5.10 Update KGSL code to msm-5.10 commit f3801b189e25 (Merge "msm: kgsl: Correct a6xx CP init sequence"). This includes the following changes from msm-5.10: 4aabff3db0cf (msm: kgsl: Add support to query process memory usage) dcf93c3a2f0b (msm: kgsl: Vote ddr perf mode for better performance) dc70d7f2961e (msm: kgsl: Enable preemption for gen7 GPU) 211d943edc87 (msm: kgsl: Provide a mechanism to unbind a range in VBO) 45d7e571a332 (msm: kgsl: Correctly validate devfreq_add_device return value) a07afc4e1477 (msm: kgsl: Reclaim gpu pages asynchronously) 8b5de66d4f1d (msm: kgsl: Fix uninitialized variable use in kgsl_iommu_print_fault()) 0bd2f8ba6a8f (msm: kgsl: Use kmalloc() for HFI message storage) 79f18623e7f1 (msm: kgsl: Make sure BR doesn't race ahead of BV) 48fc67d2bcfb (msm: kgsl: Get rid of per ringbuffer scratch memory) 96f7537ccfcd (msm: kgsl: Remove redundant SET_PSEUDO_REGISTER packets) ca1cbeedfcd6 (msm: kgsl: Fix return value due to devfreq_add_device() failure) 2092f3df2fae (msm: kgsl: Add enable_signaling for timeline fence) 6f24e5a5112e (msm: kgsl: Remove unneeded NULL check when submitting commands) 9d13d7a5ca6d (msm: kgsl: Pre-allocate page pool tracking structures) 7e5a0845ad64 (msm: kgsl: Add Gen7 support for RDPM frequency register writes) 682d0ea32d62 (msm: kgsl: Add RDPM update for GMU frequency changes) 2eddfeae4850 (msm: kgsl: Add support for the new bit in RESET_CONTEXT_STATE packet) 1141005a4787 (msm: kgsl: Add support for A662 GPU) fc04d956e44a (msm: kgsl: Fix gmu power counter reg address) 286d5d09faf1 (msm: kgsl: Remove invalid kgsl_process_private_put) 4b5e16dbec0d (msm: kgsl: Update the list of protected registers for A730) 3a107bfc062a (msm: kgsl: Bail out of allocation loops if a SIG_KILL is pending) ca22fde3705d (msm: kgsl: Fix out of bound write in adreno_profile_submit_time) 73d27010f02f (msm: kgsl: Avoid parsing IBs during atomic snapshot) 684032d39451 (msm: kgsl: Correct VMID flag for secure buffer validation) 8548ed02e179 (msm: kgsl: Expose memtype stats through sysfs) 52757e33639a (msm: kgsl: Enable Preemption on A662 GPU) a8edbf590967 (msm: kgsl: Add support for new GMU uncached VA range) ff25ecb13ad5 (msm: kgsl: Remove process debugfs and kobject without mutex) 397f7d63607e (msm: kgsl: Fix clockgating values for various blocks) 06f837b9da4f (msm: kgsl: Allow concurrent requests for oob_perfcntr) 
73a66962fe20 (msm: kgsl: Make the Adreno trace instance enabled through Kconfig) 6d65fed34558 (msm: kgsl: Update range checking when building voltage tables) 698d612ba43d (msm: kgsl: Verify secure access before importing buffers) 1a278333d0c7 (msm: kgsl: Fix syncsource spinlock recursion) 82c89093b63b (msm: kgsl: Fix HWSCHED_MAX_DISPATCH_NUMIBS) e6d7e1d4bdc3 (msm: kgsl: Set max ACD levels to match max power levels) eb56597e5392 (msm: kgsl: Ensure global entry free stays in bounds) 8a8fbdf97a7a (msm: kgsl: Ensure local variables are set before use) 402bb87906b3 (msm: kgsl: Take snapshot if GPU isn't idle before slumber) 6341d395821b (msm: kgsl: Fix syncpoint timeout log for timeline fences) 0058b5eb2760 (msm: kgsl: Add inflight commands to tracepoint) 127ac415117e (msm: kgsl: Fix NULL pointer dereference) 4433948157f8 (msm: kgsl: Do not process HFI queues in F2H daemon during init sequence) 6a298b62c2d7 (msm: kgsl: Increase wait during kgsl_open) e5fd445b298e (msm: kgsl: Update register protection config) eff8f6e07da7 (msm: kgsl: Set min_pwrlevel based on configuration) e13459b66d4a (msm: kgsl: Log unprotected write address) c19ed67ffbcf (msm: kgsl: Free up iommu page tables on process close) d221f9dd6c44 (msm: kgsl: Update the IFPC power up reglist) 697143a7d17a (msm: kgsl: Update GPUCC Offsets for A662) 180c1d5e124a (msm: kgsl: Add support for C501 GPU) f583f456d0a2 (msm: kgsl: Correct a6xx CP init sequence) dde4355ea92d (msm: kgsl: Add GBIF L2 CGC control with A6x CGC) Change-Id: Ib679fb0b2cb47b79e7caed531de0c0aa7ef0558d Signed-off-by: Lynus Vaz --- Kconfig | 29 +++++++ a6xx_reg.h | 7 +- adreno-gpulist.h | 163 +++++++++++++++++++++++++---------- adreno.c | 45 ++++++---- adreno.h | 48 +++++++---- adreno_a5xx.c | 4 +- adreno_a5xx_preempt.c | 21 ++--- adreno_a5xx_ringbuffer.c | 14 +-- adreno_a6xx.c | 62 +++++++++++--- adreno_a6xx.h | 2 +- adreno_a6xx_gmu.c | 16 +++- adreno_a6xx_gmu.h | 2 + adreno_a6xx_hwsched.c | 2 + adreno_a6xx_hwsched_hfi.c | 4 +- adreno_a6xx_preempt.c | 57 +++++-------- adreno_a6xx_rgmu.c | 12 ++- adreno_a6xx_rgmu.h | 2 + adreno_a6xx_ringbuffer.c | 24 ++++-- adreno_gen7.c | 66 +++++++++++++-- adreno_gen7.h | 18 ++++ adreno_gen7_gmu.c | 52 +++++++++++- adreno_gen7_gmu.h | 6 ++ adreno_gen7_hwsched.c | 21 ++++- adreno_gen7_hwsched_hfi.c | 6 +- adreno_gen7_preempt.c | 61 +++++-------- adreno_gen7_ringbuffer.c | 60 ++++++++++--- adreno_gen7_rpmh.c | 13 ++- adreno_gen7_snapshot.c | 2 +- adreno_hfi.h | 21 +---- adreno_hwsched.c | 8 ++ adreno_pm4types.h | 4 + adreno_ringbuffer.c | 9 -- adreno_ringbuffer.h | 32 ++----- adreno_snapshot.c | 8 ++ adreno_trace.c | 2 + gen7_reg.h | 4 +- kgsl.c | 104 ++++++++++++++--------- kgsl.h | 29 ++++--- kgsl_bus.c | 7 ++ kgsl_device.h | 5 ++ kgsl_drawobj.c | 56 ++++++------ kgsl_gmu_core.h | 3 +- kgsl_iommu.c | 6 +- kgsl_pool.c | 59 ++++++++++++- kgsl_pwrctrl.c | 7 +- kgsl_pwrscale.c | 4 +- kgsl_reclaim.c | 69 +++++---------- kgsl_sharedmem.c | 174 ++++++++++++++++++++++++++++++-------- kgsl_sync.c | 15 ++-- kgsl_timeline.c | 21 ++++- kgsl_vbo.c | 23 ++++- 51 files changed, 1023 insertions(+), 466 deletions(-) diff --git a/Kconfig b/Kconfig index 0c04a88e74..9507965009 100644 --- a/Kconfig +++ b/Kconfig @@ -30,6 +30,15 @@ config DEVFREQ_GOV_QCOM_GPUBW_MON This governor will not be useful for non-Adreno based targets. 
+config QCOM_KGSL_FENCE_TRACE + bool "Enable built-in tracing for adreno fence timeouts" + depends on QCOM_KGSL + help + A boolean flag used to create a KGSL-specific tracing instance + under /tracing/instances/kgsl-fence that can be used + for debugging timeouts for fences between KGSL contexts and + sync-point blocks. If unsure, say 'N' here. + config QCOM_ADRENO_DEFAULT_GOVERNOR string "devfreq governor for the adreno core" default "msm-adreno-tz" @@ -87,3 +96,23 @@ config QCOM_KGSL_QDSS_STM When enabled, the Adreno GPU QDSS STM support is enabled. GPU QDSS STM memory will be mapped to GPU and QDSS clock needed to access this memory is voted. Debug kernels should say 'Y' here. + +config QCOM_KGSL_USE_SHMEM + bool "Enable using shmem for memory allocations" + depends on QCOM_KGSL + help + Say 'Y' to enable using shmem for memory allocations. If enabled, + there will be no support for the memory pools and higher order pages. + But using shmem will help in making kgsl pages available for + reclaiming. + +config QCOM_KGSL_PROCESS_RECLAIM + bool "Make driver pages available for reclaim" + depends on QCOM_KGSL + select QCOM_KGSL_USE_SHMEM + help + Say 'Y' to make driver pages available for reclaiming. If enabled, + shmem will be used for allocation. kgsl would know the process + foreground/background activity through the sysfs entry exposed per + process. Based on this, kgsl can unpin a given number of pages from + background processes and make them available to the shrinker. diff --git a/a6xx_reg.h index f6b7dcde8a..0c0a118957 100644 --- a/a6xx_reg.h +++ b/a6xx_reg.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (c) 2017-2020, The Linux Foundation. All rights reserved. + * Copyright (c) 2017-2021, The Linux Foundation. All rights reserved. 
*/ #ifndef _A6XX_REG_H @@ -1106,6 +1106,11 @@ #define A6XX_GPU_CC_GX_DOMAIN_MISC3 0x24563 #define A6XX_GPU_CC_CX_GDSCR 0x2441B +/* GPUCC offsets are different for A662 */ +#define A662_GPU_CC_GX_GDSCR 0x26417 +#define A662_GPU_CC_GX_DOMAIN_MISC3 0x26541 +#define A662_GPU_CC_CX_GDSCR 0x26442 + /* GPU CPR registers */ #define A6XX_GPU_CPR_FSM_CTL 0x26801 diff --git a/adreno-gpulist.h b/adreno-gpulist.h index e7e061f76f..91aedbfd4f 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -1093,9 +1093,10 @@ static const struct adreno_protected_regs a620_protected_regs[] = { { A6XX_CP_PROTECT_REG + 32, 0x0fc00, 0x11bff, 0 }, { A6XX_CP_PROTECT_REG + 33, 0x18400, 0x1a3ff, 1 }, { A6XX_CP_PROTECT_REG + 34, 0x1a800, 0x1c7ff, 1 }, - { A6XX_CP_PROTECT_REG + 35, 0x1f400, 0x1f843, 1 }, - { A6XX_CP_PROTECT_REG + 36, 0x1f844, 0x1f8bf, 0 }, - { A6XX_CP_PROTECT_REG + 37, 0x1f887, 0x1f8a2, 1 }, + { A6XX_CP_PROTECT_REG + 35, 0x1c800, 0x1e7ff, 1 }, + { A6XX_CP_PROTECT_REG + 36, 0x1f400, 0x1f843, 1 }, + { A6XX_CP_PROTECT_REG + 37, 0x1f844, 0x1f8bf, 0 }, + { A6XX_CP_PROTECT_REG + 38, 0x1f887, 0x1f8a2, 1 }, { A6XX_CP_PROTECT_REG + 47, 0x1f8c0, 0x1f8c0, 1 }, { 0 }, }; @@ -1573,10 +1574,11 @@ static const struct adreno_protected_regs a660_protected_regs[] = { { A6XX_CP_PROTECT_REG + 33, 0x0fc00, 0x11bff, 0 }, { A6XX_CP_PROTECT_REG + 34, 0x18400, 0x1a3ff, 1 }, { A6XX_CP_PROTECT_REG + 35, 0x1a400, 0x1c3ff, 1 }, - { A6XX_CP_PROTECT_REG + 36, 0x1f400, 0x1f843, 1 }, - { A6XX_CP_PROTECT_REG + 37, 0x1f844, 0x1f8bf, 0 }, - { A6XX_CP_PROTECT_REG + 38, 0x1f860, 0x1f860, 1 }, - { A6XX_CP_PROTECT_REG + 39, 0x1f887, 0x1f8a2, 1 }, + { A6XX_CP_PROTECT_REG + 36, 0x1c400, 0x1e3ff, 1 }, + { A6XX_CP_PROTECT_REG + 37, 0x1f400, 0x1f843, 1 }, + { A6XX_CP_PROTECT_REG + 38, 0x1f844, 0x1f8bf, 0 }, + { A6XX_CP_PROTECT_REG + 39, 0x1f860, 0x1f860, 1 }, + { A6XX_CP_PROTECT_REG + 40, 0x1f887, 0x1f8a2, 1 }, { A6XX_CP_PROTECT_REG + 47, 0x1f8c0, 0x1f8c0, 1 }, { 0 }, }; @@ -1710,6 +1712,37 @@ static const struct adreno_a6xx_core adreno_gpu_core_a635 = { .ctxt_record_size = 2496 * 1024, }; +static const struct adreno_a6xx_core adreno_gpu_core_a662 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_A662, 6, 6, 2, ANY_ID), + .features = ADRENO_APRIV | ADRENO_IOCOHERENT | + ADRENO_CONTENT_PROTECTION | ADRENO_PREEMPTION, + .gpudev = &adreno_a6xx_gmu_gpudev.base, + .perfcounters = &adreno_a6xx_perfcounters, + .gmem_base = 0, + .gmem_size = SZ_512K, + .bus_width = 32, + .snapshot_size = SZ_2M, + }, + .prim_fifo_threshold = 0x00200000, + .gmu_major = 2, + .gmu_minor = 0, + .sqefw_name = "a660_sqe.fw", + .gmufw_name = "a662_gmu.bin", + .zap_name = "a662_zap", + .hwcg = a660_hwcg_regs, + .hwcg_count = ARRAY_SIZE(a660_hwcg_regs), + .vbif = a650_gbif_regs, + .vbif_count = ARRAY_SIZE(a650_gbif_regs), + .hang_detect_cycles = 0x3ffff, + .veto_fal10 = true, + .protected_regs = a660_protected_regs, + .disable_tseskip = true, + .highest_bank_bit = 15, + .pdc_in_aop = true, + .ctxt_record_size = 2496 * 1024, +}; + static const struct kgsl_regmap_list gen7_0_0_gbif_regs[] = { { GEN7_GBIF_QSB_SIDE0, 0x00071620 }, { GEN7_GBIF_QSB_SIDE1, 0x00071620 }, @@ -1720,10 +1753,10 @@ static const struct kgsl_regmap_list gen7_0_0_gbif_regs[] = { static const struct kgsl_regmap_list gen7_0_0_hwcg_regs[] = { { GEN7_RBBM_CLOCK_CNTL_SP0, 0x02222222 }, - { GEN7_RBBM_CLOCK_CNTL2_SP0, 0x02222222 }, + { GEN7_RBBM_CLOCK_CNTL2_SP0, 0x02022222 }, { GEN7_RBBM_CLOCK_HYST_SP0, 0x0000f3cf }, { GEN7_RBBM_CLOCK_DELAY_SP0, 0x00000080 }, - { GEN7_RBBM_CLOCK_CNTL_TP0, 0x22222222 }, + { 
GEN7_RBBM_CLOCK_CNTL_TP0, 0x22222220 }, { GEN7_RBBM_CLOCK_CNTL2_TP0, 0x22222222 }, { GEN7_RBBM_CLOCK_CNTL3_TP0, 0x22222222 }, { GEN7_RBBM_CLOCK_CNTL4_TP0, 0x00222222 }, @@ -1751,7 +1784,7 @@ static const struct kgsl_regmap_list gen7_0_0_hwcg_regs[] = { { GEN7_RBBM_CLOCK_MODE_BV_GRAS, 0x00222222 }, { GEN7_RBBM_CLOCK_MODE_GPC, 0x02222223 }, { GEN7_RBBM_CLOCK_MODE_VFD, 0x00002222 }, - { GEN7_RBBM_CLOCK_MODE_BV_GPC, 0x00222223 }, + { GEN7_RBBM_CLOCK_MODE_BV_GPC, 0x00222222 }, { GEN7_RBBM_CLOCK_MODE_BV_VFD, 0x00002222 }, { GEN7_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000 }, { GEN7_RBBM_CLOCK_HYST_GPC, 0x04104004 }, @@ -1777,42 +1810,50 @@ static const struct kgsl_regmap_list gen7_0_0_hwcg_regs[] = { /* GEN7_0_0 protected register list */ static const struct gen7_protected_regs gen7_0_0_protected_regs[] = { { GEN7_CP_PROTECT_REG + 0, 0x00000, 0x004ff, 0 }, - { GEN7_CP_PROTECT_REG + 1, 0x0050b, 0x00698, 0 }, + { GEN7_CP_PROTECT_REG + 1, 0x0050b, 0x00563, 0 }, { GEN7_CP_PROTECT_REG + 2, 0x0050e, 0x0050e, 1 }, { GEN7_CP_PROTECT_REG + 3, 0x00510, 0x00510, 1 }, { GEN7_CP_PROTECT_REG + 4, 0x00534, 0x00534, 1 }, - { GEN7_CP_PROTECT_REG + 5, 0x00699, 0x00882, 1 }, - { GEN7_CP_PROTECT_REG + 6, 0x008a0, 0x008a8, 1 }, - { GEN7_CP_PROTECT_REG + 7, 0x008ab, 0x008cf, 1 }, - { GEN7_CP_PROTECT_REG + 8, 0x008d0, 0x00a40, 0 }, - { GEN7_CP_PROTECT_REG + 9, 0x00900, 0x0094d, 1 }, - { GEN7_CP_PROTECT_REG + 10, 0x0098d, 0x00a3f, 1 }, - { GEN7_CP_PROTECT_REG + 11, 0x00a41, 0x00bff, 1 }, - { GEN7_CP_PROTECT_REG + 12, 0x00df0, 0x00df1, 1 }, - { GEN7_CP_PROTECT_REG + 13, 0x00e01, 0x00e01, 1 }, - { GEN7_CP_PROTECT_REG + 14, 0x00e07, 0x00e0f, 1 }, - { GEN7_CP_PROTECT_REG + 15, 0x03c00, 0x03cc3, 1 }, - { GEN7_CP_PROTECT_REG + 16, 0x03cc4, 0x05cc3, 0 }, - { GEN7_CP_PROTECT_REG + 17, 0x08630, 0x087ff, 1 }, - { GEN7_CP_PROTECT_REG + 18, 0x08e00, 0x08e00, 1 }, - { GEN7_CP_PROTECT_REG + 19, 0x08e08, 0x08e08, 1 }, - { GEN7_CP_PROTECT_REG + 20, 0x08e50, 0x08e6f, 1 }, - { GEN7_CP_PROTECT_REG + 21, 0x08e80, 0x09100, 1 }, - { GEN7_CP_PROTECT_REG + 22, 0x09624, 0x097ff, 1 }, - { GEN7_CP_PROTECT_REG + 23, 0x09e40, 0x09e40, 1 }, - { GEN7_CP_PROTECT_REG + 24, 0x09e64, 0x09e71, 1 }, - { GEN7_CP_PROTECT_REG + 25, 0x09e78, 0x09fff, 1 }, - { GEN7_CP_PROTECT_REG + 26, 0x0a630, 0x0a7ff, 1 }, - { GEN7_CP_PROTECT_REG + 27, 0x0ae02, 0x0ae02, 1 }, - { GEN7_CP_PROTECT_REG + 28, 0x0ae50, 0x0ae5f, 1 }, - { GEN7_CP_PROTECT_REG + 29, 0x0ae66, 0x0ae69, 1 }, - { GEN7_CP_PROTECT_REG + 30, 0x0ae6f, 0x0ae72, 1 }, - { GEN7_CP_PROTECT_REG + 31, 0x0b604, 0x0b607, 1 }, - { GEN7_CP_PROTECT_REG + 32, 0x0ec00, 0x0fbff, 1 }, - { GEN7_CP_PROTECT_REG + 33, 0x0fc00, 0x11bff, 0 }, - { GEN7_CP_PROTECT_REG + 34, 0x18400, 0x18453, 1 }, - { GEN7_CP_PROTECT_REG + 35, 0x18454, 0x18458, 0 }, - { GEN7_CP_PROTECT_REG + 47, 0x18459, 0x18459, 1 }, + { GEN7_CP_PROTECT_REG + 5, 0x005fb, 0x00698, 0 }, + { GEN7_CP_PROTECT_REG + 6, 0x00699, 0x00882, 1 }, + { GEN7_CP_PROTECT_REG + 7, 0x008a0, 0x008a8, 1 }, + { GEN7_CP_PROTECT_REG + 8, 0x008ab, 0x008cf, 1 }, + { GEN7_CP_PROTECT_REG + 9, 0x008d0, 0x00a40, 0 }, + { GEN7_CP_PROTECT_REG + 10, 0x00900, 0x0094d, 1 }, + { GEN7_CP_PROTECT_REG + 11, 0x0098d, 0x00a3f, 1 }, + { GEN7_CP_PROTECT_REG + 12, 0x00a41, 0x00bff, 1 }, + { GEN7_CP_PROTECT_REG + 13, 0x00df0, 0x00df1, 1 }, + { GEN7_CP_PROTECT_REG + 14, 0x00e01, 0x00e01, 1 }, + { GEN7_CP_PROTECT_REG + 15, 0x00e07, 0x00e0f, 1 }, + { GEN7_CP_PROTECT_REG + 16, 0x03c00, 0x03cc3, 1 }, + { GEN7_CP_PROTECT_REG + 17, 0x03cc4, 0x05cc3, 0 }, + { GEN7_CP_PROTECT_REG + 18, 0x08630, 0x087ff, 1 }, + { 
GEN7_CP_PROTECT_REG + 19, 0x08e00, 0x08e00, 1 }, + { GEN7_CP_PROTECT_REG + 20, 0x08e08, 0x08e08, 1 }, + { GEN7_CP_PROTECT_REG + 21, 0x08e50, 0x08e6f, 1 }, + { GEN7_CP_PROTECT_REG + 22, 0x08e80, 0x09100, 1 }, + { GEN7_CP_PROTECT_REG + 23, 0x09624, 0x097ff, 1 }, + { GEN7_CP_PROTECT_REG + 24, 0x09e40, 0x09e40, 1 }, + { GEN7_CP_PROTECT_REG + 25, 0x09e64, 0x09e71, 1 }, + { GEN7_CP_PROTECT_REG + 26, 0x09e78, 0x09fff, 1 }, + { GEN7_CP_PROTECT_REG + 27, 0x0a630, 0x0a7ff, 1 }, + { GEN7_CP_PROTECT_REG + 28, 0x0ae02, 0x0ae02, 1 }, + { GEN7_CP_PROTECT_REG + 29, 0x0ae50, 0x0ae5f, 1 }, + { GEN7_CP_PROTECT_REG + 30, 0x0ae66, 0x0ae69, 1 }, + { GEN7_CP_PROTECT_REG + 31, 0x0ae6f, 0x0ae72, 1 }, + { GEN7_CP_PROTECT_REG + 32, 0x0b604, 0x0b607, 1 }, + { GEN7_CP_PROTECT_REG + 33, 0x0ec00, 0x0fbff, 1 }, + { GEN7_CP_PROTECT_REG + 34, 0x0fc00, 0x11bff, 0 }, + { GEN7_CP_PROTECT_REG + 35, 0x18400, 0x18453, 1 }, + { GEN7_CP_PROTECT_REG + 36, 0x18454, 0x18458, 0 }, + { GEN7_CP_PROTECT_REG + 37, 0x18459, 0x1a458, 1 }, + { GEN7_CP_PROTECT_REG + 38, 0x1a459, 0x1c458, 1 }, + { GEN7_CP_PROTECT_REG + 39, 0x1c459, 0x1e458, 1 }, + { GEN7_CP_PROTECT_REG + 40, 0x1f400, 0x1f843, 1 }, + { GEN7_CP_PROTECT_REG + 41, 0x1f844, 0x1f8bf, 0 }, + { GEN7_CP_PROTECT_REG + 42, 0x1f860, 0x1f860, 1 }, + { GEN7_CP_PROTECT_REG + 43, 0x1f878, 0x1f8a2, 1 }, + { GEN7_CP_PROTECT_REG + 47, 0x1f8c0, 0x1f8c0, 1 }, { 0 }, }; @@ -1824,7 +1865,8 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_0_0 = { .chipid = 0x07030000, .features = ADRENO_APRIV | ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION | ADRENO_IFPC | - ADRENO_ACD | ADRENO_L3_VOTE | ADRENO_BCL, + ADRENO_ACD | ADRENO_L3_VOTE | ADRENO_BCL | + ADRENO_PREEMPTION, .gpudev = &adreno_gen7_gmu_gpudev.base, .perfcounters = &adreno_gen7_perfcounters, .gmem_base = 0, @@ -1853,7 +1895,35 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_0_1 = { .chipid = 0x07030001, .features = ADRENO_APRIV | ADRENO_IOCOHERENT | ADRENO_CONTENT_PROTECTION | ADRENO_IFPC | - ADRENO_ACD | ADRENO_L3_VOTE | ADRENO_BCL, + ADRENO_ACD | ADRENO_L3_VOTE | ADRENO_BCL | + ADRENO_PREEMPTION, + .gpudev = &adreno_gen7_gmu_gpudev.base, + .perfcounters = &adreno_gen7_perfcounters, + .gmem_base = 0, + .gmem_size = SZ_2M, + .bus_width = 32, + .snapshot_size = SZ_4M, + }, + .sqefw_name = "a730_sqe.fw", + .gmufw_name = "gmu_gen70000.bin", + .gmufw_bak_name = "c500_gmu.bin", + .zap_name = "a730_zap", + .hwcg = gen7_0_0_hwcg_regs, + .hwcg_count = ARRAY_SIZE(gen7_0_0_hwcg_regs), + .gbif = gen7_0_0_gbif_regs, + .gbif_count = ARRAY_SIZE(gen7_0_0_gbif_regs), + .hang_detect_cycles = 0xcfffff, + .protected_regs = gen7_0_0_protected_regs, + .highest_bank_bit = 16, +}; + +static const struct adreno_gen7_core adreno_gpu_core_gen7_4_0 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_GEN7_4_0, + UINT_MAX, UINT_MAX, UINT_MAX, ANY_ID), + .compatible = "qcom,adreno-gpu-gen7-4-0", + .features = ADRENO_APRIV | ADRENO_IOCOHERENT | + ADRENO_CONTENT_PROTECTION, .gpudev = &adreno_gen7_gmu_gpudev.base, .perfcounters = &adreno_gen7_perfcounters, .gmem_base = 0, @@ -1912,4 +1982,7 @@ static const struct adreno_gpu_core *adreno_gpulist[] = { &adreno_gpu_core_a660_shima.base, &adreno_gpu_core_gen7_0_0.base, &adreno_gpu_core_gen7_0_1.base, + &adreno_gpu_core_a662.base, + &adreno_gpu_core_gen7_4_0.base, + }; diff --git a/adreno.c b/adreno.c index ed9e778e9f..0f32ad3141 100644 --- a/adreno.c +++ b/adreno.c @@ -166,7 +166,7 @@ unsigned int adreno_get_rptr(struct adreno_ringbuffer *rb) kgsl_regread(device, A3XX_CP_RB_RPTR, &rptr); else 
kgsl_sharedmem_readl(device->scratch, &rptr, - SCRATCH_RPTR_OFFSET(rb->id)); + SCRATCH_RB_OFFSET(rb->id, rptr)); return rptr; } @@ -682,18 +682,32 @@ out: return ret; } -static void adreno_of_get_initial_pwrlevel(struct kgsl_pwrctrl *pwr, +static void adreno_of_get_initial_pwrlevels(struct kgsl_pwrctrl *pwr, struct device_node *node) { - int init_level = 1; + int level; - of_property_read_u32(node, "qcom,initial-pwrlevel", &init_level); + /* Get and set the initial power level */ + if (of_property_read_u32(node, "qcom,initial-pwrlevel", &level)) + level = 1; - if (init_level < 0 || init_level >= pwr->num_pwrlevels) - init_level = 1; + if (level < 0 || level >= pwr->num_pwrlevels) + level = 1; - pwr->active_pwrlevel = init_level; - pwr->default_pwrlevel = init_level; + pwr->active_pwrlevel = level; + pwr->default_pwrlevel = level; + + /* Set the max power level */ + pwr->max_pwrlevel = 0; + + /* Get and set the min power level */ + if (of_property_read_u32(node, "qcom,initial-min-pwrlevel", &level)) + level = pwr->num_pwrlevels - 1; + + if (level < 0 || level >= pwr->num_pwrlevels || level < pwr->default_pwrlevel) + level = pwr->num_pwrlevels - 1; + + pwr->min_pwrlevel = level; } static void adreno_of_get_limits(struct adreno_device *adreno_dev, @@ -733,7 +747,7 @@ static int adreno_of_get_legacy_pwrlevels(struct adreno_device *adreno_dev, ret = adreno_of_parse_pwrlevels(adreno_dev, node); if (!ret) { - adreno_of_get_initial_pwrlevel(&device->pwrctrl, parent); + adreno_of_get_initial_pwrlevels(&device->pwrctrl, parent); adreno_of_get_limits(adreno_dev, parent); } @@ -766,7 +780,7 @@ static int adreno_of_get_pwrlevels(struct adreno_device *adreno_dev, return ret; } - adreno_of_get_initial_pwrlevel(&device->pwrctrl, child); + adreno_of_get_initial_pwrlevels(&device->pwrctrl, child); /* * Check for global throttle-pwrlevel first and override @@ -1511,14 +1525,16 @@ void adreno_set_active_ctxs_null(struct adreno_device *adreno_dev) { int i; struct adreno_ringbuffer *rb; + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { if (rb->drawctxt_active) kgsl_context_put(&(rb->drawctxt_active->base)); rb->drawctxt_active = NULL; - kgsl_sharedmem_writel(rb->pagetable_desc, - PT_INFO_OFFSET(current_rb_ptname), 0); + kgsl_sharedmem_writel(device->scratch, + SCRATCH_RB_OFFSET(rb->id, current_rb_ptname), + 0); } } @@ -1749,8 +1765,6 @@ static int _adreno_start(struct adreno_device *adreno_dev) /* Set the bit to indicate that we've just powered on */ set_bit(ADRENO_DEVICE_PWRON, &adreno_dev->priv); - adreno_ringbuffer_set_global(adreno_dev, 0); - /* Clear the busy_data stats - we're starting over from scratch */ memset(&adreno_dev->busy_data, 0, sizeof(adreno_dev->busy_data)); @@ -2345,9 +2359,6 @@ static int adreno_soft_reset(struct kgsl_device *device) adreno_dev->busy_data.bif_starved_ram = 0; adreno_dev->busy_data.bif_starved_ram_ch1 = 0; - /* Set the page table back to the default page table */ - adreno_ringbuffer_set_global(adreno_dev, 0); - /* Reinitialize the GPU */ gpudev->start(adreno_dev); diff --git a/adreno.h b/adreno.h index 0ed5a5f282..e8838e3eb6 100644 --- a/adreno.h +++ b/adreno.h @@ -16,6 +16,17 @@ #include "adreno_ringbuffer.h" #include "kgsl_sharedmem.h" +/* Used to point CP to the SMMU record during preemption */ +#define SET_PSEUDO_SMMU_INFO 0 +/* Used to inform CP where to save preemption data at the time of switch out */ +#define SET_PSEUDO_PRIV_NON_SECURE_SAVE_ADDR 1 +/* Used to inform CP where to save secure preemption data at the time of 
switch out */ +#define SET_PSEUDO_PRIV_SECURE_SAVE_ADDR 2 +/* Used to inform CP where to save per context non-secure data at the time of switch out */ +#define SET_PSEUDO_NON_PRIV_SAVE_ADDR 3 +/* Used to inform CP where to save preemption counter data at the time of switch out */ +#define SET_PSEUDO_COUNTER 4 + /* ADRENO_DEVICE - Given a kgsl_device return the adreno device struct */ #define ADRENO_DEVICE(device) \ container_of(device, struct adreno_device, dev) @@ -178,6 +189,7 @@ enum adreno_gpurev { ADRENO_REV_A640 = 640, ADRENO_REV_A650 = 650, ADRENO_REV_A660 = 660, + ADRENO_REV_A662 = 662, ADRENO_REV_A680 = 680, /* * Gen7 and higher version numbers may exceed 1 digit @@ -187,6 +199,7 @@ enum adreno_gpurev { */ ADRENO_REV_GEN7_0_0 = 0x070000, ADRENO_REV_GEN7_0_1 = 0x070001, + ADRENO_REV_GEN7_4_0 = 0x070400, }; #define ADRENO_SOFT_FAULT BIT(0) @@ -1014,6 +1027,7 @@ ADRENO_TARGET(a619, ADRENO_REV_A619) ADRENO_TARGET(a620, ADRENO_REV_A620) ADRENO_TARGET(a630, ADRENO_REV_A630) ADRENO_TARGET(a635, ADRENO_REV_A635) +ADRENO_TARGET(a662, ADRENO_REV_A662) ADRENO_TARGET(a640, ADRENO_REV_A640) ADRENO_TARGET(a650, ADRENO_REV_A650) ADRENO_TARGET(a680, ADRENO_REV_A680) @@ -1023,7 +1037,8 @@ static inline int adreno_is_a660(struct adreno_device *adreno_dev) { unsigned int rev = ADRENO_GPUREV(adreno_dev); - return (rev == ADRENO_REV_A660 || rev == ADRENO_REV_A635); + return (rev == ADRENO_REV_A660 || rev == ADRENO_REV_A635 || + rev == ADRENO_REV_A662); } /* @@ -1061,7 +1076,8 @@ static inline int adreno_is_a650_family(struct adreno_device *adreno_dev) unsigned int rev = ADRENO_GPUREV(adreno_dev); return (rev == ADRENO_REV_A650 || rev == ADRENO_REV_A620 || - rev == ADRENO_REV_A660 || rev == ADRENO_REV_A635); + rev == ADRENO_REV_A660 || rev == ADRENO_REV_A635 || + rev == ADRENO_REV_A662); } static inline int adreno_is_a619_holi(struct adreno_device *adreno_dev) @@ -1421,28 +1437,22 @@ static inline bool adreno_support_64bit(struct adreno_device *adreno_dev) return (BITS_PER_LONG > 32 && ADRENO_GPUREV(adreno_dev) >= 500); } -static inline void adreno_ringbuffer_set_global( - struct adreno_device *adreno_dev, int name) -{ - kgsl_sharedmem_writel(adreno_dev->ringbuffers[0].pagetable_desc, - PT_INFO_OFFSET(current_global_ptname), name); -} - -static inline void adreno_ringbuffer_set_pagetable(struct adreno_ringbuffer *rb, - struct kgsl_pagetable *pt) +static inline void adreno_ringbuffer_set_pagetable(struct kgsl_device *device, + struct adreno_ringbuffer *rb, struct kgsl_pagetable *pt) { unsigned long flags; spin_lock_irqsave(&rb->preempt_lock, flags); - kgsl_sharedmem_writel(rb->pagetable_desc, - PT_INFO_OFFSET(current_rb_ptname), pt->name); + kgsl_sharedmem_writel(device->scratch, + SCRATCH_RB_OFFSET(rb->id, current_rb_ptname), pt->name); - kgsl_sharedmem_writeq(rb->pagetable_desc, - PT_INFO_OFFSET(ttbr0), kgsl_mmu_pagetable_get_ttbr0(pt)); + kgsl_sharedmem_writeq(device->scratch, + SCRATCH_RB_OFFSET(rb->id, ttbr0), + kgsl_mmu_pagetable_get_ttbr0(pt)); - kgsl_sharedmem_writel(rb->pagetable_desc, - PT_INFO_OFFSET(contextidr), 0); + kgsl_sharedmem_writel(device->scratch, + SCRATCH_RB_OFFSET(rb->id, contextidr), 0); spin_unlock_irqrestore(&rb->preempt_lock, flags); } @@ -1753,6 +1763,7 @@ static inline void adreno_set_dispatch_ops(struct adreno_device *adreno_dev, adreno_dev->dispatch_ops = ops; } +#ifdef CONFIG_QCOM_KGSL_FENCE_TRACE /** * adreno_fence_trace_array_init - Initialize an always on trace array * @device: A GPU device handle @@ -1760,6 +1771,9 @@ static inline void adreno_set_dispatch_ops(struct 
adreno_device *adreno_dev, * Register an always-on trace array to for fence timeout debugging */ void adreno_fence_trace_array_init(struct kgsl_device *device); +#else +static inline void adreno_fence_trace_array_init(struct kgsl_device *device) {} +#endif /* * adreno_drawobj_set_constraint - Set a power constraint diff --git a/adreno_a5xx.c b/adreno_a5xx.c index 0b48af11fb..83e2b4b389 100644 --- a/adreno_a5xx.c +++ b/adreno_a5xx.c @@ -1850,7 +1850,7 @@ static int a5xx_rb_start(struct adreno_device *adreno_dev) FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { memset(rb->buffer_desc->hostptr, 0xaa, KGSL_RB_SIZE); kgsl_sharedmem_writel(device->scratch, - SCRATCH_RPTR_OFFSET(rb->id), 0); + SCRATCH_RB_OFFSET(rb->id, rptr), 0); rb->wptr = 0; rb->_wptr = 0; @@ -1859,7 +1859,7 @@ static int a5xx_rb_start(struct adreno_device *adreno_dev) /* Set up the current ringbuffer */ rb = ADRENO_CURRENT_RINGBUFFER(adreno_dev); - addr = SCRATCH_RPTR_GPU_ADDR(device, rb->id); + addr = SCRATCH_RB_GPU_ADDR(device, rb->id, rptr); kgsl_regwrite(device, A5XX_CP_RB_RPTR_ADDR_LO, lower_32_bits(addr)); kgsl_regwrite(device, A5XX_CP_RB_RPTR_ADDR_HI, upper_32_bits(addr)); diff --git a/adreno_a5xx_preempt.c b/adreno_a5xx_preempt.c index bf3126ef74..9008cc320b 100644 --- a/adreno_a5xx_preempt.c +++ b/adreno_a5xx_preempt.c @@ -212,16 +212,11 @@ void a5xx_preemption_trigger(struct adreno_device *adreno_dev) spin_lock_irqsave(&next->preempt_lock, flags); - /* - * Get the pagetable from the pagetable info. - * The pagetable_desc is allocated and mapped at probe time, and - * preemption_desc at init time, so no need to check if - * sharedmem accesses to these memdescs succeed. - */ - kgsl_sharedmem_readq(next->pagetable_desc, &ttbr0, - PT_INFO_OFFSET(ttbr0)); - kgsl_sharedmem_readl(next->pagetable_desc, &contextidr, - PT_INFO_OFFSET(contextidr)); + /* Get the pagetable from the pagetable info. 
*/ + kgsl_sharedmem_readq(device->scratch, &ttbr0, + SCRATCH_RB_OFFSET(next->id, ttbr0)); + kgsl_sharedmem_readl(device->scratch, &contextidr, + SCRATCH_RB_OFFSET(next->id, contextidr)); kgsl_sharedmem_writel(next->preemption_desc, PREEMPT_RECORD(wptr), next->wptr); @@ -458,7 +453,7 @@ void a5xx_preemption_start(struct adreno_device *adreno_dev) kgsl_sharedmem_writel(rb->preemption_desc, PREEMPT_RECORD(wptr), 0); - adreno_ringbuffer_set_pagetable(rb, + adreno_ringbuffer_set_pagetable(device, rb, device->mmu.defaultpagetable); } @@ -490,8 +485,8 @@ static int a5xx_preemption_ringbuffer_init(struct adreno_device *adreno_dev, kgsl_sharedmem_writel(rb->preemption_desc, PREEMPT_RECORD(wptr), 0); kgsl_sharedmem_writeq(rb->preemption_desc, - PREEMPT_RECORD(rptr_addr), SCRATCH_RPTR_GPU_ADDR(device, - rb->id)); + PREEMPT_RECORD(rptr_addr), SCRATCH_RB_GPU_ADDR(device, + rb->id, rptr)); kgsl_sharedmem_writeq(rb->preemption_desc, PREEMPT_RECORD(rbase), rb->buffer_desc->gpuaddr); kgsl_sharedmem_writeq(rb->preemption_desc, diff --git a/adreno_a5xx_ringbuffer.c b/adreno_a5xx_ringbuffer.c index fb973e9412..1862fb2da1 100644 --- a/adreno_a5xx_ringbuffer.c +++ b/adreno_a5xx_ringbuffer.c @@ -32,10 +32,10 @@ static int a5xx_rb_pagetable_switch(struct kgsl_device *device, cmds[7] = 1; cmds[8] = cp_type7_packet(CP_MEM_WRITE, 5); - cmds[9] = lower_32_bits(rb->pagetable_desc->gpuaddr + - PT_INFO_OFFSET(ttbr0)); - cmds[10] = upper_32_bits(rb->pagetable_desc->gpuaddr + - PT_INFO_OFFSET(ttbr0)); + cmds[9] = lower_32_bits(SCRATCH_RB_GPU_ADDR(device, + rb->id, ttbr0)); + cmds[10] = upper_32_bits(SCRATCH_RB_GPU_ADDR(device, + rb->id, ttbr0)); cmds[11] = lower_32_bits(ttbr0); cmds[12] = upper_32_bits(ttbr0); cmds[13] = id; @@ -75,8 +75,10 @@ int a5xx_ringbuffer_submit(struct adreno_ringbuffer *rb, return PTR_ERR(cmds); cmds[0] = cp_type7_packet(CP_WHERE_AM_I, 2); - cmds[1] = lower_32_bits(SCRATCH_RPTR_GPU_ADDR(device, rb->id)); - cmds[2] = upper_32_bits(SCRATCH_RPTR_GPU_ADDR(device, rb->id)); + cmds[1] = lower_32_bits(SCRATCH_RB_GPU_ADDR(device, rb->id, + rptr)); + cmds[2] = upper_32_bits(SCRATCH_RB_GPU_ADDR(device, rb->id, + rptr)); } spin_lock_irqsave(&rb->preempt_lock, flags); diff --git a/adreno_a6xx.c b/adreno_a6xx.c index 44e69b9d94..7bfa013467 100644 --- a/adreno_a6xx.c +++ b/adreno_a6xx.c @@ -50,6 +50,7 @@ static u32 a6xx_pwrup_reglist[] = { A6XX_SP_NC_MODE_CNTL, A6XX_PC_DBG_ECO_CNTL, A6XX_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE, + A6XX_UCHE_GBIF_GX_CONFIG, }; /* IFPC only static powerup restore list */ @@ -92,9 +93,28 @@ static u32 a6xx_ifpc_pwrup_reglist[] = { A6XX_CP_AHB_CNTL, }; +/* Applicable to a620, a635, a650 and a660 */ +static u32 a650_ifpc_pwrup_reglist[] = { + A6XX_CP_PROTECT_REG+32, + A6XX_CP_PROTECT_REG+33, + A6XX_CP_PROTECT_REG+34, + A6XX_CP_PROTECT_REG+35, + A6XX_CP_PROTECT_REG+36, + A6XX_CP_PROTECT_REG+37, + A6XX_CP_PROTECT_REG+38, + A6XX_CP_PROTECT_REG+39, + A6XX_CP_PROTECT_REG+40, + A6XX_CP_PROTECT_REG+41, + A6XX_CP_PROTECT_REG+42, + A6XX_CP_PROTECT_REG+43, + A6XX_CP_PROTECT_REG+44, + A6XX_CP_PROTECT_REG+45, + A6XX_CP_PROTECT_REG+46, + A6XX_CP_PROTECT_REG+47, +}; + /* Applicable to a620, a635, a650 and a660 */ static u32 a650_pwrup_reglist[] = { - A6XX_CP_PROTECT_REG + 47, /* Programmed for infinite span */ A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_0, A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_1, A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_2, @@ -170,7 +190,8 @@ int a6xx_init(struct adreno_device *adreno_dev) /* If the memory type is DDR 4, override the existing configuration */ if (of_fdt_get_ddrtype() == 0x7) { if 
(adreno_is_a660_shima(adreno_dev) || - adreno_is_a635(adreno_dev)) + adreno_is_a635(adreno_dev) || + adreno_is_a662(adreno_dev)) adreno_dev->highest_bank_bit = 14; else if ((adreno_is_a650(adreno_dev) || adreno_is_a660(adreno_dev))) @@ -258,6 +279,9 @@ __get_gmu_ao_cgc_mode_cntl(struct adreno_device *adreno_dev) return 0x00000022; else if (adreno_is_a615_family(adreno_dev)) return 0x00000222; + /* a662 should be checked before a660 */ + else if (adreno_is_a662(adreno_dev)) + return 0x00020200; else if (adreno_is_a660(adreno_dev)) return 0x00020000; else @@ -298,7 +322,12 @@ static unsigned int __get_gmu_wfi_config(struct adreno_device *adreno_dev) void a6xx_cx_regulator_disable_wait(struct regulator *reg, struct kgsl_device *device, u32 timeout) { - if (!adreno_regulator_disable_poll(device, reg, A6XX_GPU_CC_CX_GDSCR, timeout)) { + u32 offset; + + offset = adreno_is_a662(ADRENO_DEVICE(device)) ? + A662_GPU_CC_CX_GDSCR : A6XX_GPU_CC_CX_GDSCR; + + if (!adreno_regulator_disable_poll(device, reg, offset, timeout)) { dev_err(device->dev, "GPU CX wait timeout. Dumping CX votes:\n"); /* Dump the cx regulator consumer list */ qcom_clk_dump(NULL, reg, false); @@ -361,6 +390,10 @@ static void a6xx_hwcg_set(struct adreno_device *adreno_dev, bool on) kgsl_regwrite(device, a6xx_core->hwcg[i].offset, on ? a6xx_core->hwcg[i].val : 0); + /* GBIF L2 CGC control is not part of the UCHE */ + kgsl_regrmw(device, A6XX_UCHE_GBIF_GX_CONFIG, 0x70000, + FIELD_PREP(GENMASK(18, 16), on ? 2 : 0)); + /* * Enable SP clock after programming HWCG registers. * A612 and A610 GPU is not having the GX power domain. @@ -388,14 +421,21 @@ struct a6xx_reglist_list { static void a6xx_patch_pwrup_reglist(struct adreno_device *adreno_dev) { - struct a6xx_reglist_list reglist[3]; + struct a6xx_reglist_list reglist[4]; void *ptr = adreno_dev->pwrup_reglist->hostptr; struct cpu_gpu_lock *lock = ptr; int items = 0, i, j; u32 *dest = ptr + sizeof(*lock); + u16 list_offset = 0; /* Static IFPC-only registers */ - reglist[items++] = REGLIST(a6xx_ifpc_pwrup_reglist); + reglist[items] = REGLIST(a6xx_ifpc_pwrup_reglist); + list_offset += reglist[items++].count * 2; + + if (adreno_is_a650_family(adreno_dev)) { + reglist[items] = REGLIST(a650_ifpc_pwrup_reglist); + list_offset += reglist[items++].count * 2; + } /* Static IFPC + preemption registers */ reglist[items++] = REGLIST(a6xx_pwrup_reglist); @@ -448,7 +488,7 @@ static void a6xx_patch_pwrup_reglist(struct adreno_device *adreno_dev) * all the lists and list_offset should be specified as the size in * dwords of the first entry in the list. 
*/ - lock->list_offset = reglist[0].count * 2; + lock->list_offset = list_offset; } @@ -958,7 +998,7 @@ static int a6xx_send_cp_init(struct adreno_device *adreno_dev, "CP initialization failed to idle\n"); kgsl_sharedmem_writel(device->scratch, - SCRATCH_RPTR_OFFSET(rb->id), 0); + SCRATCH_RB_OFFSET(rb->id, rptr), 0); rb->wptr = 0; rb->_wptr = 0; } @@ -982,11 +1022,11 @@ static int _preemption_init(struct adreno_device *adreno_dev, cmds += cp_protected_mode(adreno_dev, cmds, 0); *cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 6); - *cmds++ = 1; + *cmds++ = SET_PSEUDO_PRIV_NON_SECURE_SAVE_ADDR; cmds += cp_gpuaddr(adreno_dev, cmds, rb->preemption_desc->gpuaddr); - *cmds++ = 2; + *cmds++ = SET_PSEUDO_PRIV_SECURE_SAVE_ADDR; cmds += cp_gpuaddr(adreno_dev, cmds, rb->secure_preemption_desc->gpuaddr); @@ -1052,7 +1092,7 @@ int a6xx_rb_start(struct adreno_device *adreno_dev) FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { memset(rb->buffer_desc->hostptr, 0xaa, KGSL_RB_SIZE); kgsl_sharedmem_writel(device->scratch, - SCRATCH_RPTR_OFFSET(rb->id), 0); + SCRATCH_RB_OFFSET(rb->id, rptr), 0); rb->wptr = 0; rb->_wptr = 0; @@ -1063,7 +1103,7 @@ int a6xx_rb_start(struct adreno_device *adreno_dev) /* Set up the current ringbuffer */ rb = ADRENO_CURRENT_RINGBUFFER(adreno_dev); - addr = SCRATCH_RPTR_GPU_ADDR(device, rb->id); + addr = SCRATCH_RB_GPU_ADDR(device, rb->id, rptr); kgsl_regwrite(device, A6XX_CP_RB_RPTR_ADDR_LO, lower_32_bits(addr)); kgsl_regwrite(device, A6XX_CP_RB_RPTR_ADDR_HI, upper_32_bits(addr)); diff --git a/adreno_a6xx.h b/adreno_a6xx.h index c70e9156ca..66454750dd 100644 --- a/adreno_a6xx.h +++ b/adreno_a6xx.h @@ -162,7 +162,7 @@ struct a6xx_cp_smmu_info { (ilog2(KGSL_RB_DWORDS >> 1) & 0x3F)) /* Size of the CP_INIT pm4 stream in dwords */ -#define A6XX_CP_INIT_DWORDS 12 +#define A6XX_CP_INIT_DWORDS 11 #define A6XX_INT_MASK \ ((1 << A6XX_INT_CP_AHB_ERROR) | \ diff --git a/adreno_a6xx_gmu.c b/adreno_a6xx_gmu.c index d18bee9aa2..2f1f25d194 100644 --- a/adreno_a6xx_gmu.c +++ b/adreno_a6xx_gmu.c @@ -544,7 +544,9 @@ int a6xx_rscc_wakeup_sequence(struct adreno_device *adreno_dev) if (!test_bit(GMU_PRIV_RSCC_SLEEP_DONE, &gmu->flags)) return 0; /* A660 has a replacement register */ - if (adreno_is_a660(ADRENO_DEVICE(device))) + if (adreno_is_a662(ADRENO_DEVICE(device))) + gmu_core_regread(device, A662_GPU_CC_GX_DOMAIN_MISC3, &val); + else if (adreno_is_a660(ADRENO_DEVICE(device))) gmu_core_regread(device, A6XX_GPU_CC_GX_DOMAIN_MISC3, &val); else gmu_core_regread(device, A6XX_GPU_CC_GX_DOMAIN_MISC, &val); @@ -784,6 +786,9 @@ int a6xx_gmu_oob_set(struct kgsl_device *device, int ret = 0; int set, check; + if (req == oob_perfcntr && gmu->num_oob_perfcntr++) + return 0; + if (adreno_is_a630(adreno_dev) || adreno_is_a615_family(adreno_dev)) { set = BIT(req + 16); check = BIT(req + 24); @@ -807,6 +812,8 @@ int a6xx_gmu_oob_set(struct kgsl_device *device, if (gmu_core_timed_poll_check(device, A6XX_GMU_GMU2HOST_INTR_INFO, check, GPU_START_TIMEOUT, check)) { + if (req == oob_perfcntr) + gmu->num_oob_perfcntr--; gmu_core_fault_snapshot(device); ret = -ETIMEDOUT; WARN(1, "OOB request %s timed out\n", oob_to_str(req)); @@ -826,6 +833,9 @@ void a6xx_gmu_oob_clear(struct kgsl_device *device, struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); int clear; + if (req == oob_perfcntr && --gmu->num_oob_perfcntr) + return; + if (adreno_is_a630(adreno_dev) || adreno_is_a615_family(adreno_dev)) { clear = BIT(req + 24); } else { @@ -2388,7 +2398,7 @@ static void a6xx_free_gmu_globals(struct a6xx_gmu_device *gmu) { int i; - for (i 
= 0; i < gmu->global_entries; i++) { + for (i = 0; i < gmu->global_entries && i < ARRAY_SIZE(gmu->gmu_globals); i++) { struct kgsl_memdesc *md = &gmu->gmu_globals[i]; if (!md->gmuaddr) @@ -2835,8 +2845,6 @@ static int a6xx_gpu_boot(struct adreno_device *adreno_dev) adreno_set_active_ctxs_null(adreno_dev); - adreno_ringbuffer_set_global(adreno_dev, 0); - ret = kgsl_mmu_start(device); if (ret) goto err; diff --git a/adreno_a6xx_gmu.h b/adreno_a6xx_gmu.h index 12160ede51..6c2ddeda4b 100644 --- a/adreno_a6xx_gmu.h +++ b/adreno_a6xx_gmu.h @@ -90,6 +90,8 @@ struct a6xx_gmu_device { * which GMU can run at 500 Mhz. */ u32 perf_ddr_bw; + /** @num_oob_perfcntr: Number of active oob_perfcntr requests */ + u32 num_oob_perfcntr; }; /* Helper function to get to a6xx gmu device from adreno device */ diff --git a/adreno_a6xx_hwsched.c b/adreno_a6xx_hwsched.c index 01d79cd20a..d1a230d8a0 100644 --- a/adreno_a6xx_hwsched.c +++ b/adreno_a6xx_hwsched.c @@ -940,6 +940,8 @@ static void scale_gmu_frequency(struct adreno_device *adreno_dev, int buslevel) return; } + a6xx_rdpm_cx_freq_update(gmu, freq / 1000); + trace_kgsl_gmu_pwrlevel(freq, prev_freq); prev_freq = freq; diff --git a/adreno_a6xx_hwsched_hfi.c b/adreno_a6xx_hwsched_hfi.c index 71f326e939..0a6b6d3a83 100644 --- a/adreno_a6xx_hwsched_hfi.c +++ b/adreno_a6xx_hwsched_hfi.c @@ -1364,7 +1364,7 @@ int a6xx_hwsched_submit_cmdobj(struct adreno_device *adreno_dev, if (WARN_ON(cmd_sizebytes > HFI_MAX_MSG_SIZE)) return -EMSGSIZE; - cmd = kvmalloc(cmd_sizebytes, GFP_KERNEL); + cmd = kmalloc(cmd_sizebytes, GFP_KERNEL); if (cmd == NULL) return -ENOMEM; @@ -1417,7 +1417,7 @@ skipib: adreno_profile_submit_time(&time); free: - kvfree(cmd); + kfree(cmd); return ret; } diff --git a/adreno_a6xx_preempt.c b/adreno_a6xx_preempt.c index de8bd8c014..1d5596dc87 100644 --- a/adreno_a6xx_preempt.c +++ b/adreno_a6xx_preempt.c @@ -14,14 +14,6 @@ #define PREEMPT_SMMU_RECORD(_field) \ offsetof(struct a6xx_cp_smmu_info, _field) -enum { - SET_PSEUDO_REGISTER_SAVE_REGISTER_SMMU_INFO = 0, - SET_PSEUDO_REGISTER_SAVE_REGISTER_PRIV_NON_SECURE_SAVE_ADDR, - SET_PSEUDO_REGISTER_SAVE_REGISTER_PRIV_SECURE_SAVE_ADDR, - SET_PSEUDO_REGISTER_SAVE_REGISTER_NON_PRIV_SAVE_ADDR, - SET_PSEUDO_REGISTER_SAVE_REGISTER_COUNTER, -}; - static void _update_wptr(struct adreno_device *adreno_dev, bool reset_timer, bool atomic) { @@ -284,16 +276,11 @@ void a6xx_preemption_trigger(struct adreno_device *adreno_dev, bool atomic) spin_lock_irqsave(&next->preempt_lock, flags); - /* - * Get the pagetable from the pagetable info. - * The pagetable_desc is allocated and mapped at probe time, and - * preemption_desc at init time, so no need to check if - * sharedmem accesses to these memdescs succeed. - */ - kgsl_sharedmem_readq(next->pagetable_desc, &ttbr0, - PT_INFO_OFFSET(ttbr0)); - kgsl_sharedmem_readl(next->pagetable_desc, &contextidr, - PT_INFO_OFFSET(contextidr)); + /* Get the pagetable from the pagetable info. 
*/ + kgsl_sharedmem_readq(device->scratch, &ttbr0, + SCRATCH_RB_OFFSET(next->id, ttbr0)); + kgsl_sharedmem_readl(device->scratch, &contextidr, + SCRATCH_RB_OFFSET(next->id, contextidr)); kgsl_sharedmem_writel(next->preemption_desc, PREEMPT_RECORD(wptr), next->wptr); @@ -504,34 +491,26 @@ u32 a6xx_preemption_pre_ibsubmit(struct adreno_device *adreno_dev, u32 *cmds) { unsigned int *cmds_orig = cmds; - uint64_t gpuaddr = 0; if (!adreno_is_preemption_enabled(adreno_dev)) return 0; - if (drawctxt) { - gpuaddr = drawctxt->base.user_ctxt_record->memdesc.gpuaddr; - *cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 15); - } else { - *cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 12); - } + if (test_and_set_bit(ADRENO_RB_SET_PSEUDO_DONE, &rb->flags)) + goto done; + + *cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 12); /* NULL SMMU_INFO buffer - we track in KMD */ - *cmds++ = SET_PSEUDO_REGISTER_SAVE_REGISTER_SMMU_INFO; + *cmds++ = SET_PSEUDO_SMMU_INFO; cmds += cp_gpuaddr(adreno_dev, cmds, 0x0); - *cmds++ = SET_PSEUDO_REGISTER_SAVE_REGISTER_PRIV_NON_SECURE_SAVE_ADDR; + *cmds++ = SET_PSEUDO_PRIV_NON_SECURE_SAVE_ADDR; cmds += cp_gpuaddr(adreno_dev, cmds, rb->preemption_desc->gpuaddr); - *cmds++ = SET_PSEUDO_REGISTER_SAVE_REGISTER_PRIV_SECURE_SAVE_ADDR; + *cmds++ = SET_PSEUDO_PRIV_SECURE_SAVE_ADDR; cmds += cp_gpuaddr(adreno_dev, cmds, rb->secure_preemption_desc->gpuaddr); - if (drawctxt) { - *cmds++ = SET_PSEUDO_REGISTER_SAVE_REGISTER_NON_PRIV_SAVE_ADDR; - cmds += cp_gpuaddr(adreno_dev, cmds, gpuaddr); - } - /* * There is no need to specify this address when we are about to * trigger preemption. This is because CP internally stores this @@ -539,14 +518,16 @@ u32 a6xx_preemption_pre_ibsubmit(struct adreno_device *adreno_dev, * the context record and thus knows from where to restore * the saved perfcounters for the new ringbuffer. 
*/ - *cmds++ = SET_PSEUDO_REGISTER_SAVE_REGISTER_COUNTER; + *cmds++ = SET_PSEUDO_COUNTER; cmds += cp_gpuaddr(adreno_dev, cmds, rb->perfcounter_save_restore_desc->gpuaddr); +done: if (drawctxt) { struct adreno_ringbuffer *rb = drawctxt->rb; uint64_t dest = adreno_dev->preempt.scratch->gpuaddr + (rb->id * sizeof(u64)); + u64 gpuaddr = drawctxt->base.user_ctxt_record->memdesc.gpuaddr; *cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 2); cmds += cp_gpuaddr(adreno_dev, cmds, dest); @@ -624,8 +605,10 @@ void a6xx_preemption_start(struct adreno_device *adreno_dev) kgsl_sharedmem_writel(rb->preemption_desc, PREEMPT_RECORD(wptr), 0); - adreno_ringbuffer_set_pagetable(rb, + adreno_ringbuffer_set_pagetable(device, rb, device->mmu.defaultpagetable); + + clear_bit(ADRENO_RB_SET_PSEUDO_DONE, &rb->flags); } } @@ -642,8 +625,8 @@ static void reset_rb_preempt_record(struct adreno_device *adreno_dev, kgsl_sharedmem_writel(rb->preemption_desc, PREEMPT_RECORD(cntl), cp_rb_cntl); kgsl_sharedmem_writeq(rb->preemption_desc, - PREEMPT_RECORD(rptr_addr), SCRATCH_RPTR_GPU_ADDR( - KGSL_DEVICE(adreno_dev), rb->id)); + PREEMPT_RECORD(rptr_addr), SCRATCH_RB_GPU_ADDR( + KGSL_DEVICE(adreno_dev), rb->id, rptr)); kgsl_sharedmem_writeq(rb->preemption_desc, PREEMPT_RECORD(rbase), rb->buffer_desc->gpuaddr); } diff --git a/adreno_a6xx_rgmu.c b/adreno_a6xx_rgmu.c index 746f686591..a92f37a3da 100644 --- a/adreno_a6xx_rgmu.c +++ b/adreno_a6xx_rgmu.c @@ -145,6 +145,9 @@ static int a6xx_rgmu_oob_set(struct kgsl_device *device, struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(ADRENO_DEVICE(device)); int ret, set, check; + if (req == oob_perfcntr && rgmu->num_oob_perfcntr++) + return 0; + set = BIT(req + 16); check = BIT(req + 16); @@ -159,6 +162,8 @@ static int a6xx_rgmu_oob_set(struct kgsl_device *device, if (ret) { unsigned int status; + if (req == oob_perfcntr) + rgmu->num_oob_perfcntr--; gmu_core_regread(device, A6XX_RGMU_CX_PCC_DEBUG, &status); dev_err(&rgmu->pdev->dev, "Timed out while setting OOB req:%s status:0x%x\n", @@ -180,6 +185,11 @@ static int a6xx_rgmu_oob_set(struct kgsl_device *device, static void a6xx_rgmu_oob_clear(struct kgsl_device *device, enum oob_request req) { + struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(ADRENO_DEVICE(device)); + + if (req == oob_perfcntr && --rgmu->num_oob_perfcntr) + return; + gmu_core_regwrite(device, A6XX_GMU_HOST2GMU_INTR_SET, BIT(req + 24)); trace_kgsl_gmu_oob_clear(BIT(req + 24)); } @@ -744,8 +754,6 @@ static int a6xx_gpu_boot(struct adreno_device *adreno_dev) adreno_set_active_ctxs_null(adreno_dev); - adreno_ringbuffer_set_global(adreno_dev, 0); - ret = kgsl_mmu_start(device); if (ret) goto err; diff --git a/adreno_a6xx_rgmu.h b/adreno_a6xx_rgmu.h index 89ce5199fb..1ac472c58e 100644 --- a/adreno_a6xx_rgmu.h +++ b/adreno_a6xx_rgmu.h @@ -58,6 +58,8 @@ struct a6xx_rgmu_device { unsigned int fault_count; /** @flags: rgmu internal flags */ unsigned long flags; + /** @num_oob_perfcntr: Number of active oob_perfcntr requests */ + u32 num_oob_perfcntr; }; /** diff --git a/adreno_a6xx_ringbuffer.c b/adreno_a6xx_ringbuffer.c index f4d7acdc52..6599c264dc 100644 --- a/adreno_a6xx_ringbuffer.c +++ b/adreno_a6xx_ringbuffer.c @@ -35,10 +35,10 @@ static int a6xx_rb_pagetable_switch(struct adreno_device *adreno_dev, } cmds[count++] = cp_type7_packet(CP_MEM_WRITE, 5); - cmds[count++] = lower_32_bits(rb->pagetable_desc->gpuaddr + - PT_INFO_OFFSET(ttbr0)); - cmds[count++] = upper_32_bits(rb->pagetable_desc->gpuaddr + - PT_INFO_OFFSET(ttbr0)); + cmds[count++] = lower_32_bits(SCRATCH_RB_GPU_ADDR(device, 
+ rb->id, ttbr0)); + cmds[count++] = upper_32_bits(SCRATCH_RB_GPU_ADDR(device, + rb->id, ttbr0)); cmds[count++] = lower_32_bits(ttbr0); cmds[count++] = upper_32_bits(ttbr0); cmds[count++] = id; @@ -61,7 +61,7 @@ static int a6xx_rb_context_switch(struct adreno_device *adreno_dev, adreno_drawctxt_get_pagetable(drawctxt); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); int count = 0; - u32 cmds[32]; + u32 cmds[36]; if (adreno_drawctxt_get_pagetable(rb->drawctxt_active) != pagetable) count += a6xx_rb_pagetable_switch(adreno_dev, rb, drawctxt, @@ -87,6 +87,14 @@ static int a6xx_rb_context_switch(struct adreno_device *adreno_dev, cmds[count++] = cp_type7_packet(CP_EVENT_WRITE, 1); cmds[count++] = 0x31; + if (adreno_is_preemption_enabled(adreno_dev)) { + u64 gpuaddr = drawctxt->base.user_ctxt_record->memdesc.gpuaddr; + + cmds[count++] = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 3); + cmds[count++] = SET_PSEUDO_NON_PRIV_SAVE_ADDR; + count += cp_gpuaddr(adreno_dev, &cmds[count], gpuaddr); + } + return a6xx_ringbuffer_addcmds(adreno_dev, rb, NULL, F_NOTPROTECTED, cmds, count, 0, NULL); } @@ -119,8 +127,10 @@ int a6xx_ringbuffer_submit(struct adreno_ringbuffer *rb, return PTR_ERR(cmds); cmds[0] = cp_type7_packet(CP_WHERE_AM_I, 2); - cmds[1] = lower_32_bits(SCRATCH_RPTR_GPU_ADDR(device, rb->id)); - cmds[2] = upper_32_bits(SCRATCH_RPTR_GPU_ADDR(device, rb->id)); + cmds[1] = lower_32_bits(SCRATCH_RB_GPU_ADDR(device, rb->id, + rptr)); + cmds[2] = upper_32_bits(SCRATCH_RB_GPU_ADDR(device, rb->id, + rptr)); } spin_lock_irqsave(&rb->preempt_lock, flags); diff --git a/adreno_gen7.c b/adreno_gen7.c index f559e45669..9a1193215d 100644 --- a/adreno_gen7.c +++ b/adreno_gen7.c @@ -79,6 +79,19 @@ static const u32 gen7_ifpc_pwrup_reglist[] = { GEN7_CP_PROTECT_REG+31, GEN7_CP_PROTECT_REG+32, GEN7_CP_PROTECT_REG+33, + GEN7_CP_PROTECT_REG+34, + GEN7_CP_PROTECT_REG+35, + GEN7_CP_PROTECT_REG+36, + GEN7_CP_PROTECT_REG+37, + GEN7_CP_PROTECT_REG+38, + GEN7_CP_PROTECT_REG+39, + GEN7_CP_PROTECT_REG+40, + GEN7_CP_PROTECT_REG+41, + GEN7_CP_PROTECT_REG+42, + GEN7_CP_PROTECT_REG+43, + GEN7_CP_PROTECT_REG+44, + GEN7_CP_PROTECT_REG+45, + GEN7_CP_PROTECT_REG+46, GEN7_CP_PROTECT_REG+47, GEN7_CP_AHB_CNTL, }; @@ -474,6 +487,36 @@ int gen7_start(struct adreno_device *adreno_dev) return 0; } +/* Offsets into the MX/CX mapped register regions */ +#define GEN7_RDPM_MX_OFFSET 0xf00 +#define GEN7_RDPM_CX_OFFSET 0xf14 + +void gen7_rdpm_mx_freq_update(struct gen7_gmu_device *gmu, u32 freq) +{ + if (gmu->rdpm_mx_virt) { + writel_relaxed(freq/1000, (gmu->rdpm_mx_virt + GEN7_RDPM_MX_OFFSET)); + + /* + * ensure previous writes post before this one, + * i.e. act like normal writel() + */ + wmb(); + } +} + +void gen7_rdpm_cx_freq_update(struct gen7_gmu_device *gmu, u32 freq) +{ + if (gmu->rdpm_cx_virt) { + writel_relaxed(freq/1000, (gmu->rdpm_cx_virt + GEN7_RDPM_CX_OFFSET)); + + /* + * ensure previous writes post before this one, + * i.e. 
act like normal writel() + */ + wmb(); + } +} + void gen7_spin_idle_debug(struct adreno_device *adreno_dev, const char *str) { @@ -549,11 +592,11 @@ static int gen7_post_start(struct adreno_device *adreno_dev) return PTR_ERR(cmds); cmds[0] = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 6); - cmds[1] = 1; + cmds[1] = SET_PSEUDO_PRIV_NON_SECURE_SAVE_ADDR; cmds[2] = lower_32_bits(rb->preemption_desc->gpuaddr); cmds[3] = upper_32_bits(rb->preemption_desc->gpuaddr); - cmds[4] = 2; + cmds[4] = SET_PSEUDO_PRIV_SECURE_SAVE_ADDR; cmds[5] = lower_32_bits(rb->secure_preemption_desc->gpuaddr); cmds[6] = upper_32_bits(rb->secure_preemption_desc->gpuaddr); @@ -589,9 +632,9 @@ int gen7_rb_start(struct adreno_device *adreno_dev) FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { memset(rb->buffer_desc->hostptr, 0xaa, KGSL_RB_SIZE); kgsl_sharedmem_writel(device->scratch, - SCRATCH_RPTR_OFFSET(rb->id), 0); + SCRATCH_RB_OFFSET(rb->id, rptr), 0); kgsl_sharedmem_writel(device->scratch, - SCRATCH_BV_RPTR_OFFSET(rb->id), 0); + SCRATCH_RB_OFFSET(rb->id, bv_rptr), 0); rb->wptr = 0; rb->_wptr = 0; @@ -603,11 +646,11 @@ int gen7_rb_start(struct adreno_device *adreno_dev) /* Set up the current ringbuffer */ rb = ADRENO_CURRENT_RINGBUFFER(adreno_dev); - addr = SCRATCH_RPTR_GPU_ADDR(device, rb->id); + addr = SCRATCH_RB_GPU_ADDR(device, rb->id, rptr); kgsl_regwrite(device, GEN7_CP_RB_RPTR_ADDR_LO, lower_32_bits(addr)); kgsl_regwrite(device, GEN7_CP_RB_RPTR_ADDR_HI, upper_32_bits(addr)); - addr = SCRATCH_BV_RPTR_GPU_ADDR(device, rb->id); + addr = SCRATCH_RB_GPU_ADDR(device, rb->id, bv_rptr); kgsl_regwrite(device, GEN7_CP_BV_RB_RPTR_ADDR_LO, lower_32_bits(addr)); kgsl_regwrite(device, GEN7_CP_BV_RB_RPTR_ADDR_HI, upper_32_bits(addr)); @@ -815,8 +858,17 @@ static void gen7_err_callback(struct adreno_device *adreno_dev, int bit) dev_crit_ratelimited(dev, "UCHE: Trap interrupt\n"); break; case GEN7_INT_TSBWRITEERROR: - dev_crit_ratelimited(dev, "TSB: Write error interrupt\n"); + { + u32 lo, hi; + + kgsl_regread(device, GEN7_RBBM_SECVID_TSB_STATUS_LO, &lo); + kgsl_regread(device, GEN7_RBBM_SECVID_TSB_STATUS_HI, &hi); + + dev_crit_ratelimited(dev, "TSB: Write error interrupt: Address: 0x%llx MID: %d\n", + FIELD_GET(GENMASK(16, 0), hi) << 32 | lo, + FIELD_GET(GENMASK(31, 23), hi)); break; + } default: dev_crit_ratelimited(dev, "Unknown interrupt %d\n", bit); } diff --git a/adreno_gen7.h b/adreno_gen7.h index 7e4c910231..761dc14430 100644 --- a/adreno_gen7.h +++ b/adreno_gen7.h @@ -433,4 +433,22 @@ to_gen7_gpudev(const struct adreno_gpudev *gpudev) * Reset the preemption records at the time of hard reset */ void gen7_reset_preempt_records(struct adreno_device *adreno_dev); + +/** + * gen7_rdpm_mx_freq_update - Update the mx frequency + * @gmu: An Adreno GMU handle + * @freq: Frequency in KHz + * + * This function communicates GPU mx frequency(in Mhz) changes to rdpm. + */ +void gen7_rdpm_mx_freq_update(struct gen7_gmu_device *gmu, u32 freq); + +/** + * gen7_rdpm_cx_freq_update - Update the cx frequency + * @gmu: An Adreno GMU handle + * @freq: Frequency in KHz + * + * This function communicates GPU cx frequency(in Mhz) changes to rdpm. 
+ */ +void gen7_rdpm_cx_freq_update(struct gen7_gmu_device *gmu, u32 freq); #endif diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index 8fc1726748..867d683037 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -52,6 +52,11 @@ static struct gmu_vma_entry gen7_gmu_vma[] = { .size = SZ_512M, .next_va = 0x60000000, }, + [GMU_NONCACHED_KERNEL_EXTENDED] = { + .start = 0xc0000000, + .size = SZ_512M, + .next_va = 0xc0000000, + }, }; static ssize_t log_stream_enable_store(struct kobject *kobj, @@ -494,6 +499,9 @@ int gen7_gmu_oob_set(struct kgsl_device *device, int ret = 0; int set, check; + if (req == oob_perfcntr && gmu->num_oob_perfcntr++) + return 0; + if (req >= oob_boot_slumber) { dev_err(&gmu->pdev->dev, "Unsupported OOB request %s\n", @@ -508,6 +516,8 @@ int gen7_gmu_oob_set(struct kgsl_device *device, if (gmu_core_timed_poll_check(device, GEN7_GMU_GMU2HOST_INTR_INFO, check, 100, check)) { + if (req == oob_perfcntr) + gmu->num_oob_perfcntr--; gmu_core_fault_snapshot(device); ret = -ETIMEDOUT; WARN(1, "OOB request %s timed out\n", oob_to_str(req)); @@ -527,6 +537,9 @@ void gen7_gmu_oob_clear(struct kgsl_device *device, struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); int clear = BIT(31 - req * 2); + if (req == oob_perfcntr && --gmu->num_oob_perfcntr) + return; + if (req >= oob_boot_slumber) { dev_err(&gmu->pdev->dev, "Unsupported OOB clear %s\n", oob_to_str(req)); @@ -1147,6 +1160,8 @@ void gen7_gmu_suspend(struct adreno_device *adreno_dev) gen7_cx_regulator_disable_wait(gmu->cx_gdsc, device, 5000); + gen7_rdpm_cx_freq_update(gmu, 0); + dev_err(&gmu->pdev->dev, "Suspended GMU\n"); device->state = KGSL_STATE_NONE; @@ -1205,6 +1220,10 @@ static int gen7_gmu_dcvs_set(struct adreno_device *adreno_dev, ADRENO_GMU_FAULT_SKIP_SNAPSHOT); } + if (req.freq != INVALID_DCVS_IDX) + gen7_rdpm_mx_freq_update(gmu, + gmu->hfi.dcvs_table.gx_votes[req.freq].freq); + return ret; } @@ -1467,6 +1486,8 @@ int gen7_gmu_enable_clks(struct adreno_device *adreno_dev) struct kgsl_device *device = KGSL_DEVICE(adreno_dev); int ret; + gen7_rdpm_cx_freq_update(gmu, GMU_FREQ_MIN / 1000); + ret = kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "gmu_clk", GMU_FREQ_MIN); if (ret) { @@ -1576,6 +1597,8 @@ gdsc_off: /* Poll to make sure that the CX is off */ gen7_cx_regulator_disable_wait(gmu->cx_gdsc, device, 5000); + gen7_rdpm_cx_freq_update(gmu, 0); + return ret; } @@ -1640,6 +1663,8 @@ gdsc_off: /* Poll to make sure that the CX is off */ gen7_cx_regulator_disable_wait(gmu->cx_gdsc, device, 5000); + gen7_rdpm_cx_freq_update(gmu, 0); + return ret; } @@ -1708,7 +1733,7 @@ static void gen7_free_gmu_globals(struct gen7_gmu_device *gmu) { int i; - for (i = 0; i < gmu->global_entries; i++) { + for (i = 0; i < gmu->global_entries && i < ARRAY_SIZE(gmu->gmu_globals); i++) { struct kgsl_memdesc *md = &gmu->gmu_globals[i]; if (!md->gmuaddr) @@ -1812,6 +1837,22 @@ static int gen7_gmu_reg_probe(struct adreno_device *adreno_dev) return ret; } +static void gen7_gmu_rdpm_probe(struct gen7_gmu_device *gmu, + struct kgsl_device *device) +{ + struct resource *res; + + res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM, "rdpm_cx"); + if (res) + gmu->rdpm_cx_virt = devm_ioremap(&device->pdev->dev, + res->start, resource_size(res)); + + res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM, "rdpm_mx"); + if (res) + gmu->rdpm_mx_virt = devm_ioremap(&device->pdev->dev, + res->start, resource_size(res)); +} + static int gen7_gmu_regulators_probe(struct gen7_gmu_device *gmu, struct platform_device *pdev) { @@ 
-1933,6 +1974,9 @@ int gen7_gmu_probe(struct kgsl_device *device, } } + /* Setup any rdpm register ranges */ + gen7_gmu_rdpm_probe(gmu, device); + /* Set up GMU regulators */ ret = gen7_gmu_regulators_probe(gmu, pdev); if (ret) @@ -2081,6 +2125,8 @@ static int gen7_gmu_power_off(struct adreno_device *adreno_dev) if (ret) goto error; + gen7_rdpm_mx_freq_update(gmu, 0); + /* Now that we are done with GMU and GPU, Clear the GBIF */ ret = gen7_halt_gbif(adreno_dev); if (ret) @@ -2095,6 +2141,8 @@ static int gen7_gmu_power_off(struct adreno_device *adreno_dev) /* Poll to make sure that the CX is off */ gen7_cx_regulator_disable_wait(gmu->cx_gdsc, device, 5000); + gen7_rdpm_cx_freq_update(gmu, 0); + device->state = KGSL_STATE_NONE; return 0; @@ -2135,8 +2183,6 @@ static int gen7_gpu_boot(struct adreno_device *adreno_dev) adreno_set_active_ctxs_null(adreno_dev); - adreno_ringbuffer_set_global(adreno_dev, 0); - ret = kgsl_mmu_start(device); if (ret) goto err; diff --git a/adreno_gen7_gmu.h b/adreno_gen7_gmu.h index 0702793251..5f40bc575f 100644 --- a/adreno_gen7_gmu.h +++ b/adreno_gen7_gmu.h @@ -85,6 +85,12 @@ struct gen7_gmu_device { * which GMU can run at 500 Mhz. */ u32 perf_ddr_bw; + /** @rdpm_cx_virt: Pointer where the RDPM CX block is mapped */ + void __iomem *rdpm_cx_virt; + /** @rdpm_mx_virt: Pointer where the RDPM MX block is mapped */ + void __iomem *rdpm_mx_virt; + /** @num_oob_perfcntr: Number of active oob_perfcntr requests */ + u32 num_oob_perfcntr; }; /* Helper function to get to gen7 gmu device from adreno device */ diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index 710c696557..18f6a6178c 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -332,6 +332,8 @@ gdsc_off: /* Poll to make sure that the CX is off */ gen7_cx_regulator_disable_wait(gmu->cx_gdsc, device, 5000); + gen7_rdpm_cx_freq_update(gmu, 0); + return ret; } @@ -392,6 +394,8 @@ gdsc_off: /* Poll to make sure that the CX is off */ gen7_cx_regulator_disable_wait(gmu->cx_gdsc, device, 5000); + gen7_rdpm_cx_freq_update(gmu, 0); + return ret; } @@ -464,6 +468,8 @@ static int gen7_hwsched_gmu_power_off(struct adreno_device *adreno_dev) ret = gen7_rscc_sleep_sequence(adreno_dev); + gen7_rdpm_mx_freq_update(gmu, 0); + /* Now that we are done with GMU and GPU, Clear the GBIF */ ret = gen7_halt_gbif(adreno_dev); @@ -476,6 +482,8 @@ static int gen7_hwsched_gmu_power_off(struct adreno_device *adreno_dev) /* Poll to make sure that the CX is off */ gen7_cx_regulator_disable_wait(gmu->cx_gdsc, device, 5000); + gen7_rdpm_cx_freq_update(gmu, 0); + return ret; error: @@ -703,7 +711,7 @@ static int gen7_hwsched_power_off(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); - int ret; + int ret = 0; if (!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) return 0; @@ -713,8 +721,11 @@ static int gen7_hwsched_power_off(struct adreno_device *adreno_dev) /* process any profiling results that are available */ adreno_profile_process_results(ADRENO_DEVICE(device)); - if (!gen7_hw_isidle(adreno_dev)) + if (!gen7_hw_isidle(adreno_dev)) { dev_err(&gmu->pdev->dev, "GPU isn't idle before SLUMBER\n"); + gmu_core_fault_snapshot(device); + goto no_gx_power; + } ret = gen7_gmu_oob_set(device, oob_gpu); if (ret) { @@ -883,6 +894,10 @@ static int gen7_hwsched_dcvs_set(struct adreno_device *adreno_dev, adreno_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); } + if (req.freq != INVALID_DCVS_IDX) + gen7_rdpm_mx_freq_update(gmu, + 
gmu->hfi.dcvs_table.gx_votes[req.freq].freq); + return ret; } @@ -919,6 +934,8 @@ static void scale_gmu_frequency(struct adreno_device *adreno_dev, int buslevel) return; } + gen7_rdpm_cx_freq_update(gmu, freq / 1000); + trace_kgsl_gmu_pwrlevel(freq, prev_freq); prev_freq = freq; diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index 0ee8a7b858..c910ab7581 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -1152,7 +1152,8 @@ static int hfi_f2h_main(void *arg) while (!kthread_should_stop()) { wait_event_interruptible(hfi->f2h_wq, !kthread_should_stop() && !(is_queue_empty(adreno_dev, HFI_MSG_ID) && - is_queue_empty(adreno_dev, HFI_DBG_ID))); + is_queue_empty(adreno_dev, HFI_DBG_ID)) && + (hfi->irq_mask & HFI_IRQ_MSGQ_MASK)); if (kthread_should_stop()) break; @@ -1201,6 +1202,7 @@ static void add_profile_events(struct adreno_device *adreno_dev, unsigned long time_in_ns; struct kgsl_context *context = drawobj->context; struct submission_info info = {0}; + struct adreno_hwsched *hwsched = &adreno_dev->hwsched; /* * Here we are attempting to create a mapping between the @@ -1233,7 +1235,7 @@ static void add_profile_events(struct adreno_device *adreno_dev, time_in_s = time->ktime; time_in_ns = do_div(time_in_s, 1000000000); - info.inflight = -1; + info.inflight = hwsched->inflight; info.rb_id = adreno_get_level(context->priority); info.gmu_dispatch_queue = context->gmu_dispatch_queue; diff --git a/adreno_gen7_preempt.c b/adreno_gen7_preempt.c index 4c5da6d497..5185f933b4 100644 --- a/adreno_gen7_preempt.c +++ b/adreno_gen7_preempt.c @@ -14,14 +14,6 @@ #define PREEMPT_SMMU_RECORD(_field) \ offsetof(struct gen7_cp_smmu_info, _field) -enum { - SET_PSEUDO_REGISTER_SAVE_REGISTER_SMMU_INFO = 0, - SET_PSEUDO_REGISTER_SAVE_REGISTER_PRIV_NON_SECURE_SAVE_ADDR, - SET_PSEUDO_REGISTER_SAVE_REGISTER_PRIV_SECURE_SAVE_ADDR, - SET_PSEUDO_REGISTER_SAVE_REGISTER_NON_PRIV_SAVE_ADDR, - SET_PSEUDO_REGISTER_SAVE_REGISTER_COUNTER, -}; - static void _update_wptr(struct adreno_device *adreno_dev, bool reset_timer, bool atomic) { @@ -257,16 +249,11 @@ void gen7_preemption_trigger(struct adreno_device *adreno_dev, bool atomic) spin_lock_irqsave(&next->preempt_lock, flags); - /* - * Get the pagetable from the pagetable info. - * The pagetable_desc is allocated and mapped at probe time, and - * preemption_desc at init time, so no need to check if - * sharedmem accesses to these memdescs succeed. - */ - kgsl_sharedmem_readq(next->pagetable_desc, &ttbr0, - PT_INFO_OFFSET(ttbr0)); - kgsl_sharedmem_readl(next->pagetable_desc, &contextidr, - PT_INFO_OFFSET(contextidr)); + /* Get the pagetable from the pagetable info. 
*/ + kgsl_sharedmem_readq(device->scratch, &ttbr0, + SCRATCH_RB_OFFSET(next->id, ttbr0)); + kgsl_sharedmem_readl(device->scratch, &contextidr, + SCRATCH_RB_OFFSET(next->id, contextidr)); kgsl_sharedmem_writel(next->preemption_desc, PREEMPT_RECORD(wptr), next->wptr); @@ -476,37 +463,29 @@ u32 gen7_preemption_pre_ibsubmit(struct adreno_device *adreno_dev, u32 *cmds) { u32 *cmds_orig = cmds; - u64 gpuaddr = 0; if (!adreno_is_preemption_enabled(adreno_dev)) return 0; + if (test_and_set_bit(ADRENO_RB_SET_PSEUDO_DONE, &rb->flags)) + goto done; + *cmds++ = cp_type7_packet(CP_THREAD_CONTROL, 1); *cmds++ = CP_SET_THREAD_BR; - if (drawctxt) { - gpuaddr = drawctxt->base.user_ctxt_record->memdesc.gpuaddr; - *cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 15); - } else { - *cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 12); - } + *cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 12); /* NULL SMMU_INFO buffer - we track in KMD */ - *cmds++ = SET_PSEUDO_REGISTER_SAVE_REGISTER_SMMU_INFO; + *cmds++ = SET_PSEUDO_SMMU_INFO; cmds += cp_gpuaddr(adreno_dev, cmds, 0x0); - *cmds++ = SET_PSEUDO_REGISTER_SAVE_REGISTER_PRIV_NON_SECURE_SAVE_ADDR; + *cmds++ = SET_PSEUDO_PRIV_NON_SECURE_SAVE_ADDR; cmds += cp_gpuaddr(adreno_dev, cmds, rb->preemption_desc->gpuaddr); - *cmds++ = SET_PSEUDO_REGISTER_SAVE_REGISTER_PRIV_SECURE_SAVE_ADDR; + *cmds++ = SET_PSEUDO_PRIV_SECURE_SAVE_ADDR; cmds += cp_gpuaddr(adreno_dev, cmds, rb->secure_preemption_desc->gpuaddr); - if (drawctxt) { - *cmds++ = SET_PSEUDO_REGISTER_SAVE_REGISTER_NON_PRIV_SAVE_ADDR; - cmds += cp_gpuaddr(adreno_dev, cmds, gpuaddr); - } - /* * There is no need to specify this address when we are about to * trigger preemption. This is because CP internally stores this @@ -514,14 +493,16 @@ u32 gen7_preemption_pre_ibsubmit(struct adreno_device *adreno_dev, * the context record and thus knows from where to restore * the saved perfcounters for the new ringbuffer. 
*/ - *cmds++ = SET_PSEUDO_REGISTER_SAVE_REGISTER_COUNTER; + *cmds++ = SET_PSEUDO_COUNTER; cmds += cp_gpuaddr(adreno_dev, cmds, rb->perfcounter_save_restore_desc->gpuaddr); +done: if (drawctxt) { struct adreno_ringbuffer *rb = drawctxt->rb; u64 dest = adreno_dev->preempt.scratch->gpuaddr + (rb->id * sizeof(u64)); + u64 gpuaddr = drawctxt->base.user_ctxt_record->memdesc.gpuaddr; *cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 2); cmds += cp_gpuaddr(adreno_dev, cmds, dest); @@ -601,8 +582,10 @@ void gen7_preemption_start(struct adreno_device *adreno_dev) kgsl_sharedmem_writel(rb->preemption_desc, PREEMPT_RECORD(wptr), 0); - adreno_ringbuffer_set_pagetable(rb, + adreno_ringbuffer_set_pagetable(device, rb, device->mmu.defaultpagetable); + + clear_bit(ADRENO_RB_SET_PSEUDO_DONE, &rb->flags); } } @@ -616,13 +599,13 @@ static void reset_rb_preempt_record(struct adreno_device *adreno_dev, kgsl_sharedmem_writel(rb->preemption_desc, PREEMPT_RECORD(cntl), GEN7_CP_RB_CNTL_DEFAULT); kgsl_sharedmem_writeq(rb->preemption_desc, - PREEMPT_RECORD(rptr_addr), SCRATCH_RPTR_GPU_ADDR( - KGSL_DEVICE(adreno_dev), rb->id)); + PREEMPT_RECORD(rptr_addr), SCRATCH_RB_GPU_ADDR( + KGSL_DEVICE(adreno_dev), rb->id, rptr)); kgsl_sharedmem_writeq(rb->preemption_desc, PREEMPT_RECORD(rbase), rb->buffer_desc->gpuaddr); kgsl_sharedmem_writeq(rb->preemption_desc, - PREEMPT_RECORD(bv_rptr_addr), SCRATCH_BV_RPTR_GPU_ADDR( - KGSL_DEVICE(adreno_dev), rb->id)); + PREEMPT_RECORD(bv_rptr_addr), SCRATCH_RB_GPU_ADDR( + KGSL_DEVICE(adreno_dev), rb->id, bv_rptr)); } void gen7_reset_preempt_records(struct adreno_device *adreno_dev) diff --git a/adreno_gen7_ringbuffer.c b/adreno_gen7_ringbuffer.c index 47277fc51f..8f7305acaf 100644 --- a/adreno_gen7_ringbuffer.c +++ b/adreno_gen7_ringbuffer.c @@ -37,10 +37,10 @@ static int gen7_rb_pagetable_switch(struct adreno_device *adreno_dev, cmds[count++] = id; cmds[count++] = cp_type7_packet(CP_MEM_WRITE, 5); - cmds[count++] = lower_32_bits(rb->pagetable_desc->gpuaddr + - PT_INFO_OFFSET(ttbr0)); - cmds[count++] = upper_32_bits(rb->pagetable_desc->gpuaddr + - PT_INFO_OFFSET(ttbr0)); + cmds[count++] = lower_32_bits(SCRATCH_RB_GPU_ADDR(device, + rb->id, ttbr0)); + cmds[count++] = upper_32_bits(SCRATCH_RB_GPU_ADDR(device, + rb->id, ttbr0)); cmds[count++] = lower_32_bits(ttbr0); cmds[count++] = upper_32_bits(ttbr0); cmds[count++] = id; @@ -64,15 +64,15 @@ static int gen7_rb_context_switch(struct adreno_device *adreno_dev, adreno_drawctxt_get_pagetable(drawctxt); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); int count = 0; - u32 cmds[42]; + u32 cmds[46]; /* Sync both threads */ cmds[count++] = cp_type7_packet(CP_THREAD_CONTROL, 1); cmds[count++] = CP_SYNC_THREADS | CP_SET_THREAD_BOTH; /* Reset context state */ cmds[count++] = cp_type7_packet(CP_RESET_CONTEXT_STATE, 1); - cmds[count++] = CP_CLEAR_BV_BR_COUNTER | CP_CLEAR_RESOURCE_TABLE | - CP_CLEAR_ON_CHIP_TS; + cmds[count++] = CP_RESET_GLOBAL_LOCAL_TS | CP_CLEAR_BV_BR_COUNTER | + CP_CLEAR_RESOURCE_TABLE | CP_CLEAR_ON_CHIP_TS; /* * Enable/disable concurrent binning for pagetable switch and * set the thread to BR since only BR can execute the pagetable @@ -87,7 +87,7 @@ static int gen7_rb_context_switch(struct adreno_device *adreno_dev, drawctxt, pagetable, &cmds[count]); else { struct kgsl_iommu *iommu = KGSL_IOMMU(device); - u32 id = drawctxt ? 
drawctxt->base.id : 0; + u32 offset = GEN7_SMMU_BASE + (iommu->cb0_offset >> 2) + 0x0d; /* @@ -96,7 +96,7 @@ static int gen7_rb_context_switch(struct adreno_device *adreno_dev, * need any special sequence or locking to change it */ cmds[count++] = cp_type4_packet(offset, 1); - cmds[count++] = id; + cmds[count++] = drawctxt->base.id; } cmds[count++] = cp_type7_packet(CP_NOP, 1); @@ -119,6 +119,15 @@ static int gen7_rb_context_switch(struct adreno_device *adreno_dev, cmds[count++] = cp_type7_packet(CP_EVENT_WRITE, 1); cmds[count++] = 0x31; + if (adreno_is_preemption_enabled(adreno_dev)) { + u64 gpuaddr = drawctxt->base.user_ctxt_record->memdesc.gpuaddr; + + cmds[count++] = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 3); + cmds[count++] = SET_PSEUDO_NON_PRIV_SAVE_ADDR; + cmds[count++] = lower_32_bits(gpuaddr); + cmds[count++] = upper_32_bits(gpuaddr); + } + return gen7_ringbuffer_addcmds(adreno_dev, rb, NULL, F_NOTPROTECTED, cmds, count, 0, NULL); } @@ -305,6 +314,37 @@ int gen7_ringbuffer_addcmds(struct adreno_device *adreno_dev, if (test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, &device->mmu.pfpolicy)) cmds[index++] = cp_type7_packet(CP_WAIT_MEM_WRITES, 0); + if (is_concurrent_binning(drawctxt)) { + u64 addr = SCRATCH_RB_GPU_ADDR(device, rb->id, bv_ts); + + cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1); + cmds[index++] = CP_SET_THREAD_BV; + + /* + * Make sure the timestamp is committed once BV pipe is + * completely done with this submission. + */ + cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4); + cmds[index++] = CACHE_CLEAN | BIT(27); + cmds[index++] = lower_32_bits(addr); + cmds[index++] = upper_32_bits(addr); + cmds[index++] = rb->timestamp; + + cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1); + cmds[index++] = CP_SET_THREAD_BR; + + /* + * This makes sure that BR doesn't race ahead and commit + * timestamp to memstore while BV is still processing + * this submission. 
+ */ + cmds[index++] = cp_type7_packet(CP_WAIT_TIMESTAMP, 4); + cmds[index++] = 0; + cmds[index++] = lower_32_bits(addr); + cmds[index++] = upper_32_bits(addr); + cmds[index++] = rb->timestamp; + } + /* * If this is an internal command, just write the ringbuffer timestamp, * otherwise, write both @@ -431,7 +471,7 @@ static int gen7_drawctxt_switch(struct adreno_device *adreno_dev, ADRENO_DRAWOBJ_PROFILE_OFFSET((cmdobj)->profile_index, \ field)) -#define GEN7_COMMAND_DWORDS 38 +#define GEN7_COMMAND_DWORDS 52 int gen7_ringbuffer_submitcmd(struct adreno_device *adreno_dev, struct kgsl_drawobj_cmd *cmdobj, u32 flags, diff --git a/adreno_gen7_rpmh.c b/adreno_gen7_rpmh.c index 37e3fcdaf1..aa7841ad93 100644 --- a/adreno_gen7_rpmh.c +++ b/adreno_gen7_rpmh.c @@ -311,8 +311,7 @@ static int setup_gx_arc_votes(struct adreno_device *adreno_dev, /* Add the zero powerlevel for the perf table */ table->gpu_level_num = device->pwrctrl.num_pwrlevels + 1; - if (table->gpu_level_num > pri_rail->num || - table->gpu_level_num > ARRAY_SIZE(vlvl_tbl)) { + if (table->gpu_level_num > ARRAY_SIZE(vlvl_tbl)) { dev_err(&gmu->pdev->dev, "Defined more GPU DCVS levels than RPMh can support\n"); return -ERANGE; @@ -462,8 +461,14 @@ int gen7_build_rpmh_tables(struct adreno_device *adreno_dev) int ret; ret = build_dcvs_table(adreno_dev); - if (ret) + if (ret) { + dev_err(adreno_dev->dev.dev, "Failed to build dcvs table\n"); return ret; + } - return build_bw_table(adreno_dev); + ret = build_bw_table(adreno_dev); + if (ret) + dev_err(adreno_dev->dev.dev, "Failed to build bw table\n"); + + return ret; } diff --git a/adreno_gen7_snapshot.c b/adreno_gen7_snapshot.c index 71a2d37c22..4f40872cce 100644 --- a/adreno_gen7_snapshot.c +++ b/adreno_gen7_snapshot.c @@ -1109,7 +1109,7 @@ void gen7_snapshot(struct adreno_device *adreno_dev, struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct adreno_ringbuffer *rb; unsigned int i; - u32 hi, lo, cgc, cgc1, cgc2; + u32 hi, lo, cgc = 0, cgc1 = 0, cgc2 = 0; /* * Dump debugbus data here to capture it for both diff --git a/adreno_hfi.h b/adreno_hfi.h index 6b171de136..ead4fa0918 100644 --- a/adreno_hfi.h +++ b/adreno_hfi.h @@ -422,7 +422,7 @@ struct hfi_dcvstable_cmd { } __packed; #define MAX_ACD_STRIDE 2 -#define MAX_ACD_NUM_LEVELS 6 +#define MAX_ACD_NUM_LEVELS KGSL_MAX_PWRLEVELS /* H2F */ struct hfi_acd_table_cmd { @@ -597,23 +597,6 @@ struct hfi_issue_ib { u32 size; } __packed; -/* H2F */ -struct hfi_issue_cmd_cmd { - u32 hdr; - u32 ctxt_id; - u32 flags; - u32 ts; - u32 count; - struct hfi_issue_ib *ibs[]; -} __packed; - -/* Internal */ -struct hfi_issue_cmd_req { - u32 queue; - u32 ctxt_id; - struct hfi_issue_cmd_cmd cmd; -} __packed; - /* H2F */ /* The length of *buf will be embedded in the hdr */ struct hfi_issue_cmd_raw_cmd { @@ -736,7 +719,7 @@ static inline int _CMD_MSG_HDR(u32 *hdr, int id, size_t size) /* Maximum number of IBs in a submission */ #define HWSCHED_MAX_DISPATCH_NUMIBS \ - ((HFI_MAX_MSG_SIZE - offsetof(struct hfi_issue_cmd_cmd, ibs)) \ + ((HFI_MAX_MSG_SIZE - sizeof(struct hfi_submit_cmd)) \ / sizeof(struct hfi_issue_ib)) /** diff --git a/adreno_hwsched.c b/adreno_hwsched.c index 84a21aec8c..13db07a065 100644 --- a/adreno_hwsched.c +++ b/adreno_hwsched.c @@ -1588,6 +1588,14 @@ void adreno_hwsched_parse_fault_cmdobj(struct adreno_device *adreno_dev, struct adreno_hwsched *hwsched = &adreno_dev->hwsched; struct cmd_list_obj *obj, *tmp; + /* + * During IB parse, vmalloc is called which can sleep and + * should not be called from atomic context. 
Since IBs are not + * dumped during atomic snapshot, there is no need to parse it. + */ + if (adreno_dev->dev.snapshot_atomic) + return; + list_for_each_entry_safe(obj, tmp, &hwsched->cmd_list, node) { struct kgsl_drawobj_cmd *cmdobj = obj->cmdobj; diff --git a/adreno_pm4types.h b/adreno_pm4types.h index 1d5ab43fa9..426cbce027 100644 --- a/adreno_pm4types.h +++ b/adreno_pm4types.h @@ -164,13 +164,17 @@ /* Controls which threads execute the PM4 commands the follow this packet */ #define CP_THREAD_CONTROL 0x17 +#define CP_WAIT_TIMESTAMP 0x14 + #define CP_SET_THREAD_BR FIELD_PREP(GENMASK(1, 0), 1) +#define CP_SET_THREAD_BV FIELD_PREP(GENMASK(1, 0), 2) #define CP_SET_THREAD_BOTH FIELD_PREP(GENMASK(1, 0), 3) #define CP_SYNC_THREADS BIT(31) #define CP_CONCURRENT_BIN_DISABLE BIT(27) #define CP_RESET_CONTEXT_STATE 0x1F +#define CP_RESET_GLOBAL_LOCAL_TS BIT(3) #define CP_CLEAR_BV_BR_COUNTER BIT(2) #define CP_CLEAR_RESOURCE_TABLE BIT(1) #define CP_CLEAR_ON_CHIP_TS BIT(0) diff --git a/adreno_ringbuffer.c b/adreno_ringbuffer.c index 5721bb4fb1..3d8af5a086 100644 --- a/adreno_ringbuffer.c +++ b/adreno_ringbuffer.c @@ -127,15 +127,6 @@ int adreno_ringbuffer_setup(struct adreno_device *adreno_dev, unsigned int priv = 0; int ret; - /* - * Allocate mem for storing RB pagetables and commands to - * switch pagetable - */ - ret = adreno_allocate_global(device, &rb->pagetable_desc, PAGE_SIZE, - SZ_16K, 0, KGSL_MEMDESC_PRIVILEGED, "pagetable_desc"); - if (ret) - return ret; - /* allocate a chunk of memory to create user profiling IB1s */ adreno_allocate_global(device, &rb->profile_desc, PAGE_SIZE, 0, KGSL_MEMFLAGS_GPUREADONLY, 0, "profile_desc"); diff --git a/adreno_ringbuffer.h b/adreno_ringbuffer.h index 447586e72d..2dea03eebe 100644 --- a/adreno_ringbuffer.h +++ b/adreno_ringbuffer.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (c) 2002,2007-2020, The Linux Foundation. All rights reserved. + * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. */ #ifndef __ADRENO_RINGBUFFER_H #define __ADRENO_RINGBUFFER_H @@ -67,30 +67,10 @@ struct adreno_submit_time { }; /** - * struct adreno_ringbuffer_pagetable_info - Contains fields used during a - * pagetable switch. - * @current_global_ptname: The current pagetable id being used by the GPU. - * Only the ringbuffers[0] current_global_ptname is used to keep track of - * the current pagetable id - * @current_rb_ptname: The current pagetable active on the given RB - * @incoming_ptname: Contains the incoming pagetable we are switching to. After - * switching of pagetable this value equals current_rb_ptname. - * @switch_pt_enable: Flag used during pagetable switch to check if pt - * switch can be skipped - * @ttbr0: value to program into TTBR0 during pagetable switch. - * @contextidr: value to program into CONTEXTIDR during pagetable switch. 
+ * This is to keep track whether the SET_PSEUDO_REGISTER packet needs to be submitted + * or not */ -struct adreno_ringbuffer_pagetable_info { - int current_global_ptname; - int current_rb_ptname; - int incoming_ptname; - int switch_pt_enable; - uint64_t ttbr0; - unsigned int contextidr; -}; - -#define PT_INFO_OFFSET(_field) \ - offsetof(struct adreno_ringbuffer_pagetable_info, _field) +#define ADRENO_RB_SET_PSEUDO_DONE 0 /** * struct adreno_ringbuffer - Definition for an adreno ringbuffer object @@ -112,7 +92,6 @@ struct adreno_ringbuffer_pagetable_info { * preemption info written/read by CP for secure contexts * @perfcounter_save_restore_desc: Used by CP to save/restore the perfcounter * values across preemption - * @pagetable_desc: Memory to hold information about the pagetables being used * and the commands to switch pagetable on the RB * @dispatch_q: The dispatcher side queue for this ringbuffer * @ts_expire_waitq: Wait queue to wait for rb timestamp to expire @@ -126,7 +105,7 @@ struct adreno_ringbuffer_pagetable_info { * hardware */ struct adreno_ringbuffer { - uint32_t flags; + unsigned long flags; struct kgsl_memdesc *buffer_desc; unsigned int _wptr; unsigned int wptr; @@ -139,7 +118,6 @@ struct adreno_ringbuffer { struct kgsl_memdesc *preemption_desc; struct kgsl_memdesc *secure_preemption_desc; struct kgsl_memdesc *perfcounter_save_restore_desc; - struct kgsl_memdesc *pagetable_desc; struct adreno_dispatcher_drawqueue dispatch_q; wait_queue_head_t ts_expire_waitq; unsigned int wptr_preempt_end; diff --git a/adreno_snapshot.c b/adreno_snapshot.c index ec6defa94f..d85307d966 100644 --- a/adreno_snapshot.c +++ b/adreno_snapshot.c @@ -276,6 +276,14 @@ static void snapshot_rb_ibs(struct kgsl_device *device, int index, i; int parse_ibs = 0, ib_parse_start; + /* + * During IB parse, vmalloc is called which can sleep and + * should not be called from atomic context. Since IBs are not + * dumped during atomic snapshot, there is no need to parse it. + */ + if (device->snapshot_atomic) + return; + /* * Figure out the window of ringbuffer data to dump. First we need to * find where the last processed IB ws submitted. 
Start walking back diff --git a/adreno_trace.c b/adreno_trace.c index 84577f2441..4c27d2c451 100644 --- a/adreno_trace.c +++ b/adreno_trace.c @@ -10,6 +10,7 @@ #define CREATE_TRACE_POINTS #include "adreno_trace.h" +#ifdef CONFIG_QCOM_KGSL_FENCE_TRACE static const char * const kgsl_fence_trace_events[] = { "adreno_cmdbatch_submitted", "adreno_cmdbatch_retired", @@ -34,3 +35,4 @@ void adreno_fence_trace_array_init(struct kgsl_device *device) "kgsl", kgsl_fence_trace_events[i], true); } +#endif diff --git a/gen7_reg.h b/gen7_reg.h index e530a02854..da03e710ff 100644 --- a/gen7_reg.h +++ b/gen7_reg.h @@ -556,6 +556,8 @@ #define GEN7_RBBM_SECVID_TSB_TRUSTED_BASE_HI 0xf801 #define GEN7_RBBM_SECVID_TSB_TRUSTED_SIZE 0xf802 #define GEN7_RBBM_SECVID_TSB_CNTL 0xf803 +#define GEN7_RBBM_SECVID_TSB_STATUS_LO 0xfc00 +#define GEN7_RBBM_SECVID_TSB_STATUS_HI 0xfc01 #define GEN7_RBBM_GBIF_CLIENT_QOS_CNTL 0x00011 #define GEN7_RBBM_GBIF_HALT 0x00016 @@ -1035,7 +1037,7 @@ #define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_6_L 0x1f870 #define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_6_H 0x1f871 #define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_7_L 0x1f872 -#define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_7_H 0x1f843 +#define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_7_H 0x1f873 #define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_8_L 0x1f874 #define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_8_H 0x1f875 #define GEN7_GMU_CX_GMU_POWER_COUNTER_XOCLK_9_L 0x1f876 diff --git a/kgsl.c b/kgsl.c index c59ccff831..8a423ddbd4 100644 --- a/kgsl.c +++ b/kgsl.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -23,7 +24,7 @@ #include #include #include -#include +#include #include "kgsl_compat.h" #include "kgsl_debugfs.h" @@ -872,10 +873,12 @@ static void kgsl_destroy_process_private(struct kref *kref) struct kgsl_process_private *private = container_of(kref, struct kgsl_process_private, refcount); - mutex_lock(&kgsl_driver.process_mutex); debugfs_remove_recursive(private->debug_root); + kobject_put(&private->kobj_memtype); kobject_put(&private->kobj); + mutex_lock(&kgsl_driver.process_mutex); + /* When using global pagetables, do not detach global pagetable */ if (private->pagetable->name != KGSL_MMU_GLOBAL_PT) kgsl_mmu_detach_pagetable(private->pagetable); @@ -1084,7 +1087,7 @@ static struct kgsl_process_private *kgsl_process_private_open( * private destroy is triggered but didn't complete. Retry creating * process private after sometime to allow previous destroy to complete. 
*/ - for (i = 0; (PTR_ERR_OR_ZERO(private) == -EEXIST) && (i < 5); i++) { + for (i = 0; (PTR_ERR_OR_ZERO(private) == -EEXIST) && (i < 50); i++) { usleep_range(10, 100); private = _process_private_open(device); } @@ -2831,7 +2834,25 @@ static void kgsl_process_add_stats(struct kgsl_process_private *priv, priv->stats[type].max = ret; } +u64 kgsl_get_stats(pid_t pid) +{ + struct kgsl_process_private *process; + u64 ret; + if (pid < 0) + return atomic_long_read(&kgsl_driver.stats.page_alloc); + + process = kgsl_process_private_find(pid); + + if (!process) + return 0; + + ret = atomic64_read(&process->stats[KGSL_MEM_ENTRY_KERNEL].cur); + kgsl_process_private_put(process); + + return ret; +} +EXPORT_SYMBOL(kgsl_get_stats); long kgsl_ioctl_gpuobj_import(struct kgsl_device_private *dev_priv, unsigned int cmd, void *data) @@ -2957,6 +2978,43 @@ static int _map_usermem_dma_buf(struct kgsl_device *device, #endif #ifdef CONFIG_DMA_SHARED_BUFFER +static int verify_secure_access(struct kgsl_device *device, + struct kgsl_mem_entry *entry, struct dma_buf *dmabuf) +{ + bool secure = entry->memdesc.priv & KGSL_MEMDESC_SECURE; + uint32_t *vmid_list = NULL, *perms_list = NULL; + uint32_t nelems = 0; + int i; + + if (mem_buf_dma_buf_copy_vmperm(dmabuf, (int **)&vmid_list, + (int **)&perms_list, (int *)&nelems)) { + dev_info(device->dev, "Skipped access check\n"); + return 0; + } + + /* Check if secure buffer is accessible to CP_PIXEL */ + for (i = 0; i < nelems; i++) { + if (vmid_list[i] == VMID_CP_PIXEL) + break; + } + + kfree(vmid_list); + kfree(perms_list); + + /* + * Do not import a buffer if it is accessible to CP_PIXEL but is being imported as + * a buffer accessible to non-secure GPU. Also, make sure if buffer is to be made + * accessible to secure GPU, it must be accessible to CP_PIXEL + */ + if (!(secure ^ (i == nelems))) + return -EPERM; + + if (secure && mem_buf_dma_buf_exclusive_owner(dmabuf)) + return -EPERM; + + return 0; +} + static int kgsl_setup_dma_buf(struct kgsl_device *device, struct kgsl_pagetable *pagetable, struct kgsl_mem_entry *entry, @@ -3012,44 +3070,10 @@ static int kgsl_setup_dma_buf(struct kgsl_device *device, entry->priv_data = meta; entry->memdesc.sgt = sg_table; - if (entry->memdesc.priv & KGSL_MEMDESC_SECURE) { - uint32_t *vmid_list = NULL, *perms_list = NULL; - uint32_t nelems = 0; - int i; + ret = verify_secure_access(device, entry, dmabuf); + if (ret) + goto out; - if (mem_buf_dma_buf_exclusive_owner(dmabuf)) { - ret = -EPERM; - goto out; - } - - ret = mem_buf_dma_buf_copy_vmperm(dmabuf, (int **)&vmid_list, - (int **)&perms_list, (int *)&nelems); - if (ret) { - ret = 0; - dev_info(device->dev, "Skipped access check\n"); - goto skip_access_check; - } - - /* Check if secure buffer is accessible to CP_PIXEL */ - for (i = 0; i < nelems; i++) { - if (vmid_list[i] == QCOM_DMA_HEAP_FLAG_CP_PIXEL) - break; - } - - kfree(vmid_list); - kfree(perms_list); - - if (i == nelems) { - /* - * Secure buffer is not accessible to CP_PIXEL, there is no point - * in importing this buffer. - */ - ret = -EPERM; - goto out; - } - } - -skip_access_check: /* Calculate the size of the memdesc from the sglist */ for (s = entry->memdesc.sgt->sgl; s != NULL; s = sg_next(s)) entry->memdesc.size += (uint64_t) s->length; diff --git a/kgsl.h b/kgsl.h index 98b25cb300..5bc6f64262 100644 --- a/kgsl.h +++ b/kgsl.h @@ -61,19 +61,28 @@ * is mapped into the GPU. This allows for some 'shared' data between * the GPU and CPU. For example, it will be used by the GPU to write * each updated RPTR for each RB. 
- * - * Used Data: - * Offset: Length(bytes): What - * 0x0: 4 * KGSL_PRIORITY_MAX_RB_LEVELS: RB0 RPTR */ /* Shadow global helpers */ -#define SCRATCH_RPTR_OFFSET(id) ((id) * sizeof(unsigned int)) -#define SCRATCH_RPTR_GPU_ADDR(dev, id) \ - ((dev)->scratch->gpuaddr + SCRATCH_RPTR_OFFSET(id)) -#define SCRATCH_BV_RPTR_OFFSET(id) (0x40 + (id) * sizeof(unsigned int)) -#define SCRATCH_BV_RPTR_GPU_ADDR(dev, id) \ - ((dev)->scratch->gpuaddr + SCRATCH_BV_RPTR_OFFSET(id)) +struct adreno_rb_shadow { + /** @rptr: per ringbuffer address where GPU writes the rptr */ + u32 rptr; + /** @bv_rptr: per ringbuffer address where GPU writes BV rptr */ + u32 bv_rptr; + /** @bv_ts: per ringbuffer address where BV ringbuffer timestamp is written to */ + u32 bv_ts; + /** @current_rb_ptname: The current pagetable active on the given RB */ + u32 current_rb_ptname; + /** @ttbr0: value to program into TTBR0 during pagetable switch */ + u64 ttbr0; + /** @contextidr: value to program into CONTEXTIDR during pagetable switch */ + u32 contextidr; +}; + +#define SCRATCH_RB_OFFSET(id, _field) ((id * sizeof(struct adreno_rb_shadow)) + \ + offsetof(struct adreno_rb_shadow, _field)) +#define SCRATCH_RB_GPU_ADDR(dev, id, _field) \ + ((dev)->scratch->gpuaddr + SCRATCH_RB_OFFSET(id, _field)) /* Timestamp window used to detect rollovers (half of integer range) */ #define KGSL_TIMESTAMP_WINDOW 0x80000000 diff --git a/kgsl_bus.c b/kgsl_bus.c index 2279ce5d2e..e1991d0b22 100644 --- a/kgsl_bus.c +++ b/kgsl_bus.c @@ -31,6 +31,8 @@ static u32 _ab_buslevel_update(struct kgsl_pwrctrl *pwr, return (pwr->bus_percent_ab * pwr->bus_max) / 100; } +#define ACTIVE_ONLY_TAG 0x3 +#define PERF_MODE_TAG 0x8 int kgsl_bus_update(struct kgsl_device *device, enum kgsl_bus_vote vote_state) @@ -69,6 +71,11 @@ int kgsl_bus_update(struct kgsl_device *device, /* buslevel is the IB vote, update the AB */ ab = _ab_buslevel_update(pwr, pwr->ddr_table[buslevel]); + if (buslevel == pwr->pwrlevels[0].bus_max) + icc_set_tag(pwr->icc_path, ACTIVE_ONLY_TAG | PERF_MODE_TAG); + else + icc_set_tag(pwr->icc_path, ACTIVE_ONLY_TAG); + return device->ftbl->gpu_bus_set(device, buslevel, ab); } diff --git a/kgsl_device.h b/kgsl_device.h index ba704d3cb1..dc81abf235 100644 --- a/kgsl_device.h +++ b/kgsl_device.h @@ -461,6 +461,11 @@ struct kgsl_process_private { * @cmd_count: The number of cmds that are active for the process */ atomic_t cmd_count; + /** + * @kobj_memtype: Pointer to a kobj for memtype sysfs directory for this + * process + */ + struct kobject kobj_memtype; }; struct kgsl_device_private { diff --git a/kgsl_drawobj.c b/kgsl_drawobj.c index 75807d6350..83ce37938c 100644 --- a/kgsl_drawobj.c +++ b/kgsl_drawobj.c @@ -188,11 +188,23 @@ static void syncobj_timer(struct timer_list *t) case KGSL_CMD_SYNCPOINT_TYPE_TIMELINE: { int j; struct event_timeline_info *info = event->priv; + struct dma_fence *fence = event->fence; + bool retired = false; + bool signaled = test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &fence->flags); + const char *str = NULL; + if (fence->ops->signaled && fence->ops->signaled(fence)) + retired = true; + + if (!retired) + str = "not retired"; + else if (retired && signaled) + str = "signaled"; + else if (retired && !signaled) + str = "retired but not signaled"; dev_err(device->dev, " [%u] FENCE %s\n", - i, dma_fence_is_signaled(event->fence) ? 
- "signaled" : "not signaled"); - + i, str); for (j = 0; info && info[j].timeline; j++) dev_err(device->dev, " TIMELINE %d SEQNO %lld\n", info[j].timeline, info[j].seqno); @@ -530,11 +542,12 @@ static int drawobj_add_sync_timeline(struct kgsl_device *device, drawobj_get_sync_timeline_priv(u64_to_user_ptr(sync.timelines), sync.timelines_size, sync.count); + /* Set pending flag before adding callback to avoid race */ + set_bit(event->id, &syncobj->pending); + ret = dma_fence_add_callback(event->fence, &event->cb, drawobj_sync_timeline_fence_callback); - set_bit(event->id, &syncobj->pending); - if (ret) { clear_bit(event->id, &syncobj->pending); @@ -746,6 +759,7 @@ static void add_profiling_buffer(struct kgsl_device *device, { struct kgsl_mem_entry *entry; struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj); + u64 start; if (!(drawobj->flags & KGSL_DRAWOBJ_PROFILING)) return; @@ -762,7 +776,14 @@ static void add_profiling_buffer(struct kgsl_device *device, gpuaddr); if (entry != NULL) { - if (!kgsl_gpuaddr_in_memdesc(&entry->memdesc, gpuaddr, size)) { + start = id ? (entry->memdesc.gpuaddr + offset) : gpuaddr; + /* + * Make sure there is enough room in the object to store the + * entire profiling buffer object + */ + if (!kgsl_gpuaddr_in_memdesc(&entry->memdesc, gpuaddr, size) || + !kgsl_gpuaddr_in_memdesc(&entry->memdesc, start, + sizeof(struct kgsl_drawobj_profiling_buffer))) { kgsl_mem_entry_put(entry); entry = NULL; } @@ -775,28 +796,7 @@ static void add_profiling_buffer(struct kgsl_device *device, return; } - - if (!id) { - cmdobj->profiling_buffer_gpuaddr = gpuaddr; - } else { - u64 off = offset + sizeof(struct kgsl_drawobj_profiling_buffer); - - /* - * Make sure there is enough room in the object to store the - * entire profiling buffer object - */ - if (off < offset || off >= entry->memdesc.size) { - dev_err(device->dev, - "ignore invalid profile offset ctxt %d id %d offset %lld gpuaddr %llx size %lld\n", - drawobj->context->id, id, offset, gpuaddr, size); - kgsl_mem_entry_put(entry); - return; - } - - cmdobj->profiling_buffer_gpuaddr = - entry->memdesc.gpuaddr + offset; - } - + cmdobj->profiling_buffer_gpuaddr = start; cmdobj->profiling_buf_entry = entry; } diff --git a/kgsl_gmu_core.h b/kgsl_gmu_core.h index 0ae12a8e04..3fdc68f498 100644 --- a/kgsl_gmu_core.h +++ b/kgsl_gmu_core.h @@ -162,7 +162,8 @@ enum gmu_mem_type { GMU_CACHE = GMU_ICACHE, GMU_DTCM, GMU_DCACHE, - GMU_NONCACHED_KERNEL, + GMU_NONCACHED_KERNEL, /* GMU VBIF3 uncached VA range: 0x60000000 - 0x7fffffff */ + GMU_NONCACHED_KERNEL_EXTENDED, /* GMU VBIF3 uncached VA range: 0xc0000000 - 0xdfffffff */ GMU_NONCACHED_USER, GMU_MEM_TYPE_MAX, }; diff --git a/kgsl_iommu.c b/kgsl_iommu.c index 282a231b90..82518dcdc0 100644 --- a/kgsl_iommu.c +++ b/kgsl_iommu.c @@ -825,7 +825,7 @@ static void kgsl_iommu_print_fault(struct kgsl_mmu *mmu, struct adreno_device *adreno_dev = ADRENO_DEVICE(device); const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); struct kgsl_mem_entry *prev = NULL, *next = NULL, *entry; - const char *fault_type; + const char *fault_type = NULL; const char *comm = NULL; u32 ptname = KGSL_MMU_GLOBAL_PT; int id; @@ -847,6 +847,9 @@ static void kgsl_iommu_print_fault(struct kgsl_mmu *mmu, fault_type = "external"; else if (flags & IOMMU_FAULT_TRANSACTION_STALLED) fault_type = "transaction stalled"; + else + fault_type = "unknown"; + /* FIXME: This seems buggy */ if (test_bit(KGSL_FT_PAGEFAULT_LOG_ONE_PER_PAGE, &mmu->pfpolicy)) @@ -1122,6 +1125,7 @@ static void kgsl_iommu_destroy_pagetable(struct kgsl_pagetable 
*pagetable) { struct kgsl_iommu_pt *pt = to_iommu_pt(pagetable); + free_io_pgtable_ops(pt->pgtbl_ops); kfree(pt); } diff --git a/kgsl_pool.c b/kgsl_pool.c index 18f6a8e28d..134c225303 100644 --- a/kgsl_pool.c +++ b/kgsl_pool.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -31,6 +32,7 @@ static struct kmem_cache *addr_page_cache; * @reserved_pages: Number of pages reserved at init for the pool * @list_lock: Spinlock for page list in the pool * @pool_rbtree: RB tree with all pages held/reserved in this pool + * @mempool: Mempool to pre-allocate tracking structs for pages in this pool */ struct kgsl_page_pool { unsigned int pool_order; @@ -38,15 +40,28 @@ struct kgsl_page_pool { unsigned int reserved_pages; spinlock_t list_lock; struct rb_root pool_rbtree; + mempool_t *mempool; }; +static void *_pool_entry_alloc(gfp_t gfp_mask, void *arg) +{ + return kmem_cache_alloc(addr_page_cache, gfp_mask); +} + +static void _pool_entry_free(void *element, void *arg) +{ + return kmem_cache_free(addr_page_cache, element); +} + static int __kgsl_pool_add_page(struct kgsl_page_pool *pool, struct page *p) { struct rb_node **node, *parent; struct kgsl_pool_page_entry *new_page, *entry; + gfp_t gfp_mask = GFP_KERNEL & ~__GFP_DIRECT_RECLAIM; - new_page = kmem_cache_alloc(addr_page_cache, GFP_KERNEL); + new_page = pool->mempool ? mempool_alloc(pool->mempool, gfp_mask) : + kmem_cache_alloc(addr_page_cache, gfp_mask); if (new_page == NULL) return -ENOMEM; @@ -87,7 +102,10 @@ __kgsl_pool_get_page(struct kgsl_page_pool *pool) entry = rb_entry(node, struct kgsl_pool_page_entry, node); p = entry->page; rb_erase(&entry->node, &pool->pool_rbtree); - kmem_cache_free(addr_page_cache, entry); + if (pool->mempool) + mempool_free(entry, pool->mempool); + else + kmem_cache_free(addr_page_cache, entry); pool->page_count--; return p; } @@ -101,6 +119,17 @@ static void kgsl_pool_cache_init(void) { addr_page_cache = KMEM_CACHE(kgsl_pool_page_entry, 0); } + +static void kgsl_pool_cache_destroy(void) +{ + kmem_cache_destroy(addr_page_cache); +} + +static void kgsl_destroy_page_pool(struct kgsl_page_pool *pool) +{ + mempool_destroy(pool->mempool); +} + #else /** * struct kgsl_page_pool - Structure to hold information for the pool @@ -151,6 +180,14 @@ static void kgsl_pool_list_init(struct kgsl_page_pool *pool) static void kgsl_pool_cache_init(void) { } + +static void kgsl_pool_cache_destroy(void) +{ +} + +static void kgsl_destroy_page_pool(struct kgsl_page_pool *pool) +{ +} #endif static struct kgsl_page_pool kgsl_pools[6]; @@ -563,6 +600,15 @@ static void kgsl_pool_reserve_pages(struct kgsl_page_pool *pool, /* Limit the total number of reserved pages to 4096 */ pool->reserved_pages = min_t(u32, reserved, 4096); +#if IS_ENABLED(CONFIG_QCOM_KGSL_SORT_POOL) + /* + * Pre-allocate tracking structs for reserved_pages so that + * the pool can hold them even in low memory conditions + */ + pool->mempool = mempool_create(pool->reserved_pages, + _pool_entry_alloc, _pool_entry_free, NULL); +#endif + for (i = 0; i < pool->reserved_pages; i++) { gfp_t gfp_mask = kgsl_gfp_mask(pool->pool_order); struct page *page; @@ -632,10 +678,19 @@ void kgsl_probe_page_pools(void) void kgsl_exit_page_pools(void) { + int i; + /* Release all pages in pools, if any.*/ kgsl_pool_reduce(INT_MAX, true); /* Unregister shrinker */ unregister_shrinker(&kgsl_pool_shrinker); + + /* Destroy helper structures */ + for (i = 0; i < kgsl_num_pools; i++) + kgsl_destroy_page_pool(&kgsl_pools[i]); + + /* Destroy the kmem cache */ + 
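	/* (Ordering note: mempool_destroy() above hands its reserved elements back
	 * through _pool_entry_free(), which frees into addr_page_cache, so the
	 * per-pool mempools must be torn down before the kmem cache below.)
	 */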
kgsl_pool_cache_destroy(); } diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c index 5a3e52c3a7..57b2b63499 100644 --- a/kgsl_pwrctrl.c +++ b/kgsl_pwrctrl.c @@ -1557,12 +1557,9 @@ int kgsl_pwrctrl_init(struct kgsl_device *device) init_waitqueue_head(&device->active_cnt_wq); - /* Initialize the user and thermal clock constraints */ - - pwr->max_pwrlevel = 0; - pwr->min_pwrlevel = pwr->num_pwrlevels - 1; + /* Initialize the thermal clock constraints */ pwr->thermal_pwrlevel = 0; - pwr->thermal_pwrlevel_floor = pwr->min_pwrlevel; + pwr->thermal_pwrlevel_floor = pwr->num_pwrlevels - 1; pwr->wakeup_maxpwrlevel = 0; diff --git a/kgsl_pwrscale.c b/kgsl_pwrscale.c index c7d0ff0d66..fc7e47e177 100644 --- a/kgsl_pwrscale.c +++ b/kgsl_pwrscale.c @@ -705,10 +705,10 @@ int kgsl_pwrscale_init(struct kgsl_device *device, struct platform_device *pdev, devfreq = devfreq_add_device(&pdev->dev, &gpu_profile->profile, governor, &adreno_tz_data); - if (IS_ERR(devfreq)) { + if (IS_ERR_OR_NULL(devfreq)) { device->pwrscale.enabled = false; msm_adreno_tz_exit(); - return PTR_ERR(devfreq); + return IS_ERR(devfreq) ? PTR_ERR(devfreq) : -EINVAL; } pwrscale->devfreqptr = devfreq; diff --git a/kgsl_reclaim.c b/kgsl_reclaim.c index b0d6804456..f93a1a654a 100644 --- a/kgsl_reclaim.c +++ b/kgsl_reclaim.c @@ -21,37 +21,10 @@ static u32 kgsl_reclaim_max_page_limit = 7680; /* Setting this to 0 means we reclaim pages as specified in shrinker call */ static u32 kgsl_nr_to_scan; -static atomic_t kgsl_shrinker_active = ATOMIC_INIT(0); -static unsigned long shmem_swap_pages(struct address_space *mapping) -{ - struct inode *inode = mapping->host; - struct shmem_inode_info *info = SHMEM_I(inode); - unsigned long swapped; +struct work_struct reclaim_work; - swapped = READ_ONCE(info->swapped); - return swapped; -} - -static unsigned long kgsl_process_get_reclaim_count( - struct kgsl_process_private *process) -{ - struct kgsl_mem_entry *entry; - struct kgsl_memdesc *memdesc; - unsigned long reclaim_count = 0; - int id; - - spin_lock(&process->mem_lock); - idr_for_each_entry(&process->mem_idr, entry, id) { - memdesc = &entry->memdesc; - if (memdesc->shmem_filp) - reclaim_count += shmem_swap_pages( - memdesc->shmem_filp->f_mapping); - } - spin_unlock(&process->mem_lock); - - return reclaim_count; -} +static atomic_t kgsl_nr_to_reclaim; static int kgsl_memdesc_get_reclaimed_pages(struct kgsl_mem_entry *entry) { @@ -182,7 +155,7 @@ static ssize_t gpumem_reclaimed_show(struct kobject *kobj, container_of(kobj, struct kgsl_process_private, kobj); return scnprintf(buf, PAGE_SIZE, "%d\n", - kgsl_process_get_reclaim_count(process) << PAGE_SHIFT); + atomic_read(&process->unpinned_page_count) << PAGE_SHIFT); } PROCESS_ATTR(state, 0644, kgsl_proc_state_show, kgsl_proc_state_store); @@ -297,7 +270,6 @@ static u32 kgsl_reclaim_process(struct kgsl_process_private *process, for (i = 0; i < memdesc->page_count; i++) { set_page_dirty_lock(memdesc->pages[i]); - shmem_mark_page_lazyfree(memdesc->pages[i]); spin_lock(&memdesc->lock); put_page(memdesc->pages[i]); memdesc->pages[i] = NULL; @@ -306,6 +278,7 @@ static u32 kgsl_reclaim_process(struct kgsl_process_private *process, remaining--; } + reclaim_shmem_address_space(memdesc->shmem_filp->f_mapping); memdesc->priv |= KGSL_MEMDESC_RECLAIMED; } @@ -318,24 +291,13 @@ static u32 kgsl_reclaim_process(struct kgsl_process_private *process, return (pages_to_reclaim - remaining); } -/* Functions for the shrinker */ - -static unsigned long -kgsl_reclaim_shrink_scan_objects(struct shrinker *shrinker, - struct 
shrink_control *sc) +static void kgsl_reclaim_background_work(struct work_struct *work) { - /* nr_pages represents number of pages to be reclaimed*/ - u32 nr_pages = kgsl_nr_to_scan ? kgsl_nr_to_scan : sc->nr_to_scan; - u32 bg_proc = 0; + u32 bg_proc = 0, nr_pages = atomic_read(&kgsl_nr_to_reclaim); u64 pp_nr_pages; struct list_head kgsl_reclaim_process_list; struct kgsl_process_private *process, *next; - if (atomic_inc_return(&kgsl_shrinker_active) > 1) { - atomic_dec(&kgsl_shrinker_active); - return 0; - } - INIT_LIST_HEAD(&kgsl_reclaim_process_list); read_lock(&kgsl_driver.proclist_lock); list_for_each_entry(process, &kgsl_driver.process_list, list) { @@ -362,10 +324,21 @@ kgsl_reclaim_shrink_scan_objects(struct shrinker *shrinker, list_del(&process->reclaim_list); kgsl_process_private_put(process); } +} - atomic_dec(&kgsl_shrinker_active); - return ((kgsl_nr_to_scan ? - kgsl_nr_to_scan : sc->nr_to_scan) - nr_pages); +/* Shrinker callback functions */ +static unsigned long +kgsl_reclaim_shrink_scan_objects(struct shrinker *shrinker, + struct shrink_control *sc) +{ + if (!current_is_kswapd()) + return 0; + + atomic_set(&kgsl_nr_to_reclaim, kgsl_nr_to_scan ? + kgsl_nr_to_scan : sc->nr_to_scan); + kgsl_schedule_work(&reclaim_work); + + return atomic_read(&kgsl_nr_to_reclaim); } static unsigned long @@ -411,6 +384,8 @@ int kgsl_reclaim_init(void) ret = register_shrinker(&kgsl_reclaim_shrinker); if (ret) pr_err("kgsl: reclaim: Failed to register shrinker\n"); + else + INIT_WORK(&reclaim_work, kgsl_reclaim_background_work); return ret; } diff --git a/kgsl_sharedmem.c b/kgsl_sharedmem.c index 42311fd494..53cbf1d8ba 100644 --- a/kgsl_sharedmem.c +++ b/kgsl_sharedmem.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "kgsl_device.h" #include "kgsl_pool.h" @@ -26,6 +27,73 @@ bool kgsl_sharedmem_noretry_flag; static DEFINE_MUTEX(kernel_map_global_lock); +#define MEMTYPE(_type, _name) \ + static struct kgsl_memtype memtype_##_name = { \ + .type = _type, \ + .attr = { .name = __stringify(_name), .mode = 0444 } \ +} + +struct kgsl_memtype { + unsigned int type; + struct attribute attr; +}; + +/* We can not use macro MEMTYPE for "any(0)" because of special characters */ +static struct kgsl_memtype memtype_any0 = { + .type = KGSL_MEMTYPE_OBJECTANY, + .attr = { .name = "any(0)", .mode = 0444 }, +}; + +MEMTYPE(KGSL_MEMTYPE_FRAMEBUFFER, framebuffer); +MEMTYPE(KGSL_MEMTYPE_RENDERBUFFER, renderbuffer); +MEMTYPE(KGSL_MEMTYPE_ARRAYBUFFER, arraybuffer); +MEMTYPE(KGSL_MEMTYPE_ELEMENTARRAYBUFFER, elementarraybuffer); +MEMTYPE(KGSL_MEMTYPE_VERTEXARRAYBUFFER, vertexarraybuffer); +MEMTYPE(KGSL_MEMTYPE_TEXTURE, texture); +MEMTYPE(KGSL_MEMTYPE_SURFACE, surface); +MEMTYPE(KGSL_MEMTYPE_EGL_SURFACE, egl_surface); +MEMTYPE(KGSL_MEMTYPE_GL, gl); +MEMTYPE(KGSL_MEMTYPE_CL, cl); +MEMTYPE(KGSL_MEMTYPE_CL_BUFFER_MAP, cl_buffer_map); +MEMTYPE(KGSL_MEMTYPE_CL_BUFFER_NOMAP, cl_buffer_nomap); +MEMTYPE(KGSL_MEMTYPE_CL_IMAGE_MAP, cl_image_map); +MEMTYPE(KGSL_MEMTYPE_CL_IMAGE_NOMAP, cl_image_nomap); +MEMTYPE(KGSL_MEMTYPE_CL_KERNEL_STACK, cl_kernel_stack); +MEMTYPE(KGSL_MEMTYPE_COMMAND, command); +MEMTYPE(KGSL_MEMTYPE_2D, 2d); +MEMTYPE(KGSL_MEMTYPE_EGL_IMAGE, egl_image); +MEMTYPE(KGSL_MEMTYPE_EGL_SHADOW, egl_shadow); +MEMTYPE(KGSL_MEMTYPE_MULTISAMPLE, egl_multisample); +MEMTYPE(KGSL_MEMTYPE_KERNEL, kernel); + +static struct attribute *memtype_attrs[] = { + &memtype_any0.attr, + &memtype_framebuffer.attr, + &memtype_renderbuffer.attr, + &memtype_arraybuffer.attr, + &memtype_elementarraybuffer.attr, + 
&memtype_vertexarraybuffer.attr, + &memtype_texture.attr, + &memtype_surface.attr, + &memtype_egl_surface.attr, + &memtype_gl.attr, + &memtype_cl.attr, + &memtype_cl_buffer_map.attr, + &memtype_cl_buffer_nomap.attr, + &memtype_cl_image_map.attr, + &memtype_cl_image_nomap.attr, + &memtype_cl_kernel_stack.attr, + &memtype_command.attr, + &memtype_2d.attr, + &memtype_egl_image.attr, + &memtype_egl_shadow.attr, + &memtype_egl_multisample.attr, + &memtype_kernel.attr, + NULL, +}; + +ATTRIBUTE_GROUPS(memtype); + /* An attribute for showing per-process memory statistics */ struct kgsl_mem_entry_attribute { struct kgsl_process_attribute attr; @@ -64,6 +132,51 @@ static ssize_t mem_entry_sysfs_show(struct kobject *kobj, return pattr->show(priv, pattr->memtype, buf); } +static ssize_t memtype_sysfs_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct kgsl_process_private *priv; + struct kgsl_memtype *memtype; + struct kgsl_mem_entry *entry; + u64 size = 0; + int id = 0; + + priv = container_of(kobj, struct kgsl_process_private, kobj_memtype); + memtype = container_of(attr, struct kgsl_memtype, attr); + + spin_lock(&priv->mem_lock); + for (entry = idr_get_next(&priv->mem_idr, &id); entry; + id++, entry = idr_get_next(&priv->mem_idr, &id)) { + struct kgsl_memdesc *memdesc; + unsigned int type; + + if (!kgsl_mem_entry_get(entry)) + continue; + spin_unlock(&priv->mem_lock); + + memdesc = &entry->memdesc; + type = kgsl_memdesc_get_memtype(memdesc); + + if (type == memtype->type) + size += memdesc->size; + + kgsl_mem_entry_put(entry); + spin_lock(&priv->mem_lock); + } + spin_unlock(&priv->mem_lock); + + return scnprintf(buf, PAGE_SIZE, "%llu\n", size); +} + +static const struct sysfs_ops memtype_sysfs_ops = { + .show = memtype_sysfs_show, +}; + +static struct kobj_type ktype_memtype = { + .sysfs_ops = &memtype_sysfs_ops, + .default_groups = memtype_groups, +}; + static ssize_t imported_mem_show(struct kgsl_process_private *priv, int type, char *buf) @@ -243,10 +356,15 @@ void kgsl_process_init_sysfs(struct kgsl_device *device, kgsl_driver.prockobj, "%d", pid_nr(private->pid))) { dev_err(device->dev, "Unable to add sysfs for process %d\n", pid_nr(private->pid)); - kgsl_process_private_put(private); } kgsl_reclaim_proc_sysfs_init(private); + + if (kobject_init_and_add(&private->kobj_memtype, &ktype_memtype, + &private->kobj, "memtype")) { + dev_err(device->dev, "Unable to add memtype sysfs for process %d\n", + pid_nr(private->pid)); + } } static ssize_t memstat_show(struct device *dev, @@ -794,41 +912,21 @@ kgsl_sharedmem_writeq(const struct kgsl_memdesc *memdesc, wmb(); } -static const char * const memtype_str[] = { - [KGSL_MEMTYPE_OBJECTANY] = "any(0)", - [KGSL_MEMTYPE_FRAMEBUFFER] = "framebuffer", - [KGSL_MEMTYPE_RENDERBUFFER] = "renderbuffer", - [KGSL_MEMTYPE_ARRAYBUFFER] = "arraybuffer", - [KGSL_MEMTYPE_ELEMENTARRAYBUFFER] = "elementarraybuffer", - [KGSL_MEMTYPE_VERTEXARRAYBUFFER] = "vertexarraybuffer", - [KGSL_MEMTYPE_TEXTURE] = "texture", - [KGSL_MEMTYPE_SURFACE] = "surface", - [KGSL_MEMTYPE_EGL_SURFACE] = "egl_surface", - [KGSL_MEMTYPE_GL] = "gl", - [KGSL_MEMTYPE_CL] = "cl", - [KGSL_MEMTYPE_CL_BUFFER_MAP] = "cl_buffer_map", - [KGSL_MEMTYPE_CL_BUFFER_NOMAP] = "cl_buffer_nomap", - [KGSL_MEMTYPE_CL_IMAGE_MAP] = "cl_image_map", - [KGSL_MEMTYPE_CL_IMAGE_NOMAP] = "cl_image_nomap", - [KGSL_MEMTYPE_CL_KERNEL_STACK] = "cl_kernel_stack", - [KGSL_MEMTYPE_COMMAND] = "command", - [KGSL_MEMTYPE_2D] = "2d", - [KGSL_MEMTYPE_EGL_IMAGE] = "egl_image", - [KGSL_MEMTYPE_EGL_SHADOW] = "egl_shadow", 
- [KGSL_MEMTYPE_MULTISAMPLE] = "egl_multisample", - /* KGSL_MEMTYPE_KERNEL handled below, to avoid huge array */ -}; - void kgsl_get_memory_usage(char *name, size_t name_size, uint64_t memflags) { unsigned int type = FIELD_GET(KGSL_MEMTYPE_MASK, memflags); + struct kgsl_memtype *memtype; + int i; - if (type == KGSL_MEMTYPE_KERNEL) - strlcpy(name, "kernel", name_size); - else if (type < ARRAY_SIZE(memtype_str) && memtype_str[type] != NULL) - strlcpy(name, memtype_str[type], name_size); - else - snprintf(name, name_size, "VK/others(%3d)", type); + for (i = 0; memtype_attrs[i]; i++) { + memtype = container_of(memtype_attrs[i], struct kgsl_memtype, attr); + if (memtype->type == type) { + strlcpy(name, memtype->attr.name, name_size); + return; + } + } + + snprintf(name, name_size, "VK/others(%3d)", type); } int kgsl_memdesc_sg_dma(struct kgsl_memdesc *memdesc, @@ -885,6 +983,9 @@ static int kgsl_alloc_page(int *page_size, struct page **pages, if (pages == NULL) return -EINVAL; + if (fatal_signal_pending(current)) + return -ENOMEM; + page = shmem_read_mapping_page_gfp(shmem_filp->f_mapping, page_off, kgsl_gfp_mask(0)); if (IS_ERR(page)) @@ -934,6 +1035,9 @@ static int kgsl_alloc_page(int *page_size, struct page **pages, unsigned int page_off, struct file *shmem_filp, struct device *dev) { + if (fatal_signal_pending(current)) + return -ENOMEM; + return kgsl_pool_alloc_page(page_size, pages, pages_len, align, dev); } @@ -1261,7 +1365,11 @@ static int kgsl_system_alloc_pages(u64 size, struct page ***pages, gfp_t gfp = __GFP_ZERO | __GFP_HIGHMEM | GFP_KERNEL | __GFP_NORETRY; - local[i] = alloc_pages(gfp, get_order(PAGE_SIZE)); + if (!fatal_signal_pending(current)) + local[i] = alloc_pages(gfp, get_order(PAGE_SIZE)); + else + local[i] = NULL; + if (!local[i]) { for (i = i - 1; i >= 0; i--) __free_pages(local[i], get_order(PAGE_SIZE)); diff --git a/kgsl_sync.c b/kgsl_sync.c index 1103b51248..c138687bc8 100644 --- a/kgsl_sync.c +++ b/kgsl_sync.c @@ -636,9 +636,10 @@ static void kgsl_syncsource_cleanup(struct kgsl_process_private *private, struct kgsl_syncsource *syncsource) { struct kgsl_syncsource_fence *sfence, *next; + unsigned long flags; /* Signal all fences to release any callbacks */ - spin_lock(&syncsource->lock); + spin_lock_irqsave(&syncsource->lock, flags); list_for_each_entry_safe(sfence, next, &syncsource->child_list_head, child_list) { @@ -646,7 +647,7 @@ static void kgsl_syncsource_cleanup(struct kgsl_process_private *private, list_del_init(&sfence->child_list); } - spin_unlock(&syncsource->lock); + spin_unlock_irqrestore(&syncsource->lock, flags); /* put reference from syncsource creation */ kgsl_syncsource_put(syncsource); @@ -686,6 +687,7 @@ long kgsl_ioctl_syncsource_create_fence(struct kgsl_device_private *dev_priv, struct kgsl_syncsource_fence *sfence = NULL; struct sync_file *sync_file = NULL; int fd = -1; + unsigned long flags; /* * Take a refcount that is released when the fence is released @@ -727,9 +729,9 @@ long kgsl_ioctl_syncsource_create_fence(struct kgsl_device_private *dev_priv, param->fence_fd = fd; - spin_lock(&syncsource->lock); + spin_lock_irqsave(&syncsource->lock, flags); list_add_tail(&sfence->child_list, &syncsource->child_list_head); - spin_unlock(&syncsource->lock); + spin_unlock_irqrestore(&syncsource->lock, flags); out: /* * We're transferring ownership of the fence to the sync file. 
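Looking back at the memtype kobject registered in kgsl_process_init_sysfs() above, each process should now expose one read-only file per memory type. A hypothetical userspace sketch of consuming it; the /sys/class/kgsl/kgsl/proc/<pid>/ root and the "texture" type are assumptions for illustration, not taken from this patch:

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	/* Print the total bytes of texture-tagged GPU memory owned by a process */
	static void print_texture_bytes(int pid)
	{
		char path[96], buf[32] = {0};
		int fd;

		snprintf(path, sizeof(path),
			"/sys/class/kgsl/kgsl/proc/%d/memtype/texture", pid);
		fd = open(path, O_RDONLY);
		if (fd < 0)
			return;
		if (read(fd, buf, sizeof(buf) - 1) > 0)
			printf("texture: %s", buf);	/* value is reported in bytes */
		close(fd);
	}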
@@ -756,8 +758,9 @@ static int kgsl_syncsource_signal(struct kgsl_syncsource *syncsource, { struct kgsl_syncsource_fence *sfence, *next; int ret = -EINVAL; + unsigned long flags; - spin_lock(&syncsource->lock); + spin_lock_irqsave(&syncsource->lock, flags); list_for_each_entry_safe(sfence, next, &syncsource->child_list_head, child_list) { @@ -770,7 +773,7 @@ static int kgsl_syncsource_signal(struct kgsl_syncsource *syncsource, } } - spin_unlock(&syncsource->lock); + spin_unlock_irqrestore(&syncsource->lock, flags); return ret; } diff --git a/kgsl_timeline.c b/kgsl_timeline.c index d7b64abab4..b499face00 100644 --- a/kgsl_timeline.c +++ b/kgsl_timeline.c @@ -196,6 +196,16 @@ static bool timeline_fence_signaled(struct dma_fence *fence) fence->ops); } +static bool timeline_fence_enable_signaling(struct dma_fence *fence) +{ + /* + * Return value of false indicates the fence already passed. + * When fence is not passed we return true indicating successful + * enabling. + */ + return !timeline_fence_signaled(fence); +} + static const char *timeline_get_driver_name(struct dma_fence *fence) { return "kgsl-sw-timeline"; @@ -221,6 +231,7 @@ static const struct dma_fence_ops timeline_fence_ops = { .get_timeline_name = timeline_get_timeline_name, .signaled = timeline_fence_signaled, .release = timeline_fence_release, + .enable_signaling = timeline_fence_enable_signaling, .timeline_value_str = timeline_get_value_str, .use_64bit_seqno = true, }; @@ -298,10 +309,18 @@ struct dma_fence *kgsl_timeline_fence_alloc(struct kgsl_timeline *timeline, INIT_LIST_HEAD(&fence->node); - if (!dma_fence_is_signaled(&fence->base)) + /* + * Once fence is checked as not signaled, allow it to be added + * in the list before other thread such as kgsl_timeline_signal + * can get chance to signal. + */ + spin_lock_irq(&timeline->lock); + if (!dma_fence_is_signaled_locked(&fence->base)) kgsl_timeline_add_fence(timeline, fence); trace_kgsl_timeline_fence_alloc(timeline->id, seqno); + spin_unlock_irq(&timeline->lock); + log_kgsl_timeline_fence_alloc_event(timeline->id, seqno); return &fence->base; diff --git a/kgsl_vbo.c b/kgsl_vbo.c index 75959b1475..091f2ecc3f 100644 --- a/kgsl_vbo.c +++ b/kgsl_vbo.c @@ -95,7 +95,11 @@ static void kgsl_memdesc_remove_range(struct kgsl_mem_entry *target, range = bind_to_range(node); next = interval_tree_iter_next(node, start, last); - if (range->entry->id == entry->id) { + /* + * If entry is null, consider it as a special request. Unbind + * the entire range between start and last in this case. + */ + if (!entry || range->entry->id == entry->id) { interval_tree_remove(node, &memdesc->ranges); trace_kgsl_mem_remove_bind_range(target, range->range.start, range->entry, @@ -359,6 +363,23 @@ kgsl_sharedmem_create_bind_op(struct kgsl_process_private *private, range.target_offset, range.length)) goto err; + /* + * Special case: Consider child id 0 as a special request incase of + * unbind. This helps to unbind the specified range (could span multiple + * child buffers) without supplying backing physical buffer information. + */ + if (range.child_id == 0 && range.op == KGSL_GPUMEM_RANGE_OP_UNBIND) { + op->ops[i].entry = NULL; + op->ops[i].start = range.target_offset; + op->ops[i].last = range.target_offset + range.length - 1; + /* Child offset doesn't matter for unbind. 
set it to 0 */ + op->ops[i].child_offset = 0; + op->ops[i].op = range.op; + + ranges += ranges_size; + continue; + } + /* Get the child object */ op->ops[i].entry = kgsl_sharedmem_find_id(private, range.child_id); From b5f34d537c72e87c8d9a757585b63a4e30e68e82 Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Thu, 11 Nov 2021 19:07:38 -0800 Subject: [PATCH 006/750] msm: kgsl: Update changed kernel APIs Use the new kernel APIs. Change-Id: I58b6cff5782ff13fdce9e31e0753a3f277d73a39 Signed-off-by: Lynus Vaz --- adreno_snapshot.c | 2 +- governor_gpubw_mon.c | 2 +- governor_msm_adreno_tz.c | 2 +- kgsl_pwrscale.h | 1 + kgsl_snapshot.c | 6 +++--- kgsl_util.c | 2 ++ kgsl_util.h | 19 +++++++++++++++++++ 7 files changed, 28 insertions(+), 6 deletions(-) diff --git a/adreno_snapshot.c b/adreno_snapshot.c index d85307d966..fd9834f073 100644 --- a/adreno_snapshot.c +++ b/adreno_snapshot.c @@ -857,7 +857,7 @@ static void adreno_snapshot_os(struct kgsl_device *device, strlcpy(header->release, init_utsname()->release, sizeof(header->release)); strlcpy(header->version, init_utsname()->version, sizeof(header->version)); - header->seconds = get_seconds(); + header->seconds = ktime_get_real_seconds(); header->power_flags = device->pwrctrl.power_flags; header->power_level = device->pwrctrl.active_pwrlevel; header->power_interval_timeout = device->pwrctrl.interval_timeout; diff --git a/governor_gpubw_mon.c b/governor_gpubw_mon.c index 147c43511f..f7f19c40e5 100644 --- a/governor_gpubw_mon.c +++ b/governor_gpubw_mon.c @@ -299,7 +299,7 @@ static struct devfreq_governor devfreq_gpubw = { .name = "gpubw_mon", .get_target_freq = devfreq_gpubw_get_target, .event_handler = devfreq_gpubw_event_handler, - .immutable = 1, + .flags = DEVFREQ_GOV_FLAG_IMMUTABLE, }; int devfreq_gpubw_init(void) diff --git a/governor_msm_adreno_tz.c b/governor_msm_adreno_tz.c index 18f4f16ead..6004d6d3ac 100644 --- a/governor_msm_adreno_tz.c +++ b/governor_msm_adreno_tz.c @@ -546,7 +546,7 @@ static struct devfreq_governor msm_adreno_tz = { .name = "msm-adreno-tz", .get_target_freq = tz_get_target_freq, .event_handler = tz_handler, - .immutable = 1, + .flags = DEVFREQ_GOV_FLAG_IMMUTABLE, }; int msm_adreno_tz_init(void) diff --git a/kgsl_pwrscale.h b/kgsl_pwrscale.h index 2bdc9db0d9..ed40ea6ccd 100644 --- a/kgsl_pwrscale.h +++ b/kgsl_pwrscale.h @@ -6,6 +6,7 @@ #ifndef __KGSL_PWRSCALE_H #define __KGSL_PWRSCALE_H +#include "governor.h" #include "kgsl_pwrctrl.h" #include "msm_adreno_devfreq.h" diff --git a/kgsl_snapshot.c b/kgsl_snapshot.c index c2cac0c881..40d16820eb 100644 --- a/kgsl_snapshot.c +++ b/kgsl_snapshot.c @@ -3,8 +3,8 @@ * Copyright (c) 2012-2021, The Linux Foundation. All rights reserved. 
*/ -#include #include +#include #include #include @@ -592,7 +592,7 @@ static void kgsl_device_snapshot_atomic(struct kgsl_device *device) * the kernel log */ getboottime64(&boot); - snapshot->timestamp = get_seconds() - boot.tv_sec; + snapshot->timestamp = ktime_get_real_seconds() - boot.tv_sec; kgsl_add_to_minidump("ATOMIC_GPU_SNAPSHOT", (u64) device->snapshot_memory_atomic.ptr, atomic_snapshot_phy_addr(device), device->snapshot_memory_atomic.size); @@ -682,7 +682,7 @@ void kgsl_device_snapshot(struct kgsl_device *device, */ getboottime64(&boot); - snapshot->timestamp = get_seconds() - boot.tv_sec; + snapshot->timestamp = ktime_get_real_seconds() - boot.tv_sec; /* Store the instance in the device until it gets dumped */ device->snapshot = snapshot; diff --git a/kgsl_util.c b/kgsl_util.c index 8ebbe349d9..8992fb4b46 100644 --- a/kgsl_util.c +++ b/kgsl_util.c @@ -211,6 +211,7 @@ void kgsl_hwunlock(struct cpu_gpu_lock *lock) lock->cpu_req = 0; } +#if IS_ENABLED(CONFIG_QCOM_VA_MINIDUMP) void kgsl_add_to_minidump(char *name, u64 virt_addr, u64 phy_addr, size_t size) { struct md_region md_entry = {0}; @@ -348,3 +349,4 @@ void kgsl_qcom_va_md_register(struct kgsl_device *device) if (qcom_va_md_register("KGSL", &kgsl_va_minidump_nb)) dev_err(device->dev, "Failed to register notifier with va_minidump\n"); } +#endif diff --git a/kgsl_util.h b/kgsl_util.h index f2da379828..0f3af38c10 100644 --- a/kgsl_util.h +++ b/kgsl_util.h @@ -113,6 +113,7 @@ int kgsl_clk_set_rate(struct clk_bulk_data *clks, int num_clks, */ int kgsl_zap_shader_load(struct device *dev, const char *name); +#if IS_ENABLED(CONFIG_QCOM_VA_MINIDUMP) /** * kgsl_add_to_minidump - Add a physically contiguous section to minidump * @name: Name of the section @@ -146,5 +147,23 @@ int kgsl_add_va_to_minidump(struct device *dev, const char *name, void *ptr, * @device: Pointer to kgsl device */ void kgsl_qcom_va_md_register(struct kgsl_device *device); +#else +static inline void kgsl_add_to_minidump(char *name, u64 virt_addr, u64 phy_addr, size_t size) +{ +} +static inline void kgsl_remove_from_minidump(char *name, u64 virt_addr, u64 phy_addr, size_t size) +{ +} + +static inline int kgsl_add_va_to_minidump(struct device *dev, const char *name, void *ptr, + size_t size) +{ + return 0; +} + +static inline void kgsl_qcom_va_md_register(struct kgsl_device *device) +{ +} +#endif #endif From 4038321e47d76d63d6b4201f0e15549e940bf14c Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Thu, 11 Nov 2021 19:08:18 -0800 Subject: [PATCH 007/750] msm: kgsl: Remove is_dma_buf_file() usage The is_dma_buf_file() API is private to dma-buf. Remove code that depends on it. 
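With the hostptr-based path gone, dma-buf import is left entirely to the fd-based ioctls. A rough sketch of that remaining flow, loosely modelled on _gpuobj_map_dma_buf() further down in this file (illustrative only, error handling trimmed):

	static long example_import_dmabuf(struct kgsl_device *device,
			struct kgsl_pagetable *pagetable,
			struct kgsl_mem_entry *entry, int fd)
	{
		/* dma_buf_get() validates the fd itself, so is_dma_buf_file() is not needed */
		struct dma_buf *dmabuf = dma_buf_get(fd);
		long ret;

		if (IS_ERR_OR_NULL(dmabuf))
			return dmabuf ? PTR_ERR(dmabuf) : -EINVAL;

		ret = kgsl_setup_dma_buf(device, pagetable, entry, dmabuf);
		if (ret)
			dma_buf_put(dmabuf);

		return ret;
	}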
Change-Id: I70562c219b915f3891f2ca89a6c48fd1cd34ca38 Signed-off-by: Lynus Vaz --- kgsl.c | 108 +++------------------------------------------------------ 1 file changed, 5 insertions(+), 103 deletions(-) diff --git a/kgsl.c b/kgsl.c index 8a423ddbd4..e28048eb4d 100644 --- a/kgsl.c +++ b/kgsl.c @@ -2589,103 +2589,6 @@ static int kgsl_setup_anon_useraddr(struct kgsl_pagetable *pagetable, return ret; } -#ifdef CONFIG_DMA_SHARED_BUFFER -static void _setup_cache_mode(struct kgsl_mem_entry *entry, - struct vm_area_struct *vma) -{ - uint64_t mode; - pgprot_t pgprot = vma->vm_page_prot; - - if ((pgprot_val(pgprot) == pgprot_val(pgprot_noncached(pgprot))) || - (pgprot_val(pgprot) == pgprot_val(pgprot_writecombine(pgprot)))) - mode = KGSL_CACHEMODE_WRITECOMBINE; - else - mode = KGSL_CACHEMODE_WRITEBACK; - - entry->memdesc.flags |= FIELD_PREP(KGSL_CACHEMODE_MASK, mode); -} - -static int kgsl_setup_dma_buf(struct kgsl_device *device, - struct kgsl_pagetable *pagetable, - struct kgsl_mem_entry *entry, - struct dma_buf *dmabuf); - -static int kgsl_setup_dmabuf_useraddr(struct kgsl_device *device, - struct kgsl_pagetable *pagetable, - struct kgsl_mem_entry *entry, unsigned long hostptr) -{ - struct vm_area_struct *vma; - struct dma_buf *dmabuf = NULL; - int ret; - - /* - * Find the VMA containing this pointer and figure out if it - * is a dma-buf. - */ - mmap_read_lock(current->mm); - vma = find_vma(current->mm, hostptr); - - if (vma && vma->vm_file) { - ret = check_vma_flags(vma, entry->memdesc.flags); - if (ret) { - mmap_read_unlock(current->mm); - return ret; - } - - /* - * Check to see that this isn't our own memory that we have - * already mapped - */ - if (vma->vm_ops == &kgsl_gpumem_vm_ops) { - mmap_read_unlock(current->mm); - return -EFAULT; - } - - if (!is_dma_buf_file(vma->vm_file)) { - mmap_read_unlock(current->mm); - return -ENODEV; - } - - /* Take a refcount because dma_buf_put() decrements the refcount */ - get_file(vma->vm_file); - - dmabuf = vma->vm_file->private_data; - } - - if (!dmabuf) { - mmap_read_unlock(current->mm); - return -ENODEV; - } - - ret = kgsl_setup_dma_buf(device, pagetable, entry, dmabuf); - if (ret) { - dma_buf_put(dmabuf); - mmap_read_unlock(current->mm); - return ret; - } - - /* Setup the cache mode for cache operations */ - _setup_cache_mode(entry, vma); - - if (kgsl_mmu_has_feature(device, KGSL_MMU_IO_COHERENT) && - (IS_ENABLED(CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT) && - kgsl_cachemode_is_cached(entry->memdesc.flags))) - entry->memdesc.flags |= KGSL_MEMFLAGS_IOCOHERENT; - else - entry->memdesc.flags &= ~((u64) KGSL_MEMFLAGS_IOCOHERENT); - - mmap_read_unlock(current->mm); - return 0; -} -#else -static int kgsl_setup_dmabuf_useraddr(struct kgsl_device *device, - struct kgsl_pagetable *pagetable, - struct kgsl_mem_entry *entry, unsigned long hostptr) -{ - return -ENODEV; -} -#endif - static int kgsl_setup_useraddr(struct kgsl_device *device, struct kgsl_pagetable *pagetable, struct kgsl_mem_entry *entry, @@ -2696,12 +2599,6 @@ static int kgsl_setup_useraddr(struct kgsl_device *device, if (hostptr == 0 || !IS_ALIGNED(hostptr, PAGE_SIZE)) return -EINVAL; - /* Try to set up a dmabuf - if it returns -ENODEV assume anonymous */ - ret = kgsl_setup_dmabuf_useraddr(device, pagetable, entry, hostptr); - if (ret != -ENODEV) - return ret; - - /* Okay - lets go legacy */ return kgsl_setup_anon_useraddr(pagetable, entry, hostptr, offset, size); } @@ -2748,6 +2645,11 @@ static bool check_and_warn_secured(struct kgsl_device *device) } #ifdef CONFIG_DMA_SHARED_BUFFER +static int 
kgsl_setup_dma_buf(struct kgsl_device *device, + struct kgsl_pagetable *pagetable, + struct kgsl_mem_entry *entry, + struct dma_buf *dmabuf); + static long _gpuobj_map_dma_buf(struct kgsl_device *device, struct kgsl_pagetable *pagetable, struct kgsl_mem_entry *entry, From a9b5f09089b9d4fe17b6a1597e44e4032df5a0cc Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Fri, 12 Nov 2021 17:07:25 -0800 Subject: [PATCH 008/750] msm: kgsl: Enable the Gen7_2_0 GPU Add in the code to identify the Gen7_2_0 GPU. Change-Id: I7ba07628a6756a57bc386c71864348cb219ed090 Signed-off-by: Lynus Vaz --- adreno-gpulist.h | 104 +++++++++++++++++++++++++++++++++++++++++++++++ adreno.h | 2 + adreno_gen7.c | 12 ++---- adreno_gen7.h | 4 ++ gen7_reg.h | 2 + 5 files changed, 115 insertions(+), 9 deletions(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 91aedbfd4f..3c428535bc 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -1807,6 +1807,12 @@ static const struct kgsl_regmap_list gen7_0_0_hwcg_regs[] = { { GEN7_RBBM_CLOCK_HYST_GMU_GX, 0x00000555 }, }; +static const struct kgsl_regmap_list gen7_0_0_ao_hwcg_regs[] = { + { GEN7_GPU_GMU_AO_GMU_CGC_MODE_CNTL, 0x00020000 }, + { GEN7_GPU_GMU_AO_GMU_CGC_DELAY_CNTL, 0x00010111 }, + { GEN7_GPU_GMU_AO_GMU_CGC_HYST_CNTL, 0x00005555 }, +}; + /* GEN7_0_0 protected register list */ static const struct gen7_protected_regs gen7_0_0_protected_regs[] = { { GEN7_CP_PROTECT_REG + 0, 0x00000, 0x004ff, 0 }, @@ -1880,6 +1886,8 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_0_0 = { .zap_name = "a730_zap", .hwcg = gen7_0_0_hwcg_regs, .hwcg_count = ARRAY_SIZE(gen7_0_0_hwcg_regs), + .ao_hwcg = gen7_0_0_ao_hwcg_regs, + .ao_hwcg_count = ARRAY_SIZE(gen7_0_0_ao_hwcg_regs), .gbif = gen7_0_0_gbif_regs, .gbif_count = ARRAY_SIZE(gen7_0_0_gbif_regs), .hang_detect_cycles = 0xcfffff, @@ -1910,6 +1918,99 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_0_1 = { .zap_name = "a730_zap", .hwcg = gen7_0_0_hwcg_regs, .hwcg_count = ARRAY_SIZE(gen7_0_0_hwcg_regs), + .ao_hwcg = gen7_0_0_ao_hwcg_regs, + .ao_hwcg_count = ARRAY_SIZE(gen7_0_0_ao_hwcg_regs), + .gbif = gen7_0_0_gbif_regs, + .gbif_count = ARRAY_SIZE(gen7_0_0_gbif_regs), + .hang_detect_cycles = 0xcfffff, + .protected_regs = gen7_0_0_protected_regs, + .highest_bank_bit = 16, +}; + +static const struct kgsl_regmap_list gen7_2_0_hwcg_regs[] = { + { GEN7_RBBM_CLOCK_CNTL_SP0, 0x02222222 }, + { GEN7_RBBM_CLOCK_CNTL2_SP0, 0x22222222 }, + { GEN7_RBBM_CLOCK_HYST_SP0, 0x003cf3cf }, + { GEN7_RBBM_CLOCK_DELAY_SP0, 0x00000080 }, + { GEN7_RBBM_CLOCK_CNTL_TP0, 0x22222222 }, + { GEN7_RBBM_CLOCK_CNTL2_TP0, 0x22222222 }, + { GEN7_RBBM_CLOCK_CNTL3_TP0, 0x22222222 }, + { GEN7_RBBM_CLOCK_CNTL4_TP0, 0x00222222 }, + { GEN7_RBBM_CLOCK_HYST_TP0, 0x77777777 }, + { GEN7_RBBM_CLOCK_HYST2_TP0, 0x77777777 }, + { GEN7_RBBM_CLOCK_HYST3_TP0, 0x77777777 }, + { GEN7_RBBM_CLOCK_HYST4_TP0, 0x00077777 }, + { GEN7_RBBM_CLOCK_DELAY_TP0, 0x11111111 }, + { GEN7_RBBM_CLOCK_DELAY2_TP0, 0x11111111 }, + { GEN7_RBBM_CLOCK_DELAY3_TP0, 0x11111111 }, + { GEN7_RBBM_CLOCK_DELAY4_TP0, 0x00011111 }, + { GEN7_RBBM_CLOCK_CNTL_UCHE, 0x22222222 }, + { GEN7_RBBM_CLOCK_CNTL2_UCHE, 0x00222222 }, + { GEN7_RBBM_CLOCK_HYST_UCHE, 0x00000444 }, + { GEN7_RBBM_CLOCK_DELAY_UCHE, 0x00000222 }, + { GEN7_RBBM_CLOCK_CNTL_RB0, 0x22222222 }, + { GEN7_RBBM_CLOCK_CNTL2_RB0, 0x01002222 }, + { GEN7_RBBM_CLOCK_CNTL_CCU0, 0x00002220 }, + { GEN7_RBBM_CLOCK_HYST_RB_CCU0, 0x44000f00 }, + { GEN7_RBBM_CLOCK_CNTL_RAC, 0x25222022 }, + { GEN7_RBBM_CLOCK_CNTL2_RAC, 0x00555555 }, + { GEN7_RBBM_CLOCK_DELAY_RAC, 
0x00000011 }, + { GEN7_RBBM_CLOCK_HYST_RAC, 0x00440044 }, + { GEN7_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222 }, + { GEN7_RBBM_CLOCK_MODE2_GRAS, 0x00000222 }, + { GEN7_RBBM_CLOCK_MODE_BV_GRAS, 0x00222222 }, + { GEN7_RBBM_CLOCK_MODE_GPC, 0x02222223 }, + { GEN7_RBBM_CLOCK_MODE_VFD, 0x00222222 }, + { GEN7_RBBM_CLOCK_MODE_BV_GPC, 0x00222222 }, + { GEN7_RBBM_CLOCK_MODE_BV_VFD, 0x00002222 }, + { GEN7_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000 }, + { GEN7_RBBM_CLOCK_HYST_GPC, 0x04104004 }, + { GEN7_RBBM_CLOCK_HYST_VFD, 0x00000000 }, + { GEN7_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00000000 }, + { GEN7_RBBM_CLOCK_DELAY_GPC, 0x00000200 }, + { GEN7_RBBM_CLOCK_DELAY_VFD, 0x00000000 }, + { GEN7_RBBM_CLOCK_MODE_HLSQ, 0x00002222 }, + { GEN7_RBBM_CLOCK_DELAY_HLSQ, 0x00000000 }, + { GEN7_RBBM_CLOCK_HYST_HLSQ, 0x00000000 }, + { GEN7_RBBM_CLOCK_MODE_BV_LRZ, 0x55555552 }, + { GEN7_RBBM_CLOCK_HYST2_VFD, 0x00000000 }, + { GEN7_RBBM_CLOCK_MODE_CP, 0x00000222 }, + { GEN7_RBBM_CLOCK_CNTL, 0x8aa8aa82 }, + { GEN7_RBBM_ISDB_CNT, 0x00000182 }, + { GEN7_RBBM_RAC_THRESHOLD_CNT, 0x00000000 }, + { GEN7_RBBM_SP_HYST_CNT, 0x00000000 }, + { GEN7_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222 }, + { GEN7_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111 }, + { GEN7_RBBM_CLOCK_HYST_GMU_GX, 0x00000555 }, +}; + +static const struct kgsl_regmap_list gen7_2_0_ao_hwcg_regs[] = { + { GEN7_GPU_GMU_AO_GMU_CGC_MODE_CNTL, 0x00020222 }, + { GEN7_GPU_GMU_AO_GMU_CGC_DELAY_CNTL, 0x00010111 }, + { GEN7_GPU_GMU_AO_GMU_CGC_HYST_CNTL, 0x00005555 }, +}; + +static const struct adreno_gen7_core adreno_gpu_core_gen7_2_0 = { + .base = { + DEFINE_ADRENO_REV(ADRENO_REV_GEN7_2_0, + UINT_MAX, UINT_MAX, UINT_MAX, ANY_ID), + .compatible = "qcom,adreno-gpu-gen7-2-0", + .chipid = 0x43050a00, + .features = ADRENO_APRIV | ADRENO_IOCOHERENT, + .gpudev = &adreno_gen7_hwsched_gpudev.base, + .perfcounters = &adreno_gen7_perfcounters, + .gmem_base = 0, + .gmem_size = 3 * SZ_1M, + .bus_width = 32, + .snapshot_size = SZ_4M, + }, + .sqefw_name = "a740_sqe.fw", + .gmufw_name = "gmu_gen70200.bin", + .zap_name = "a740_zap", + .hwcg = gen7_2_0_hwcg_regs, + .hwcg_count = ARRAY_SIZE(gen7_2_0_hwcg_regs), + .ao_hwcg = gen7_2_0_ao_hwcg_regs, + .ao_hwcg_count = ARRAY_SIZE(gen7_2_0_ao_hwcg_regs), .gbif = gen7_0_0_gbif_regs, .gbif_count = ARRAY_SIZE(gen7_0_0_gbif_regs), .hang_detect_cycles = 0xcfffff, @@ -1937,6 +2038,8 @@ static const struct adreno_gen7_core adreno_gpu_core_gen7_4_0 = { .zap_name = "a730_zap", .hwcg = gen7_0_0_hwcg_regs, .hwcg_count = ARRAY_SIZE(gen7_0_0_hwcg_regs), + .ao_hwcg = gen7_0_0_ao_hwcg_regs, + .ao_hwcg_count = ARRAY_SIZE(gen7_0_0_ao_hwcg_regs), .gbif = gen7_0_0_gbif_regs, .gbif_count = ARRAY_SIZE(gen7_0_0_gbif_regs), .hang_detect_cycles = 0xcfffff, @@ -1983,6 +2086,7 @@ static const struct adreno_gpu_core *adreno_gpulist[] = { &adreno_gpu_core_gen7_0_0.base, &adreno_gpu_core_gen7_0_1.base, &adreno_gpu_core_a662.base, + &adreno_gpu_core_gen7_2_0.base, &adreno_gpu_core_gen7_4_0.base, }; diff --git a/adreno.h b/adreno.h index e8838e3eb6..5609f6e12a 100644 --- a/adreno.h +++ b/adreno.h @@ -199,6 +199,7 @@ enum adreno_gpurev { */ ADRENO_REV_GEN7_0_0 = 0x070000, ADRENO_REV_GEN7_0_1 = 0x070001, + ADRENO_REV_GEN7_2_0 = 0x070200, ADRENO_REV_GEN7_4_0 = 0x070400, }; @@ -1106,6 +1107,7 @@ static inline int adreno_is_gen7(struct adreno_device *adreno_dev) ADRENO_TARGET(gen7_0_0, ADRENO_REV_GEN7_0_0) ADRENO_TARGET(gen7_0_1, ADRENO_REV_GEN7_0_1) +ADRENO_TARGET(gen7_2_0, ADRENO_REV_GEN7_2_0) /* * adreno_checkreg_off() - Checks the validity of a register enum diff --git a/adreno_gen7.c b/adreno_gen7.c 
index 9a1193215d..66d349b4f6 100644 --- a/adreno_gen7.c +++ b/adreno_gen7.c @@ -232,9 +232,6 @@ void gen7_cx_regulator_disable_wait(struct regulator *reg, } #define RBBM_CLOCK_CNTL_ON 0x8aa8aa82 -#define GMU_AO_CGC_MODE_CNTL 0x00020000 -#define GMU_AO_CGC_DELAY_CNTL 0x00010111 -#define GMU_AO_CGC_HYST_CNTL 0x00005555 static void gen7_hwcg_set(struct adreno_device *adreno_dev, bool on) { @@ -246,12 +243,9 @@ static void gen7_hwcg_set(struct adreno_device *adreno_dev, bool on) if (!adreno_dev->hwcg_enabled) on = false; - gmu_core_regwrite(device, GEN7_GPU_GMU_AO_GMU_CGC_MODE_CNTL, - on ? GMU_AO_CGC_MODE_CNTL : 0); - gmu_core_regwrite(device, GEN7_GPU_GMU_AO_GMU_CGC_DELAY_CNTL, - on ? GMU_AO_CGC_DELAY_CNTL : 0); - gmu_core_regwrite(device, GEN7_GPU_GMU_AO_GMU_CGC_HYST_CNTL, - on ? GMU_AO_CGC_HYST_CNTL : 0); + for (i = 0; i < gen7_core->ao_hwcg_count; i++) + gmu_core_regwrite(device, gen7_core->ao_hwcg[i].offset, + on ? gen7_core->ao_hwcg[i].val : 0); kgsl_regread(device, GEN7_RBBM_CLOCK_CNTL, &value); diff --git a/adreno_gen7.h b/adreno_gen7.h index 761dc14430..b061a5fc8f 100644 --- a/adreno_gen7.h +++ b/adreno_gen7.h @@ -73,6 +73,10 @@ struct adreno_gen7_core { const struct kgsl_regmap_list *hwcg; /** @hwcg_count: Number of registers in @hwcg */ u32 hwcg_count; + /** @ao_hwcg: List of registers and values to write for HWCG in AO block */ + const struct kgsl_regmap_list *ao_hwcg; + /** @ao_hwcg_count: Number of registers in @ao_hwcg */ + u32 ao_hwcg_count; /** @gbif: List of registers and values to write for GBIF */ const struct kgsl_regmap_list *gbif; /** @gbif_count: Number of registers in @gbif */ diff --git a/gen7_reg.h b/gen7_reg.h index da03e710ff..8f6a433f21 100644 --- a/gen7_reg.h +++ b/gen7_reg.h @@ -598,6 +598,7 @@ #define GEN7_RBBM_CLOCK_DELAY_TSE_RAS_RBBM 0x00109 #define GEN7_RBBM_CLOCK_HYST_TSE_RAS_RBBM 0x0010a #define GEN7_RBBM_CLOCK_CNTL_UCHE 0x0010b +#define GEN7_RBBM_CLOCK_CNTL2_UCHE 0x0010c #define GEN7_RBBM_CLOCK_DELAY_UCHE 0x0010f #define GEN7_RBBM_CLOCK_HYST_UCHE 0x00110 #define GEN7_RBBM_CLOCK_MODE_VFD 0x00111 @@ -613,6 +614,7 @@ #define GEN7_RBBM_CLOCK_MODE_HLSQ 0x0011b #define GEN7_RBBM_CLOCK_DELAY_HLSQ 0x0011c #define GEN7_RBBM_CLOCK_HYST_HLSQ 0x0011d +#define GEN7_RBBM_CLOCK_HYST2_VFD 0x0012f #define GEN7_RBBM_CLOCK_MODE_CP 0x00260 #define GEN7_RBBM_CLOCK_MODE_BV_LRZ 0x00284 #define GEN7_RBBM_CLOCK_MODE_BV_GRAS 0x00285 From 3a5e9c42271363a21c78b36fe6d79342c1b44f39 Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Thu, 11 Nov 2021 18:58:35 -0800 Subject: [PATCH 009/750] msm: kgsl: Get GMU frequencies from devicetree The frequencies the GMU clocks can run at may change between devices. Read an optional devicetree property "qcom,gmu-freq-table" that is a list of frequencies that the GMU can run at. Change-Id: I9166a02cbbf753dd1ca82515a5af0086c87e1ac7 Signed-off-by: Lynus Vaz --- adreno_gen7_gmu.c | 88 ++++++++++++++++++++++++++++++++----------- adreno_gen7_gmu.h | 4 ++ adreno_gen7_hwsched.c | 6 +-- adreno_gen7_rpmh.c | 17 +++++---- 4 files changed, 84 insertions(+), 31 deletions(-) diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index da1a55ea50..44b39a0165 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -3,6 +3,7 @@ * Copyright (c) 2021, The Linux Foundation. All rights reserved. 
*/ +#include #include #include #include @@ -1486,10 +1487,10 @@ int gen7_gmu_enable_clks(struct adreno_device *adreno_dev) struct kgsl_device *device = KGSL_DEVICE(adreno_dev); int ret; - gen7_rdpm_cx_freq_update(gmu, GMU_FREQ_MIN / 1000); + gen7_rdpm_cx_freq_update(gmu, gmu->freqs[0] / 1000); ret = kgsl_clk_set_rate(gmu->clks, gmu->num_clks, "gmu_clk", - GMU_FREQ_MIN); + gmu->freqs[0]); if (ret) { dev_err(&gmu->pdev->dev, "Unable to set the GMU clock\n"); return ret; @@ -1837,6 +1838,68 @@ static int gen7_gmu_reg_probe(struct adreno_device *adreno_dev) return ret; } +static int gen7_gmu_clk_probe(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + int ret, i; + int tbl_size; + int num_freqs; + int offset; + + ret = devm_clk_bulk_get_all(&gmu->pdev->dev, &gmu->clks); + if (ret < 0) + return ret; + + /* + * Voting for apb_pclk will enable power and clocks required for + * QDSS path to function. However, if QCOM_KGSL_QDSS_STM is not enabled, + * QDSS is essentially unusable. Hence, if QDSS cannot be used, + * don't vote for this clock. + */ + if (!IS_ENABLED(CONFIG_QCOM_KGSL_QDSS_STM)) { + for (i = 0; i < ret; i++) { + if (!strcmp(gmu->clks[i].id, "apb_pclk")) { + gmu->clks[i].clk = NULL; + break; + } + } + } + + gmu->num_clks = ret; + + /* Read the optional list of GMU frequencies */ + if (of_get_property(gmu->pdev->dev.of_node, + "qcom,gmu-freq-table", &tbl_size) == NULL) + goto default_gmu_freq; + + num_freqs = (tbl_size / sizeof(u32)) / 2; + if (num_freqs != ARRAY_SIZE(gmu->freqs)) + goto default_gmu_freq; + + for (i = 0; i < num_freqs; i++) { + offset = i * 2; + ret = of_property_read_u32_index(gmu->pdev->dev.of_node, + "qcom,gmu-freq-table", offset, &gmu->freqs[i]); + if (ret) + goto default_gmu_freq; + ret = of_property_read_u32_index(gmu->pdev->dev.of_node, + "qcom,gmu-freq-table", offset + 1, &gmu->vlvls[i]); + if (ret) + goto default_gmu_freq; + } + return 0; + +default_gmu_freq: + /* The GMU frequency table is missing or invalid. Go with a default */ + gmu->freqs[0] = GMU_FREQ_MIN; + gmu->vlvls[0] = RPMH_REGULATOR_LEVEL_LOW_SVS; + gmu->freqs[1] = GMU_FREQ_MAX; + gmu->vlvls[1] = RPMH_REGULATOR_LEVEL_SVS; + + return 0; +} + static void gen7_gmu_rdpm_probe(struct gen7_gmu_device *gmu, struct kgsl_device *device) { @@ -1980,27 +2043,10 @@ int gen7_gmu_probe(struct kgsl_device *device, if (ret) return ret; - ret = devm_clk_bulk_get_all(&pdev->dev, &gmu->clks); - if (ret < 0) + ret = gen7_gmu_clk_probe(adreno_dev); + if (ret) return ret; - /* - * Voting for apb_pclk will enable power and clocks required for - * QDSS path to function. However, if QCOM_KGSL_QDSS_STM is not enabled, - * QDSS is essentially unusable. Hence, if QDSS cannot be used, - * don't vote for this clock. 
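For reference, the optional property parsed by gen7_gmu_clk_probe() above might look something like this in the GMU devicetree node; the node name, frequencies and RPMH level constants are illustrative assumptions, and the number of <freq vlvl> pairs has to match the two-entry freqs[]/vlvls[] arrays:

	/*
	 * gmu {
	 *         qcom,gmu-freq-table = <200000000 RPMH_REGULATOR_LEVEL_LOW_SVS
	 *                                500000000 RPMH_REGULATOR_LEVEL_SVS>;
	 * };
	 *
	 * If the property is absent or malformed, the probe falls back to the
	 * GMU_FREQ_MIN/GMU_FREQ_MAX defaults under the default_gmu_freq label.
	 */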
- */ - if (!IS_ENABLED(CONFIG_QCOM_KGSL_QDSS_STM)) { - for (i = 0; i < ret; i++) { - if (!strcmp(gmu->clks[i].id, "apb_pclk")) { - gmu->clks[i].clk = NULL; - break; - } - } - } - - gmu->num_clks = ret; - /* Set up GMU IOMMU and shared memory with GMU */ ret = gen7_gmu_iommu_init(gmu); if (ret) diff --git a/adreno_gen7_gmu.h b/adreno_gen7_gmu.h index 5f40bc575f..91e1b27ee0 100644 --- a/adreno_gen7_gmu.h +++ b/adreno_gen7_gmu.h @@ -55,6 +55,10 @@ struct gen7_gmu_device { /** @num_clks: Number of entries in the @clks array */ int num_clks; unsigned int idle_level; + /** @freqs: Array of GMU frequencies */ + u32 freqs[2]; + /** @vlvls: Array of GMU voltage levels */ + u32 vlvls[2]; struct kgsl_mailbox mailbox; /** @gmu_globals: Array to store gmu global buffers */ struct kgsl_memdesc gmu_globals[GMU_KERNEL_ENTRIES]; diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index 18f6a6178c..25aa7f8293 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -913,17 +913,17 @@ static void scale_gmu_frequency(struct adreno_device *adreno_dev, int buslevel) struct kgsl_pwrctrl *pwr = &device->pwrctrl; struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); static unsigned long prev_freq; - unsigned long freq = GMU_FREQ_MIN; + unsigned long freq = gmu->freqs[0]; if (!gmu->perf_ddr_bw) return; /* * Scale the GMU if DDR is at a CX corner at which GMU can run at - * 500 Mhz + * a higher frequency */ if (pwr->ddr_table[buslevel] >= gmu->perf_ddr_bw) - freq = GMU_FREQ_MAX; + freq = gmu->freqs[1]; if (prev_freq == freq) return; diff --git a/adreno_gen7_rpmh.c b/adreno_gen7_rpmh.c index aa7841ad93..71b0db2c67 100644 --- a/adreno_gen7_rpmh.c +++ b/adreno_gen7_rpmh.c @@ -3,7 +3,6 @@ * Copyright (c) 2021, The Linux Foundation. All rights reserved. */ -#include #include #include #include @@ -254,12 +253,15 @@ static struct rpmh_bw_votes *build_rpmh_bw_votes(struct bcm *bcms, * @hfi: Pointer to hfi device * @pri_rail: Pointer to primary power rail vlvl table * @sec_rail: Pointer to second/dependent power rail vlvl table + * @freqs: List of GMU frequencies + * @vlvls: List of GMU voltage levels * * This function initializes the cx votes for all gmu frequencies * for gmu dcvs */ static int setup_cx_arc_votes(struct gen7_hfi *hfi, - struct rpmh_arc_vals *pri_rail, struct rpmh_arc_vals *sec_rail) + struct rpmh_arc_vals *pri_rail, struct rpmh_arc_vals *sec_rail, + u32 *freqs, u32 *vlvls) { /* Hardcoded values of GMU CX voltage levels */ u16 gmu_cx_vlvl[MAX_CX_LEVELS]; @@ -268,14 +270,14 @@ static int setup_cx_arc_votes(struct gen7_hfi *hfi, int ret, i; gmu_cx_vlvl[0] = 0; - gmu_cx_vlvl[1] = RPMH_REGULATOR_LEVEL_LOW_SVS; - gmu_cx_vlvl[2] = RPMH_REGULATOR_LEVEL_SVS; + gmu_cx_vlvl[1] = vlvls[0]; + gmu_cx_vlvl[2] = vlvls[1]; table->gmu_level_num = 3; table->cx_votes[0].freq = 0; - table->cx_votes[1].freq = GMU_FREQ_MIN / 1000; - table->cx_votes[2].freq = GMU_FREQ_MAX / 1000; + table->cx_votes[1].freq = freqs[0] / 1000; + table->cx_votes[2].freq = freqs[1] / 1000; ret = setup_volt_dependency_tbl(cx_votes, pri_rail, sec_rail, gmu_cx_vlvl, table->gmu_level_num); @@ -363,7 +365,8 @@ static int build_dcvs_table(struct adreno_device *adreno_dev) if (ret) return ret; - ret = setup_cx_arc_votes(hfi, &cx_arc, &mx_arc); + ret = setup_cx_arc_votes(hfi, &cx_arc, &mx_arc, + gmu->freqs, gmu->vlvls); if (ret) return ret; From ada4d7c8550109006d70dca0c81d8d7d9aaa1ed3 Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Tue, 30 Nov 2021 14:37:48 -0800 Subject: [PATCH 010/750] msm: kgsl: Update the header include paths The header 
files that KGSL uses have been modified in recent kernel versions. Update the file names and include paths that we look for. Change-Id: I79ff6ab77c12393bc974bc30f9a294b67da76c9d Signed-off-by: Lynus Vaz --- Kbuild | 2 +- kgsl.c | 1 - kgsl_iommu.c | 1 + 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Kbuild b/Kbuild index b4d7e582ed..daf7ace63f 100644 --- a/Kbuild +++ b/Kbuild @@ -13,7 +13,7 @@ ifeq ($(CONFIG_ARCH_KALAMA), y) include $(KGSL_PATH)/config/gki_waipiodisp.conf endif -ccflags-y += -I$(KGSL_PATH) -I$(KGSL_PATH)/include/linux -I$(KGSL_PATH)/include -I$(KERN_SRC)/drivers/devfreq +ccflags-y += -I$(KGSL_PATH) -I$(KGSL_PATH)/include/linux -I$(KGSL_PATH)/include -I$(KERN_SRC)/drivers/devfreq -I$(KERN_SRC)/drivers/iommu obj-$(CONFIG_QCOM_KGSL) += msm_kgsl.o diff --git a/kgsl.c b/kgsl.c index e28048eb4d..1a72257c21 100644 --- a/kgsl.c +++ b/kgsl.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include diff --git a/kgsl_iommu.c b/kgsl_iommu.c index 16a006c82a..e1e845d6b0 100644 --- a/kgsl_iommu.c +++ b/kgsl_iommu.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "adreno.h" From f092e735c5469e2128288c3b6473d8f995342559 Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Fri, 29 Oct 2021 13:56:39 -0700 Subject: [PATCH 011/750] msm: kgsl: Honor the QMAA override flags The graphics driver can be compiled with the appropriate QMAA override flags. Change-Id: I2d1d43a6f2075e5c7895291fcf7a592e57c98e41 Signed-off-by: Lynus Vaz --- Android.mk | 20 ++++++++++++++------ Kbuild | 2 +- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/Android.mk b/Android.mk index 63924eac51..78597cb706 100644 --- a/Android.mk +++ b/Android.mk @@ -1,4 +1,13 @@ -ifneq ($(TARGET_USES_QMAA),true) +ifeq ($(TARGET_USES_QMAA),true) + KGSL_ENABLED := false + ifeq ($(TARGET_USES_QMAA_OVERRIDE_GFX),true) + KGSL_ENABLED := true + endif # TARGET_USES_QMAA_OVERRIDE_GFX +else + KGSL_ENABLED := true +endif # TARGET_USES_QMAA + +ifeq ($(KGSL_ENABLED),true) KGSL_SELECT := CONFIG_QCOM_KGSL=m LOCAL_PATH := $(call my-dir) @@ -8,12 +17,10 @@ include $(CLEAR_VARS) ifneq ($(findstring vendor,$(LOCAL_PATH)),) DLKM_DIR := device/qcom/common/dlkm -KERN_SRC := $(ANDROID_TOP)/kernel_platform/msm-kernel KBUILD_OPTIONS += BOARD_PLATFORM=$(TARGET_BOARD_PLATFORM) KBUILD_OPTIONS += $(KGSL_SELECT) KBUILD_OPTIONS += MODNAME=msm_kgsl -KBUILD_OPTIONS += KERN_SRC=$(KERN_SRC) KBUILD_OPTIONS += KBUILD_EXTRA_SYMBOLS=$(PWD)/$(call intermediates-dir-for,DLKM,mmrm-module-symvers)/Module.symvers @@ -25,12 +32,13 @@ LOCAL_MODULE_KBUILD_NAME := msm_kgsl.ko LOCAL_MODULE_TAGS := optional LOCAL_MODULE_DEBUG_ENABLE := true LOCAL_MODULE_PATH := $(KERNEL_MODULES_OUT) -#LOCAL_REQUIRED_MODULES := mmrm-module-symvers -#LOCAL_ADDITIONAL_DEPENDENCIES := $(call intermediates-dir-for,DLKM,mmrm-module-symvers)/Module.symvers + +LOCAL_REQUIRED_MODULES := mmrm-module-symvers +LOCAL_ADDITIONAL_DEPENDENCIES := $(call intermediates-dir-for,DLKM,mmrm-module-symvers)/Module.symvers # Include msm_kgsl.ko in the /vendor/lib/modules (vendor.img) BOARD_VENDOR_KERNEL_MODULES += $(LOCAL_MODULE_PATH)/$(LOCAL_MODULE) include $(DLKM_DIR)/Build_external_kernelmodule.mk endif # DLKM check -endif # QMAA check +endif # KGSL_ENABLED diff --git a/Kbuild b/Kbuild index daf7ace63f..8b921fe6ab 100644 --- a/Kbuild +++ b/Kbuild @@ -13,7 +13,7 @@ ifeq ($(CONFIG_ARCH_KALAMA), y) include $(KGSL_PATH)/config/gki_waipiodisp.conf endif -ccflags-y += -I$(KGSL_PATH) -I$(KGSL_PATH)/include/linux -I$(KGSL_PATH)/include 
-I$(KERN_SRC)/drivers/devfreq -I$(KERN_SRC)/drivers/iommu +ccflags-y += -I$(KGSL_PATH) -I$(KGSL_PATH)/include/linux -I$(KGSL_PATH)/include -I$(KERNEL_SRC)/drivers/devfreq -I$(KERNEL_SRC)/drivers/iommu obj-$(CONFIG_QCOM_KGSL) += msm_kgsl.o From a0a56e9d4ebade419597e801cfc0c8ccec80aa43 Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Mon, 22 Nov 2021 16:59:34 -0800 Subject: [PATCH 012/750] msm: kgsl: Use the GPU revision for the GMU id On Gen7 GPUs, the GMU uses a new versioning system for the chipid that is based on the GPU revision. Set up the GMU id based on this new system. Change-Id: I1b4701d67e69d0bbbc916448b55d9d6a0b08eed3 Signed-off-by: Lynus Vaz --- adreno.h | 9 +++++++++ adreno_gen7_gmu.c | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/adreno.h b/adreno.h index 5609f6e12a..ffce7391d5 100644 --- a/adreno.h +++ b/adreno.h @@ -58,6 +58,15 @@ FIELD_PREP(GENMASK(15, 12), ADRENO_CHIPID_MINOR(_id)) | \ FIELD_PREP(GENMASK(11, 8), ADRENO_CHIPID_PATCH(_id))) +#define ADRENO_REV_MAJOR(_rev) FIELD_GET(GENMASK(23, 16), _rev) +#define ADRENO_REV_MINOR(_rev) FIELD_GET(GENMASK(15, 8), _rev) +#define ADRENO_REV_PATCH(_rev) FIELD_GET(GENMASK(7, 0), _rev) + +#define ADRENO_GMU_REV(_rev) \ + (FIELD_PREP(GENMASK(31, 24), ADRENO_REV_MAJOR(_rev)) | \ + FIELD_PREP(GENMASK(23, 16), ADRENO_REV_MINOR(_rev)) | \ + FIELD_PREP(GENMASK(15, 8), ADRENO_REV_PATCH(_rev))) + /* ADRENO_GPUREV - Return the GPU ID for the given adreno_device */ #define ADRENO_GPUREV(_a) ((_a)->gpucore->gpurev) diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index 44b39a0165..c32cb2f9c2 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -832,7 +832,7 @@ void gen7_gmu_register_config(struct adreno_device *adreno_dev) /* Pass chipid to GMU FW, must happen before starting GMU */ gmu_core_regwrite(device, GEN7_GMU_GENERAL_10, - ADRENO_GMU_CHIPID(adreno_dev->chipid)); + ADRENO_GMU_REV(ADRENO_GPUREV(adreno_dev))); /* Log size is encoded in (number of 4K units - 1) */ val = (gmu->gmu_log->gmuaddr & GENMASK(31, 12)) | From 6935541597c08e988e57eac800d4b9f954e39f84 Mon Sep 17 00:00:00 2001 From: Mohammed Mirza Mandayappurath Manzoor Date: Mon, 4 Oct 2021 12:02:57 -0700 Subject: [PATCH 013/750] msm: kgsl: Drop aggregated bandwidth vote during thermal throttling There is a possibility of gpu consuming bandwidth above the allowed ddr bandwidth for respective gpu power levels. Under thermal throttling, this could keep the DDR to run at high levels causing a thermal reset. When GPU is throttled to its lowest level, drop the ab vote as a last resort to prevent thermal reset. Change-Id: Ia0885ac1ebfc58f4af9f999dd8063c905ec130c3 Signed-off-by: Mohammed Mirza Mandayappurath Manzoor Signed-off-by: Harshdeep Dhatt --- kgsl_pwrscale.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/kgsl_pwrscale.c b/kgsl_pwrscale.c index fc7e47e177..caef4788c2 100644 --- a/kgsl_pwrscale.c +++ b/kgsl_pwrscale.c @@ -498,7 +498,15 @@ int kgsl_busmon_target(struct device *dev, unsigned long *freq, u32 flags) /* Update bus vote if AB or IB is modified */ if ((pwr->bus_mod != b) || (pwr->bus_ab_mbytes != ab_mbytes)) { pwr->bus_percent_ab = device->pwrscale.bus_profile.percent_ab; - pwr->bus_ab_mbytes = ab_mbytes; + /* + * When gpu is thermally throttled to its lowest power level, + * drop GPU's AB vote as a last resort to lower CX voltage and + * to prevent thermal reset. 
+ */ + if (pwr->thermal_pwrlevel != pwr->num_pwrlevels - 1) + pwr->bus_ab_mbytes = ab_mbytes; + else + pwr->bus_ab_mbytes = 0; kgsl_bus_update(device, KGSL_BUS_VOTE_ON); } From 3cb1bfe3368703538d46b1d0ee539c1a6fd594ce Mon Sep 17 00:00:00 2001 From: Mohammed Mirza Mandayappurath Manzoor Date: Tue, 26 Oct 2021 15:00:03 -0700 Subject: [PATCH 014/750] msm: kgsl: Remove inline performance counter enable for gen7 targets HLSQ, SP and TP HW did not allow any programming between programming of draw_init, 2D_init, CL_init, Global_event and context_done. To get around this, KMD had an inline enable mechanism to enable these performance counters. As this is fixed on gen7 targets, program these registers from HLOS. Change-Id: I2a08fe19a486b70059484979fe0d1e1718cd4776 Signed-off-by: Mohammed Mirza Mandayappurath Manzoor --- adreno_gen7_perfcounter.c | 76 +++------------------------------------ 1 file changed, 5 insertions(+), 71 deletions(-) diff --git a/adreno_gen7_perfcounter.c b/adreno_gen7_perfcounter.c index f088856da3..75a4b9831b 100644 --- a/adreno_gen7_perfcounter.c +++ b/adreno_gen7_perfcounter.c @@ -46,67 +46,6 @@ static int gen7_counter_enable(struct adreno_device *adreno_dev, return ret; } -static int gen7_counter_inline_enable(struct adreno_device *adreno_dev, - const struct adreno_perfcount_group *group, - unsigned int counter, unsigned int countable) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct adreno_perfcount_register *reg = &group->regs[counter]; - struct adreno_ringbuffer *rb = &adreno_dev->ringbuffers[0]; - u32 cmds[3]; - int ret; - - if (!(device->state == KGSL_STATE_ACTIVE)) - return gen7_counter_enable(adreno_dev, group, counter, - countable); - - if (group->flags & ADRENO_PERFCOUNTER_GROUP_RESTORE) - gen7_perfcounter_update(adreno_dev, reg, false); - - cmds[0] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0); - cmds[1] = cp_type4_packet(reg->select, 1); - cmds[2] = countable; - - /* submit to highest priority RB always */ - ret = gen7_ringbuffer_addcmds(adreno_dev, rb, NULL, - F_NOTPROTECTED, cmds, 3, 0, NULL); - if (ret) - return ret; - - /* - * schedule dispatcher to make sure rb[0] is run, because - * if the current RB is not rb[0] and gpu is idle then - * rb[0] will not get scheduled to run - */ - if (adreno_dev->cur_rb != rb) - adreno_dispatcher_schedule(device); - - /* wait for the above commands submitted to complete */ - ret = adreno_ringbuffer_waittimestamp(rb, rb->timestamp, - ADRENO_IDLE_TIMEOUT); - - if (ret) { - /* - * If we were woken up because of cancelling rb events - * either due to soft reset or adreno_stop, ignore the - * error and return 0 here.
The perfcounter is already - * set up in software and it will be programmed in - * hardware when we wake up or come up after soft reset - */ - if (ret == -EAGAIN) - ret = 0; - else - dev_err(device->dev, - "Perfcounter %s/%u/%u start via commands failed %d\n", - group->name, counter, countable, ret); - } - - if (!ret) - reg->value = 0; - - return ret; -} - static u64 gen7_counter_read(struct adreno_device *adreno_dev, const struct adreno_perfcount_group *group, unsigned int counter) @@ -849,8 +788,7 @@ static const struct adreno_perfcount_group gen7_perfcounter_groups gen7_counter_enable, gen7_counter_read), GEN7_REGULAR_PERFCOUNTER_GROUP(PC, pc), GEN7_REGULAR_PERFCOUNTER_GROUP(VFD, vfd), - GEN7_PERFCOUNTER_GROUP(HLSQ, hlsq, - gen7_counter_inline_enable, gen7_counter_read), + GEN7_REGULAR_PERFCOUNTER_GROUP(HLSQ, hlsq), GEN7_REGULAR_PERFCOUNTER_GROUP(VPC, vpc), GEN7_REGULAR_PERFCOUNTER_GROUP(CCU, ccu), GEN7_REGULAR_PERFCOUNTER_GROUP(CMP, cmp), @@ -858,10 +796,8 @@ static const struct adreno_perfcount_group gen7_perfcounter_groups GEN7_REGULAR_PERFCOUNTER_GROUP(RAS, ras), GEN7_REGULAR_PERFCOUNTER_GROUP(LRZ, lrz), GEN7_REGULAR_PERFCOUNTER_GROUP(UCHE, uche), - GEN7_PERFCOUNTER_GROUP(TP, tp, - gen7_counter_inline_enable, gen7_counter_read), - GEN7_PERFCOUNTER_GROUP(SP, sp, - gen7_counter_inline_enable, gen7_counter_read), + GEN7_REGULAR_PERFCOUNTER_GROUP(TP, tp), + GEN7_REGULAR_PERFCOUNTER_GROUP(SP, sp), GEN7_REGULAR_PERFCOUNTER_GROUP(RB, rb), GEN7_REGULAR_PERFCOUNTER_GROUP(VSC, vsc), GEN7_PERFCOUNTER_GROUP_FLAGS(gen7, VBIF, gbif, 0, @@ -883,10 +819,8 @@ static const struct adreno_perfcount_group gen7_perfcounter_groups GEN7_BV_REGULAR_PERFCOUNTER_GROUP(PC, pc), GEN7_BV_REGULAR_PERFCOUNTER_GROUP(VFD, vfd), GEN7_BV_REGULAR_PERFCOUNTER_GROUP(VPC, vpc), - GEN7_BV_PERFCOUNTER_GROUP(TP, tp, - gen7_counter_inline_enable, gen7_counter_read), - GEN7_BV_PERFCOUNTER_GROUP(SP, sp, - gen7_counter_inline_enable, gen7_counter_read), + GEN7_BV_REGULAR_PERFCOUNTER_GROUP(TP, tp), + GEN7_BV_REGULAR_PERFCOUNTER_GROUP(SP, sp), GEN7_BV_REGULAR_PERFCOUNTER_GROUP(UFC, ufc), }; From 0021da9fd06b2552e1f379746d82b15af7feab0e Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Thu, 4 Nov 2021 10:56:13 -0600 Subject: [PATCH 015/750] msm: kgsl: Fix sysfs deadlock in kgsl If kgsl mem entries are being accessed in sysfs and if putting back the mementry refcount triggers a free of the process private, we have a mutex deadlock. Because freeing the process private triggers removal of the sysfs directories from within a thread that is accessing the sysfs files. Fix this by taking an extra refcount on the process private and then putting it back in a deferred manner. 
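In outline, the fix works as follows (condensed from the kgsl_sharedmem.c diff below; the function and field names are the ones used in the diff, everything else here is illustrative and error handling is omitted):

    /* The last reference is dropped from a workqueue, never from the
     * sysfs read path itself.
     */
    static void process_private_deferred_put(struct work_struct *work)
    {
            struct deferred_work *free_work =
                    container_of(work, struct deferred_work, work);

            kgsl_process_private_put(free_work->private);
            kfree(free_work);
    }

    /* In each sysfs show() handler: pin the process private first, then
     * hand the put off to kgsl_driver.mem_workqueue before returning.
     */
    if (!kgsl_process_private_get(priv))
            return -ENOENT;
    work->private = priv;
    INIT_WORK(&work->work, process_private_deferred_put);
    ...
    queue_work(kgsl_driver.mem_workqueue, &work->work);

Since the final put now happens on the workqueue, removal of the per-process sysfs directories can no longer be triggered by the thread that is still inside one of those sysfs files.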
Change-Id: I7db0e6144cabec2a86df9afbc500cd0ba3af0291 Signed-off-by: Harshdeep Dhatt --- kgsl_sharedmem.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/kgsl_sharedmem.c b/kgsl_sharedmem.c index 53cbf1d8ba..43baa5cacf 100644 --- a/kgsl_sharedmem.c +++ b/kgsl_sharedmem.c @@ -132,6 +132,20 @@ static ssize_t mem_entry_sysfs_show(struct kobject *kobj, return pattr->show(priv, pattr->memtype, buf); } +struct deferred_work { + struct kgsl_process_private *private; + struct work_struct work; +}; + +static void process_private_deferred_put(struct work_struct *work) +{ + struct deferred_work *free_work = + container_of(work, struct deferred_work, work); + + kgsl_process_private_put(free_work->private); + kfree(free_work); +} + static ssize_t memtype_sysfs_show(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -140,10 +154,30 @@ static ssize_t memtype_sysfs_show(struct kobject *kobj, struct kgsl_mem_entry *entry; u64 size = 0; int id = 0; + struct deferred_work *work = kzalloc(sizeof(struct deferred_work), + GFP_KERNEL); + + if (!work) + return -ENOMEM; priv = container_of(kobj, struct kgsl_process_private, kobj_memtype); memtype = container_of(attr, struct kgsl_memtype, attr); + /* + * Take a process refcount here and put it back in a deferred manner. + * This is to avoid a deadlock where we put back last reference of the + * process private (via kgsl_mem_entry_put) here and end up trying to + * remove sysfs kobject while we are still in the middle of reading one + * of the sysfs files. + */ + if (!kgsl_process_private_get(priv)) { + kfree(work); + return -ENOENT; + } + + work->private = priv; + INIT_WORK(&work->work, process_private_deferred_put); + spin_lock(&priv->mem_lock); for (entry = idr_get_next(&priv->mem_idr, &id); entry; id++, entry = idr_get_next(&priv->mem_idr, &id)) { @@ -165,6 +199,8 @@ static ssize_t memtype_sysfs_show(struct kobject *kobj, } spin_unlock(&priv->mem_lock); + queue_work(kgsl_driver.mem_workqueue, &work->work); + return scnprintf(buf, PAGE_SIZE, "%llu\n", size); } @@ -184,6 +220,26 @@ imported_mem_show(struct kgsl_process_private *priv, struct kgsl_mem_entry *entry; uint64_t imported_mem = 0; int id = 0; + struct deferred_work *work = kzalloc(sizeof(struct deferred_work), + GFP_KERNEL); + + if (!work) + return -ENOMEM; + + /* + * Take a process refcount here and put it back in a deferred manner. + * This is to avoid a deadlock where we put back last reference of the + * process private (via kgsl_mem_entry_put) here and end up trying to + * remove sysfs kobject while we are still in the middle of reading one + * of the sysfs files. + */ + if (!kgsl_process_private_get(priv)) { + kfree(work); + return -ENOENT; + } + + work->private = priv; + INIT_WORK(&work->work, process_private_deferred_put); spin_lock(&priv->mem_lock); for (entry = idr_get_next(&priv->mem_idr, &id); entry; @@ -218,6 +274,8 @@ imported_mem_show(struct kgsl_process_private *priv, } spin_unlock(&priv->mem_lock); + queue_work(kgsl_driver.mem_workqueue, &work->work); + return scnprintf(buf, PAGE_SIZE, "%llu\n", imported_mem); } From d6aaa1d7f0e68e2c5557f02faa3492d7f211e89d Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Fri, 5 Nov 2021 12:28:15 -0600 Subject: [PATCH 016/750] msm: kgsl: Set context id as contextidr in hwscheduling This is used by the iommu page fault handler to figure out the page faulting context. 
Change-Id: Ic8f8c69df2bbb7d47465799c224adb8a496b4a96 Signed-off-by: Harshdeep Dhatt --- adreno_gen7_hwsched_hfi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index c910ab7581..60ad1921d0 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -1277,7 +1277,7 @@ static int send_context_register(struct adreno_device *adreno_dev, cmd.ctxt_id = context->id; cmd.flags = HFI_CTXT_FLAG_NOTIFY | context->flags; cmd.pt_addr = kgsl_mmu_pagetable_get_ttbr0(pt); - cmd.ctxt_idr = pid_nr(context->proc_priv->pid); + cmd.ctxt_idr = context->id; cmd.ctxt_bank = kgsl_mmu_pagetable_get_context_bank(pt); return gen7_hfi_send_cmd_async(adreno_dev, &cmd); From 5aec9bede1ec367c262b6f17c48f3855fb8e0c92 Mon Sep 17 00:00:00 2001 From: Harshitha Sai Neelati Date: Mon, 8 Nov 2021 00:18:21 +0530 Subject: [PATCH 017/750] msm: kgsl: Update GPUCC regs for A662 snapshot GPUCC register offsets are different in A662. So update the snapshot to use the new offsets for A662. Change-Id: I07fbc3d26840e1a483a06bcd9cbd76f30084148b Signed-off-by: Harshitha Sai Neelati --- adreno_a6xx_gmu_snapshot.c | 39 ++++++++++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/adreno_a6xx_gmu_snapshot.c b/adreno_a6xx_gmu_snapshot.c index 32dc60c59c..e11f70e0b2 100644 --- a/adreno_a6xx_gmu_snapshot.c +++ b/adreno_a6xx_gmu_snapshot.c @@ -39,14 +39,6 @@ static const unsigned int a6xx_gmu_registers[] = { 0x1F9E0, 0x1F9E2, 0x1F9F0, 0x1F9F0, 0x1FA00, 0x1FA01, /* GMU AO */ 0x23B00, 0x23B16, - /* GPU CC */ - 0x24000, 0x24012, 0x24040, 0x24052, 0x24400, 0x24404, 0x24407, 0x2440B, - 0x24415, 0x2441C, 0x2441E, 0x2442D, 0x2443C, 0x2443D, 0x2443F, 0x24440, - 0x24442, 0x24449, 0x24458, 0x2445A, 0x24540, 0x2455E, 0x24800, 0x24802, - 0x24C00, 0x24C02, 0x25400, 0x25402, 0x25800, 0x25802, 0x25C00, 0x25C02, - 0x26000, 0x26002, - /* GPU CC ACD */ - 0x26400, 0x26416, 0x26420, 0x26427, }; static const unsigned int a660_gmu_registers[] = { @@ -60,6 +52,28 @@ static const unsigned int a660_gmu_registers[] = { 0x23B30, 0x23B30, }; +static const unsigned int a6xx_gmu_gpucc_registers[] = { + /* GPU CC */ + 0x24000, 0x24012, 0x24040, 0x24052, 0x24400, 0x24404, 0x24407, 0x2440B, + 0x24415, 0x2441C, 0x2441E, 0x2442D, 0x2443C, 0x2443D, 0x2443F, 0x24440, + 0x24442, 0x24449, 0x24458, 0x2445A, 0x24540, 0x2455E, 0x24800, 0x24802, + 0x24C00, 0x24C02, 0x25400, 0x25402, 0x25800, 0x25802, 0x25C00, 0x25C02, + 0x26000, 0x26002, + /* GPU CC ACD */ + 0x26400, 0x26416, 0x26420, 0x26427, +}; + +static const unsigned int a662_gmu_gpucc_registers[] = { + /* GPU CC */ + 0x24000, 0x2400e, 0x24400, 0x2440e, 0x24800, 0x24805, 0x24c00, 0x24cff, + 0x25800, 0x25804, 0x25c00, 0x25c04, 0x26000, 0x26004, 0x26400, 0x26405, + 0x26414, 0x2641d, 0x2642a, 0x26430, 0x26432, 0x26432, 0x26441, 0x26455, + 0x26466, 0x26468, 0x26478, 0x2647a, 0x26489, 0x2648a, 0x2649c, 0x2649e, + 0x264a0, 0x264a3, 0x264b3, 0x264b5, 0x264c5, 0x264c7, 0x264d6, 0x264d8, + 0x264e8, 0x264e9, 0x264f9, 0x264fc, 0x2650b, 0x2650c, 0x2651c, 0x2651e, + 0x26540, 0x26570, 0x26600, 0x26616, 0x26620, 0x2662d, +}; + static const unsigned int a630_rscc_snapshot_registers[] = { 0x23400, 0x23434, 0x23436, 0x23436, 0x23480, 0x23484, 0x23489, 0x2348C, 0x23491, 0x23494, 0x23499, 0x2349C, 0x234A1, 0x234A4, 0x234A9, 0x234AC, @@ -399,6 +413,15 @@ void a6xx_gmu_device_snapshot(struct kgsl_device *device, adreno_snapshot_registers(device, snapshot, a6xx_gmu_registers, ARRAY_SIZE(a6xx_gmu_registers) / 2); + if 
(adreno_is_a662(adreno_dev)) + adreno_snapshot_registers(device, snapshot, + a662_gmu_gpucc_registers, + ARRAY_SIZE(a662_gmu_gpucc_registers) / 2); + else + adreno_snapshot_registers(device, snapshot, + a6xx_gmu_gpucc_registers, + ARRAY_SIZE(a6xx_gmu_gpucc_registers) / 2); + /* Snapshot A660 specific GMU registers */ if (adreno_is_a660(adreno_dev)) adreno_snapshot_registers(device, snapshot, a660_gmu_registers, From 53f46c5c600d14cf0ea64cf22133f41a77cdd374 Mon Sep 17 00:00:00 2001 From: Mohammed Mirza Mandayappurath Manzoor Date: Mon, 8 Nov 2021 16:00:20 -0800 Subject: [PATCH 018/750] msm: kgsl: Add notifier call for thermal constraints Currently devfreq call back function is overloaded for DCVS recommendations and thermal constraints. This is causing devfreq mutex congestion thereby causing delayed response to thermal requests leading to thermal reset. Add a new QoS notifier callback function to apply the constraints. Change-Id: Ic3c4a2e59867aeaa342fa893344667c77d8b1984 Signed-off-by: Mohammed Mirza Mandayappurath Manzoor --- kgsl_pwrctrl.c | 25 ----------------------- kgsl_pwrctrl.h | 3 ++- kgsl_pwrscale.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 53 insertions(+), 28 deletions(-) diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c index 57b2b63499..9f68d8527e 100644 --- a/kgsl_pwrctrl.c +++ b/kgsl_pwrctrl.c @@ -2261,31 +2261,6 @@ int kgsl_pwrctrl_set_default_gpu_pwrlevel(struct kgsl_device *device) return device->ftbl->gpu_clock_set(device, pwr->active_pwrlevel); } -/** - * kgsl_pwrctrl_update_thermal_pwrlevel() - Update GPU thermal power level - * @device: Pointer to the kgsl_device struct - */ -void kgsl_pwrctrl_update_thermal_pwrlevel(struct kgsl_device *device) -{ - s32 qos_max_freq = dev_pm_qos_read_value(&device->pdev->dev, - DEV_PM_QOS_MAX_FREQUENCY); - int level = 0; - - if (qos_max_freq != PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE) { - level = _get_nearest_pwrlevel(&device->pwrctrl, - qos_max_freq * 1000); - if (level < 0) - return; - } - - if (level != device->pwrctrl.thermal_pwrlevel) { - trace_kgsl_thermal_constraint( - device->pwrctrl.pwrlevels[level].gpu_freq); - - device->pwrctrl.thermal_pwrlevel = level; - } -} - int kgsl_gpu_num_freqs(void) { struct kgsl_device *device = kgsl_get_device(0); diff --git a/kgsl_pwrctrl.h b/kgsl_pwrctrl.h index 925aceeef5..6d353b0b05 100644 --- a/kgsl_pwrctrl.h +++ b/kgsl_pwrctrl.h @@ -163,6 +163,8 @@ struct kgsl_pwrctrl { u64 time_in_pwrlevel[KGSL_MAX_PWRLEVELS]; /** @last_stat_updated: The last time stats were updated */ ktime_t last_stat_updated; + /** @nb_max: Notifier block for DEV_PM_QOS_MAX_FREQUENCY */ + struct notifier_block nb_max; }; int kgsl_pwrctrl_init(struct kgsl_device *device); @@ -216,7 +218,6 @@ void kgsl_pwrctrl_busy_time(struct kgsl_device *device, u64 time, u64 busy); void kgsl_pwrctrl_set_constraint(struct kgsl_device *device, struct kgsl_pwr_constraint *pwrc, u32 id, u32 ts); int kgsl_pwrctrl_set_default_gpu_pwrlevel(struct kgsl_device *device); -void kgsl_pwrctrl_update_thermal_pwrlevel(struct kgsl_device *device); /** * kgsl_pwrctrl_request_state - Request a specific power state diff --git a/kgsl_pwrscale.c b/kgsl_pwrscale.c index caef4788c2..74fe81f09f 100644 --- a/kgsl_pwrscale.c +++ b/kgsl_pwrscale.c @@ -259,8 +259,6 @@ int kgsl_devfreq_target(struct device *dev, unsigned long *freq, u32 flags) cur_freq = kgsl_pwrctrl_active_freq(pwr); level = pwr->active_pwrlevel; - kgsl_pwrctrl_update_thermal_pwrlevel(device); - /* If the governor recommends a new frequency, update it here */ if (rec_freq != 
cur_freq) { for (i = 0; i < pwr->num_pwrlevels; i++) @@ -629,6 +627,46 @@ static void pwrscale_of_ca_aware(struct kgsl_device *device) of_node_put(node); } +/* + * thermal_max_notifier_call - Callback function registered to receive qos max + * frequency events. + * @nb: The notifier block + * @val: Max frequency value in KHz for GPU + * + * The function subscribes to GPU max frequency change and updates thermal + * power level accordingly. + */ +static int thermal_max_notifier_call(struct notifier_block *nb, unsigned long val, void *data) +{ + struct kgsl_pwrctrl *pwr = container_of(nb, struct kgsl_pwrctrl, nb_max); + struct kgsl_device *device = container_of(pwr, struct kgsl_device, pwrctrl); + u32 max_freq = val * 1000; + int level; + + for (level = pwr->num_pwrlevels - 1; level >= 0; level--) { + /* get nearest power level with a maximum delta of 5MHz */ + if (abs(pwr->pwrlevels[level].gpu_freq - max_freq) < 5000000) + break; + } + + if (level < 0) + return NOTIFY_DONE; + + if (level == pwr->thermal_pwrlevel) + return NOTIFY_OK; + + trace_kgsl_thermal_constraint(max_freq); + pwr->thermal_pwrlevel = level; + + mutex_lock(&device->mutex); + + /* Update the current level using the new limit */ + kgsl_pwrctrl_pwrlevel_change(device, pwr->active_pwrlevel); + + mutex_unlock(&device->mutex); + return NOTIFY_OK; +} + int kgsl_pwrscale_init(struct kgsl_device *device, struct platform_device *pdev, const char *governor) { @@ -711,6 +749,16 @@ int kgsl_pwrscale_init(struct kgsl_device *device, struct platform_device *pdev, return ret; } + pwr->nb_max.notifier_call = thermal_max_notifier_call; + ret = dev_pm_qos_add_notifier(&pdev->dev, &pwr->nb_max, DEV_PM_QOS_MAX_FREQUENCY); + + if (ret) { + dev_err(device->dev, "Unable to register notifier call for thermal: %d\n", ret); + device->pwrscale.enabled = false; + msm_adreno_tz_exit(); + return ret; + } + devfreq = devfreq_add_device(&pdev->dev, &gpu_profile->profile, governor, &adreno_tz_data); if (IS_ERR_OR_NULL(devfreq)) { @@ -775,6 +823,7 @@ void kgsl_pwrscale_close(struct kgsl_device *device) devfreq_remove_device(device->pwrscale.devfreqptr); device->pwrscale.devfreqptr = NULL; + dev_pm_qos_remove_notifier(&device->pdev->dev, &pwr->nb_max, DEV_PM_QOS_MAX_FREQUENCY); msm_adreno_tz_exit(); } From 2d2726b851f45fb335264ea54659e4209aac9e53 Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Wed, 10 Nov 2021 13:43:23 -0700 Subject: [PATCH 019/750] msm: kgsl: Avoid double SLUMBER entry adreno_suspend_context() relinquishes the device mutex which opens up a window for a concurrent thread to attempt SLUMBER. Hence, check for flags again, before proceeding with SLUMBER sequence. Change-Id: I3f36e19e31f5399a038a29af5cb9bc9f59bdfa5b Signed-off-by: Harshdeep Dhatt --- adreno_a6xx_gmu.c | 12 ++++++++++-- adreno_a6xx_rgmu.c | 9 +++++++-- adreno_gen7_gmu.c | 12 ++++++++++-- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/adreno_a6xx_gmu.c b/adreno_a6xx_gmu.c index 90fb4b23f8..9986573cc4 100644 --- a/adreno_a6xx_gmu.c +++ b/adreno_a6xx_gmu.c @@ -3034,10 +3034,18 @@ static int a6xx_power_off(struct adreno_device *adreno_dev) WARN_ON(!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)); - trace_kgsl_pwr_request_state(device, KGSL_STATE_SLUMBER); - adreno_suspend_context(device); + /* + * adreno_suspend_context() unlocks the device mutex, which + * could allow a concurrent thread to attempt SLUMBER sequence. + * Hence, check the flags again before proceeding with SLUMBER. 
+ */ + if (!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) + return 0; + + trace_kgsl_pwr_request_state(device, KGSL_STATE_SLUMBER); + ret = a6xx_gmu_oob_set(device, oob_gpu); if (ret) { a6xx_gmu_oob_clear(device, oob_gpu); diff --git a/adreno_a6xx_rgmu.c b/adreno_a6xx_rgmu.c index a92f37a3da..0bbc651682 100644 --- a/adreno_a6xx_rgmu.c +++ b/adreno_a6xx_rgmu.c @@ -1051,13 +1051,18 @@ static int a6xx_power_off(struct adreno_device *adreno_dev) struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(adreno_dev); int ret; + adreno_suspend_context(device); + + /* + * adreno_suspend_context() unlocks the device mutex, which + * could allow a concurrent thread to attempt SLUMBER sequence. + * Hence, check the flags before proceeding with SLUMBER. + */ if (!test_bit(RGMU_PRIV_GPU_STARTED, &rgmu->flags)) return 0; trace_kgsl_pwr_request_state(device, KGSL_STATE_SLUMBER); - adreno_suspend_context(device); - ret = a6xx_rgmu_oob_set(device, oob_gpu); if (ret) { a6xx_rgmu_oob_clear(device, oob_gpu); diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index c32cb2f9c2..d629f8edf8 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -2392,10 +2392,18 @@ static int gen7_power_off(struct adreno_device *adreno_dev) WARN_ON(!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)); - trace_kgsl_pwr_request_state(device, KGSL_STATE_SLUMBER); - adreno_suspend_context(device); + /* + * adreno_suspend_context() unlocks the device mutex, which + * could allow a concurrent thread to attempt SLUMBER sequence. + * Hence, check the flags again before proceeding with SLUMBER. + */ + if (!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) + return 0; + + trace_kgsl_pwr_request_state(device, KGSL_STATE_SLUMBER); + ret = gen7_gmu_oob_set(device, oob_gpu); if (!ret) { kgsl_pwrscale_update_stats(device); From ef3f8530b44b388b710875d6d94af7da65455f56 Mon Sep 17 00:00:00 2001 From: Sushmita Susheelendra Date: Fri, 1 Oct 2021 12:47:27 -0400 Subject: [PATCH 020/750] msm: kgsl: Add new tracepoints for command batch ready and done The new tracepoints adreno_cmdbatch_ready and adreno_cmdbatch_done provide common begin and end reference points respectively for comparison between dispatch on GMU and host. adreno_cmdbatch_ready is logged after the sync dependencies for a command have been resolved and the command is therefore ready to be submitted. adreno_cmdbatch_done is logged on both SW and HW dispatcher threads just before signaling events for the command. 
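For reference, the two events render as below; the format strings come from the adreno_trace.h hunk in this patch, while the field values here are made up purely for illustration:

    adreno_cmdbatch_ready: ctx=7 ctx_prio=1 ts=1042 requeue_cnt=0
    adreno_cmdbatch_done:  ctx=7 ctx_prio=1 ts=1042

Pairing ready/done records on the ctx and ts fields gives, for either dispatcher, the time a command spent between having its dependencies resolved and being retired.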
Change-Id: If9587bae0d4655be93bfc3fee855d6ffbe967e1f Signed-off-by: Sushmita Susheelendra --- adreno_dispatch.c | 7 +++++++ adreno_hwsched.c | 31 +++++++++++++++++++------------ adreno_trace.h | 43 +++++++++++++++++++++++++++++++++++++++++++ kgsl_drawobj.c | 1 + kgsl_drawobj.h | 2 ++ 5 files changed, 72 insertions(+), 12 deletions(-) diff --git a/adreno_dispatch.c b/adreno_dispatch.c index f73ab29b70..d1c747061f 100644 --- a/adreno_dispatch.c +++ b/adreno_dispatch.c @@ -737,6 +737,7 @@ static int dispatcher_context_sendcmds(struct adreno_device *adreno_dev, (dispatch_q->inflight < inflight)) { struct kgsl_drawobj *drawobj; struct kgsl_drawobj_cmd *cmdobj; + struct kgsl_context *context; if (adreno_gpu_fault(adreno_dev) != 0) break; @@ -762,6 +763,9 @@ static int dispatcher_context_sendcmds(struct adreno_device *adreno_dev, timestamp = drawobj->timestamp; cmdobj = CMDOBJ(drawobj); + context = drawobj->context; + trace_adreno_cmdbatch_ready(context->id, context->priority, + drawobj->timestamp, cmdobj->requeue_cnt); ret = sendcmd(adreno_dev, cmdobj); /* @@ -782,6 +786,7 @@ static int dispatcher_context_sendcmds(struct adreno_device *adreno_dev, drawctxt, cmdobj); if (r) ret = r; + cmdobj->requeue_cnt++; } break; @@ -2282,6 +2287,8 @@ static void retire_cmdobj(struct adreno_device *adreno_dev, drawctxt->ticks_index = (drawctxt->ticks_index + 1) % SUBMIT_RETIRE_TICKS_SIZE; + trace_adreno_cmdbatch_done(drawobj->context->id, + drawobj->context->priority, drawobj->timestamp); kgsl_drawobj_destroy(drawobj); } diff --git a/adreno_hwsched.c b/adreno_hwsched.c index 13db07a065..7ce7d2177b 100644 --- a/adreno_hwsched.c +++ b/adreno_hwsched.c @@ -455,6 +455,7 @@ static int hwsched_sendcmds(struct adreno_device *adreno_dev, while (1) { struct kgsl_drawobj *drawobj; struct kgsl_drawobj_cmd *cmdobj; + struct kgsl_context *context; spin_lock(&drawctxt->lock); drawobj = _process_drawqueue_get_next_drawobj(adreno_dev, @@ -478,6 +479,9 @@ static int hwsched_sendcmds(struct adreno_device *adreno_dev, timestamp = drawobj->timestamp; cmdobj = CMDOBJ(drawobj); + context = drawobj->context; + trace_adreno_cmdbatch_ready(context->id, context->priority, + drawobj->timestamp, cmdobj->requeue_cnt); ret = hwsched_sendcmd(adreno_dev, cmdobj); /* @@ -498,6 +502,7 @@ static int hwsched_sendcmds(struct adreno_device *adreno_dev, drawctxt, cmdobj); if (r) ret = r; + cmdobj->requeue_cnt++; } break; @@ -1026,26 +1031,28 @@ static int adreno_hwsched_queue_cmds(struct kgsl_device_private *dev_priv, static void retire_cmdobj(struct adreno_hwsched *hwsched, struct kgsl_drawobj_cmd *cmdobj) { - struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj); + struct kgsl_drawobj *drawobj; struct kgsl_mem_entry *entry; struct kgsl_drawobj_profiling_buffer *profile_buffer; - if (cmdobj != NULL) { - if (drawobj->flags & KGSL_DRAWOBJ_END_OF_FRAME) - atomic64_inc(&drawobj->context->proc_priv->frame_count); + drawobj = DRAWOBJ(cmdobj); + if (drawobj->flags & KGSL_DRAWOBJ_END_OF_FRAME) + atomic64_inc(&drawobj->context->proc_priv->frame_count); - entry = cmdobj->profiling_buf_entry; - if (entry) { - profile_buffer = kgsl_gpuaddr_to_vaddr(&entry->memdesc, - cmdobj->profiling_buffer_gpuaddr); + entry = cmdobj->profiling_buf_entry; + if (entry) { + profile_buffer = kgsl_gpuaddr_to_vaddr(&entry->memdesc, + cmdobj->profiling_buffer_gpuaddr); - if (profile_buffer == NULL) - return; + if (profile_buffer == NULL) + return; - kgsl_memdesc_unmap(&entry->memdesc); - } + kgsl_memdesc_unmap(&entry->memdesc); } + trace_adreno_cmdbatch_done(drawobj->context->id, + 
drawobj->context->priority, drawobj->timestamp); + if (hwsched->big_cmdobj == cmdobj) { hwsched->big_cmdobj = NULL; kgsl_drawobj_put(drawobj); diff --git a/adreno_trace.h b/adreno_trace.h index 3890dfc501..536d6f7154 100644 --- a/adreno_trace.h +++ b/adreno_trace.h @@ -218,6 +218,49 @@ TRACE_EVENT(adreno_cmdbatch_sync, ) ); +TRACE_EVENT(adreno_cmdbatch_ready, + TP_PROTO(unsigned int ctx_id, unsigned int ctx_prio, + unsigned int timestamp, unsigned int requeue_cnt), + TP_ARGS(ctx_id, ctx_prio, timestamp, requeue_cnt), + TP_STRUCT__entry( + __field(unsigned int, id) + __field(int, prio) + __field(unsigned int, timestamp) + __field(unsigned int, requeue_cnt) + ), + TP_fast_assign( + __entry->id = ctx_id; + __entry->prio = ctx_prio; + __entry->timestamp = timestamp; + __entry->requeue_cnt = requeue_cnt; + ), + TP_printk( + "ctx=%u ctx_prio=%d ts=%u requeue_cnt=%u", + __entry->id, __entry->prio, __entry->timestamp, + __entry->requeue_cnt + ) +); + +TRACE_EVENT(adreno_cmdbatch_done, + TP_PROTO(unsigned int ctx_id, unsigned int ctx_prio, + unsigned int timestamp), + TP_ARGS(ctx_id, ctx_prio, timestamp), + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, prio) + __field(unsigned int, timestamp) + ), + TP_fast_assign( + __entry->id = ctx_id; + __entry->prio = ctx_prio; + __entry->timestamp = timestamp; + ), + TP_printk( + "ctx=%u ctx_prio=%u ts=%u", + __entry->id, __entry->prio, __entry->timestamp + ) +); + TRACE_EVENT(adreno_cmdbatch_fault, TP_PROTO(struct kgsl_drawobj_cmd *cmdobj, unsigned int fault), TP_ARGS(cmdobj, fault), diff --git a/kgsl_drawobj.c b/kgsl_drawobj.c index 83ce37938c..208ae04e73 100644 --- a/kgsl_drawobj.c +++ b/kgsl_drawobj.c @@ -1130,6 +1130,7 @@ struct kgsl_drawobj_cmd *kgsl_drawobj_cmd_create(struct kgsl_device *device, INIT_LIST_HEAD(&cmdobj->cmdlist); INIT_LIST_HEAD(&cmdobj->memlist); + cmdobj->requeue_cnt = 0; if (type & CMDOBJ_TYPE) atomic_inc(&context->proc_priv->cmd_count); diff --git a/kgsl_drawobj.h b/kgsl_drawobj.h index faf396ba74..03ee97dd82 100644 --- a/kgsl_drawobj.h +++ b/kgsl_drawobj.h @@ -85,6 +85,8 @@ struct kgsl_drawobj_cmd { uint64_t submit_ticks; /* @numibs: Number of ibs in this cmdobj */ u32 numibs; + /* @requeue_cnt: Number of times cmdobj was requeued before submission to dq succeeded */ + u32 requeue_cnt; }; /** From 08f906a2d0479288ea93710ec2ecce9a06cb4b6c Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Wed, 27 Jan 2021 12:29:47 +0530 Subject: [PATCH 021/750] msm: kgsl: Remove undefined HLSQ register dump to A6xx snapshot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 297883f14279 ("msm: kgsl: Dump HLSQ_DBG_CNTL in snapshot") added extra register “0xD004” which does not exists. Remove this register to have only required registers in snapshot dumping. 
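For context, the snapshot register tables are walked as (start, end) pairs describing inclusive register ranges, which is why the entry is tightened from 0xD004 to 0xD003 rather than dropped. A rough sketch of how such a table is consumed (illustrative only; dump_one_register() is a hypothetical helper, not the driver's actual dump routine):

    for (i = 0; i < ARRAY_SIZE(a6xx_registers); i += 2) {
            u32 start = a6xx_registers[i];
            u32 end = a6xx_registers[i + 1];
            u32 offset;

            /* Read every register in the inclusive [start, end] range */
            for (offset = start; offset <= end; offset++)
                    dump_one_register(device, offset);
    }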
Change-Id: I6dcdf6b0fdbcc89ac6854bd1b8a7d20cd375f621 Signed-off-by: Hareesh Gundu --- adreno_a6xx_snapshot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adreno_a6xx_snapshot.c b/adreno_a6xx_snapshot.c index b4bc986351..33eedc0bf0 100644 --- a/adreno_a6xx_snapshot.c +++ b/adreno_a6xx_snapshot.c @@ -288,7 +288,7 @@ static const unsigned int a6xx_registers[] = { 0xA600, 0xA601, 0xA603, 0xA603, 0xA60A, 0xA60A, 0xA610, 0xA617, 0xA630, 0xA630, /* HLSQ */ - 0xD002, 0xD004, + 0xD002, 0xD003, }; static const unsigned int a660_registers[] = { From 7b147bd2e127bc98de5dd1a3d9d3c18c6f648b82 Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Fri, 19 Nov 2021 12:58:36 +0530 Subject: [PATCH 022/750] msm: kgsl: Remove RBBM_GPC_ERROR from a6xx hwsched interrupt mask RBBM_GPC interrupt is handled by GMU for hwsched enabled targets. Hence remove this from the a6xx hwsched interrupt mask. Change-Id: I7cc409514a59e528fa8310640197c1743a9d201d Signed-off-by: Hareesh Gundu --- adreno_a6xx.h | 1 - adreno_a6xx_hwsched_hfi.c | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/adreno_a6xx.h b/adreno_a6xx.h index 66454750dd..e581cce7c5 100644 --- a/adreno_a6xx.h +++ b/adreno_a6xx.h @@ -183,7 +183,6 @@ struct a6xx_cp_smmu_info { #define A6XX_HWSCHED_INT_MASK \ ((1 << A6XX_INT_CP_AHB_ERROR) | \ (1 << A6XX_INT_ATB_ASYNCFIFO_OVERFLOW) | \ - (1 << A6XX_INT_RBBM_GPC_ERROR) | \ (1 << A6XX_INT_RBBM_ATB_BUS_OVERFLOW) | \ (1 << A6XX_INT_UCHE_OOB_ACCESS) | \ (1 << A6XX_INT_UCHE_TRAP_INTR) | \ diff --git a/adreno_a6xx_hwsched_hfi.c b/adreno_a6xx_hwsched_hfi.c index 0a6b6d3a83..2afcc03a7a 100644 --- a/adreno_a6xx_hwsched_hfi.c +++ b/adreno_a6xx_hwsched_hfi.c @@ -285,6 +285,9 @@ static void log_gpu_fault(struct adreno_device *adreno_dev) cur, cur_rptr, cur_wptr, next, next_rptr, next_wptr); } break; + case GMU_CP_GPC_ERROR: + dev_crit_ratelimited(dev, "RBBM: GPC error\n"); + break; default: dev_crit_ratelimited(dev, "Unknown GPU fault: %u\n", cmd->error); From abc40777759fd51ec434f94676db21a11d8845a4 Mon Sep 17 00:00:00 2001 From: Oleg Perelet Date: Tue, 2 Nov 2021 16:13:52 -0700 Subject: [PATCH 023/750] msm: kgsl: Enable bus voting on minimal powerlevel There are usecases where GPU is not busy but GPU consumes high ddr bandwidth. In such case, bus DCVS will not kick in, potentially causing an under voting scenario. Enable bus voting on minimal power level, even if GPU busy is less than 75%. 
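The resulting governor decision reads roughly as follows (paraphrased from the governor_gpubw_mon.c hunk below; up_threshold stands in for priv->bus.up[act_level], and CAP is the GPU-busy threshold, the 75% mentioned above):

    /* Vote the bus up when DDR pressure is high and either the GPU is
     * genuinely busy, or it is already sitting at its minimum frequency,
     * where GPU busy percentage is a poor proxy for DDR demand.
     */
    if ((norm_cycles > up_threshold || wait_active_percent > WAIT_THRESHOLD) &&
        (gpu_percent > CAP || *freq == gpu_minfreq))
            bus_profile->flag = DEVFREQ_FLAG_FAST_HINT;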
Change-Id: I02db2e4b68ce9d48c2f755112f0dcf9912936b56 Signed-off-by: Oleg Perelet Signed-off-by: Mohammed Mirza Mandayappurath Manzoor --- governor_gpubw_mon.c | 2 +- kgsl_pwrscale.c | 2 ++ msm_adreno_devfreq.h | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/governor_gpubw_mon.c b/governor_gpubw_mon.c index f7f19c40e5..f853e46cd9 100644 --- a/governor_gpubw_mon.c +++ b/governor_gpubw_mon.c @@ -154,7 +154,7 @@ static int devfreq_gpubw_get_target(struct devfreq *df, (priv->bus.num - 1) : act_level; if ((norm_cycles > priv->bus.up[act_level] || wait_active_percent > WAIT_THRESHOLD) && - gpu_percent > CAP) + (gpu_percent > CAP || b.gpu_minfreq == *freq)) bus_profile->flag = DEVFREQ_FLAG_FAST_HINT; else if (norm_cycles < priv->bus.down[act_level] && b.buslevel) bus_profile->flag = DEVFREQ_FLAG_SLOW_HINT; diff --git a/kgsl_pwrscale.c b/kgsl_pwrscale.c index 74fe81f09f..d5f6df8f23 100644 --- a/kgsl_pwrscale.c +++ b/kgsl_pwrscale.c @@ -338,6 +338,7 @@ int kgsl_devfreq_get_dev_status(struct device *dev, last_b->ram_time = device->pwrscale.accum_stats.ram_time; last_b->ram_wait = device->pwrscale.accum_stats.ram_wait; last_b->buslevel = device->pwrctrl.cur_buslevel; + last_b->gpu_minfreq = pwrctrl->pwrlevels[pwrctrl->min_pwrlevel].gpu_freq; } kgsl_pwrctrl_busy_time(device, stat->total_time, stat->busy_time); @@ -404,6 +405,7 @@ int kgsl_busmon_get_dev_status(struct device *dev, b->ram_time = last_b->ram_time; b->ram_wait = last_b->ram_wait; b->buslevel = last_b->buslevel; + b->gpu_minfreq = last_b->gpu_minfreq; } return 0; } diff --git a/msm_adreno_devfreq.h b/msm_adreno_devfreq.h index be366cda04..c72924efb7 100644 --- a/msm_adreno_devfreq.h +++ b/msm_adreno_devfreq.h @@ -22,6 +22,7 @@ struct xstats { u64 ram_time; u64 ram_wait; int buslevel; + unsigned long gpu_minfreq; }; struct devfreq_msm_adreno_tz_data { From b019bba722636a1fc338471b3577827a964db0f8 Mon Sep 17 00:00:00 2001 From: Harshitha Sai Neelati Date: Wed, 13 Oct 2021 15:45:29 +0530 Subject: [PATCH 024/750] msm: kgsl: Enable IFPC for A662 GPU Enable IFPC feature for A662 GPU. Change-Id: I59b6958600c24f952f54c3a041ef5b3cb79ba469 Signed-off-by: Harshitha Sai Neelati --- adreno-gpulist.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index 3c428535bc..a676433212 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -1716,7 +1716,8 @@ static const struct adreno_a6xx_core adreno_gpu_core_a662 = { .base = { DEFINE_ADRENO_REV(ADRENO_REV_A662, 6, 6, 2, ANY_ID), .features = ADRENO_APRIV | ADRENO_IOCOHERENT | - ADRENO_CONTENT_PROTECTION | ADRENO_PREEMPTION, + ADRENO_CONTENT_PROTECTION | ADRENO_PREEMPTION | + ADRENO_IFPC, .gpudev = &adreno_a6xx_gmu_gpudev.base, .perfcounters = &adreno_a6xx_perfcounters, .gmem_base = 0, From a62385f47a48b318d23a0d0072f4119d06ec9e61 Mon Sep 17 00:00:00 2001 From: Puranam V G Tejaswi Date: Mon, 17 May 2021 14:41:40 +0530 Subject: [PATCH 025/750] msm: kgsl: Signal fence only if last fence refcount was not put Currently there is a chance that release for the fence was already called before we call dma_fence_get during kgsl_timeline_signal and kgsl_ioctl_timeline_destroy. This can cause use-after-free issue as we can access fence after release. Fix this by signalling fence only if the last refcount on the fence was not yet put. This makes sure that release for the fence will not be called until we are done signalling. 
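The core of the change can be summarized as follows (mirroring the kgsl_timeline.c hunk below):

    /* Old: dma_fence_get() takes a reference unconditionally, so if the
     * final put already happened on another CPU the fence may be freed
     * while it is still being signalled here.
     *
     * New: only claim fences whose refcount is still non-zero; anything
     * else is left for the in-flight release path to clean up.
     */
    if (timeline_fence_signaled(&fence->base) &&
        kref_get_unless_zero(&fence->base.refcount))
            list_move(&fence->node, &temp);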
Change-Id: I6bdcefa1f128febb7a0f7aef133757268a3b9ae3 Signed-off-by: Puranam V G Tejaswi Signed-off-by: Pranav Patel --- kgsl_timeline.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/kgsl_timeline.c b/kgsl_timeline.c index b499face00..e07db2569f 100644 --- a/kgsl_timeline.c +++ b/kgsl_timeline.c @@ -272,12 +272,10 @@ void kgsl_timeline_signal(struct kgsl_timeline *timeline, u64 seqno) timeline->value = seqno; spin_lock(&timeline->fence_lock); - list_for_each_entry_safe(fence, tmp, &timeline->fences, node) { - if (timeline_fence_signaled(&fence->base)) { - dma_fence_get(&fence->base); + list_for_each_entry_safe(fence, tmp, &timeline->fences, node) + if (timeline_fence_signaled(&fence->base) && + kref_get_unless_zero(&fence->base.refcount)) list_move(&fence->node, &temp); - } - } spin_unlock(&timeline->fence_lock); list_for_each_entry_safe(fence, tmp, &temp, node) { @@ -552,7 +550,8 @@ long kgsl_ioctl_timeline_destroy(struct kgsl_device_private *dev_priv, spin_lock(&timeline->fence_lock); list_for_each_entry_safe(fence, tmp, &timeline->fences, node) - dma_fence_get(&fence->base); + if (!kref_get_unless_zero(&fence->base.refcount)) + list_del_init(&fence->node); list_replace_init(&timeline->fences, &temp); spin_unlock(&timeline->fence_lock); From d99092f93bf6d7d0341fc786ee74d735c1cf98c3 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Fri, 19 Nov 2021 22:49:05 +0530 Subject: [PATCH 026/750] msm: kgsl: Dump context information in snapshot Current context is not dumped during snapshot. Also, context count and timestamps are dumped only for gmu faults. These information can be helpful to debug issues. Hence, dump the context information in snapshot. Change-Id: I71babee314a4abede3a7af91ffc094c6d868288f Signed-off-by: Kamal Agrawal --- adreno_snapshot.c | 49 +++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/adreno_snapshot.c b/adreno_snapshot.c index fd9834f073..59877edf4c 100644 --- a/adreno_snapshot.c +++ b/adreno_snapshot.c @@ -839,13 +839,16 @@ static void adreno_snapshot_ringbuffer(struct kgsl_device *device, } static void adreno_snapshot_os(struct kgsl_device *device, - struct kgsl_snapshot *snapshot, struct kgsl_context *guilty, - bool dump_contexts) + struct kgsl_snapshot *snapshot, struct kgsl_context *guilty) { struct kgsl_snapshot_section_header *sect = (struct kgsl_snapshot_section_header *) snapshot->ptr; struct kgsl_snapshot_linux_v2 *header = (struct kgsl_snapshot_linux_v2 *) (snapshot->ptr + sizeof(*sect)); + struct kgsl_context *context; + u32 remain; + void *mem; + int id; if (snapshot->remain < (sizeof(*sect) + sizeof(*header))) { SNAPSHOT_ERR_NOMEM(device, "OS"); @@ -869,41 +872,37 @@ static void adreno_snapshot_os(struct kgsl_device *device, /* If we know the guilty context then dump it */ if (guilty) { + header->current_context = guilty->id; header->pid = guilty->tid; strlcpy(header->comm, guilty->proc_priv->comm, sizeof(header->comm)); } - if (dump_contexts) { - u32 remain = snapshot->remain - sizeof(*sect) + sizeof(*header); - void *mem = snapshot->ptr + sizeof(*sect) + sizeof(*header); - struct kgsl_context *context; - int id; + remain = snapshot->remain - sizeof(*sect) + sizeof(*header); + mem = snapshot->ptr + sizeof(*sect) + sizeof(*header); - read_lock(&device->context_lock); - idr_for_each_entry(&device->context_idr, context, id) { - struct kgsl_snapshot_linux_context_v2 *c = mem; + read_lock(&device->context_lock); + idr_for_each_entry(&device->context_idr, context, id) { + 
struct kgsl_snapshot_linux_context_v2 *c = mem; - if (remain < sizeof(*c)) - break; + if (remain < sizeof(*c)) + break; - kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_QUEUED, - &c->timestamp_queued); + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_QUEUED, + &c->timestamp_queued); - kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_CONSUMED, - &c->timestamp_consumed); + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_CONSUMED, + &c->timestamp_consumed); - kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED, - &c->timestamp_retired); + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED, + &c->timestamp_retired); - header->ctxtcount++; + header->ctxtcount++; - mem += sizeof(*c); - remain -= sizeof(*c); - - } - read_unlock(&device->context_lock); + mem += sizeof(*c); + remain -= sizeof(*c); } + read_unlock(&device->context_lock); sect->magic = SNAPSHOT_SECTION_MAGIC; sect->id = KGSL_SNAPSHOT_SECTION_OS; @@ -943,7 +942,7 @@ void adreno_snapshot(struct kgsl_device *device, struct kgsl_snapshot *snapshot, snapshot->size += sizeof(*header); /* Write the OS section */ - adreno_snapshot_os(device, snapshot, context, device->gmu_fault); + adreno_snapshot_os(device, snapshot, context); ib_max_objs = 0; /* Reset the list of objects */ From 1bfa35fdba8ffc9bb673d5c4d3cba52c32f73dd1 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 25 Nov 2021 21:45:51 +0530 Subject: [PATCH 027/750] msm: kgsl: Ignore thermal requests until first boot is done There is a possible deadlock scenario during kgsl firmware reading (request_firmware) and thermal notifier calls. During first boot, kgsl device mutex is held and then request_firmware is called for reading firmware. request_firmware internally takes dev_pm_qos_mtx lock. Whereas in case of thermal notifier calls, it first takes the same dev_pm_qos_mtx lock and then tries to take kgsl device mutex. This results in deadlock when both threads are unable to acquire the mutex held by other thread as shown in call stack below. Call stack: CPU0: mutex_lock --> waiting for kgsl device mutex thermal_max_notifier_call pm_qos_update_target apply_constraint __dev_pm_qos_update_request dev_pm_qos_update_request ---> takes dev_pm_qos_mtx mutex devfreq_cooling_set_cur_state thermal_cdev_update step_wise_throttle handle_thermal_trip CPU1: __mutex_lock __mutex_lock_slowpath ---> waiting for dev_pm_qos_mtx mutex dev_pm_qos_constraints_destroy dpm_sysfs_remove device_del fw_load_sysfs_fallback fw_load_from_user_helper firmware_fallback_sysfs _request_firmware request_firmware kgsl_zap_shader_load genc_rb_start genc_gpu_boot genc_first_boot genc_gmu_first_open adreno_first_open kgsl_open ---> takes kgsl device mutex Fix this by ensuring thermal notifier calls are not exercised till first boot is done i.e. till we are done reading all firmware files. 
Change-Id: I185c07f1491afddf820cbad30202733dff915125 Signed-off-by: Kamal Agrawal --- kgsl_pwrscale.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kgsl_pwrscale.c b/kgsl_pwrscale.c index d5f6df8f23..dd6d7fa2b3 100644 --- a/kgsl_pwrscale.c +++ b/kgsl_pwrscale.c @@ -645,6 +645,9 @@ static int thermal_max_notifier_call(struct notifier_block *nb, unsigned long va u32 max_freq = val * 1000; int level; + if (!device->pwrscale.devfreq_enabled) + return NOTIFY_DONE; + for (level = pwr->num_pwrlevels - 1; level >= 0; level--) { /* get nearest power level with a maximum delta of 5MHz */ if (abs(pwr->pwrlevels[level].gpu_freq - max_freq) < 5000000) From 907f856d4bd82ff3a8a02b165ba977ce01d4df7b Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Tue, 19 Oct 2021 12:55:51 +0530 Subject: [PATCH 028/750] msm: kgsl: Add GPU fault report To aid developers in tracking down errors in the application, expose a new IOCTL to provide GPU fault report. This will help to retrieve diagnostic information about faults that might have caused GPU hang. Application developers can use this information to debug issues. Faults tracking for a context can be enabled by specifying a flag (KGSL_CONTEXT_FAULT_INFO) during context creation. Fault report can be queried with new IOCTL_KGSL_GET_FAULT_REPORT ioctl once the context is invalidated. Change-Id: I7372b18f3b235183bc5dd070a7bdf92a0484bacb Signed-off-by: Kamal Agrawal --- adreno_drawctxt.c | 5 +- include/uapi/linux/msm_kgsl.h | 85 ++++++++++++++ kgsl.c | 207 ++++++++++++++++++++++++++++++++++ kgsl.h | 2 + kgsl_device.h | 39 +++++++ kgsl_ioctl.c | 2 + kgsl_iommu.c | 32 ++++++ 7 files changed, 370 insertions(+), 2 deletions(-) diff --git a/adreno_drawctxt.c b/adreno_drawctxt.c index 0844413fee..dcc163f2ab 100644 --- a/adreno_drawctxt.c +++ b/adreno_drawctxt.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (c) 2002,2007-2020, The Linux Foundation. All rights reserved. + * Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved. */ #include @@ -338,7 +338,8 @@ adreno_drawctxt_create(struct kgsl_device_private *dev_priv, KGSL_CONTEXT_IFH_NOP | KGSL_CONTEXT_SECURE | KGSL_CONTEXT_PREEMPT_STYLE_MASK | - KGSL_CONTEXT_NO_SNAPSHOT); + KGSL_CONTEXT_NO_SNAPSHOT | + KGSL_CONTEXT_FAULT_INFO); /* Check for errors before trying to initialize */ diff --git a/include/uapi/linux/msm_kgsl.h b/include/uapi/linux/msm_kgsl.h index 4b67887f09..f3395a6ec5 100644 --- a/include/uapi/linux/msm_kgsl.h +++ b/include/uapi/linux/msm_kgsl.h @@ -74,6 +74,7 @@ #define KGSL_CONTEXT_TYPE_UNKNOWN 0x1E #define KGSL_CONTEXT_INVALIDATE_ON_FAULT 0x10000000 +#define KGSL_CONTEXT_FAULT_INFO 0x40000000 #define KGSL_CONTEXT_INVALID 0xffffffff @@ -1998,4 +1999,88 @@ struct kgsl_gpu_aux_command_timeline { __u32 timelines_size; }; +/* Macros for fault type used in kgsl_fault structure */ +#define KGSL_FAULT_TYPE_NO_FAULT 0 +#define KGSL_FAULT_TYPE_PAGEFAULT 1 +#define KGSL_FAULT_TYPE_MAX 2 + +/* Macros to be used in kgsl_pagefault_report structure */ +#define KGSL_PAGEFAULT_TYPE_NONE 0 +#define KGSL_PAGEFAULT_TYPE_READ (1 << 0) +#define KGSL_PAGEFAULT_TYPE_WRITE (1 << 1) +#define KGSL_PAGEFAULT_TYPE_TRANSLATION (1 << 2) +#define KGSL_PAGEFAULT_TYPE_PERMISSION (1 << 3) +#define KGSL_PAGEFAULT_TYPE_EXTERNAL (1 << 4) +#define KGSL_PAGEFAULT_TYPE_TRANSACTION_STALLED (1 << 5) + +/** + * struct kgsl_pagefault_report - Descriptor for each page fault + * @fault_addr: page fault address + * @fault_type: type of page fault + * + * Contains information about supported GPU page fault. 
+ * Supported fault type: KGSL_PAGEFAULT_TYPE_* + */ +struct kgsl_pagefault_report { + __u64 fault_addr; + /* private: reserved for future use */ + __u64 reserved[2]; + __u32 fault_type; + /* private: padding for 64 bit compatibility */ + __u32 __pad; +}; + +/** + * struct kgsl_fault - Descriptor for each GPU fault type + * @fault: User memory pointer to list of specific fault type + * @type: Type of gpu fault + * @count: Number of entries in @fault + * @size: Size of each entry in @fault in bytes + * + * Contains information about each GPU fault type. If user passes 0 for all the fields, KGSL + * will return the @count and @type of fault. Based on this, user can allocate a buffer for + * specific fault type, fill the @fault and specify the structure size of type specific fault + * in @size. User can walk through @fault list to parse the fault type specific information. + * + * Supported type: KGSL_FAULT_TYPE_* + */ +struct kgsl_fault { + __u64 fault; + __u32 type; + __u32 count; + __u32 size; + /* private: padding for 64 bit compatibility */ + __u32 padding; +}; + +/** + * struct kgsl_fault_report - Container for list of GPU faults + * @faultlist: User memory pointer to list of fault descriptor &struct kgsl_fault + * @faultnents: Number of entries in @faultlist. Each entry corresponds to a fault type i.e. + * KGSL_FAULT_TYPE_* + * @faultsize: Size of each entry in @faultlist in bytes + * @context_id: ID of a KGSL context + * + * Returns a list of GPU faults for a context identified by @context_id. If the user specifies + * @context_id only, then KGSL will set the @faultnents to the number of fault types it has + * for that context. + * + * User is expected to allocate an array of @struct kgsl_fault with @faultnents number of entries + * and fill the @faultlist field. On calling @IOCTL_KGSL_GET_FAULT_REPORT, KGSL will return the + * type and count for each fault. Based on this, user needs to update the @kgsl_fault structure. + * Then, it should call the @IOCTL_KGSL_GET_FAULT_REPORT again for kernel to fill the fault + * information. 
+ */ +struct kgsl_fault_report { + __u64 faultlist; + __u32 faultnents; + __u32 faultsize; + __u32 context_id; + /* private: padding for 64 bit compatibility */ + __u32 padding; +}; + +#define IOCTL_KGSL_GET_FAULT_REPORT \ + _IOWR(KGSL_IOC_TYPE, 0x5E, struct kgsl_fault_report) + #endif /* _UAPI_MSM_KGSL_H */ diff --git a/kgsl.c b/kgsl.c index 1a72257c21..e69c7bc516 100644 --- a/kgsl.c +++ b/kgsl.c @@ -683,6 +683,8 @@ int kgsl_context_init(struct kgsl_device_private *dev_priv, context->id = id; + mutex_init(&context->fault_lock); + INIT_LIST_HEAD(&context->faults); kref_init(&context->refcount); /* * Get a refernce to the process private so its not destroyed, until @@ -718,6 +720,20 @@ out: return ret; } +void kgsl_free_faults(struct kgsl_context *context) +{ + struct kgsl_fault_node *p, *tmp; + + if (!(context->flags & KGSL_CONTEXT_FAULT_INFO)) + return; + + list_for_each_entry_safe(p, tmp, &context->faults, node) { + list_del(&p->node); + kfree(p->priv); + kfree(p); + } +} + /** * kgsl_context_detach() - Release the "master" context reference * @context: The context that will be detached @@ -779,6 +795,7 @@ kgsl_context_destroy(struct kref *kref) */ BUG_ON(!kgsl_context_detached(context)); + kgsl_free_faults(context); kgsl_sync_timeline_put(context->ktimeline); write_lock(&device->context_lock); @@ -3437,6 +3454,196 @@ out: return ret; } +static int kgsl_update_fault_details(struct kgsl_context *context, + void __user *ptr, u32 faultnents, u32 faultsize) +{ + u32 size = min_t(u32, sizeof(struct kgsl_fault), faultsize); + u32 cur_idx[KGSL_FAULT_TYPE_MAX] = {0}; + struct kgsl_fault_node *fault_node; + struct kgsl_fault *faults; + int i, ret = 0; + + faults = kcalloc(KGSL_FAULT_TYPE_MAX, sizeof(struct kgsl_fault), + GFP_KERNEL); + if (!faults) + return -ENOMEM; + + for (i = 0; i < faultnents; i++) { + struct kgsl_fault fault = {0}; + + if (copy_from_user(&fault, ptr + i * faultsize, size)) { + ret = -EFAULT; + goto err; + } + + if (fault.type >= KGSL_FAULT_TYPE_MAX) { + ret = -EINVAL; + goto err; + } + + memcpy(&faults[fault.type], &fault, sizeof(fault)); + } + + list_for_each_entry(fault_node, &context->faults, node) { + u32 fault_type = fault_node->type; + + if (cur_idx[fault_type] >= faults[fault_type].count) + continue; + + switch (fault_type) { + case KGSL_FAULT_TYPE_PAGEFAULT: + size = sizeof(struct kgsl_pagefault_report); + } + + size = min_t(u32, size, faults[fault_type].size); + + if (copy_to_user(u64_to_user_ptr(faults[fault_type].fault + + cur_idx[fault_type] * faults[fault_type].size), + fault_node->priv, size)) { + ret = -EFAULT; + goto err; + } + + cur_idx[fault_type] += 1; + } + +err: + kfree(faults); + return ret; +} + +static int kgsl_update_fault_count(struct kgsl_context *context, + void __user *faults, u32 faultnents, u32 faultsize) +{ + u32 size = min_t(u32, sizeof(struct kgsl_fault), faultsize); + u32 faultcount[KGSL_FAULT_TYPE_MAX] = {0}; + struct kgsl_fault_node *fault_node; + int i, j; + + list_for_each_entry(fault_node, &context->faults, node) + faultcount[fault_node->type]++; + + /* KGSL_FAULT_TYPE_NO_FAULT (i.e. 
0) is not an actual fault type */ + for (i = 0, j = 1; i < faultnents && j < KGSL_FAULT_TYPE_MAX; j++) { + struct kgsl_fault fault = {0}; + + if (!faultcount[j]) + continue; + + fault.type = j; + fault.count = faultcount[j]; + + if (copy_to_user(faults, &fault, size)) + return -EFAULT; + + faults += faultsize; + i++; + } + + return 0; +} + +long kgsl_ioctl_get_fault_report(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_fault_report *param = data; + u32 size = min_t(u32, sizeof(struct kgsl_fault), param->faultsize); + void __user *ptr = u64_to_user_ptr(param->faultlist); + struct kgsl_context *context; + int i, ret = 0; + + context = kgsl_context_get_owner(dev_priv, param->context_id); + if (!context) + return -EINVAL; + + /* This IOCTL is valid for invalidated contexts only */ + if (!(context->flags & KGSL_CONTEXT_FAULT_INFO) || + !kgsl_context_invalid(context)) { + ret = -EINVAL; + goto err; + } + + /* Return the number of fault types */ + if (!param->faultlist) { + param->faultnents = KGSL_FAULT_TYPE_MAX; + kgsl_context_put(context); + return 0; + } + + /* Check if it's a request to get fault counts or to fill the fault information */ + for (i = 0; i < param->faultnents; i++) { + struct kgsl_fault fault = {0}; + + if (copy_from_user(&fault, ptr, size)) { + ret = -EFAULT; + goto err; + } + + if (fault.fault) + break; + + ptr += param->faultsize; + } + + ptr = u64_to_user_ptr(param->faultlist); + + if (i == param->faultnents) + ret = kgsl_update_fault_count(context, ptr, param->faultnents, + param->faultsize); + else + ret = kgsl_update_fault_details(context, ptr, param->faultnents, + param->faultsize); + +err: + kgsl_context_put(context); + return ret; +} + +int kgsl_add_fault(struct kgsl_context *context, u32 type, void *priv) +{ + struct kgsl_fault_node *fault, *p, *tmp; + int length = 0; + ktime_t tout; + + if (kgsl_context_is_bad(context)) + return -EINVAL; + + fault = kmalloc(sizeof(struct kgsl_fault_node), GFP_KERNEL); + if (!fault) + return -ENOMEM; + + fault->type = type; + fault->priv = priv; + fault->time = ktime_get(); + + tout = ktime_sub_ms(ktime_get(), KGSL_MAX_FAULT_TIME_THRESHOLD); + + mutex_lock(&context->fault_lock); + + list_for_each_entry_safe(p, tmp, &context->faults, node) { + if (ktime_compare(p->time, tout) > 0) { + length++; + continue; + } + + list_del(&p->node); + kfree(p->priv); + kfree(p); + } + + if (length == KGSL_MAX_FAULT_ENTRIES) { + tmp = list_first_entry(&context->faults, struct kgsl_fault_node, node); + list_del(&tmp->node); + kfree(tmp->priv); + kfree(tmp); + } + + list_add_tail(&fault->node, &context->faults); + mutex_unlock(&context->fault_lock); + + return 0; +} + #ifdef CONFIG_ARM64 static uint64_t kgsl_filter_cachemode(uint64_t flags) { diff --git a/kgsl.h b/kgsl.h index 5bc6f64262..fa5d904e1e 100644 --- a/kgsl.h +++ b/kgsl.h @@ -474,6 +474,8 @@ long kgsl_ioctl_timeline_signal(struct kgsl_device_private *dev_priv, unsigned int cmd, void *data); long kgsl_ioctl_timeline_destroy(struct kgsl_device_private *dev_priv, unsigned int cmd, void *data); +long kgsl_ioctl_get_fault_report(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); void kgsl_mem_entry_destroy(struct kref *kref); diff --git a/kgsl_device.h b/kgsl_device.h index dc81abf235..4808436853 100644 --- a/kgsl_device.h +++ b/kgsl_device.h @@ -329,6 +329,25 @@ enum kgsl_context_priv { struct kgsl_process_private; +#define KGSL_MAX_FAULT_ENTRIES 40 + +/* Maintain faults observed within threshold time (in milliseconds) */ +#define 
KGSL_MAX_FAULT_TIME_THRESHOLD 5000 + +/** + * struct kgsl_fault_node - GPU fault descriptor + * @node: List node for list of faults + * @type: Type of fault + * @priv: Pointer to type specific fault + * @time: Time when fault was observed + */ +struct kgsl_fault_node { + struct list_head node; + u32 type; + void *priv; + ktime_t time; +}; + /** * struct kgsl_context - The context fields that are valid for a user defined * context @@ -382,6 +401,10 @@ struct kgsl_context { * submitted */ u32 gmu_dispatch_queue; + /** @faults: List of @kgsl_fault_node to store fault information */ + struct list_head faults; + /** @fault_lock: Mutex to protect faults */ + struct mutex fault_lock; }; #define _context_comm(_c) \ @@ -944,6 +967,22 @@ static inline void kgsl_mmu_set_feature(struct kgsl_device *device, set_bit(feature, &device->mmu.features); } +/** + * kgsl_add_fault - Add fault information for a context + * @context: Pointer to the KGSL context + * @type: type of fault info + * @priv: Pointer to type specific fault info + * + * Return: 0 on success or error code on failure. + */ +int kgsl_add_fault(struct kgsl_context *context, u32 type, void *priv); + +/** + * kgsl_free_faults - Free fault information for a context + * @context: Pointer to the KGSL context + */ +void kgsl_free_faults(struct kgsl_context *context); + /** * kgsl_trace_gpu_mem_total - Overall gpu memory usage tracking which includes * process allocations, imported dmabufs and kgsl globals diff --git a/kgsl_ioctl.c b/kgsl_ioctl.c index c6b55641a8..7fdf03dabd 100644 --- a/kgsl_ioctl.c +++ b/kgsl_ioctl.c @@ -100,6 +100,8 @@ static const struct kgsl_ioctl kgsl_ioctl_funcs[] = { kgsl_ioctl_timeline_signal), KGSL_IOCTL_FUNC(IOCTL_KGSL_TIMELINE_DESTROY, kgsl_ioctl_timeline_destroy), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GET_FAULT_REPORT, + kgsl_ioctl_get_fault_report), }; long kgsl_ioctl_copy_in(unsigned int kernel_cmd, unsigned int user_cmd, diff --git a/kgsl_iommu.c b/kgsl_iommu.c index e1e845d6b0..8b16a8eba2 100644 --- a/kgsl_iommu.c +++ b/kgsl_iommu.c @@ -795,6 +795,37 @@ static struct kgsl_process_private *kgsl_iommu_get_process(u64 ptbase) return NULL; } +static void kgsl_iommu_add_fault_info(struct kgsl_context *context, + unsigned long addr, int flags) +{ + struct kgsl_pagefault_report *report; + u32 fault_flag = 0; + + if (!context || !(context->flags & KGSL_CONTEXT_FAULT_INFO)) + return; + + report = kzalloc(sizeof(struct kgsl_pagefault_report), GFP_KERNEL); + if (!report) + return; + + if (flags & IOMMU_FAULT_TRANSLATION) + fault_flag = KGSL_PAGEFAULT_TYPE_TRANSLATION; + else if (flags & IOMMU_FAULT_PERMISSION) + fault_flag = KGSL_PAGEFAULT_TYPE_PERMISSION; + else if (flags & IOMMU_FAULT_EXTERNAL) + fault_flag = KGSL_PAGEFAULT_TYPE_EXTERNAL; + else if (flags & IOMMU_FAULT_TRANSACTION_STALLED) + fault_flag = KGSL_PAGEFAULT_TYPE_TRANSACTION_STALLED; + + fault_flag |= (flags & IOMMU_FAULT_WRITE) ? 
KGSL_PAGEFAULT_TYPE_WRITE : + KGSL_PAGEFAULT_TYPE_READ; + + report->fault_addr = addr; + report->fault_type = fault_flag; + if (kgsl_add_fault(context, KGSL_FAULT_TYPE_PAGEFAULT, report)) + kfree(report); +} + static void kgsl_iommu_print_fault(struct kgsl_mmu *mmu, struct kgsl_iommu_context *ctxt, unsigned long addr, u64 ptbase, u32 contextid, @@ -957,6 +988,7 @@ static int kgsl_iommu_fault_handler(struct kgsl_mmu *mmu, kgsl_iommu_print_fault(mmu, ctx, addr, ptbase, contextidr, flags, private, context); + kgsl_iommu_add_fault_info(context, addr, flags); if (stall) { struct adreno_device *adreno_dev = ADRENO_DEVICE(device); From b2ab4c577f213cb1eb63dded51f9605aaaba6a70 Mon Sep 17 00:00:00 2001 From: Akhil P Oommen Date: Mon, 29 Nov 2021 11:26:28 +0530 Subject: [PATCH 029/750] msm: kgsl: Fix preemption in a6x Earlier we used to submit CP_SET_PSEUDO_REGISTER packet for every cmdbatch. This was recently optimized to submit only once for each RB and during context switch. Switch back to old sequence for a6x family since the new one is not supported in a6x family. Change-Id: Id05000de619d9800f770b4eee6c4ca157c4ebbc2 Signed-off-by: Akhil P Oommen --- adreno_a6xx_preempt.c | 20 ++++++++++++-------- adreno_a6xx_ringbuffer.c | 10 +--------- 2 files changed, 13 insertions(+), 17 deletions(-) diff --git a/adreno_a6xx_preempt.c b/adreno_a6xx_preempt.c index 1d5596dc87..ecc9b8b6d5 100644 --- a/adreno_a6xx_preempt.c +++ b/adreno_a6xx_preempt.c @@ -491,14 +491,17 @@ u32 a6xx_preemption_pre_ibsubmit(struct adreno_device *adreno_dev, u32 *cmds) { unsigned int *cmds_orig = cmds; + uint64_t gpuaddr = 0; if (!adreno_is_preemption_enabled(adreno_dev)) return 0; - if (test_and_set_bit(ADRENO_RB_SET_PSEUDO_DONE, &rb->flags)) - goto done; - - *cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 12); + if (drawctxt) { + gpuaddr = drawctxt->base.user_ctxt_record->memdesc.gpuaddr; + *cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 15); + } else { + *cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 12); + } /* NULL SMMU_INFO buffer - we track in KMD */ *cmds++ = SET_PSEUDO_SMMU_INFO; @@ -511,6 +514,11 @@ u32 a6xx_preemption_pre_ibsubmit(struct adreno_device *adreno_dev, cmds += cp_gpuaddr(adreno_dev, cmds, rb->secure_preemption_desc->gpuaddr); + if (drawctxt) { + *cmds++ = SET_PSEUDO_NON_PRIV_SAVE_ADDR; + cmds += cp_gpuaddr(adreno_dev, cmds, gpuaddr); + } + /* * There is no need to specify this address when we are about to * trigger preemption. 
This is because CP internally stores this @@ -522,12 +530,10 @@ u32 a6xx_preemption_pre_ibsubmit(struct adreno_device *adreno_dev, cmds += cp_gpuaddr(adreno_dev, cmds, rb->perfcounter_save_restore_desc->gpuaddr); -done: if (drawctxt) { struct adreno_ringbuffer *rb = drawctxt->rb; uint64_t dest = adreno_dev->preempt.scratch->gpuaddr + (rb->id * sizeof(u64)); - u64 gpuaddr = drawctxt->base.user_ctxt_record->memdesc.gpuaddr; *cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 2); cmds += cp_gpuaddr(adreno_dev, cmds, dest); @@ -607,8 +613,6 @@ void a6xx_preemption_start(struct adreno_device *adreno_dev) adreno_ringbuffer_set_pagetable(device, rb, device->mmu.defaultpagetable); - - clear_bit(ADRENO_RB_SET_PSEUDO_DONE, &rb->flags); } } diff --git a/adreno_a6xx_ringbuffer.c b/adreno_a6xx_ringbuffer.c index 6599c264dc..fe36694eb2 100644 --- a/adreno_a6xx_ringbuffer.c +++ b/adreno_a6xx_ringbuffer.c @@ -61,7 +61,7 @@ static int a6xx_rb_context_switch(struct adreno_device *adreno_dev, adreno_drawctxt_get_pagetable(drawctxt); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); int count = 0; - u32 cmds[36]; + u32 cmds[32]; if (adreno_drawctxt_get_pagetable(rb->drawctxt_active) != pagetable) count += a6xx_rb_pagetable_switch(adreno_dev, rb, drawctxt, @@ -87,14 +87,6 @@ static int a6xx_rb_context_switch(struct adreno_device *adreno_dev, cmds[count++] = cp_type7_packet(CP_EVENT_WRITE, 1); cmds[count++] = 0x31; - if (adreno_is_preemption_enabled(adreno_dev)) { - u64 gpuaddr = drawctxt->base.user_ctxt_record->memdesc.gpuaddr; - - cmds[count++] = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 3); - cmds[count++] = SET_PSEUDO_NON_PRIV_SAVE_ADDR; - count += cp_gpuaddr(adreno_dev, &cmds[count], gpuaddr); - } - return a6xx_ringbuffer_addcmds(adreno_dev, rb, NULL, F_NOTPROTECTED, cmds, count, 0, NULL); } From 63a813a6296875834749647ba7c8cdeecf8b69d0 Mon Sep 17 00:00:00 2001 From: Akhil P Oommen Date: Thu, 2 Dec 2021 15:06:43 +0530 Subject: [PATCH 030/750] msm: kgsl: Update a662 configuration Correct the gmem size and prim_fifo_threshold configuration for a662 gpu. Change-Id: I029cf8d806e34f9dc8e4a1b92908629f67e59248 Signed-off-by: Akhil P Oommen --- adreno-gpulist.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/adreno-gpulist.h b/adreno-gpulist.h index a676433212..59236118e8 100644 --- a/adreno-gpulist.h +++ b/adreno-gpulist.h @@ -1721,11 +1721,11 @@ static const struct adreno_a6xx_core adreno_gpu_core_a662 = { .gpudev = &adreno_a6xx_gmu_gpudev.base, .perfcounters = &adreno_a6xx_perfcounters, .gmem_base = 0, - .gmem_size = SZ_512K, + .gmem_size = SZ_1M + SZ_512K, .bus_width = 32, .snapshot_size = SZ_2M, }, - .prim_fifo_threshold = 0x00200000, + .prim_fifo_threshold = 0x00300000, .gmu_major = 2, .gmu_minor = 0, .sqefw_name = "a660_sqe.fw", From ab186fed8f7fa71a5f92b7f20c03c6c3157c4cde Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Wed, 24 Nov 2021 14:34:19 -0700 Subject: [PATCH 031/750] msm: kgsl: Re-enable IOCTLs for securemem for 32-bit processes Now that we have restored 32-bit secure VA for 32-bit processes, we can re-enable these IOCTLs. 
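For reference, the guard being dropped by the hunks below has this shape (a sketch reconstructed from the removed lines, not new driver code): a secure allocation request from a 32-bit (compat) task on a 64-bit MMU was rejected outright.

	/* Previously: refuse secure allocations from compat tasks when the
	 * secure region sits above the 32-bit VA range. */
	if ((param->flags & KGSL_MEMFLAGS_SECURE) && is_compat_task() &&
	    test_bit(KGSL_MMU_64BIT, &device->mmu.features))
		return -EOPNOTSUPP;

The 32-bit secure window itself is restored by patch 035 later in this series, which is what makes this rejection unnecessary.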
Change-Id: I887a59b675f06ab984085414056848207a96456c Signed-off-by: Harshdeep Dhatt --- kgsl.c | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/kgsl.c b/kgsl.c index e69c7bc516..002d314fe1 100644 --- a/kgsl.c +++ b/kgsl.c @@ -3081,15 +3081,6 @@ long kgsl_ioctl_map_user_mem(struct kgsl_device_private *dev_priv, if (!check_and_warn_secured(device)) return -EOPNOTSUPP; - /* - * On 64 bit kernel, secure memory region is expanded and - * moved to 64 bit address, 32 bit apps can not access it from - * this IOCTL. - */ - if (is_compat_task() && - test_bit(KGSL_MMU_64BIT, &device->mmu.features)) - return -EOPNOTSUPP; - /* Can't use CPU map with secure buffers */ if (param->flags & KGSL_MEMFLAGS_USE_CPU_MAP) return -EINVAL; @@ -3875,20 +3866,10 @@ long kgsl_ioctl_gpuobj_alloc(struct kgsl_device_private *dev_priv, long kgsl_ioctl_gpumem_alloc(struct kgsl_device_private *dev_priv, unsigned int cmd, void *data) { - struct kgsl_device *device = dev_priv->device; struct kgsl_gpumem_alloc *param = data; struct kgsl_mem_entry *entry; uint64_t flags = param->flags; - /* - * On 64 bit kernel, secure memory region is expanded and - * moved to 64 bit address, 32 bit apps can not access it from - * this IOCTL. - */ - if ((param->flags & KGSL_MEMFLAGS_SECURE) && is_compat_task() - && test_bit(KGSL_MMU_64BIT, &device->mmu.features)) - return -EOPNOTSUPP; - /* Legacy functions doesn't support these advanced features */ flags &= ~((uint64_t) KGSL_MEMFLAGS_USE_CPU_MAP); @@ -3913,20 +3894,10 @@ long kgsl_ioctl_gpumem_alloc(struct kgsl_device_private *dev_priv, long kgsl_ioctl_gpumem_alloc_id(struct kgsl_device_private *dev_priv, unsigned int cmd, void *data) { - struct kgsl_device *device = dev_priv->device; struct kgsl_gpumem_alloc_id *param = data; struct kgsl_mem_entry *entry; uint64_t flags = param->flags; - /* - * On 64 bit kernel, secure memory region is expanded and - * moved to 64 bit address, 32 bit apps can not access it from - * this IOCTL. - */ - if ((param->flags & KGSL_MEMFLAGS_SECURE) && is_compat_task() - && test_bit(KGSL_MMU_64BIT, &device->mmu.features)) - return -EOPNOTSUPP; - if (is_compat_task()) flags |= KGSL_MEMFLAGS_FORCE_32BIT; From 4fd5db8534f1bd21dd451896966d4de6a621306e Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Fri, 12 Nov 2021 15:00:03 -0700 Subject: [PATCH 032/750] msm: kgsl: Fix memory leak in VBOs We take a refcount on the child mem entry when creating a bind range, but never put it back. This leads to memory leak, even when process has exited. Put back this reference when removing this bind range. Change-Id: I7e7f4b4cb36fa2d5d20a80b28890c9c77c69d7e2 Signed-off-by: Harshdeep Dhatt --- kgsl_vbo.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kgsl_vbo.c b/kgsl_vbo.c index 091f2ecc3f..ff7488d297 100644 --- a/kgsl_vbo.c +++ b/kgsl_vbo.c @@ -111,6 +111,7 @@ static void kgsl_memdesc_remove_range(struct kgsl_mem_entry *target, kgsl_mmu_map_zero_page_to_range(memdesc->pagetable, memdesc, range->range.start, bind_range_len(range)); + kgsl_mem_entry_put(range->entry); kfree(range); } } From 79ee406847ab3c9c4e4231fd246bcd909d9f872f Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Thu, 18 Nov 2021 14:16:01 -0700 Subject: [PATCH 033/750] msm: kgsl: Use correct fault type for GMU asserts Use ADRENO_GMU_FAULT and make sure it gets propagated to snapshot layers. 
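As a minimal standalone sketch (the bit value is a placeholder, not the driver's definition): the dispatcher fault word is a bitmask, so recovery can hand the snapshot code a single boolean saying whether the fault originated in the GMU, which the hunks below then store in device->gmu_fault.

#include <stdbool.h>
#include <stdint.h>

#define ADRENO_GMU_FAULT_SKETCH  (1u << 3)  /* placeholder bit for illustration */

/* Mirrors the kgsl_device_snapshot(device, NULL, fault & ADRENO_GMU_FAULT)
 * call added below: reduce the fault bitmask to a yes/no GMU indication. */
static bool fault_is_gmu(uint32_t fault)
{
	return (fault & ADRENO_GMU_FAULT_SKETCH) != 0;
}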
Change-Id: I9388bf408c623956d8e5d922d07393eb7f3061e2 Signed-off-by: Harshdeep Dhatt --- adreno_gen7_hwsched_hfi.c | 2 +- adreno_hwsched.c | 2 +- kgsl_snapshot.c | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index 60ad1921d0..d7a7c543dc 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -427,7 +427,7 @@ static void process_dbgq_irq(struct adreno_device *adreno_dev) if (!recovery) return; - adreno_hwsched_fault(adreno_dev, ADRENO_HARD_FAULT); + adreno_hwsched_fault(adreno_dev, ADRENO_GMU_FAULT); } /* HFI interrupt handler */ diff --git a/adreno_hwsched.c b/adreno_hwsched.c index 7ce7d2177b..43062a94fb 100644 --- a/adreno_hwsched.c +++ b/adreno_hwsched.c @@ -1430,7 +1430,7 @@ static void reset_and_snapshot(struct adreno_device *adreno_dev, int fault) obj = get_active_cmdobj(adreno_dev); if (!obj) { - kgsl_device_snapshot(device, NULL, false); + kgsl_device_snapshot(device, NULL, fault & ADRENO_GMU_FAULT); goto done; } diff --git a/kgsl_snapshot.c b/kgsl_snapshot.c index 40d16820eb..148e095dd7 100644 --- a/kgsl_snapshot.c +++ b/kgsl_snapshot.c @@ -631,6 +631,7 @@ void kgsl_device_snapshot(struct kgsl_device *device, /* increment the hang count for good book keeping */ device->snapshot_faultcount++; + device->gmu_fault = gmu_fault; if (device->snapshot != NULL) { From 69e51b81ed7b77e96ced136231e7675c568fbbd1 Mon Sep 17 00:00:00 2001 From: Rohan Sethi Date: Tue, 7 Dec 2021 13:18:51 +0530 Subject: [PATCH 034/750] msm: kgsl: Fix gpuaddr_in_range() to check upper bound Currently gpuaddr_in_range() accepts only the gpuaddr & returns true if it lies in valid range. But this does not mean that the entire buffer is within range. Modify the function to accept size as a parameter and check that both starting & ending points of buffer lie within mmu range. 
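A minimal standalone sketch of the check this change introduces (simplified to a single address window; the driver below checks the full, compat and SVM windows of the pagetable):

#include <stdbool.h>
#include <stdint.h>

/* Both the start of the buffer and its end must fall inside the window,
 * matching the (gpuaddr + size) < va_end comparisons added below. */
static bool buffer_in_va_window(uint64_t gpuaddr, uint64_t size,
				uint64_t va_start, uint64_t va_end)
{
	if (gpuaddr == 0)
		return false;

	return gpuaddr >= va_start && (gpuaddr + size) < va_end;
}

Callers that only care about the starting address, such as kgsl_sharedmem_find() in the kgsl.c hunk below, pass size = 0 and keep the old behaviour.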
Change-Id: I1d722295b9a27e746bfdb6d3bf409ffe722193cb Signed-off-by: Rohan Sethi --- adreno_dispatch.c | 4 ++-- adreno_hwsched.c | 3 ++- kgsl.c | 4 ++-- kgsl_iommu.c | 11 +++++++---- kgsl_mmu.c | 6 +++--- kgsl_mmu.h | 5 +++-- 6 files changed, 19 insertions(+), 14 deletions(-) diff --git a/adreno_dispatch.c b/adreno_dispatch.c index d1c747061f..51d701434c 100644 --- a/adreno_dispatch.c +++ b/adreno_dispatch.c @@ -1092,8 +1092,8 @@ static inline bool _verify_ib(struct kgsl_device_private *dev_priv, } /* Make sure that the address is in range and dword aligned */ - if (!kgsl_mmu_gpuaddr_in_range(private->pagetable, ib->gpuaddr) || - !IS_ALIGNED(ib->gpuaddr, 4)) { + if (!kgsl_mmu_gpuaddr_in_range(private->pagetable, ib->gpuaddr, + ib->size) || !IS_ALIGNED(ib->gpuaddr, 4)) { pr_context(device, context, "ctxt %d invalid ib gpuaddr %llX\n", context->id, ib->gpuaddr); return false; diff --git a/adreno_hwsched.c b/adreno_hwsched.c index 43062a94fb..83cfbfc2a0 100644 --- a/adreno_hwsched.c +++ b/adreno_hwsched.c @@ -705,7 +705,8 @@ static inline bool _verify_ib(struct kgsl_device_private *dev_priv, } /* Make sure that the address is mapped */ - if (!kgsl_mmu_gpuaddr_in_range(private->pagetable, ib->gpuaddr)) { + if (!kgsl_mmu_gpuaddr_in_range(private->pagetable, ib->gpuaddr, + ib->size)) { pr_context(device, context, "ctxt %d invalid ib gpuaddr %llX\n", context->id, ib->gpuaddr); return false; diff --git a/kgsl.c b/kgsl.c index 002d314fe1..4b176a069d 100644 --- a/kgsl.c +++ b/kgsl.c @@ -1303,9 +1303,9 @@ kgsl_sharedmem_find(struct kgsl_process_private *private, uint64_t gpuaddr) if (!private) return NULL; - if (!kgsl_mmu_gpuaddr_in_range(private->pagetable, gpuaddr) && + if (!kgsl_mmu_gpuaddr_in_range(private->pagetable, gpuaddr, 0) && !kgsl_mmu_gpuaddr_in_range( - private->pagetable->mmu->securepagetable, gpuaddr)) + private->pagetable->mmu->securepagetable, gpuaddr, 0)) return NULL; spin_lock(&private->mem_lock); diff --git a/kgsl_iommu.c b/kgsl_iommu.c index 8b16a8eba2..dc01a17b93 100644 --- a/kgsl_iommu.c +++ b/kgsl_iommu.c @@ -2007,18 +2007,21 @@ static int kgsl_iommu_svm_range(struct kgsl_pagetable *pagetable, } static bool kgsl_iommu_addr_in_range(struct kgsl_pagetable *pagetable, - uint64_t gpuaddr) + uint64_t gpuaddr, uint64_t size) { if (gpuaddr == 0) return false; - if (gpuaddr >= pagetable->va_start && gpuaddr < pagetable->va_end) + if (gpuaddr >= pagetable->va_start && (gpuaddr + size) < + pagetable->va_end) return true; - if (gpuaddr >= pagetable->compat_va_start && gpuaddr < pagetable->compat_va_end) + if (gpuaddr >= pagetable->compat_va_start && (gpuaddr + size) < + pagetable->compat_va_end) return true; - if (gpuaddr >= pagetable->svm_start && gpuaddr < pagetable->svm_end) + if (gpuaddr >= pagetable->svm_start && (gpuaddr + size) < + pagetable->svm_end) return true; return false; diff --git a/kgsl_mmu.c b/kgsl_mmu.c index c0cc54f202..21f8b7c1e7 100644 --- a/kgsl_mmu.c +++ b/kgsl_mmu.c @@ -520,10 +520,10 @@ enum kgsl_mmutype kgsl_mmu_get_mmutype(struct kgsl_device *device) } bool kgsl_mmu_gpuaddr_in_range(struct kgsl_pagetable *pagetable, - uint64_t gpuaddr) + uint64_t gpuaddr, uint64_t size) { if (PT_OP_VALID(pagetable, addr_in_range)) - return pagetable->pt_ops->addr_in_range(pagetable, gpuaddr); + return pagetable->pt_ops->addr_in_range(pagetable, gpuaddr, size); return false; } @@ -535,7 +535,7 @@ bool kgsl_mmu_gpuaddr_in_range(struct kgsl_pagetable *pagetable, */ static bool nommu_gpuaddr_in_range(struct kgsl_pagetable *pagetable, - uint64_t gpuaddr) + uint64_t gpuaddr, uint64_t 
size) { return (gpuaddr != 0) ? true : false; } diff --git a/kgsl_mmu.h b/kgsl_mmu.h index 0852ca7097..fbf0ccb8c6 100644 --- a/kgsl_mmu.h +++ b/kgsl_mmu.h @@ -136,7 +136,7 @@ struct kgsl_mmu_pt_ops { int (*svm_range)(struct kgsl_pagetable *pt, uint64_t *lo, uint64_t *hi, uint64_t memflags); bool (*addr_in_range)(struct kgsl_pagetable *pagetable, - uint64_t gpuaddr); + uint64_t gpuaddr, uint64_t size); }; enum kgsl_mmu_feature { @@ -214,7 +214,8 @@ int kgsl_mmu_unmap_range(struct kgsl_pagetable *pt, struct kgsl_memdesc *memdesc, u64 offset, u64 length); unsigned int kgsl_mmu_log_fault_addr(struct kgsl_mmu *mmu, u64 ttbr0, uint64_t addr); -bool kgsl_mmu_gpuaddr_in_range(struct kgsl_pagetable *pt, uint64_t gpuaddr); +bool kgsl_mmu_gpuaddr_in_range(struct kgsl_pagetable *pt, uint64_t gpuaddr, + uint64_t size); int kgsl_mmu_get_region(struct kgsl_pagetable *pagetable, uint64_t gpuaddr, uint64_t size); From 8890addb51ec7779ec2a15f8da00eca440ed5755 Mon Sep 17 00:00:00 2001 From: Akhil P Oommen Date: Tue, 7 Dec 2021 10:15:27 +0530 Subject: [PATCH 035/750] msm: kgsl: Restore 32 bit secure VA range for 32 bit processes Some 32 bit apps cannot work with 64 bit secure GPU virtual addresses. Hence, use 32 bit secure VA for 32 bit processes. The hardware expects all secure VA(both 32 and 64 bit) to be a contiguous range. To make this happen, move global VA (which is currently sandwiched between 32 bit and 64 bit secure VA) below the 32 bit secure VA. Change-Id: I6fb9c0979fc6cedb649aa9a3ba1d0533188883bd Signed-off-by: Akhil P Oommen Signed-off-by: Nitheesh Muthuraj --- adreno_a5xx.c | 7 ++++--- adreno_a6xx.c | 7 ++++--- adreno_gen7.c | 7 ++++--- kgsl_iommu.c | 27 ++++++++++----------------- kgsl_iommu.h | 33 +++++++++++++++++---------------- 5 files changed, 39 insertions(+), 42 deletions(-) diff --git a/adreno_a5xx.c b/adreno_a5xx.c index 83e2b4b389..b25198be46 100644 --- a/adreno_a5xx.c +++ b/adreno_a5xx.c @@ -1530,11 +1530,12 @@ static int a5xx_start(struct adreno_device *adreno_dev) if (device->mmu.secured) { kgsl_regwrite(device, A5XX_RBBM_SECVID_TSB_CNTL, 0x0); kgsl_regwrite(device, A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, - lower_32_bits(KGSL_IOMMU_SECURE_BASE(&device->mmu))); + lower_32_bits(KGSL_IOMMU_SECURE_BASE32)); kgsl_regwrite(device, A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, - upper_32_bits(KGSL_IOMMU_SECURE_BASE(&device->mmu))); + upper_32_bits(KGSL_IOMMU_SECURE_BASE32)); kgsl_regwrite(device, A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, - KGSL_IOMMU_SECURE_SIZE(&device->mmu)); + FIELD_PREP(GENMASK(31, 12), + (KGSL_IOMMU_SECURE_SIZE(&device->mmu) / SZ_4K))); } a5xx_preemption_start(adreno_dev); diff --git a/adreno_a6xx.c b/adreno_a6xx.c index 7bfa013467..b593593547 100644 --- a/adreno_a6xx.c +++ b/adreno_a6xx.c @@ -505,11 +505,12 @@ static void a6xx_set_secvid(struct kgsl_device *device) kgsl_regwrite(device, A6XX_RBBM_SECVID_TSB_CNTL, 0x0); kgsl_regwrite(device, A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, - lower_32_bits(KGSL_IOMMU_SECURE_BASE(&device->mmu))); + lower_32_bits(KGSL_IOMMU_SECURE_BASE32)); kgsl_regwrite(device, A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, - upper_32_bits(KGSL_IOMMU_SECURE_BASE(&device->mmu))); + upper_32_bits(KGSL_IOMMU_SECURE_BASE32)); kgsl_regwrite(device, A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, - KGSL_IOMMU_SECURE_SIZE(&device->mmu)); + FIELD_PREP(GENMASK(31, 12), + (KGSL_IOMMU_SECURE_SIZE(&device->mmu) / SZ_4K))); if (ADRENO_QUIRK(ADRENO_DEVICE(device), ADRENO_QUIRK_SECVID_SET_ONCE)) set = true; diff --git a/adreno_gen7.c b/adreno_gen7.c index 66d349b4f6..70e1aaf433 100644 --- 
a/adreno_gen7.c +++ b/adreno_gen7.c @@ -355,11 +355,12 @@ static void _set_secvid(struct kgsl_device *device) { kgsl_regwrite(device, GEN7_RBBM_SECVID_TSB_CNTL, 0x0); kgsl_regwrite(device, GEN7_RBBM_SECVID_TSB_TRUSTED_BASE_LO, - lower_32_bits(KGSL_IOMMU_SECURE_BASE(&device->mmu))); + lower_32_bits(KGSL_IOMMU_SECURE_BASE32)); kgsl_regwrite(device, GEN7_RBBM_SECVID_TSB_TRUSTED_BASE_HI, - upper_32_bits(KGSL_IOMMU_SECURE_BASE(&device->mmu))); + upper_32_bits(KGSL_IOMMU_SECURE_BASE32)); kgsl_regwrite(device, GEN7_RBBM_SECVID_TSB_TRUSTED_SIZE, - KGSL_IOMMU_SECURE_SIZE(&device->mmu)); + FIELD_PREP(GENMASK(31, 12), + (KGSL_IOMMU_SECURE_SIZE(&device->mmu) / SZ_4K))); } /* diff --git a/kgsl_iommu.c b/kgsl_iommu.c index dc01a17b93..0728683226 100644 --- a/kgsl_iommu.c +++ b/kgsl_iommu.c @@ -1233,18 +1233,16 @@ static struct kgsl_pagetable *kgsl_iommu_default_pagetable(struct kgsl_mmu *mmu) if (test_bit(KGSL_MMU_64BIT, &mmu->features)) { iommu_pt->base.compat_va_start = KGSL_IOMMU_SVM_BASE32; - iommu_pt->base.compat_va_end = KGSL_IOMMU_GLOBAL_MEM_BASE(mmu); + if (test_bit(KGSL_MMU_IOPGTABLE, &mmu->features)) + iommu_pt->base.compat_va_end = KGSL_MEMSTORE_TOKEN_ADDRESS; + else + iommu_pt->base.compat_va_end = KGSL_IOMMU_GLOBAL_MEM_BASE64; iommu_pt->base.va_start = KGSL_IOMMU_VA_BASE64; iommu_pt->base.va_end = KGSL_IOMMU_VA_END64; } else { iommu_pt->base.va_start = KGSL_IOMMU_SVM_BASE32; - - if (mmu->secured) - iommu_pt->base.va_end = KGSL_IOMMU_SECURE_BASE(mmu); - else - iommu_pt->base.va_end = KGSL_IOMMU_GLOBAL_MEM_BASE(mmu); - + iommu_pt->base.va_end = KGSL_IOMMU_GLOBAL_MEM_BASE(mmu); iommu_pt->base.compat_va_start = iommu_pt->base.va_start; iommu_pt->base.compat_va_end = iommu_pt->base.va_end; } @@ -1291,8 +1289,8 @@ static struct kgsl_pagetable *kgsl_iommu_secure_pagetable(struct kgsl_mmu *mmu) iommu_pt->base.rbtree = RB_ROOT; iommu_pt->base.pt_ops = &secure_pt_ops; - iommu_pt->base.compat_va_start = KGSL_IOMMU_SECURE_BASE(mmu); - iommu_pt->base.compat_va_end = KGSL_IOMMU_SECURE_END(mmu); + iommu_pt->base.compat_va_start = KGSL_IOMMU_SECURE_BASE32; + iommu_pt->base.compat_va_end = KGSL_IOMMU_SECURE_END32; iommu_pt->base.va_start = KGSL_IOMMU_SECURE_BASE(mmu); iommu_pt->base.va_end = KGSL_IOMMU_SECURE_END(mmu); @@ -1324,13 +1322,13 @@ static struct kgsl_pagetable *kgsl_iopgtbl_pagetable(struct kgsl_mmu *mmu, u32 n if (test_bit(KGSL_MMU_64BIT, &mmu->features)) { pt->base.compat_va_start = KGSL_IOMMU_SVM_BASE32; - pt->base.compat_va_end = KGSL_IOMMU_GLOBAL_MEM_BASE(mmu); + pt->base.compat_va_end = KGSL_MEMSTORE_TOKEN_ADDRESS; pt->base.va_start = KGSL_IOMMU_VA_BASE64; pt->base.va_end = KGSL_IOMMU_VA_END64; if (is_compat_task()) { pt->base.svm_start = KGSL_IOMMU_SVM_BASE32; - pt->base.svm_end = KGSL_IOMMU_GLOBAL_MEM_BASE(mmu); + pt->base.svm_end = KGSL_MEMSTORE_TOKEN_ADDRESS; } else { pt->base.svm_start = KGSL_IOMMU_SVM_BASE64; pt->base.svm_end = KGSL_IOMMU_SVM_END64; @@ -1338,12 +1336,7 @@ static struct kgsl_pagetable *kgsl_iopgtbl_pagetable(struct kgsl_mmu *mmu, u32 n } else { pt->base.va_start = KGSL_IOMMU_SVM_BASE32; - - if (mmu->secured) - pt->base.va_end = KGSL_IOMMU_SECURE_BASE(mmu); - else - pt->base.va_end = KGSL_IOMMU_GLOBAL_MEM_BASE(mmu); - + pt->base.va_end = KGSL_IOMMU_GLOBAL_MEM_BASE(mmu); pt->base.compat_va_start = pt->base.va_start; pt->base.compat_va_end = pt->base.va_end; pt->base.svm_start = KGSL_IOMMU_SVM_BASE32; diff --git a/kgsl_iommu.h b/kgsl_iommu.h index 4632992831..b95d502714 100644 --- a/kgsl_iommu.h +++ b/kgsl_iommu.h @@ -13,7 +13,8 @@ */ #define KGSL_IOMMU_GLOBAL_MEM_SIZE 
(20 * SZ_1M) #define KGSL_IOMMU_GLOBAL_MEM_BASE32 0xf8000000 -#define KGSL_IOMMU_GLOBAL_MEM_BASE64 0xfc000000 +#define KGSL_IOMMU_GLOBAL_MEM_BASE64 \ + (KGSL_MEMSTORE_TOKEN_ADDRESS - KGSL_IOMMU_GLOBAL_MEM_SIZE) /* * This is a dummy token address that we use to identify memstore when the user @@ -23,7 +24,7 @@ * conflict */ -#define KGSL_MEMSTORE_TOKEN_ADDRESS 0xfff00000 +#define KGSL_MEMSTORE_TOKEN_ADDRESS (KGSL_IOMMU_SECURE_BASE32 - SZ_4K) #define KGSL_IOMMU_GLOBAL_MEM_BASE(__mmu) \ (test_bit(KGSL_MMU_64BIT, &(__mmu)->features) ? \ @@ -36,27 +37,27 @@ * Limit secure size to 256MB for 32bit kernels. */ #define KGSL_IOMMU_SECURE_SIZE32 SZ_256M -#define KGSL_IOMMU_SECURE_END32(_mmu) KGSL_IOMMU_GLOBAL_MEM_BASE(_mmu) -#define KGSL_IOMMU_SECURE_BASE32(_mmu) \ - (KGSL_IOMMU_GLOBAL_MEM_BASE(_mmu) - KGSL_IOMMU_SECURE_SIZE32) +#define KGSL_IOMMU_SECURE_BASE32 \ + (KGSL_IOMMU_SECURE_BASE64 - KGSL_IOMMU_SECURE_SIZE32) +#define KGSL_IOMMU_SECURE_END32 KGSL_IOMMU_SECURE_BASE64 -/* - * Try to use maximum allowed secure size i.e 0xFFFFF000 - * for both 32bit and 64bit secure apps when using 64bit kernel. - */ -#define KGSL_IOMMU_SECURE_BASE64 0x0100000000ULL -#define KGSL_IOMMU_SECURE_END64 0x01FFFFF000ULL -#define KGSL_IOMMU_SECURE_SIZE64 \ - (KGSL_IOMMU_SECURE_END64 - KGSL_IOMMU_SECURE_BASE64) +#define KGSL_IOMMU_SECURE_BASE64 0x100000000ULL +#define KGSL_IOMMU_SECURE_END64 \ + (KGSL_IOMMU_SECURE_BASE64 + KGSL_IOMMU_SECURE_SIZE64) + +#define KGSL_IOMMU_MAX_SECURE_SIZE 0xFFFFF000 + +#define KGSL_IOMMU_SECURE_SIZE64 \ + (KGSL_IOMMU_MAX_SECURE_SIZE - KGSL_IOMMU_SECURE_SIZE32) #define KGSL_IOMMU_SECURE_BASE(_mmu) (test_bit(KGSL_MMU_64BIT, \ &(_mmu)->features) ? KGSL_IOMMU_SECURE_BASE64 : \ - KGSL_IOMMU_SECURE_BASE32(_mmu)) + KGSL_IOMMU_SECURE_BASE32) #define KGSL_IOMMU_SECURE_END(_mmu) (test_bit(KGSL_MMU_64BIT, \ &(_mmu)->features) ? KGSL_IOMMU_SECURE_END64 : \ - KGSL_IOMMU_SECURE_END32(_mmu)) + KGSL_IOMMU_SECURE_END32) #define KGSL_IOMMU_SECURE_SIZE(_mmu) (test_bit(KGSL_MMU_64BIT, \ - &(_mmu)->features) ? KGSL_IOMMU_SECURE_SIZE64 : \ + &(_mmu)->features) ? KGSL_IOMMU_MAX_SECURE_SIZE : \ KGSL_IOMMU_SECURE_SIZE32) /* The CPU supports 39 bit addresses */ From 6b69f39eabaecc7e4af50cc4f2fd67fc08505c10 Mon Sep 17 00:00:00 2001 From: Mohammed Mirza Mandayappurath Manzoor Date: Tue, 8 Jun 2021 10:19:22 -0700 Subject: [PATCH 036/750] msm: kgsl: Add support for TSE, LRZ, RAZ and HLSQ perfcounters The select register for TSE, LRZ, RAZ and HLSQ counters are virtualized. Hence we need to program CP aperture control to route the register write to correct pipe. Also, since these registers do not have retention, update the power up list to include the pipe id so that CP can program its aperture correct. 
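A sketch of the resulting dynamic power-up list layout (field packing inferred from the gen7_perfcounter_update() hunks below; the static IFPC and preemption lists remain offset/value pairs):

#include <stdint.h>

/* Dynamic (perfcounter select) entries become triplets so CP can program
 * its aperture to the right pipe before restoring the select register. */
struct pwrup_dynamic_entry {
	uint32_t pipe;       /* pipe id, packed into bits 13:12 as in the driver */
	uint32_t offset;     /* select register offset */
	uint32_t countable;  /* countable value to restore */
};

CP learns that the dynamic list uses triplets from bit 31 of the register list length word written during CP_INIT, as in the gen7_cp_init_cmds() hunk below.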
Change-Id: I7d553b19e81f3ea58bd870efd7fcc1a6bd45a875 Signed-off-by: Mohammed Mirza Mandayappurath Manzoor --- adreno_gen7.c | 63 ++++++++++++------- adreno_gen7.h | 8 ++- adreno_gen7_perfcounter.c | 115 ++++++++++++++++++++++++++++++++-- adreno_gen7_snapshot.h | 5 -- include/uapi/linux/msm_kgsl.h | 6 +- kgsl_util.h | 16 ++++- 6 files changed, 175 insertions(+), 38 deletions(-) diff --git a/adreno_gen7.c b/adreno_gen7.c index 70e1aaf433..208ee07e07 100644 --- a/adreno_gen7.c +++ b/adreno_gen7.c @@ -130,7 +130,15 @@ void gen7_cp_init_cmds(struct adreno_device *adreno_dev, u32 *cmds) /* Register initialization list with spinlock */ cmds[i++] = lower_32_bits(adreno_dev->pwrup_reglist->gpuaddr); cmds[i++] = upper_32_bits(adreno_dev->pwrup_reglist->gpuaddr); - cmds[i++] = 0; + /* + * Gen7 targets with concurrent binning are expected to have a dynamic + * power up list with triplets which contains the pipe id in it. + * Bit 31 of POWER_UP_REGISTER_LIST_LENGTH is reused here to let CP + * know if the power up contains the triplets. If + * REGISTER_INIT_LIST_WITH_SPINLOCK is set and bit 31 below is set, + * CP expects a dynamic list with triplets. + */ + cmds[i++] = BIT(31); } int gen7_fenced_write(struct adreno_device *adreno_dev, u32 offset, @@ -275,10 +283,12 @@ static void gen7_patch_pwrup_reglist(struct adreno_device *adreno_dev) /* Static IFPC-only registers */ reglist[0].regs = gen7_ifpc_pwrup_reglist; reglist[0].count = ARRAY_SIZE(gen7_ifpc_pwrup_reglist); + lock->ifpc_list_len = reglist[0].count; /* Static IFPC + preemption registers */ reglist[1].regs = gen7_pwrup_reglist; reglist[1].count = ARRAY_SIZE(gen7_pwrup_reglist); + lock->preemption_list_len = reglist[1].count; /* * For each entry in each of the lists, write the offset and the current @@ -291,14 +301,12 @@ static void gen7_patch_pwrup_reglist(struct adreno_device *adreno_dev) *dest++ = r[j]; kgsl_regread(KGSL_DEVICE(adreno_dev), r[j], dest++); } - - lock->list_length += reglist[i].count * 2; } - /* This needs to be at the end of the list */ + /* This needs to be at the end of the dynamic list */ + *dest++ = FIELD_PREP(GENMASK(13, 12), PIPE_NONE); *dest++ = GEN7_RBBM_PERFCTR_CNTL; *dest++ = 1; - lock->list_length += 2; /* * The overall register list is composed of @@ -306,12 +314,16 @@ static void gen7_patch_pwrup_reglist(struct adreno_device *adreno_dev) * 2. Static IFPC + preemption registers * 3. Dynamic IFPC + preemption registers (ex: perfcounter selects) * - * The CP views the second and third entries as one dynamic list - * starting from list_offset. list_length should be the total dwords in - * all the lists and list_offset should be specified as the size in - * dwords of the first entry in the list. + * The first two lists are static. Size of these lists are stored as + * number of pairs in ifpc_list_len and preemption_list_len + * respectively. With concurrent binning, Some of the perfcounter + * registers being virtualized, CP needs to know the pipe id to program + * the aperture inorder to restore the same. Thus, third list is a + * dynamic list with triplets as + * (
), and the length is + * stored as number for triplets in dynamic_list_len. */ - lock->list_offset = reglist[0].count * 2; + lock->dynamic_list_len = 1; } /* _llc_configure_gpu_scid() - Program the sub-cache ID for all GPU blocks */ @@ -1128,12 +1140,12 @@ static unsigned int gen7_register_offsets[ADRENO_REG_REGISTER_MAX] = { }; int gen7_perfcounter_update(struct adreno_device *adreno_dev, - struct adreno_perfcount_register *reg, bool update_reg) + struct adreno_perfcount_register *reg, bool update_reg, u32 pipe) { void *ptr = adreno_dev->pwrup_reglist->hostptr; struct cpu_gpu_lock *lock = ptr; u32 *data = ptr + sizeof(*lock); - int i, offset = 0; + int i, offset = (lock->ifpc_list_len + lock->preemption_list_len) * 2; if (kgsl_hwlock(lock)) { kgsl_hwunlock(lock); @@ -1142,19 +1154,19 @@ int gen7_perfcounter_update(struct adreno_device *adreno_dev, /* * If the perfcounter select register is already present in reglist - * update it, otherwise append the pair to * the end of the list. */ - for (i = 0; i < lock->list_length >> 1; i++) { - if (data[offset] == reg->select) { - data[offset + 1] = reg->countable; - goto update; - } - - if (data[offset] == A6XX_RBBM_PERFCTR_CNTL) - break; - - offset += 2; + if (select_reg_present) { + data[offset + 1] = reg->countable; + goto update; } /* @@ -2033,7 +2039,6 @@ int a6xx_perfcounter_update(struct adreno_device *adreno_dev, * so overwrite the existing A6XX_RBBM_PERFCNTL_CTRL and add it back to * the end. */ - data[offset] = reg->select; data[offset + 1] = reg->countable; data[offset + 2] = A6XX_RBBM_PERFCTR_CNTL, diff --git a/adreno_gen7.c b/adreno_gen7.c index 5eaac79e44..3235b98a05 100644 --- a/adreno_gen7.c +++ b/adreno_gen7.c @@ -1618,15 +1618,8 @@ int gen7_perfcounter_remove(struct adreno_device *adreno_dev, bool remove_counter = false; u32 pipe = FIELD_PREP(GENMASK(13, 12), _get_pipeid(groupid)); - if (kgsl_hwlock(lock)) { - kgsl_hwunlock(lock); - return -EBUSY; - } - - if (lock->dynamic_list_len < 2) { - kgsl_hwunlock(lock); + if (lock->dynamic_list_len < 2) return -EINVAL; - } second_last_offset = offset + (lock->dynamic_list_len - 2) * 3; last_offset = second_last_offset + 3; @@ -1640,9 +1633,12 @@ int gen7_perfcounter_remove(struct adreno_device *adreno_dev, offset += 3; } - if (!remove_counter) { - kgsl_hwunlock(lock); + if (!remove_counter) return -ENOENT; + + if (kgsl_hwlock(lock)) { + kgsl_hwunlock(lock); + return -EBUSY; } /* @@ -1683,6 +1679,19 @@ int gen7_perfcounter_update(struct adreno_device *adreno_dev, struct cpu_gpu_lock *lock = ptr; u32 *data = ptr + sizeof(*lock); int i, offset = (lock->ifpc_list_len + lock->preemption_list_len) * 2; + bool select_reg_present = false; + + for (i = 0; i < lock->dynamic_list_len; i++) { + if ((data[offset + 1] == reg->select) && (data[offset] == pipe)) { + select_reg_present = true; + break; + } + + if (data[offset + 1] == GEN7_RBBM_PERFCTR_CNTL) + break; + + offset += 3; + } if (kgsl_hwlock(lock)) { kgsl_hwunlock(lock); @@ -1694,16 +1703,9 @@ int gen7_perfcounter_update(struct adreno_device *adreno_dev, * update it, otherwise append the * triplet to the end of the list. 
*/ - for (i = 0; i < lock->dynamic_list_len; i++) { - if ((data[offset + 1] == reg->select) && (data[offset] == pipe)) { - data[offset + 2] = reg->countable; - goto update; - } - - if (data[offset + 1] == GEN7_RBBM_PERFCTR_CNTL) - break; - - offset += 3; + if (select_reg_present) { + data[offset + 2] = reg->countable; + goto update; } /* From 65d32f9946e3eac5813252a22b8457a841ccf4b7 Mon Sep 17 00:00:00 2001 From: Lynus Vaz Date: Tue, 22 Aug 2023 14:31:43 -0700 Subject: [PATCH 515/750] msm: kgsl: Simplify timelineobj cleanup Use the same path for timelineobj retire and destroy. This keeps the timelineobj valid until the preceding cmdbatches retire, and the scheduler retires it during normal operation or context detach. This simplifies cleanup when userspace detaches a context with timelineobjs in flight. Change-Id: I8812acd045ee13bf965fea1361cf867baf7345a0 Signed-off-by: Lynus Vaz --- adreno_dispatch.c | 5 +-- adreno_hwsched.c | 4 +- kgsl_drawobj.c | 93 ++++++++++++++++++++++++++++------------------- kgsl_drawobj.h | 8 ---- 4 files changed, 57 insertions(+), 53 deletions(-) diff --git a/adreno_dispatch.c b/adreno_dispatch.c index 37a84765da..bf922832e1 100644 --- a/adreno_dispatch.c +++ b/adreno_dispatch.c @@ -299,11 +299,8 @@ static int dispatch_retire_syncobj(struct kgsl_drawobj *drawobj, static int drawqueue_retire_timelineobj(struct kgsl_drawobj *drawobj, struct adreno_context *drawctxt) { - struct kgsl_drawobj_timeline *timelineobj = TIMELINEOBJ(drawobj); - _pop_drawobj(drawctxt); - kgsl_drawobj_timelineobj_retire(timelineobj); - + kgsl_drawobj_destroy(drawobj); return 0; } diff --git a/adreno_hwsched.c b/adreno_hwsched.c index b6ca356e64..5703eb94f7 100644 --- a/adreno_hwsched.c +++ b/adreno_hwsched.c @@ -195,10 +195,8 @@ static int _retire_markerobj(struct adreno_device *adreno_dev, struct kgsl_drawo static int _retire_timelineobj(struct kgsl_drawobj *drawobj, struct adreno_context *drawctxt) { - struct kgsl_drawobj_timeline *timelineobj = TIMELINEOBJ(drawobj); - _pop_drawobj(drawctxt); - kgsl_drawobj_timelineobj_retire(timelineobj); + kgsl_drawobj_destroy(drawobj); return 0; } diff --git a/kgsl_drawobj.c b/kgsl_drawobj.c index d9ecc6e56a..f1fdc7b831 100644 --- a/kgsl_drawobj.c +++ b/kgsl_drawobj.c @@ -393,12 +393,16 @@ static void syncobj_destroy(struct kgsl_drawobj *drawobj) } -static void timelineobj_destroy(struct kgsl_drawobj *drawobj) +static void _drawobj_timelineobj_retire(struct kref *kref) { - struct kgsl_drawobj_timeline *timelineobj = TIMELINEOBJ(drawobj); int i; + struct kgsl_drawobj_timeline *timelineobj = container_of(kref, + struct kgsl_drawobj_timeline, sig_refcount); for (i = 0; i < timelineobj->count; i++) { + kgsl_timeline_signal(timelineobj->timelines[i].timeline, + timelineobj->timelines[i].seqno); + kgsl_timeline_put(timelineobj->timelines[i].timeline); kgsl_context_put(timelineobj->timelines[i].context); } @@ -408,6 +412,32 @@ static void timelineobj_destroy(struct kgsl_drawobj *drawobj) timelineobj->count = 0; } +static void kgsl_timelineobj_signal(struct kgsl_drawobj_timeline *timelineobj) +{ + kref_put(&timelineobj->sig_refcount, _drawobj_timelineobj_retire); +} + +static void timelineobj_destroy(struct kgsl_drawobj *drawobj) +{ + struct kgsl_drawobj_timeline *timelineobj = TIMELINEOBJ(drawobj); + int i; + + /* + * At this point any syncobjs blocking this timelinobj have been + * signaled. The timelineobj now only needs all preceding timestamps to + * retire before signaling the timelines. 
Notify timelines to keep them + * in sync with the timestamps as they retire. + */ + for (i = 0; i < timelineobj->count; i++) + kgsl_timeline_add_signal(&timelineobj->timelines[i]); + + /* + * The scheduler is done with the timelineobj. Put the initial + * sig_refcount to continue with the signaling process. + */ + kgsl_timelineobj_signal(timelineobj); +} + static void bindobj_destroy(struct kgsl_drawobj *drawobj) { struct kgsl_drawobj_bind *bindobj = BINDOBJ(drawobj); @@ -938,7 +968,8 @@ kgsl_drawobj_timeline_create(struct kgsl_device *device, * Initialize the sig_refcount that triggers the timeline signal. * This refcount goes to 0 when: * 1) This timelineobj is popped off the context queue. This implies - * any syncobj blocking this timelineobj was already signaled. + * any syncobj blocking this timelineobj was already signaled, or + * the context queue is cleaned up at detach time. * 2) The cmdobjs queued on this context before this timeline object * are retired. */ @@ -950,43 +981,17 @@ kgsl_drawobj_timeline_create(struct kgsl_device *device, return timelineobj; } -static void _drawobj_timelineobj_retire(struct kref *kref) -{ - struct kgsl_drawobj_timeline *timelineobj = container_of(kref, - struct kgsl_drawobj_timeline, sig_refcount); - struct kgsl_drawobj *drawobj = DRAWOBJ(timelineobj); - int i; - - for (i = 0; i < timelineobj->count; i++) - kgsl_timeline_signal(timelineobj->timelines[i].timeline, - timelineobj->timelines[i].seqno); - - /* Now that timelines are signaled destroy the drawobj */ - kgsl_drawobj_destroy(drawobj); -} - static void _timeline_signaled(struct kgsl_device *device, struct kgsl_event_group *group, void *priv, int ret) { struct kgsl_drawobj_timeline *timelineobj = priv; + struct kgsl_drawobj *drawobj = DRAWOBJ(timelineobj); - kref_put(&timelineobj->sig_refcount, _drawobj_timelineobj_retire); -} + /* Put the sig_refcount we took when registering this event */ + kgsl_timelineobj_signal(timelineobj); -void kgsl_drawobj_timelineobj_retire(struct kgsl_drawobj_timeline *timelineobj) -{ - int i; - - /* - * At this point any syncobjs blocking this timelinobj have been - * signaled. The timelineobj now only needs all preceding timestamps to - * retire before signaling the timelines. Notify timelines to keep them - * in sync with the timestamps as they retire. - */ - for (i = 0; i < timelineobj->count; i++) - kgsl_timeline_add_signal(&timelineobj->timelines[i]); - - kref_put(&timelineobj->sig_refcount, _drawobj_timelineobj_retire); + /* Put the drawobj refcount we took when registering this event */ + kgsl_drawobj_put(drawobj); } int kgsl_drawobj_add_timeline(struct kgsl_device_private *dev_priv, @@ -1063,18 +1068,30 @@ int kgsl_drawobj_add_timeline(struct kgsl_device_private *dev_priv, timelineobj->count = cmd.count; /* - * Take a refcount that we put when the last queued timestamp on this - * context is retired. Use a kgsl_event to notify us when this - * timestamp retires. + * Register a kgsl_event to notify us when the last queued timestamp + * retires. Take a refcount on the drawobj to keep it valid for the + * callback, and take the sig_refcount to synchronize with the + * timelineobj retire. Both these refcounts are put in the callback. */ + kref_get(&drawobj->refcount); kref_get(&timelineobj->sig_refcount); ret = kgsl_add_event(device, &context->events, queued, _timeline_signaled, timelineobj); if (ret) - goto err; + goto event_err; return 0; + +event_err: + /* + * If there was an error, put back sig_refcount and drawobj refcounts. 
+ * The caller still holds initial refcounts on both and puts them in + * kgsl_drawobj_destroy(). Clean up the timelinelines array since we + * do not want to signal anything now. + */ + kgsl_timelineobj_signal(timelineobj); + kgsl_drawobj_put(drawobj); err: for (i = 0; i < cmd.count; i++) { kgsl_timeline_put(timelineobj->timelines[i].timeline); diff --git a/kgsl_drawobj.h b/kgsl_drawobj.h index 5a38009ab3..b32ba58873 100644 --- a/kgsl_drawobj.h +++ b/kgsl_drawobj.h @@ -344,12 +344,4 @@ int kgsl_drawobj_add_timeline(struct kgsl_device_private *dev_priv, struct kgsl_drawobj_timeline *timelineobj, void __user *src, u64 cmdsize); -/** - * kgsl_drawobj_timelineobj_retire - Retire the timeline drawobj - * @timelineobj: Pointer to a timeline drawobject - * - * Retire the timelineobj when it is popped off the context queue. - */ -void kgsl_drawobj_timelineobj_retire(struct kgsl_drawobj_timeline *timelineobj); - #endif /* __KGSL_DRAWOBJ_H */ From 4c06d884fecf89a36f8a1c04a1338c932f62c293 Mon Sep 17 00:00:00 2001 From: Abhishek Barman Date: Thu, 18 May 2023 14:36:21 +0530 Subject: [PATCH 516/750] msm: kgsl: Support qcs405 target Add config changes to support qcs405 target. Change-Id: If104542c5364ee76e9a29e5975abaef0336f011a Signed-off-by: Abhishek Barman --- Kbuild | 3 +++ config/gki_qcs405.conf | 15 +++++++++++++++ 2 files changed, 18 insertions(+) create mode 100644 config/gki_qcs405.conf diff --git a/Kbuild b/Kbuild index 91192e6f60..762f33ef94 100644 --- a/Kbuild +++ b/Kbuild @@ -49,6 +49,9 @@ endif ifeq ($(CONFIG_ARCH_TRINKET), y) include $(KGSL_PATH)/config/gki_trinket.conf endif +ifeq ($(CONFIG_ARCH_QCS405), y) + include $(KGSL_PATH)/config/gki_qcs405.conf +endif ccflags-y += -I$(KGSL_PATH) -I$(KGSL_PATH)/include/linux -I$(KGSL_PATH)/include -I$(KERNEL_SRC)/drivers/devfreq diff --git a/config/gki_qcs405.conf b/config/gki_qcs405.conf new file mode 100644 index 0000000000..b9a6982a8a --- /dev/null +++ b/config/gki_qcs405.conf @@ -0,0 +1,15 @@ +CONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ = y +CONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON = y +CONFIG_QCOM_KGSL_IDLE_TIMEOUT = 80 +CONFIG_QCOM_KGSL_SORT_POOL = y +CONFIG_QCOM_KGSL_CONTEXT_DEBUG = y +# CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT is not set +CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR = \"msm-adreno-tz\" + +ccflags-y += -DCONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ=1 \ + -DCONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON=1 \ + -DCONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 \ + -DCONFIG_QCOM_KGSL_SORT_POOL=1 \ + -DCONFIG_QCOM_KGSL_CONTEXT_DEBUG=1 \ + -DCONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT=0 \ + -DCONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR=\"msm-adreno-tz\" From a22f7484b90cfbf177372ef6aa7b3d73f42146ed Mon Sep 17 00:00:00 2001 From: Abhishek Barman Date: Mon, 24 Jul 2023 15:34:09 +0530 Subject: [PATCH 517/750] msm: kgsl: Set correct values for SMMU protect register for A3xx For programming the CP Protect register for SMMU in A3xx GPU, pass correct values for SMMU registers base offset and the count of registers to be protected. Change-Id: I9fa809db79efc79bb7a59304fa2b4607ed1fc567 Signed-off-by: Abhishek Barman --- adreno_a3xx.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/adreno_a3xx.c b/adreno_a3xx.c index ffb2604e80..9f69430463 100644 --- a/adreno_a3xx.c +++ b/adreno_a3xx.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2012-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -1096,8 +1096,14 @@ static struct { { A3XX_CP_PROTECT_REG_0 + 13, 0x0cc0, 0 }, /* VBIF */ { A3XX_CP_PROTECT_REG_0 + 14, 0x3000, 6 }, - /* SMMU */ - { A3XX_CP_PROTECT_REG_0 + 15, 0xa000, 12 }, + /* + * SMMU + * For A3xx, base offset for smmu region is 0xa000 and length is + * 0x1000 bytes. Offset must be in dword and length of the block + * must be ilog2(dword length). + * 0xa000 >> 2 = 0x2800, ilog2(0x1000 >> 2) = 10. + */ + { A3XX_CP_PROTECT_REG_0 + 15, 0x2800, 10 }, /* There are no remaining protected mode registers for a3xx */ }; From c2fa8a4482f4b628a093b23a41023a0fc2634a84 Mon Sep 17 00:00:00 2001 From: Abhishek Barman Date: Mon, 24 Jul 2023 16:41:08 +0530 Subject: [PATCH 518/750] msm: kgsl: Add iommu clock names for A306 GPU Add "gcc_smmu_cfg_clk" and "gcc_gfx_tcu_clk" iommu clock names to control these clocks on A306 GPU. Change-Id: I79d7a4c73217c6ebf931aed9f50efe8177944eda Signed-off-by: Abhishek Barman --- kgsl_iommu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kgsl_iommu.c b/kgsl_iommu.c index 5b23759cc6..212825ca00 100644 --- a/kgsl_iommu.c +++ b/kgsl_iommu.c @@ -2415,6 +2415,8 @@ static const char * const kgsl_iommu_clocks[] = { "gcc_bimc_gpu_axi", "gcc_gpu_ahb", "gcc_gpu_axi_clk", + "gcc_smmu_cfg_clk", + "gcc_gfx_tcu_clk", }; static const struct kgsl_mmu_ops kgsl_iommu_ops; From f572cc31f20e8a2622db99aa65b1dd4a0c526817 Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Thu, 3 Aug 2023 11:14:53 -0600 Subject: [PATCH 519/750] kgsl: hwsched: Fix HFI sequence number wrap issue When comparing the ack, make sure the entire header is compared instead of just the sequence number. This is required because two packets (waiting for their acks) can have the same sequence number (once it wraps around). Use a different sequence number generator for cmdbatch submissions to context queues or dispatch queues to reduce the chance of wrapping around the cmdq sequence number. For the same reason, use a different sequence number generator for hardware fence packets as well. Remove instances where the sequence number is getting updated twice for the same packet. 
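A standalone sketch of why the full-header compare matters (the 12-bit sequence field position is taken from the MSG_HDR_GET_SEQNUM definition in the adreno_hfi.h hunk below; HFI_MSG_RECORD is passed in rather than hard-coded because its bit position is not shown here):

#include <stdbool.h>
#include <stdint.h>

/* Old match: only the 12-bit sequence number, which can alias once the
 * counter wraps past 4095 while two packets are still waiting for acks. */
static bool ack_matches_seqnum_only(uint32_t sent_hdr, uint32_t rcvd_hdr)
{
	return ((sent_hdr >> 20) & 0xFFF) == ((rcvd_hdr >> 20) & 0xFFF);
}

/* New match: compare the whole header, ignoring the record flag that only
 * some acks carry (pass HFI_MSG_RECORD from the driver as record_flag). */
static bool ack_matches_full_hdr(uint32_t sent_hdr, uint32_t rcvd_hdr,
				 uint32_t record_flag)
{
	return (sent_hdr & ~record_flag) == (rcvd_hdr & ~record_flag);
}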
Change-Id: I56232a3b5cf74b725f9572bd34eb4041774dc6d1 Signed-off-by: Harshdeep Dhatt --- adreno_a6xx_hfi.c | 6 ++-- adreno_a6xx_hwsched_hfi.c | 41 ++++++++++----------- adreno_gen7_hfi.c | 7 ++-- adreno_gen7_hwsched_hfi.c | 76 +++++++++++++++++++-------------------- adreno_gen7_hwsched_hfi.h | 2 ++ adreno_hfi.h | 7 ++-- adreno_hwsched.h | 5 +++ 7 files changed, 74 insertions(+), 70 deletions(-) diff --git a/adreno_a6xx_hfi.c b/adreno_a6xx_hfi.c index a0dc91ddfe..7bde0a2afe 100644 --- a/adreno_a6xx_hfi.c +++ b/adreno_a6xx_hfi.c @@ -123,8 +123,6 @@ int a6xx_hfi_queue_write(struct adreno_device *adreno_dev, uint32_t queue_idx, if (empty_space <= align_size) return -ENOSPC; - *msg = MSG_HDR_SET_SIZE(*msg, size_dwords); - for (i = 0; i < size_dwords; i++) { queue[write_idx] = msg[i]; write_idx = (write_idx + 1) % hdr->queue_size; @@ -244,7 +242,7 @@ int a6xx_receive_ack_cmd(struct a6xx_gmu_device *gmu, void *rcvd, if (ret_cmd == NULL) return -EINVAL; - if (HDR_CMP_SEQNUM(ret_cmd->sent_hdr, req_hdr)) { + if (CMP_HFI_ACK_HDR(ret_cmd->sent_hdr, req_hdr)) { memcpy(&ret_cmd->results, ack, MSG_HDR_GET_SIZE(hdr) << 2); return 0; } @@ -319,7 +317,7 @@ static int a6xx_hfi_send_cmd_wait_inline(struct adreno_device *adreno_dev, struct a6xx_hfi *hfi = &gmu->hfi; unsigned int seqnum = atomic_inc_return(&hfi->seqnum); - *cmd = MSG_HDR_SET_SEQNUM(*cmd, seqnum); + *cmd = MSG_HDR_SET_SEQNUM_SIZE(*cmd, seqnum, size_bytes >> 2); if (ret_cmd == NULL) return a6xx_hfi_cmdq_write(adreno_dev, cmd, size_bytes); diff --git a/adreno_a6xx_hwsched_hfi.c b/adreno_a6xx_hwsched_hfi.c index 9c3fc84541..02fac3497a 100644 --- a/adreno_a6xx_hwsched_hfi.c +++ b/adreno_a6xx_hwsched_hfi.c @@ -103,7 +103,7 @@ static void a6xx_receive_ack_async(struct adreno_device *adreno_dev, void *rcvd) read_lock(&hfi->msglock); list_for_each_entry(cmd, &hfi->msglist, node) { - if (HDR_CMP_SEQNUM(cmd->sent_hdr, req_hdr)) { + if (CMP_HFI_ACK_HDR(cmd->sent_hdr, req_hdr)) { memcpy(cmd->results, ack, min_t(u32, size_bytes, sizeof(cmd->results))); @@ -624,11 +624,12 @@ int a6xx_hfi_send_cmd_async(struct adreno_device *adreno_dev, void *data, u32 si struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); struct a6xx_hwsched_hfi *hfi = to_a6xx_hwsched_hfi(adreno_dev); u32 *cmd = data; - u32 seqnum = atomic_inc_return(&gmu->hfi.seqnum); + u32 seqnum; int rc; struct pending_cmd pending_ack; - *cmd = MSG_HDR_SET_SEQNUM(*cmd, seqnum); + seqnum = atomic_inc_return(&gmu->hfi.seqnum); + *cmd = MSG_HDR_SET_SEQNUM_SIZE(*cmd, seqnum, size_bytes >> 2); add_waiter(hfi, *cmd, &pending_ack); @@ -926,6 +927,7 @@ static int mem_alloc_reply(struct adreno_device *adreno_dev, void *rcvd) struct hfi_mem_alloc_desc desc = {0}; struct hfi_mem_alloc_reply_cmd out = {0}; struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); + u32 seqnum; int ret; hfi_get_mem_alloc_desc(rcvd, &desc); @@ -937,8 +939,8 @@ static int mem_alloc_reply(struct adreno_device *adreno_dev, void *rcvd) memcpy(&out.desc, &desc, sizeof(out.desc)); out.hdr = ACK_MSG_HDR(F2H_MSG_MEM_ALLOC); - out.hdr = MSG_HDR_SET_SEQNUM(out.hdr, - atomic_inc_return(&gmu->hfi.seqnum)); + seqnum = atomic_inc_return(&gmu->hfi.seqnum); + out.hdr = MSG_HDR_SET_SEQNUM_SIZE(out.hdr, seqnum, sizeof(out) >> 2); out.req_hdr = *(u32 *)rcvd; return a6xx_hfi_cmdq_write(adreno_dev, (u32 *)&out, sizeof(out)); @@ -948,7 +950,8 @@ static int gmu_cntr_register_reply(struct adreno_device *adreno_dev, void *rcvd) { struct hfi_gmu_cntr_register_cmd *in = (struct hfi_gmu_cntr_register_cmd *)rcvd; struct hfi_gmu_cntr_register_reply_cmd out = {0}; - 
u32 lo = 0, hi = 0; + struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); + u32 lo = 0, hi = 0, seqnum; /* * Failure to allocate counter is not fatal. Sending lo = 0, hi = 0 @@ -958,6 +961,8 @@ static int gmu_cntr_register_reply(struct adreno_device *adreno_dev, void *rcvd) in->group_id, in->countable, &lo, &hi, PERFCOUNTER_FLAG_KERNEL); out.hdr = ACK_MSG_HDR(F2H_MSG_GMU_CNTR_REGISTER); + seqnum = atomic_inc_return(&gmu->hfi.seqnum); + out.hdr = MSG_HDR_SET_SEQNUM_SIZE(out.hdr, seqnum, sizeof(out) >> 2); out.req_hdr = in->hdr; out.group_id = in->group_id; out.countable = in->countable; @@ -972,7 +977,7 @@ static int send_start_msg(struct adreno_device *adreno_dev) { struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - unsigned int seqnum = atomic_inc_return(&gmu->hfi.seqnum); + u32 seqnum; int rc, read_size; struct hfi_start_cmd cmd; u32 rcvd[MAX_RCVD_SIZE]; @@ -982,7 +987,8 @@ static int send_start_msg(struct adreno_device *adreno_dev) if (rc) return rc; - cmd.hdr = MSG_HDR_SET_SEQNUM(cmd.hdr, seqnum); + seqnum = atomic_inc_return(&gmu->hfi.seqnum); + cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.hdr, seqnum, sizeof(cmd) >> 2); pending_ack.sent_hdr = cmd.hdr; @@ -1705,8 +1711,6 @@ static int a6xx_hfi_dispatch_queue_write(struct adreno_device *adreno_dev, uint3 if (empty_space <= align_size) return -ENOSPC; - *msg = MSG_HDR_SET_SIZE(*msg, size_dwords); - write = hdr->write_index; for (i = 0; i < size_dwords; i++) { @@ -1749,9 +1753,8 @@ static int a6xx_hfi_dispatch_queue_write(struct adreno_device *adreno_dev, uint3 int a6xx_hwsched_submit_drawobj(struct adreno_device *adreno_dev, struct kgsl_drawobj *drawobj) { - struct a6xx_hfi *hfi = to_a6xx_hfi(adreno_dev); int ret = 0; - u32 cmd_sizebytes; + u32 cmd_sizebytes, seqnum; struct kgsl_drawobj_cmd *cmdobj = CMDOBJ(drawobj); struct hfi_submit_cmd *cmd; struct adreno_submit_time time = {0}; @@ -1815,9 +1818,9 @@ int a6xx_hwsched_submit_drawobj(struct adreno_device *adreno_dev, skipib: adreno_drawobj_set_constraint(KGSL_DEVICE(adreno_dev), drawobj); + seqnum = atomic_inc_return(&adreno_dev->hwsched.submission_seqnum); cmd->hdr = CREATE_MSG_HDR(H2F_MSG_ISSUE_CMD, HFI_MSG_CMD); - cmd->hdr = MSG_HDR_SET_SEQNUM(cmd->hdr, - atomic_inc_return(&hfi->seqnum)); + cmd->hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd->hdr, seqnum, cmd_sizebytes >> 2); ret = a6xx_hfi_dispatch_queue_write(adreno_dev, HFI_DSP_ID_0 + drawobj->context->gmu_dispatch_queue, @@ -1842,7 +1845,6 @@ int a6xx_hwsched_send_recurring_cmdobj(struct adreno_device *adreno_dev, { struct adreno_hwsched *hwsched = &adreno_dev->hwsched; struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj); - struct a6xx_hfi *hfi = to_a6xx_hfi(adreno_dev); struct hfi_submit_cmd *cmd; struct kgsl_memobj_node *ib; u32 cmd_sizebytes; @@ -1900,8 +1902,6 @@ int a6xx_hwsched_send_recurring_cmdobj(struct adreno_device *adreno_dev, } cmd->hdr = CREATE_MSG_HDR(H2F_MSG_ISSUE_RECURRING_CMD, HFI_MSG_CMD); - cmd->hdr = MSG_HDR_SET_SEQNUM(cmd->hdr, - atomic_inc_return(&hfi->seqnum)); ret = a6xx_hfi_send_cmd_async(adreno_dev, cmd, cmd_sizebytes); @@ -1953,7 +1953,7 @@ static int send_context_unregister_hfi(struct adreno_device *adreno_dev, cmd.ts = ts, seqnum = atomic_inc_return(&gmu->hfi.seqnum); - cmd.hdr = MSG_HDR_SET_SEQNUM(cmd.hdr, seqnum); + cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.hdr, seqnum, sizeof(cmd) >> 2); add_waiter(hfi, cmd.hdr, &pending_ack); @@ -2044,6 +2044,7 @@ u32 a6xx_hwsched_preempt_count_get(struct adreno_device *adreno_dev) struct a6xx_hwsched_hfi *hfi = 
to_a6xx_hwsched_hfi(adreno_dev); struct pending_cmd pending_ack; int rc; + u32 seqnum; if (device->state != KGSL_STATE_ACTIVE) return 0; @@ -2052,8 +2053,8 @@ u32 a6xx_hwsched_preempt_count_get(struct adreno_device *adreno_dev) if (rc) return 0; - cmd.hdr = MSG_HDR_SET_SEQNUM(cmd.hdr, - atomic_inc_return(&gmu->hfi.seqnum)); + seqnum = atomic_inc_return(&gmu->hfi.seqnum); + cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.hdr, seqnum, sizeof(cmd) >> 2); cmd.type = HFI_VALUE_PREEMPT_COUNT; cmd.subtype = 0; diff --git a/adreno_gen7_hfi.c b/adreno_gen7_hfi.c index f1a799dade..26363771c8 100644 --- a/adreno_gen7_hfi.c +++ b/adreno_gen7_hfi.c @@ -124,8 +124,6 @@ int gen7_hfi_queue_write(struct adreno_device *adreno_dev, u32 queue_idx, if (empty_space <= align_size) return -ENOSPC; - *msg = MSG_HDR_SET_SIZE(*msg, size_dwords); - for (i = 0; i < size_dwords; i++) { queue[write_idx] = msg[i]; write_idx = (write_idx + 1) % hdr->queue_size; @@ -246,7 +244,7 @@ int gen7_receive_ack_cmd(struct gen7_gmu_device *gmu, void *rcvd, if (ret_cmd == NULL) return -EINVAL; - if (HDR_CMP_SEQNUM(ret_cmd->sent_hdr, req_hdr)) { + if (CMP_HFI_ACK_HDR(ret_cmd->sent_hdr, req_hdr)) { memcpy(&ret_cmd->results, ack, MSG_HDR_GET_SIZE(hdr) << 2); return 0; } @@ -316,7 +314,8 @@ static int gen7_hfi_send_cmd_wait_inline(struct adreno_device *adreno_dev, struct gen7_hfi *hfi = &gmu->hfi; unsigned int seqnum = atomic_inc_return(&hfi->seqnum); - *cmd = MSG_HDR_SET_SEQNUM(*cmd, seqnum); + *cmd = MSG_HDR_SET_SEQNUM_SIZE(*cmd, seqnum, size_bytes >> 2); + if (ret_cmd == NULL) return gen7_hfi_cmdq_write(adreno_dev, cmd, size_bytes); diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index b4c4fb0715..cf1096eb8a 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -121,7 +121,7 @@ static void gen7_receive_ack_async(struct adreno_device *adreno_dev, void *rcvd) read_lock(&hfi->msglock); list_for_each_entry(cmd, &hfi->msglist, node) { - if (HDR_CMP_SEQNUM(cmd->sent_hdr, req_hdr)) { + if (CMP_HFI_ACK_HDR(cmd->sent_hdr, req_hdr)) { memcpy(cmd->results, ack, min_t(u32, size_bytes, sizeof(cmd->results))); @@ -818,7 +818,6 @@ static void gen7_syncobj_query_reply(struct adreno_device *adreno_dev, struct kgsl_drawobj *drawobj, struct hfi_syncobj_query_cmd *cmd) { struct hfi_syncobj_query_cmd reply = {0}; - struct gen7_hfi *hfi = to_gen7_hfi(adreno_dev); int i, j, fence_index = 0; struct kgsl_drawobj_sync *syncobj = SYNCOBJ(drawobj); const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); @@ -850,8 +849,6 @@ static void gen7_syncobj_query_reply(struct adreno_device *adreno_dev, } reply.hdr = CREATE_MSG_HDR(F2H_MSG_SYNCOBJ_QUERY, HFI_MSG_CMD); - reply.hdr = MSG_HDR_SET_SEQNUM(reply.hdr, - atomic_inc_return(&hfi->seqnum)); reply.gmu_ctxt_id = cmd->gmu_ctxt_id; reply.sync_obj_ts = cmd->sync_obj_ts; @@ -1015,11 +1012,13 @@ static void _increment_hw_fence_unack_count(struct adreno_device *adreno_dev) static int _send_hw_fence_no_ack(struct adreno_device *adreno_dev, struct adreno_hw_fence_entry *entry) { - struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); + u32 seqnum; int ret; - entry->cmd.hdr = MSG_HDR_SET_SEQNUM(entry->cmd.hdr, - atomic_inc_return(&gmu->hfi.seqnum)); + + seqnum = atomic_inc_return(&hfi->hw_fence.seqnum); + entry->cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(entry->cmd.hdr, seqnum, sizeof(entry->cmd) >> 2); ret = gen7_hfi_cmdq_write(adreno_dev, (u32 *)&entry->cmd, sizeof(entry->cmd)); if (!ret) @@ -1183,7 +1182,7 @@ static void 
process_hw_fence_ack(struct adreno_device *adreno_dev, u32 received_ spin_lock(&hfi->hw_fence.lock); /* If this ack is being waited on, we don't need to touch the unack count */ - if (hw_fence_ack.sent_hdr && HDR_CMP_SEQNUM(hw_fence_ack.sent_hdr, received_hdr)) { + if (hw_fence_ack.sent_hdr && CMP_HFI_ACK_HDR(hw_fence_ack.sent_hdr, received_hdr)) { spin_unlock(&hfi->hw_fence.lock); complete(&hw_fence_ack.complete); return; @@ -1404,11 +1403,12 @@ int gen7_hfi_send_cmd_async(struct adreno_device *adreno_dev, void *data, u32 si struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); u32 *cmd = data; - u32 seqnum = atomic_inc_return(&gmu->hfi.seqnum); + u32 seqnum; int rc; struct pending_cmd pending_ack; - *cmd = MSG_HDR_SET_SEQNUM(*cmd, seqnum); + seqnum = atomic_inc_return(&gmu->hfi.seqnum); + *cmd = MSG_HDR_SET_SEQNUM_SIZE(*cmd, seqnum, size_bytes >> 2); add_waiter(hfi, *cmd, &pending_ack); @@ -1708,6 +1708,7 @@ static int mem_alloc_reply(struct adreno_device *adreno_dev, void *rcvd) struct hfi_mem_alloc_desc desc = {0}; struct hfi_mem_alloc_reply_cmd out = {0}; struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + u32 seqnum; int ret; hfi_get_mem_alloc_desc(rcvd, &desc); @@ -1720,8 +1721,8 @@ static int mem_alloc_reply(struct adreno_device *adreno_dev, void *rcvd) out.hdr = ACK_MSG_HDR(F2H_MSG_MEM_ALLOC); - out.hdr = MSG_HDR_SET_SEQNUM(out.hdr, - atomic_inc_return(&gmu->hfi.seqnum)); + seqnum = atomic_inc_return(&gmu->hfi.seqnum); + out.hdr = MSG_HDR_SET_SEQNUM_SIZE(out.hdr, seqnum, sizeof(out) >> 2); out.req_hdr = *(u32 *)rcvd; @@ -1732,7 +1733,8 @@ static int gmu_cntr_register_reply(struct adreno_device *adreno_dev, void *rcvd) { struct hfi_gmu_cntr_register_cmd *in = (struct hfi_gmu_cntr_register_cmd *)rcvd; struct hfi_gmu_cntr_register_reply_cmd out = {0}; - u32 lo = 0, hi = 0; + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + u32 lo = 0, hi = 0, seqnum; /* * Failure to allocate counter is not fatal. 
Sending lo = 0, hi = 0 @@ -1742,6 +1744,8 @@ static int gmu_cntr_register_reply(struct adreno_device *adreno_dev, void *rcvd) in->group_id, in->countable, &lo, &hi, PERFCOUNTER_FLAG_KERNEL); out.hdr = ACK_MSG_HDR(F2H_MSG_GMU_CNTR_REGISTER); + seqnum = atomic_inc_return(&gmu->hfi.seqnum); + out.hdr = MSG_HDR_SET_SEQNUM_SIZE(out.hdr, seqnum, sizeof(out) >> 2); out.req_hdr = in->hdr; out.group_id = in->group_id; out.countable = in->countable; @@ -1754,8 +1758,6 @@ static int gmu_cntr_register_reply(struct adreno_device *adreno_dev, void *rcvd) static int send_warmboot_start_msg(struct adreno_device *adreno_dev) { - struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); - unsigned int seqnum = atomic_inc_return(&gmu->hfi.seqnum); int ret = 0; struct hfi_start_cmd cmd; @@ -1766,8 +1768,6 @@ static int send_warmboot_start_msg(struct adreno_device *adreno_dev) if (ret) return ret; - cmd.hdr = MSG_HDR_SET_SEQNUM(cmd.hdr, seqnum); - cmd.hdr = RECORD_NOP_MSG_HDR(cmd.hdr); return gen7_hfi_send_generic_req(adreno_dev, &cmd, sizeof(cmd)); @@ -1777,7 +1777,7 @@ static int send_start_msg(struct adreno_device *adreno_dev) { struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - unsigned int seqnum = atomic_inc_return(&gmu->hfi.seqnum); + u32 seqnum; int read_size, rc = 0; struct hfi_start_cmd cmd; u32 rcvd[MAX_RCVD_SIZE]; @@ -1787,7 +1787,8 @@ static int send_start_msg(struct adreno_device *adreno_dev) if (rc) return rc; - cmd.hdr = MSG_HDR_SET_SEQNUM(cmd.hdr, seqnum); + seqnum = atomic_inc_return(&gmu->hfi.seqnum); + cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.hdr, seqnum, sizeof(cmd) >> 2); pending_ack.sent_hdr = cmd.hdr; @@ -2023,13 +2024,14 @@ u32 gen7_hwsched_hfi_get_value(struct adreno_device *adreno_dev, u32 prop) struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); struct pending_cmd pending_ack; int rc; + u32 seqnum; rc = CMD_MSG_HDR(cmd, H2F_MSG_GET_VALUE); if (rc) return 0; - cmd.hdr = MSG_HDR_SET_SEQNUM(cmd.hdr, - atomic_inc_return(&gmu->hfi.seqnum)); + seqnum = atomic_inc_return(&gmu->hfi.seqnum); + cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.hdr, seqnum, sizeof(cmd) >> 2); cmd.type = prop; cmd.subtype = 0; @@ -2173,6 +2175,7 @@ static int gen7_hwsched_hfi_warmboot_gpu_cmd(struct adreno_device *adreno_dev, .flags = HFI_WARMBOOT_EXEC_SCRATCH, }; int ret = 0; + u32 seqnum; if (!adreno_dev->warmboot_enabled) return 0; @@ -2181,8 +2184,8 @@ static int gen7_hwsched_hfi_warmboot_gpu_cmd(struct adreno_device *adreno_dev, if (ret) return ret; - cmd.hdr = MSG_HDR_SET_SEQNUM(cmd.hdr, - atomic_inc_return(&gmu->hfi.seqnum)); + seqnum = atomic_inc_return(&gmu->hfi.seqnum); + cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.hdr, seqnum, sizeof(cmd) >> 2); add_waiter(hfi, cmd.hdr, ret_cmd); @@ -3032,8 +3035,6 @@ int gen7_gmu_context_queue_write(struct adreno_device *adreno_dev, if (!IS_ALIGNED(size_bytes, sizeof(u32))) return -EINVAL; - *msg = MSG_HDR_SET_SIZE(*msg, size_dwords); - for (i = 0; i < size_dwords; i++) { queue[write_idx] = msg[i]; write_idx = (write_idx + 1) % hdr->queue_size; @@ -3131,12 +3132,12 @@ static int _submit_hw_fence(struct adreno_device *adreno_dev, struct kgsl_drawobj *drawobj, void *cmdbuf) { struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context); - struct gen7_hfi *hfi = to_gen7_hfi(adreno_dev); int i, j; u32 cmd_sizebytes; struct kgsl_drawobj_sync *syncobj = SYNCOBJ(drawobj); struct hfi_submit_syncobj *cmd; struct hfi_syncobj *obj = NULL; + u32 seqnum; /* Add hfi_syncobj struct for sync object */ cmd_sizebytes = 
sizeof(*cmd) + @@ -3215,9 +3216,9 @@ static int _submit_hw_fence(struct adreno_device *adreno_dev, drawobj->timestamp = ++drawctxt->syncobj_timestamp; cmd->timestamp = drawobj->timestamp; + seqnum = atomic_inc_return(&adreno_dev->hwsched.submission_seqnum); cmd->hdr = CREATE_MSG_HDR(H2F_MSG_ISSUE_SYNCOBJ, HFI_MSG_CMD); - cmd->hdr = MSG_HDR_SET_SEQNUM(cmd->hdr, - atomic_inc_return(&hfi->seqnum)); + cmd->hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd->hdr, seqnum, cmd_sizebytes >> 2); return gen7_gmu_context_queue_write(adreno_dev, drawctxt, (u32 *)cmd, cmd_sizebytes, drawobj, NULL); @@ -3358,6 +3359,7 @@ int gen7_send_hw_fence_hfi_wait_ack(struct adreno_device *adreno_dev, struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 seqnum; int ret = 0; /* Device mutex is necessary to ensure only one hardware fence ack is being waited for */ @@ -3369,8 +3371,8 @@ int gen7_send_hw_fence_hfi_wait_ack(struct adreno_device *adreno_dev, init_completion(&hw_fence_ack.complete); entry->cmd.flags |= flags; - entry->cmd.hdr = MSG_HDR_SET_SEQNUM(entry->cmd.hdr, - atomic_inc_return(&gmu->hfi.seqnum)); + seqnum = atomic_inc_return(&hfi->hw_fence.seqnum); + entry->cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(entry->cmd.hdr, seqnum, sizeof(entry->cmd) >> 2); hw_fence_ack.sent_hdr = entry->cmd.hdr; @@ -3695,8 +3697,6 @@ static int gen7_hfi_dispatch_queue_write(struct adreno_device *adreno_dev, u32 q if (empty_space <= align_size) return -ENOSPC; - *msg = MSG_HDR_SET_SIZE(*msg, size_dwords); - write = hdr->write_index; for (i = 0; i < size_dwords; i++) { @@ -3732,9 +3732,8 @@ static int gen7_hfi_dispatch_queue_write(struct adreno_device *adreno_dev, u32 q int gen7_hwsched_submit_drawobj(struct adreno_device *adreno_dev, struct kgsl_drawobj *drawobj) { - struct gen7_hfi *hfi = to_gen7_hfi(adreno_dev); int ret = 0; - u32 cmd_sizebytes; + u32 cmd_sizebytes, seqnum; struct kgsl_drawobj_cmd *cmdobj = NULL; struct hfi_submit_cmd *cmd; struct adreno_submit_time time = {0}; @@ -3814,9 +3813,9 @@ int gen7_hwsched_submit_drawobj(struct adreno_device *adreno_dev, struct kgsl_dr skipib: adreno_drawobj_set_constraint(KGSL_DEVICE(adreno_dev), drawobj); + seqnum = atomic_inc_return(&adreno_dev->hwsched.submission_seqnum); cmd->hdr = CREATE_MSG_HDR(H2F_MSG_ISSUE_CMD, HFI_MSG_CMD); - cmd->hdr = MSG_HDR_SET_SEQNUM(cmd->hdr, - atomic_inc_return(&hfi->seqnum)); + cmd->hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd->hdr, seqnum, cmd_sizebytes >> 2); if (adreno_hwsched_context_queue_enabled(adreno_dev)) ret = gen7_gmu_context_queue_write(adreno_dev, @@ -3849,7 +3848,6 @@ int gen7_hwsched_send_recurring_cmdobj(struct adreno_device *adreno_dev, { struct adreno_hwsched *hwsched = &adreno_dev->hwsched; struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj); - struct gen7_hfi *hfi = to_gen7_hfi(adreno_dev); struct hfi_submit_cmd *cmd; struct kgsl_memobj_node *ib; u32 cmd_sizebytes; @@ -3907,8 +3905,6 @@ int gen7_hwsched_send_recurring_cmdobj(struct adreno_device *adreno_dev, } cmd->hdr = CREATE_MSG_HDR(H2F_MSG_ISSUE_RECURRING_CMD, HFI_MSG_CMD); - cmd->hdr = MSG_HDR_SET_SEQNUM(cmd->hdr, - atomic_inc_return(&hfi->seqnum)); ret = gen7_hfi_send_cmd_async(adreno_dev, cmd, sizeof(*cmd)); @@ -4015,7 +4011,7 @@ static int send_context_unregister_hfi(struct adreno_device *adreno_dev, cmd.ts = ts, seqnum = atomic_inc_return(&gmu->hfi.seqnum); - cmd.hdr = MSG_HDR_SET_SEQNUM(cmd.hdr, seqnum); + cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.hdr, seqnum, sizeof(cmd) >> 2); 
add_waiter(hfi, cmd.hdr, &pending_ack); diff --git a/adreno_gen7_hwsched_hfi.h b/adreno_gen7_hwsched_hfi.h index 6a6f02f49f..695d11e2a2 100644 --- a/adreno_gen7_hwsched_hfi.h +++ b/adreno_gen7_hwsched_hfi.h @@ -83,6 +83,8 @@ struct gen7_hwsched_hfi { * @flags: Flags to control the creation of new hardware fences */ unsigned long flags; + /** @seqnum: Sequence number for hardware fence packet header */ + atomic_t seqnum; } hw_fence; /** * @hw_fence_timer: Timer to trigger fault if unack'd hardware fence count does'nt drop diff --git a/adreno_hfi.h b/adreno_hfi.h index f3a2ad011d..2760a0119b 100644 --- a/adreno_hfi.h +++ b/adreno_hfi.h @@ -414,12 +414,15 @@ struct hfi_queue_table { #define MSG_HDR_GET_TYPE(hdr) (((hdr) >> 16) & 0xF) #define MSG_HDR_GET_SEQNUM(hdr) (((hdr) >> 20) & 0xFFF) -#define HDR_CMP_SEQNUM(out_hdr, in_hdr) \ - (MSG_HDR_GET_SEQNUM(out_hdr) == MSG_HDR_GET_SEQNUM(in_hdr)) +/* Clear the HFI_MSG_RECORD bit from both headers since some acks may have it set, and some not. */ +#define CMP_HFI_ACK_HDR(sent, rcvd) ((sent &= ~HFI_MSG_RECORD) == (rcvd &= ~HFI_MSG_RECORD)) #define MSG_HDR_SET_SEQNUM(hdr, num) \ (((hdr) & 0xFFFFF) | ((num) << 20)) +#define MSG_HDR_SET_SEQNUM_SIZE(hdr, seqnum, sizedwords) \ + (FIELD_PREP(GENMASK(31, 20), seqnum) | FIELD_PREP(GENMASK(15, 8), sizedwords) | hdr) + #define MSG_HDR_SET_TYPE(hdr, type) \ (((hdr) & 0xFFFFF) | ((type) << 16)) diff --git a/adreno_hwsched.h b/adreno_hwsched.h index 80b8651491..ef0c457359 100644 --- a/adreno_hwsched.h +++ b/adreno_hwsched.h @@ -110,6 +110,11 @@ struct adreno_hwsched { struct kmem_cache *hw_fence_cache; /** @hw_fence_count: Number of hardware fences that haven't yet been sent to Tx Queue */ atomic_t hw_fence_count; + /** + * @submission_seqnum: Sequence number for sending submissions to GMU context queues or + * dispatch queues + */ + atomic_t submission_seqnum; }; From a50124c22bd25b46a431b6d8f09bf667f6ed94ae Mon Sep 17 00:00:00 2001 From: Sanjay Yadav Date: Fri, 16 Jun 2023 16:22:07 +0530 Subject: [PATCH 520/750] msm: kgsl: Give hint to SMMU for skipping TLB ops during slumber Currently, TLB operation is performed irrespective of the GPU state. This results in unnecessary cx gdsc toggling. Use qcom_skip_tlb_management() API to request smmu driver to skip TLB flush operation during GPU slumber state. Moved kgsl_mmu_flush_tlb() to kgsl_mmu_send_tlb_hint(). Add kernel specific compatibility checks for older skip tlb logic and qcom_skip_tlb_management() API. 
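A condensed sketch of the new hint path, for review convenience (drawn from the kgsl_iommu.c and gdsc-toggle hunks below; the lpac context handling and the pre-6.1 trylock fallback are elided, so this is illustration only, not a drop-in replacement):

    static void kgsl_iommu_send_tlb_hint(struct kgsl_mmu *mmu, bool hint)
    {
    #if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE)
            /*
             * On 6.1+ kernels, ask the SMMU driver to skip TLB maintenance
             * while the GPU is in slumber, avoiding needless cx gdsc toggles.
             */
            qcom_skip_tlb_management(&mmu->iommu.user_context.pdev->dev, hint);
    #endif
            /*
             * Coming out of slumber (hint == false), flush once so no stale
             * entries survive from the window where TLB ops were skipped.
             */
            if (!hint)
                    kgsl_iommu_flush_tlb(mmu);
    }

Callers pair the hint with the CX gdsc transitions:

    kgsl_mmu_send_tlb_hint(&device->mmu, true);   /* just before disabling the CX gdsc */
    kgsl_mmu_send_tlb_hint(&device->mmu, false);  /* right after enabling the CX gdsc  */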
Change-Id: Ic538e4404e8dddef56274e21eef7cf0e0f65bef6 Signed-off-by: Sanjay Yadav --- adreno_a6xx_gmu.c | 10 +++------- adreno_a6xx_hwsched.c | 7 ------- adreno_a6xx_rgmu.c | 4 ++++ adreno_gen7_gmu.c | 10 +++------- adreno_gen7_hwsched.c | 7 ------- kgsl_iommu.c | 44 ++++++++++++++++++++++++++++++++++++------- kgsl_mmu.h | 11 ++++------- kgsl_pwrctrl.c | 5 ++++- 8 files changed, 55 insertions(+), 43 deletions(-) diff --git a/adreno_a6xx_gmu.c b/adreno_a6xx_gmu.c index 40bbf74b4d..b8d35c663d 100644 --- a/adreno_a6xx_gmu.c +++ b/adreno_a6xx_gmu.c @@ -612,6 +612,7 @@ int a6xx_gmu_enable_gdsc(struct adreno_device *adreno_dev) dev_err(&gmu->pdev->dev, "Failed to enable GMU CX gdsc, error %d\n", ret); + kgsl_mmu_send_tlb_hint(&device->mmu, false); clear_bit(GMU_PRIV_CX_GDSC_WAIT, &gmu->flags); return ret; } @@ -619,7 +620,9 @@ int a6xx_gmu_enable_gdsc(struct adreno_device *adreno_dev) void a6xx_gmu_disable_gdsc(struct adreno_device *adreno_dev) { struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + kgsl_mmu_send_tlb_hint(&device->mmu, true); reinit_completion(&gmu->gdsc_gate); if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_CX_GDSC)) @@ -2513,13 +2516,6 @@ static int a6xx_gmu_boot(struct adreno_device *adreno_dev) if (ret) goto gdsc_off; - /* - * TLB operations are skipped during slumber. Incase CX doesn't - * go down, it can result in incorrect translations due to stale - * TLB entries. Flush TLB before boot up to ensure fresh start. - */ - kgsl_mmu_flush_tlb(&device->mmu); - ret = a6xx_rscc_wakeup_sequence(adreno_dev); if (ret) goto clks_gdsc_off; diff --git a/adreno_a6xx_hwsched.c b/adreno_a6xx_hwsched.c index 32e2c7e6eb..f40182170f 100644 --- a/adreno_a6xx_hwsched.c +++ b/adreno_a6xx_hwsched.c @@ -436,13 +436,6 @@ static int a6xx_hwsched_gmu_boot(struct adreno_device *adreno_dev) if (ret) goto gdsc_off; - /* - * TLB operations are skipped during slumber. Incase CX doesn't - * go down, it can result in incorrect translations due to stale - * TLB entries. Flush TLB before boot up to ensure fresh start. 
- */ - kgsl_mmu_flush_tlb(&device->mmu); - ret = a6xx_rscc_wakeup_sequence(adreno_dev); if (ret) goto clks_gdsc_off; diff --git a/adreno_a6xx_rgmu.c b/adreno_a6xx_rgmu.c index 18358e300d..2f345ee191 100644 --- a/adreno_a6xx_rgmu.c +++ b/adreno_a6xx_rgmu.c @@ -518,6 +518,8 @@ static int a6xx_rgmu_disable_gdsc(struct adreno_device *adreno_dev) struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + kgsl_mmu_send_tlb_hint(&device->mmu, true); + /* Wait up to 5 seconds for the regulator to go off */ if (kgsl_regulator_disable_wait(rgmu->cx_gdsc, 5000)) return 0; @@ -590,6 +592,7 @@ static int a6xx_rgmu_enable_clks(struct adreno_device *adreno_dev) static int a6xx_rgmu_enable_gdsc(struct adreno_device *adreno_dev) { struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); int ret; if (IS_ERR_OR_NULL(rgmu->cx_gdsc)) @@ -600,6 +603,7 @@ static int a6xx_rgmu_enable_gdsc(struct adreno_device *adreno_dev) dev_err(&rgmu->pdev->dev, "Fail to enable CX gdsc:%d\n", ret); + kgsl_mmu_send_tlb_hint(&device->mmu, false); return ret; } diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index 86c58d0033..6194621bf9 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -364,6 +364,7 @@ int gen7_gmu_enable_gdsc(struct adreno_device *adreno_dev) dev_err(&gmu->pdev->dev, "Failed to enable GMU CX gdsc, error %d\n", ret); + kgsl_mmu_send_tlb_hint(&device->mmu, false); clear_bit(GMU_PRIV_CX_GDSC_WAIT, &gmu->flags); return ret; } @@ -371,7 +372,9 @@ int gen7_gmu_enable_gdsc(struct adreno_device *adreno_dev) void gen7_gmu_disable_gdsc(struct adreno_device *adreno_dev) { struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + kgsl_mmu_send_tlb_hint(&device->mmu, true); reinit_completion(&gmu->gdsc_gate); set_bit(GMU_PRIV_CX_GDSC_WAIT, &gmu->flags); regulator_disable(gmu->cx_gdsc); @@ -2040,13 +2043,6 @@ static int gen7_gmu_boot(struct adreno_device *adreno_dev) if (ret) goto gdsc_off; - /* - * TLB operations are skipped during slumber. Incase CX doesn't - * go down, it can result in incorrect translations due to stale - * TLB entries. Flush TLB before boot up to ensure fresh start. - */ - kgsl_mmu_flush_tlb(&device->mmu); - ret = gen7_rscc_wakeup_sequence(adreno_dev); if (ret) goto clks_gdsc_off; diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index 5e4ed32dee..492c3f0859 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -597,13 +597,6 @@ static int gen7_hwsched_gmu_boot(struct adreno_device *adreno_dev) if (ret) goto gdsc_off; - /* - * TLB operations are skipped during slumber. Incase CX doesn't - * go down, it can result in incorrect translations due to stale - * TLB entries. Flush TLB before boot up to ensure fresh start. 
- */ - kgsl_mmu_flush_tlb(&device->mmu); - ret = gen7_rscc_wakeup_sequence(adreno_dev); if (ret) goto clks_gdsc_off; diff --git a/kgsl_iommu.c b/kgsl_iommu.c index 22a55d4d9f..33cd3301f7 100644 --- a/kgsl_iommu.c +++ b/kgsl_iommu.c @@ -189,7 +189,6 @@ static void kgsl_iommu_flush_tlb(struct kgsl_mmu *mmu) static int _iopgtbl_unmap(struct kgsl_iommu_pt *pt, u64 gpuaddr, size_t size) { - struct kgsl_device *device = KGSL_MMU_DEVICE(pt->base.mmu); struct io_pgtable_ops *ops = pt->pgtbl_ops; while (size) { @@ -200,13 +199,22 @@ static int _iopgtbl_unmap(struct kgsl_iommu_pt *pt, u64 gpuaddr, size_t size) size -= PAGE_SIZE; } - /* Skip TLB Operations if GPU is in slumber */ - if (mutex_trylock(&device->mutex)) { - if (device->state == KGSL_STATE_SLUMBER) { + /* + * Skip below logic for 6.1 kernel version and above as + * qcom_skip_tlb_management() API takes care of avoiding + * TLB operations during slumber. + */ + if (KERNEL_VERSION(6, 1, 0) > LINUX_VERSION_CODE) { + struct kgsl_device *device = KGSL_MMU_DEVICE(pt->base.mmu); + + /* Skip TLB Operations if GPU is in slumber */ + if (mutex_trylock(&device->mutex)) { + if (device->state == KGSL_STATE_SLUMBER) { + mutex_unlock(&device->mutex); + return 0; + } mutex_unlock(&device->mutex); - return 0; } - mutex_unlock(&device->mutex); } kgsl_iommu_flush_tlb(pt->base.mmu); @@ -267,6 +275,28 @@ static size_t _iopgtbl_map_sg(struct kgsl_iommu_pt *pt, u64 gpuaddr, return mapped; } +static void kgsl_iommu_send_tlb_hint(struct kgsl_mmu *mmu, bool hint) +{ +#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE) + struct kgsl_iommu *iommu = &mmu->iommu; + + /* + * Send hint to SMMU driver for skipping TLB operations during slumber. + * This will help to avoid unnecessary cx gdsc toggling. + */ + qcom_skip_tlb_management(&iommu->user_context.pdev->dev, hint); + if (iommu->lpac_context.domain) + qcom_skip_tlb_management(&iommu->lpac_context.pdev->dev, hint); +#endif + + /* + * TLB operations are skipped during slumber. Incase CX doesn't + * go down, it can result in incorrect translations due to stale + * TLB entries. Flush TLB before boot up to ensure fresh start. 
+ */ + if (!hint) + kgsl_iommu_flush_tlb(mmu); +} static int kgsl_iopgtbl_map_child(struct kgsl_pagetable *pt, struct kgsl_memdesc *memdesc, @@ -2582,7 +2612,7 @@ static const struct kgsl_mmu_ops kgsl_iommu_ops = { .mmu_pagefault_resume = kgsl_iommu_pagefault_resume, .mmu_getpagetable = kgsl_iommu_getpagetable, .mmu_map_global = kgsl_iommu_map_global, - .mmu_flush_tlb = kgsl_iommu_flush_tlb, + .mmu_send_tlb_hint = kgsl_iommu_send_tlb_hint, }; static const struct kgsl_mmu_pt_ops iopgtbl_pt_ops = { diff --git a/kgsl_mmu.h b/kgsl_mmu.h index 8b5e083081..3035cdec41 100644 --- a/kgsl_mmu.h +++ b/kgsl_mmu.h @@ -115,7 +115,7 @@ struct kgsl_mmu_ops { unsigned long name); void (*mmu_map_global)(struct kgsl_mmu *mmu, struct kgsl_memdesc *memdesc, u32 padding); - void (*mmu_flush_tlb)(struct kgsl_mmu *mmu); + void (*mmu_send_tlb_hint)(struct kgsl_mmu *mmu, bool hint); }; struct kgsl_mmu_pt_ops { @@ -359,13 +359,10 @@ kgsl_mmu_pagetable_get_ttbr0(struct kgsl_pagetable *pagetable) return 0; } -static inline void kgsl_mmu_flush_tlb(struct kgsl_mmu *mmu) +static inline void kgsl_mmu_send_tlb_hint(struct kgsl_mmu *mmu, bool hint) { - if (!test_bit(KGSL_MMU_IOPGTABLE, &mmu->features)) - return; - - if (MMU_OP_VALID(mmu, mmu_flush_tlb)) - return mmu->mmu_ops->mmu_flush_tlb(mmu); + if (MMU_OP_VALID(mmu, mmu_send_tlb_hint)) + return mmu->mmu_ops->mmu_send_tlb_hint(mmu, hint); } /** diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c index cf7504f625..7a99babbd6 100644 --- a/kgsl_pwrctrl.c +++ b/kgsl_pwrctrl.c @@ -1376,14 +1376,17 @@ static int kgsl_pwrctrl_pwrrail(struct kgsl_device *device, bool state) if (!state) { if (test_and_clear_bit(KGSL_PWRFLAGS_POWER_ON, &pwr->power_flags)) { + kgsl_mmu_send_tlb_hint(&device->mmu, true); trace_kgsl_rail(device, state); if (!kgsl_regulator_disable_wait(pwr->gx_gdsc, 200)) dev_err(device->dev, "Regulator vdd is stuck on\n"); if (!kgsl_regulator_disable_wait(pwr->cx_gdsc, 200)) dev_err(device->dev, "Regulator vddcx is stuck on\n"); } - } else + } else { status = enable_regulators(device); + kgsl_mmu_send_tlb_hint(&device->mmu, false); + } return status; } From db6e014625ac56ef11ecc393c4ba721f79fcf9f2 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Tue, 8 Aug 2023 22:05:10 +0530 Subject: [PATCH 521/750] msm: kgsl: Update power state machine for rgmu Power state machine is different for RGMU and GMU targets. Update the power state machine of RGMU to make it same as GMU targets. 
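Concretely, the KGSL_STATE_NONE transition moves out of the gdsc-disable helper and to the end of the suspend and power-off paths, mirroring the GMU targets. A condensed sketch of the resulting power-off ordering (derived from the hunks below; illustration only, intermediate steps elided):

    static void a6xx_rgmu_power_off(struct adreno_device *adreno_dev)
    {
            ...
            a6xx_rgmu_disable_gdsc(adreno_dev);     /* no longer touches the pwr state */
            kgsl_pwrctrl_clear_l3_vote(device);
            /* State transition now happens last, as on GMU targets */
            kgsl_pwrctrl_set_state(device, KGSL_STATE_NONE);
    }

The suspend path (a6xx_rgmu_suspend) gains the same trailing kgsl_pwrctrl_set_state() call.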
Change-Id: I44eba52b6eab90b4686c27d84509ac9ef85def89 Signed-off-by: Kamal Agrawal --- adreno_a6xx_rgmu.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/adreno_a6xx_rgmu.c b/adreno_a6xx_rgmu.c index 2f345ee191..89d290b968 100644 --- a/adreno_a6xx_rgmu.c +++ b/adreno_a6xx_rgmu.c @@ -516,7 +516,6 @@ static void a6xx_rgmu_disable_clks(struct adreno_device *adreno_dev) static int a6xx_rgmu_disable_gdsc(struct adreno_device *adreno_dev) { struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(adreno_dev); - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); kgsl_mmu_send_tlb_hint(&device->mmu, true); @@ -526,8 +525,6 @@ static int a6xx_rgmu_disable_gdsc(struct adreno_device *adreno_dev) dev_err(&rgmu->pdev->dev, "RGMU CX gdsc off timeout\n"); - kgsl_pwrctrl_set_state(device, KGSL_STATE_NONE); - return -ETIMEDOUT; } @@ -556,6 +553,8 @@ static void a6xx_rgmu_suspend(struct adreno_device *adreno_dev) a6xx_rgmu_disable_clks(adreno_dev); a6xx_rgmu_disable_gdsc(adreno_dev); + + kgsl_pwrctrl_set_state(KGSL_DEVICE(adreno_dev), KGSL_STATE_NONE); } static int a6xx_rgmu_enable_clks(struct adreno_device *adreno_dev) @@ -716,6 +715,8 @@ static void a6xx_rgmu_power_off(struct adreno_device *adreno_dev) a6xx_rgmu_disable_gdsc(adreno_dev); kgsl_pwrctrl_clear_l3_vote(device); + + kgsl_pwrctrl_set_state(device, KGSL_STATE_NONE); } static int a6xx_rgmu_clock_set(struct adreno_device *adreno_dev, From 2864d0656c051b850954bf1f2bf78f3fca4b4e47 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Sat, 17 Dec 2022 20:04:52 +0530 Subject: [PATCH 522/750] msm: kgsl: Add cx gdsc notifier for rgmu and non-gmu targets Extend cx gdsc notifier support to rgmu and non-gmu targets. With this, KGSL waits for cx collapse notifier event instead of polling for gdsc state. This helps to remove the CPU cycles spent for polling. Also, it addresses the corner case scenario where cx gdsc collapse event can get missed due to sleep operation during polling. Also, remove cx gdsc and gx gdsc members from gmu structure and use members in power control structure. Change-Id: I6199b612a18651dc53a46b666569742a21dda2df Signed-off-by: Kamal Agrawal --- adreno_a6xx.c | 6 ++ adreno_a6xx_gmu.c | 110 ++++------------------------------- adreno_a6xx_gmu.h | 26 --------- adreno_a6xx_hwsched.c | 4 +- adreno_a6xx_rgmu.c | 78 ++++--------------------- adreno_a6xx_rgmu.h | 5 +- adreno_a6xx_snapshot.c | 2 - adreno_gen7.c | 1 + adreno_gen7_gmu.c | 112 ++++------------------------------- adreno_gen7_gmu.h | 32 ---------- adreno_gen7_hwsched.c | 10 ++-- kgsl_gmu_core.h | 1 - kgsl_pwrctrl.c | 129 +++++++++++++++++++++++++++++++++++++---- kgsl_pwrctrl.h | 32 ++++++++++ 14 files changed, 201 insertions(+), 347 deletions(-) diff --git a/adreno_a6xx.c b/adreno_a6xx.c index 248f5364b8..7857bb72a4 100644 --- a/adreno_a6xx.c +++ b/adreno_a6xx.c @@ -1888,6 +1888,7 @@ int a6xx_probe_common(struct platform_device *pdev, struct adreno_device *adreno_dev, u32 chipid, const struct adreno_gpu_core *gpucore) { + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); const struct adreno_gpudev *gpudev = gpucore->gpudev; int ret; @@ -1896,6 +1897,11 @@ int a6xx_probe_common(struct platform_device *pdev, adreno_reg_offset_init(gpudev->reg_offsets); + if (gmu_core_isenabled(device) && (gpudev != &adreno_a6xx_rgmu_gpudev)) + device->pwrctrl.cx_gdsc_offset = (adreno_is_a662(adreno_dev) || + adreno_is_a621(adreno_dev)) ? 
A662_GPU_CC_CX_GDSCR : + A6XX_GPU_CC_CX_GDSCR; + adreno_dev->hwcg_enabled = true; adreno_dev->uche_client_pf = 1; diff --git a/adreno_a6xx_gmu.c b/adreno_a6xx_gmu.c index b8d35c663d..f139dd046e 100644 --- a/adreno_a6xx_gmu.c +++ b/adreno_a6xx_gmu.c @@ -6,7 +6,6 @@ #include #include -#include #include #include #include @@ -594,45 +593,18 @@ static void gmu_ao_sync_event(struct adreno_device *adreno_dev) local_irq_restore(flags); } -int a6xx_gmu_enable_gdsc(struct adreno_device *adreno_dev) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); - int ret; - - ret = wait_for_completion_timeout(&gmu->gdsc_gate, msecs_to_jiffies(5000)); - if (!ret) { - dev_err(device->dev, "GPU CX wait timeout. Dumping CX votes:\n"); - /* Dump the cx regulator consumer list */ - qcom_clk_dump(NULL, gmu->cx_gdsc, false); - } - - ret = regulator_enable(gmu->cx_gdsc); - if (ret) - dev_err(&gmu->pdev->dev, - "Failed to enable GMU CX gdsc, error %d\n", ret); - - kgsl_mmu_send_tlb_hint(&device->mmu, false); - clear_bit(GMU_PRIV_CX_GDSC_WAIT, &gmu->flags); - return ret; -} - void a6xx_gmu_disable_gdsc(struct adreno_device *adreno_dev) { - struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - - kgsl_mmu_send_tlb_hint(&device->mmu, true); - reinit_completion(&gmu->gdsc_gate); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_CX_GDSC)) - regulator_set_mode(gmu->cx_gdsc, REGULATOR_MODE_IDLE); + regulator_set_mode(pwr->cx_gdsc, REGULATOR_MODE_IDLE); - set_bit(GMU_PRIV_CX_GDSC_WAIT, &gmu->flags); - regulator_disable(gmu->cx_gdsc); + kgsl_pwrctrl_disable_cx_gdsc(device); if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_CX_GDSC)) - regulator_set_mode(gmu->cx_gdsc, REGULATOR_MODE_NORMAL); + regulator_set_mode(pwr->cx_gdsc, REGULATOR_MODE_NORMAL); } int a6xx_gmu_device_start(struct adreno_device *adreno_dev) @@ -1866,6 +1838,7 @@ static void a6xx_gmu_pwrctrl_suspend(struct adreno_device *adreno_dev) int ret = 0; struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; /* If SPTP_RAC is on, turn off SPTP_RAC HS */ a6xx_gmu_sptprac_disable(adreno_dev); @@ -1910,14 +1883,14 @@ static void a6xx_gmu_pwrctrl_suspend(struct adreno_device *adreno_dev) * the GX HS. This code path is the only client voting for GX through * the regulator interface. 
*/ - if (gmu->gx_gdsc) { + if (pwr->gx_gdsc) { if (a6xx_gmu_gx_is_on(adreno_dev)) { /* Switch gx gdsc control from GMU to CPU * force non-zero reference count in clk driver * so next disable call will turn * off the GDSC */ - ret = regulator_enable(gmu->gx_gdsc); + ret = regulator_enable(pwr->gx_gdsc); if (ret) dev_err(&gmu->pdev->dev, "suspend fail: gx enable %d\n", ret); @@ -1934,7 +1907,7 @@ static void a6xx_gmu_pwrctrl_suspend(struct adreno_device *adreno_dev) ndelay(520); } - ret = regulator_disable(gmu->gx_gdsc); + ret = regulator_disable(pwr->gx_gdsc); if (ret) dev_err(&gmu->pdev->dev, "suspend fail: gx disable %d\n", ret); @@ -2382,7 +2355,7 @@ static void a6xx_gmu_force_first_boot(struct kgsl_device *device) u32 val = 0; if (gmu->pdc_cfg_base) { - a6xx_gmu_enable_gdsc(adreno_dev); + kgsl_pwrctrl_enable_cx_gdsc(device); a6xx_gmu_enable_clks(adreno_dev, 0); val = __raw_readl(gmu->pdc_cfg_base + (PDC_GPU_ENABLE_PDC << 2)); @@ -2412,7 +2385,7 @@ static int a6xx_gmu_first_boot(struct adreno_device *adreno_dev) a6xx_gmu_aop_send_acd_state(gmu, adreno_dev->acd_enabled); - ret = a6xx_gmu_enable_gdsc(adreno_dev); + ret = kgsl_pwrctrl_enable_cx_gdsc(device); if (ret) return ret; @@ -2508,7 +2481,7 @@ static int a6xx_gmu_boot(struct adreno_device *adreno_dev) kgsl_pwrctrl_request_state(device, KGSL_STATE_AWARE); - ret = a6xx_gmu_enable_gdsc(adreno_dev); + ret = kgsl_pwrctrl_enable_cx_gdsc(device); if (ret) return ret; @@ -2826,65 +2799,6 @@ static void a6xx_gmu_rdpm_probe(struct a6xx_gmu_device *gmu, res->start, resource_size(res)); } -static int gmu_cx_gdsc_event(struct notifier_block *nb, - unsigned long event, void *data) -{ - struct a6xx_gmu_device *gmu = container_of(nb, struct a6xx_gmu_device, gdsc_nb); - struct adreno_device *adreno_dev = a6xx_gmu_to_adreno(gmu); - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - u32 val, offset; - - if (!(event & REGULATOR_EVENT_DISABLE) || - !test_bit(GMU_PRIV_CX_GDSC_WAIT, &gmu->flags)) - return 0; - - offset = (adreno_is_a662(ADRENO_DEVICE(device)) || - adreno_is_a621(ADRENO_DEVICE(device))) ? 
- A662_GPU_CC_CX_GDSCR : A6XX_GPU_CC_CX_GDSCR; - - if (kgsl_regmap_read_poll_timeout(&device->regmap, offset, val, - !(val & BIT(31)), 100, 100 * 1000)) - dev_err(device->dev, "GPU CX wait timeout.\n"); - - clear_bit(GMU_PRIV_CX_GDSC_WAIT, &gmu->flags); - complete_all(&gmu->gdsc_gate); - - return 0; -} - -static int a6xx_gmu_regulators_probe(struct a6xx_gmu_device *gmu, - struct platform_device *pdev) -{ - int ret; - - gmu->cx_gdsc = devm_regulator_get(&pdev->dev, "vddcx"); - if (IS_ERR(gmu->cx_gdsc)) { - if (PTR_ERR(gmu->cx_gdsc) != -EPROBE_DEFER) - dev_err(&pdev->dev, "Couldn't get the vddcx gdsc\n"); - return PTR_ERR(gmu->cx_gdsc); - } - - gmu->gx_gdsc = devm_regulator_get(&pdev->dev, "vdd"); - if (IS_ERR(gmu->gx_gdsc)) { - if (PTR_ERR(gmu->gx_gdsc) != -EPROBE_DEFER) - dev_err(&pdev->dev, "Couldn't get the vdd gdsc\n"); - return PTR_ERR(gmu->gx_gdsc); - } - - gmu->gdsc_nb.notifier_call = gmu_cx_gdsc_event; - ret = devm_regulator_register_notifier(gmu->cx_gdsc, &gmu->gdsc_nb); - - if (ret) { - dev_err(&pdev->dev, "Failed to register gmu cx gdsc notifier: %d\n", ret); - return ret; - } - - init_completion(&gmu->gdsc_gate); - complete_all(&gmu->gdsc_gate); - - return 0; -} - void a6xx_gmu_remove(struct kgsl_device *device) { struct adreno_device *adreno_dev = ADRENO_DEVICE(device); @@ -2990,7 +2904,7 @@ int a6xx_gmu_probe(struct kgsl_device *device, a6xx_gmu_rdpm_probe(gmu, device); /* Set up GMU regulators */ - ret = a6xx_gmu_regulators_probe(gmu, pdev); + ret = kgsl_pwrctrl_probe_regulators(device, pdev); if (ret) return ret; diff --git a/adreno_a6xx_gmu.h b/adreno_a6xx_gmu.h index 6060b325c0..ba29ca53c6 100644 --- a/adreno_a6xx_gmu.h +++ b/adreno_a6xx_gmu.h @@ -24,9 +24,6 @@ * @num_bwlevel: number of GPU BW levels * @num_cnocbwlevel: number CNOC BW levels * @rpmh_votes: RPMh TCS command set for GPU, GMU voltage and bw scaling - * @cx_gdsc: CX headswitch that controls power of GMU and - subsystem peripherals - * @gx_gdsc: GX headswitch that controls power of GPU subsystem * @clks: GPU subsystem clocks required for GMU functionality * @wakeup_pwrlevel: GPU wake up power/DCVS level in case different * than default power level @@ -51,9 +48,6 @@ struct a6xx_gmu_device { /** @vrb: GMU virtual register bank memory */ struct kgsl_memdesc *vrb; struct a6xx_hfi hfi; - /** @pwrlevels: Array of GMU power levels */ - struct regulator *cx_gdsc; - struct regulator *gx_gdsc; struct clk_bulk_data *clks; /** @num_clks: Number of entries in the @clks array */ int num_clks; @@ -99,10 +93,6 @@ struct a6xx_gmu_device { u32 perf_ddr_bw; /** @num_oob_perfcntr: Number of active oob_perfcntr requests */ u32 num_oob_perfcntr; - /** @gdsc_nb: Notifier block for cx gdsc regulator */ - struct notifier_block gdsc_nb; - /** @gdsc_gate: Completion to signal cx gdsc collapse status */ - struct completion gdsc_gate; /** @pdc_cfg_base: Base address of PDC cfg registers */ void __iomem *pdc_cfg_base; /** @pdc_seq_base: Base address of PDC seq registers */ @@ -267,14 +257,6 @@ int a6xx_gmu_memory_init(struct adreno_device *adreno_dev); */ void a6xx_gmu_aop_send_acd_state(struct a6xx_gmu_device *gmu, bool flag); -/** - * a6xx_gmu_enable_gdsc - Enable gmu gdsc - * @adreno_dev: Pointer to the adreno device - * - * Return: 0 on success or negative error on failure - */ -int a6xx_gmu_enable_gdsc(struct adreno_device *adreno_dev); - /** * a6xx_gmu_disable_gdsc - Disable gmu gdsc * @adreno_dev: Pointer to the adreno device @@ -445,14 +427,6 @@ void a6xx_gmu_remove(struct kgsl_device *device); */ int a6xx_gmu_enable_clks(struct 
adreno_device *adreno_dev, u32 level); -/** - * a6xx_gmu_enable_gdsc - Enable gmu gdsc - * @adreno_dev: Pointer to the adreno device - * - * Return: 0 on success or negative error on failure - */ -int a6xx_gmu_enable_gdsc(struct adreno_device *adreno_dev); - /** * a6xx_gmu_handle_watchdog - Handle watchdog interrupt * @adreno_dev: Pointer to the adreno device diff --git a/adreno_a6xx_hwsched.c b/adreno_a6xx_hwsched.c index f40182170f..a341ca6bba 100644 --- a/adreno_a6xx_hwsched.c +++ b/adreno_a6xx_hwsched.c @@ -345,7 +345,7 @@ static int a6xx_hwsched_gmu_first_boot(struct adreno_device *adreno_dev) a6xx_gmu_aop_send_acd_state(gmu, adreno_dev->acd_enabled); - ret = a6xx_gmu_enable_gdsc(adreno_dev); + ret = kgsl_pwrctrl_enable_cx_gdsc(device); if (ret) return ret; @@ -428,7 +428,7 @@ static int a6xx_hwsched_gmu_boot(struct adreno_device *adreno_dev) kgsl_pwrctrl_request_state(device, KGSL_STATE_AWARE); - ret = a6xx_gmu_enable_gdsc(adreno_dev); + ret = kgsl_pwrctrl_enable_cx_gdsc(device); if (ret) return ret; diff --git a/adreno_a6xx_rgmu.c b/adreno_a6xx_rgmu.c index 89d290b968..dc988f34be 100644 --- a/adreno_a6xx_rgmu.c +++ b/adreno_a6xx_rgmu.c @@ -483,12 +483,14 @@ static void a6xx_rgmu_notify_slumber(struct adreno_device *adreno_dev) static void a6xx_rgmu_disable_clks(struct adreno_device *adreno_dev) { struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; int ret; /* Check GX GDSC is status */ if (a6xx_rgmu_gx_is_on(adreno_dev)) { - if (IS_ERR_OR_NULL(rgmu->gx_gdsc)) + if (IS_ERR_OR_NULL(pwr->gx_gdsc)) return; /* @@ -496,12 +498,12 @@ static void a6xx_rgmu_disable_clks(struct adreno_device *adreno_dev) * reference count in clk driver so next disable call will * turn off the GDSC. 
*/ - ret = regulator_enable(rgmu->gx_gdsc); + ret = regulator_enable(pwr->gx_gdsc); if (ret) dev_err(&rgmu->pdev->dev, "Fail to enable gx gdsc:%d\n", ret); - ret = regulator_disable(rgmu->gx_gdsc); + ret = regulator_disable(pwr->gx_gdsc); if (ret) dev_err(&rgmu->pdev->dev, "Fail to disable gx gdsc:%d\n", ret); @@ -513,21 +515,6 @@ static void a6xx_rgmu_disable_clks(struct adreno_device *adreno_dev) clk_bulk_disable_unprepare(rgmu->num_clks, rgmu->clks); } -static int a6xx_rgmu_disable_gdsc(struct adreno_device *adreno_dev) -{ - struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(adreno_dev); - - kgsl_mmu_send_tlb_hint(&device->mmu, true); - - /* Wait up to 5 seconds for the regulator to go off */ - if (kgsl_regulator_disable_wait(rgmu->cx_gdsc, 5000)) - return 0; - - dev_err(&rgmu->pdev->dev, "RGMU CX gdsc off timeout\n"); - - return -ETIMEDOUT; -} - void a6xx_rgmu_snapshot(struct adreno_device *adreno_dev, struct kgsl_snapshot *snapshot) { @@ -549,10 +536,11 @@ void a6xx_rgmu_snapshot(struct adreno_device *adreno_dev, static void a6xx_rgmu_suspend(struct adreno_device *adreno_dev) { - a6xx_rgmu_irq_disable(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + a6xx_rgmu_irq_disable(adreno_dev); a6xx_rgmu_disable_clks(adreno_dev); - a6xx_rgmu_disable_gdsc(adreno_dev); + kgsl_pwrctrl_disable_cx_gdsc(device); kgsl_pwrctrl_set_state(KGSL_DEVICE(adreno_dev), KGSL_STATE_NONE); } @@ -588,24 +576,6 @@ static int a6xx_rgmu_enable_clks(struct adreno_device *adreno_dev) return 0; } -static int a6xx_rgmu_enable_gdsc(struct adreno_device *adreno_dev) -{ - struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(adreno_dev); - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - int ret; - - if (IS_ERR_OR_NULL(rgmu->cx_gdsc)) - return 0; - - ret = regulator_enable(rgmu->cx_gdsc); - if (ret) - dev_err(&rgmu->pdev->dev, - "Fail to enable CX gdsc:%d\n", ret); - - kgsl_mmu_send_tlb_hint(&device->mmu, false); - return ret; -} - /* * a6xx_rgmu_load_firmware() - Load the ucode into the RGMU TCM * @adreno_dev: Pointer to adreno device @@ -712,7 +682,7 @@ static void a6xx_rgmu_power_off(struct adreno_device *adreno_dev) a6xx_rgmu_irq_disable(adreno_dev); a6xx_rgmu_disable_clks(adreno_dev); - a6xx_rgmu_disable_gdsc(adreno_dev); + kgsl_pwrctrl_disable_cx_gdsc(device); kgsl_pwrctrl_clear_l3_vote(device); @@ -814,13 +784,13 @@ static int a6xx_rgmu_boot(struct adreno_device *adreno_dev) kgsl_pwrctrl_request_state(device, KGSL_STATE_AWARE); - ret = a6xx_rgmu_enable_gdsc(adreno_dev); + ret = kgsl_pwrctrl_enable_cx_gdsc(device); if (ret) return ret; ret = a6xx_rgmu_enable_clks(adreno_dev); if (ret) { - a6xx_rgmu_disable_gdsc(adreno_dev); + kgsl_pwrctrl_disable_cx_gdsc(device); return ret; } @@ -1266,30 +1236,6 @@ static int a6xx_rgmu_irq_probe(struct kgsl_device *device) return 0; } -static int a6xx_rgmu_regulators_probe(struct a6xx_rgmu_device *rgmu) -{ - int ret = 0; - - rgmu->cx_gdsc = devm_regulator_get(&rgmu->pdev->dev, "vddcx"); - if (IS_ERR(rgmu->cx_gdsc)) { - ret = PTR_ERR(rgmu->cx_gdsc); - if (ret != -EPROBE_DEFER) - dev_err(&rgmu->pdev->dev, - "Couldn't get CX gdsc error:%d\n", ret); - return ret; - } - - rgmu->gx_gdsc = devm_regulator_get(&rgmu->pdev->dev, "vdd"); - if (IS_ERR(rgmu->gx_gdsc)) { - ret = PTR_ERR(rgmu->gx_gdsc); - if (ret != -EPROBE_DEFER) - dev_err(&rgmu->pdev->dev, - "Couldn't get GX gdsc error:%d\n", ret); - } - - return ret; -} - static int a6xx_rgmu_clocks_probe(struct a6xx_rgmu_device *rgmu, struct device_node *node) { @@ -1394,7 +1340,7 @@ static int a6xx_rgmu_probe(struct kgsl_device 
*device, rgmu->pdev = pdev; /* Set up RGMU regulators */ - ret = a6xx_rgmu_regulators_probe(rgmu); + ret = kgsl_pwrctrl_probe_regulators(device, pdev); if (ret) return ret; diff --git a/adreno_a6xx_rgmu.h b/adreno_a6xx_rgmu.h index 7f6f78b149..f34d2af7bf 100644 --- a/adreno_a6xx_rgmu.h +++ b/adreno_a6xx_rgmu.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2018-2021, The Linux Foundation. All rights reserved. + * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef __ADRENO_A6XX_RGMU_H #define __ADRENO_A6XX_RGMU_H @@ -31,8 +32,6 @@ enum { * @oob_interrupt_num: number of RGMU asserted OOB interrupt * @fw_hostptr: Buffer which holds the RGMU firmware * @fw_size: Size of RGMU firmware buffer - * @cx_gdsc: CX headswitch that controls power of RGMU and - subsystem peripherals * @clks: RGMU clocks including the GPU * @gpu_clk: Pointer to GPU core clock * @rgmu_clk: Pointer to rgmu clock @@ -47,8 +46,6 @@ struct a6xx_rgmu_device { unsigned int oob_interrupt_num; unsigned int *fw_hostptr; uint32_t fw_size; - struct regulator *cx_gdsc; - struct regulator *gx_gdsc; struct clk_bulk_data *clks; /** @num_clks: Number of clocks in @clks */ int num_clks; diff --git a/adreno_a6xx_snapshot.c b/adreno_a6xx_snapshot.c index e7a32e7822..ef1d55f385 100644 --- a/adreno_a6xx_snapshot.c +++ b/adreno_a6xx_snapshot.c @@ -4,8 +4,6 @@ * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. */ -#include - #include "adreno.h" #include "adreno_a6xx.h" #include "adreno_snapshot.h" diff --git a/adreno_gen7.c b/adreno_gen7.c index 21f212a05c..733a59aca6 100644 --- a/adreno_gen7.c +++ b/adreno_gen7.c @@ -1610,6 +1610,7 @@ int gen7_probe_common(struct platform_device *pdev, device->pwrscale.avoid_ddr_stall = true; device->pwrctrl.rt_bus_hint = gen7_core->rt_bus_hint; + device->pwrctrl.cx_gdsc_offset = GEN7_GPU_CC_CX_GDSCR; ret = adreno_device_probe(pdev, adreno_dev); if (ret) diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index 6194621bf9..db6e8831e1 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -6,14 +6,12 @@ #include #include -#include #include #include #include #include #include #include -#include #include #include #include @@ -346,40 +344,6 @@ static void gmu_ao_sync_event(struct adreno_device *adreno_dev) local_irq_restore(flags); } -int gen7_gmu_enable_gdsc(struct adreno_device *adreno_dev) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); - int ret; - - ret = wait_for_completion_timeout(&gmu->gdsc_gate, msecs_to_jiffies(5000)); - if (!ret) { - dev_err(device->dev, "GPU CX wait timeout. 
Dumping CX votes:\n"); - /* Dump the cx regulator consumer list */ - qcom_clk_dump(NULL, gmu->cx_gdsc, false); - } - - ret = regulator_enable(gmu->cx_gdsc); - if (ret) - dev_err(&gmu->pdev->dev, - "Failed to enable GMU CX gdsc, error %d\n", ret); - - kgsl_mmu_send_tlb_hint(&device->mmu, false); - clear_bit(GMU_PRIV_CX_GDSC_WAIT, &gmu->flags); - return ret; -} - -void gen7_gmu_disable_gdsc(struct adreno_device *adreno_dev) -{ - struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - - kgsl_mmu_send_tlb_hint(&device->mmu, true); - reinit_completion(&gmu->gdsc_gate); - set_bit(GMU_PRIV_CX_GDSC_WAIT, &gmu->flags); - regulator_disable(gmu->cx_gdsc); -} - int gen7_gmu_device_start(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); @@ -1496,6 +1460,7 @@ static void gen7_gmu_pwrctrl_suspend(struct adreno_device *adreno_dev) int ret = 0; struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; /* Disconnect GPU from BUS is not needed if CX GDSC goes off later */ @@ -1533,19 +1498,19 @@ static void gen7_gmu_pwrctrl_suspend(struct adreno_device *adreno_dev) * the GX HS. This code path is the only client voting for GX through * the regulator interface. */ - if (gmu->gx_gdsc) { + if (pwr->gx_gdsc) { if (gen7_gmu_gx_is_on(adreno_dev)) { /* Switch gx gdsc control from GMU to CPU * force non-zero reference count in clk driver * so next disable call will turn * off the GDSC */ - ret = regulator_enable(gmu->gx_gdsc); + ret = regulator_enable(pwr->gx_gdsc); if (ret) dev_err(&gmu->pdev->dev, "suspend fail: gx enable %d\n", ret); - ret = regulator_disable(gmu->gx_gdsc); + ret = regulator_disable(pwr->gx_gdsc); if (ret) dev_err(&gmu->pdev->dev, "suspend fail: gx disable %d\n", ret); @@ -1600,7 +1565,7 @@ void gen7_gmu_suspend(struct adreno_device *adreno_dev) clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); - gen7_gmu_disable_gdsc(adreno_dev); + kgsl_pwrctrl_disable_cx_gdsc(device); gen7_rdpm_cx_freq_update(gmu, 0); @@ -1937,7 +1902,7 @@ static int gen7_gmu_first_boot(struct adreno_device *adreno_dev) gen7_gmu_aop_send_acd_state(gmu, adreno_dev->acd_enabled); - ret = gen7_gmu_enable_gdsc(adreno_dev); + ret = kgsl_pwrctrl_enable_cx_gdsc(device); if (ret) return ret; @@ -2020,7 +1985,7 @@ clks_gdsc_off: clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); gdsc_off: - gen7_gmu_disable_gdsc(adreno_dev); + kgsl_pwrctrl_disable_cx_gdsc(device); gen7_rdpm_cx_freq_update(gmu, 0); @@ -2035,7 +2000,7 @@ static int gen7_gmu_boot(struct adreno_device *adreno_dev) kgsl_pwrctrl_request_state(device, KGSL_STATE_AWARE); - ret = gen7_gmu_enable_gdsc(adreno_dev); + ret = kgsl_pwrctrl_enable_cx_gdsc(device); if (ret) return ret; @@ -2085,7 +2050,7 @@ clks_gdsc_off: clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); gdsc_off: - gen7_gmu_disable_gdsc(adreno_dev); + kgsl_pwrctrl_disable_cx_gdsc(device); gen7_rdpm_cx_freq_update(gmu, 0); @@ -2448,61 +2413,6 @@ static void gen7_gmu_rdpm_probe(struct gen7_gmu_device *gmu, res->start, resource_size(res)); } -static int gmu_cx_gdsc_event(struct notifier_block *nb, - unsigned long event, void *data) -{ - struct gen7_gmu_device *gmu = container_of(nb, struct gen7_gmu_device, gdsc_nb); - struct adreno_device *adreno_dev = gen7_gmu_to_adreno(gmu); - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - u32 val; - - if (!(event & REGULATOR_EVENT_DISABLE) || - 
!test_bit(GMU_PRIV_CX_GDSC_WAIT, &gmu->flags)) - return 0; - - if (kgsl_regmap_read_poll_timeout(&device->regmap, GEN7_GPU_CC_CX_GDSCR, - val, !(val & BIT(31)), 100, 100 * 1000)) - dev_err(device->dev, "GPU CX wait timeout.\n"); - - clear_bit(GMU_PRIV_CX_GDSC_WAIT, &gmu->flags); - complete_all(&gmu->gdsc_gate); - - return 0; -} - -static int gen7_gmu_regulators_probe(struct gen7_gmu_device *gmu, - struct platform_device *pdev) -{ - int ret; - - gmu->cx_gdsc = devm_regulator_get(&pdev->dev, "vddcx"); - if (IS_ERR(gmu->cx_gdsc)) { - if (PTR_ERR(gmu->cx_gdsc) != -EPROBE_DEFER) - dev_err(&pdev->dev, "Couldn't get the vddcx gdsc\n"); - return PTR_ERR(gmu->cx_gdsc); - } - - gmu->gx_gdsc = devm_regulator_get(&pdev->dev, "vdd"); - if (IS_ERR(gmu->gx_gdsc)) { - if (PTR_ERR(gmu->gx_gdsc) != -EPROBE_DEFER) - dev_err(&pdev->dev, "Couldn't get the vdd gdsc\n"); - return PTR_ERR(gmu->gx_gdsc); - } - - init_completion(&gmu->gdsc_gate); - complete_all(&gmu->gdsc_gate); - - gmu->gdsc_nb.notifier_call = gmu_cx_gdsc_event; - ret = devm_regulator_register_notifier(gmu->cx_gdsc, &gmu->gdsc_nb); - - if (ret) { - dev_err(&pdev->dev, "Failed to register gmu cx gdsc notifier: %d\n", ret); - return ret; - } - - return 0; -} - void gen7_gmu_remove(struct kgsl_device *device) { struct adreno_device *adreno_dev = ADRENO_DEVICE(device); @@ -2615,7 +2525,7 @@ int gen7_gmu_probe(struct kgsl_device *device, gen7_gmu_rdpm_probe(gmu, device); /* Set up GMU regulators */ - ret = gen7_gmu_regulators_probe(gmu, pdev); + ret = kgsl_pwrctrl_probe_regulators(device, pdev); if (ret) return ret; @@ -2781,7 +2691,7 @@ static int gen7_gmu_power_off(struct adreno_device *adreno_dev) clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); - gen7_gmu_disable_gdsc(adreno_dev); + kgsl_pwrctrl_disable_cx_gdsc(device); gen7_rdpm_cx_freq_update(gmu, 0); diff --git a/adreno_gen7_gmu.h b/adreno_gen7_gmu.h index 1487e778df..483b9f3159 100644 --- a/adreno_gen7_gmu.h +++ b/adreno_gen7_gmu.h @@ -24,9 +24,6 @@ * @num_bwlevel: number of GPU BW levels * @num_cnocbwlevel: number CNOC BW levels * @rpmh_votes: RPMh TCS command set for GPU, GMU voltage and bw scaling - * @cx_gdsc: CX headswitch that controls power of GMU and - * subsystem peripherals - * @gx_gdsc: GX headswitch that controls power of GPU subsystem * @clks: GPU subsystem clocks required for GMU functionality * @wakeup_pwrlevel: GPU wake up power/DCVS level in case different * than default power level @@ -55,9 +52,6 @@ struct gen7_gmu_device { /** @vrb: GMU virtual register bank memory */ struct kgsl_memdesc *vrb; struct gen7_hfi hfi; - /** @pwrlevels: Array of GMU power levels */ - struct regulator *cx_gdsc; - struct regulator *gx_gdsc; struct clk_bulk_data *clks; /** @num_clks: Number of entries in the @clks array */ int num_clks; @@ -104,10 +98,6 @@ struct gen7_gmu_device { u32 num_oob_perfcntr; /** @acd_debug_val: DVM value to calibrate ACD for a level */ u32 acd_debug_val; - /** @gdsc_nb: Notifier block for cx gdsc regulator */ - struct notifier_block gdsc_nb; - /** @gdsc_gate: Completion to signal cx gdsc collapse status */ - struct completion gdsc_gate; /** @stats_enable: GMU stats feature enable */ bool stats_enable; /** @stats_mask: GMU performance countables to enable */ @@ -300,14 +290,6 @@ int gen7_gmu_memory_init(struct adreno_device *adreno_dev); */ void gen7_gmu_aop_send_acd_state(struct gen7_gmu_device *gmu, bool flag); -/** - * gen7_gmu_enable_clocks - Enable gmu clocks - * @adreno_dev: Pointer to the adreno device - * - * Return: 0 on success or negative error on failure - 
*/ -int gen7_gmu_enable_gdsc(struct adreno_device *adreno_dev); - /** * gen7_gmu_load_fw - Load gmu firmware * @adreno_dev: Pointer to the adreno device @@ -474,20 +456,6 @@ void gen7_gmu_remove(struct kgsl_device *device); */ int gen7_gmu_enable_clks(struct adreno_device *adreno_dev, u32 level); -/** - * gen7_gmu_enable_gdsc - Enable gmu gdsc - * @adreno_dev: Pointer to the adreno device - * - * Return: 0 on success or negative error on failure - */ -int gen7_gmu_enable_gdsc(struct adreno_device *adreno_dev); - -/** - * gen7_gmu_disable_gdsc - Disable gmu gdsc - * @adreno_dev: Pointer to the adreno device - */ -void gen7_gmu_disable_gdsc(struct adreno_device *adreno_dev); - /** * gen7_gmu_handle_watchdog - Handle watchdog interrupt * @adreno_dev: Pointer to the adreno device diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index 492c3f0859..fec3079ae6 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -484,7 +484,7 @@ static int gen7_hwsched_gmu_first_boot(struct adreno_device *adreno_dev) gen7_gmu_aop_send_acd_state(gmu, adreno_dev->acd_enabled); - ret = gen7_gmu_enable_gdsc(adreno_dev); + ret = kgsl_pwrctrl_enable_cx_gdsc(device); if (ret) return ret; @@ -574,7 +574,7 @@ clks_gdsc_off: clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); gdsc_off: - gen7_gmu_disable_gdsc(adreno_dev); + kgsl_pwrctrl_disable_cx_gdsc(device); gen7_rdpm_cx_freq_update(gmu, 0); @@ -589,7 +589,7 @@ static int gen7_hwsched_gmu_boot(struct adreno_device *adreno_dev) kgsl_pwrctrl_request_state(device, KGSL_STATE_AWARE); - ret = gen7_gmu_enable_gdsc(adreno_dev); + ret = kgsl_pwrctrl_enable_cx_gdsc(device); if (ret) return ret; @@ -641,7 +641,7 @@ clks_gdsc_off: clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); gdsc_off: - gen7_gmu_disable_gdsc(adreno_dev); + kgsl_pwrctrl_disable_cx_gdsc(device); gen7_rdpm_cx_freq_update(gmu, 0); @@ -729,7 +729,7 @@ static int gen7_hwsched_gmu_power_off(struct adreno_device *adreno_dev) clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); - gen7_gmu_disable_gdsc(adreno_dev); + kgsl_pwrctrl_disable_cx_gdsc(device); gen7_rdpm_cx_freq_update(gmu, 0); diff --git a/kgsl_gmu_core.h b/kgsl_gmu_core.h index 98a40d0d81..197cf353c1 100644 --- a/kgsl_gmu_core.h +++ b/kgsl_gmu_core.h @@ -236,7 +236,6 @@ enum { GMU_PRIV_RSCC_SLEEP_DONE, GMU_PRIV_PM_SUSPEND, GMU_PRIV_PDC_RSC_LOADED, - GMU_PRIV_CX_GDSC_WAIT, /* Indicates if GMU INIT HFI messages are recorded successfully */ GMU_PRIV_WARMBOOT_GMU_INIT_DONE, /* Indicates if GPU BOOT HFI messages are recorded successfully */ diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c index 7a99babbd6..363e622ba9 100644 --- a/kgsl_pwrctrl.c +++ b/kgsl_pwrctrl.c @@ -4,7 +4,9 @@ * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. */ +#include #include +#include #include #include #include @@ -1316,9 +1318,34 @@ int kgsl_pwrctrl_axi(struct kgsl_device *device, bool state) return 0; } -static int enable_regulator(struct device *dev, struct regulator *regulator, - const char *name) +int kgsl_pwrctrl_enable_cx_gdsc(struct kgsl_device *device) { + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct regulator *regulator = pwr->cx_gdsc; + int ret; + + if (IS_ERR_OR_NULL(regulator)) + return 0; + + ret = wait_for_completion_timeout(&pwr->cx_gdsc_gate, msecs_to_jiffies(5000)); + if (!ret) { + dev_err(device->dev, "GPU CX wait timeout. 
Dumping CX votes:\n"); + /* Dump the cx regulator consumer list */ + qcom_clk_dump(NULL, regulator, false); + } + + ret = regulator_enable(regulator); + if (ret) + dev_err(device->dev, "Failed to enable CX regulator: %d\n", ret); + + kgsl_mmu_send_tlb_hint(&device->mmu, false); + pwr->cx_gdsc_wait = false; + return ret; +} + +static int kgsl_pwtctrl_enable_gx_gdsc(struct kgsl_device *device) +{ + struct regulator *regulator = device->pwrctrl.gx_gdsc; int ret; if (IS_ERR_OR_NULL(regulator)) @@ -1326,10 +1353,34 @@ static int enable_regulator(struct device *dev, struct regulator *regulator, ret = regulator_enable(regulator); if (ret) - dev_err(dev, "Unable to enable regulator %s: %d\n", name, ret); + dev_err(device->dev, "Failed to enable GX regulator: %d\n", ret); return ret; } +void kgsl_pwrctrl_disable_cx_gdsc(struct kgsl_device *device) +{ + struct regulator *regulator = device->pwrctrl.cx_gdsc; + + if (IS_ERR_OR_NULL(regulator)) + return; + + kgsl_mmu_send_tlb_hint(&device->mmu, true); + reinit_completion(&device->pwrctrl.cx_gdsc_gate); + device->pwrctrl.cx_gdsc_wait = true; + regulator_disable(regulator); +} + +static void kgsl_pwrctrl_disable_gx_gdsc(struct kgsl_device *device) +{ + struct regulator *regulator = device->pwrctrl.gx_gdsc; + + if (IS_ERR_OR_NULL(regulator)) + return; + + if (!kgsl_regulator_disable_wait(regulator, 200)) + dev_err(device->dev, "Regulator vdd is stuck on\n"); +} + static int enable_regulators(struct kgsl_device *device) { struct kgsl_pwrctrl *pwr = &device->pwrctrl; @@ -1338,15 +1389,14 @@ static int enable_regulators(struct kgsl_device *device) if (test_and_set_bit(KGSL_PWRFLAGS_POWER_ON, &pwr->power_flags)) return 0; - ret = enable_regulator(&device->pdev->dev, pwr->cx_gdsc, "vddcx"); + ret = kgsl_pwrctrl_enable_cx_gdsc(device); if (!ret) { /* Set parent in retention voltage to power up vdd supply */ ret = kgsl_regulator_set_voltage(device->dev, pwr->gx_gdsc_parent, pwr->gx_gdsc_parent_min_corner); if (!ret) - ret = enable_regulator(&device->pdev->dev, - pwr->gx_gdsc, "vdd"); + ret = kgsl_pwtctrl_enable_gx_gdsc(device); } if (ret) { @@ -1358,6 +1408,58 @@ static int enable_regulators(struct kgsl_device *device) return 0; } +int kgsl_pwrctrl_probe_regulators(struct kgsl_device *device, + struct platform_device *pdev) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + + pwr->cx_gdsc = devm_regulator_get(&pdev->dev, "vddcx"); + if (IS_ERR(pwr->cx_gdsc)) { + if (PTR_ERR(pwr->cx_gdsc) != -EPROBE_DEFER) + dev_err(&pdev->dev, "Couldn't get the vddcx gdsc\n"); + return PTR_ERR(pwr->cx_gdsc); + } + + pwr->gx_gdsc = devm_regulator_get(&pdev->dev, "vdd"); + if (IS_ERR(pwr->gx_gdsc)) { + if (PTR_ERR(pwr->gx_gdsc) != -EPROBE_DEFER) + dev_err(&pdev->dev, "Couldn't get the vdd gdsc\n"); + return PTR_ERR(pwr->gx_gdsc); + } + + return 0; +} + +static int kgsl_cx_gdsc_event(struct notifier_block *nb, + unsigned long event, void *data) +{ + struct kgsl_pwrctrl *pwr = container_of(nb, struct kgsl_pwrctrl, cx_gdsc_nb); + struct kgsl_device *device = container_of(pwr, struct kgsl_device, pwrctrl); + u32 val; + + if (!(event & REGULATOR_EVENT_DISABLE) || !pwr->cx_gdsc_wait) + return 0; + + if (pwr->cx_gdsc_offset) { + if (kgsl_regmap_read_poll_timeout(&device->regmap, pwr->cx_gdsc_offset, + val, !(val & BIT(31)), 100, 100 * 1000)) + dev_err(device->dev, "GPU CX wait timeout.\n"); + } + + pwr->cx_gdsc_wait = false; + complete_all(&pwr->cx_gdsc_gate); + + return 0; +} + +int kgsl_register_gdsc_notifier(struct kgsl_device *device) +{ + struct kgsl_pwrctrl *pwr = 
&device->pwrctrl; + + pwr->cx_gdsc_nb.notifier_call = kgsl_cx_gdsc_event; + return devm_regulator_register_notifier(pwr->cx_gdsc, &pwr->cx_gdsc_nb); +} + static int kgsl_pwrctrl_pwrrail(struct kgsl_device *device, bool state) { struct kgsl_pwrctrl *pwr = &device->pwrctrl; @@ -1378,10 +1480,8 @@ static int kgsl_pwrctrl_pwrrail(struct kgsl_device *device, bool state) &pwr->power_flags)) { kgsl_mmu_send_tlb_hint(&device->mmu, true); trace_kgsl_rail(device, state); - if (!kgsl_regulator_disable_wait(pwr->gx_gdsc, 200)) - dev_err(device->dev, "Regulator vdd is stuck on\n"); - if (!kgsl_regulator_disable_wait(pwr->cx_gdsc, 200)) - dev_err(device->dev, "Regulator vddcx is stuck on\n"); + kgsl_pwrctrl_disable_gx_gdsc(device); + kgsl_pwrctrl_disable_cx_gdsc(device); } } else { status = enable_regulators(device); @@ -1618,6 +1718,15 @@ int kgsl_pwrctrl_init(struct kgsl_device *device) } } + init_completion(&pwr->cx_gdsc_gate); + complete_all(&pwr->cx_gdsc_gate); + + result = kgsl_register_gdsc_notifier(device); + if (result) { + dev_err(&pdev->dev, "Failed to register gdsc notifier: %d\n", result); + return result; + } + pwr->power_flags = 0; pm_runtime_enable(&pdev->dev); diff --git a/kgsl_pwrctrl.h b/kgsl_pwrctrl.h index ee0da22d61..39e99f85b6 100644 --- a/kgsl_pwrctrl.h +++ b/kgsl_pwrctrl.h @@ -117,6 +117,14 @@ struct kgsl_pwrctrl { struct regulator *gx_gdsc_parent; /** @gx_gdsc_parent_min_corner: Minimum supply voltage for GX parent */ u32 gx_gdsc_parent_min_corner; + /** @cx_gdsc_nb: Notifier block for cx gdsc regulator */ + struct notifier_block cx_gdsc_nb; + /** @cx_gdsc_gate: Completion to signal cx gdsc collapse status */ + struct completion cx_gdsc_gate; + /** @cx_gdsc_wait: Whether to wait for cx gdsc to turn off */ + bool cx_gdsc_wait; + /** @cx_gdsc_offset: Offset of CX GDSC register */ + u32 cx_gdsc_offset; int isense_clk_indx; int isense_clk_on_level; unsigned long power_flags; @@ -278,4 +286,28 @@ void kgsl_pwrctrl_irq(struct kgsl_device *device, bool state); * Clear the l3 vote when going into slumber */ void kgsl_pwrctrl_clear_l3_vote(struct kgsl_device *device); + +/** + * kgsl_pwrctrl_enable_cx_gdsc - Enable cx gdsc + * @device: Pointer to the kgsl device + * + * Return: 0 on success or negative error on failure + */ +int kgsl_pwrctrl_enable_cx_gdsc(struct kgsl_device *device); + +/** + * kgsl_pwrctrl_disable_cx_gdsc - Disable cx gdsc + * @device: Pointer to the kgsl device + */ +void kgsl_pwrctrl_disable_cx_gdsc(struct kgsl_device *device); + +/** + * kgsl_pwrctrl_probe_regulators - Probe regulators + * @device: Pointer to the kgsl device + * @pdev: Pointer to the platform device + * + * Return: 0 on success or negative error on failure + */ +int kgsl_pwrctrl_probe_regulators(struct kgsl_device *device, + struct platform_device *pdev); #endif /* __KGSL_PWRCTRL_H */ From 84659689435e6091427d3df3c5941f0b032563d5 Mon Sep 17 00:00:00 2001 From: Carter Cooper Date: Wed, 10 May 2023 16:40:53 -0600 Subject: [PATCH 523/750] kgsl: gen7: Allow using up to 32 DCVS points Add support to allow newer gen7 devices the ability to use more than 16 DCVS points. Using more than 16 levels is dependent on the GMU FW also being able to support the new hfi_table_cmd packet which allows dynamic table support rather than hardcoded support (as was done with hfi_dcvs_table_cmd packet). This patch will detect if the GMU supports the new packet and send the correct HFI message accordingly. 
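For reference, the new packet is self-describing: a fixed header followed by variable-length entries, each carrying its own row count and stride. A condensed view of the structures and the version check (taken from the adreno_hfi.h and adreno_gen7_hfi.c hunks below; field comments added here for clarity):

    struct hfi_table_entry {
            u32 count;      /* number of rows in this entry */
            u32 stride;     /* dwords per row */
            u32 data[];     /* count * stride dwords follow */
    } __packed;

    struct hfi_table_cmd {
            u32 hdr;
            u32 version;
            u32 type;       /* HFI_TABLE_GPU_PERF for the GPU perf/DCVS table */
            struct hfi_table_entry entry[];
    } __packed;

    /*
     * Selection in gen7_hfi_send_gpu_perf_table(), roughly:
     *   if (gmu->ver.hfi >= HFI_VERSION(2, 6, 1))
     *           build H2F_MSG_TABLE with two entries (gx_votes, then cx_votes),
     *           allowing up to MAX_GX_LEVELS (32) GX levels;
     *   else
     *           fall back to the fixed-size hfi_dcvstable_cmd, limited to
     *           MAX_GX_LEVELS_LEGACY (16) GX levels.
     */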
Change-Id: Ie1df5ab069c49265a61485ee00fb7958ac6eeba7 Signed-off-by: Carter Cooper --- adreno_gen7_gmu.c | 6 +-- adreno_gen7_gmu.h | 9 +++++ adreno_gen7_hfi.c | 85 ++++++++++++++++++++++++++++++++++++++- adreno_gen7_hfi.h | 10 ++++- adreno_gen7_hwsched.c | 8 ++-- adreno_gen7_hwsched_hfi.c | 3 +- adreno_gen7_rpmh.c | 35 +++++++--------- adreno_hfi.h | 34 ++++++++++++++-- kgsl_gmu_core.h | 9 ++++- kgsl_pwrctrl.h | 2 +- msm_adreno_devfreq.h | 4 +- 11 files changed, 163 insertions(+), 42 deletions(-) diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index 86c58d0033..b168f457c3 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -1564,7 +1564,7 @@ static int gen7_gmu_notify_slumber(struct adreno_device *adreno_dev) struct kgsl_pwrctrl *pwr = &device->pwrctrl; struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); int bus_level = pwr->pwrlevels[pwr->default_pwrlevel].bus_freq; - int perf_idx = gmu->hfi.dcvs_table.gpu_level_num - + int perf_idx = gmu->dcvs_table.gpu_level_num - pwr->default_pwrlevel - 1; struct hfi_prep_slumber_cmd req = { .freq = perf_idx, @@ -1612,7 +1612,7 @@ static int gen7_gmu_dcvs_set(struct adreno_device *adreno_dev, struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct kgsl_pwrctrl *pwr = &device->pwrctrl; struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); - struct hfi_dcvstable_cmd *table = &gmu->hfi.dcvs_table; + struct gen7_dcvs_table *table = &gmu->dcvs_table; struct hfi_gx_bw_perf_vote_cmd req = { .ack_type = DCVS_ACK_BLOCK, .freq = INVALID_DCVS_IDX, @@ -1661,7 +1661,7 @@ static int gen7_gmu_dcvs_set(struct adreno_device *adreno_dev, if (req.freq != INVALID_DCVS_IDX) gen7_rdpm_mx_freq_update(gmu, - gmu->hfi.dcvs_table.gx_votes[req.freq].freq); + gmu->dcvs_table.gx_votes[req.freq].freq); return ret; } diff --git a/adreno_gen7_gmu.h b/adreno_gen7_gmu.h index 1487e778df..51f0bc4674 100644 --- a/adreno_gen7_gmu.h +++ b/adreno_gen7_gmu.h @@ -11,6 +11,13 @@ #include "adreno_gen7_hfi.h" #include "kgsl_gmu_core.h" +struct gen7_dcvs_table { + u32 gpu_level_num; + u32 gmu_level_num; + struct opp_gx_desc gx_votes[MAX_GX_LEVELS]; + struct opp_desc cx_votes[MAX_CX_LEVELS]; +}; + /** * struct gen7_gmu_device - GMU device structure * @ver: GMU Version information @@ -120,6 +127,8 @@ struct gen7_gmu_device { u32 cp_init_hdr; /** @switch_to_unsec_hdr: raw command header for switch to unsecure packet */ u32 switch_to_unsec_hdr; + /** @dcvs_table: Table for gpu dcvs levels */ + struct gen7_dcvs_table dcvs_table; }; /* Helper function to get to gen7 gmu device from adreno device */ diff --git a/adreno_gen7_hfi.c b/adreno_gen7_hfi.c index f1a799dade..7710da6624 100644 --- a/adreno_gen7_hfi.c +++ b/adreno_gen7_hfi.c @@ -9,6 +9,7 @@ #include "adreno.h" #include "adreno_gen7.h" +#include "adreno_gen7_gmu.h" #include "adreno_gen7_hfi.h" #include "kgsl_device.h" #include "kgsl_trace.h" @@ -659,6 +660,87 @@ static void reset_hfi_queues(struct adreno_device *adreno_dev) } } +/* Fill the entry and return the dword count written */ +static u32 _fill_table_entry(struct hfi_table_entry *entry, u32 count, + u32 stride_bytes, u32 *data) +{ + entry->count = count; + entry->stride = stride_bytes >> 2; /* entry->stride is in dwords */ + memcpy(entry->data, data, stride_bytes * count); + + /* Return total dword count of entry + data */ + return (sizeof(*entry) >> 2) + (entry->count * entry->stride); +} + +int gen7_hfi_send_gpu_perf_table(struct adreno_device *adreno_dev) +{ + /* + * Buffer to store either hfi_table_cmd or hfi_dcvstable_cmd. + * Current max size for either is 165 dwords. 
+ */ + static u32 cmd_buf[200]; + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct gen7_dcvs_table *tbl = &gmu->dcvs_table; + int ret = 0; + + /* Starting with GMU HFI Version 2.6.1, use H2F_MSG_TABLE */ + if (gmu->ver.hfi >= HFI_VERSION(2, 6, 1)) { + struct hfi_table_cmd *cmd = (struct hfi_table_cmd *)&cmd_buf[0]; + u32 dword_off; + + /* Already setup, so just send cmd */ + if (cmd->hdr) + return gen7_hfi_send_generic_req(adreno_dev, cmd, + MSG_HDR_GET_SIZE(cmd->hdr) << 2); + + if (tbl->gpu_level_num > MAX_GX_LEVELS || tbl->gmu_level_num > MAX_CX_LEVELS) + return -EINVAL; + + /* CMD starts with struct hfi_table_cmd data */ + cmd->type = HFI_TABLE_GPU_PERF; + dword_off = sizeof(*cmd) >> 2; + + /* Fill in the table entry and data starting at dword_off */ + dword_off += _fill_table_entry((struct hfi_table_entry *)&cmd_buf[dword_off], + tbl->gpu_level_num, sizeof(struct opp_gx_desc), + (u32 *)tbl->gx_votes); + + /* Fill in the table entry and data starting at dword_off */ + dword_off += _fill_table_entry((struct hfi_table_entry *)&cmd_buf[dword_off], + tbl->gmu_level_num, sizeof(struct opp_desc), + (u32 *)tbl->cx_votes); + + cmd->hdr = CREATE_MSG_HDR(H2F_MSG_TABLE, HFI_MSG_CMD); + cmd->hdr = MSG_HDR_SET_SIZE(cmd->hdr, dword_off); + + ret = gen7_hfi_send_generic_req(adreno_dev, cmd, dword_off << 2); + } else { + struct hfi_dcvstable_cmd *cmd = (struct hfi_dcvstable_cmd *)&cmd_buf[0]; + + /* Already setup, so just send cmd */ + if (cmd->hdr) + return gen7_hfi_send_generic_req(adreno_dev, cmd, sizeof(*cmd)); + + if (tbl->gpu_level_num > MAX_GX_LEVELS_LEGACY || tbl->gmu_level_num > MAX_CX_LEVELS) + return -EINVAL; + + ret = CMD_MSG_HDR(*cmd, H2F_MSG_PERF_TBL); + if (ret) + return ret; + + cmd->gpu_level_num = tbl->gpu_level_num; + cmd->gmu_level_num = tbl->gmu_level_num; + memcpy(&cmd->gx_votes, tbl->gx_votes, + sizeof(struct opp_gx_desc) * cmd->gpu_level_num); + memcpy(&cmd->cx_votes, tbl->cx_votes, + sizeof(struct opp_desc) * cmd->gmu_level_num); + + ret = gen7_hfi_send_generic_req(adreno_dev, cmd, sizeof(*cmd)); + } + + return ret; +} + int gen7_hfi_start(struct adreno_device *adreno_dev) { struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); @@ -667,8 +749,7 @@ int gen7_hfi_start(struct adreno_device *adreno_dev) reset_hfi_queues(adreno_dev); - result = gen7_hfi_send_generic_req(adreno_dev, &gmu->hfi.dcvs_table, - sizeof(gmu->hfi.dcvs_table)); + result = gen7_hfi_send_gpu_perf_table(adreno_dev); if (result) goto err; diff --git a/adreno_gen7_hfi.h b/adreno_gen7_hfi.h index c7274ae912..086aa29850 100644 --- a/adreno_gen7_hfi.h +++ b/adreno_gen7_hfi.h @@ -24,8 +24,6 @@ struct gen7_hfi { struct hfi_bwtable_cmd bw_table; /** @acd_table: HFI table for ACD data */ struct hfi_acd_table_cmd acd_table; - /** @dcvs_table: HFI table for gpu dcvs levels */ - struct hfi_dcvstable_cmd dcvs_table; /** @cmdq_lock: Spinlock for accessing the cmdq */ spinlock_t cmdq_lock; /** @@ -201,6 +199,14 @@ int gen7_hfi_send_clx_feature_ctrl(struct adreno_device *adreno_dev); */ int gen7_hfi_send_ifpc_feature_ctrl(struct adreno_device *adreno_dev); +/** + * gen7_hfi_send_gpu_perf_table - Send the gpu perf table hfi packet + * @adreno_dev: Pointer to the adreno device + * + * Return: 0 on success or negative error on failure + */ +int gen7_hfi_send_gpu_perf_table(struct adreno_device *adreno_dev); + /* * gen7_hfi_process_queue - Check hfi queue for messages from gmu * @gmu: Pointer to the gen7 gmu device diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index 5e4ed32dee..df5a8223f6 100644 --- 
a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -690,8 +690,7 @@ static int gen7_hwsched_notify_slumber(struct adreno_device *adreno_dev) if (ret) return ret; - req.freq = gmu->hfi.dcvs_table.gpu_level_num - - pwr->default_pwrlevel - 1; + req.freq = gmu->dcvs_table.gpu_level_num - pwr->default_pwrlevel - 1; req.bw = pwr->pwrlevels[pwr->default_pwrlevel].bus_freq; req.bw |= gen7_bus_ab_quantize(adreno_dev, 0); @@ -1328,7 +1327,7 @@ static int gen7_hwsched_dcvs_set(struct adreno_device *adreno_dev, struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct kgsl_pwrctrl *pwr = &device->pwrctrl; struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); - struct hfi_dcvstable_cmd *table = &gmu->hfi.dcvs_table; + struct gen7_dcvs_table *table = &gmu->dcvs_table; struct hfi_gx_bw_perf_vote_cmd req = { .ack_type = DCVS_ACK_BLOCK, .freq = INVALID_DCVS_IDX, @@ -1379,8 +1378,7 @@ static int gen7_hwsched_dcvs_set(struct adreno_device *adreno_dev, } if (req.freq != INVALID_DCVS_IDX) - gen7_rdpm_mx_freq_update(gmu, - gmu->hfi.dcvs_table.gx_votes[req.freq].freq); + gen7_rdpm_mx_freq_update(gmu, gmu->dcvs_table.gx_votes[req.freq].freq); return ret; } diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index b4c4fb0715..25661c8b86 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -2372,8 +2372,7 @@ int gen7_hwsched_hfi_start(struct adreno_device *adreno_dev) if (ret) goto err; - ret = gen7_hfi_send_generic_req(adreno_dev, &gmu->hfi.dcvs_table, - sizeof(gmu->hfi.dcvs_table)); + ret = gen7_hfi_send_gpu_perf_table(adreno_dev); if (ret) goto err; diff --git a/adreno_gen7_rpmh.c b/adreno_gen7_rpmh.c index 3590def0d6..62d46b703c 100644 --- a/adreno_gen7_rpmh.c +++ b/adreno_gen7_rpmh.c @@ -255,23 +255,22 @@ static struct rpmh_bw_votes *build_rpmh_bw_votes(struct bcm *bcms, /* * setup_gmu_arc_votes - Build the gmu voting table - * @hfi: Pointer to hfi device + * @gmu: Pointer to gmu device * @pri_rail: Pointer to primary power rail vlvl table * @sec_rail: Pointer to second/dependent power rail vlvl table - * @freqs: List of GMU frequencies - * @vlvls: List of GMU voltage levels * * This function initializes the cx votes for all gmu frequencies * for gmu dcvs */ -static int setup_cx_arc_votes(struct gen7_hfi *hfi, - struct rpmh_arc_vals *pri_rail, struct rpmh_arc_vals *sec_rail, - u32 *freqs, u32 *vlvls) +static int setup_cx_arc_votes(struct gen7_gmu_device *gmu, + struct rpmh_arc_vals *pri_rail, struct rpmh_arc_vals *sec_rail) { /* Hardcoded values of GMU CX voltage levels */ u16 gmu_cx_vlvl[MAX_CX_LEVELS]; u32 cx_votes[MAX_CX_LEVELS]; - struct hfi_dcvstable_cmd *table = &hfi->dcvs_table; + struct gen7_dcvs_table *table = &gmu->dcvs_table; + u32 *freqs = gmu->freqs; + u32 *vlvls = gmu->vlvls; int ret, i; gmu_cx_vlvl[0] = 0; @@ -333,21 +332,21 @@ static int setup_gx_arc_votes(struct adreno_device *adreno_dev, struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); struct kgsl_pwrctrl *pwr = &device->pwrctrl; - struct hfi_dcvstable_cmd *table = &gmu->hfi.dcvs_table; + struct gen7_dcvs_table *table = &gmu->dcvs_table; u32 index; u16 vlvl_tbl[MAX_GX_LEVELS]; u32 gx_votes[MAX_GX_LEVELS]; int ret, i; - /* Add the zero powerlevel for the perf table */ - table->gpu_level_num = device->pwrctrl.num_pwrlevels + 1; - - if (table->gpu_level_num > ARRAY_SIZE(vlvl_tbl)) { - dev_err(&gmu->pdev->dev, + if (pwr->num_pwrlevels + 1 > ARRAY_SIZE(vlvl_tbl)) { + dev_err(device->dev, "Defined more GPU DCVS levels than RPMh can support\n"); 
return -ERANGE; } + /* Add the zero powerlevel for the perf table */ + table->gpu_level_num = pwr->num_pwrlevels + 1; + memset(vlvl_tbl, 0, sizeof(vlvl_tbl)); table->gx_votes[0].freq = 0; @@ -366,7 +365,7 @@ static int setup_gx_arc_votes(struct adreno_device *adreno_dev, ret = to_cx_hlvl(cx_rail, cx_vlvl, &table->gx_votes[index].cx_vote); if (ret) { - dev_err(&gmu->pdev->dev, "Unsupported cx corner: %u\n", + dev_err(device->dev, "Unsupported cx corner: %u\n", cx_vlvl); return ret; } @@ -386,14 +385,9 @@ static int setup_gx_arc_votes(struct adreno_device *adreno_dev, static int build_dcvs_table(struct adreno_device *adreno_dev) { struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); - struct gen7_hfi *hfi = &gmu->hfi; struct rpmh_arc_vals gx_arc, cx_arc, mx_arc; int ret; - ret = CMD_MSG_HDR(hfi->dcvs_table, H2F_MSG_PERF_TBL); - if (ret) - return ret; - ret = rpmh_arc_cmds(&gx_arc, "gfx.lvl"); if (ret) return ret; @@ -406,8 +400,7 @@ static int build_dcvs_table(struct adreno_device *adreno_dev) if (ret) return ret; - ret = setup_cx_arc_votes(hfi, &cx_arc, &mx_arc, - gmu->freqs, gmu->vlvls); + ret = setup_cx_arc_votes(gmu, &cx_arc, &mx_arc); if (ret) return ret; diff --git a/adreno_hfi.h b/adreno_hfi.h index f3a2ad011d..035da6f98f 100644 --- a/adreno_hfi.h +++ b/adreno_hfi.h @@ -79,6 +79,19 @@ #define HFI_FEATURE_DMS 27 #define HFI_FEATURE_AQE 29 +/* Types to be used with H2F_MSG_TABLE */ +enum hfi_table_type { + HFI_TABLE_BW_VOTE = 0, + HFI_TABLE_GPU_PERF = 1, + HFI_TABLE_DIDT = 2, + HFI_TABLE_ACD = 3, + HFI_TABLE_CLX_V1 = 4, + HFI_TABLE_CLX_V2 = 5, + HFI_TABLE_THERM = 6, + HFI_TABLE_DCVS_DATA = 7, + HFI_TABLE_MAX, +}; + /* A6xx uses a different value for KPROF */ #define HFI_FEATURE_A6XX_KPROF 14 @@ -445,6 +458,7 @@ enum hfi_msg_type { H2F_MSG_GET_VALUE = 12, H2F_MSG_SET_VALUE = 13, H2F_MSG_CORE_FW_START = 14, + H2F_MSG_TABLE = 15, F2H_MSG_MEM_ALLOC = 20, H2F_MSG_GX_BW_PERF_VOTE = 30, H2F_MSG_FW_HALT = 32, @@ -512,7 +526,7 @@ struct hfi_bwtable_cmd { u32 cnoc_cmd_addrs[MAX_CNOC_CMDS]; u32 cnoc_cmd_data[MAX_CNOC_LEVELS][MAX_CNOC_CMDS]; u32 ddr_cmd_addrs[MAX_BW_CMDS]; - u32 ddr_cmd_data[MAX_GX_LEVELS][MAX_BW_CMDS]; + u32 ddr_cmd_data[MAX_BW_LEVELS][MAX_BW_CMDS]; } __packed; struct opp_gx_desc { @@ -532,7 +546,7 @@ struct hfi_dcvstable_v1_cmd { u32 hdr; u32 gpu_level_num; u32 gmu_level_num; - struct opp_desc gx_votes[MAX_GX_LEVELS]; + struct opp_desc gx_votes[MAX_GX_LEVELS_LEGACY]; struct opp_desc cx_votes[MAX_CX_LEVELS]; } __packed; @@ -541,10 +555,24 @@ struct hfi_dcvstable_cmd { u32 hdr; u32 gpu_level_num; u32 gmu_level_num; - struct opp_gx_desc gx_votes[MAX_GX_LEVELS]; + struct opp_gx_desc gx_votes[MAX_GX_LEVELS_LEGACY]; struct opp_desc cx_votes[MAX_CX_LEVELS]; } __packed; +/* H2F */ +struct hfi_table_entry { + u32 count; + u32 stride; + u32 data[]; +} __packed; + +struct hfi_table_cmd { + u32 hdr; + u32 version; + u32 type; + struct hfi_table_entry entry[]; +} __packed; + #define MAX_ACD_STRIDE 2 #define MAX_ACD_NUM_LEVELS KGSL_MAX_PWRLEVELS diff --git a/kgsl_gmu_core.h b/kgsl_gmu_core.h index 98a40d0d81..bb7805f537 100644 --- a/kgsl_gmu_core.h +++ b/kgsl_gmu_core.h @@ -12,8 +12,10 @@ /* GMU_DEVICE - Given an KGSL device return the GMU specific struct */ #define GMU_DEVICE_OPS(_a) ((_a)->gmu_core.dev_ops) -#define MAX_GX_LEVELS 16 +#define MAX_GX_LEVELS 32 +#define MAX_GX_LEVELS_LEGACY 16 #define MAX_CX_LEVELS 4 +#define MAX_BW_LEVELS 16 #define MAX_CNOC_LEVELS 2 #define MAX_CNOC_CMDS 6 #define MAX_BW_CMDS 8 @@ -99,6 +101,11 @@ enum gmu_pwrctrl_mode { #define GMU_FREQ_MIN 200000000 
#define GMU_FREQ_MAX 500000000 +#define HFI_VERSION(major, minor, step) \ + (FIELD_PREP(GENMASK(31, 28), major) | \ + FIELD_PREP(GENMASK(27, 16), minor) | \ + FIELD_PREP(GENMASK(15, 0), step)) + #define GMU_VER_MAJOR(ver) (((ver) >> 28) & 0xF) #define GMU_VER_MINOR(ver) (((ver) >> 16) & 0xFFF) #define GMU_VER_STEP(ver) ((ver) & 0xFFFF) diff --git a/kgsl_pwrctrl.h b/kgsl_pwrctrl.h index ee0da22d61..15e872c795 100644 --- a/kgsl_pwrctrl.h +++ b/kgsl_pwrctrl.h @@ -14,7 +14,7 @@ ****************************************************************************/ #define KGSL_MAX_CLKS 18 -#define KGSL_MAX_PWRLEVELS 16 +#define KGSL_MAX_PWRLEVELS 32 #define KGSL_PWRFLAGS_POWER_ON 0 #define KGSL_PWRFLAGS_CLK_ON 1 diff --git a/msm_adreno_devfreq.h b/msm_adreno_devfreq.h index 40d1f9790e..9944fff384 100644 --- a/msm_adreno_devfreq.h +++ b/msm_adreno_devfreq.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2016-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef MSM_ADRENO_DEVFREQ_H @@ -18,7 +18,7 @@ struct device; /* same as KGSL_MAX_PWRLEVELS */ -#define MSM_ADRENO_MAX_PWRLEVELS 16 +#define MSM_ADRENO_MAX_PWRLEVELS 32 struct xstats { u64 ram_time; From 0b331c044ab4eebeafac9ceb329493453b220dc8 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 2 Feb 2023 22:36:24 +0530 Subject: [PATCH 524/750] msm: kgsl: Use QCOM io-pagetables Use the optimized QCOM io-pagetables to make maps/unmaps faster. Change-Id: I29c018083f9fb4ce40f4d52f60ed9c83c742e2c7 Signed-off-by: Harshdeep Dhatt Signed-off-by: Kamal Agrawal Signed-off-by: Hareesh Gundu --- kgsl_iommu.c | 173 ++++++++++++++++++++++++++++++++++++++++----------- kgsl_iommu.h | 8 ++- 2 files changed, 140 insertions(+), 41 deletions(-) diff --git a/kgsl_iommu.c b/kgsl_iommu.c index 33cd3301f7..1dec807501 100644 --- a/kgsl_iommu.c +++ b/kgsl_iommu.c @@ -176,6 +176,92 @@ static struct page *iommu_get_guard_page(struct kgsl_memdesc *memdesc) return kgsl_guard_page; } +static size_t iommu_pgsize(unsigned long pgsize_bitmap, unsigned long iova, + phys_addr_t paddr, size_t size, size_t *count) +{ + unsigned int pgsize_idx, pgsize_idx_next; + unsigned long pgsizes; + size_t offset, pgsize, pgsize_next; + unsigned long addr_merge = paddr | iova; + + /* Page sizes supported by the hardware and small enough for @size */ + pgsizes = pgsize_bitmap & GENMASK(__fls(size), 0); + + /* Constrain the page sizes further based on the maximum alignment */ + if (likely(addr_merge)) + pgsizes &= GENMASK(__ffs(addr_merge), 0); + + /* Make sure we have at least one suitable page size */ + if (!pgsizes) + return 0; + + /* Pick the biggest page size remaining */ + pgsize_idx = __fls(pgsizes); + pgsize = BIT(pgsize_idx); + if (!count) + return pgsize; + + /* Find the next biggest support page size, if it exists */ + pgsizes = pgsize_bitmap & ~GENMASK(pgsize_idx, 0); + if (!pgsizes) + goto out_set_count; + + pgsize_idx_next = __ffs(pgsizes); + pgsize_next = BIT(pgsize_idx_next); + + /* + * There's no point trying a bigger page size unless the virtual + * and physical addresses are similarly offset within the larger page. 
+ */ + if ((iova ^ paddr) & (pgsize_next - 1)) + goto out_set_count; + + /* Calculate the offset to the next page size alignment boundary */ + offset = pgsize_next - (addr_merge & (pgsize_next - 1)); + + /* + * If size is big enough to accommodate the larger page, reduce + * the number of smaller pages. + */ + if (offset + pgsize_next <= size) + size = offset; + +out_set_count: + *count = size >> pgsize_idx; + return pgsize; +} + +static int _iopgtbl_unmap_pages(struct kgsl_iommu_pt *pt, u64 gpuaddr, + size_t size) +{ + struct io_pgtable_ops *ops = pt->pgtbl_ops; + size_t unmapped = 0; + + while (unmapped < size) { + size_t ret, size_to_unmap, remaining, pgcount; + + remaining = (size - unmapped); + size_to_unmap = iommu_pgsize(pt->info.cfg.pgsize_bitmap, + gpuaddr, gpuaddr, remaining, &pgcount); + if (size_to_unmap == 0) + break; +#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE) + ret = qcom_arm_lpae_unmap_pages(ops, gpuaddr, size_to_unmap, + pgcount, NULL); +#else + ret = ops->unmap_pages(ops, gpuaddr, size_to_unmap, + pgcount, NULL); +#endif + if (ret == 0) + break; + + gpuaddr += ret; + unmapped += ret; + } + + return (unmapped == size) ? 0 : -EINVAL; +} + static void kgsl_iommu_flush_tlb(struct kgsl_mmu *mmu) { struct kgsl_iommu *iommu = &mmu->iommu; @@ -190,6 +276,14 @@ static void kgsl_iommu_flush_tlb(struct kgsl_mmu *mmu) static int _iopgtbl_unmap(struct kgsl_iommu_pt *pt, u64 gpuaddr, size_t size) { struct io_pgtable_ops *ops = pt->pgtbl_ops; + int ret = 0; + + if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_LPAE)) { + ret = _iopgtbl_unmap_pages(pt, gpuaddr, size); + if (ret) + return ret; + goto flush; + } while (size) { if ((ops->unmap(ops, gpuaddr, PAGE_SIZE, NULL)) != PAGE_SIZE) @@ -204,6 +298,7 @@ static int _iopgtbl_unmap(struct kgsl_iommu_pt *pt, u64 gpuaddr, size_t size) * qcom_skip_tlb_management() API takes care of avoiding * TLB operations during slumber. 
*/ +flush: if (KERNEL_VERSION(6, 1, 0) > LINUX_VERSION_CODE) { struct kgsl_device *device = KGSL_MMU_DEVICE(pt->base.mmu); @@ -221,29 +316,6 @@ static int _iopgtbl_unmap(struct kgsl_iommu_pt *pt, u64 gpuaddr, size_t size) return 0; } -static size_t _iopgtbl_map_pages(struct kgsl_iommu_pt *pt, u64 gpuaddr, - struct page **pages, int npages, int prot) -{ - struct io_pgtable_ops *ops = pt->pgtbl_ops; - size_t mapped = 0; - u64 addr = gpuaddr; - int ret, i; - - for (i = 0; i < npages; i++) { - ret = ops->map(ops, addr, page_to_phys(pages[i]), PAGE_SIZE, - prot, GFP_KERNEL); - if (ret) { - _iopgtbl_unmap(pt, gpuaddr, mapped); - return 0; - } - - mapped += PAGE_SIZE; - addr += PAGE_SIZE; - } - - return mapped; -} - static size_t _iopgtbl_map_sg(struct kgsl_iommu_pt *pt, u64 gpuaddr, struct sg_table *sgt, int prot) { @@ -253,6 +325,22 @@ static size_t _iopgtbl_map_sg(struct kgsl_iommu_pt *pt, u64 gpuaddr, u64 addr = gpuaddr; int ret, i; +#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE) + if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_LPAE)) { + ret = qcom_arm_lpae_map_sg(ops, addr, sgt->sgl, sgt->nents, prot, + GFP_KERNEL, &mapped); +#else + if (ops->map_sg) { + ret = ops->map_sg(ops, addr, sgt->sgl, sgt->nents, prot, + GFP_KERNEL, &mapped); +#endif + if (ret) { + _iopgtbl_unmap(pt, gpuaddr, mapped); + return 0; + } + return mapped; + } + for_each_sg(sgt->sgl, sg, sgt->nents, i) { size_t size = sg->length; phys_addr_t phys = sg_phys(sg); @@ -397,12 +485,20 @@ static int kgsl_iopgtbl_map(struct kgsl_pagetable *pagetable, /* Get the protection flags for the user context */ prot = _iommu_get_protection_flags(pagetable->mmu, memdesc); - if (memdesc->sgt) - mapped = _iopgtbl_map_sg(pt, memdesc->gpuaddr, - memdesc->sgt, prot); - else - mapped = _iopgtbl_map_pages(pt, memdesc->gpuaddr, - memdesc->pages, memdesc->page_count, prot); + if (!memdesc->sgt) { + struct sg_table sgt; + int ret; + + ret = sg_alloc_table_from_pages(&sgt, memdesc->pages, + memdesc->page_count, 0, memdesc->size, GFP_KERNEL); + if (ret) + return ret; + mapped = _iopgtbl_map_sg(pt, memdesc->gpuaddr, &sgt, prot); + sg_free_table(&sgt); + } else { + mapped = _iopgtbl_map_sg(pt, memdesc->gpuaddr, memdesc->sgt, + prot); + } if (mapped == 0) return -ENOMEM; @@ -1224,7 +1320,7 @@ static void kgsl_iommu_destroy_pagetable(struct kgsl_pagetable *pagetable) { struct kgsl_iommu_pt *pt = to_iommu_pt(pagetable); - free_io_pgtable_ops(pt->pgtbl_ops); + qcom_free_io_pgtable_ops(pt->pgtbl_ops); kfree(pt); } @@ -1284,22 +1380,23 @@ static int kgsl_iopgtbl_alloc(struct kgsl_iommu_context *ctx, struct kgsl_iommu_ { struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(&ctx->pdev->dev); const struct io_pgtable_cfg *cfg = NULL; + void *domain = (void *)adreno_smmu->cookie; if (adreno_smmu->cookie) cfg = adreno_smmu->get_ttbr1_cfg(adreno_smmu->cookie); if (!cfg) return -ENODEV; - pt->cfg = *cfg; - pt->cfg.quirks &= ~IO_PGTABLE_QUIRK_ARM_TTBR1; - pt->cfg.tlb = &kgsl_iopgtbl_tlb_ops; - - pt->pgtbl_ops = alloc_io_pgtable_ops(ARM_64_LPAE_S1, &pt->cfg, NULL); + pt->info = adreno_smmu->pgtbl_info; + pt->info.cfg = *cfg; + pt->info.cfg.quirks &= ~IO_PGTABLE_QUIRK_ARM_TTBR1; + pt->info.cfg.tlb = &kgsl_iopgtbl_tlb_ops; + pt->pgtbl_ops = qcom_alloc_io_pgtable_ops(QCOM_ARM_64_LPAE_S1, &pt->info, domain); if (!pt->pgtbl_ops) return -ENOMEM; - pt->ttbr0 = pt->cfg.arm_lpae_s1_cfg.ttbr; + pt->ttbr0 = pt->info.cfg.arm_lpae_s1_cfg.ttbr; return 0; } @@ -2339,11 +2436,11 @@ static int iommu_probe_user_context(struct kgsl_device *device, pt = to_iommu_pt(mmu->defaultpagetable); 
/* Enable TTBR0 on the default and LPAC contexts */ - kgsl_iommu_set_ttbr0(&iommu->user_context, mmu, &pt->cfg); + kgsl_iommu_set_ttbr0(&iommu->user_context, mmu, &pt->info.cfg); kgsl_set_smmu_aperture(device, &iommu->user_context); - kgsl_iommu_set_ttbr0(&iommu->lpac_context, mmu, &pt->cfg); + kgsl_iommu_set_ttbr0(&iommu->lpac_context, mmu, &pt->info.cfg); ret = set_smmu_lpac_aperture(device, &iommu->lpac_context); /* LPAC is optional, ignore setup failures in absence of LPAC feature */ diff --git a/kgsl_iommu.h b/kgsl_iommu.h index 271043f1df..6bce555ff7 100644 --- a/kgsl_iommu.h +++ b/kgsl_iommu.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2012-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef __KGSL_IOMMU_H #define __KGSL_IOMMU_H @@ -179,15 +179,17 @@ struct kgsl_iommu { /* * struct kgsl_iommu_pt - Iommu pagetable structure private to kgsl driver - * @domain: Pointer to the iommu domain that contains the iommu pagetable + * @base: Container of the base kgsl pagetable * @ttbr0: register value to set when using this pagetable + * @pgtbl_ops: Pagetable operations for mapping/unmapping buffers + * @info: Pagetable info used to allocate pagetable operations */ struct kgsl_iommu_pt { struct kgsl_pagetable base; u64 ttbr0; struct io_pgtable_ops *pgtbl_ops; - struct io_pgtable_cfg cfg; + struct qcom_io_pgtable_info info; }; /** From dafea907d30c1a4499e11809aa78615837871a16 Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Mon, 10 Jul 2023 22:08:02 -0700 Subject: [PATCH 525/750] msm: kgsl: Retire single page map/unmap() callbacks Starting with kernel version 6.2.0, the single-page map/unmap() callbacks are deprecated. Hence use the map_pages()/unmap_pages() callbacks, which serve the same purpose.
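For context, a minimal sketch of the batched calls this series moves to, assuming <linux/io-pgtable.h> and a physically contiguous buffer; the example_map_contig() helper is illustrative and not part of the patch, but the map_pages()/unmap_pages() signatures match the hunks below, which fold a per-page loop into a single call and roll back on partial failure.

/*
 * Illustrative sketch only: map a physically contiguous region with one
 * map_pages() call instead of a per-page ops->map() loop, and undo any
 * partial mapping with unmap_pages() on failure.
 */
static int example_map_contig(struct io_pgtable_ops *ops, u64 gpuaddr,
		phys_addr_t phys, size_t size, int prot)
{
	size_t mapped = 0;
	int ret;

	/* One call covers size >> PAGE_SHIFT pages of PAGE_SIZE each */
	ret = ops->map_pages(ops, gpuaddr, phys, PAGE_SIZE,
			size >> PAGE_SHIFT, prot, GFP_KERNEL, &mapped);
	if (ret || mapped != size) {
		/* Roll back whatever portion did get mapped */
		if (mapped)
			ops->unmap_pages(ops, gpuaddr, PAGE_SIZE,
					mapped >> PAGE_SHIFT, NULL);
		return ret ? ret : -ENOMEM;
	}

	return 0;
}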
Change-Id: I74bfe58a12f15958cd7ad1db2c50202ace6b839b Signed-off-by: Hareesh Gundu --- kgsl_iommu.c | 41 +++++++++++++++++------------------------ 1 file changed, 17 insertions(+), 24 deletions(-) diff --git a/kgsl_iommu.c b/kgsl_iommu.c index 8bce09dbe7..99a6e5c372 100644 --- a/kgsl_iommu.c +++ b/kgsl_iommu.c @@ -277,6 +277,7 @@ static int _iopgtbl_unmap(struct kgsl_iommu_pt *pt, u64 gpuaddr, size_t size) { struct io_pgtable_ops *ops = pt->pgtbl_ops; int ret = 0; + size_t unmapped; if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_LPAE)) { ret = _iopgtbl_unmap_pages(pt, gpuaddr, size); @@ -285,13 +286,10 @@ static int _iopgtbl_unmap(struct kgsl_iommu_pt *pt, u64 gpuaddr, size_t size) goto flush; } - while (size) { - if ((ops->unmap(ops, gpuaddr, PAGE_SIZE, NULL)) != PAGE_SIZE) - return -EINVAL; - - gpuaddr += PAGE_SIZE; - size -= PAGE_SIZE; - } + unmapped = ops->unmap_pages(ops, gpuaddr, PAGE_SIZE, + size >> PAGE_SHIFT, NULL); + if (unmapped != size) + return -EINVAL; /* * Skip below logic for 6.1 kernel version and above as @@ -342,22 +340,17 @@ static size_t _iopgtbl_map_sg(struct kgsl_iommu_pt *pt, u64 gpuaddr, } for_each_sg(sgt->sgl, sg, sgt->nents, i) { - size_t size = sg->length; + size_t size = sg->length, map_size = 0; phys_addr_t phys = sg_phys(sg); - while (size) { - ret = ops->map(ops, addr, phys, PAGE_SIZE, prot, GFP_KERNEL); - - if (ret) { - _iopgtbl_unmap(pt, gpuaddr, mapped); - return 0; - } - - phys += PAGE_SIZE; - mapped += PAGE_SIZE; - addr += PAGE_SIZE; - size -= PAGE_SIZE; + ret = ops->map_pages(ops, addr, phys, PAGE_SIZE, size >> PAGE_SHIFT, + prot, GFP_KERNEL, &map_size); + if (ret) { + _iopgtbl_unmap(pt, gpuaddr, mapped); + return 0; } + addr += size; + mapped += map_size; } return mapped; @@ -427,19 +420,19 @@ static size_t _iopgtbl_map_page_to_range(struct kgsl_iommu_pt *pt, struct page *page, u64 gpuaddr, size_t range, int prot) { struct io_pgtable_ops *ops = pt->pgtbl_ops; - size_t mapped = 0; + size_t mapped = 0, map_size = 0; u64 addr = gpuaddr; int ret; while (range) { - ret = ops->map(ops, addr, page_to_phys(page), PAGE_SIZE, - prot, GFP_KERNEL); + ret = ops->map_pages(ops, addr, page_to_phys(page), PAGE_SIZE, + 1, prot, GFP_KERNEL, &map_size); if (ret) { _iopgtbl_unmap(pt, gpuaddr, mapped); return 0; } - mapped += PAGE_SIZE; + mapped += map_size; addr += PAGE_SIZE; range -= PAGE_SIZE; } From e2dc259daa3fb44c8582d8f453e90d26ffd0befb Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Mon, 28 Aug 2023 10:54:51 -0700 Subject: [PATCH 526/750] kgsl: hwsched: Add support for GMU tracepoints logging With hardware scheduling, some events originating in the GMU are not captured on the host side; the preemption trigger and preemption done tracepoints are examples. These tracepoints still need to be supported with HW scheduling for backwards compatibility, profiling and debugging. We therefore need a way to log these events on the GMU with the timestamp of when they occur and convey them to the host so that kgsl can log them lazily to ftrace.
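As a rough sketch of the mechanism (simplified, and not the code this patch adds): the GMU appends packets to a shared ring described by gmu_trace_header, and the host drains it by walking read_index toward write_index, decoding each packet with the TRACE_PKT_* helpers and emitting the matching ftrace event. The structure, field and macro names are the ones introduced in the hunks below; the loop omits the sequence-number tracking and corruption recovery that the real gmu_core_process_trace_data() performs.

/* Simplified consumer loop for the GMU trace ring (illustration only) */
static void example_drain_gmu_trace(struct gmu_trace_header *hdr, u32 *buf)
{
	u32 ridx = readl(&hdr->read_index);
	u32 widx = readl(&hdr->write_index);

	while (ridx != widx) {
		struct gmu_trace_packet *pkt = (struct gmu_trace_packet *)
				&buf[hdr->payload_offset + ridx];

		/* A cleared valid bit means the ring is corrupted; stop draining */
		if (TRACE_PKT_GET_VALID_FIELD(pkt->hdr) != TRACE_PKT_VALID)
			break;

		/* Decode pkt->trace_id and pkt->payload into an ftrace event here */

		/* Advance and publish read_index so the GMU can reuse the space */
		ridx = (ridx + TRACE_PKT_GET_SIZE(pkt->hdr)) % hdr->payload_size;
		writel(ridx, &hdr->read_index);
	}
}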
Change-Id: Ib12e2341f928091ad3918841c267a8f2e92dc766 Signed-off-by: Hareesh Gundu --- adreno_a6xx_gmu.c | 4 + adreno_a6xx_gmu.h | 2 + adreno_a6xx_gmu_snapshot.c | 2 + adreno_a6xx_hwsched.c | 36 ++++++++- adreno_a6xx_hwsched_hfi.c | 13 +++- adreno_gen7_gmu.c | 5 ++ adreno_gen7_gmu.h | 2 + adreno_gen7_gmu_snapshot.c | 2 + adreno_gen7_hwsched.c | 36 ++++++++- adreno_gen7_hwsched_hfi.c | 13 +++- adreno_hfi.h | 16 ++++ kgsl_gmu_core.c | 135 ++++++++++++++++++++++++++++++++++ kgsl_gmu_core.h | 145 +++++++++++++++++++++++++++++++++++++ kgsl_snapshot.h | 1 + kgsl_util.h | 1 + 15 files changed, 399 insertions(+), 14 deletions(-) diff --git a/adreno_a6xx_gmu.c b/adreno_a6xx_gmu.c index f139dd046e..5ac165f8b1 100644 --- a/adreno_a6xx_gmu.c +++ b/adreno_a6xx_gmu.c @@ -614,6 +614,7 @@ int a6xx_gmu_device_start(struct adreno_device *adreno_dev) u32 val = 0x00000100; u32 mask = 0x000001FF; + gmu_core_reset_trace_header(&gmu->trace); gmu_ao_sync_event(adreno_dev); /* Check for 0xBABEFACE on legacy targets */ @@ -2945,6 +2946,9 @@ int a6xx_gmu_probe(struct kgsl_device *device, set_bit(GMU_ENABLED, &device->gmu_core.flags); + /* Initialize to zero to detect trace packet loss */ + gmu->trace.seq_num = 0; + device->gmu_core.dev_ops = &a6xx_gmudev; /* Set default GMU attributes */ diff --git a/adreno_a6xx_gmu.h b/adreno_a6xx_gmu.h index ba29ca53c6..4ad298f6ca 100644 --- a/adreno_a6xx_gmu.h +++ b/adreno_a6xx_gmu.h @@ -47,6 +47,8 @@ struct a6xx_gmu_device { struct kgsl_memdesc *gmu_log; /** @vrb: GMU virtual register bank memory */ struct kgsl_memdesc *vrb; + /** @trace: gmu trace container */ + struct kgsl_gmu_trace trace; struct a6xx_hfi hfi; struct clk_bulk_data *clks; /** @num_clks: Number of entries in the @clks array */ diff --git a/adreno_a6xx_gmu_snapshot.c b/adreno_a6xx_gmu_snapshot.c index b8188a07a3..9e0c73aabe 100644 --- a/adreno_a6xx_gmu_snapshot.c +++ b/adreno_a6xx_gmu_snapshot.c @@ -261,6 +261,8 @@ static void a6xx_gmu_snapshot_memories(struct kgsl_device *device, desc.type = SNAPSHOT_GMU_MEM_DEBUG; else if (md == gmu->vrb) desc.type = SNAPSHOT_GMU_MEM_VRB; + else if (md == gmu->trace.md) + desc.type = SNAPSHOT_GMU_MEM_TRACE; else desc.type = SNAPSHOT_GMU_MEM_BIN_BLOCK; diff --git a/adreno_a6xx_hwsched.c b/adreno_a6xx_hwsched.c index a341ca6bba..0e7efc21f4 100644 --- a/adreno_a6xx_hwsched.c +++ b/adreno_a6xx_hwsched.c @@ -654,13 +654,32 @@ static int a6xx_hwsched_gmu_memory_init(struct adreno_device *adreno_dev) gmu->vrb = reserve_gmu_kernel_block(gmu, 0, GMU_VRB_SIZE, GMU_NONCACHED_KERNEL, 0); + if (IS_ERR(gmu->vrb)) + return PTR_ERR(gmu->vrb); + /* Populate size of the virtual register bank */ - if (!IS_ERR(gmu->vrb)) - gmu_core_set_vrb_register(gmu->vrb->hostptr, - VRB_SIZE_IDX, gmu->vrb->size >> 2); + gmu_core_set_vrb_register(gmu->vrb->hostptr, VRB_SIZE_IDX, + gmu->vrb->size >> 2); } - return PTR_ERR_OR_ZERO(gmu->vrb); + /* GMU trace log */ + if (IS_ERR_OR_NULL(gmu->trace.md)) { + gmu->trace.md = reserve_gmu_kernel_block(gmu, 0, + GMU_TRACE_SIZE, GMU_NONCACHED_KERNEL, 0); + + if (IS_ERR(gmu->trace.md)) + return PTR_ERR(gmu->trace.md); + + /* Pass trace buffer address to GMU through the VRB */ + gmu_core_set_vrb_register(gmu->vrb->hostptr, + VRB_TRACE_BUFFER_ADDR_IDX, + gmu->trace.md->gmuaddr); + + /* Initialize the GMU trace buffer header */ + gmu_core_trace_header_init(&gmu->trace); + } + + return 0; } static int a6xx_hwsched_gmu_init(struct adreno_device *adreno_dev) @@ -1334,6 +1353,15 @@ int a6xx_hwsched_add_to_minidump(struct adreno_device *adreno_dev) return ret; } + if 
(!IS_ERR_OR_NULL(a6xx_dev->gmu.trace.md)) { + ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev, + KGSL_GMU_TRACE_ENTRY, + a6xx_dev->gmu.trace.md->hostptr, + a6xx_dev->gmu.trace.md->size); + if (ret) + return ret; + } + /* Dump HFI hwsched global mem alloc entries */ for (i = 0; i < hw_hfi->mem_alloc_entries; i++) { struct hfi_mem_alloc_entry *entry = &hw_hfi->mem_alloc_table[i]; diff --git a/adreno_a6xx_hwsched_hfi.c b/adreno_a6xx_hwsched_hfi.c index 02fac3497a..ea3750413e 100644 --- a/adreno_a6xx_hwsched_hfi.c +++ b/adreno_a6xx_hwsched_hfi.c @@ -1433,17 +1433,24 @@ static int hfi_f2h_main(void *arg) { struct adreno_device *adreno_dev = arg; struct a6xx_hwsched_hfi *hfi = to_a6xx_hwsched_hfi(adreno_dev); + struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); while (!kthread_should_stop()) { wait_event_interruptible(hfi->f2h_wq, kthread_should_stop() || - (!(is_queue_empty(adreno_dev, HFI_MSG_ID) && - is_queue_empty(adreno_dev, HFI_DBG_ID)) && - (hfi->irq_mask & HFI_IRQ_MSGQ_MASK))); + /* If msgq irq is enabled and msgq has messages to process */ + (((hfi->irq_mask & HFI_IRQ_MSGQ_MASK) && + !is_queue_empty(adreno_dev, HFI_MSG_ID)) || + /* Trace buffer has messages to process */ + !gmu_core_is_trace_empty(gmu->trace.md->hostptr) || + /* Dbgq has messages to process */ + !is_queue_empty(adreno_dev, HFI_DBG_ID))); if (kthread_should_stop()) break; a6xx_hwsched_process_msgq(adreno_dev); + gmu_core_process_trace_data(KGSL_DEVICE(adreno_dev), + &gmu->pdev->dev, &gmu->trace); a6xx_hwsched_process_dbgq(adreno_dev, true); } diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index db6e8831e1..439d0018fc 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -349,6 +349,8 @@ int gen7_gmu_device_start(struct adreno_device *adreno_dev) struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + gmu_core_reset_trace_header(&gmu->trace); + gmu_ao_sync_event(adreno_dev); /* Bring GMU out of reset */ @@ -2575,6 +2577,9 @@ int gen7_gmu_probe(struct kgsl_device *device, gmu->log_stream_enable = false; gmu->log_group_mask = 0x3; + /* Initialize to zero to detect trace packet loss */ + gmu->trace.seq_num = 0; + /* Disabled by default */ gmu->stats_enable = false; /* Set default to CM3 busy cycles countable */ diff --git a/adreno_gen7_gmu.h b/adreno_gen7_gmu.h index 483b9f3159..9291683eda 100644 --- a/adreno_gen7_gmu.h +++ b/adreno_gen7_gmu.h @@ -51,6 +51,8 @@ struct gen7_gmu_device { struct kgsl_memdesc *gpu_boot_scratch; /** @vrb: GMU virtual register bank memory */ struct kgsl_memdesc *vrb; + /** @trace: gmu trace container */ + struct kgsl_gmu_trace trace; struct gen7_hfi hfi; struct clk_bulk_data *clks; /** @num_clks: Number of entries in the @clks array */ diff --git a/adreno_gen7_gmu_snapshot.c b/adreno_gen7_gmu_snapshot.c index 0a2c04d6d7..bd4df95184 100644 --- a/adreno_gen7_gmu_snapshot.c +++ b/adreno_gen7_gmu_snapshot.c @@ -140,6 +140,8 @@ static void gen7_gmu_snapshot_memories(struct kgsl_device *device, desc.type = SNAPSHOT_GMU_MEM_WARMBOOT; else if (md == gmu->vrb) desc.type = SNAPSHOT_GMU_MEM_VRB; + else if (md == gmu->trace.md) + desc.type = SNAPSHOT_GMU_MEM_TRACE; else desc.type = SNAPSHOT_GMU_MEM_BIN_BLOCK; diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index fec3079ae6..cbc0e1439f 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -879,13 +879,32 @@ static int gen7_hwsched_gmu_memory_init(struct adreno_device *adreno_dev) gmu->vrb = gen7_reserve_gmu_kernel_block(gmu, 0, GMU_VRB_SIZE, GMU_NONCACHED_KERNEL, 0); + 
if (IS_ERR(gmu->vrb)) + return PTR_ERR(gmu->vrb); + /* Populate size of the virtual register bank */ - if (!IS_ERR(gmu->vrb)) - gmu_core_set_vrb_register(gmu->vrb->hostptr, - VRB_SIZE_IDX, gmu->vrb->size >> 2); + gmu_core_set_vrb_register(gmu->vrb->hostptr, VRB_SIZE_IDX, + gmu->vrb->size >> 2); } - return PTR_ERR_OR_ZERO(gmu->vrb); + /* GMU trace log */ + if (IS_ERR_OR_NULL(gmu->trace.md)) { + gmu->trace.md = gen7_reserve_gmu_kernel_block(gmu, 0, + GMU_TRACE_SIZE, GMU_NONCACHED_KERNEL, 0); + + if (IS_ERR(gmu->trace.md)) + return PTR_ERR(gmu->trace.md); + + /* Pass trace buffer address to GMU through the VRB */ + gmu_core_set_vrb_register(gmu->vrb->hostptr, + VRB_TRACE_BUFFER_ADDR_IDX, + gmu->trace.md->gmuaddr); + + /* Initialize the GMU trace buffer header */ + gmu_core_trace_header_init(&gmu->trace); + } + + return 0; } static int gen7_hwsched_gmu_init(struct adreno_device *adreno_dev) @@ -1865,6 +1884,15 @@ int gen7_hwsched_add_to_minidump(struct adreno_device *adreno_dev) return ret; } + if (!IS_ERR_OR_NULL(gen7_dev->gmu.trace.md)) { + ret = kgsl_add_va_to_minidump(adreno_dev->dev.dev, + KGSL_GMU_TRACE_ENTRY, + gen7_dev->gmu.trace.md->hostptr, + gen7_dev->gmu.trace.md->size); + if (ret) + return ret; + } + /* Dump HFI hwsched global mem alloc entries */ for (i = 0; i < hw_hfi->mem_alloc_entries; i++) { struct hfi_mem_alloc_entry *entry = &hw_hfi->mem_alloc_table[i]; diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index cf1096eb8a..4af8910fa9 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -2637,17 +2637,24 @@ static int hfi_f2h_main(void *arg) { struct adreno_device *adreno_dev = arg; struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); while (!kthread_should_stop()) { wait_event_interruptible(hfi->f2h_wq, kthread_should_stop() || - (!(is_queue_empty(adreno_dev, HFI_MSG_ID) && - is_queue_empty(adreno_dev, HFI_DBG_ID)) && - (hfi->irq_mask & HFI_IRQ_MSGQ_MASK))); + /* If msgq irq is enabled and msgq has messages to process */ + (((hfi->irq_mask & HFI_IRQ_MSGQ_MASK) && + !is_queue_empty(adreno_dev, HFI_MSG_ID)) || + /* Trace buffer has messages to process */ + !gmu_core_is_trace_empty(gmu->trace.md->hostptr) || + /* Dbgq has messages to process */ + !is_queue_empty(adreno_dev, HFI_DBG_ID))); if (kthread_should_stop()) break; gen7_hwsched_process_msgq(adreno_dev); + gmu_core_process_trace_data(KGSL_DEVICE(adreno_dev), + &gmu->pdev->dev, &gmu->trace); gen7_hwsched_process_dbgq(adreno_dev, true); } diff --git a/adreno_hfi.h b/adreno_hfi.h index 2760a0119b..e1843b429f 100644 --- a/adreno_hfi.h +++ b/adreno_hfi.h @@ -474,6 +474,7 @@ enum hfi_msg_type { H2F_MSG_ISSUE_SYNCOBJ = 152, F2H_MSG_SYNCOBJ_QUERY = 153, H2F_MSG_WARMBOOT_CMD = 154, + F2H_MSG_PROCESS_TRACE = 155, HFI_MAX_ID, }; @@ -733,6 +734,21 @@ struct hfi_debug_cmd { u32 data; } __packed; +/* F2H */ +struct hfi_trace_cmd { + u32 hdr; + u32 version; + u64 identifier; +} __packed; + +/* Trace packet definition */ +struct gmu_trace_packet { + u32 hdr; + u32 trace_id; + u64 ticks; + u32 payload[]; +} __packed; + /* F2H */ struct hfi_gmu_cntr_register_cmd { u32 hdr; diff --git a/kgsl_gmu_core.c b/kgsl_gmu_core.c index 7a5507546f..482ad65299 100644 --- a/kgsl_gmu_core.c +++ b/kgsl_gmu_core.c @@ -6,9 +6,11 @@ #include #include +#include #include #include "adreno.h" +#include "adreno_trace.h" #include "kgsl_device.h" #include "kgsl_gmu_core.h" #include "kgsl_trace.h" @@ -225,3 +227,136 @@ void gmu_core_set_vrb_register(void *ptr, u32 
index, u32 val) /* Make sure the vrb write is posted before moving ahead */ wmb(); } + +static void stream_trace_data(struct gmu_trace_packet *pkt) +{ + switch (pkt->trace_id) { + case GMU_TRACE_PREEMPT_TRIGGER: { + struct trace_preempt_trigger *data = + (struct trace_preempt_trigger *)pkt->payload; + + trace_adreno_preempt_trigger(data->cur_rb, data->next_rb, + data->ctx_switch_cntl, pkt->ticks); + break; + } + case GMU_TRACE_PREEMPT_DONE: { + struct trace_preempt_done *data = + (struct trace_preempt_done *)pkt->payload; + + trace_adreno_preempt_done(data->prev_rb, data->next_rb, + data->ctx_switch_cntl, pkt->ticks); + break; + } + default: { + char str[64]; + + snprintf(str, sizeof(str), + "Unsupported GMU trace id %d\n", pkt->trace_id); + trace_kgsl_msg(str); + } + } +} + +void gmu_core_process_trace_data(struct kgsl_device *device, + struct device *dev, struct kgsl_gmu_trace *trace) +{ + struct gmu_trace_header *trace_hdr = trace->md->hostptr; + u32 size, *buffer = trace->md->hostptr; + struct gmu_trace_packet *pkt; + u16 seq_num, num_pkts = 0; + u32 ridx = readl(&trace_hdr->read_index); + u32 widx = readl(&trace_hdr->write_index); + + if (ridx == widx) + return; + + /* + * Don't process any traces and force set read_index to write_index if + * previously encountered invalid trace packet + */ + if (trace->reset_hdr) { + /* update read index to let f2h daemon to go to sleep */ + writel(trace_hdr->write_index, &trace_hdr->read_index); + return; + } + + /* start reading trace buffer data */ + pkt = (struct gmu_trace_packet *)&buffer[trace_hdr->payload_offset + ridx]; + + /* Validate packet header */ + if (TRACE_PKT_GET_VALID_FIELD(pkt->hdr) != TRACE_PKT_VALID) { + char str[128]; + + snprintf(str, sizeof(str), + "Invalid trace packet found at read index: %d resetting trace header\n", + trace_hdr->read_index); + /* + * GMU is not expected to write an invalid trace packet. This + * condition can be true in case there is memory corruption. In + * such scenario fastforward readindex to writeindex so the we + * don't process any trace packets until we reset the trace + * header in next slumber exit. + */ + dev_err_ratelimited(device->dev, "%s\n", str); + trace_kgsl_msg(str); + writel(trace_hdr->write_index, &trace_hdr->read_index); + trace->reset_hdr = true; + return; + } + + size = TRACE_PKT_GET_SIZE(pkt->hdr); + + if (TRACE_PKT_GET_SKIP_FIELD(pkt->hdr)) + goto done; + + seq_num = TRACE_PKT_GET_SEQNUM(pkt->hdr); + num_pkts = seq_num - trace->seq_num; + + /* Detect trace packet loss by tracking any gaps in the sequence number */ + if (num_pkts > 1) { + char str[128]; + + snprintf(str, sizeof(str), + "%d GMU trace packets dropped from sequence number: %d\n", + num_pkts - 1, trace->seq_num); + trace_kgsl_msg(str); + } + + trace->seq_num = seq_num; + stream_trace_data(pkt); +done: + ridx = (ridx + size) % trace_hdr->payload_size; + writel(ridx, &trace_hdr->read_index); +} + +bool gmu_core_is_trace_empty(struct gmu_trace_header *hdr) +{ + return (readl(&hdr->read_index) == readl(&hdr->write_index)) ? 
true : false; +} + +void gmu_core_trace_header_init(struct kgsl_gmu_trace *trace) +{ + struct gmu_trace_header *hdr = trace->md->hostptr; + + hdr->threshold = TRACE_BUFFER_THRESHOLD; + hdr->timeout = TRACE_TIMEOUT_MSEC; + hdr->metadata = FIELD_PREP(GENMASK(31, 30), TRACE_MODE_DROP) | + FIELD_PREP(GENMASK(3, 0), TRACE_HEADER_VERSION_1); + hdr->cookie = trace->md->gmuaddr; + hdr->size = trace->md->size; + hdr->log_type = TRACE_LOGTYPE_HWSCHED; +} + +void gmu_core_reset_trace_header(struct kgsl_gmu_trace *trace) +{ + struct gmu_trace_header *hdr = trace->md->hostptr; + + if (!trace->reset_hdr) + return; + + memset(hdr, 0, sizeof(struct gmu_trace_header)); + /* Reset sequence number to detect trace packet loss */ + trace->seq_num = 0; + gmu_core_trace_header_init(trace); + trace->reset_hdr = false; +} diff --git a/kgsl_gmu_core.h b/kgsl_gmu_core.h index 197cf353c1..e39e8d8571 100644 --- a/kgsl_gmu_core.h +++ b/kgsl_gmu_core.h @@ -174,6 +174,122 @@ enum gmu_vrb_idx { VRB_TRACE_BUFFER_ADDR_IDX = 2, }; +/* For GMU Trace */ +#define GMU_TRACE_SIZE SZ_16K + +/* Trace header defines */ +/* Logtype to decode the trace pkt data */ +#define TRACE_LOGTYPE_HWSCHED 1 +/* Trace buffer threshold for GMU to send F2H message */ +#define TRACE_BUFFER_THRESHOLD 80 +/* + * GMU Trace timer value to check trace packet consumption. GMU timer handler tracks the + * readindex, If it's not moved since last timer fired, GMU will send the f2h message to + * drain trace packets. GMU Trace Timer will be restarted if the readindex is moving. + */ +#define TRACE_TIMEOUT_MSEC 5 + +/* Trace metadata defines */ +/* Trace drop mode hint for GMU to drop trace packets when trace buffer is full */ +#define TRACE_MODE_DROP 1 +/* Trace buffer header version */ +#define TRACE_HEADER_VERSION_1 1 + +/* Trace packet defines */ +#define TRACE_PKT_VALID 1 +#define TRACE_PKT_SEQ_MASK GENMASK(15, 0) +#define TRACE_PKT_SZ_MASK GENMASK(27, 16) +#define TRACE_PKT_SZ_SHIFT 16 +#define TRACE_PKT_VALID_MASK GENMASK(31, 31) +#define TRACE_PKT_SKIP_MASK GENMASK(30, 30) +#define TRACE_PKT_VALID_SHIFT 31 +#define TRACE_PKT_SKIP_SHIFT 30 + +#define TRACE_PKT_GET_SEQNUM(hdr) ((hdr) & TRACE_PKT_SEQ_MASK) +#define TRACE_PKT_GET_SIZE(hdr) (((hdr) & TRACE_PKT_SZ_MASK) >> TRACE_PKT_SZ_SHIFT) +#define TRACE_PKT_GET_VALID_FIELD(hdr) (((hdr) & TRACE_PKT_VALID_MASK) >> TRACE_PKT_VALID_SHIFT) +#define TRACE_PKT_GET_SKIP_FIELD(hdr) (((hdr) & TRACE_PKT_SKIP_MASK) >> TRACE_PKT_SKIP_SHIFT) + +/* + * Trace buffer header definition + * Trace buffer header fields initialized/updated by KGSL and GMU + * GMU input: Following header fields are initialized by KGSL + * - @metadata, @threshold, @size, @cookie, @timeout, @log_type + * - @readIndex updated by kgsl when traces messages are consumed. + * GMU output: Following header fields are initialized by GMU only + * - @magic, @payload_offset, @payload_size + * - @write_index updated by GMU upon filling the trace messages + */ +struct gmu_trace_header { + /** @magic: Initialized by GMU to check header is valid or not */ + u32 magic; + /** + * @metadata: Trace buffer metadata.Bit(31) Trace Mode to log tracepoints + * messages, Bits [3:0] Version for header format changes. + */ + u32 metadata; + /** + * @threshold: % at which GMU to send f2h message to wakeup KMD to consume + * tracepoints data. Set it to zero to disable thresholding. Threshold is % + * of buffer full condition not the trace packet count. If GMU is continuously + * writing to trace buffer makes it buffer full condition when KMD is not + * consuming it. 
So GMU check the how much trace buffer % space is full based + * on the threshold % value.If the trace packets are filling over % buffer full + * condition GMU will send the f2h message for KMD to drain the trace messages. + */ + u32 threshold; + /** @size: trace buffer allocation size in bytes */ + u32 size; + /** @read_index: trace buffer read index in dwords */ + u32 read_index; + /** @write_index: trace buffer write index in dwords */ + u32 write_index; + /** @payload_offset: trace buffer payload dword offset */ + u32 payload_offset; + /** @payload_size: trace buffer payload size in dword */ + u32 payload_size; + /** cookie: cookie data sent through F2H_PROCESS_MESSAGE */ + u64 cookie; + /** + * timeout: GMU Trace Timer value in msec - zero to disable trace timer else + * value for GMU trace timerhandler to send HFI msg. + */ + u32 timeout; + /** @log_type: To decode the trace buffer data */ + u32 log_type; +} __packed; + +/* Trace ID definition */ +enum gmu_trace_id { + GMU_TRACE_PREEMPT_TRIGGER = 1, + GMU_TRACE_PREEMPT_DONE = 2, + GMU_TRACE_MAX, +}; + +struct trace_preempt_trigger { + u32 cur_rb; + u32 next_rb; + u32 ctx_switch_cntl; +} __packed; + +struct trace_preempt_done { + u32 prev_rb; + u32 next_rb; + u32 ctx_switch_cntl; +} __packed; + +/** + * struct kgsl_gmu_trace - wrapper for gmu trace memory object + */ +struct kgsl_gmu_trace { + /** @md: gmu trace memory descriptor */ + struct kgsl_memdesc *md; + /* @seq_num: GMU trace packet sequence number to detect drop packet count */ + u16 seq_num; + /* @reset_hdr: To reset trace buffer header incase of invalid packet */ + bool reset_hdr; +}; + /* GMU memdesc entries */ #define GMU_KERNEL_ENTRIES 16 @@ -371,4 +487,33 @@ void gmu_core_dev_force_first_boot(struct kgsl_device *device); */ void gmu_core_set_vrb_register(void *ptr, u32 index, u32 val); +/** + * gmu_core_process_trace_data - Process gmu trace buffer data writes to default linux trace buffer + * @device: Pointer to KGSL device + * @dev: GMU device instance + * @trace: GMU trace memory pointer + */ +void gmu_core_process_trace_data(struct kgsl_device *device, + struct device *dev, struct kgsl_gmu_trace *trace); + +/** + * gmu_core_is_trace_empty - Check for trace buffer empty/full status + * @hdr: Pointer to gmu trace header + * + * Return: true if readidex equl to writeindex else false + */ +bool gmu_core_is_trace_empty(struct gmu_trace_header *hdr); + +/** + * gmu_core_trace_header_init - Initialize the GMU trace buffer header + * @trace: Pointer to kgsl gmu trace + */ +void gmu_core_trace_header_init(struct kgsl_gmu_trace *trace); + +/** + * gmu_core_reset_trace_header - Reset GMU trace buffer header + * @trace: Pointer to kgsl gmu trace + */ +void gmu_core_reset_trace_header(struct kgsl_gmu_trace *trace); + #endif /* __KGSL_GMU_CORE_H */ diff --git a/kgsl_snapshot.h b/kgsl_snapshot.h index c45dd2804d..50f245dbf6 100644 --- a/kgsl_snapshot.h +++ b/kgsl_snapshot.h @@ -217,6 +217,7 @@ struct kgsl_snapshot_ib_v2 { #define SNAPSHOT_GMU_MEM_HW_FENCE 0x07 #define SNAPSHOT_GMU_MEM_WARMBOOT 0x08 #define SNAPSHOT_GMU_MEM_VRB 0x09 +#define SNAPSHOT_GMU_MEM_TRACE 0x0a /* GMU memory section data */ struct kgsl_snapshot_gmu_mem { diff --git a/kgsl_util.h b/kgsl_util.h index 72c3e8986b..24e041f720 100644 --- a/kgsl_util.h +++ b/kgsl_util.h @@ -18,6 +18,7 @@ #define KGSL_SCRATCH_ENTRY "kgsl_scratch" #define KGSL_MEMSTORE_ENTRY "kgsl_memstore" #define KGSL_GMU_LOG_ENTRY "kgsl_gmu_log" +#define KGSL_GMU_TRACE_ENTRY "kgsl_gmu_trace" #define KGSL_HFIMEM_ENTRY "kgsl_hfi_mem" #define 
KGSL_GMU_DUMPMEM_ENTRY "kgsl_gmu_dump_mem" #define KGSL_GMU_RB_ENTRY "kgsl_gmu_rb" From b081c3cbb1f1bf7bed52aa0221c2e313f53ab352 Mon Sep 17 00:00:00 2001 From: Abhishek Barman Date: Fri, 18 Aug 2023 14:04:44 +0530 Subject: [PATCH 527/750] msm: kgsl: Skip SMMU PT switch when using default PT If per-process pagetables are not enabled, the current process pagetable points to the default global pt and the per-process pagetable ttbr0 config is set to 0x0. No CP SMMU UPDATE command needs to be submitted to the ringbuffer. So, skip the process pagetable switch if the current process pagetable is using the default pt. Change-Id: I85cdfbac704705cd4cb1c5e8a964231a8e66fe88 Signed-off-by: Abhishek Barman --- adreno_a3xx_ringbuffer.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/adreno_a3xx_ringbuffer.c b/adreno_a3xx_ringbuffer.c index 9222af6b7c..3fbc91b8b5 100644 --- a/adreno_a3xx_ringbuffer.c +++ b/adreno_a3xx_ringbuffer.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. + * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. */ #include "adreno.h" @@ -109,6 +110,9 @@ static int a3xx_rb_pagetable_switch(struct adreno_device *adreno_dev, struct kgsl_iommu *iommu = KGSL_IOMMU(device); int count = 0; + /* Skip pagetable switch if current context is using default PT. */ + if (pagetable == device->mmu.defaultpagetable) + return 0; /* * Adding an indirect buffer ensures that the prefetch stalls until * the commands in indirect buffer have completed. We need to stall From 4a3e6e27dbc15ecbee8fc2f6e0566997fef9224d Mon Sep 17 00:00:00 2001 From: Abhishek Barman Date: Wed, 23 Aug 2023 23:38:40 +0530 Subject: [PATCH 528/750] msm: kgsl: Fix GPU microcode load for A3xx For A3xx, kgsl_bulkwrite did not load the firmware properly. Hence the firmware is loaded by explicitly passing the index of the dword where the firmware load needs to start. Change-Id: Iaad3f3e205a5b6ba30e5166fc2bb1a2c1eded4a9 Signed-off-by: Abhishek Barman --- adreno_a3xx.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/adreno_a3xx.c b/adreno_a3xx.c index 264f31d8ea..fc9c6ab644 100644 --- a/adreno_a3xx.c +++ b/adreno_a3xx.c @@ -1318,17 +1318,20 @@ static void a3xx_microcode_load(struct adreno_device *adreno_dev) struct kgsl_device *device = KGSL_DEVICE(adreno_dev); size_t pm4_size = adreno_dev->fw[ADRENO_FW_PM4].size; size_t pfp_size = adreno_dev->fw[ADRENO_FW_PFP].size; + int i; /* load the CP ucode using AHB writes */ kgsl_regwrite(device, A3XX_CP_ME_RAM_WADDR, 0); - kgsl_regmap_bulk_write(&device->regmap, A3XX_CP_ME_RAM_DATA, - &adreno_dev->fw[ADRENO_FW_PM4].fwvirt[1], pm4_size - 1); + for (i = 1; i < pm4_size; i++) + kgsl_regwrite(device, A3XX_CP_ME_RAM_DATA, + adreno_dev->fw[ADRENO_FW_PM4].fwvirt[i]); kgsl_regwrite(device, A3XX_CP_PFP_UCODE_ADDR, 0); - kgsl_regmap_bulk_write(&device->regmap, A3XX_CP_PFP_UCODE_DATA, - &adreno_dev->fw[ADRENO_FW_PFP].fwvirt[1], pfp_size - 1); + for (i = 1; i < pfp_size; i++) + kgsl_regwrite(device, A3XX_CP_PFP_UCODE_DATA, + adreno_dev->fw[ADRENO_FW_PFP].fwvirt[i]); } static u64 a3xx_read_alwayson(struct adreno_device *adreno_dev) From b1a6063c4831e90757e5ddc10725423152d0cf72 Mon Sep 17 00:00:00 2001 From: Hareesh Gundu Date: Fri, 14 Apr 2023 16:08:28 -0700 Subject: [PATCH 529/750] msm: kgsl: Remove legacy low power states Remove the legacy power states NAP and MINBW, as these states do not show any power benefit in the latest generation GPUs.
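The practical effect on the idle path is sketched below for illustration only: with NAP and MINBW retired, the idle check only has to decide whether an ACTIVE device with no active references can drop straight to SLUMBER. The state names and fields come from the hunks that follow; example_idle_check() is a simplified paraphrase of the rewritten kgsl_idle_check(), not code from the patch.

/* Simplified view of the idle decision once NAP/MINBW are gone (illustrative) */
static void example_idle_check(struct kgsl_device *device)
{
	/* ACTIVE is now the only state that can idle, and it goes straight to SLUMBER */
	if (device->state != KGSL_STATE_ACTIVE)
		return;

	if (!atomic_read(&device->active_cnt))
		kgsl_pwrctrl_change_state(device, KGSL_STATE_SLUMBER);
	else
		kgsl_start_idle_timer(device);
}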
Change-Id: I5a7f07c3d0e2d3110fe106c94fe96df1ed66c4fc Signed-off-by: Hareesh Gundu --- adreno.c | 49 +---------- adreno.h | 2 - adreno_a3xx.c | 30 +------ adreno_a5xx.c | 41 +--------- adreno_a6xx.c | 31 ------- kgsl_device.h | 12 +-- kgsl_pwrctrl.c | 215 ++++++------------------------------------------- kgsl_pwrctrl.h | 5 -- 8 files changed, 34 insertions(+), 351 deletions(-) diff --git a/adreno.c b/adreno.c index 5df0926fb9..fb8b120ce4 100644 --- a/adreno.c +++ b/adreno.c @@ -233,28 +233,8 @@ void adreno_touch_wake(struct kgsl_device *device) if (adreno_dev->wake_on_touch) return; - if (gmu_core_isenabled(device)) { + if (gmu_core_isenabled(device) || (device->state == KGSL_STATE_SLUMBER)) schedule_work(&adreno_dev->input_work); - return; - } - - /* - * If the device is in nap, kick the idle timer to make sure that we - * don't go into slumber before the first render. If the device is - * already in slumber schedule the wake. - */ - - if (device->state == KGSL_STATE_NAP) { - /* - * Set the wake on touch bit to keep from coming back here and - * keeping the device in nap without rendering - */ - adreno_dev->wake_on_touch = true; - kgsl_start_idle_timer(device); - - } else if (device->state == KGSL_STATE_SLUMBER) { - schedule_work(&adreno_dev->input_work); - } } /* @@ -914,8 +894,6 @@ static int adreno_of_get_power(struct adreno_device *adreno_dev, device->pwrctrl.interval_timeout = CONFIG_QCOM_KGSL_IDLE_TIMEOUT; - device->pwrctrl.minbw_timeout = 10; - /* Set default bus control to true on all targets */ device->pwrctrl.bus_control = true; @@ -1780,16 +1758,8 @@ static void adreno_pwrctrl_active_count_put(struct adreno_device *adreno_dev) return; if (atomic_dec_and_test(&device->active_cnt)) { - bool nap_on = !(device->pwrctrl.ctrl_flags & - BIT(KGSL_PWRFLAGS_NAP_OFF)); - if (nap_on && device->state == KGSL_STATE_ACTIVE && - device->requested_state == KGSL_STATE_NONE) { - kgsl_pwrctrl_request_state(device, KGSL_STATE_NAP); - kgsl_schedule_work(&device->idle_check_ws); - } else if (!nap_on) { - kgsl_pwrscale_update_stats(device); - kgsl_pwrscale_update(device); - } + kgsl_pwrscale_update_stats(device); + kgsl_pwrscale_update(device); kgsl_start_idle_timer(device); } @@ -2085,8 +2055,6 @@ int adreno_reset(struct kgsl_device *device, int fault) if (atomic_read(&device->active_cnt)) kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE); - else - kgsl_pwrctrl_change_state(device, KGSL_STATE_NAP); return ret; } @@ -3043,16 +3011,6 @@ static void adreno_pwrlevel_change_settings(struct kgsl_device *device, postlevel, post); } -static void adreno_clk_set_options(struct kgsl_device *device, const char *name, - struct clk *clk, bool on) -{ - struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); - - if (gpudev->clk_set_options) - gpudev->clk_set_options(adreno_dev, name, clk, on); -} - static bool adreno_is_hwcg_on(struct kgsl_device *device) { struct adreno_device *adreno_dev = ADRENO_DEVICE(device); @@ -3428,7 +3386,6 @@ static const struct kgsl_functable adreno_functable = { .is_hw_collapsible = adreno_is_hw_collapsible, .regulator_disable = adreno_regulator_disable, .pwrlevel_change_settings = adreno_pwrlevel_change_settings, - .clk_set_options = adreno_clk_set_options, .query_property_list = adreno_query_property_list, .is_hwcg_on = adreno_is_hwcg_on, .gpu_clock_set = adreno_gpu_clock_set, diff --git a/adreno.h b/adreno.h index c92ba5a323..ee9ac6093f 100644 --- a/adreno.h +++ b/adreno.h @@ -905,8 +905,6 @@ struct 
adreno_gpudev { void (*preemption_schedule)(struct adreno_device *adreno_dev); int (*preemption_context_init)(struct kgsl_context *context); void (*context_detach)(struct adreno_context *drawctxt); - void (*clk_set_options)(struct adreno_device *adreno_dev, - const char *name, struct clk *clk, bool on); void (*pre_reset)(struct adreno_device *adreno_dev); void (*gpu_keepalive)(struct adreno_device *adreno_dev, bool state); diff --git a/adreno_a3xx.c b/adreno_a3xx.c index ffb2604e80..876032c5dd 100644 --- a/adreno_a3xx.c +++ b/adreno_a3xx.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2012-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -1325,31 +1325,6 @@ static void a3xx_microcode_load(struct adreno_device *adreno_dev) &adreno_dev->fw[ADRENO_FW_PFP].fwvirt[1], pfp_size - 1); } -#if IS_ENABLED(CONFIG_COMMON_CLK_QCOM) -static void a3xx_clk_set_options(struct adreno_device *adreno_dev, - const char *name, struct clk *clk, bool on) -{ - if (!clk || !adreno_is_a306a(adreno_dev)) - return; - - /* Handle clock settings for GFX PSCBCs */ - if (on) { - if (!strcmp(name, "mem_iface_clk")) { - qcom_clk_set_flags(clk, CLKFLAG_NORETAIN_PERIPH); - qcom_clk_set_flags(clk, CLKFLAG_NORETAIN_MEM); - } else if (!strcmp(name, "core_clk")) { - qcom_clk_set_flags(clk, CLKFLAG_RETAIN_PERIPH); - qcom_clk_set_flags(clk, CLKFLAG_RETAIN_MEM); - } - } else { - if (!strcmp(name, "core_clk")) { - qcom_clk_set_flags(clk, CLKFLAG_NORETAIN_PERIPH); - qcom_clk_set_flags(clk, CLKFLAG_NORETAIN_MEM); - } - } -} -#endif - static u64 a3xx_read_alwayson(struct adreno_device *adreno_dev) { /* A3XX does not have a always on timer */ @@ -1502,9 +1477,6 @@ const struct adreno_gpudev adreno_a3xx_gpudev = { .init = a3xx_init, .start = a3xx_start, .snapshot = a3xx_snapshot, -#if IS_ENABLED(CONFIG_COMMON_CLK_QCOM) - .clk_set_options = a3xx_clk_set_options, -#endif .read_alwayson = a3xx_read_alwayson, .hw_isidle = a3xx_hw_isidle, .power_ops = &adreno_power_operations, diff --git a/adreno_a5xx.c b/adreno_a5xx.c index f7b5777e30..242b4cffd2 100644 --- a/adreno_a5xx.c +++ b/adreno_a5xx.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2014-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -1153,35 +1153,6 @@ static void a5xx_pwrlevel_change_settings(struct adreno_device *adreno_dev, } } -#if IS_ENABLED(CONFIG_COMMON_CLK_QCOM) -static void a5xx_clk_set_options(struct adreno_device *adreno_dev, - const char *name, struct clk *clk, bool on) -{ - if (!clk) - return; - - if (!adreno_is_a540(adreno_dev) && !adreno_is_a512(adreno_dev) && - !adreno_is_a508(adreno_dev)) - return; - - /* Handle clock settings for GFX PSCBCs */ - if (on) { - if (!strcmp(name, "mem_iface_clk")) { - qcom_clk_set_flags(clk, CLKFLAG_NORETAIN_PERIPH); - qcom_clk_set_flags(clk, CLKFLAG_NORETAIN_MEM); - } else if (!strcmp(name, "core_clk")) { - qcom_clk_set_flags(clk, CLKFLAG_RETAIN_PERIPH); - qcom_clk_set_flags(clk, CLKFLAG_RETAIN_MEM); - } - } else { - if (!strcmp(name, "core_clk")) { - qcom_clk_set_flags(clk, CLKFLAG_NORETAIN_PERIPH); - qcom_clk_set_flags(clk, CLKFLAG_NORETAIN_MEM); - } - } -} -#endif - /* FW driven idle 10% throttle */ #define IDLE_10PCT 0 /* number of cycles when clock is throttled by 50% (CRC) */ @@ -1240,16 +1211,11 @@ static void a5xx_gpmu_reset(struct work_struct *work) * after the watchdog timeout, then there is no need to reset GPMU * again. */ - if (device->state != KGSL_STATE_NAP && - device->state != KGSL_STATE_AWARE && - device->state != KGSL_STATE_ACTIVE) + if (device->state != KGSL_STATE_AWARE && device->state != KGSL_STATE_ACTIVE) return; mutex_lock(&device->mutex); - if (device->state == KGSL_STATE_NAP) - kgsl_pwrctrl_change_state(device, KGSL_STATE_AWARE); - if (a5xx_regulator_enable(adreno_dev)) goto out; @@ -2522,9 +2488,6 @@ const struct adreno_gpudev adreno_a5xx_gpudev = { .regulator_disable = a5xx_regulator_disable, .pwrlevel_change_settings = a5xx_pwrlevel_change_settings, .preemption_schedule = a5xx_preemption_schedule, -#if IS_ENABLED(CONFIG_COMMON_CLK_QCOM) - .clk_set_options = a5xx_clk_set_options, -#endif .read_alwayson = a5xx_read_alwayson, .hw_isidle = a5xx_hw_isidle, .power_ops = &adreno_power_operations, diff --git a/adreno_a6xx.c b/adreno_a6xx.c index 248f5364b8..92402826a8 100644 --- a/adreno_a6xx.c +++ b/adreno_a6xx.c @@ -2050,31 +2050,6 @@ update: return 0; } -#if IS_ENABLED(CONFIG_COMMON_CLK_QCOM) -static void a6xx_clk_set_options(struct adreno_device *adreno_dev, - const char *name, struct clk *clk, bool on) -{ - if (!clk) - return; - - /* Handle clock settings for GFX PSCBCs */ - if (on) { - if (!strcmp(name, "mem_iface_clk")) { - qcom_clk_set_flags(clk, CLKFLAG_NORETAIN_PERIPH); - qcom_clk_set_flags(clk, CLKFLAG_NORETAIN_MEM); - } else if (!strcmp(name, "core_clk")) { - qcom_clk_set_flags(clk, CLKFLAG_RETAIN_PERIPH); - qcom_clk_set_flags(clk, CLKFLAG_RETAIN_MEM); - } - } else { - if (!strcmp(name, "core_clk")) { - qcom_clk_set_flags(clk, CLKFLAG_NORETAIN_PERIPH); - qcom_clk_set_flags(clk, CLKFLAG_NORETAIN_MEM); - } - } -} -#endif - u64 a6xx_read_alwayson(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); @@ -2333,9 +2308,6 @@ const struct adreno_gpudev adreno_a6xx_gpudev = { .reset = a6xx_reset, .preemption_schedule = a6xx_preemption_schedule, .preemption_context_init = a6xx_preemption_context_init, -#if IS_ENABLED(CONFIG_COMMON_CLK_QCOM) - .clk_set_options = a6xx_clk_set_options, -#endif .read_alwayson = a6xx_read_alwayson, .power_ops = &adreno_power_operations, .clear_pending_transactions = a6xx_clear_pending_transactions, @@ -2444,9 +2416,6 @@ const struct adreno_gpudev adreno_a619_holi_gpudev = { .reset = a6xx_reset, .preemption_schedule = a6xx_preemption_schedule, .preemption_context_init = 
a6xx_preemption_context_init, -#if IS_ENABLED(CONFIG_COMMON_CLK_QCOM) - .clk_set_options = a6xx_clk_set_options, -#endif .read_alwayson = a6xx_read_alwayson, .power_ops = &adreno_power_operations, .clear_pending_transactions = a6xx_clear_pending_transactions, diff --git a/kgsl_device.h b/kgsl_device.h index 0e0be9bb88..519979e0e1 100644 --- a/kgsl_device.h +++ b/kgsl_device.h @@ -35,11 +35,11 @@ #define KGSL_STATE_NONE 0x00000000 #define KGSL_STATE_INIT 0x00000001 #define KGSL_STATE_ACTIVE 0x00000002 -#define KGSL_STATE_NAP 0x00000004 +#define KGSL_STATE_NAP 0x00000004 /* Not Used */ #define KGSL_STATE_SUSPEND 0x00000010 #define KGSL_STATE_AWARE 0x00000020 #define KGSL_STATE_SLUMBER 0x00000080 -#define KGSL_STATE_MINBW 0x00000100 +#define KGSL_STATE_MINBW 0x00000100 /* Not Used */ /** * enum kgsl_event_results - result codes passed to an event callback when the @@ -146,8 +146,6 @@ struct kgsl_functable { void (*regulator_disable)(struct kgsl_device *device); void (*pwrlevel_change_settings)(struct kgsl_device *device, unsigned int prelevel, unsigned int postlevel, bool post); - void (*clk_set_options)(struct kgsl_device *device, - const char *name, struct clk *clk, bool on); /** * @query_property_list: query the list of properties * supported by the device. If 'list' is NULL just return the total @@ -656,12 +654,6 @@ static inline bool kgsl_state_is_awake(struct kgsl_device *device) device->state == KGSL_STATE_AWARE); } -static inline bool kgsl_state_is_nap_or_minbw(struct kgsl_device *device) -{ - return (device->state == KGSL_STATE_NAP || - device->state == KGSL_STATE_MINBW); -} - /** * kgsl_start_idle_timer - Start the idle timer * @device: A KGSL device handle diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c index cf7504f625..812b9ba6ee 100644 --- a/kgsl_pwrctrl.c +++ b/kgsl_pwrctrl.c @@ -583,28 +583,15 @@ static ssize_t idle_timer_show(struct device *dev, static ssize_t minbw_timer_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct kgsl_device *device = dev_get_drvdata(dev); - u32 val; - int ret; - - if (device->pwrctrl.ctrl_flags & BIT(KGSL_PWRFLAGS_NAP_OFF)) - return -EINVAL; - - ret = kstrtou32(buf, 0, &val); - if (ret) - return ret; - - device->pwrctrl.minbw_timeout = val; - return count; + /* minbw_timer is deprecated, so return EOPNOTSUPP */ + return -EOPNOTSUPP; } static ssize_t minbw_timer_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct kgsl_device *device = dev_get_drvdata(dev); - - return scnprintf(buf, PAGE_SIZE, "%u\n", - device->pwrctrl.minbw_timeout); + /* minbw_timer is deprecated, so return it as always disabled */ + return scnprintf(buf, PAGE_SIZE, "0\n"); } static ssize_t gpubusy_show(struct device *dev, @@ -766,15 +753,16 @@ static ssize_t force_rail_on_store(struct device *dev, static ssize_t force_no_nap_show(struct device *dev, struct device_attribute *attr, char *buf) { - return __force_on_show(dev, attr, buf, KGSL_PWRFLAGS_NAP_OFF); + /* force_no_nap is deprecated, so return it as always disabled */ + return scnprintf(buf, PAGE_SIZE, "0\n"); } static ssize_t force_no_nap_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - return __force_on_store(dev, attr, buf, count, - KGSL_PWRFLAGS_NAP_OFF); + /* force_no_nap is deprecated, so return EOPNOTSUPP */ + return -EOPNOTSUPP; } static ssize_t bus_split_show(struct device *dev, @@ -1226,9 +1214,7 @@ static void kgsl_pwrctrl_clk(struct kgsl_device *device, bool state, for (i = KGSL_MAX_CLKS - 1; i > 0; 
i--) clk_disable(pwr->grp_clks[i]); /* High latency clock maintenance. */ - if ((pwr->pwrlevels[0].gpu_freq > 0) && - (requested_state != KGSL_STATE_NAP) && - (requested_state != KGSL_STATE_MINBW)) { + if (pwr->pwrlevels[0].gpu_freq > 0) { for (i = KGSL_MAX_CLKS - 1; i > 0; i--) clk_unprepare(pwr->grp_clks[i]); device->ftbl->gpu_clock_set(device, @@ -1255,15 +1241,12 @@ static void kgsl_pwrctrl_clk(struct kgsl_device *device, bool state, &pwr->power_flags)) { trace_kgsl_clk(device, state, kgsl_pwrctrl_active_freq(pwr)); - /* High latency clock maintenance. */ - if ((device->state != KGSL_STATE_NAP) && - (device->state != KGSL_STATE_MINBW)) { - if (pwr->pwrlevels[0].gpu_freq > 0) { - device->ftbl->gpu_clock_set(device, - pwr->active_pwrlevel); - _isense_clk_set_rate(pwr, + + if (pwr->pwrlevels[0].gpu_freq > 0) { + device->ftbl->gpu_clock_set(device, pwr->active_pwrlevel); - } + _isense_clk_set_rate(pwr, + pwr->active_pwrlevel); } for (i = KGSL_MAX_CLKS - 1; i > 0; i--) @@ -1410,18 +1393,6 @@ void kgsl_pwrctrl_irq(struct kgsl_device *device, bool state) } } -static void kgsl_minbw_timer(struct timer_list *t) -{ - struct kgsl_pwrctrl *pwr = from_timer(pwr, t, minbw_timer); - struct kgsl_device *device = container_of(pwr, - struct kgsl_device, pwrctrl); - - if (device->state == KGSL_STATE_NAP) { - kgsl_pwrctrl_request_state(device, KGSL_STATE_MINBW); - kgsl_schedule_work(&device->idle_check_ws); - } -} - static int _get_clocks(struct kgsl_device *device) { struct device *dev = &device->pdev->dev; @@ -1486,21 +1457,10 @@ static int _isense_clk_set_rate(struct kgsl_pwrctrl *pwr, int level) static void _gpu_clk_prepare_enable(struct kgsl_device *device, struct clk *clk, const char *name) { - int ret; + int ret = clk_prepare_enable(clk); - if (kgsl_state_is_nap_or_minbw(device)) { - ret = clk_enable(clk); - if (ret) - goto err; - return; - } - - ret = clk_prepare_enable(clk); - if (!ret) - return; -err: - /* Failure is fatal so BUG() to facilitate debug */ - dev_err(device->dev, "GPU Clock %s enable error:%d\n", name, ret); + if (ret) + dev_err(device->dev, "GPU Clock %s enable error:%d\n", name, ret); } /* @@ -1547,13 +1507,6 @@ int kgsl_pwrctrl_init(struct kgsl_device *device) pwr->gpu_bimc_int_clk = devm_clk_get(&pdev->dev, "bimc_gpu_clk"); - if (of_property_read_bool(pdev->dev.of_node, "qcom,no-nap")) - device->pwrctrl.ctrl_flags |= BIT(KGSL_PWRFLAGS_NAP_OFF); - else if (!IS_ENABLED(CONFIG_COMMON_CLK_QCOM)) { - dev_warn(device->dev, "KGSL nap state is not supported\n"); - device->pwrctrl.ctrl_flags |= BIT(KGSL_PWRFLAGS_NAP_OFF); - } - if (pwr->num_pwrlevels == 0) { dev_err(device->dev, "No power levels are defined\n"); return -EINVAL; @@ -1619,8 +1572,6 @@ int kgsl_pwrctrl_init(struct kgsl_device *device) pm_runtime_enable(&pdev->dev); - timer_setup(&pwr->minbw_timer, kgsl_minbw_timer, 0); - return 0; } @@ -1646,11 +1597,10 @@ void kgsl_idle_check(struct work_struct *work) mutex_lock(&device->mutex); /* - * After scheduling idle work for transitioning to either NAP or - * SLUMBER, it's possible that requested state can change to NONE - * if any new workload comes before kgsl_idle_check is executed or - * it gets the device mutex. In such case, no need to change state - * to NONE. + * After scheduling idle work for transitioning to SLUMBER, it's + * possbile that requested state can change to NONE if any new workload + * comes before kgsl_idle_check is executed or it gets the device mutex. + * In such case, no need to change state to NONE. 
*/ if (device->requested_state == KGSL_STATE_NONE) { mutex_unlock(&device->mutex); @@ -1659,8 +1609,7 @@ void kgsl_idle_check(struct work_struct *work) requested_state = device->requested_state; - if (device->state == KGSL_STATE_ACTIVE - || kgsl_state_is_nap_or_minbw(device)) { + if (device->state == KGSL_STATE_ACTIVE) { if (!atomic_read(&device->active_cnt)) { spin_lock(&device->submit_lock); @@ -1699,8 +1648,7 @@ done: kgsl_start_idle_timer(device); } - if (device->state != KGSL_STATE_MINBW) - kgsl_pwrscale_update(device); + kgsl_pwrscale_update(device); mutex_unlock(&device->mutex); } @@ -1795,16 +1743,6 @@ static void kgsl_pwrctrl_disable(struct kgsl_device *device) kgsl_pwrctrl_pwrrail(device, false); } -static void -kgsl_pwrctrl_clk_set_options(struct kgsl_device *device, bool on) -{ - int i; - - for (i = 0; i < KGSL_MAX_CLKS; i++) - device->ftbl->clk_set_options(device, clocks[i], - device->pwrctrl.grp_clks[i], on); -} - /** * _init() - Get the GPU ready to start, but don't turn anything on * @device - Pointer to the kgsl_device struct @@ -1814,13 +1752,6 @@ static int _init(struct kgsl_device *device) int status = 0; switch (device->state) { - case KGSL_STATE_MINBW: - fallthrough; - case KGSL_STATE_NAP: - del_timer_sync(&device->pwrctrl.minbw_timer); - /* Force power on to do the stop */ - status = kgsl_pwrctrl_enable(device); - fallthrough; case KGSL_STATE_ACTIVE: kgsl_pwrctrl_irq(device, false); del_timer_sync(&device->idle_timer); @@ -1856,7 +1787,6 @@ static int _wake(struct kgsl_device *device) device->ftbl->resume(device); fallthrough; case KGSL_STATE_SLUMBER: - kgsl_pwrctrl_clk_set_options(device, true); status = device->ftbl->start(device, device->pwrctrl.superfast); device->pwrctrl.superfast = false; @@ -1871,11 +1801,9 @@ static int _wake(struct kgsl_device *device) kgsl_pwrctrl_irq(device, true); trace_gpu_frequency( pwr->pwrlevels[pwr->active_pwrlevel].gpu_freq/1000, 0); - fallthrough; - case KGSL_STATE_MINBW: + kgsl_bus_update(device, KGSL_BUS_VOTE_ON); - fallthrough; - case KGSL_STATE_NAP: + /* Turn on the core clocks */ kgsl_pwrctrl_clk(device, true, KGSL_STATE_ACTIVE); @@ -1887,25 +1815,15 @@ static int _wake(struct kgsl_device *device) */ kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE); - /* - * Change register settings if any after pwrlevel change. - * If there was dcvs level change during nap - call - * pre and post in the row after clock is enabled. - */ - kgsl_pwrctrl_pwrlevel_change_settings(device, 0); - kgsl_pwrctrl_pwrlevel_change_settings(device, 1); /* All settings for power level transitions are complete*/ pwr->previous_pwrlevel = pwr->active_pwrlevel; kgsl_start_idle_timer(device); - del_timer_sync(&device->pwrctrl.minbw_timer); break; case KGSL_STATE_AWARE: - kgsl_pwrctrl_clk_set_options(device, true); /* Enable state before turning on irq */ kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE); kgsl_pwrctrl_irq(device, true); kgsl_start_idle_timer(device); - del_timer_sync(&device->pwrctrl.minbw_timer); break; default: dev_warn(device->dev, "unhandled state %s\n", @@ -1936,12 +1854,6 @@ _aware(struct kgsl_device *device) case KGSL_STATE_INIT: status = kgsl_pwrctrl_enable(device); break; - /* The following 4 cases shouldn't occur, but don't panic. 
*/ - case KGSL_STATE_MINBW: - fallthrough; - case KGSL_STATE_NAP: - status = _wake(device); - fallthrough; case KGSL_STATE_ACTIVE: kgsl_pwrctrl_irq(device, false); del_timer_sync(&device->idle_timer); @@ -1959,65 +1871,6 @@ _aware(struct kgsl_device *device) return status; } -static int -_nap(struct kgsl_device *device) -{ - switch (device->state) { - case KGSL_STATE_ACTIVE: - if (!device->ftbl->is_hw_collapsible(device)) { - kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); - return -EBUSY; - } - - - /* - * Read HW busy counters before going to NAP state. - * The data might be used by power scale governors - * independently of the HW activity. For example - * the simple-on-demand governor will get the latest - * busy_time data even if the gpu isn't active. - */ - kgsl_pwrscale_update_stats(device); - - mod_timer(&device->pwrctrl.minbw_timer, jiffies + - msecs_to_jiffies(device->pwrctrl.minbw_timeout)); - - kgsl_pwrctrl_clk(device, false, KGSL_STATE_NAP); - kgsl_pwrctrl_set_state(device, KGSL_STATE_NAP); - fallthrough; - case KGSL_STATE_SLUMBER: - break; - case KGSL_STATE_AWARE: - dev_warn(device->dev, - "transition AWARE -> NAP is not permitted\n"); - fallthrough; - default: - kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); - break; - } - return 0; -} - -static int -_minbw(struct kgsl_device *device) -{ - switch (device->state) { - /* - * Device is expected to be clock gated to move to - * a deeper low power state. No other transition is - * permitted - */ - case KGSL_STATE_NAP: - kgsl_bus_update(device, KGSL_BUS_VOTE_MINIMUM); - kgsl_pwrctrl_set_state(device, KGSL_STATE_MINBW); - break; - default: - kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); - break; - } - return 0; -} - static int _slumber(struct kgsl_device *device) { @@ -2029,18 +1882,12 @@ _slumber(struct kgsl_device *device) kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); return -EBUSY; } - fallthrough; - case KGSL_STATE_NAP: - fallthrough; - case KGSL_STATE_MINBW: - del_timer_sync(&device->pwrctrl.minbw_timer); del_timer_sync(&device->idle_timer); kgsl_pwrctrl_irq(device, false); /* make sure power is on to stop the device*/ status = kgsl_pwrctrl_enable(device); device->ftbl->suspend_context(device); device->ftbl->stop(device); - kgsl_pwrctrl_clk_set_options(device, false); kgsl_pwrctrl_disable(device); kgsl_pwrscale_sleep(device); trace_gpu_frequency(0, 0); @@ -2109,7 +1956,7 @@ err: * is valid, execute it. Otherwise return an error code explaining * why the change has not taken place. Also print an error if an * unexpected state change failure occurs. For example, a change to - * NAP may be rejected because the GPU is busy, this is not an error. + * SLUMBER may be rejected because the GPU is busy, this is not an error. * A change to SUSPEND should go through no matter what, so if it * fails an additional error message will be printed to dmesg. 
*/ @@ -2132,12 +1979,6 @@ int kgsl_pwrctrl_change_state(struct kgsl_device *device, int state) case KGSL_STATE_ACTIVE: status = _wake(device); break; - case KGSL_STATE_NAP: - status = _nap(device); - break; - case KGSL_STATE_MINBW: - status = _minbw(device); - break; case KGSL_STATE_SLUMBER: status = _slumber(device); break; @@ -2188,10 +2029,6 @@ const char *kgsl_pwrstate_to_str(unsigned int state) return "AWARE"; case KGSL_STATE_ACTIVE: return "ACTIVE"; - case KGSL_STATE_NAP: - return "NAP"; - case KGSL_STATE_MINBW: - return "MINBW"; case KGSL_STATE_SUSPEND: return "SUSPEND"; case KGSL_STATE_SLUMBER: diff --git a/kgsl_pwrctrl.h b/kgsl_pwrctrl.h index ee0da22d61..c8169f7900 100644 --- a/kgsl_pwrctrl.h +++ b/kgsl_pwrctrl.h @@ -20,7 +20,6 @@ #define KGSL_PWRFLAGS_CLK_ON 1 #define KGSL_PWRFLAGS_AXI_ON 2 #define KGSL_PWRFLAGS_IRQ_ON 3 -#define KGSL_PWRFLAGS_NAP_OFF 5 /* Use to enable all the force power on states at once */ #define KGSL_PWR_ON GENMASK(5, 0) @@ -155,10 +154,6 @@ struct kgsl_pwrctrl { struct icc_path *icc_path; /** cur_ab: The last ab voted by the driver */ u32 cur_ab; - /** @minbw_timer - Timer struct for entering minimum bandwidth state */ - struct timer_list minbw_timer; - /** @minbw_timeout - Timeout for entering minimum bandwidth state */ - u32 minbw_timeout; /** @sysfs_thermal_req - PM QoS maximum frequency request from user (via sysfs) */ struct dev_pm_qos_request sysfs_thermal_req; /** @time_in_pwrlevel: Each pwrlevel active duration in usec */ From ff81f3e0a2ccdcc1780f069b7d5f8eb2289c7da2 Mon Sep 17 00:00:00 2001 From: Sanjay Yadav Date: Fri, 16 Jun 2023 16:22:07 +0530 Subject: [PATCH 530/750] msm: kgsl: Give hint to SMMU for skipping TLB ops during slumber Currently, TLB operation is performed irrespective of the GPU state. This results in unnecessary cx gdsc toggling. Use qcom_skip_tlb_management() API to request smmu driver to skip TLB flush operation during GPU slumber state. Moved kgsl_mmu_flush_tlb() to kgsl_mmu_send_tlb_hint(). Add kernel specific compatibility checks for older skip tlb logic and qcom_skip_tlb_management() API. Change-Id: Ic538e4404e8dddef56274e21eef7cf0e0f65bef6 Signed-off-by: Sanjay Yadav --- adreno_a6xx_gmu.c | 10 +++------- adreno_a6xx_hwsched.c | 7 ------- adreno_a6xx_rgmu.c | 4 ++++ adreno_gen7_gmu.c | 10 +++------- adreno_gen7_hwsched.c | 7 ------- kgsl_iommu.c | 44 ++++++++++++++++++++++++++++++++++++------- kgsl_mmu.h | 11 ++++------- kgsl_pwrctrl.c | 5 ++++- 8 files changed, 55 insertions(+), 43 deletions(-) diff --git a/adreno_a6xx_gmu.c b/adreno_a6xx_gmu.c index 40bbf74b4d..b8d35c663d 100644 --- a/adreno_a6xx_gmu.c +++ b/adreno_a6xx_gmu.c @@ -612,6 +612,7 @@ int a6xx_gmu_enable_gdsc(struct adreno_device *adreno_dev) dev_err(&gmu->pdev->dev, "Failed to enable GMU CX gdsc, error %d\n", ret); + kgsl_mmu_send_tlb_hint(&device->mmu, false); clear_bit(GMU_PRIV_CX_GDSC_WAIT, &gmu->flags); return ret; } @@ -619,7 +620,9 @@ int a6xx_gmu_enable_gdsc(struct adreno_device *adreno_dev) void a6xx_gmu_disable_gdsc(struct adreno_device *adreno_dev) { struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + kgsl_mmu_send_tlb_hint(&device->mmu, true); reinit_completion(&gmu->gdsc_gate); if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_CX_GDSC)) @@ -2513,13 +2516,6 @@ static int a6xx_gmu_boot(struct adreno_device *adreno_dev) if (ret) goto gdsc_off; - /* - * TLB operations are skipped during slumber. Incase CX doesn't - * go down, it can result in incorrect translations due to stale - * TLB entries. 
Flush TLB before boot up to ensure fresh start. - */ - kgsl_mmu_flush_tlb(&device->mmu); - ret = a6xx_rscc_wakeup_sequence(adreno_dev); if (ret) goto clks_gdsc_off; diff --git a/adreno_a6xx_hwsched.c b/adreno_a6xx_hwsched.c index 32e2c7e6eb..f40182170f 100644 --- a/adreno_a6xx_hwsched.c +++ b/adreno_a6xx_hwsched.c @@ -436,13 +436,6 @@ static int a6xx_hwsched_gmu_boot(struct adreno_device *adreno_dev) if (ret) goto gdsc_off; - /* - * TLB operations are skipped during slumber. Incase CX doesn't - * go down, it can result in incorrect translations due to stale - * TLB entries. Flush TLB before boot up to ensure fresh start. - */ - kgsl_mmu_flush_tlb(&device->mmu); - ret = a6xx_rscc_wakeup_sequence(adreno_dev); if (ret) goto clks_gdsc_off; diff --git a/adreno_a6xx_rgmu.c b/adreno_a6xx_rgmu.c index 18358e300d..2f345ee191 100644 --- a/adreno_a6xx_rgmu.c +++ b/adreno_a6xx_rgmu.c @@ -518,6 +518,8 @@ static int a6xx_rgmu_disable_gdsc(struct adreno_device *adreno_dev) struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + kgsl_mmu_send_tlb_hint(&device->mmu, true); + /* Wait up to 5 seconds for the regulator to go off */ if (kgsl_regulator_disable_wait(rgmu->cx_gdsc, 5000)) return 0; @@ -590,6 +592,7 @@ static int a6xx_rgmu_enable_clks(struct adreno_device *adreno_dev) static int a6xx_rgmu_enable_gdsc(struct adreno_device *adreno_dev) { struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); int ret; if (IS_ERR_OR_NULL(rgmu->cx_gdsc)) @@ -600,6 +603,7 @@ static int a6xx_rgmu_enable_gdsc(struct adreno_device *adreno_dev) dev_err(&rgmu->pdev->dev, "Fail to enable CX gdsc:%d\n", ret); + kgsl_mmu_send_tlb_hint(&device->mmu, false); return ret; } diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index 86c58d0033..6194621bf9 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -364,6 +364,7 @@ int gen7_gmu_enable_gdsc(struct adreno_device *adreno_dev) dev_err(&gmu->pdev->dev, "Failed to enable GMU CX gdsc, error %d\n", ret); + kgsl_mmu_send_tlb_hint(&device->mmu, false); clear_bit(GMU_PRIV_CX_GDSC_WAIT, &gmu->flags); return ret; } @@ -371,7 +372,9 @@ int gen7_gmu_enable_gdsc(struct adreno_device *adreno_dev) void gen7_gmu_disable_gdsc(struct adreno_device *adreno_dev) { struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + kgsl_mmu_send_tlb_hint(&device->mmu, true); reinit_completion(&gmu->gdsc_gate); set_bit(GMU_PRIV_CX_GDSC_WAIT, &gmu->flags); regulator_disable(gmu->cx_gdsc); @@ -2040,13 +2043,6 @@ static int gen7_gmu_boot(struct adreno_device *adreno_dev) if (ret) goto gdsc_off; - /* - * TLB operations are skipped during slumber. Incase CX doesn't - * go down, it can result in incorrect translations due to stale - * TLB entries. Flush TLB before boot up to ensure fresh start. - */ - kgsl_mmu_flush_tlb(&device->mmu); - ret = gen7_rscc_wakeup_sequence(adreno_dev); if (ret) goto clks_gdsc_off; diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index 5e4ed32dee..492c3f0859 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -597,13 +597,6 @@ static int gen7_hwsched_gmu_boot(struct adreno_device *adreno_dev) if (ret) goto gdsc_off; - /* - * TLB operations are skipped during slumber. Incase CX doesn't - * go down, it can result in incorrect translations due to stale - * TLB entries. Flush TLB before boot up to ensure fresh start. 
- */ - kgsl_mmu_flush_tlb(&device->mmu); - ret = gen7_rscc_wakeup_sequence(adreno_dev); if (ret) goto clks_gdsc_off; diff --git a/kgsl_iommu.c b/kgsl_iommu.c index 22a55d4d9f..33cd3301f7 100644 --- a/kgsl_iommu.c +++ b/kgsl_iommu.c @@ -189,7 +189,6 @@ static void kgsl_iommu_flush_tlb(struct kgsl_mmu *mmu) static int _iopgtbl_unmap(struct kgsl_iommu_pt *pt, u64 gpuaddr, size_t size) { - struct kgsl_device *device = KGSL_MMU_DEVICE(pt->base.mmu); struct io_pgtable_ops *ops = pt->pgtbl_ops; while (size) { @@ -200,13 +199,22 @@ static int _iopgtbl_unmap(struct kgsl_iommu_pt *pt, u64 gpuaddr, size_t size) size -= PAGE_SIZE; } - /* Skip TLB Operations if GPU is in slumber */ - if (mutex_trylock(&device->mutex)) { - if (device->state == KGSL_STATE_SLUMBER) { + /* + * Skip below logic for 6.1 kernel version and above as + * qcom_skip_tlb_management() API takes care of avoiding + * TLB operations during slumber. + */ + if (KERNEL_VERSION(6, 1, 0) > LINUX_VERSION_CODE) { + struct kgsl_device *device = KGSL_MMU_DEVICE(pt->base.mmu); + + /* Skip TLB Operations if GPU is in slumber */ + if (mutex_trylock(&device->mutex)) { + if (device->state == KGSL_STATE_SLUMBER) { + mutex_unlock(&device->mutex); + return 0; + } mutex_unlock(&device->mutex); - return 0; } - mutex_unlock(&device->mutex); } kgsl_iommu_flush_tlb(pt->base.mmu); @@ -267,6 +275,28 @@ static size_t _iopgtbl_map_sg(struct kgsl_iommu_pt *pt, u64 gpuaddr, return mapped; } +static void kgsl_iommu_send_tlb_hint(struct kgsl_mmu *mmu, bool hint) +{ +#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE) + struct kgsl_iommu *iommu = &mmu->iommu; + + /* + * Send hint to SMMU driver for skipping TLB operations during slumber. + * This will help to avoid unnecessary cx gdsc toggling. + */ + qcom_skip_tlb_management(&iommu->user_context.pdev->dev, hint); + if (iommu->lpac_context.domain) + qcom_skip_tlb_management(&iommu->lpac_context.pdev->dev, hint); +#endif + + /* + * TLB operations are skipped during slumber. Incase CX doesn't + * go down, it can result in incorrect translations due to stale + * TLB entries. Flush TLB before boot up to ensure fresh start. 
+ */ + if (!hint) + kgsl_iommu_flush_tlb(mmu); +} static int kgsl_iopgtbl_map_child(struct kgsl_pagetable *pt, struct kgsl_memdesc *memdesc, @@ -2582,7 +2612,7 @@ static const struct kgsl_mmu_ops kgsl_iommu_ops = { .mmu_pagefault_resume = kgsl_iommu_pagefault_resume, .mmu_getpagetable = kgsl_iommu_getpagetable, .mmu_map_global = kgsl_iommu_map_global, - .mmu_flush_tlb = kgsl_iommu_flush_tlb, + .mmu_send_tlb_hint = kgsl_iommu_send_tlb_hint, }; static const struct kgsl_mmu_pt_ops iopgtbl_pt_ops = { diff --git a/kgsl_mmu.h b/kgsl_mmu.h index 8b5e083081..3035cdec41 100644 --- a/kgsl_mmu.h +++ b/kgsl_mmu.h @@ -115,7 +115,7 @@ struct kgsl_mmu_ops { unsigned long name); void (*mmu_map_global)(struct kgsl_mmu *mmu, struct kgsl_memdesc *memdesc, u32 padding); - void (*mmu_flush_tlb)(struct kgsl_mmu *mmu); + void (*mmu_send_tlb_hint)(struct kgsl_mmu *mmu, bool hint); }; struct kgsl_mmu_pt_ops { @@ -359,13 +359,10 @@ kgsl_mmu_pagetable_get_ttbr0(struct kgsl_pagetable *pagetable) return 0; } -static inline void kgsl_mmu_flush_tlb(struct kgsl_mmu *mmu) +static inline void kgsl_mmu_send_tlb_hint(struct kgsl_mmu *mmu, bool hint) { - if (!test_bit(KGSL_MMU_IOPGTABLE, &mmu->features)) - return; - - if (MMU_OP_VALID(mmu, mmu_flush_tlb)) - return mmu->mmu_ops->mmu_flush_tlb(mmu); + if (MMU_OP_VALID(mmu, mmu_send_tlb_hint)) + return mmu->mmu_ops->mmu_send_tlb_hint(mmu, hint); } /** diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c index 812b9ba6ee..c92900a39b 100644 --- a/kgsl_pwrctrl.c +++ b/kgsl_pwrctrl.c @@ -1359,14 +1359,17 @@ static int kgsl_pwrctrl_pwrrail(struct kgsl_device *device, bool state) if (!state) { if (test_and_clear_bit(KGSL_PWRFLAGS_POWER_ON, &pwr->power_flags)) { + kgsl_mmu_send_tlb_hint(&device->mmu, true); trace_kgsl_rail(device, state); if (!kgsl_regulator_disable_wait(pwr->gx_gdsc, 200)) dev_err(device->dev, "Regulator vdd is stuck on\n"); if (!kgsl_regulator_disable_wait(pwr->cx_gdsc, 200)) dev_err(device->dev, "Regulator vddcx is stuck on\n"); } - } else + } else { status = enable_regulators(device); + kgsl_mmu_send_tlb_hint(&device->mmu, false); + } return status; } From 5a0817351b09ecb57d2d0967f990f1bd83c51e1a Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Thu, 2 Feb 2023 22:36:24 +0530 Subject: [PATCH 531/750] msm: kgsl: Use QCOM io-pagetables Use the optimized QCOM io-pagetables to make maps/unmaps faster. 
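As context for the diff below: the new iommu_pgsize() helper picks the largest hardware-supported page size whose alignment matches both the GPU virtual address and the physical address, so a single map or unmap call can cover many pages at once. A minimal user-space sketch of that selection arithmetic follows; the page-size bitmap, the sample addresses and the GCC/Clang builtins are assumptions for illustration only, and the kernel helper additionally looks ahead to the next larger page size before fixing the count.

/* Sketch of the page-size selection math; not driver code. */
#include <stdio.h>
#include <stdint.h>

#define GENMASK_ULL(h, l) (((~0ULL) << (l)) & (~0ULL >> (63 - (h))))

static unsigned int fls64_idx(uint64_t x)   /* index of highest set bit */
{
        return 63 - __builtin_clzll(x);
}

static unsigned int ffs64_idx(uint64_t x)   /* index of lowest set bit */
{
        return __builtin_ctzll(x);
}

static uint64_t pick_pgsize(uint64_t pgsize_bitmap, uint64_t iova,
                            uint64_t paddr, uint64_t size, uint64_t *count)
{
        uint64_t addr_merge = iova | paddr;
        uint64_t pgsizes;
        unsigned int idx;

        /* Page sizes small enough for the buffer ... */
        pgsizes = pgsize_bitmap & GENMASK_ULL(fls64_idx(size), 0);

        /* ... and no larger than the common alignment of iova and paddr */
        if (addr_merge)
                pgsizes &= GENMASK_ULL(ffs64_idx(addr_merge), 0);

        if (!pgsizes)
                return 0;

        idx = fls64_idx(pgsizes);            /* biggest remaining size */
        if (count)
                *count = size >> idx;
        return 1ULL << idx;
}

int main(void)
{
        /* assumed bitmap: 4K, 64K, 2M and 1G pages supported */
        uint64_t bitmap = (1ULL << 12) | (1ULL << 16) | (1ULL << 21) | (1ULL << 30);
        uint64_t count = 0;
        uint64_t pgsize = pick_pgsize(bitmap, 0x100200000ULL, 0x80200000ULL,
                                      8ULL << 20, &count);

        printf("pgsize=%#llx count=%llu\n",
               (unsigned long long)pgsize, (unsigned long long)count);
        return 0;
}

Compiled standalone this prints pgsize=0x200000 count=4: eight megabytes of 2 MB-aligned buffer are handled as four 2 MB mappings instead of 2048 separate 4 KB ones, which is where the map/unmap speedup comes from.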
Change-Id: I29c018083f9fb4ce40f4d52f60ed9c83c742e2c7 Signed-off-by: Harshdeep Dhatt Signed-off-by: Kamal Agrawal Signed-off-by: Hareesh Gundu --- kgsl_iommu.c | 173 ++++++++++++++++++++++++++++++++++++++++----------- kgsl_iommu.h | 8 ++- 2 files changed, 140 insertions(+), 41 deletions(-) diff --git a/kgsl_iommu.c b/kgsl_iommu.c index 33cd3301f7..1dec807501 100644 --- a/kgsl_iommu.c +++ b/kgsl_iommu.c @@ -176,6 +176,92 @@ static struct page *iommu_get_guard_page(struct kgsl_memdesc *memdesc) return kgsl_guard_page; } +static size_t iommu_pgsize(unsigned long pgsize_bitmap, unsigned long iova, + phys_addr_t paddr, size_t size, size_t *count) +{ + unsigned int pgsize_idx, pgsize_idx_next; + unsigned long pgsizes; + size_t offset, pgsize, pgsize_next; + unsigned long addr_merge = paddr | iova; + + /* Page sizes supported by the hardware and small enough for @size */ + pgsizes = pgsize_bitmap & GENMASK(__fls(size), 0); + + /* Constrain the page sizes further based on the maximum alignment */ + if (likely(addr_merge)) + pgsizes &= GENMASK(__ffs(addr_merge), 0); + + /* Make sure we have at least one suitable page size */ + if (!pgsizes) + return 0; + + /* Pick the biggest page size remaining */ + pgsize_idx = __fls(pgsizes); + pgsize = BIT(pgsize_idx); + if (!count) + return pgsize; + + /* Find the next biggest support page size, if it exists */ + pgsizes = pgsize_bitmap & ~GENMASK(pgsize_idx, 0); + if (!pgsizes) + goto out_set_count; + + pgsize_idx_next = __ffs(pgsizes); + pgsize_next = BIT(pgsize_idx_next); + + /* + * There's no point trying a bigger page size unless the virtual + * and physical addresses are similarly offset within the larger page. + */ + if ((iova ^ paddr) & (pgsize_next - 1)) + goto out_set_count; + + /* Calculate the offset to the next page size alignment boundary */ + offset = pgsize_next - (addr_merge & (pgsize_next - 1)); + + /* + * If size is big enough to accommodate the larger page, reduce + * the number of smaller pages. + */ + if (offset + pgsize_next <= size) + size = offset; + +out_set_count: + *count = size >> pgsize_idx; + return pgsize; +} + +static int _iopgtbl_unmap_pages(struct kgsl_iommu_pt *pt, u64 gpuaddr, + size_t size) +{ + struct io_pgtable_ops *ops = pt->pgtbl_ops; + size_t unmapped = 0; + + while (unmapped < size) { + size_t ret, size_to_unmap, remaining, pgcount; + + remaining = (size - unmapped); + size_to_unmap = iommu_pgsize(pt->info.cfg.pgsize_bitmap, + gpuaddr, gpuaddr, remaining, &pgcount); + if (size_to_unmap == 0) + break; +#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE) + ret = qcom_arm_lpae_unmap_pages(ops, gpuaddr, size_to_unmap, + pgcount, NULL); +#else + ret = ops->unmap_pages(ops, gpuaddr, size_to_unmap, + pgcount, NULL); +#endif + if (ret == 0) + break; + + gpuaddr += ret; + unmapped += ret; + } + + return (unmapped == size) ? 
0 : -EINVAL; +} + static void kgsl_iommu_flush_tlb(struct kgsl_mmu *mmu) { struct kgsl_iommu *iommu = &mmu->iommu; @@ -190,6 +276,14 @@ static void kgsl_iommu_flush_tlb(struct kgsl_mmu *mmu) static int _iopgtbl_unmap(struct kgsl_iommu_pt *pt, u64 gpuaddr, size_t size) { struct io_pgtable_ops *ops = pt->pgtbl_ops; + int ret = 0; + + if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_LPAE)) { + ret = _iopgtbl_unmap_pages(pt, gpuaddr, size); + if (ret) + return ret; + goto flush; + } while (size) { if ((ops->unmap(ops, gpuaddr, PAGE_SIZE, NULL)) != PAGE_SIZE) @@ -204,6 +298,7 @@ static int _iopgtbl_unmap(struct kgsl_iommu_pt *pt, u64 gpuaddr, size_t size) * qcom_skip_tlb_management() API takes care of avoiding * TLB operations during slumber. */ +flush: if (KERNEL_VERSION(6, 1, 0) > LINUX_VERSION_CODE) { struct kgsl_device *device = KGSL_MMU_DEVICE(pt->base.mmu); @@ -221,29 +316,6 @@ static int _iopgtbl_unmap(struct kgsl_iommu_pt *pt, u64 gpuaddr, size_t size) return 0; } -static size_t _iopgtbl_map_pages(struct kgsl_iommu_pt *pt, u64 gpuaddr, - struct page **pages, int npages, int prot) -{ - struct io_pgtable_ops *ops = pt->pgtbl_ops; - size_t mapped = 0; - u64 addr = gpuaddr; - int ret, i; - - for (i = 0; i < npages; i++) { - ret = ops->map(ops, addr, page_to_phys(pages[i]), PAGE_SIZE, - prot, GFP_KERNEL); - if (ret) { - _iopgtbl_unmap(pt, gpuaddr, mapped); - return 0; - } - - mapped += PAGE_SIZE; - addr += PAGE_SIZE; - } - - return mapped; -} - static size_t _iopgtbl_map_sg(struct kgsl_iommu_pt *pt, u64 gpuaddr, struct sg_table *sgt, int prot) { @@ -253,6 +325,22 @@ static size_t _iopgtbl_map_sg(struct kgsl_iommu_pt *pt, u64 gpuaddr, u64 addr = gpuaddr; int ret, i; +#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE) + if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_LPAE)) { + ret = qcom_arm_lpae_map_sg(ops, addr, sgt->sgl, sgt->nents, prot, + GFP_KERNEL, &mapped); +#else + if (ops->map_sg) { + ret = ops->map_sg(ops, addr, sgt->sgl, sgt->nents, prot, + GFP_KERNEL, &mapped); +#endif + if (ret) { + _iopgtbl_unmap(pt, gpuaddr, mapped); + return 0; + } + return mapped; + } + for_each_sg(sgt->sgl, sg, sgt->nents, i) { size_t size = sg->length; phys_addr_t phys = sg_phys(sg); @@ -397,12 +485,20 @@ static int kgsl_iopgtbl_map(struct kgsl_pagetable *pagetable, /* Get the protection flags for the user context */ prot = _iommu_get_protection_flags(pagetable->mmu, memdesc); - if (memdesc->sgt) - mapped = _iopgtbl_map_sg(pt, memdesc->gpuaddr, - memdesc->sgt, prot); - else - mapped = _iopgtbl_map_pages(pt, memdesc->gpuaddr, - memdesc->pages, memdesc->page_count, prot); + if (!memdesc->sgt) { + struct sg_table sgt; + int ret; + + ret = sg_alloc_table_from_pages(&sgt, memdesc->pages, + memdesc->page_count, 0, memdesc->size, GFP_KERNEL); + if (ret) + return ret; + mapped = _iopgtbl_map_sg(pt, memdesc->gpuaddr, &sgt, prot); + sg_free_table(&sgt); + } else { + mapped = _iopgtbl_map_sg(pt, memdesc->gpuaddr, memdesc->sgt, + prot); + } if (mapped == 0) return -ENOMEM; @@ -1224,7 +1320,7 @@ static void kgsl_iommu_destroy_pagetable(struct kgsl_pagetable *pagetable) { struct kgsl_iommu_pt *pt = to_iommu_pt(pagetable); - free_io_pgtable_ops(pt->pgtbl_ops); + qcom_free_io_pgtable_ops(pt->pgtbl_ops); kfree(pt); } @@ -1284,22 +1380,23 @@ static int kgsl_iopgtbl_alloc(struct kgsl_iommu_context *ctx, struct kgsl_iommu_ { struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(&ctx->pdev->dev); const struct io_pgtable_cfg *cfg = NULL; + void *domain = (void *)adreno_smmu->cookie; if (adreno_smmu->cookie) cfg = 
adreno_smmu->get_ttbr1_cfg(adreno_smmu->cookie); if (!cfg) return -ENODEV; - pt->cfg = *cfg; - pt->cfg.quirks &= ~IO_PGTABLE_QUIRK_ARM_TTBR1; - pt->cfg.tlb = &kgsl_iopgtbl_tlb_ops; - - pt->pgtbl_ops = alloc_io_pgtable_ops(ARM_64_LPAE_S1, &pt->cfg, NULL); + pt->info = adreno_smmu->pgtbl_info; + pt->info.cfg = *cfg; + pt->info.cfg.quirks &= ~IO_PGTABLE_QUIRK_ARM_TTBR1; + pt->info.cfg.tlb = &kgsl_iopgtbl_tlb_ops; + pt->pgtbl_ops = qcom_alloc_io_pgtable_ops(QCOM_ARM_64_LPAE_S1, &pt->info, domain); if (!pt->pgtbl_ops) return -ENOMEM; - pt->ttbr0 = pt->cfg.arm_lpae_s1_cfg.ttbr; + pt->ttbr0 = pt->info.cfg.arm_lpae_s1_cfg.ttbr; return 0; } @@ -2339,11 +2436,11 @@ static int iommu_probe_user_context(struct kgsl_device *device, pt = to_iommu_pt(mmu->defaultpagetable); /* Enable TTBR0 on the default and LPAC contexts */ - kgsl_iommu_set_ttbr0(&iommu->user_context, mmu, &pt->cfg); + kgsl_iommu_set_ttbr0(&iommu->user_context, mmu, &pt->info.cfg); kgsl_set_smmu_aperture(device, &iommu->user_context); - kgsl_iommu_set_ttbr0(&iommu->lpac_context, mmu, &pt->cfg); + kgsl_iommu_set_ttbr0(&iommu->lpac_context, mmu, &pt->info.cfg); ret = set_smmu_lpac_aperture(device, &iommu->lpac_context); /* LPAC is optional, ignore setup failures in absence of LPAC feature */ diff --git a/kgsl_iommu.h b/kgsl_iommu.h index 271043f1df..6bce555ff7 100644 --- a/kgsl_iommu.h +++ b/kgsl_iommu.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2012-2021, The Linux Foundation. All rights reserved. - * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef __KGSL_IOMMU_H #define __KGSL_IOMMU_H @@ -179,15 +179,17 @@ struct kgsl_iommu { /* * struct kgsl_iommu_pt - Iommu pagetable structure private to kgsl driver - * @domain: Pointer to the iommu domain that contains the iommu pagetable + * @base: Container of the base kgsl pagetable * @ttbr0: register value to set when using this pagetable + * @pgtbl_ops: Pagetable operations for mapping/unmapping buffers + * @info: Pagetable info used to allocate pagetable operations */ struct kgsl_iommu_pt { struct kgsl_pagetable base; u64 ttbr0; struct io_pgtable_ops *pgtbl_ops; - struct io_pgtable_cfg cfg; + struct qcom_io_pgtable_info info; }; /** From a1ed1f2900eaf7e3cba5d2a51476d752dfba19a8 Mon Sep 17 00:00:00 2001 From: Abhishek Barman Date: Thu, 18 May 2023 14:36:21 +0530 Subject: [PATCH 532/750] msm: kgsl: Support qcs405 target Add config changes to support qcs405 target. 
Change-Id: If104542c5364ee76e9a29e5975abaef0336f011a Signed-off-by: Abhishek Barman --- Kbuild | 3 +++ config/gki_qcs405.conf | 15 +++++++++++++++ 2 files changed, 18 insertions(+) create mode 100644 config/gki_qcs405.conf diff --git a/Kbuild b/Kbuild index 91192e6f60..762f33ef94 100644 --- a/Kbuild +++ b/Kbuild @@ -49,6 +49,9 @@ endif ifeq ($(CONFIG_ARCH_TRINKET), y) include $(KGSL_PATH)/config/gki_trinket.conf endif +ifeq ($(CONFIG_ARCH_QCS405), y) + include $(KGSL_PATH)/config/gki_qcs405.conf +endif ccflags-y += -I$(KGSL_PATH) -I$(KGSL_PATH)/include/linux -I$(KGSL_PATH)/include -I$(KERNEL_SRC)/drivers/devfreq diff --git a/config/gki_qcs405.conf b/config/gki_qcs405.conf new file mode 100644 index 0000000000..b9a6982a8a --- /dev/null +++ b/config/gki_qcs405.conf @@ -0,0 +1,15 @@ +CONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ = y +CONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON = y +CONFIG_QCOM_KGSL_IDLE_TIMEOUT = 80 +CONFIG_QCOM_KGSL_SORT_POOL = y +CONFIG_QCOM_KGSL_CONTEXT_DEBUG = y +# CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT is not set +CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR = \"msm-adreno-tz\" + +ccflags-y += -DCONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ=1 \ + -DCONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON=1 \ + -DCONFIG_QCOM_KGSL_IDLE_TIMEOUT=80 \ + -DCONFIG_QCOM_KGSL_SORT_POOL=1 \ + -DCONFIG_QCOM_KGSL_CONTEXT_DEBUG=1 \ + -DCONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT=0 \ + -DCONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR=\"msm-adreno-tz\" From b3265ace72394b5eef33bf54edcf3068a179aab7 Mon Sep 17 00:00:00 2001 From: Abhishek Barman Date: Mon, 24 Jul 2023 15:34:09 +0530 Subject: [PATCH 533/750] msm: kgsl: Set correct values for SMMU protect register for A3xx For programming the CP Protect register for SMMU in A3xx GPU, pass correct values for SMMU registers base offset and the count of registers to be protected. Change-Id: I9fa809db79efc79bb7a59304fa2b4607ed1fc567 Signed-off-by: Abhishek Barman --- adreno_a3xx.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/adreno_a3xx.c b/adreno_a3xx.c index 876032c5dd..264f31d8ea 100644 --- a/adreno_a3xx.c +++ b/adreno_a3xx.c @@ -1096,8 +1096,14 @@ static struct { { A3XX_CP_PROTECT_REG_0 + 13, 0x0cc0, 0 }, /* VBIF */ { A3XX_CP_PROTECT_REG_0 + 14, 0x3000, 6 }, - /* SMMU */ - { A3XX_CP_PROTECT_REG_0 + 15, 0xa000, 12 }, + /* + * SMMU + * For A3xx, base offset for smmu region is 0xa000 and length is + * 0x1000 bytes. Offset must be in dword and length of the block + * must be ilog2(dword length). + * 0xa000 >> 2 = 0x2800, ilog2(0x1000 >> 2) = 10. + */ + { A3XX_CP_PROTECT_REG_0 + 15, 0x2800, 10 }, /* There are no remaining protected mode registers for a3xx */ }; From ae8fa751d1cbfb55fe1e8fbad4dfbf293f88f7bf Mon Sep 17 00:00:00 2001 From: Abhishek Barman Date: Mon, 24 Jul 2023 16:41:08 +0530 Subject: [PATCH 534/750] msm: kgsl: Add iommu clock names for A306 GPU Add "gcc_smmu_cfg_clk" and "gcc_gfx_tcu_clk" iommu clock names to control these clocks on A306 GPU. 
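Looping back to the A3xx CP-protect fix above (PATCH 533): the protect table stores the SMMU block as a dword base offset plus ilog2 of the block length in dwords, which is how a region of 0x1000 bytes at byte offset 0xa000 becomes the pair {0x2800, 10}. A stand-alone arithmetic check of that encoding (the ilog2 helper below is an illustration, not the kernel's):

/* Recompute the corrected A3xx CP protect table values; pure arithmetic. */
#include <stdio.h>
#include <stdint.h>

static unsigned int ilog2_u32(uint32_t v)
{
        return 31 - __builtin_clz(v);
}

int main(void)
{
        uint32_t base_bytes = 0xa000;    /* SMMU register block base, in bytes */
        uint32_t len_bytes  = 0x1000;    /* SMMU register block length, in bytes */

        uint32_t base_dwords = base_bytes >> 2;           /* 0x2800 */
        uint32_t len_log2    = ilog2_u32(len_bytes >> 2); /* ilog2(0x400) = 10 */

        printf("{ A3XX_CP_PROTECT_REG_0 + 15, %#x, %u }\n", base_dwords, len_log2);
        return 0;
}

The replaced entry, {0xa000, 12}, looks like it used the byte-based values directly, which is exactly what the patch corrects.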
Change-Id: I79d7a4c73217c6ebf931aed9f50efe8177944eda Signed-off-by: Abhishek Barman --- kgsl_iommu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kgsl_iommu.c b/kgsl_iommu.c index 1dec807501..8bce09dbe7 100644 --- a/kgsl_iommu.c +++ b/kgsl_iommu.c @@ -2547,6 +2547,8 @@ static const char * const kgsl_iommu_clocks[] = { "gcc_bimc_gpu_axi", "gcc_gpu_ahb", "gcc_gpu_axi_clk", + "gcc_smmu_cfg_clk", + "gcc_gfx_tcu_clk", }; static const struct kgsl_mmu_ops kgsl_iommu_ops; From 1e045be2db468cd6b2e10f0bee0df294333ab5e9 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Tue, 1 Aug 2023 21:39:35 +0530 Subject: [PATCH 535/750] msm: kgsl: Honor power level constraint from user properly Consider below sequence from userspace: 1. Set minimum power level to 0 2. Force minimum power level constraint from a context 3. Submit commands from that context => _adjust_pwrlevel() returns 0 as min and max pwrlevels are forced to 0. This sets pwrc_old->hint.pwrlevel.level to 0. 4. Set minimum power level to default value 5. Before the first constraint expires, force maximum power level constraint from another context => Since constraint type is not none and pwrc_old->hint.pwrlevel.level is same as return value from _adjust_pwrlevel() i.e., 0, power level update doesn't happen. Long story short, there exist races when both power level related sysfs and constraints are modified. To address these races, update the condition to set a new constraint if the requested constraint is max and active constraint is min. Update the ownership and timestamp always if old constraint is same as requested constraint to avoid pre-mature constraint removal. Change-Id: Id2b501fe714c51c4a5a511f88fb3ae0d244f3db6 Signed-off-by: Kamal Agrawal --- kgsl_pwrctrl.c | 9 ++++----- kgsl_pwrctrl.h | 5 ----- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c index c92900a39b..8c4e744d64 100644 --- a/kgsl_pwrctrl.c +++ b/kgsl_pwrctrl.c @@ -297,14 +297,14 @@ void kgsl_pwrctrl_set_constraint(struct kgsl_device *device, /* * If a constraint is already set, set a new constraint only - * if it is faster. If the requested constraint is the same + * if it is faster. If the requested constraint is the same * as the current one, update ownership and timestamp. 
*/ if ((pwrc_old->type == KGSL_CONSTRAINT_NONE) || - (constraint < pwrc_old->hint.pwrlevel.level)) { + (pwrc_old->sub_type == KGSL_CONSTRAINT_PWR_MIN && + pwrc->sub_type == KGSL_CONSTRAINT_PWR_MAX)) { pwrc_old->type = pwrc->type; pwrc_old->sub_type = pwrc->sub_type; - pwrc_old->hint.pwrlevel.level = constraint; pwrc_old->owner_id = id; pwrc_old->expires = jiffies + msecs_to_jiffies(device->pwrctrl.interval_timeout); @@ -312,8 +312,7 @@ void kgsl_pwrctrl_set_constraint(struct kgsl_device *device, kgsl_pwrctrl_pwrlevel_change(device, constraint); /* Trace the constraint being set by the driver */ trace_kgsl_constraint(device, pwrc_old->type, constraint, 1); - } else if ((pwrc_old->type == pwrc->type) && - (pwrc_old->hint.pwrlevel.level == constraint)) { + } else if (pwrc_old->type == pwrc->type) { pwrc_old->owner_id = id; pwrc_old->owner_timestamp = ts; pwrc_old->expires = jiffies + diff --git a/kgsl_pwrctrl.h b/kgsl_pwrctrl.h index c8169f7900..6dfbe88502 100644 --- a/kgsl_pwrctrl.h +++ b/kgsl_pwrctrl.h @@ -43,11 +43,6 @@ struct kgsl_clk_stats { struct kgsl_pwr_constraint { unsigned int type; unsigned int sub_type; - union { - struct { - unsigned int level; - } pwrlevel; - } hint; unsigned long expires; uint32_t owner_id; u32 owner_timestamp; From 2464bd957a5234fa470153905ad33768ed915f6a Mon Sep 17 00:00:00 2001 From: Harshdeep Dhatt Date: Thu, 3 Aug 2023 11:14:53 -0600 Subject: [PATCH 536/750] kgsl: hwsched: Fix HFI sequence number wrap issue When comparing the ack, make sure the entire header is compared instead of just the sequence number. This is required because two packets (waiting for their acks) can have the same sequence number (once it wraps around). Use a different sequence number generator for cmdbatch submissions to context queues or dispatch queues to reduce the chance of wrapping around the cmdq sequence number. For the same reason, use a different sequence number generator for hardware fence packets as well. Remove instances where the sequence number is getting updated twice for the same packet. 
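The sketch below illustrates the aliasing problem described above: the HFI sequence number is only 12 bits wide, so two in-flight packets can carry the same value once it wraps, while comparing the whole 32-bit header (as the new CMP_HFI_ACK_HDR in the adreno_hfi.h hunk of this patch does) still tells them apart. The field layout follows the header macros visible in the diff (seqnum in bits 31:20, size in dwords in bits 15:8); treating the low byte as the message id is an assumption made for this example.

/* Why a seqnum-only ack match can alias after a 12-bit wrap; not driver code. */
#include <stdio.h>
#include <stdint.h>

#define FIELD(val, shift, mask)  (((uint32_t)(val) & (mask)) << (shift))

static uint32_t make_hdr(uint32_t id, uint32_t size_dw, uint32_t seqnum)
{
        return FIELD(id, 0, 0xff) | FIELD(size_dw, 8, 0xff) |
               FIELD(seqnum, 20, 0xfff);
}

static uint32_t seqnum_of(uint32_t hdr)
{
        return (hdr >> 20) & 0xfff;
}

int main(void)
{
        /* two different outstanding packets, 4096 submissions apart */
        uint32_t a = make_hdr(0x10, 5, 7);
        uint32_t b = make_hdr(0x20, 9, 7 + 4096);   /* 12-bit seqnum wraps back to 7 */

        printf("seqnum match:      %d\n", seqnum_of(a) == seqnum_of(b)); /* 1: ambiguous */
        printf("full header match: %d\n", a == b);                       /* 0: distinct  */
        return 0;
}

The separate submission_seqnum and hw_fence.seqnum counters added by the patch attack the same problem from the other side, by making each stream wrap far less often.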
Change-Id: I56232a3b5cf74b725f9572bd34eb4041774dc6d1 Signed-off-by: Harshdeep Dhatt --- adreno_a6xx_hfi.c | 6 ++-- adreno_a6xx_hwsched_hfi.c | 41 ++++++++++----------- adreno_gen7_hfi.c | 7 ++-- adreno_gen7_hwsched_hfi.c | 76 +++++++++++++++++++-------------------- adreno_gen7_hwsched_hfi.h | 2 ++ adreno_hfi.h | 7 ++-- adreno_hwsched.h | 5 +++ 7 files changed, 74 insertions(+), 70 deletions(-) diff --git a/adreno_a6xx_hfi.c b/adreno_a6xx_hfi.c index a0dc91ddfe..7bde0a2afe 100644 --- a/adreno_a6xx_hfi.c +++ b/adreno_a6xx_hfi.c @@ -123,8 +123,6 @@ int a6xx_hfi_queue_write(struct adreno_device *adreno_dev, uint32_t queue_idx, if (empty_space <= align_size) return -ENOSPC; - *msg = MSG_HDR_SET_SIZE(*msg, size_dwords); - for (i = 0; i < size_dwords; i++) { queue[write_idx] = msg[i]; write_idx = (write_idx + 1) % hdr->queue_size; @@ -244,7 +242,7 @@ int a6xx_receive_ack_cmd(struct a6xx_gmu_device *gmu, void *rcvd, if (ret_cmd == NULL) return -EINVAL; - if (HDR_CMP_SEQNUM(ret_cmd->sent_hdr, req_hdr)) { + if (CMP_HFI_ACK_HDR(ret_cmd->sent_hdr, req_hdr)) { memcpy(&ret_cmd->results, ack, MSG_HDR_GET_SIZE(hdr) << 2); return 0; } @@ -319,7 +317,7 @@ static int a6xx_hfi_send_cmd_wait_inline(struct adreno_device *adreno_dev, struct a6xx_hfi *hfi = &gmu->hfi; unsigned int seqnum = atomic_inc_return(&hfi->seqnum); - *cmd = MSG_HDR_SET_SEQNUM(*cmd, seqnum); + *cmd = MSG_HDR_SET_SEQNUM_SIZE(*cmd, seqnum, size_bytes >> 2); if (ret_cmd == NULL) return a6xx_hfi_cmdq_write(adreno_dev, cmd, size_bytes); diff --git a/adreno_a6xx_hwsched_hfi.c b/adreno_a6xx_hwsched_hfi.c index 9c3fc84541..02fac3497a 100644 --- a/adreno_a6xx_hwsched_hfi.c +++ b/adreno_a6xx_hwsched_hfi.c @@ -103,7 +103,7 @@ static void a6xx_receive_ack_async(struct adreno_device *adreno_dev, void *rcvd) read_lock(&hfi->msglock); list_for_each_entry(cmd, &hfi->msglist, node) { - if (HDR_CMP_SEQNUM(cmd->sent_hdr, req_hdr)) { + if (CMP_HFI_ACK_HDR(cmd->sent_hdr, req_hdr)) { memcpy(cmd->results, ack, min_t(u32, size_bytes, sizeof(cmd->results))); @@ -624,11 +624,12 @@ int a6xx_hfi_send_cmd_async(struct adreno_device *adreno_dev, void *data, u32 si struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); struct a6xx_hwsched_hfi *hfi = to_a6xx_hwsched_hfi(adreno_dev); u32 *cmd = data; - u32 seqnum = atomic_inc_return(&gmu->hfi.seqnum); + u32 seqnum; int rc; struct pending_cmd pending_ack; - *cmd = MSG_HDR_SET_SEQNUM(*cmd, seqnum); + seqnum = atomic_inc_return(&gmu->hfi.seqnum); + *cmd = MSG_HDR_SET_SEQNUM_SIZE(*cmd, seqnum, size_bytes >> 2); add_waiter(hfi, *cmd, &pending_ack); @@ -926,6 +927,7 @@ static int mem_alloc_reply(struct adreno_device *adreno_dev, void *rcvd) struct hfi_mem_alloc_desc desc = {0}; struct hfi_mem_alloc_reply_cmd out = {0}; struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); + u32 seqnum; int ret; hfi_get_mem_alloc_desc(rcvd, &desc); @@ -937,8 +939,8 @@ static int mem_alloc_reply(struct adreno_device *adreno_dev, void *rcvd) memcpy(&out.desc, &desc, sizeof(out.desc)); out.hdr = ACK_MSG_HDR(F2H_MSG_MEM_ALLOC); - out.hdr = MSG_HDR_SET_SEQNUM(out.hdr, - atomic_inc_return(&gmu->hfi.seqnum)); + seqnum = atomic_inc_return(&gmu->hfi.seqnum); + out.hdr = MSG_HDR_SET_SEQNUM_SIZE(out.hdr, seqnum, sizeof(out) >> 2); out.req_hdr = *(u32 *)rcvd; return a6xx_hfi_cmdq_write(adreno_dev, (u32 *)&out, sizeof(out)); @@ -948,7 +950,8 @@ static int gmu_cntr_register_reply(struct adreno_device *adreno_dev, void *rcvd) { struct hfi_gmu_cntr_register_cmd *in = (struct hfi_gmu_cntr_register_cmd *)rcvd; struct hfi_gmu_cntr_register_reply_cmd out = {0}; - 
u32 lo = 0, hi = 0; + struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); + u32 lo = 0, hi = 0, seqnum; /* * Failure to allocate counter is not fatal. Sending lo = 0, hi = 0 @@ -958,6 +961,8 @@ static int gmu_cntr_register_reply(struct adreno_device *adreno_dev, void *rcvd) in->group_id, in->countable, &lo, &hi, PERFCOUNTER_FLAG_KERNEL); out.hdr = ACK_MSG_HDR(F2H_MSG_GMU_CNTR_REGISTER); + seqnum = atomic_inc_return(&gmu->hfi.seqnum); + out.hdr = MSG_HDR_SET_SEQNUM_SIZE(out.hdr, seqnum, sizeof(out) >> 2); out.req_hdr = in->hdr; out.group_id = in->group_id; out.countable = in->countable; @@ -972,7 +977,7 @@ static int send_start_msg(struct adreno_device *adreno_dev) { struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - unsigned int seqnum = atomic_inc_return(&gmu->hfi.seqnum); + u32 seqnum; int rc, read_size; struct hfi_start_cmd cmd; u32 rcvd[MAX_RCVD_SIZE]; @@ -982,7 +987,8 @@ static int send_start_msg(struct adreno_device *adreno_dev) if (rc) return rc; - cmd.hdr = MSG_HDR_SET_SEQNUM(cmd.hdr, seqnum); + seqnum = atomic_inc_return(&gmu->hfi.seqnum); + cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.hdr, seqnum, sizeof(cmd) >> 2); pending_ack.sent_hdr = cmd.hdr; @@ -1705,8 +1711,6 @@ static int a6xx_hfi_dispatch_queue_write(struct adreno_device *adreno_dev, uint3 if (empty_space <= align_size) return -ENOSPC; - *msg = MSG_HDR_SET_SIZE(*msg, size_dwords); - write = hdr->write_index; for (i = 0; i < size_dwords; i++) { @@ -1749,9 +1753,8 @@ static int a6xx_hfi_dispatch_queue_write(struct adreno_device *adreno_dev, uint3 int a6xx_hwsched_submit_drawobj(struct adreno_device *adreno_dev, struct kgsl_drawobj *drawobj) { - struct a6xx_hfi *hfi = to_a6xx_hfi(adreno_dev); int ret = 0; - u32 cmd_sizebytes; + u32 cmd_sizebytes, seqnum; struct kgsl_drawobj_cmd *cmdobj = CMDOBJ(drawobj); struct hfi_submit_cmd *cmd; struct adreno_submit_time time = {0}; @@ -1815,9 +1818,9 @@ int a6xx_hwsched_submit_drawobj(struct adreno_device *adreno_dev, skipib: adreno_drawobj_set_constraint(KGSL_DEVICE(adreno_dev), drawobj); + seqnum = atomic_inc_return(&adreno_dev->hwsched.submission_seqnum); cmd->hdr = CREATE_MSG_HDR(H2F_MSG_ISSUE_CMD, HFI_MSG_CMD); - cmd->hdr = MSG_HDR_SET_SEQNUM(cmd->hdr, - atomic_inc_return(&hfi->seqnum)); + cmd->hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd->hdr, seqnum, cmd_sizebytes >> 2); ret = a6xx_hfi_dispatch_queue_write(adreno_dev, HFI_DSP_ID_0 + drawobj->context->gmu_dispatch_queue, @@ -1842,7 +1845,6 @@ int a6xx_hwsched_send_recurring_cmdobj(struct adreno_device *adreno_dev, { struct adreno_hwsched *hwsched = &adreno_dev->hwsched; struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj); - struct a6xx_hfi *hfi = to_a6xx_hfi(adreno_dev); struct hfi_submit_cmd *cmd; struct kgsl_memobj_node *ib; u32 cmd_sizebytes; @@ -1900,8 +1902,6 @@ int a6xx_hwsched_send_recurring_cmdobj(struct adreno_device *adreno_dev, } cmd->hdr = CREATE_MSG_HDR(H2F_MSG_ISSUE_RECURRING_CMD, HFI_MSG_CMD); - cmd->hdr = MSG_HDR_SET_SEQNUM(cmd->hdr, - atomic_inc_return(&hfi->seqnum)); ret = a6xx_hfi_send_cmd_async(adreno_dev, cmd, cmd_sizebytes); @@ -1953,7 +1953,7 @@ static int send_context_unregister_hfi(struct adreno_device *adreno_dev, cmd.ts = ts, seqnum = atomic_inc_return(&gmu->hfi.seqnum); - cmd.hdr = MSG_HDR_SET_SEQNUM(cmd.hdr, seqnum); + cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.hdr, seqnum, sizeof(cmd) >> 2); add_waiter(hfi, cmd.hdr, &pending_ack); @@ -2044,6 +2044,7 @@ u32 a6xx_hwsched_preempt_count_get(struct adreno_device *adreno_dev) struct a6xx_hwsched_hfi *hfi = 
to_a6xx_hwsched_hfi(adreno_dev); struct pending_cmd pending_ack; int rc; + u32 seqnum; if (device->state != KGSL_STATE_ACTIVE) return 0; @@ -2052,8 +2053,8 @@ u32 a6xx_hwsched_preempt_count_get(struct adreno_device *adreno_dev) if (rc) return 0; - cmd.hdr = MSG_HDR_SET_SEQNUM(cmd.hdr, - atomic_inc_return(&gmu->hfi.seqnum)); + seqnum = atomic_inc_return(&gmu->hfi.seqnum); + cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.hdr, seqnum, sizeof(cmd) >> 2); cmd.type = HFI_VALUE_PREEMPT_COUNT; cmd.subtype = 0; diff --git a/adreno_gen7_hfi.c b/adreno_gen7_hfi.c index f1a799dade..26363771c8 100644 --- a/adreno_gen7_hfi.c +++ b/adreno_gen7_hfi.c @@ -124,8 +124,6 @@ int gen7_hfi_queue_write(struct adreno_device *adreno_dev, u32 queue_idx, if (empty_space <= align_size) return -ENOSPC; - *msg = MSG_HDR_SET_SIZE(*msg, size_dwords); - for (i = 0; i < size_dwords; i++) { queue[write_idx] = msg[i]; write_idx = (write_idx + 1) % hdr->queue_size; @@ -246,7 +244,7 @@ int gen7_receive_ack_cmd(struct gen7_gmu_device *gmu, void *rcvd, if (ret_cmd == NULL) return -EINVAL; - if (HDR_CMP_SEQNUM(ret_cmd->sent_hdr, req_hdr)) { + if (CMP_HFI_ACK_HDR(ret_cmd->sent_hdr, req_hdr)) { memcpy(&ret_cmd->results, ack, MSG_HDR_GET_SIZE(hdr) << 2); return 0; } @@ -316,7 +314,8 @@ static int gen7_hfi_send_cmd_wait_inline(struct adreno_device *adreno_dev, struct gen7_hfi *hfi = &gmu->hfi; unsigned int seqnum = atomic_inc_return(&hfi->seqnum); - *cmd = MSG_HDR_SET_SEQNUM(*cmd, seqnum); + *cmd = MSG_HDR_SET_SEQNUM_SIZE(*cmd, seqnum, size_bytes >> 2); + if (ret_cmd == NULL) return gen7_hfi_cmdq_write(adreno_dev, cmd, size_bytes); diff --git a/adreno_gen7_hwsched_hfi.c b/adreno_gen7_hwsched_hfi.c index b4c4fb0715..cf1096eb8a 100644 --- a/adreno_gen7_hwsched_hfi.c +++ b/adreno_gen7_hwsched_hfi.c @@ -121,7 +121,7 @@ static void gen7_receive_ack_async(struct adreno_device *adreno_dev, void *rcvd) read_lock(&hfi->msglock); list_for_each_entry(cmd, &hfi->msglist, node) { - if (HDR_CMP_SEQNUM(cmd->sent_hdr, req_hdr)) { + if (CMP_HFI_ACK_HDR(cmd->sent_hdr, req_hdr)) { memcpy(cmd->results, ack, min_t(u32, size_bytes, sizeof(cmd->results))); @@ -818,7 +818,6 @@ static void gen7_syncobj_query_reply(struct adreno_device *adreno_dev, struct kgsl_drawobj *drawobj, struct hfi_syncobj_query_cmd *cmd) { struct hfi_syncobj_query_cmd reply = {0}; - struct gen7_hfi *hfi = to_gen7_hfi(adreno_dev); int i, j, fence_index = 0; struct kgsl_drawobj_sync *syncobj = SYNCOBJ(drawobj); const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); @@ -850,8 +849,6 @@ static void gen7_syncobj_query_reply(struct adreno_device *adreno_dev, } reply.hdr = CREATE_MSG_HDR(F2H_MSG_SYNCOBJ_QUERY, HFI_MSG_CMD); - reply.hdr = MSG_HDR_SET_SEQNUM(reply.hdr, - atomic_inc_return(&hfi->seqnum)); reply.gmu_ctxt_id = cmd->gmu_ctxt_id; reply.sync_obj_ts = cmd->sync_obj_ts; @@ -1015,11 +1012,13 @@ static void _increment_hw_fence_unack_count(struct adreno_device *adreno_dev) static int _send_hw_fence_no_ack(struct adreno_device *adreno_dev, struct adreno_hw_fence_entry *entry) { - struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); + u32 seqnum; int ret; - entry->cmd.hdr = MSG_HDR_SET_SEQNUM(entry->cmd.hdr, - atomic_inc_return(&gmu->hfi.seqnum)); + + seqnum = atomic_inc_return(&hfi->hw_fence.seqnum); + entry->cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(entry->cmd.hdr, seqnum, sizeof(entry->cmd) >> 2); ret = gen7_hfi_cmdq_write(adreno_dev, (u32 *)&entry->cmd, sizeof(entry->cmd)); if (!ret) @@ -1183,7 +1182,7 @@ static void 
process_hw_fence_ack(struct adreno_device *adreno_dev, u32 received_ spin_lock(&hfi->hw_fence.lock); /* If this ack is being waited on, we don't need to touch the unack count */ - if (hw_fence_ack.sent_hdr && HDR_CMP_SEQNUM(hw_fence_ack.sent_hdr, received_hdr)) { + if (hw_fence_ack.sent_hdr && CMP_HFI_ACK_HDR(hw_fence_ack.sent_hdr, received_hdr)) { spin_unlock(&hfi->hw_fence.lock); complete(&hw_fence_ack.complete); return; @@ -1404,11 +1403,12 @@ int gen7_hfi_send_cmd_async(struct adreno_device *adreno_dev, void *data, u32 si struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); u32 *cmd = data; - u32 seqnum = atomic_inc_return(&gmu->hfi.seqnum); + u32 seqnum; int rc; struct pending_cmd pending_ack; - *cmd = MSG_HDR_SET_SEQNUM(*cmd, seqnum); + seqnum = atomic_inc_return(&gmu->hfi.seqnum); + *cmd = MSG_HDR_SET_SEQNUM_SIZE(*cmd, seqnum, size_bytes >> 2); add_waiter(hfi, *cmd, &pending_ack); @@ -1708,6 +1708,7 @@ static int mem_alloc_reply(struct adreno_device *adreno_dev, void *rcvd) struct hfi_mem_alloc_desc desc = {0}; struct hfi_mem_alloc_reply_cmd out = {0}; struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + u32 seqnum; int ret; hfi_get_mem_alloc_desc(rcvd, &desc); @@ -1720,8 +1721,8 @@ static int mem_alloc_reply(struct adreno_device *adreno_dev, void *rcvd) out.hdr = ACK_MSG_HDR(F2H_MSG_MEM_ALLOC); - out.hdr = MSG_HDR_SET_SEQNUM(out.hdr, - atomic_inc_return(&gmu->hfi.seqnum)); + seqnum = atomic_inc_return(&gmu->hfi.seqnum); + out.hdr = MSG_HDR_SET_SEQNUM_SIZE(out.hdr, seqnum, sizeof(out) >> 2); out.req_hdr = *(u32 *)rcvd; @@ -1732,7 +1733,8 @@ static int gmu_cntr_register_reply(struct adreno_device *adreno_dev, void *rcvd) { struct hfi_gmu_cntr_register_cmd *in = (struct hfi_gmu_cntr_register_cmd *)rcvd; struct hfi_gmu_cntr_register_reply_cmd out = {0}; - u32 lo = 0, hi = 0; + struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); + u32 lo = 0, hi = 0, seqnum; /* * Failure to allocate counter is not fatal. 
Sending lo = 0, hi = 0 @@ -1742,6 +1744,8 @@ static int gmu_cntr_register_reply(struct adreno_device *adreno_dev, void *rcvd) in->group_id, in->countable, &lo, &hi, PERFCOUNTER_FLAG_KERNEL); out.hdr = ACK_MSG_HDR(F2H_MSG_GMU_CNTR_REGISTER); + seqnum = atomic_inc_return(&gmu->hfi.seqnum); + out.hdr = MSG_HDR_SET_SEQNUM_SIZE(out.hdr, seqnum, sizeof(out) >> 2); out.req_hdr = in->hdr; out.group_id = in->group_id; out.countable = in->countable; @@ -1754,8 +1758,6 @@ static int gmu_cntr_register_reply(struct adreno_device *adreno_dev, void *rcvd) static int send_warmboot_start_msg(struct adreno_device *adreno_dev) { - struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); - unsigned int seqnum = atomic_inc_return(&gmu->hfi.seqnum); int ret = 0; struct hfi_start_cmd cmd; @@ -1766,8 +1768,6 @@ static int send_warmboot_start_msg(struct adreno_device *adreno_dev) if (ret) return ret; - cmd.hdr = MSG_HDR_SET_SEQNUM(cmd.hdr, seqnum); - cmd.hdr = RECORD_NOP_MSG_HDR(cmd.hdr); return gen7_hfi_send_generic_req(adreno_dev, &cmd, sizeof(cmd)); @@ -1777,7 +1777,7 @@ static int send_start_msg(struct adreno_device *adreno_dev) { struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - unsigned int seqnum = atomic_inc_return(&gmu->hfi.seqnum); + u32 seqnum; int read_size, rc = 0; struct hfi_start_cmd cmd; u32 rcvd[MAX_RCVD_SIZE]; @@ -1787,7 +1787,8 @@ static int send_start_msg(struct adreno_device *adreno_dev) if (rc) return rc; - cmd.hdr = MSG_HDR_SET_SEQNUM(cmd.hdr, seqnum); + seqnum = atomic_inc_return(&gmu->hfi.seqnum); + cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.hdr, seqnum, sizeof(cmd) >> 2); pending_ack.sent_hdr = cmd.hdr; @@ -2023,13 +2024,14 @@ u32 gen7_hwsched_hfi_get_value(struct adreno_device *adreno_dev, u32 prop) struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); struct pending_cmd pending_ack; int rc; + u32 seqnum; rc = CMD_MSG_HDR(cmd, H2F_MSG_GET_VALUE); if (rc) return 0; - cmd.hdr = MSG_HDR_SET_SEQNUM(cmd.hdr, - atomic_inc_return(&gmu->hfi.seqnum)); + seqnum = atomic_inc_return(&gmu->hfi.seqnum); + cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.hdr, seqnum, sizeof(cmd) >> 2); cmd.type = prop; cmd.subtype = 0; @@ -2173,6 +2175,7 @@ static int gen7_hwsched_hfi_warmboot_gpu_cmd(struct adreno_device *adreno_dev, .flags = HFI_WARMBOOT_EXEC_SCRATCH, }; int ret = 0; + u32 seqnum; if (!adreno_dev->warmboot_enabled) return 0; @@ -2181,8 +2184,8 @@ static int gen7_hwsched_hfi_warmboot_gpu_cmd(struct adreno_device *adreno_dev, if (ret) return ret; - cmd.hdr = MSG_HDR_SET_SEQNUM(cmd.hdr, - atomic_inc_return(&gmu->hfi.seqnum)); + seqnum = atomic_inc_return(&gmu->hfi.seqnum); + cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.hdr, seqnum, sizeof(cmd) >> 2); add_waiter(hfi, cmd.hdr, ret_cmd); @@ -3032,8 +3035,6 @@ int gen7_gmu_context_queue_write(struct adreno_device *adreno_dev, if (!IS_ALIGNED(size_bytes, sizeof(u32))) return -EINVAL; - *msg = MSG_HDR_SET_SIZE(*msg, size_dwords); - for (i = 0; i < size_dwords; i++) { queue[write_idx] = msg[i]; write_idx = (write_idx + 1) % hdr->queue_size; @@ -3131,12 +3132,12 @@ static int _submit_hw_fence(struct adreno_device *adreno_dev, struct kgsl_drawobj *drawobj, void *cmdbuf) { struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context); - struct gen7_hfi *hfi = to_gen7_hfi(adreno_dev); int i, j; u32 cmd_sizebytes; struct kgsl_drawobj_sync *syncobj = SYNCOBJ(drawobj); struct hfi_submit_syncobj *cmd; struct hfi_syncobj *obj = NULL; + u32 seqnum; /* Add hfi_syncobj struct for sync object */ cmd_sizebytes = 
sizeof(*cmd) + @@ -3215,9 +3216,9 @@ static int _submit_hw_fence(struct adreno_device *adreno_dev, drawobj->timestamp = ++drawctxt->syncobj_timestamp; cmd->timestamp = drawobj->timestamp; + seqnum = atomic_inc_return(&adreno_dev->hwsched.submission_seqnum); cmd->hdr = CREATE_MSG_HDR(H2F_MSG_ISSUE_SYNCOBJ, HFI_MSG_CMD); - cmd->hdr = MSG_HDR_SET_SEQNUM(cmd->hdr, - atomic_inc_return(&hfi->seqnum)); + cmd->hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd->hdr, seqnum, cmd_sizebytes >> 2); return gen7_gmu_context_queue_write(adreno_dev, drawctxt, (u32 *)cmd, cmd_sizebytes, drawobj, NULL); @@ -3358,6 +3359,7 @@ int gen7_send_hw_fence_hfi_wait_ack(struct adreno_device *adreno_dev, struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); struct gen7_hwsched_hfi *hfi = to_gen7_hwsched_hfi(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + u32 seqnum; int ret = 0; /* Device mutex is necessary to ensure only one hardware fence ack is being waited for */ @@ -3369,8 +3371,8 @@ int gen7_send_hw_fence_hfi_wait_ack(struct adreno_device *adreno_dev, init_completion(&hw_fence_ack.complete); entry->cmd.flags |= flags; - entry->cmd.hdr = MSG_HDR_SET_SEQNUM(entry->cmd.hdr, - atomic_inc_return(&gmu->hfi.seqnum)); + seqnum = atomic_inc_return(&hfi->hw_fence.seqnum); + entry->cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(entry->cmd.hdr, seqnum, sizeof(entry->cmd) >> 2); hw_fence_ack.sent_hdr = entry->cmd.hdr; @@ -3695,8 +3697,6 @@ static int gen7_hfi_dispatch_queue_write(struct adreno_device *adreno_dev, u32 q if (empty_space <= align_size) return -ENOSPC; - *msg = MSG_HDR_SET_SIZE(*msg, size_dwords); - write = hdr->write_index; for (i = 0; i < size_dwords; i++) { @@ -3732,9 +3732,8 @@ static int gen7_hfi_dispatch_queue_write(struct adreno_device *adreno_dev, u32 q int gen7_hwsched_submit_drawobj(struct adreno_device *adreno_dev, struct kgsl_drawobj *drawobj) { - struct gen7_hfi *hfi = to_gen7_hfi(adreno_dev); int ret = 0; - u32 cmd_sizebytes; + u32 cmd_sizebytes, seqnum; struct kgsl_drawobj_cmd *cmdobj = NULL; struct hfi_submit_cmd *cmd; struct adreno_submit_time time = {0}; @@ -3814,9 +3813,9 @@ int gen7_hwsched_submit_drawobj(struct adreno_device *adreno_dev, struct kgsl_dr skipib: adreno_drawobj_set_constraint(KGSL_DEVICE(adreno_dev), drawobj); + seqnum = atomic_inc_return(&adreno_dev->hwsched.submission_seqnum); cmd->hdr = CREATE_MSG_HDR(H2F_MSG_ISSUE_CMD, HFI_MSG_CMD); - cmd->hdr = MSG_HDR_SET_SEQNUM(cmd->hdr, - atomic_inc_return(&hfi->seqnum)); + cmd->hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd->hdr, seqnum, cmd_sizebytes >> 2); if (adreno_hwsched_context_queue_enabled(adreno_dev)) ret = gen7_gmu_context_queue_write(adreno_dev, @@ -3849,7 +3848,6 @@ int gen7_hwsched_send_recurring_cmdobj(struct adreno_device *adreno_dev, { struct adreno_hwsched *hwsched = &adreno_dev->hwsched; struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj); - struct gen7_hfi *hfi = to_gen7_hfi(adreno_dev); struct hfi_submit_cmd *cmd; struct kgsl_memobj_node *ib; u32 cmd_sizebytes; @@ -3907,8 +3905,6 @@ int gen7_hwsched_send_recurring_cmdobj(struct adreno_device *adreno_dev, } cmd->hdr = CREATE_MSG_HDR(H2F_MSG_ISSUE_RECURRING_CMD, HFI_MSG_CMD); - cmd->hdr = MSG_HDR_SET_SEQNUM(cmd->hdr, - atomic_inc_return(&hfi->seqnum)); ret = gen7_hfi_send_cmd_async(adreno_dev, cmd, sizeof(*cmd)); @@ -4015,7 +4011,7 @@ static int send_context_unregister_hfi(struct adreno_device *adreno_dev, cmd.ts = ts, seqnum = atomic_inc_return(&gmu->hfi.seqnum); - cmd.hdr = MSG_HDR_SET_SEQNUM(cmd.hdr, seqnum); + cmd.hdr = MSG_HDR_SET_SEQNUM_SIZE(cmd.hdr, seqnum, sizeof(cmd) >> 2); 
add_waiter(hfi, cmd.hdr, &pending_ack); diff --git a/adreno_gen7_hwsched_hfi.h b/adreno_gen7_hwsched_hfi.h index 6a6f02f49f..695d11e2a2 100644 --- a/adreno_gen7_hwsched_hfi.h +++ b/adreno_gen7_hwsched_hfi.h @@ -83,6 +83,8 @@ struct gen7_hwsched_hfi { * @flags: Flags to control the creation of new hardware fences */ unsigned long flags; + /** @seqnum: Sequence number for hardware fence packet header */ + atomic_t seqnum; } hw_fence; /** * @hw_fence_timer: Timer to trigger fault if unack'd hardware fence count does'nt drop diff --git a/adreno_hfi.h b/adreno_hfi.h index f3a2ad011d..2760a0119b 100644 --- a/adreno_hfi.h +++ b/adreno_hfi.h @@ -414,12 +414,15 @@ struct hfi_queue_table { #define MSG_HDR_GET_TYPE(hdr) (((hdr) >> 16) & 0xF) #define MSG_HDR_GET_SEQNUM(hdr) (((hdr) >> 20) & 0xFFF) -#define HDR_CMP_SEQNUM(out_hdr, in_hdr) \ - (MSG_HDR_GET_SEQNUM(out_hdr) == MSG_HDR_GET_SEQNUM(in_hdr)) +/* Clear the HFI_MSG_RECORD bit from both headers since some acks may have it set, and some not. */ +#define CMP_HFI_ACK_HDR(sent, rcvd) ((sent &= ~HFI_MSG_RECORD) == (rcvd &= ~HFI_MSG_RECORD)) #define MSG_HDR_SET_SEQNUM(hdr, num) \ (((hdr) & 0xFFFFF) | ((num) << 20)) +#define MSG_HDR_SET_SEQNUM_SIZE(hdr, seqnum, sizedwords) \ + (FIELD_PREP(GENMASK(31, 20), seqnum) | FIELD_PREP(GENMASK(15, 8), sizedwords) | hdr) + #define MSG_HDR_SET_TYPE(hdr, type) \ (((hdr) & 0xFFFFF) | ((type) << 16)) diff --git a/adreno_hwsched.h b/adreno_hwsched.h index 80b8651491..ef0c457359 100644 --- a/adreno_hwsched.h +++ b/adreno_hwsched.h @@ -110,6 +110,11 @@ struct adreno_hwsched { struct kmem_cache *hw_fence_cache; /** @hw_fence_count: Number of hardware fences that haven't yet been sent to Tx Queue */ atomic_t hw_fence_count; + /** + * @submission_seqnum: Sequence number for sending submissions to GMU context queues or + * dispatch queues + */ + atomic_t submission_seqnum; }; From 693a51eeb453c8b013b7c3532c6d4cd41b4e70ef Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Tue, 8 Aug 2023 22:05:10 +0530 Subject: [PATCH 537/750] msm: kgsl: Update power state machine for rgmu Power state machine is different for RGMU and GMU targets. Update the power state machine of RGMU to make it same as GMU targets. 
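In sketch form, the resulting power-off ordering looks like this (an abridged, hypothetical sequence based on the hunks below, not the literal driver code): the KGSL_STATE_NONE transition moves out of the gdsc-disable helper and is performed last by the suspend and power-off paths, matching the GMU targets.

/* Abridged sketch of the reordered RGMU power-off path (not literal code) */
static void rgmu_power_off_sketch(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);

	a6xx_rgmu_irq_disable(adreno_dev);
	a6xx_rgmu_disable_clks(adreno_dev);
	a6xx_rgmu_disable_gdsc(adreno_dev);	/* no longer sets the power state itself */
	kgsl_pwrctrl_clear_l3_vote(device);

	/* The state update now happens last, as on GMU targets */
	kgsl_pwrctrl_set_state(device, KGSL_STATE_NONE);
}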
Change-Id: I44eba52b6eab90b4686c27d84509ac9ef85def89 Signed-off-by: Kamal Agrawal --- adreno_a6xx_rgmu.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/adreno_a6xx_rgmu.c b/adreno_a6xx_rgmu.c index 2f345ee191..89d290b968 100644 --- a/adreno_a6xx_rgmu.c +++ b/adreno_a6xx_rgmu.c @@ -516,7 +516,6 @@ static void a6xx_rgmu_disable_clks(struct adreno_device *adreno_dev) static int a6xx_rgmu_disable_gdsc(struct adreno_device *adreno_dev) { struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(adreno_dev); - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); kgsl_mmu_send_tlb_hint(&device->mmu, true); @@ -526,8 +525,6 @@ static int a6xx_rgmu_disable_gdsc(struct adreno_device *adreno_dev) dev_err(&rgmu->pdev->dev, "RGMU CX gdsc off timeout\n"); - kgsl_pwrctrl_set_state(device, KGSL_STATE_NONE); - return -ETIMEDOUT; } @@ -556,6 +553,8 @@ static void a6xx_rgmu_suspend(struct adreno_device *adreno_dev) a6xx_rgmu_disable_clks(adreno_dev); a6xx_rgmu_disable_gdsc(adreno_dev); + + kgsl_pwrctrl_set_state(KGSL_DEVICE(adreno_dev), KGSL_STATE_NONE); } static int a6xx_rgmu_enable_clks(struct adreno_device *adreno_dev) @@ -716,6 +715,8 @@ static void a6xx_rgmu_power_off(struct adreno_device *adreno_dev) a6xx_rgmu_disable_gdsc(adreno_dev); kgsl_pwrctrl_clear_l3_vote(device); + + kgsl_pwrctrl_set_state(device, KGSL_STATE_NONE); } static int a6xx_rgmu_clock_set(struct adreno_device *adreno_dev, From 900c12c34d3a7ad089ef6c53f791e98ede499690 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Sat, 17 Dec 2022 20:04:52 +0530 Subject: [PATCH 538/750] msm: kgsl: Add cx gdsc notifier for rgmu and non-gmu targets Extend cx gdsc notifier support to rgmu and non-gmu targets. With this, KGSL waits for cx collapse notifier event instead of polling for gdsc state. This helps to remove the CPU cycles spent for polling. Also, it addresses the corner case scenario where cx gdsc collapse event can get missed due to sleep operation during polling. Also, remove cx gdsc and gx gdsc members from gmu structure and use members in power control structure. Change-Id: I6199b612a18651dc53a46b666569742a21dda2df Signed-off-by: Kamal Agrawal --- adreno_a6xx.c | 6 ++ adreno_a6xx_gmu.c | 110 ++++------------------------------- adreno_a6xx_gmu.h | 26 --------- adreno_a6xx_hwsched.c | 4 +- adreno_a6xx_rgmu.c | 78 ++++--------------------- adreno_a6xx_rgmu.h | 5 +- adreno_a6xx_snapshot.c | 2 - adreno_gen7.c | 1 + adreno_gen7_gmu.c | 112 ++++------------------------------- adreno_gen7_gmu.h | 32 ---------- adreno_gen7_hwsched.c | 10 ++-- kgsl_gmu_core.h | 1 - kgsl_pwrctrl.c | 129 +++++++++++++++++++++++++++++++++++++---- kgsl_pwrctrl.h | 32 ++++++++++ 14 files changed, 201 insertions(+), 347 deletions(-) diff --git a/adreno_a6xx.c b/adreno_a6xx.c index 92402826a8..8ec6240458 100644 --- a/adreno_a6xx.c +++ b/adreno_a6xx.c @@ -1888,6 +1888,7 @@ int a6xx_probe_common(struct platform_device *pdev, struct adreno_device *adreno_dev, u32 chipid, const struct adreno_gpu_core *gpucore) { + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); const struct adreno_gpudev *gpudev = gpucore->gpudev; int ret; @@ -1896,6 +1897,11 @@ int a6xx_probe_common(struct platform_device *pdev, adreno_reg_offset_init(gpudev->reg_offsets); + if (gmu_core_isenabled(device) && (gpudev != &adreno_a6xx_rgmu_gpudev)) + device->pwrctrl.cx_gdsc_offset = (adreno_is_a662(adreno_dev) || + adreno_is_a621(adreno_dev)) ? 
A662_GPU_CC_CX_GDSCR : + A6XX_GPU_CC_CX_GDSCR; + adreno_dev->hwcg_enabled = true; adreno_dev->uche_client_pf = 1; diff --git a/adreno_a6xx_gmu.c b/adreno_a6xx_gmu.c index b8d35c663d..f139dd046e 100644 --- a/adreno_a6xx_gmu.c +++ b/adreno_a6xx_gmu.c @@ -6,7 +6,6 @@ #include #include -#include #include #include #include @@ -594,45 +593,18 @@ static void gmu_ao_sync_event(struct adreno_device *adreno_dev) local_irq_restore(flags); } -int a6xx_gmu_enable_gdsc(struct adreno_device *adreno_dev) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); - int ret; - - ret = wait_for_completion_timeout(&gmu->gdsc_gate, msecs_to_jiffies(5000)); - if (!ret) { - dev_err(device->dev, "GPU CX wait timeout. Dumping CX votes:\n"); - /* Dump the cx regulator consumer list */ - qcom_clk_dump(NULL, gmu->cx_gdsc, false); - } - - ret = regulator_enable(gmu->cx_gdsc); - if (ret) - dev_err(&gmu->pdev->dev, - "Failed to enable GMU CX gdsc, error %d\n", ret); - - kgsl_mmu_send_tlb_hint(&device->mmu, false); - clear_bit(GMU_PRIV_CX_GDSC_WAIT, &gmu->flags); - return ret; -} - void a6xx_gmu_disable_gdsc(struct adreno_device *adreno_dev) { - struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - - kgsl_mmu_send_tlb_hint(&device->mmu, true); - reinit_completion(&gmu->gdsc_gate); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_CX_GDSC)) - regulator_set_mode(gmu->cx_gdsc, REGULATOR_MODE_IDLE); + regulator_set_mode(pwr->cx_gdsc, REGULATOR_MODE_IDLE); - set_bit(GMU_PRIV_CX_GDSC_WAIT, &gmu->flags); - regulator_disable(gmu->cx_gdsc); + kgsl_pwrctrl_disable_cx_gdsc(device); if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_CX_GDSC)) - regulator_set_mode(gmu->cx_gdsc, REGULATOR_MODE_NORMAL); + regulator_set_mode(pwr->cx_gdsc, REGULATOR_MODE_NORMAL); } int a6xx_gmu_device_start(struct adreno_device *adreno_dev) @@ -1866,6 +1838,7 @@ static void a6xx_gmu_pwrctrl_suspend(struct adreno_device *adreno_dev) int ret = 0; struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; /* If SPTP_RAC is on, turn off SPTP_RAC HS */ a6xx_gmu_sptprac_disable(adreno_dev); @@ -1910,14 +1883,14 @@ static void a6xx_gmu_pwrctrl_suspend(struct adreno_device *adreno_dev) * the GX HS. This code path is the only client voting for GX through * the regulator interface. 
*/ - if (gmu->gx_gdsc) { + if (pwr->gx_gdsc) { if (a6xx_gmu_gx_is_on(adreno_dev)) { /* Switch gx gdsc control from GMU to CPU * force non-zero reference count in clk driver * so next disable call will turn * off the GDSC */ - ret = regulator_enable(gmu->gx_gdsc); + ret = regulator_enable(pwr->gx_gdsc); if (ret) dev_err(&gmu->pdev->dev, "suspend fail: gx enable %d\n", ret); @@ -1934,7 +1907,7 @@ static void a6xx_gmu_pwrctrl_suspend(struct adreno_device *adreno_dev) ndelay(520); } - ret = regulator_disable(gmu->gx_gdsc); + ret = regulator_disable(pwr->gx_gdsc); if (ret) dev_err(&gmu->pdev->dev, "suspend fail: gx disable %d\n", ret); @@ -2382,7 +2355,7 @@ static void a6xx_gmu_force_first_boot(struct kgsl_device *device) u32 val = 0; if (gmu->pdc_cfg_base) { - a6xx_gmu_enable_gdsc(adreno_dev); + kgsl_pwrctrl_enable_cx_gdsc(device); a6xx_gmu_enable_clks(adreno_dev, 0); val = __raw_readl(gmu->pdc_cfg_base + (PDC_GPU_ENABLE_PDC << 2)); @@ -2412,7 +2385,7 @@ static int a6xx_gmu_first_boot(struct adreno_device *adreno_dev) a6xx_gmu_aop_send_acd_state(gmu, adreno_dev->acd_enabled); - ret = a6xx_gmu_enable_gdsc(adreno_dev); + ret = kgsl_pwrctrl_enable_cx_gdsc(device); if (ret) return ret; @@ -2508,7 +2481,7 @@ static int a6xx_gmu_boot(struct adreno_device *adreno_dev) kgsl_pwrctrl_request_state(device, KGSL_STATE_AWARE); - ret = a6xx_gmu_enable_gdsc(adreno_dev); + ret = kgsl_pwrctrl_enable_cx_gdsc(device); if (ret) return ret; @@ -2826,65 +2799,6 @@ static void a6xx_gmu_rdpm_probe(struct a6xx_gmu_device *gmu, res->start, resource_size(res)); } -static int gmu_cx_gdsc_event(struct notifier_block *nb, - unsigned long event, void *data) -{ - struct a6xx_gmu_device *gmu = container_of(nb, struct a6xx_gmu_device, gdsc_nb); - struct adreno_device *adreno_dev = a6xx_gmu_to_adreno(gmu); - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - u32 val, offset; - - if (!(event & REGULATOR_EVENT_DISABLE) || - !test_bit(GMU_PRIV_CX_GDSC_WAIT, &gmu->flags)) - return 0; - - offset = (adreno_is_a662(ADRENO_DEVICE(device)) || - adreno_is_a621(ADRENO_DEVICE(device))) ? 
- A662_GPU_CC_CX_GDSCR : A6XX_GPU_CC_CX_GDSCR; - - if (kgsl_regmap_read_poll_timeout(&device->regmap, offset, val, - !(val & BIT(31)), 100, 100 * 1000)) - dev_err(device->dev, "GPU CX wait timeout.\n"); - - clear_bit(GMU_PRIV_CX_GDSC_WAIT, &gmu->flags); - complete_all(&gmu->gdsc_gate); - - return 0; -} - -static int a6xx_gmu_regulators_probe(struct a6xx_gmu_device *gmu, - struct platform_device *pdev) -{ - int ret; - - gmu->cx_gdsc = devm_regulator_get(&pdev->dev, "vddcx"); - if (IS_ERR(gmu->cx_gdsc)) { - if (PTR_ERR(gmu->cx_gdsc) != -EPROBE_DEFER) - dev_err(&pdev->dev, "Couldn't get the vddcx gdsc\n"); - return PTR_ERR(gmu->cx_gdsc); - } - - gmu->gx_gdsc = devm_regulator_get(&pdev->dev, "vdd"); - if (IS_ERR(gmu->gx_gdsc)) { - if (PTR_ERR(gmu->gx_gdsc) != -EPROBE_DEFER) - dev_err(&pdev->dev, "Couldn't get the vdd gdsc\n"); - return PTR_ERR(gmu->gx_gdsc); - } - - gmu->gdsc_nb.notifier_call = gmu_cx_gdsc_event; - ret = devm_regulator_register_notifier(gmu->cx_gdsc, &gmu->gdsc_nb); - - if (ret) { - dev_err(&pdev->dev, "Failed to register gmu cx gdsc notifier: %d\n", ret); - return ret; - } - - init_completion(&gmu->gdsc_gate); - complete_all(&gmu->gdsc_gate); - - return 0; -} - void a6xx_gmu_remove(struct kgsl_device *device) { struct adreno_device *adreno_dev = ADRENO_DEVICE(device); @@ -2990,7 +2904,7 @@ int a6xx_gmu_probe(struct kgsl_device *device, a6xx_gmu_rdpm_probe(gmu, device); /* Set up GMU regulators */ - ret = a6xx_gmu_regulators_probe(gmu, pdev); + ret = kgsl_pwrctrl_probe_regulators(device, pdev); if (ret) return ret; diff --git a/adreno_a6xx_gmu.h b/adreno_a6xx_gmu.h index 6060b325c0..ba29ca53c6 100644 --- a/adreno_a6xx_gmu.h +++ b/adreno_a6xx_gmu.h @@ -24,9 +24,6 @@ * @num_bwlevel: number of GPU BW levels * @num_cnocbwlevel: number CNOC BW levels * @rpmh_votes: RPMh TCS command set for GPU, GMU voltage and bw scaling - * @cx_gdsc: CX headswitch that controls power of GMU and - subsystem peripherals - * @gx_gdsc: GX headswitch that controls power of GPU subsystem * @clks: GPU subsystem clocks required for GMU functionality * @wakeup_pwrlevel: GPU wake up power/DCVS level in case different * than default power level @@ -51,9 +48,6 @@ struct a6xx_gmu_device { /** @vrb: GMU virtual register bank memory */ struct kgsl_memdesc *vrb; struct a6xx_hfi hfi; - /** @pwrlevels: Array of GMU power levels */ - struct regulator *cx_gdsc; - struct regulator *gx_gdsc; struct clk_bulk_data *clks; /** @num_clks: Number of entries in the @clks array */ int num_clks; @@ -99,10 +93,6 @@ struct a6xx_gmu_device { u32 perf_ddr_bw; /** @num_oob_perfcntr: Number of active oob_perfcntr requests */ u32 num_oob_perfcntr; - /** @gdsc_nb: Notifier block for cx gdsc regulator */ - struct notifier_block gdsc_nb; - /** @gdsc_gate: Completion to signal cx gdsc collapse status */ - struct completion gdsc_gate; /** @pdc_cfg_base: Base address of PDC cfg registers */ void __iomem *pdc_cfg_base; /** @pdc_seq_base: Base address of PDC seq registers */ @@ -267,14 +257,6 @@ int a6xx_gmu_memory_init(struct adreno_device *adreno_dev); */ void a6xx_gmu_aop_send_acd_state(struct a6xx_gmu_device *gmu, bool flag); -/** - * a6xx_gmu_enable_gdsc - Enable gmu gdsc - * @adreno_dev: Pointer to the adreno device - * - * Return: 0 on success or negative error on failure - */ -int a6xx_gmu_enable_gdsc(struct adreno_device *adreno_dev); - /** * a6xx_gmu_disable_gdsc - Disable gmu gdsc * @adreno_dev: Pointer to the adreno device @@ -445,14 +427,6 @@ void a6xx_gmu_remove(struct kgsl_device *device); */ int a6xx_gmu_enable_clks(struct 
adreno_device *adreno_dev, u32 level); -/** - * a6xx_gmu_enable_gdsc - Enable gmu gdsc - * @adreno_dev: Pointer to the adreno device - * - * Return: 0 on success or negative error on failure - */ -int a6xx_gmu_enable_gdsc(struct adreno_device *adreno_dev); - /** * a6xx_gmu_handle_watchdog - Handle watchdog interrupt * @adreno_dev: Pointer to the adreno device diff --git a/adreno_a6xx_hwsched.c b/adreno_a6xx_hwsched.c index f40182170f..a341ca6bba 100644 --- a/adreno_a6xx_hwsched.c +++ b/adreno_a6xx_hwsched.c @@ -345,7 +345,7 @@ static int a6xx_hwsched_gmu_first_boot(struct adreno_device *adreno_dev) a6xx_gmu_aop_send_acd_state(gmu, adreno_dev->acd_enabled); - ret = a6xx_gmu_enable_gdsc(adreno_dev); + ret = kgsl_pwrctrl_enable_cx_gdsc(device); if (ret) return ret; @@ -428,7 +428,7 @@ static int a6xx_hwsched_gmu_boot(struct adreno_device *adreno_dev) kgsl_pwrctrl_request_state(device, KGSL_STATE_AWARE); - ret = a6xx_gmu_enable_gdsc(adreno_dev); + ret = kgsl_pwrctrl_enable_cx_gdsc(device); if (ret) return ret; diff --git a/adreno_a6xx_rgmu.c b/adreno_a6xx_rgmu.c index 89d290b968..dc988f34be 100644 --- a/adreno_a6xx_rgmu.c +++ b/adreno_a6xx_rgmu.c @@ -483,12 +483,14 @@ static void a6xx_rgmu_notify_slumber(struct adreno_device *adreno_dev) static void a6xx_rgmu_disable_clks(struct adreno_device *adreno_dev) { struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; int ret; /* Check GX GDSC is status */ if (a6xx_rgmu_gx_is_on(adreno_dev)) { - if (IS_ERR_OR_NULL(rgmu->gx_gdsc)) + if (IS_ERR_OR_NULL(pwr->gx_gdsc)) return; /* @@ -496,12 +498,12 @@ static void a6xx_rgmu_disable_clks(struct adreno_device *adreno_dev) * reference count in clk driver so next disable call will * turn off the GDSC. 
*/ - ret = regulator_enable(rgmu->gx_gdsc); + ret = regulator_enable(pwr->gx_gdsc); if (ret) dev_err(&rgmu->pdev->dev, "Fail to enable gx gdsc:%d\n", ret); - ret = regulator_disable(rgmu->gx_gdsc); + ret = regulator_disable(pwr->gx_gdsc); if (ret) dev_err(&rgmu->pdev->dev, "Fail to disable gx gdsc:%d\n", ret); @@ -513,21 +515,6 @@ static void a6xx_rgmu_disable_clks(struct adreno_device *adreno_dev) clk_bulk_disable_unprepare(rgmu->num_clks, rgmu->clks); } -static int a6xx_rgmu_disable_gdsc(struct adreno_device *adreno_dev) -{ - struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(adreno_dev); - - kgsl_mmu_send_tlb_hint(&device->mmu, true); - - /* Wait up to 5 seconds for the regulator to go off */ - if (kgsl_regulator_disable_wait(rgmu->cx_gdsc, 5000)) - return 0; - - dev_err(&rgmu->pdev->dev, "RGMU CX gdsc off timeout\n"); - - return -ETIMEDOUT; -} - void a6xx_rgmu_snapshot(struct adreno_device *adreno_dev, struct kgsl_snapshot *snapshot) { @@ -549,10 +536,11 @@ void a6xx_rgmu_snapshot(struct adreno_device *adreno_dev, static void a6xx_rgmu_suspend(struct adreno_device *adreno_dev) { - a6xx_rgmu_irq_disable(adreno_dev); + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + a6xx_rgmu_irq_disable(adreno_dev); a6xx_rgmu_disable_clks(adreno_dev); - a6xx_rgmu_disable_gdsc(adreno_dev); + kgsl_pwrctrl_disable_cx_gdsc(device); kgsl_pwrctrl_set_state(KGSL_DEVICE(adreno_dev), KGSL_STATE_NONE); } @@ -588,24 +576,6 @@ static int a6xx_rgmu_enable_clks(struct adreno_device *adreno_dev) return 0; } -static int a6xx_rgmu_enable_gdsc(struct adreno_device *adreno_dev) -{ - struct a6xx_rgmu_device *rgmu = to_a6xx_rgmu(adreno_dev); - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - int ret; - - if (IS_ERR_OR_NULL(rgmu->cx_gdsc)) - return 0; - - ret = regulator_enable(rgmu->cx_gdsc); - if (ret) - dev_err(&rgmu->pdev->dev, - "Fail to enable CX gdsc:%d\n", ret); - - kgsl_mmu_send_tlb_hint(&device->mmu, false); - return ret; -} - /* * a6xx_rgmu_load_firmware() - Load the ucode into the RGMU TCM * @adreno_dev: Pointer to adreno device @@ -712,7 +682,7 @@ static void a6xx_rgmu_power_off(struct adreno_device *adreno_dev) a6xx_rgmu_irq_disable(adreno_dev); a6xx_rgmu_disable_clks(adreno_dev); - a6xx_rgmu_disable_gdsc(adreno_dev); + kgsl_pwrctrl_disable_cx_gdsc(device); kgsl_pwrctrl_clear_l3_vote(device); @@ -814,13 +784,13 @@ static int a6xx_rgmu_boot(struct adreno_device *adreno_dev) kgsl_pwrctrl_request_state(device, KGSL_STATE_AWARE); - ret = a6xx_rgmu_enable_gdsc(adreno_dev); + ret = kgsl_pwrctrl_enable_cx_gdsc(device); if (ret) return ret; ret = a6xx_rgmu_enable_clks(adreno_dev); if (ret) { - a6xx_rgmu_disable_gdsc(adreno_dev); + kgsl_pwrctrl_disable_cx_gdsc(device); return ret; } @@ -1266,30 +1236,6 @@ static int a6xx_rgmu_irq_probe(struct kgsl_device *device) return 0; } -static int a6xx_rgmu_regulators_probe(struct a6xx_rgmu_device *rgmu) -{ - int ret = 0; - - rgmu->cx_gdsc = devm_regulator_get(&rgmu->pdev->dev, "vddcx"); - if (IS_ERR(rgmu->cx_gdsc)) { - ret = PTR_ERR(rgmu->cx_gdsc); - if (ret != -EPROBE_DEFER) - dev_err(&rgmu->pdev->dev, - "Couldn't get CX gdsc error:%d\n", ret); - return ret; - } - - rgmu->gx_gdsc = devm_regulator_get(&rgmu->pdev->dev, "vdd"); - if (IS_ERR(rgmu->gx_gdsc)) { - ret = PTR_ERR(rgmu->gx_gdsc); - if (ret != -EPROBE_DEFER) - dev_err(&rgmu->pdev->dev, - "Couldn't get GX gdsc error:%d\n", ret); - } - - return ret; -} - static int a6xx_rgmu_clocks_probe(struct a6xx_rgmu_device *rgmu, struct device_node *node) { @@ -1394,7 +1340,7 @@ static int a6xx_rgmu_probe(struct kgsl_device 
*device, rgmu->pdev = pdev; /* Set up RGMU regulators */ - ret = a6xx_rgmu_regulators_probe(rgmu); + ret = kgsl_pwrctrl_probe_regulators(device, pdev); if (ret) return ret; diff --git a/adreno_a6xx_rgmu.h b/adreno_a6xx_rgmu.h index 7f6f78b149..f34d2af7bf 100644 --- a/adreno_a6xx_rgmu.h +++ b/adreno_a6xx_rgmu.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2018-2021, The Linux Foundation. All rights reserved. + * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef __ADRENO_A6XX_RGMU_H #define __ADRENO_A6XX_RGMU_H @@ -31,8 +32,6 @@ enum { * @oob_interrupt_num: number of RGMU asserted OOB interrupt * @fw_hostptr: Buffer which holds the RGMU firmware * @fw_size: Size of RGMU firmware buffer - * @cx_gdsc: CX headswitch that controls power of RGMU and - subsystem peripherals * @clks: RGMU clocks including the GPU * @gpu_clk: Pointer to GPU core clock * @rgmu_clk: Pointer to rgmu clock @@ -47,8 +46,6 @@ struct a6xx_rgmu_device { unsigned int oob_interrupt_num; unsigned int *fw_hostptr; uint32_t fw_size; - struct regulator *cx_gdsc; - struct regulator *gx_gdsc; struct clk_bulk_data *clks; /** @num_clks: Number of clocks in @clks */ int num_clks; diff --git a/adreno_a6xx_snapshot.c b/adreno_a6xx_snapshot.c index e7a32e7822..ef1d55f385 100644 --- a/adreno_a6xx_snapshot.c +++ b/adreno_a6xx_snapshot.c @@ -4,8 +4,6 @@ * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. */ -#include - #include "adreno.h" #include "adreno_a6xx.h" #include "adreno_snapshot.h" diff --git a/adreno_gen7.c b/adreno_gen7.c index 21f212a05c..733a59aca6 100644 --- a/adreno_gen7.c +++ b/adreno_gen7.c @@ -1610,6 +1610,7 @@ int gen7_probe_common(struct platform_device *pdev, device->pwrscale.avoid_ddr_stall = true; device->pwrctrl.rt_bus_hint = gen7_core->rt_bus_hint; + device->pwrctrl.cx_gdsc_offset = GEN7_GPU_CC_CX_GDSCR; ret = adreno_device_probe(pdev, adreno_dev); if (ret) diff --git a/adreno_gen7_gmu.c b/adreno_gen7_gmu.c index 6194621bf9..db6e8831e1 100644 --- a/adreno_gen7_gmu.c +++ b/adreno_gen7_gmu.c @@ -6,14 +6,12 @@ #include #include -#include #include #include #include #include #include #include -#include #include #include #include @@ -346,40 +344,6 @@ static void gmu_ao_sync_event(struct adreno_device *adreno_dev) local_irq_restore(flags); } -int gen7_gmu_enable_gdsc(struct adreno_device *adreno_dev) -{ - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); - int ret; - - ret = wait_for_completion_timeout(&gmu->gdsc_gate, msecs_to_jiffies(5000)); - if (!ret) { - dev_err(device->dev, "GPU CX wait timeout. 
Dumping CX votes:\n"); - /* Dump the cx regulator consumer list */ - qcom_clk_dump(NULL, gmu->cx_gdsc, false); - } - - ret = regulator_enable(gmu->cx_gdsc); - if (ret) - dev_err(&gmu->pdev->dev, - "Failed to enable GMU CX gdsc, error %d\n", ret); - - kgsl_mmu_send_tlb_hint(&device->mmu, false); - clear_bit(GMU_PRIV_CX_GDSC_WAIT, &gmu->flags); - return ret; -} - -void gen7_gmu_disable_gdsc(struct adreno_device *adreno_dev) -{ - struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - - kgsl_mmu_send_tlb_hint(&device->mmu, true); - reinit_completion(&gmu->gdsc_gate); - set_bit(GMU_PRIV_CX_GDSC_WAIT, &gmu->flags); - regulator_disable(gmu->cx_gdsc); -} - int gen7_gmu_device_start(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); @@ -1496,6 +1460,7 @@ static void gen7_gmu_pwrctrl_suspend(struct adreno_device *adreno_dev) int ret = 0; struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; /* Disconnect GPU from BUS is not needed if CX GDSC goes off later */ @@ -1533,19 +1498,19 @@ static void gen7_gmu_pwrctrl_suspend(struct adreno_device *adreno_dev) * the GX HS. This code path is the only client voting for GX through * the regulator interface. */ - if (gmu->gx_gdsc) { + if (pwr->gx_gdsc) { if (gen7_gmu_gx_is_on(adreno_dev)) { /* Switch gx gdsc control from GMU to CPU * force non-zero reference count in clk driver * so next disable call will turn * off the GDSC */ - ret = regulator_enable(gmu->gx_gdsc); + ret = regulator_enable(pwr->gx_gdsc); if (ret) dev_err(&gmu->pdev->dev, "suspend fail: gx enable %d\n", ret); - ret = regulator_disable(gmu->gx_gdsc); + ret = regulator_disable(pwr->gx_gdsc); if (ret) dev_err(&gmu->pdev->dev, "suspend fail: gx disable %d\n", ret); @@ -1600,7 +1565,7 @@ void gen7_gmu_suspend(struct adreno_device *adreno_dev) clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); - gen7_gmu_disable_gdsc(adreno_dev); + kgsl_pwrctrl_disable_cx_gdsc(device); gen7_rdpm_cx_freq_update(gmu, 0); @@ -1937,7 +1902,7 @@ static int gen7_gmu_first_boot(struct adreno_device *adreno_dev) gen7_gmu_aop_send_acd_state(gmu, adreno_dev->acd_enabled); - ret = gen7_gmu_enable_gdsc(adreno_dev); + ret = kgsl_pwrctrl_enable_cx_gdsc(device); if (ret) return ret; @@ -2020,7 +1985,7 @@ clks_gdsc_off: clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); gdsc_off: - gen7_gmu_disable_gdsc(adreno_dev); + kgsl_pwrctrl_disable_cx_gdsc(device); gen7_rdpm_cx_freq_update(gmu, 0); @@ -2035,7 +2000,7 @@ static int gen7_gmu_boot(struct adreno_device *adreno_dev) kgsl_pwrctrl_request_state(device, KGSL_STATE_AWARE); - ret = gen7_gmu_enable_gdsc(adreno_dev); + ret = kgsl_pwrctrl_enable_cx_gdsc(device); if (ret) return ret; @@ -2085,7 +2050,7 @@ clks_gdsc_off: clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); gdsc_off: - gen7_gmu_disable_gdsc(adreno_dev); + kgsl_pwrctrl_disable_cx_gdsc(device); gen7_rdpm_cx_freq_update(gmu, 0); @@ -2448,61 +2413,6 @@ static void gen7_gmu_rdpm_probe(struct gen7_gmu_device *gmu, res->start, resource_size(res)); } -static int gmu_cx_gdsc_event(struct notifier_block *nb, - unsigned long event, void *data) -{ - struct gen7_gmu_device *gmu = container_of(nb, struct gen7_gmu_device, gdsc_nb); - struct adreno_device *adreno_dev = gen7_gmu_to_adreno(gmu); - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - u32 val; - - if (!(event & REGULATOR_EVENT_DISABLE) || - 
!test_bit(GMU_PRIV_CX_GDSC_WAIT, &gmu->flags)) - return 0; - - if (kgsl_regmap_read_poll_timeout(&device->regmap, GEN7_GPU_CC_CX_GDSCR, - val, !(val & BIT(31)), 100, 100 * 1000)) - dev_err(device->dev, "GPU CX wait timeout.\n"); - - clear_bit(GMU_PRIV_CX_GDSC_WAIT, &gmu->flags); - complete_all(&gmu->gdsc_gate); - - return 0; -} - -static int gen7_gmu_regulators_probe(struct gen7_gmu_device *gmu, - struct platform_device *pdev) -{ - int ret; - - gmu->cx_gdsc = devm_regulator_get(&pdev->dev, "vddcx"); - if (IS_ERR(gmu->cx_gdsc)) { - if (PTR_ERR(gmu->cx_gdsc) != -EPROBE_DEFER) - dev_err(&pdev->dev, "Couldn't get the vddcx gdsc\n"); - return PTR_ERR(gmu->cx_gdsc); - } - - gmu->gx_gdsc = devm_regulator_get(&pdev->dev, "vdd"); - if (IS_ERR(gmu->gx_gdsc)) { - if (PTR_ERR(gmu->gx_gdsc) != -EPROBE_DEFER) - dev_err(&pdev->dev, "Couldn't get the vdd gdsc\n"); - return PTR_ERR(gmu->gx_gdsc); - } - - init_completion(&gmu->gdsc_gate); - complete_all(&gmu->gdsc_gate); - - gmu->gdsc_nb.notifier_call = gmu_cx_gdsc_event; - ret = devm_regulator_register_notifier(gmu->cx_gdsc, &gmu->gdsc_nb); - - if (ret) { - dev_err(&pdev->dev, "Failed to register gmu cx gdsc notifier: %d\n", ret); - return ret; - } - - return 0; -} - void gen7_gmu_remove(struct kgsl_device *device) { struct adreno_device *adreno_dev = ADRENO_DEVICE(device); @@ -2615,7 +2525,7 @@ int gen7_gmu_probe(struct kgsl_device *device, gen7_gmu_rdpm_probe(gmu, device); /* Set up GMU regulators */ - ret = gen7_gmu_regulators_probe(gmu, pdev); + ret = kgsl_pwrctrl_probe_regulators(device, pdev); if (ret) return ret; @@ -2781,7 +2691,7 @@ static int gen7_gmu_power_off(struct adreno_device *adreno_dev) clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); - gen7_gmu_disable_gdsc(adreno_dev); + kgsl_pwrctrl_disable_cx_gdsc(device); gen7_rdpm_cx_freq_update(gmu, 0); diff --git a/adreno_gen7_gmu.h b/adreno_gen7_gmu.h index 1487e778df..483b9f3159 100644 --- a/adreno_gen7_gmu.h +++ b/adreno_gen7_gmu.h @@ -24,9 +24,6 @@ * @num_bwlevel: number of GPU BW levels * @num_cnocbwlevel: number CNOC BW levels * @rpmh_votes: RPMh TCS command set for GPU, GMU voltage and bw scaling - * @cx_gdsc: CX headswitch that controls power of GMU and - * subsystem peripherals - * @gx_gdsc: GX headswitch that controls power of GPU subsystem * @clks: GPU subsystem clocks required for GMU functionality * @wakeup_pwrlevel: GPU wake up power/DCVS level in case different * than default power level @@ -55,9 +52,6 @@ struct gen7_gmu_device { /** @vrb: GMU virtual register bank memory */ struct kgsl_memdesc *vrb; struct gen7_hfi hfi; - /** @pwrlevels: Array of GMU power levels */ - struct regulator *cx_gdsc; - struct regulator *gx_gdsc; struct clk_bulk_data *clks; /** @num_clks: Number of entries in the @clks array */ int num_clks; @@ -104,10 +98,6 @@ struct gen7_gmu_device { u32 num_oob_perfcntr; /** @acd_debug_val: DVM value to calibrate ACD for a level */ u32 acd_debug_val; - /** @gdsc_nb: Notifier block for cx gdsc regulator */ - struct notifier_block gdsc_nb; - /** @gdsc_gate: Completion to signal cx gdsc collapse status */ - struct completion gdsc_gate; /** @stats_enable: GMU stats feature enable */ bool stats_enable; /** @stats_mask: GMU performance countables to enable */ @@ -300,14 +290,6 @@ int gen7_gmu_memory_init(struct adreno_device *adreno_dev); */ void gen7_gmu_aop_send_acd_state(struct gen7_gmu_device *gmu, bool flag); -/** - * gen7_gmu_enable_clocks - Enable gmu clocks - * @adreno_dev: Pointer to the adreno device - * - * Return: 0 on success or negative error on failure - 
*/ -int gen7_gmu_enable_gdsc(struct adreno_device *adreno_dev); - /** * gen7_gmu_load_fw - Load gmu firmware * @adreno_dev: Pointer to the adreno device @@ -474,20 +456,6 @@ void gen7_gmu_remove(struct kgsl_device *device); */ int gen7_gmu_enable_clks(struct adreno_device *adreno_dev, u32 level); -/** - * gen7_gmu_enable_gdsc - Enable gmu gdsc - * @adreno_dev: Pointer to the adreno device - * - * Return: 0 on success or negative error on failure - */ -int gen7_gmu_enable_gdsc(struct adreno_device *adreno_dev); - -/** - * gen7_gmu_disable_gdsc - Disable gmu gdsc - * @adreno_dev: Pointer to the adreno device - */ -void gen7_gmu_disable_gdsc(struct adreno_device *adreno_dev); - /** * gen7_gmu_handle_watchdog - Handle watchdog interrupt * @adreno_dev: Pointer to the adreno device diff --git a/adreno_gen7_hwsched.c b/adreno_gen7_hwsched.c index 492c3f0859..fec3079ae6 100644 --- a/adreno_gen7_hwsched.c +++ b/adreno_gen7_hwsched.c @@ -484,7 +484,7 @@ static int gen7_hwsched_gmu_first_boot(struct adreno_device *adreno_dev) gen7_gmu_aop_send_acd_state(gmu, adreno_dev->acd_enabled); - ret = gen7_gmu_enable_gdsc(adreno_dev); + ret = kgsl_pwrctrl_enable_cx_gdsc(device); if (ret) return ret; @@ -574,7 +574,7 @@ clks_gdsc_off: clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); gdsc_off: - gen7_gmu_disable_gdsc(adreno_dev); + kgsl_pwrctrl_disable_cx_gdsc(device); gen7_rdpm_cx_freq_update(gmu, 0); @@ -589,7 +589,7 @@ static int gen7_hwsched_gmu_boot(struct adreno_device *adreno_dev) kgsl_pwrctrl_request_state(device, KGSL_STATE_AWARE); - ret = gen7_gmu_enable_gdsc(adreno_dev); + ret = kgsl_pwrctrl_enable_cx_gdsc(device); if (ret) return ret; @@ -641,7 +641,7 @@ clks_gdsc_off: clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); gdsc_off: - gen7_gmu_disable_gdsc(adreno_dev); + kgsl_pwrctrl_disable_cx_gdsc(device); gen7_rdpm_cx_freq_update(gmu, 0); @@ -729,7 +729,7 @@ static int gen7_hwsched_gmu_power_off(struct adreno_device *adreno_dev) clk_bulk_disable_unprepare(gmu->num_clks, gmu->clks); - gen7_gmu_disable_gdsc(adreno_dev); + kgsl_pwrctrl_disable_cx_gdsc(device); gen7_rdpm_cx_freq_update(gmu, 0); diff --git a/kgsl_gmu_core.h b/kgsl_gmu_core.h index 98a40d0d81..197cf353c1 100644 --- a/kgsl_gmu_core.h +++ b/kgsl_gmu_core.h @@ -236,7 +236,6 @@ enum { GMU_PRIV_RSCC_SLEEP_DONE, GMU_PRIV_PM_SUSPEND, GMU_PRIV_PDC_RSC_LOADED, - GMU_PRIV_CX_GDSC_WAIT, /* Indicates if GMU INIT HFI messages are recorded successfully */ GMU_PRIV_WARMBOOT_GMU_INIT_DONE, /* Indicates if GPU BOOT HFI messages are recorded successfully */ diff --git a/kgsl_pwrctrl.c b/kgsl_pwrctrl.c index 8c4e744d64..d23236a16f 100644 --- a/kgsl_pwrctrl.c +++ b/kgsl_pwrctrl.c @@ -4,7 +4,9 @@ * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. */ +#include #include +#include #include #include #include @@ -1298,9 +1300,34 @@ int kgsl_pwrctrl_axi(struct kgsl_device *device, bool state) return 0; } -static int enable_regulator(struct device *dev, struct regulator *regulator, - const char *name) +int kgsl_pwrctrl_enable_cx_gdsc(struct kgsl_device *device) { + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct regulator *regulator = pwr->cx_gdsc; + int ret; + + if (IS_ERR_OR_NULL(regulator)) + return 0; + + ret = wait_for_completion_timeout(&pwr->cx_gdsc_gate, msecs_to_jiffies(5000)); + if (!ret) { + dev_err(device->dev, "GPU CX wait timeout. 
Dumping CX votes:\n"); + /* Dump the cx regulator consumer list */ + qcom_clk_dump(NULL, regulator, false); + } + + ret = regulator_enable(regulator); + if (ret) + dev_err(device->dev, "Failed to enable CX regulator: %d\n", ret); + + kgsl_mmu_send_tlb_hint(&device->mmu, false); + pwr->cx_gdsc_wait = false; + return ret; +} + +static int kgsl_pwtctrl_enable_gx_gdsc(struct kgsl_device *device) +{ + struct regulator *regulator = device->pwrctrl.gx_gdsc; int ret; if (IS_ERR_OR_NULL(regulator)) @@ -1308,10 +1335,34 @@ static int enable_regulator(struct device *dev, struct regulator *regulator, ret = regulator_enable(regulator); if (ret) - dev_err(dev, "Unable to enable regulator %s: %d\n", name, ret); + dev_err(device->dev, "Failed to enable GX regulator: %d\n", ret); return ret; } +void kgsl_pwrctrl_disable_cx_gdsc(struct kgsl_device *device) +{ + struct regulator *regulator = device->pwrctrl.cx_gdsc; + + if (IS_ERR_OR_NULL(regulator)) + return; + + kgsl_mmu_send_tlb_hint(&device->mmu, true); + reinit_completion(&device->pwrctrl.cx_gdsc_gate); + device->pwrctrl.cx_gdsc_wait = true; + regulator_disable(regulator); +} + +static void kgsl_pwrctrl_disable_gx_gdsc(struct kgsl_device *device) +{ + struct regulator *regulator = device->pwrctrl.gx_gdsc; + + if (IS_ERR_OR_NULL(regulator)) + return; + + if (!kgsl_regulator_disable_wait(regulator, 200)) + dev_err(device->dev, "Regulator vdd is stuck on\n"); +} + static int enable_regulators(struct kgsl_device *device) { struct kgsl_pwrctrl *pwr = &device->pwrctrl; @@ -1320,15 +1371,14 @@ static int enable_regulators(struct kgsl_device *device) if (test_and_set_bit(KGSL_PWRFLAGS_POWER_ON, &pwr->power_flags)) return 0; - ret = enable_regulator(&device->pdev->dev, pwr->cx_gdsc, "vddcx"); + ret = kgsl_pwrctrl_enable_cx_gdsc(device); if (!ret) { /* Set parent in retention voltage to power up vdd supply */ ret = kgsl_regulator_set_voltage(device->dev, pwr->gx_gdsc_parent, pwr->gx_gdsc_parent_min_corner); if (!ret) - ret = enable_regulator(&device->pdev->dev, - pwr->gx_gdsc, "vdd"); + ret = kgsl_pwtctrl_enable_gx_gdsc(device); } if (ret) { @@ -1340,6 +1390,58 @@ static int enable_regulators(struct kgsl_device *device) return 0; } +int kgsl_pwrctrl_probe_regulators(struct kgsl_device *device, + struct platform_device *pdev) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + + pwr->cx_gdsc = devm_regulator_get(&pdev->dev, "vddcx"); + if (IS_ERR(pwr->cx_gdsc)) { + if (PTR_ERR(pwr->cx_gdsc) != -EPROBE_DEFER) + dev_err(&pdev->dev, "Couldn't get the vddcx gdsc\n"); + return PTR_ERR(pwr->cx_gdsc); + } + + pwr->gx_gdsc = devm_regulator_get(&pdev->dev, "vdd"); + if (IS_ERR(pwr->gx_gdsc)) { + if (PTR_ERR(pwr->gx_gdsc) != -EPROBE_DEFER) + dev_err(&pdev->dev, "Couldn't get the vdd gdsc\n"); + return PTR_ERR(pwr->gx_gdsc); + } + + return 0; +} + +static int kgsl_cx_gdsc_event(struct notifier_block *nb, + unsigned long event, void *data) +{ + struct kgsl_pwrctrl *pwr = container_of(nb, struct kgsl_pwrctrl, cx_gdsc_nb); + struct kgsl_device *device = container_of(pwr, struct kgsl_device, pwrctrl); + u32 val; + + if (!(event & REGULATOR_EVENT_DISABLE) || !pwr->cx_gdsc_wait) + return 0; + + if (pwr->cx_gdsc_offset) { + if (kgsl_regmap_read_poll_timeout(&device->regmap, pwr->cx_gdsc_offset, + val, !(val & BIT(31)), 100, 100 * 1000)) + dev_err(device->dev, "GPU CX wait timeout.\n"); + } + + pwr->cx_gdsc_wait = false; + complete_all(&pwr->cx_gdsc_gate); + + return 0; +} + +int kgsl_register_gdsc_notifier(struct kgsl_device *device) +{ + struct kgsl_pwrctrl *pwr = 
&device->pwrctrl; + + pwr->cx_gdsc_nb.notifier_call = kgsl_cx_gdsc_event; + return devm_regulator_register_notifier(pwr->cx_gdsc, &pwr->cx_gdsc_nb); +} + static int kgsl_pwrctrl_pwrrail(struct kgsl_device *device, bool state) { struct kgsl_pwrctrl *pwr = &device->pwrctrl; @@ -1360,10 +1462,8 @@ static int kgsl_pwrctrl_pwrrail(struct kgsl_device *device, bool state) &pwr->power_flags)) { kgsl_mmu_send_tlb_hint(&device->mmu, true); trace_kgsl_rail(device, state); - if (!kgsl_regulator_disable_wait(pwr->gx_gdsc, 200)) - dev_err(device->dev, "Regulator vdd is stuck on\n"); - if (!kgsl_regulator_disable_wait(pwr->cx_gdsc, 200)) - dev_err(device->dev, "Regulator vddcx is stuck on\n"); + kgsl_pwrctrl_disable_gx_gdsc(device); + kgsl_pwrctrl_disable_cx_gdsc(device); } } else { status = enable_regulators(device); @@ -1570,6 +1670,15 @@ int kgsl_pwrctrl_init(struct kgsl_device *device) } } + init_completion(&pwr->cx_gdsc_gate); + complete_all(&pwr->cx_gdsc_gate); + + result = kgsl_register_gdsc_notifier(device); + if (result) { + dev_err(&pdev->dev, "Failed to register gdsc notifier: %d\n", result); + return result; + } + pwr->power_flags = 0; pm_runtime_enable(&pdev->dev); diff --git a/kgsl_pwrctrl.h b/kgsl_pwrctrl.h index 6dfbe88502..ffdac61f7c 100644 --- a/kgsl_pwrctrl.h +++ b/kgsl_pwrctrl.h @@ -111,6 +111,14 @@ struct kgsl_pwrctrl { struct regulator *gx_gdsc_parent; /** @gx_gdsc_parent_min_corner: Minimum supply voltage for GX parent */ u32 gx_gdsc_parent_min_corner; + /** @cx_gdsc_nb: Notifier block for cx gdsc regulator */ + struct notifier_block cx_gdsc_nb; + /** @cx_gdsc_gate: Completion to signal cx gdsc collapse status */ + struct completion cx_gdsc_gate; + /** @cx_gdsc_wait: Whether to wait for cx gdsc to turn off */ + bool cx_gdsc_wait; + /** @cx_gdsc_offset: Offset of CX GDSC register */ + u32 cx_gdsc_offset; int isense_clk_indx; int isense_clk_on_level; unsigned long power_flags; @@ -268,4 +276,28 @@ void kgsl_pwrctrl_irq(struct kgsl_device *device, bool state); * Clear the l3 vote when going into slumber */ void kgsl_pwrctrl_clear_l3_vote(struct kgsl_device *device); + +/** + * kgsl_pwrctrl_enable_cx_gdsc - Enable cx gdsc + * @device: Pointer to the kgsl device + * + * Return: 0 on success or negative error on failure + */ +int kgsl_pwrctrl_enable_cx_gdsc(struct kgsl_device *device); + +/** + * kgsl_pwrctrl_disable_cx_gdsc - Disable cx gdsc + * @device: Pointer to the kgsl device + */ +void kgsl_pwrctrl_disable_cx_gdsc(struct kgsl_device *device); + +/** + * kgsl_pwrctrl_probe_regulators - Probe regulators + * @device: Pointer to the kgsl device + * @pdev: Pointer to the platform device + * + * Return: 0 on success or negative error on failure + */ +int kgsl_pwrctrl_probe_regulators(struct kgsl_device *device, + struct platform_device *pdev); #endif /* __KGSL_PWRCTRL_H */ From c2351533e560fc1f8ae479240f32b20a2504a1d6 Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Tue, 8 Aug 2023 22:05:10 +0530 Subject: [PATCH 539/750] msm: kgsl: Reduce contention in cpu gpu shared lock CP doesn't update power up register list. And KGSL updates it within device mutex. Thus, there is no need to take shared lock for reading power up register list. 
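In sketch form (a hypothetical helper, not the literal driver code): the read-only scan of the power up register list can run without kgsl_hwlock(), because only KGSL writes the list and always does so while holding the device mutex; the shared hardware lock is then taken only for the update itself.

/* Hypothetical helper: scan the (register, value) pairs without the shared lock */
static bool powerup_list_has_reg(const u32 *data, u32 list_length, u32 select)
{
	u32 i, offset = 0;

	for (i = 0; i < list_length >> 1; i++) {
		if (data[offset] == select)
			return true;
		offset += 2;
	}

	return false;
}

With the lookup done up front, kgsl_hwlock()/kgsl_hwunlock() only bracket the code that actually modifies the list, which is what reduces contention with the CP on the shared lock.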
Change-Id: I85364cbd317a8e68f84e60ce46eb59c4fca9a1c5 Signed-off-by: Kamal Agrawal --- adreno_a6xx.c | 27 ++++++++++++++++----------- adreno_gen7.c | 42 ++++++++++++++++++++++-------------------- 2 files changed, 38 insertions(+), 31 deletions(-) diff --git a/adreno_a6xx.c b/adreno_a6xx.c index 8ec6240458..6a142c4d24 100644 --- a/adreno_a6xx.c +++ b/adreno_a6xx.c @@ -2011,6 +2011,19 @@ int a6xx_perfcounter_update(struct adreno_device *adreno_dev, struct cpu_gpu_lock *lock = ptr; u32 *data = ptr + sizeof(*lock); int i, offset = 0; + bool select_reg_present = false; + + for (i = 0; i < lock->list_length >> 1; i++) { + if (data[offset] == reg->select) { + select_reg_present = true; + break; + } + + if (data[offset] == A6XX_RBBM_PERFCTR_CNTL) + break; + + offset += 2; + } if (kgsl_hwlock(lock)) { kgsl_hwunlock(lock); @@ -2022,16 +2035,9 @@ int a6xx_perfcounter_update(struct adreno_device *adreno_dev, * update it, otherwise append the